#include "llvm/IR/IntrinsicsAMDGPU.h"
#include "llvm/IR/IntrinsicsR600.h"

#define GET_INSTRINFO_NAMED_OPS
#define GET_INSTRMAP_INFO
#include "AMDGPUGenInstrInfo.inc"
// \returns Bit mask for given bit \p Shift and bit \p Width.
unsigned getBitMask(unsigned Shift, unsigned Width) {
  return ((1 << Width) - 1) << Shift;
}

// Packs \p Src into \p Dst for given bit \p Shift and bit \p Width.
// \returns the packed \p Dst.
unsigned packBits(unsigned Src, unsigned Dst, unsigned Shift, unsigned Width) {
  unsigned Mask = getBitMask(Shift, Width);
  return ((Src << Shift) & Mask) | (Dst & ~Mask);
}

// Unpacks bits from \p Src for given bit \p Shift and bit \p Width.
unsigned unpackBits(unsigned Src, unsigned Shift, unsigned Width) {
  return (Src & getBitMask(Shift, Width)) >> Shift;
}

// \returns Expcnt bit width.
unsigned getExpcntBitWidth(unsigned VersionMajor) { return 3; }

// \returns Vmcnt bit shift (higher bits).
unsigned getVmcntBitShiftHi(unsigned VersionMajor) { return 14; }
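// Standalone illustration (not part of the original file): a minimal sketch
// of the pack/unpack round trip, with local copies of the helpers above so
// it compiles as its own translation unit.
#include <cassert>

static unsigned exampleBitMask(unsigned Shift, unsigned Width) {
  return ((1u << Width) - 1) << Shift;
}
static unsigned examplePackBits(unsigned Src, unsigned Dst, unsigned Shift,
                                unsigned Width) {
  unsigned Mask = exampleBitMask(Shift, Width);
  return ((Src << Shift) & Mask) | (Dst & ~Mask);
}
static unsigned exampleUnpackBits(unsigned Src, unsigned Shift,
                                  unsigned Width) {
  return (Src & exampleBitMask(Shift, Width)) >> Shift;
}

int main() {
  // Pack the value 5 into bits [6:4] of an all-ones word, then read it back.
  unsigned Packed = examplePackBits(5, ~0u, 4, 3);
  assert(exampleUnpackBits(Packed, 4, 3) == 5);
  // Bits outside the field are untouched.
  assert((Packed | exampleBitMask(4, 3)) == ~0u);
  return 0;
}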
#define GET_MIMGBaseOpcodesTable_IMPL
#define GET_MIMGDimInfoTable_IMPL
#define GET_MIMGInfoTable_IMPL
#define GET_MIMGLZMappingTable_IMPL
#define GET_MIMGMIPMappingTable_IMPL
#define GET_MIMGBiasMappingTable_IMPL
#define GET_MIMGOffsetMappingTable_IMPL
#define GET_MIMGG16MappingTable_IMPL
#define GET_MAIInstInfoTable_IMPL
#include "AMDGPUGenSearchableTables.inc"
int getMIMGOpcode(unsigned BaseOpcode, unsigned MIMGEncoding,
                  unsigned VDataDwords, unsigned VAddrDwords) {
  const MIMGInfo *Info = getMIMGOpcodeHelper(BaseOpcode, MIMGEncoding,
                                             VDataDwords, VAddrDwords);
  return Info ? Info->Opcode : -1;
}

// getMaskedMIMGOp(Opc, NewChannels) resolves the channel-masked variant the
// same way:
  return NewInfo ? NewInfo->Opcode : -1;

// From getAddrSizeMIMGOp(BaseOpcode, Dim, IsA16, IsG16Supported):
  AddrWords += AddrComponents;
  // ...
  // Gradients are packed two-per-dword when A16 implies 16-bit gradients, or
  // when the opcode itself is a G16 variant:
  if ((IsA16 && !IsG16Supported) || BaseOpcode->G16)
    AddrWords += alignTo<2>(Dim->NumGradients / 2);
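// Standalone illustration (an assumed use, not code from this file): a pass
// that changes how many dwords of vdata an image load returns can re-derive
// the concrete opcode through these tables. Assumes an LLVM tree where
// Utils/AMDGPUBaseInfo.h is available; the function name resizeMIMGVData is
// hypothetical.
#include "Utils/AMDGPUBaseInfo.h"

static int resizeMIMGVData(unsigned Opc, unsigned NewVDataDwords) {
  const llvm::AMDGPU::MIMGInfo *Info = llvm::AMDGPU::getMIMGInfo(Opc);
  if (!Info)
    return -1; // Not a MIMG instruction.
  // Keep the base opcode, encoding and address size; only vdata changes.
  return llvm::AMDGPU::getMIMGOpcode(Info->BaseOpcode, Info->MIMGEncoding,
                                     NewVDataDwords, Info->VAddrDwords);
}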
#define GET_MTBUFInfoTable_DECL
#define GET_MTBUFInfoTable_IMPL
#define GET_MUBUFInfoTable_DECL
#define GET_MUBUFInfoTable_IMPL
#define GET_SMInfoTable_DECL
#define GET_SMInfoTable_IMPL
#define GET_VOP1InfoTable_DECL
#define GET_VOP1InfoTable_IMPL
#define GET_VOP2InfoTable_DECL
#define GET_VOP2InfoTable_IMPL
#define GET_VOP3InfoTable_DECL
#define GET_VOP3InfoTable_IMPL
#define GET_VOPC64DPPTable_DECL
#define GET_VOPC64DPPTable_IMPL
#define GET_VOPC64DPP8Table_DECL
#define GET_VOPC64DPP8Table_IMPL
#define GET_VOPDComponentTable_DECL
#define GET_VOPDComponentTable_IMPL
#define GET_VOPDPairs_DECL
#define GET_VOPDPairs_IMPL
#define GET_VOPTrue16Table_DECL
#define GET_VOPTrue16Table_IMPL
#define GET_WMMAOpcode2AddrMappingTable_DECL
#define GET_WMMAOpcode2AddrMappingTable_IMPL
#define GET_WMMAOpcode3AddrMappingTable_DECL
#define GET_WMMAOpcode3AddrMappingTable_IMPL
#include "AMDGPUGenSearchableTables.inc"
int getMTBUFBaseOpcode(unsigned Opc) {
  const MTBUFInfo *Info = getMTBUFInfoFromOpcode(Opc);
  return Info ? Info->BaseOpcode : -1;
}

int getMTBUFOpcode(unsigned BaseOpc, unsigned Elements) {
  const MTBUFInfo *Info =
      getMTBUFInfoFromBaseOpcodeAndElements(BaseOpc, Elements);
  return Info ? Info->Opcode : -1;
}

bool getMTBUFHasVAddr(unsigned Opc) {
  const MTBUFInfo *Info = getMTBUFOpcodeHelper(Opc);
  return Info ? Info->has_vaddr : false;
}

bool getMTBUFHasSrsrc(unsigned Opc) {
  const MTBUFInfo *Info = getMTBUFOpcodeHelper(Opc);
  return Info ? Info->has_srsrc : false;
}

bool getMTBUFHasSoffset(unsigned Opc) {
  const MTBUFInfo *Info = getMTBUFOpcodeHelper(Opc);
  return Info ? Info->has_soffset : false;
}

int getMUBUFBaseOpcode(unsigned Opc) {
  const MUBUFInfo *Info = getMUBUFInfoFromOpcode(Opc);
  return Info ? Info->BaseOpcode : -1;
}

int getMUBUFOpcode(unsigned BaseOpc, unsigned Elements) {
  const MUBUFInfo *Info =
      getMUBUFInfoFromBaseOpcodeAndElements(BaseOpc, Elements);
  return Info ? Info->Opcode : -1;
}

bool getMUBUFHasVAddr(unsigned Opc) {
  const MUBUFInfo *Info = getMUBUFOpcodeHelper(Opc);
  return Info ? Info->has_vaddr : false;
}

bool getMUBUFHasSrsrc(unsigned Opc) {
  const MUBUFInfo *Info = getMUBUFOpcodeHelper(Opc);
  return Info ? Info->has_srsrc : false;
}

bool getMUBUFHasSoffset(unsigned Opc) {
  const MUBUFInfo *Info = getMUBUFOpcodeHelper(Opc);
  return Info ? Info->has_soffset : false;
}

bool getMUBUFIsBufferInv(unsigned Opc) {
  const MUBUFInfo *Info = getMUBUFOpcodeHelper(Opc);
  return Info ? Info->IsBufferInv : false;
}

bool getSMEMIsBuffer(unsigned Opc) {
  const SMInfo *Info = getSMEMOpcodeHelper(Opc);
  return Info ? Info->IsBuffer : false;
}

bool getVOP1IsSingle(unsigned Opc) {
  const VOPInfo *Info = getVOP1OpcodeHelper(Opc);
  return Info ? Info->IsSingle : false;
}

bool getVOP2IsSingle(unsigned Opc) {
  const VOPInfo *Info = getVOP2OpcodeHelper(Opc);
  return Info ? Info->IsSingle : false;
}

bool getVOP3IsSingle(unsigned Opc) {
  const VOPInfo *Info = getVOP3OpcodeHelper(Opc);
  return Info ? Info->IsSingle : false;
}

bool isVOPC64DPP(unsigned Opc) {
  return isVOPC64DPPOpcodeHelper(Opc) || isVOPC64DPP8OpcodeHelper(Opc);
}

bool getMAIIsDGEMM(unsigned Opc) {
  const MAIInstInfo *Info = getMAIInstInfoHelper(Opc);
  return Info ? Info->is_dgemm : false;
}

bool getMAIIsGFX940XDL(unsigned Opc) {
  const MAIInstInfo *Info = getMAIInstInfoHelper(Opc);
  return Info ? Info->is_gfx940_xdl : false;
}

CanBeVOPD getCanBeVOPD(unsigned Opc) {
  const VOPDComponentInfo *Info = getVOPDComponentHelper(Opc);
  if (Info)
    return {Info->CanBeVOPDX, true};
  return {false, false};
}
bool isMAC(unsigned Opc) {
  return Opc == AMDGPU::V_MAC_F32_e64_gfx6_gfx7 ||
         Opc == AMDGPU::V_MAC_F32_e64_gfx10 ||
         Opc == AMDGPU::V_MAC_F32_e64_vi ||
         Opc == AMDGPU::V_MAC_LEGACY_F32_e64_gfx6_gfx7 ||
         Opc == AMDGPU::V_MAC_LEGACY_F32_e64_gfx10 ||
         Opc == AMDGPU::V_MAC_F16_e64_vi ||
         Opc == AMDGPU::V_FMAC_F64_e64_gfx90a ||
         Opc == AMDGPU::V_FMAC_F32_e64_gfx10 ||
         Opc == AMDGPU::V_FMAC_F32_e64_gfx11 ||
         Opc == AMDGPU::V_FMAC_F32_e64_vi ||
         Opc == AMDGPU::V_FMAC_LEGACY_F32_e64_gfx10 ||
         Opc == AMDGPU::V_FMAC_DX9_ZERO_F32_e64_gfx11 ||
         Opc == AMDGPU::V_FMAC_F16_e64_gfx10 ||
         Opc == AMDGPU::V_FMAC_F16_t16_e64_gfx11 ||
         Opc == AMDGPU::V_DOT2C_F32_F16_e64_vi ||
         Opc == AMDGPU::V_DOT2C_I32_I16_e64_vi ||
         Opc == AMDGPU::V_DOT4C_I32_I8_e64_vi ||
         Opc == AMDGPU::V_DOT8C_I32_I4_e64_vi;
}
bool isPermlane16(unsigned Opc) {
  return Opc == AMDGPU::V_PERMLANE16_B32_gfx10 ||
         Opc == AMDGPU::V_PERMLANEX16_B32_gfx10 ||
         Opc == AMDGPU::V_PERMLANE16_B32_e64_gfx11 ||
         Opc == AMDGPU::V_PERMLANEX16_B32_e64_gfx11;
}
bool isTrue16Inst(unsigned Opc) {
  const VOPTrue16Info *Info = getTrue16OpcodeHelper(Opc);
  return Info ? Info->IsTrue16 : false;
}

unsigned mapWMMA2AddrTo3AddrOpcode(unsigned Opc) {
  const WMMAOpcodeMappingInfo *Info = getWMMAMappingInfoFrom2AddrOpcode(Opc);
  return Info ? Info->Opcode3Addr : ~0u;
}

unsigned mapWMMA3AddrTo2AddrOpcode(unsigned Opc) {
  const WMMAOpcodeMappingInfo *Info = getWMMAMappingInfoFrom3AddrOpcode(Opc);
  return Info ? Info->Opcode2Addr : ~0u;
}

int getMCOpcode(uint16_t Opcode, unsigned Gen) {
  return getMCOpcodeGen(Opcode, static_cast<Subtarget>(Gen));
}
int getVOPDFull(unsigned OpX, unsigned OpY) {
  const VOPDInfo *Info = getVOPDInfoFromComponentOpcodes(OpX, OpY);
  return Info ? Info->Opcode : -1;
}

std::pair<unsigned, unsigned> getVOPDComponents(unsigned VOPDOpcode) {
  const VOPDInfo *Info = getVOPDOpcodeHelper(VOPDOpcode);
  assert(Info);
  auto OpX = getVOPDBaseFromComponent(Info->OpX);
  auto OpY = getVOPDBaseFromComponent(Info->OpY);
  assert(OpX && OpY);
  return {OpX->BaseVOP, OpY->BaseVOP};
}
namespace VOPD {

// From ComponentProps(const MCInstrDesc &OpDesc): src2 may be tied to the
// accumulator destination, and a mandatory KIMM32 literal is searched for
// among the source operands.
  HasSrc2Acc = TiedIdx != -1;
  // ...
  for (CompOprIdx = Component::SRC1; CompOprIdx < OperandsNum; ++CompOprIdx) {
    if (OpDesc.operands()[CompOprIdx].OperandType == AMDGPU::OPERAND_KIMM32) {
      MandatoryLiteralIdx = CompOprIdx;
      break;
    }
  }

std::optional<unsigned> InstInfo::getInvalidCompOperandIndex(
    std::function<unsigned(unsigned, unsigned)> GetRegIdx) const {
  auto OpXRegs = getRegIndices(ComponentIndex::X, GetRegIdx);
  auto OpYRegs = getRegIndices(ComponentIndex::Y, GetRegIdx);

  unsigned CompOprIdx;
  for (CompOprIdx = 0; CompOprIdx < Component::MAX_OPR_NUM; ++CompOprIdx) {
    unsigned BanksNum = BANKS_NUM[CompOprIdx];
    // The X and Y components of a VOPD pair must not collide on a bank.
    if (OpXRegs[CompOprIdx] && OpYRegs[CompOprIdx] &&
        (OpXRegs[CompOprIdx] % BanksNum == OpYRegs[CompOprIdx] % BanksNum))
      return CompOprIdx;
  }
  return {};
}

InstInfo::RegIndices InstInfo::getRegIndices(
    unsigned CompIdx,
    std::function<unsigned(unsigned, unsigned)> GetRegIdx) const {
  const auto &Comp = CompInfo[CompIdx];
  InstInfo::RegIndices RegIndices;

  RegIndices[DST] = GetRegIdx(CompIdx, Comp.getIndexOfDstInMCOperands());

  for (unsigned CompOprIdx : {SRC0, SRC1, SRC2}) {
    unsigned CompSrcIdx = CompOprIdx - DST_NUM;
    RegIndices[CompOprIdx] =
        Comp.hasRegSrcOperand(CompSrcIdx)
            ? GetRegIdx(CompIdx, Comp.getIndexOfSrcInMCOperands(CompSrcIdx))
            : 0;
  }
  return RegIndices;
}

} // namespace VOPD

VOPD::InstInfo getVOPDInstInfo(unsigned VOPDOpcode,
                               const MCInstrInfo *InstrInfo) {
  // ...
  const auto &OpXDesc = InstrInfo->get(OpX);
  const auto &OpYDesc = InstrInfo->get(OpY);
  // ...
}
void AMDGPUTargetID::setTargetIDFromFeaturesString(StringRef FS) {
  SubtargetFeatures Features(FS);
  std::optional<bool> XnackRequested;
  std::optional<bool> SramEccRequested;

  for (const std::string &Feature : Features.getFeatures()) {
    if (Feature == "+xnack")
      XnackRequested = true;
    else if (Feature == "-xnack")
      XnackRequested = false;
    else if (Feature == "+sramecc")
      SramEccRequested = true;
    else if (Feature == "-sramecc")
      SramEccRequested = false;
  }

  bool XnackSupported = isXnackSupported();
  bool SramEccSupported = isSramEccSupported();
  if (XnackRequested) {
    if (XnackSupported) {
      XnackSetting =
          *XnackRequested ? TargetIDSetting::On : TargetIDSetting::Off;
    } else {
      // A specific xnack setting was requested for a GPU that does not
      // support xnack; warn and leave the setting as "Unsupported".
      if (*XnackRequested) {
        errs() << "warning: xnack 'On' was requested for a processor that does "
                  "not support it!\n";
      } else {
        errs() << "warning: xnack 'Off' was requested for a processor that "
                  "does not support it!\n";
      }
    }
  }

  if (SramEccRequested) {
    if (SramEccSupported) {
      SramEccSetting =
          *SramEccRequested ? TargetIDSetting::On : TargetIDSetting::Off;
    } else {
      // Same handling for sramecc.
      if (*SramEccRequested) {
        errs() << "warning: sramecc 'On' was requested for a processor that "
                  "does not support it!\n";
      } else {
        errs() << "warning: sramecc 'Off' was requested for a processor that "
                  "does not support it!\n";
      }
    }
  }
}
void AMDGPUTargetID::setTargetIDFromTargetIDStream(StringRef TargetID) {
  SmallVector<StringRef, 3> TargetIDSplit;
  TargetID.split(TargetIDSplit, ':');

  for (const auto &FeatureString : TargetIDSplit) {
    if (FeatureString.startswith("xnack"))
      XnackSetting = getTargetIDSettingFromFeatureString(FeatureString);
    if (FeatureString.startswith("sramecc"))
      SramEccSetting = getTargetIDSettingFromFeatureString(FeatureString);
  }
}
std::string AMDGPUTargetID::toString() const {
  std::string StringRep;
  raw_string_ostream StreamRep(StringRep);

  auto TargetTriple = STI.getTargetTriple();

  StreamRep << TargetTriple.getArchName() << '-'
            << TargetTriple.getVendorName() << '-'
            << TargetTriple.getOSName() << '-'
            << TargetTriple.getEnvironmentName() << '-';
  std::string Processor;
  // ...

  std::string Features;
  if (std::optional<uint8_t> HsaAbiVersion = getHsaAbiVersion(&STI)) {
    switch (*HsaAbiVersion) {
    case ELF::ELFABIVERSION_AMDGPU_HSA_V2:
      // Code object V2 only supported a fixed processor list, some with an
      // implied XNACK setting baked into the processor name.
      if (Processor == "gfx600") {
      } else if (Processor == "gfx601") {
      } else if (Processor == "gfx602") {
      } else if (Processor == "gfx700") {
      } else if (Processor == "gfx701") {
      } else if (Processor == "gfx702") {
      } else if (Processor == "gfx703") {
      } else if (Processor == "gfx704") {
      } else if (Processor == "gfx705") {
      } else if (Processor == "gfx801") {
        if (!isXnackOnOrAny())
          report_fatal_error(
              "AMD GPU code object V2 does not support processor " +
              Twine(Processor) + " without XNACK");
      } else if (Processor == "gfx802") {
      } else if (Processor == "gfx803") {
      } else if (Processor == "gfx805") {
      } else if (Processor == "gfx810") {
        if (!isXnackOnOrAny())
          report_fatal_error(
              "AMD GPU code object V2 does not support processor " +
              Twine(Processor) + " without XNACK");
      } else if (Processor == "gfx900") {
        if (isXnackOnOrAny())
          Processor = "gfx901";
      } else if (Processor == "gfx902") {
        if (isXnackOnOrAny())
          Processor = "gfx903";
      } else if (Processor == "gfx904") {
        if (isXnackOnOrAny())
          Processor = "gfx905";
      } else if (Processor == "gfx906") {
        if (isXnackOnOrAny())
          Processor = "gfx907";
      } else if (Processor == "gfx90c") {
        if (isXnackOnOrAny())
          report_fatal_error(
              "AMD GPU code object V2 does not support processor " +
              Twine(Processor) + " with XNACK being ON or ANY");
      } else {
        report_fatal_error(
            "AMD GPU code object V2 does not support processor " +
            Twine(Processor));
      }
      break;
    case ELF::ELFABIVERSION_AMDGPU_HSA_V3:
      // xnack.
      if (isXnackOnOrAny())
        Features += "+xnack";
      // In code object v2 and v3, "sramecc" was spelled with a hyphen
      // ("sram-ecc").
      if (isSramEccOnOrAny())
        Features += "+sram-ecc";
      break;
    case ELF::ELFABIVERSION_AMDGPU_HSA_V4:
    case ELF::ELFABIVERSION_AMDGPU_HSA_V5:
      // sramecc.
      if (getSramEccSetting() == TargetIDSetting::Off)
        Features += ":sramecc-";
      else if (getSramEccSetting() == TargetIDSetting::On)
        Features += ":sramecc+";
      // xnack.
      if (getXnackSetting() == TargetIDSetting::Off)
        Features += ":xnack-";
      else if (getXnackSetting() == TargetIDSetting::On)
        Features += ":xnack+";
      break;
    default:
      break;
    }
  }

  StreamRep << Processor << Features;
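// Standalone illustration (not part of the original file): the shape of the
// string the code above composes, for a hypothetical gfx906 HSA target with
// code object V4, sramecc off and xnack on.
#include <iostream>
#include <sstream>
#include <string>

int main() {
  std::ostringstream StreamRep;
  // Arch, vendor, OS and (empty) environment, each followed by '-'.
  StreamRep << "amdgcn" << '-' << "amd" << '-' << "amdhsa" << '-' << "" << '-';
  std::string Processor = "gfx906";
  std::string Features = ":sramecc-:xnack+";
  StreamRep << Processor << Features;
  std::cout << StreamRep.str() << "\n";
  // Prints: amdgcn-amd-amdhsa--gfx906:sramecc-:xnack+
  return 0;
}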
// From getLocalMemorySize(STI): the LDS size is selected by subtarget
// feature.
  unsigned BytesPerCU = 0;
  // ...

unsigned getMaxWorkGroupsPerCU(const MCSubtargetInfo *STI,
                               unsigned FlatWorkGroupSize) {
  assert(FlatWorkGroupSize != 0);
  // ...
  unsigned MaxBarriers = 16;
  // ...
  return std::min(MaxWaves / N, MaxBarriers);
}
unsigned getWavesPerEUForWorkGroup(const MCSubtargetInfo *STI,
                                   unsigned FlatWorkGroupSize) {
  return divideCeil(getWavesPerWorkGroup(STI, FlatWorkGroupSize),
                    getEUsPerCU(STI));
}

unsigned getWavesPerWorkGroup(const MCSubtargetInfo *STI,
                              unsigned FlatWorkGroupSize) {
  return divideCeil(FlatWorkGroupSize, getWavefrontSize(STI));
}

// From getMaxNumSGPRs(STI, WavesPerEU, Addressable):
  unsigned AddressableNumSGPRs = getAddressableNumSGPRs(STI);
  if (Version.Major >= 10)
    return Addressable ? AddressableNumSGPRs : 108;
  if (Version.Major >= 8 && !Addressable)
    AddressableNumSGPRs = 112;
  // ...
  return std::min(MaxNumSGPRs, AddressableNumSGPRs);
unsigned getNumExtraSGPRs(const MCSubtargetInfo *STI, bool VCCUsed,
                          bool FlatScrUsed, bool XNACKUsed) {
  unsigned ExtraSGPRs = 0;
  // ... (VCC, flat scratch and XNACK each reserve an aligned SGPR pair)
}
unsigned getVGPRAllocGranule(const MCSubtargetInfo *STI,
                             std::optional<bool> EnableWavefrontSize32) {
  // ...
  bool IsWave32 = EnableWavefrontSize32 ?
      *EnableWavefrontSize32 :
      STI->getFeatureBits().test(FeatureWavefrontSize32);
  // ... (subtargets with 1.5x VGPRs)
  return IsWave32 ? 24 : 12;
  // ... (gfx10.3)
  return IsWave32 ? 16 : 8;

  return IsWave32 ? 8 : 4;
}

unsigned getVGPREncodingGranule(const MCSubtargetInfo *STI,
                                std::optional<bool> EnableWavefrontSize32) {
  // ...
  bool IsWave32 = EnableWavefrontSize32 ?
      *EnableWavefrontSize32 :
      STI->getFeatureBits().test(FeatureWavefrontSize32);

  return IsWave32 ? 8 : 4;
}

// From getTotalNumVGPRs(STI):
  return IsWave32 ? 1536 : 768; // Subtargets with 1.5x VGPRs.
  return IsWave32 ? 1024 : 512; // Other GFX10+.
unsigned getMinNumVGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU) {
  unsigned MaxWavesPerEU = getMaxWavesPerEU(STI);
  if (WavesPerEU >= MaxWavesPerEU)
    return 0;
  unsigned TotNumVGPRs = getTotalNumVGPRs(STI);
  unsigned AddrsableNumVGPRs = getAddressableNumVGPRs(STI);
  unsigned Granule = getVGPRAllocGranule(STI);
  unsigned MaxNumVGPRs = alignDown(TotNumVGPRs / WavesPerEU, Granule);
  if (MaxNumVGPRs == alignDown(TotNumVGPRs / MaxWavesPerEU, Granule))
    return 0;
  unsigned MinWavesPerEU = getNumWavesPerEUWithNumVGPRs(STI, AddrsableNumVGPRs);
  if (WavesPerEU < MinWavesPerEU)
    return getMinNumVGPRs(STI, MinWavesPerEU);
  unsigned MaxNumVGPRsNext = alignDown(TotNumVGPRs / (WavesPerEU + 1), Granule);
  unsigned MinNumVGPRs = 1 + std::min(MaxNumVGPRs - Granule, MaxNumVGPRsNext);
  return std::min(MinNumVGPRs, AddrsableNumVGPRs);
}

unsigned getMaxNumVGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU) {
  unsigned MaxNumVGPRs = alignDown(getTotalNumVGPRs(STI) / WavesPerEU,
                                   getVGPRAllocGranule(STI));
  unsigned AddressableNumVGPRs = getAddressableNumVGPRs(STI);
  return std::min(MaxNumVGPRs, AddressableNumVGPRs);
}
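// Standalone illustration (not part of the original file): the occupancy
// arithmetic above, for an assumed wave64 subtarget with 256 total VGPRs per
// SIMD and an allocation granule of 4.
#include <algorithm>
#include <cstdint>
#include <iostream>

static uint64_t exampleAlignDown(uint64_t Value, uint64_t Align) {
  return (Value / Align) * Align; // Same contract as llvm::alignDown.
}

int main() {
  unsigned TotNumVGPRs = 256, Granule = 4, AddressableNumVGPRs = 256;
  for (unsigned WavesPerEU : {1u, 2u, 4u, 8u}) {
    uint64_t MaxNumVGPRs =
        std::min(exampleAlignDown(TotNumVGPRs / WavesPerEU, Granule),
                 (uint64_t)AddressableNumVGPRs);
    std::cout << WavesPerEU << " wave(s)/EU -> up to " << MaxNumVGPRs
              << " VGPRs per wave\n";
  }
  // 1 -> 256, 2 -> 128, 4 -> 64, 8 -> 32.
  return 0;
}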
unsigned getNumVGPRBlocks(const MCSubtargetInfo *STI, unsigned NumVGPRs,
                          std::optional<bool> EnableWavefrontSize32) {
  // ...
}

void initDefaultAMDKernelCodeT(amd_kernel_code_t &Header,
                               const MCSubtargetInfo *STI) {
  IsaVersion Version = getIsaVersion(STI->getCPU());

  memset(&Header, 0, sizeof(Header));

  Header.amd_kernel_code_version_major = 1;
  Header.amd_kernel_code_version_minor = 2;
  Header.amd_machine_kind = 1; // AMD_MACHINE_KIND_AMDGPU
  Header.amd_machine_version_major = Version.Major;
  Header.amd_machine_version_minor = Version.Minor;
  Header.amd_machine_version_stepping = Version.Stepping;
  Header.kernel_code_entry_byte_offset = sizeof(Header);
  Header.wavefront_size = 6; // log2(64)

  // If the code object does not support indirect functions, then the value
  // must be 0xffffffff.
  Header.call_convention = -1;

  // These alignment values are specified in powers of two, so alignment =
  // 2^n. The minimum alignment is 2^4 = 16.
  Header.kernarg_segment_alignment = 4;
  Header.group_segment_alignment = 4;
  Header.private_segment_alignment = 4;

  if (Version.Major >= 10) {
    if (STI->getFeatureBits().test(FeatureWavefrontSize32))
      Header.wavefront_size = 5; // log2(32)
    Header.compute_pgm_resource_registers |=
        S_00B848_WGP_MODE(STI->getFeatureBits().test(FeatureCuMode) ? 0 : 1) |
        S_00B848_MEM_ORDERED(1);
  }
}
amdhsa::kernel_descriptor_t
getDefaultAmdhsaKernelDescriptor(const MCSubtargetInfo *STI) {
  IsaVersion Version = getIsaVersion(STI->getCPU());
  amdhsa::kernel_descriptor_t KD;
  memset(&KD, 0, sizeof(KD));

  AMDHSA_BITS_SET(KD.compute_pgm_rsrc1,
                  amdhsa::COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_16_64,
                  amdhsa::FLOAT_DENORM_MODE_FLUSH_NONE);
  AMDHSA_BITS_SET(KD.compute_pgm_rsrc1,
                  amdhsa::COMPUTE_PGM_RSRC1_ENABLE_DX10_CLAMP, 1);
  AMDHSA_BITS_SET(KD.compute_pgm_rsrc1,
                  amdhsa::COMPUTE_PGM_RSRC1_ENABLE_IEEE_MODE, 1);
  AMDHSA_BITS_SET(KD.compute_pgm_rsrc2,
                  amdhsa::COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_X, 1);
  if (Version.Major >= 10) {
    AMDHSA_BITS_SET(KD.kernel_code_properties,
                    amdhsa::KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32,
                    STI->getFeatureBits().test(FeatureWavefrontSize32) ? 1 : 0);
    AMDHSA_BITS_SET(KD.compute_pgm_rsrc1, amdhsa::COMPUTE_PGM_RSRC1_WGP_MODE,
                    STI->getFeatureBits().test(FeatureCuMode) ? 0 : 1);
    AMDHSA_BITS_SET(KD.compute_pgm_rsrc1,
                    amdhsa::COMPUTE_PGM_RSRC1_MEM_ORDERED, 1);
  }
  if (AMDGPU::isGFX90A(*STI))
    AMDHSA_BITS_SET(KD.compute_pgm_rsrc3,
                    amdhsa::COMPUTE_PGM_RSRC3_GFX90A_TG_SPLIT,
                    STI->getFeatureBits().test(FeatureTgSplit) ? 1 : 0);
  return KD;
}
std::pair<int, int> getIntegerPairAttribute(const Function &F, StringRef Name,
                                            std::pair<int, int> Default,
                                            bool OnlyFirstRequired) {
  Attribute A = F.getFnAttribute(Name);
  if (!A.isStringAttribute())
    return Default;

  LLVMContext &Ctx = F.getContext();
  std::pair<int, int> Ints = Default;
  std::pair<StringRef, StringRef> Strs = A.getValueAsString().split(',');
  if (Strs.first.trim().getAsInteger(0, Ints.first)) {
    Ctx.emitError("can't parse first integer attribute " + Name);
    return Default;
  }
  if (Strs.second.trim().getAsInteger(0, Ints.second)) {
    if (!OnlyFirstRequired || !Strs.second.trim().empty()) {
      Ctx.emitError("can't parse second integer attribute " + Name);
      return Default;
    }
  }

  return Ints;
}
unsigned getVmcntBitMask(const IsaVersion &Version) {
  return (1 << (getVmcntBitWidthLo(Version.Major) +
                getVmcntBitWidthHi(Version.Major))) -
         1;
}

unsigned getExpcntBitMask(const IsaVersion &Version) {
  return (1 << getExpcntBitWidth(Version.Major)) - 1;
}

unsigned getLgkmcntBitMask(const IsaVersion &Version) {
  return (1 << getLgkmcntBitWidth(Version.Major)) - 1;
}

unsigned getWaitcntBitMask(const IsaVersion &Version) {
  unsigned VmcntLo = getBitMask(getVmcntBitShiftLo(Version.Major),
                                getVmcntBitWidthLo(Version.Major));
  unsigned Expcnt = getBitMask(getExpcntBitShift(Version.Major),
                               getExpcntBitWidth(Version.Major));
  unsigned Lgkmcnt = getBitMask(getLgkmcntBitShift(Version.Major),
                                getLgkmcntBitWidth(Version.Major));
  unsigned VmcntHi = getBitMask(getVmcntBitShiftHi(Version.Major),
                                getVmcntBitWidthHi(Version.Major));
  return VmcntLo | Expcnt | Lgkmcnt | VmcntHi;
}

unsigned decodeVmcnt(const IsaVersion &Version, unsigned Waitcnt) {
  unsigned VmcntLo = unpackBits(Waitcnt, getVmcntBitShiftLo(Version.Major),
                                getVmcntBitWidthLo(Version.Major));
  unsigned VmcntHi = unpackBits(Waitcnt, getVmcntBitShiftHi(Version.Major),
                                getVmcntBitWidthHi(Version.Major));
  return VmcntLo | VmcntHi << getVmcntBitWidthLo(Version.Major);
}

unsigned decodeExpcnt(const IsaVersion &Version, unsigned Waitcnt) {
  return unpackBits(Waitcnt, getExpcntBitShift(Version.Major),
                    getExpcntBitWidth(Version.Major));
}

unsigned decodeLgkmcnt(const IsaVersion &Version, unsigned Waitcnt) {
  return unpackBits(Waitcnt, getLgkmcntBitShift(Version.Major),
                    getLgkmcntBitWidth(Version.Major));
}
void decodeWaitcnt(const IsaVersion &Version, unsigned Waitcnt,
                   unsigned &Vmcnt, unsigned &Expcnt, unsigned &Lgkmcnt) {
  Vmcnt = decodeVmcnt(Version, Waitcnt);
  Expcnt = decodeExpcnt(Version, Waitcnt);
  Lgkmcnt = decodeLgkmcnt(Version, Waitcnt);
}

unsigned encodeVmcnt(const IsaVersion &Version, unsigned Waitcnt,
                     unsigned Vmcnt) {
  Waitcnt = packBits(Vmcnt, Waitcnt, getVmcntBitShiftLo(Version.Major),
                     getVmcntBitWidthLo(Version.Major));
  return packBits(Vmcnt >> getVmcntBitWidthLo(Version.Major), Waitcnt,
                  getVmcntBitShiftHi(Version.Major),
                  getVmcntBitWidthHi(Version.Major));
}

unsigned encodeExpcnt(const IsaVersion &Version, unsigned Waitcnt,
                      unsigned Expcnt) {
  return packBits(Expcnt, Waitcnt, getExpcntBitShift(Version.Major),
                  getExpcntBitWidth(Version.Major));
}

unsigned encodeLgkmcnt(const IsaVersion &Version, unsigned Waitcnt,
                       unsigned Lgkmcnt) {
  return packBits(Lgkmcnt, Waitcnt, getLgkmcntBitShift(Version.Major),
                  getLgkmcntBitWidth(Version.Major));
}

unsigned encodeWaitcnt(const IsaVersion &Version,
                       unsigned Vmcnt, unsigned Expcnt, unsigned Lgkmcnt) {
  unsigned Waitcnt = getWaitcntBitMask(Version);
  Waitcnt = encodeVmcnt(Version, Waitcnt, Vmcnt);
  Waitcnt = encodeExpcnt(Version, Waitcnt, Expcnt);
  Waitcnt = encodeLgkmcnt(Version, Waitcnt, Lgkmcnt);
  return Waitcnt;
}
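// Standalone illustration (not part of the original file): the gfx9 s_waitcnt
// layout implied by the shift/width helpers above -- vmcnt split between bits
// [3:0] and [15:14], expcnt at [6:4], lgkmcnt at [11:8]. The field positions
// below are hard-coded for this sketch.
#include <cassert>

static unsigned examplePack(unsigned Src, unsigned Dst, unsigned Shift,
                            unsigned Width) {
  unsigned Mask = ((1u << Width) - 1) << Shift;
  return ((Src << Shift) & Mask) | (Dst & ~Mask);
}

int main() {
  unsigned Vmcnt = 42, Expcnt = 5, Lgkmcnt = 9; // Arbitrary in-range values.
  unsigned Waitcnt = 0;
  Waitcnt = examplePack(Vmcnt, Waitcnt, 0, 4);       // vmcnt[3:0]
  Waitcnt = examplePack(Vmcnt >> 4, Waitcnt, 14, 2); // vmcnt[5:4]
  Waitcnt = examplePack(Expcnt, Waitcnt, 4, 3);      // expcnt
  Waitcnt = examplePack(Lgkmcnt, Waitcnt, 8, 4);     // lgkmcnt

  // Decode and verify the round trip.
  assert(((((Waitcnt >> 14) & 0x3) << 4) | (Waitcnt & 0xF)) == 42);
  assert(((Waitcnt >> 4) & 0x7) == 5);
  assert(((Waitcnt >> 8) & 0xF) == 9);
  return 0;
}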
template <class T>
static bool isValidOpr(int Idx, const CustomOperand<T> OpInfo[], int OpInfoSize,
                       T Context) {
  return 0 <= Idx && Idx < OpInfoSize && !OpInfo[Idx].Name.empty() &&
         (!OpInfo[Idx].Cond || OpInfo[Idx].Cond(Context));
}

template <class T>
static int getOprIdx(std::function<bool(const CustomOperand<T> &)> Test,
                     const CustomOperand<T> OpInfo[], int OpInfoSize,
                     T Context) {
  int InvalidIdx = OPR_ID_UNKNOWN;
  for (int Idx = 0; Idx < OpInfoSize; ++Idx) {
    if (Test(OpInfo[Idx])) {
      if (!OpInfo[Idx].Cond || OpInfo[Idx].Cond(Context))
        return Idx;
      InvalidIdx = OPR_ID_UNSUPPORTED;
    }
  }
  return InvalidIdx;
}

template <class T>
static int getOprIdx(const StringRef Name, const CustomOperand<T> OpInfo[],
                     int OpInfoSize, T Context) {
  auto Test = [=](const CustomOperand<T> &Op) { return Op.Name == Name; };
  return getOprIdx<T>(Test, OpInfo, OpInfoSize, Context);
}

template <class T>
static int getOprIdx(int Id, const CustomOperand<T> OpInfo[], int OpInfoSize,
                     T Context, bool QuickCheck = true) {
  auto Test = [=](const CustomOperand<T> &Op) {
    return Op.Encoding == Id && !Op.Name.empty();
  };
  // Check the ID directly first as an optimization; the tables are normally
  // arranged so that OpInfo[Id].Encoding == Id.
  if (QuickCheck && isValidOpr<T>(Id, OpInfo, OpInfoSize, Context) &&
      OpInfo[Id].Encoding == Id)
    return Id;
  return getOprIdx<T>(Test, OpInfo, OpInfoSize, Context);
}
static unsigned getDefaultCustomOperandEncoding(const CustomOperandVal *Opr,
                                                int Size,
                                                const MCSubtargetInfo &STI) {
  unsigned Enc = 0;
  for (int Idx = 0; Idx < Size; ++Idx) {
    const auto &Op = Opr[Idx];
    if (Op.isSupported(STI))
      Enc |= Op.encode(Op.Default);
  }
  return Enc;
}

static bool isSymbolicCustomOperandEncoding(const CustomOperandVal *Opr,
                                            int Size, unsigned Code,
                                            bool &HasNonDefaultVal,
                                            const MCSubtargetInfo &STI) {
  unsigned UsedOprMask = 0;
  HasNonDefaultVal = false;
  for (int Idx = 0; Idx < Size; ++Idx) {
    const auto &Op = Opr[Idx];
    if (!Op.isSupported(STI))
      continue;
    UsedOprMask |= Op.getMask();
    unsigned Val = Op.decode(Code);
    if (!Op.isValid(Val))
      return false;
    HasNonDefaultVal |= (Val != Op.Default);
  }
  return (Code & ~UsedOprMask) == 0;
}

static bool decodeCustomOperand(const CustomOperandVal *Opr, int Size,
                                unsigned Code, int &Idx, StringRef &Name,
                                unsigned &Val, bool &IsDefault,
                                const MCSubtargetInfo &STI) {
  while (Idx < Size) {
    const auto &Op = Opr[Idx++];
    if (Op.isSupported(STI)) {
      Name = Op.Name;
      Val = Op.decode(Code);
      IsDefault = (Val == Op.Default);
      return true;
    }
  }
  return false;
}

static int encodeCustomOperandVal(const CustomOperandVal &Op,
                                  int64_t InputVal) {
  if (InputVal < 0 || InputVal > Op.Max)
    return OPR_VAL_INVALID;
  return Op.encode(InputVal);
}

static int encodeCustomOperand(const CustomOperandVal *Opr, int Size,
                               const StringRef Name, int64_t InputVal,
                               unsigned &UsedOprMask,
                               const MCSubtargetInfo &STI) {
  int InvalidId = OPR_ID_UNKNOWN;
  for (int Idx = 0; Idx < Size; ++Idx) {
    const auto &Op = Opr[Idx];
    if (Op.Name == Name) {
      if (!Op.isSupported(STI)) {
        InvalidId = OPR_ID_UNSUPPORTED;
        continue;
      }
      auto OprMask = Op.getMask();
      if (OprMask & UsedOprMask)
        return OPR_ID_DUPLICATE;
      UsedOprMask |= OprMask;
      return encodeCustomOperandVal(Op, InputVal);
    }
  }
  return InvalidId;
}
// namespace DepCtr
int getDefaultDepCtrEncoding(const MCSubtargetInfo &STI) {
  static int Default = -1;
  if (Default == -1)
    Default = getDefaultCustomOperandEncoding(DepCtrInfo, DEP_CTR_SIZE, STI);
  return Default;
}

bool isSymbolicDepCtrEncoding(unsigned Code, bool &HasNonDefaultVal,
                              const MCSubtargetInfo &STI) {
  return isSymbolicCustomOperandEncoding(DepCtrInfo, DEP_CTR_SIZE, Code,
                                         HasNonDefaultVal, STI);
}

// namespace Hwreg
int64_t getHwregId(const StringRef Name, const MCSubtargetInfo &STI) {
  int Idx = getOprIdx<const MCSubtargetInfo &>(Name, Opr, OPR_SIZE, STI);
  return (Idx < 0) ? Idx : Opr[Idx].Encoding;
}

bool isValidHwreg(int64_t Id) { return 0 <= Id && isUInt<ID_WIDTH_>(Id); }

bool isValidHwregWidth(int64_t Width) {
  return 0 <= (Width - 1) && isUInt<WIDTH_M1_WIDTH_>(Width - 1);
}

StringRef getHwreg(unsigned Id, const MCSubtargetInfo &STI) {
  int Idx = getOprIdx<const MCSubtargetInfo &>(Id, Opr, OPR_SIZE, STI);
  return (Idx < 0) ? "" : Opr[Idx].Name;
}
// namespace Exp
bool getTgtName(unsigned Id, StringRef &Name, int &Index) {
  for (const ExpTgt &Val : ExpTgtInfo) {
    if (Val.Tgt <= Id && Id <= Val.Tgt + Val.MaxIndex) {
      Index = (Val.MaxIndex == 0) ? -1 : (Id - Val.Tgt);
      Name = Val.Name;
      return true;
    }
  }
  return false;
}

unsigned getTgtId(const StringRef Name) {
  for (const ExpTgt &Val : ExpTgtInfo) {
    if (Val.MaxIndex == 0 && Name == Val.Name)
      return Val.Tgt;
    if (Val.MaxIndex > 0 && Name.startswith(Val.Name)) {
      StringRef Suffix = Name.drop_front(Val.Name.size());
      unsigned Id;
      if (Suffix.getAsInteger(10, Id) || Id > Val.MaxIndex)
        return ET_INVALID;
      // Disallow leading zeroes in the index suffix.
      if (Suffix.size() > 1 && Suffix[0] == '0')
        return ET_INVALID;
      return Val.Tgt + Id;
    }
  }
  return ET_INVALID;
}
namespace MTBUFFormat {

// From getNfmt(Name, STI): the symbolic nfmt names are subtarget-dependent,
// so the name is matched against a subtarget-selected table.
  if (Name == lookupTable[Id])
    return Id;
// namespace SendMsg
int64_t getMsgId(const StringRef Name, const MCSubtargetInfo &STI) {
  int Idx = getOprIdx<const MCSubtargetInfo &>(Name, Msg, MSG_SIZE, STI);
  return (Idx < 0) ? Idx : Msg[Idx].Encoding;
}

StringRef getMsgName(int64_t MsgId, const MCSubtargetInfo &STI) {
  int Idx = getOprIdx<const MCSubtargetInfo &>(MsgId, Msg, MSG_SIZE, STI);
  return (Idx < 0) ? "" : Msg[Idx].Name;
}
// From getMsgOpId(MsgId, Name): scan the symbolic-operation table within the
// [F, L) range selected by the message id.
  for (int i = F; i < L; ++i) {
    if (Name == S[i])
      return i;
  }

// From isValidMsgOp(MsgId, OpId, STI, Strict): without strict checking, any
// encodable operation id is accepted.
  return 0 <= OpId && isUInt<OP_WIDTH_>(OpId);
unsigned getInitialPSInputAddr(const Function &F) {
  return F.getFnAttributeAsParsedInteger("InitialPSInputAddr", 0);
}

bool getHasColorExport(const Function &F) {
  // As a safe default, respond as if PS has color exports.
  return F.getFnAttributeAsParsedInteger(
             "amdgpu-color-export",
             F.getCallingConv() == CallingConv::AMDGPU_PS ? 1 : 0) != 0;
}

bool getHasDepthExport(const Function &F) {
  return F.getFnAttributeAsParsedInteger("amdgpu-depth-export", 0) != 0;
}

bool hasArchitectedFlatScratch(const MCSubtargetInfo &STI) {
  return STI.getFeatureBits()[AMDGPU::FeatureArchitectedFlatScratch];
}
int32_t getTotalNumVGPRs(bool has90AInsts, int32_t ArgNumAGPR,
                         int32_t ArgNumVGPR) {
  if (has90AInsts && ArgNumAGPR)
    return alignTo(ArgNumVGPR, 4) + ArgNumAGPR;
  return std::max(ArgNumVGPR, ArgNumAGPR);
}
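// Standalone illustration (not part of the original file): on gfx90a the
// AGPRs share one register file with the VGPRs, so the VGPR portion is
// rounded up to a 4-register boundary before the AGPRs are appended; on other
// subtargets the files are separate and only the larger count matters.
#include <algorithm>
#include <cstdint>
#include <iostream>

static int32_t exampleTotalNumVGPRs(bool Has90AInsts, int32_t NumAGPR,
                                    int32_t NumVGPR) {
  auto AlignTo4 = [](int32_t V) { return (V + 3) / 4 * 4; };
  if (Has90AInsts && NumAGPR)
    return AlignTo4(NumVGPR) + NumAGPR;
  return std::max(NumVGPR, NumAGPR);
}

int main() {
  std::cout << exampleTotalNumVGPRs(true, 10, 6) << "\n";  // 8 + 10 = 18
  std::cout << exampleTotalNumVGPRs(false, 10, 6) << "\n"; // max(6, 10) = 10
  return 0;
}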
bool isSGPR(unsigned Reg, const MCRegisterInfo *TRI) {
  const MCRegisterClass SGPRClass = TRI->getRegClass(AMDGPU::SReg_32RegClassID);
  const unsigned FirstSubReg = TRI->getSubReg(Reg, AMDGPU::sub0);
  return SGPRClass.contains(FirstSubReg != 0 ? FirstSubReg : Reg) ||
         Reg == AMDGPU::SCC;
}
#define MAP_REG2REG \
  using namespace AMDGPU; \
  switch(Reg) { \
  default: return Reg; \
  CASE_CI_VI(FLAT_SCR) \
  CASE_CI_VI(FLAT_SCR_LO) \
  CASE_CI_VI(FLAT_SCR_HI) \
  CASE_VI_GFX9PLUS(TTMP0) \
  CASE_VI_GFX9PLUS(TTMP1) \
  CASE_VI_GFX9PLUS(TTMP2) \
  CASE_VI_GFX9PLUS(TTMP3) \
  CASE_VI_GFX9PLUS(TTMP4) \
  CASE_VI_GFX9PLUS(TTMP5) \
  CASE_VI_GFX9PLUS(TTMP6) \
  CASE_VI_GFX9PLUS(TTMP7) \
  CASE_VI_GFX9PLUS(TTMP8) \
  CASE_VI_GFX9PLUS(TTMP9) \
  CASE_VI_GFX9PLUS(TTMP10) \
  CASE_VI_GFX9PLUS(TTMP11) \
  CASE_VI_GFX9PLUS(TTMP12) \
  CASE_VI_GFX9PLUS(TTMP13) \
  CASE_VI_GFX9PLUS(TTMP14) \
  CASE_VI_GFX9PLUS(TTMP15) \
  CASE_VI_GFX9PLUS(TTMP0_TTMP1) \
  CASE_VI_GFX9PLUS(TTMP2_TTMP3) \
  CASE_VI_GFX9PLUS(TTMP4_TTMP5) \
  CASE_VI_GFX9PLUS(TTMP6_TTMP7) \
  CASE_VI_GFX9PLUS(TTMP8_TTMP9) \
  CASE_VI_GFX9PLUS(TTMP10_TTMP11) \
  CASE_VI_GFX9PLUS(TTMP12_TTMP13) \
  CASE_VI_GFX9PLUS(TTMP14_TTMP15) \
  CASE_VI_GFX9PLUS(TTMP0_TTMP1_TTMP2_TTMP3) \
  CASE_VI_GFX9PLUS(TTMP4_TTMP5_TTMP6_TTMP7) \
  CASE_VI_GFX9PLUS(TTMP8_TTMP9_TTMP10_TTMP11) \
  CASE_VI_GFX9PLUS(TTMP12_TTMP13_TTMP14_TTMP15) \
  CASE_VI_GFX9PLUS(TTMP0_TTMP1_TTMP2_TTMP3_TTMP4_TTMP5_TTMP6_TTMP7) \
  CASE_VI_GFX9PLUS(TTMP4_TTMP5_TTMP6_TTMP7_TTMP8_TTMP9_TTMP10_TTMP11) \
  CASE_VI_GFX9PLUS(TTMP8_TTMP9_TTMP10_TTMP11_TTMP12_TTMP13_TTMP14_TTMP15) \
  CASE_VI_GFX9PLUS(TTMP0_TTMP1_TTMP2_TTMP3_TTMP4_TTMP5_TTMP6_TTMP7_TTMP8_TTMP9_TTMP10_TTMP11_TTMP12_TTMP13_TTMP14_TTMP15) \
  CASE_GFXPRE11_GFX11PLUS(M0) \
  CASE_GFXPRE11_GFX11PLUS(SGPR_NULL) \
  CASE_GFXPRE11_GFX11PLUS_TO(SGPR_NULL64, SGPR_NULL) \
  }
#define CASE_CI_VI(node) \
  assert(!isSI(STI)); \
  case node: return isCI(STI) ? node##_ci : node##_vi;

#define CASE_VI_GFX9PLUS(node) \
  case node: return isGFX9Plus(STI) ? node##_gfx9plus : node##_vi;

#define CASE_GFXPRE11_GFX11PLUS(node) \
  case node: return isGFX11Plus(STI) ? node##_gfx11plus : node##_gfxpre11;

#define CASE_GFXPRE11_GFX11PLUS_TO(node, result) \
  case node: return isGFX11Plus(STI) ? result##_gfx11plus : result##_gfxpre11;

unsigned getMCReg(unsigned Reg, const MCSubtargetInfo &STI) { MAP_REG2REG }

#undef CASE_CI_VI
#undef CASE_VI_GFX9PLUS
#undef CASE_GFXPRE11_GFX11PLUS
#undef CASE_GFXPRE11_GFX11PLUS_TO
// The reverse mapping, from MC register back to pseudo register.
#define CASE_CI_VI(node) case node##_ci: case node##_vi: return node;
#define CASE_VI_GFX9PLUS(node) case node##_vi: case node##_gfx9plus: return node;
#define CASE_GFXPRE11_GFX11PLUS(node) case node##_gfx11plus: case node##_gfxpre11: return node;
#define CASE_GFXPRE11_GFX11PLUS_TO(node, result)

unsigned mc2PseudoReg(unsigned Reg) { MAP_REG2REG }

bool isInlineValue(unsigned Reg) {
  switch (Reg) {
  case AMDGPU::SRC_SHARED_BASE_LO:
  case AMDGPU::SRC_SHARED_BASE:
  case AMDGPU::SRC_SHARED_LIMIT_LO:
  case AMDGPU::SRC_SHARED_LIMIT:
  case AMDGPU::SRC_PRIVATE_BASE_LO:
  case AMDGPU::SRC_PRIVATE_BASE:
  case AMDGPU::SRC_PRIVATE_LIMIT_LO:
  case AMDGPU::SRC_PRIVATE_LIMIT:
  case AMDGPU::SRC_POPS_EXITING_WAVE_ID:
    return true;
  case AMDGPU::SRC_VCCZ:
  case AMDGPU::SRC_EXECZ:
  case AMDGPU::SRC_SCC:
    return true;
  case AMDGPU::SGPR_NULL:
    return true;
  default:
    return false;
  }
}

#undef CASE_CI_VI
#undef CASE_VI_GFX9PLUS
#undef CASE_GFXPRE11_GFX11PLUS
#undef CASE_GFXPRE11_GFX11PLUS_TO
// Each operand-kind predicate (isSISrcOperand, isSISrcFPOperand,
// isSISrcInlinableOperand, isKImmOperand) classifies the operand by its
// tablegen'd type before comparing it against the relevant OPERAND_* range:
  unsigned OpType = Desc.operands()[OpNo].OperandType;
unsigned getRegBitWidth(unsigned RCID) {
  switch (RCID) {
  case AMDGPU::VGPR_LO16RegClassID:
  case AMDGPU::VGPR_HI16RegClassID:
  case AMDGPU::SGPR_LO16RegClassID:
  case AMDGPU::AGPR_LO16RegClassID:
    return 16;
  case AMDGPU::SGPR_32RegClassID:
  case AMDGPU::VGPR_32RegClassID:
  case AMDGPU::VRegOrLds_32RegClassID:
  case AMDGPU::AGPR_32RegClassID:
  case AMDGPU::VS_32RegClassID:
  case AMDGPU::AV_32RegClassID:
  case AMDGPU::SReg_32RegClassID:
  case AMDGPU::SReg_32_XM0RegClassID:
  case AMDGPU::SRegOrLds_32RegClassID:
    return 32;
  case AMDGPU::SGPR_64RegClassID:
  case AMDGPU::VS_64RegClassID:
  case AMDGPU::SReg_64RegClassID:
  case AMDGPU::VReg_64RegClassID:
  case AMDGPU::AReg_64RegClassID:
  case AMDGPU::SReg_64_XEXECRegClassID:
  case AMDGPU::VReg_64_Align2RegClassID:
  case AMDGPU::AReg_64_Align2RegClassID:
  case AMDGPU::AV_64RegClassID:
  case AMDGPU::AV_64_Align2RegClassID:
    return 64;
  case AMDGPU::SGPR_96RegClassID:
  case AMDGPU::SReg_96RegClassID:
  case AMDGPU::VReg_96RegClassID:
  case AMDGPU::AReg_96RegClassID:
  case AMDGPU::VReg_96_Align2RegClassID:
  case AMDGPU::AReg_96_Align2RegClassID:
  case AMDGPU::AV_96RegClassID:
  case AMDGPU::AV_96_Align2RegClassID:
    return 96;
  case AMDGPU::SGPR_128RegClassID:
  case AMDGPU::SReg_128RegClassID:
  case AMDGPU::VReg_128RegClassID:
  case AMDGPU::AReg_128RegClassID:
  case AMDGPU::VReg_128_Align2RegClassID:
  case AMDGPU::AReg_128_Align2RegClassID:
  case AMDGPU::AV_128RegClassID:
  case AMDGPU::AV_128_Align2RegClassID:
    return 128;
  case AMDGPU::SGPR_160RegClassID:
  case AMDGPU::SReg_160RegClassID:
  case AMDGPU::VReg_160RegClassID:
  case AMDGPU::AReg_160RegClassID:
  case AMDGPU::VReg_160_Align2RegClassID:
  case AMDGPU::AReg_160_Align2RegClassID:
  case AMDGPU::AV_160RegClassID:
  case AMDGPU::AV_160_Align2RegClassID:
    return 160;
  case AMDGPU::SGPR_192RegClassID:
  case AMDGPU::SReg_192RegClassID:
  case AMDGPU::VReg_192RegClassID:
  case AMDGPU::AReg_192RegClassID:
  case AMDGPU::VReg_192_Align2RegClassID:
  case AMDGPU::AReg_192_Align2RegClassID:
  case AMDGPU::AV_192RegClassID:
  case AMDGPU::AV_192_Align2RegClassID:
    return 192;
  case AMDGPU::SGPR_224RegClassID:
  case AMDGPU::SReg_224RegClassID:
  case AMDGPU::VReg_224RegClassID:
  case AMDGPU::AReg_224RegClassID:
  case AMDGPU::VReg_224_Align2RegClassID:
  case AMDGPU::AReg_224_Align2RegClassID:
  case AMDGPU::AV_224RegClassID:
  case AMDGPU::AV_224_Align2RegClassID:
    return 224;
  case AMDGPU::SGPR_256RegClassID:
  case AMDGPU::SReg_256RegClassID:
  case AMDGPU::VReg_256RegClassID:
  case AMDGPU::AReg_256RegClassID:
  case AMDGPU::VReg_256_Align2RegClassID:
  case AMDGPU::AReg_256_Align2RegClassID:
  case AMDGPU::AV_256RegClassID:
  case AMDGPU::AV_256_Align2RegClassID:
    return 256;
  case AMDGPU::SGPR_288RegClassID:
  case AMDGPU::SReg_288RegClassID:
  case AMDGPU::VReg_288RegClassID:
  case AMDGPU::AReg_288RegClassID:
  case AMDGPU::VReg_288_Align2RegClassID:
  case AMDGPU::AReg_288_Align2RegClassID:
  case AMDGPU::AV_288RegClassID:
  case AMDGPU::AV_288_Align2RegClassID:
    return 288;
  case AMDGPU::SGPR_320RegClassID:
  case AMDGPU::SReg_320RegClassID:
  case AMDGPU::VReg_320RegClassID:
  case AMDGPU::AReg_320RegClassID:
  case AMDGPU::VReg_320_Align2RegClassID:
  case AMDGPU::AReg_320_Align2RegClassID:
  case AMDGPU::AV_320RegClassID:
  case AMDGPU::AV_320_Align2RegClassID:
    return 320;
  case AMDGPU::SGPR_352RegClassID:
  case AMDGPU::SReg_352RegClassID:
  case AMDGPU::VReg_352RegClassID:
  case AMDGPU::AReg_352RegClassID:
  case AMDGPU::VReg_352_Align2RegClassID:
  case AMDGPU::AReg_352_Align2RegClassID:
  case AMDGPU::AV_352RegClassID:
  case AMDGPU::AV_352_Align2RegClassID:
    return 352;
  case AMDGPU::SGPR_384RegClassID:
  case AMDGPU::SReg_384RegClassID:
  case AMDGPU::VReg_384RegClassID:
  case AMDGPU::AReg_384RegClassID:
  case AMDGPU::VReg_384_Align2RegClassID:
  case AMDGPU::AReg_384_Align2RegClassID:
  case AMDGPU::AV_384RegClassID:
  case AMDGPU::AV_384_Align2RegClassID:
    return 384;
  case AMDGPU::SGPR_512RegClassID:
  case AMDGPU::SReg_512RegClassID:
  case AMDGPU::VReg_512RegClassID:
  case AMDGPU::AReg_512RegClassID:
  case AMDGPU::VReg_512_Align2RegClassID:
  case AMDGPU::AReg_512_Align2RegClassID:
  case AMDGPU::AV_512RegClassID:
  case AMDGPU::AV_512_Align2RegClassID:
    return 512;
  case AMDGPU::SGPR_1024RegClassID:
  case AMDGPU::SReg_1024RegClassID:
  case AMDGPU::VReg_1024RegClassID:
  case AMDGPU::AReg_1024RegClassID:
  case AMDGPU::VReg_1024_Align2RegClassID:
  case AMDGPU::AReg_1024_Align2RegClassID:
  case AMDGPU::AV_1024RegClassID:
  case AMDGPU::AV_1024_Align2RegClassID:
    return 1024;
  default:
    llvm_unreachable("Unexpected register class");
  }
}
unsigned getRegOperandSize(const MCRegisterInfo *MRI, const MCInstrDesc &Desc,
                           unsigned OpNo) {
  unsigned RCID = Desc.operands()[OpNo].RegClass;
  return getRegBitWidth(MRI->getRegClass(RCID)) / 8;
}

// isInlinableLiteral64 ends by accepting the f64 bit pattern of 1/(2*pi),
// which is only inlinable when the subtarget has the inv-2pi constant:
         (Val == 0x3fc45f306dc9c882 && HasInv2Pi);

// ... as does isInlinableLiteral32 with the corresponding f32 pattern:
         (Val == 0x3e22f983 && HasInv2Pi);
// From isInlinableLiteral16(Literal, HasInv2Pi): the accepted f16 bit
// patterns (1.0, -1.0, 0.5, -0.5, 2.0, -2.0, 4.0, -4.0, 1/2pi):
  return Val == 0x3C00 || // ... the remaining patterns elided here.

bool isInlinableLiteralV216(int32_t Literal, bool HasInv2Pi) {
  assert(HasInv2Pi);
  if (isInt<16>(Literal) || isUInt<16>(Literal)) {
    int16_t Trunc = static_cast<int16_t>(Literal);
    return AMDGPU::isInlinableLiteral16(Trunc, HasInv2Pi);
  }
  if (!(Literal & 0xffff))
    return AMDGPU::isInlinableLiteral16(Literal >> 16, HasInv2Pi);

  int16_t Lo16 = static_cast<int16_t>(Literal);
  int16_t Hi16 = static_cast<int16_t>(Literal >> 16);
  return Lo16 == Hi16 && isInlinableLiteral16(Lo16, HasInv2Pi);
}

bool isInlinableIntLiteralV216(int32_t Literal) {
  int16_t Lo16 = static_cast<int16_t>(Literal);
  if (isInt<16>(Literal) || isUInt<16>(Literal))
    return isInlinableIntLiteral(Lo16);

  int16_t Hi16 = static_cast<int16_t>(Literal >> 16);
  if (!(Literal & 0xffff))
    return isInlinableIntLiteral(Hi16);
  return Lo16 == Hi16 && isInlinableIntLiteral(Lo16);
}

bool isFoldableLiteralV216(int32_t Literal, bool HasInv2Pi) {
  assert(HasInv2Pi);

  int16_t Lo16 = static_cast<int16_t>(Literal);
  if (isInt<16>(Literal) || isUInt<16>(Literal))
    return true;

  int16_t Hi16 = static_cast<int16_t>(Literal >> 16);
  if (!(Literal & 0xffff))
    return true;
  return Lo16 == Hi16;
}
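// Standalone illustration (not part of the original file): the splat rule
// used by the V216 checks above -- a 32-bit literal is usable for a packed
// 16-bit operand when both halves carry the same value.
#include <cassert>
#include <cstdint>

static bool exampleHalvesMatch(int32_t Literal) {
  int16_t Lo16 = static_cast<int16_t>(Literal);
  int16_t Hi16 = static_cast<int16_t>(Literal >> 16);
  return Lo16 == Hi16;
}

int main() {
  assert(exampleHalvesMatch(0x00010001));  // <1, 1> is a splat.
  assert(!exampleHalvesMatch(0x00020001)); // <2, 1> is not.
  return 0;
}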
// From isArgPassedInSGPR(const Argument *A): for graphics calling
// conventions, only 'inreg' and 'byval' arguments are passed in SGPRs.
  return F->getAttributes().hasParamAttr(A->getArgNo(), Attribute::InReg) ||
         F->getAttributes().hasParamAttr(A->getArgNo(), Attribute::ByVal);
bool isLegalSMRDEncodedUnsignedOffset(const MCSubtargetInfo &ST,
                                      int64_t EncodedOffset) {
  return hasSMEMByteOffset(ST) ? isUInt<20>(EncodedOffset)
                               : isUInt<8>(EncodedOffset);
}
bool isLegalSMRDEncodedSignedOffset(const MCSubtargetInfo &ST,
                                    int64_t EncodedOffset, bool IsBuffer) {
  return !IsBuffer && hasSMRDSignedImmOffset(ST) && isInt<21>(EncodedOffset);
}
static bool isDwordAligned(uint64_t ByteOffset) {
  return (ByteOffset & 3) == 0;
}
uint64_t convertSMRDOffsetUnits(const MCSubtargetInfo &ST,
                                uint64_t ByteOffset) {
  if (hasSMEMByteOffset(ST))
    return ByteOffset;
  assert(isDwordAligned(ByteOffset));
  return ByteOffset >> 2; // Pre-VI subtargets encode the offset in dwords.
}
std::optional<int64_t> getSMRDEncodedOffset(const MCSubtargetInfo &ST,
                                            int64_t ByteOffset, bool IsBuffer) {
  // The signed version is always a byte offset.
  if (!IsBuffer && hasSMRDSignedImmOffset(ST))
    return isInt<20>(ByteOffset) ? std::optional<int64_t>(ByteOffset)
                                 : std::nullopt;
  if (!isDwordAligned(ByteOffset) && !hasSMEMByteOffset(ST))
    return std::nullopt;
  int64_t EncodedOffset = convertSMRDOffsetUnits(ST, ByteOffset);
  return isLegalSMRDEncodedUnsignedOffset(ST, EncodedOffset)
             ? std::optional<int64_t>(EncodedOffset)
             : std::nullopt;
}
std::optional<int64_t> getSMRDEncodedLiteralOffset32(const MCSubtargetInfo &ST,
                                                     int64_t ByteOffset) {
  if (!isDwordAligned(ByteOffset))
    return std::nullopt;
  int64_t EncodedOffset = convertSMRDOffsetUnits(ST, ByteOffset);
  return isUInt<32>(EncodedOffset) ? std::optional<int64_t>(EncodedOffset)
                                   : std::nullopt;
}
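// Standalone illustration (not part of the original file): the pre-VI SMRD
// rule above -- the immediate is encoded in dwords, so only dword-aligned
// byte offsets that fit SI's 8-bit field are encodable.
#include <cassert>
#include <cstdint>
#include <optional>

static std::optional<int64_t> exampleEncodeSMRDOffset(int64_t ByteOffset) {
  if (ByteOffset & 3)
    return std::nullopt; // Not dword-aligned.
  int64_t Encoded = ByteOffset >> 2;
  if (static_cast<uint64_t>(Encoded) > 0xFF)
    return std::nullopt; // Overflows SI's 8-bit dword offset.
  return Encoded;
}

int main() {
  assert(*exampleEncodeSMRDOffset(16) == 4); // 16 bytes -> 4 dwords.
  assert(!exampleEncodeSMRDOffset(18));      // Misaligned.
  assert(!exampleEncodeSMRDOffset(2048));    // 512 dwords > 255.
  return 0;
}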
// From splitMUBUFOffset(Imm, SOffset, ImmOffset, Subtarget, Alignment):
// offsets just past the 12-bit immediate range can overflow into an SOffset
// inline constant.
  if (Imm <= MaxImm + 64) {
    // Use an SOffset inline constant for 4..64.
    Overflow = Imm - MaxImm;
    ImmOffset = MaxImm;
  }
// From SIModeRegisterDefaults(const Function &F): function attributes
// override the calling-convention defaults.
  StringRef IEEEAttr = F.getFnAttribute("amdgpu-ieee").getValueAsString();
  if (!IEEEAttr.empty())
    IEEE = IEEEAttr == "true";

  StringRef DX10ClampAttr =
      F.getFnAttribute("amdgpu-dx10-clamp").getValueAsString();
  if (!DX10ClampAttr.empty())
    DX10Clamp = DX10ClampAttr == "true";

  StringRef DenormF32Attr =
      F.getFnAttribute("denormal-fp-math-f32").getValueAsString();
  if (!DenormF32Attr.empty())
    FP32Denormals = parseDenormalFPAttribute(DenormF32Attr);

  StringRef DenormAttr =
      F.getFnAttribute("denormal-fp-math").getValueAsString();
  if (!DenormAttr.empty()) {
    DenormalMode DenormMode = parseDenormalFPAttribute(DenormAttr);
    if (DenormF32Attr.empty())
      FP32Denormals = DenormMode;
    FP64FP16Denormals = DenormMode;
  }
namespace {
struct SourceOfDivergence {
  unsigned Intr;
};
const SourceOfDivergence *lookupSourceOfDivergence(unsigned Intr);

#define GET_SourcesOfDivergence_IMPL
#define GET_Gfx9BufferFormat_IMPL
#define GET_Gfx10BufferFormat_IMPL
#define GET_Gfx11PlusBufferFormat_IMPL
#include "AMDGPUGenSearchableTables.inc"
} // end anonymous namespace
bool isIntrinsicSourceOfDivergence(unsigned IntrID) {
  return lookupSourceOfDivergence(IntrID);
}

const GcnBufferFormatInfo *getGcnBufferFormatInfo(uint8_t BitsPerComp,
                                                  uint8_t NumComponents,
                                                  uint8_t NumFormat,
                                                  const MCSubtargetInfo &STI) {
  return isGFX11Plus(STI)
             ? getGfx11PlusBufferFormatInfo(BitsPerComp, NumComponents,
                                            NumFormat)
             : isGFX10(STI) ? getGfx10BufferFormatInfo(BitsPerComp,
                                                       NumComponents, NumFormat)
                            : getGfx9BufferFormatInfo(BitsPerComp,
                                                      NumComponents, NumFormat);
}

const GcnBufferFormatInfo *getGcnBufferFormatInfo(uint8_t Format,
                                                  const MCSubtargetInfo &STI) {
  return isGFX11Plus(STI) ? getGfx11PlusBufferFormatInfo(Format)
                          : isGFX10(STI) ? getGfx10BufferFormatInfo(Format)
                                         : getGfx9BufferFormatInfo(Format);
}
raw_ostream &operator<<(raw_ostream &OS,
                        const AMDGPUTargetID::TargetIDSetting S) {
  switch (S) {
  case AMDGPUTargetID::TargetIDSetting::Unsupported:
    OS << "Unsupported";
    break;
  // ... (Any, Off, On)
  }
  return OS;
}