#define DEBUG_TYPE "amdgpu-disassembler"

#define SGPR_MAX                                                               \
  (isGFX10Plus() ? AMDGPU::EncValues::SGPR_MAX_GFX10                           \
                 : AMDGPU::EncValues::SGPR_MAX_SI)
      TargetMaxInstBytes(Ctx.getAsmInfo()->getMaxInstLength(&STI)) {
  std::advance(I, OpIdx);
  APInt SignedOffset(18, Imm * 4, true);
  int64_t Offset = (SignedOffset.sext(64) + 4 + Addr).getSExtValue();

  if (DAsm->tryAddingSymbolicOperand(Inst, Offset, Addr, true, 2, 2, 0))
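// Illustration (not part of the upstream file): the SOPP branch immediate is
// a signed dword count relative to the instruction *after* the 4-byte branch,
// which is why the decode above multiplies Imm by 4 and adds 4. A minimal
// standalone sketch with a hypothetical helper name:
static inline int64_t soppBranchTarget(uint64_t Addr, int16_t Simm16) {
  return static_cast<int64_t>(Simm16) * 4 + 4 + static_cast<int64_t>(Addr);
}
// E.g. a branch at 0x100 with simm16 == -2 targets 0x100 + 4 - 8 == 0xFC.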
  Offset = SignExtend64<21>(Imm);

  return addOperand(Inst, DAsm->decodeBoolReg(Val));
#define DECODE_OPERAND(StaticDecoderName, DecoderName)                         \
  static DecodeStatus StaticDecoderName(MCInst &Inst, unsigned Imm,            \
                                        uint64_t /*Addr*/,                     \
                                        const MCDisassembler *Decoder) {       \
    auto DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);              \
    return addOperand(Inst, DAsm->DecoderName(Imm));                           \
  }
#define DECODE_OPERAND_REG_8(RegClass)                                         \
  static DecodeStatus Decode##RegClass##RegisterClass(                         \
      MCInst &Inst, unsigned Imm, uint64_t /*Addr*/,                           \
      const MCDisassembler *Decoder) {                                         \
    assert(Imm < (1 << 8) && "8-bit encoding");                                \
    auto DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);              \
    return addOperand(                                                         \
        Inst, DAsm->createRegOperand(AMDGPU::RegClass##RegClassID, Imm));      \
  }
#define DECODE_SrcOp(Name, EncSize, OpWidth, EncImm, MandatoryLiteral,         \
                     ImmWidth)                                                 \
  static DecodeStatus Name(MCInst &Inst, unsigned Imm, uint64_t /*Addr*/,      \
                           const MCDisassembler *Decoder) {                    \
    assert(Imm < (1 << EncSize) && #EncSize "-bit encoding");                  \
    auto DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);              \
    return addOperand(Inst,                                                    \
                      DAsm->decodeSrcOp(AMDGPUDisassembler::OpWidth, EncImm,   \
                                        MandatoryLiteral, ImmWidth));          \
  }
#define DECODE_OPERAND_REG_7(RegClass, OpWidth)                                \
  DECODE_SrcOp(Decode##RegClass##RegisterClass, 7, OpWidth, Imm, false, 0)

#define DECODE_OPERAND_REG_AV10(RegClass, OpWidth)                             \
  DECODE_SrcOp(Decode##RegClass##RegisterClass, 10, OpWidth,                   \
               Imm | AMDGPU::EncValues::IS_VGPR, false, 0)

#define DECODE_OPERAND_SRC_REG_9(RegClass, OpWidth)                            \
  DECODE_SrcOp(decodeOperand_##RegClass, 9, OpWidth, Imm, false, 0)

#define DECODE_OPERAND_SRC_REG_A9(RegClass, OpWidth)                           \
  DECODE_SrcOp(decodeOperand_##RegClass, 9, OpWidth, Imm | 512, false, 0)

#define DECODE_SRC_OPERAND_REG_AV10(RegClass, OpWidth)                         \
  DECODE_SrcOp(decodeOperand_##RegClass, 10, OpWidth, Imm, false, 0)

#define DECODE_OPERAND_SRC_REG_OR_IMM_9(RegClass, OpWidth, ImmWidth)           \
  DECODE_SrcOp(decodeOperand_##RegClass##_Imm##ImmWidth, 9, OpWidth, Imm,      \
               false, ImmWidth)

#define DECODE_OPERAND_SRC_REG_OR_IMM_A9(RegClass, OpWidth, ImmWidth)          \
  DECODE_SrcOp(decodeOperand_##RegClass##_Imm##ImmWidth, 9, OpWidth,           \
               Imm | 512, false, ImmWidth)

#define DECODE_OPERAND_SRC_REG_OR_IMM_DEFERRED_9(RegClass, OpWidth, ImmWidth)  \
  DECODE_SrcOp(decodeOperand_##RegClass##_Deferred##_Imm##ImmWidth, 9,         \
               OpWidth, Imm, true, ImmWidth)
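// Illustration (not part of the upstream file): an instantiation such as
// DECODE_OPERAND_SRC_REG_9(VGPR_32, OPW32) expands, via DECODE_SrcOp, to a
// decoder named decodeOperand_VGPR_32 that asserts the encoding fits in 9
// bits and forwards to
//   DAsm->decodeSrcOp(AMDGPUDisassembler::OPW32, Imm,
//                     /*MandatoryLiteral=*/false, /*ImmWidth=*/0);
// The _A9 variants OR in bit 9 (512) so decodeSrcOp takes its AGPR path, and
// the _DEFERRED_ variants pass MandatoryLiteral = true.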
  // decodeOperand_f32kimm / decodeOperand_f16kimm both route the immediate
  // through the mandatory-literal path:
  return addOperand(Inst, DAsm->decodeMandatoryLiteralConstant(Imm));

  return addOperand(Inst, DAsm->decodeMandatoryLiteralConstant(Imm));

  // decodeOperandVOPDDstY:
  return addOperand(Inst, DAsm->decodeVOPDDstYOp(Inst, Val));
  // IsAGPROperand:
  unsigned Sub = MRI->getSubReg(Op.getReg(), AMDGPU::sub0);
  auto Reg = Sub ? Sub : Op.getReg();
  return Reg >= AMDGPU::AGPR0 && Reg <= AMDGPU::AGPR255;
  // decodeOperand_AVLdSt_Any:
  if (!DAsm->isGFX90A()) {
                       : AMDGPU::OpName::vdata;

  return addOperand(Inst, DAsm->decodeSrcOp(Opw, Imm | 256));
#define DECODE_SDWA(DecName) \
  DECODE_OPERAND(decodeSDWA##DecName, decodeSDWA##DecName)

#include "AMDGPUGenDisassemblerTables.inc"
  const auto Res =
      support::endian::read<T, support::endianness::little>(Bytes.data());
  Bytes = Bytes.slice(sizeof(T));
  // eat12Bytes:
  uint64_t Lo =
      support::endian::read<uint64_t, support::endianness::little>(Bytes.data());
  Bytes = Bytes.slice(8);
  uint64_t Hi =
      support::endian::read<uint32_t, support::endianness::little>(Bytes.data());
  Bytes = Bytes.slice(4);
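// Note (not part of the upstream file): eat12Bytes mirrors eatBytes<T> above
// for the 96-bit encodings, reading a little-endian 64-bit low half and then
// a 32-bit high half; ArrayRef::slice(N) drops the first N elements, so the
// reference is advanced past each read.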
  if ((unsigned)FiIdx >= MI.getNumOperands())
    return false;
  unsigned Fi = MI.getOperand(FiIdx).getImm();
  return Fi == DPP8_FI_0 || Fi == DPP8_FI_1;
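// Note (not part of the upstream file): a dpp8 candidate is accepted only if
// its fi (fetch-inactive) operand was decoded and holds one of the two legal
// encodings, DPP8_FI_0 or DPP8_FI_1; anything else makes the speculative dpp8
// decode fall through to the next decoder table.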
  unsigned MaxInstBytesNum = std::min((size_t)TargetMaxInstBytes, Bytes_.size());
  Bytes = Bytes_.slice(0, MaxInstBytesNum);

  Bytes = Bytes_.slice(0, MaxInstBytesNum);

  if (Bytes.size() >= 8) {
    const uint64_t QW = eatBytes<uint64_t>(Bytes);
    if (Res) { IsSDWA = true;  break; }

    if (Res) { IsSDWA = true;  break; }

    if (Res) { IsSDWA = true;  break; }
    Bytes = Bytes_.slice(0, MaxInstBytesNum);

    if (Bytes.size() < 4)
      break;
    const uint32_t DW = eatBytes<uint32_t>(Bytes);

    if (Bytes.size() < 4)
      break;
                         AMDGPU::OpName::src2_modifiers);

  if (Res && (MCII->get(MI.getOpcode()).TSFlags &
                                  AMDGPU::OpName::cpol);
      if (MI.getNumOperands() <= (unsigned)CPolPos) {
                                AMDGPU::OpName::cpol);
        MI.getOperand(CPolPos).setImm(MI.getOperand(CPolPos).getImm() | CPol);
  if (Res && (MCII->get(MI.getOpcode()).TSFlags &
    if (TFEOpIdx != -1) {
      auto TFEIter = MI.begin();
      std::advance(TFEIter, TFEOpIdx);

  if (Res && (MCII->get(MI.getOpcode()).TSFlags &
    if (SWZOpIdx != -1) {
      auto SWZIter = MI.begin();
      std::advance(SWZIter, SWZOpIdx);
    unsigned NSAArgs = RsrcIdx - VAddr0Idx - 1;
    if (VAddr0Idx >= 0 && NSAArgs > 0) {
      unsigned NSAWords = (NSAArgs + 3) / 4;
      if (Bytes.size() < 4 * NSAWords) {
      for (unsigned i = 0; i < NSAArgs; ++i) {
        const unsigned VAddrIdx = VAddr0Idx + 1 + i;
            MCII->get(MI.getOpcode()).operands()[VAddrIdx].RegClass;
        MI.insert(MI.begin() + VAddrIdx,
      Bytes = Bytes.slice(4 * NSAWords);
                                           AMDGPU::OpName::vdst_in);
    if (VDstIn_Idx != -1) {
      int Tied = MCII->get(MI.getOpcode()).getOperandConstraint(VDstIn_Idx,
      if (Tied != -1 && (MI.getNumOperands() <= (unsigned)VDstIn_Idx ||
                         !MI.getOperand(VDstIn_Idx).isReg() ||
                         MI.getOperand(VDstIn_Idx).getReg() !=
                             MI.getOperand(Tied).getReg())) {
        if (MI.getNumOperands() > (unsigned)VDstIn_Idx)
          MI.erase(&MI.getOperand(VDstIn_Idx));
                               AMDGPU::OpName::vdst_in);
  if (Res && ImmLitIdx != -1 && !IsSOPK)

  Size = Res ? (MaxInstBytesNum - Bytes.size())
             : std::min((size_t)4, Bytes_.size());
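// Note (not part of the upstream file): Size reports how many bytes the
// decode consumed. On success that is MaxInstBytesNum minus whatever is left
// in Bytes; on failure the disassembler still advances by up to one 4-byte
// word so it can resynchronize on the next dword boundary.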
  // convertVINTERPInst:
  if (MI.getOpcode() == AMDGPU::V_INTERP_P10_F16_F32_inreg_gfx11 ||
      MI.getOpcode() == AMDGPU::V_INTERP_P10_RTZ_F16_F32_inreg_gfx11 ||
      MI.getOpcode() == AMDGPU::V_INTERP_P2_F16_F32_inreg_gfx11 ||
      MI.getOpcode() == AMDGPU::V_INTERP_P2_RTZ_F16_F32_inreg_gfx11) {
                                 AMDGPU::OpName::sdst);
  unsigned OpSelHi = 0;

  // collectVOPModifiers(const MCInst &MI, bool IsVOP3P = false):
                                          bool IsVOP3P = false) {
  unsigned Opc = MI.getOpcode();
  const int ModOps[] = {AMDGPU::OpName::src0_modifiers,
                        AMDGPU::OpName::src1_modifiers,
                        AMDGPU::OpName::src2_modifiers};
  for (int J = 0; J < 3; ++J) {
    unsigned Val = MI.getOperand(OpIdx).getImm();
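// Illustration (not part of the upstream file): collectVOPModifiers regathers
// per-source bits scattered across the srcN_modifiers immediates into packed
// op_sel / op_sel_hi / neg_lo / neg_hi fields, one bit per source operand.
// A hypothetical helper showing the bit-packing pattern:
static inline unsigned packSrcModBit(const unsigned SrcMods[3], unsigned Mask) {
  unsigned Packed = 0;
  for (int J = 0; J < 3; ++J)
    Packed |= static_cast<unsigned>((SrcMods[J] & Mask) != 0) << J;
  return Packed;
}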
  // convertMacDPPInst:
  constexpr int DST_IDX = 0;
  auto Opcode = MI.getOpcode();
  const auto &Desc = MCII->get(Opcode);

  if (OldIdx != -1 && Desc.getOperandConstraint(
    assert(Desc.getOperandConstraint(
    assert(MI.getNumOperands() + 1 < MCII->get(MI.getOpcode()).getNumOperands());
                           AMDGPU::OpName::src2_modifiers);
  // convertDPP8Inst:
  unsigned Opc = MI.getOpcode();

  unsigned DescNumOps = MCII->get(Opc).getNumOperands();
  if (MI.getNumOperands() < DescNumOps &&
                          AMDGPU::OpName::op_sel);

  if (MI.getNumOperands() < DescNumOps &&
                          AMDGPU::OpName::src0_modifiers);

  if (MI.getNumOperands() < DescNumOps &&
                          AMDGPU::OpName::src1_modifiers);
  unsigned Opc = MI.getOpcode();
  unsigned DescNumOps = MCII->get(Opc).getNumOperands();

  if (MI.getNumOperands() < DescNumOps &&
                          AMDGPU::OpName::op_sel);
  // convertMIMGInst:
                                           AMDGPU::OpName::vdst);
                                           AMDGPU::OpName::vdata);
                                           AMDGPU::OpName::dmask);
                                           AMDGPU::OpName::tfe);
                                           AMDGPU::OpName::d16);

  if (BaseOpcode->BVH) {

  bool IsAtomic = (VDstIdx != -1);
  bool IsPartialNSA = false;
  unsigned AddrSize = Info->VAddrDwords;

  const bool IsA16 = (A16Idx != -1 && MI.getOperand(A16Idx).getImm());

    IsNSA = Info->MIMGEncoding == AMDGPU::MIMGEncGfx10NSA ||
            Info->MIMGEncoding == AMDGPU::MIMGEncGfx11NSA;

    if (AddrSize > Info->VAddrDwords) {

  unsigned DMask = MI.getOperand(DMaskIdx).getImm() & 0xf;
  unsigned DstSize = IsGather4 ? 4 : std::max(llvm::popcount(DMask), 1);

  bool D16 = D16Idx >= 0 && MI.getOperand(D16Idx).getImm();
    DstSize = (DstSize + 1) / 2;

  if (TFEIdx != -1 && MI.getOperand(TFEIdx).getImm())

  if (DstSize == Info->VDataDwords && AddrSize == Info->VAddrDwords)

  unsigned NewVdata = AMDGPU::NoRegister;
  if (DstSize != Info->VDataDwords) {
    auto DataRCID = MCII->get(NewOpcode).operands()[VDataIdx].RegClass;

    unsigned Vdata0 = MI.getOperand(VDataIdx).getReg();
    unsigned VdataSub0 = MRI.getSubReg(Vdata0, AMDGPU::sub0);
    Vdata0 = (VdataSub0 != 0) ? VdataSub0 : Vdata0;

    if (NewVdata == AMDGPU::NoRegister) {

  int VAddrSAIdx = IsPartialNSA ? (RsrcIdx - 1) : VAddr0Idx;
  unsigned NewVAddrSA = AMDGPU::NoRegister;
  if (STI.hasFeature(AMDGPU::FeatureNSAEncoding) && (!IsNSA || IsPartialNSA) &&
      AddrSize != Info->VAddrDwords) {
    unsigned VAddrSA = MI.getOperand(VAddrSAIdx).getReg();
    unsigned VAddrSubSA = MRI.getSubReg(VAddrSA, AMDGPU::sub0);
    VAddrSA = VAddrSubSA ? VAddrSubSA : VAddrSA;

    auto AddrRCID = MCII->get(NewOpcode).operands()[VAddrSAIdx].RegClass;

  MI.setOpcode(NewOpcode);

  if (NewVdata != AMDGPU::NoRegister) {

    assert(AddrSize <= Info->VAddrDwords);
    MI.erase(MI.begin() + VAddr0Idx + AddrSize,
             MI.begin() + VAddr0Idx + Info->VAddrDwords);
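// Worked example (not part of the upstream file): for a non-gather image load
// with dmask = 0b1011, DstSize = popcount(0b1011) = 3 dwords; if D16 is set
// and the target packs D16, DstSize becomes (3 + 1) / 2 = 2, and TFE adds one
// more dword. convertMIMGInst then looks up the opcode variant whose
// VDataDwords/VAddrDwords match and rewrites vdata/vaddr to registers of the
// matching width.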
  // convertVOP3PDPPInst:
  unsigned Opc = MI.getOpcode();
  unsigned DescNumOps = MCII->get(Opc).getNumOperands();

  if (MI.getNumOperands() < DescNumOps &&

  if (MI.getNumOperands() < DescNumOps &&
                          AMDGPU::OpName::op_sel);
  if (MI.getNumOperands() < DescNumOps &&
                          AMDGPU::OpName::op_sel_hi);
  if (MI.getNumOperands() < DescNumOps &&
                          AMDGPU::OpName::neg_lo);
  if (MI.getNumOperands() < DescNumOps &&
                          AMDGPU::OpName::neg_hi);
  // convertVOPCDPPInst:
  unsigned Opc = MI.getOpcode();
  unsigned DescNumOps = MCII->get(Opc).getNumOperands();

  if (MI.getNumOperands() < DescNumOps &&

  if (MI.getNumOperands() < DescNumOps &&
                          AMDGPU::OpName::src0_modifiers);

  if (MI.getNumOperands() < DescNumOps &&
                          AMDGPU::OpName::src1_modifiers);
DecodeStatus AMDGPUDisassembler::convertFMAanyK(MCInst &MI,
                                                int ImmLitIdx) const {
  assert(HasLiteral && "Should have decoded a literal");
                                    AMDGPU::OpName::immDeferred);
  assert(DescNumOps == MI.getNumOperands());
  for (unsigned I = 0; I < DescNumOps; ++I) {
    auto &Op = MI.getOperand(I);
    auto OpType = Desc.operands()[I].OperandType;
MCOperand AMDGPUDisassembler::errOperand(unsigned V,
                                         const Twine &ErrMsg) const {

MCOperand AMDGPUDisassembler::createRegOperand(unsigned RegClassID,
                                               unsigned Val) const {
  const auto &RegCl = AMDGPUMCRegisterClasses[RegClassID];
  if (Val >= RegCl.getNumRegs())
                      ": unknown register " + Twine(Val));

MCOperand AMDGPUDisassembler::createSRegOperand(unsigned SRegClassID,
                                                unsigned Val) const {
  int shift = 0;
  switch (SRegClassID) {
  case AMDGPU::SGPR_32RegClassID:
  case AMDGPU::TTMP_32RegClassID:
    break;
  case AMDGPU::SGPR_64RegClassID:
  case AMDGPU::TTMP_64RegClassID:
    shift = 1;
    break;
  case AMDGPU::SGPR_128RegClassID:
  case AMDGPU::TTMP_128RegClassID:
  case AMDGPU::SGPR_256RegClassID:
  case AMDGPU::TTMP_256RegClassID:
  case AMDGPU::SGPR_288RegClassID:
  case AMDGPU::TTMP_288RegClassID:
  case AMDGPU::SGPR_320RegClassID:
  case AMDGPU::TTMP_320RegClassID:
  case AMDGPU::SGPR_352RegClassID:
  case AMDGPU::TTMP_352RegClassID:
  case AMDGPU::SGPR_384RegClassID:
  case AMDGPU::TTMP_384RegClassID:
  case AMDGPU::SGPR_512RegClassID:
  case AMDGPU::TTMP_512RegClassID:
    shift = 2;
    break;

  if (Val % (1 << shift)) {
    *CommentStream << "Warning: " << getRegClassName(SRegClassID)
                   << ": scalar reg isn't aligned " << Val;
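// Note (not part of the upstream file): shift encodes the required alignment
// of the first register in the tuple: 64-bit scalar classes need an even
// starting register (Val % 2 == 0) and the wider classes need Val % 4 == 0,
// so a misaligned encoding only draws the comment-stream warning above rather
// than failing the decode.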
1151 "Should only decode multiple kimm with VOPD, check VSrc operand types");
1153 return errOperand(Val,
"More than one unique literal is illegal");
1165 if (Bytes.
size() < 4) {
1166 return errOperand(0,
"cannot read literal, inst bytes left " +
1170 Literal = eatBytes<uint32_t>(Bytes);
  // decodeIntImmed:
  using namespace AMDGPU::EncValues;

  assert(Imm >= INLINE_INTEGER_C_MIN && Imm <= INLINE_INTEGER_C_MAX);
  return MCOperand::createImm((Imm <= INLINE_INTEGER_C_POSITIVE_MAX)
                                  ? (static_cast<int64_t>(Imm) - INLINE_INTEGER_C_MIN)
                                  : (INLINE_INTEGER_C_POSITIVE_MAX -
                                     static_cast<int64_t>(Imm)));
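// Worked example (not part of the upstream file): with INLINE_INTEGER_C_MIN
// = 128 and INLINE_INTEGER_C_POSITIVE_MAX = 192, encoding 129 decodes to
// 129 - 128 = 1 and encoding 193 decodes to 192 - 193 = -1; i.e. 128..192
// cover the inline integers 0..64 and 193..208 cover -1..-16.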
  // getInlineImmVal32:
  case 240: return llvm::bit_cast<uint32_t>(0.5f);
  case 241: return llvm::bit_cast<uint32_t>(-0.5f);
  case 242: return llvm::bit_cast<uint32_t>(1.0f);
  case 243: return llvm::bit_cast<uint32_t>(-1.0f);
  case 244: return llvm::bit_cast<uint32_t>(2.0f);
  case 245: return llvm::bit_cast<uint32_t>(-2.0f);
  case 246: return llvm::bit_cast<uint32_t>(4.0f);
  case 247: return llvm::bit_cast<uint32_t>(-4.0f);

  // getInlineImmVal64:
  case 240: return llvm::bit_cast<uint64_t>(0.5);
  case 241: return llvm::bit_cast<uint64_t>(-0.5);
  case 242: return llvm::bit_cast<uint64_t>(1.0);
  case 243: return llvm::bit_cast<uint64_t>(-1.0);
  case 244: return llvm::bit_cast<uint64_t>(2.0);
  case 245: return llvm::bit_cast<uint64_t>(-2.0);
  case 246: return llvm::bit_cast<uint64_t>(4.0);
  case 247: return llvm::bit_cast<uint64_t>(-4.0);
  case 248: // 1 / (2 * PI)
    return 0x3fc45f306dc9c882;
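// Note (not part of the upstream file): the bare hex constant is the IEEE-754
// double bit pattern of 1/(2*pi) (~0.15915494309189532); unlike the other
// inline constants it has no exact short decimal spelling, so the source
// keeps it as a raw bit pattern rather than a bit_cast of a literal.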
  // getVgprClassId:
  using namespace AMDGPU;

  case OPW32:
  case OPW16:
  case OPWV216:
    return VGPR_32RegClassID;
  case OPW64:
  case OPWV232:
    return VReg_64RegClassID;
  case OPW96:
    return VReg_96RegClassID;
  case OPW128:
    return VReg_128RegClassID;
  case OPW160:
    return VReg_160RegClassID;
  case OPW256:
    return VReg_256RegClassID;
  case OPW288:
    return VReg_288RegClassID;
  case OPW320:
    return VReg_320RegClassID;
  case OPW352:
    return VReg_352RegClassID;
  case OPW384:
    return VReg_384RegClassID;
  case OPW512:
    return VReg_512RegClassID;
  case OPW1024:
    return VReg_1024RegClassID;
  // getAgprClassId:
  using namespace AMDGPU;

  case OPW32:
  case OPW16:
  case OPWV216:
    return AGPR_32RegClassID;
  case OPW64:
  case OPWV232:
    return AReg_64RegClassID;
  case OPW96:
    return AReg_96RegClassID;
  case OPW128:
    return AReg_128RegClassID;
  case OPW160:
    return AReg_160RegClassID;
  case OPW256:
    return AReg_256RegClassID;
  case OPW288:
    return AReg_288RegClassID;
  case OPW320:
    return AReg_320RegClassID;
  case OPW352:
    return AReg_352RegClassID;
  case OPW384:
    return AReg_384RegClassID;
  case OPW512:
    return AReg_512RegClassID;
  case OPW1024:
    return AReg_1024RegClassID;
  // getSgprClassId:
  using namespace AMDGPU;

  case OPW32:
  case OPW16:
  case OPWV216:
    return SGPR_32RegClassID;
  case OPW64:
  case OPWV232:
    return SGPR_64RegClassID;
  case OPW96:
    return SGPR_96RegClassID;
  case OPW128:
    return SGPR_128RegClassID;
  case OPW160:
    return SGPR_160RegClassID;
  case OPW256:
    return SGPR_256RegClassID;
  case OPW288:
    return SGPR_288RegClassID;
  case OPW320:
    return SGPR_320RegClassID;
  case OPW352:
    return SGPR_352RegClassID;
  case OPW384:
    return SGPR_384RegClassID;
  case OPW512:
    return SGPR_512RegClassID;
  // getTtmpClassId:
  using namespace AMDGPU;

  case OPW32:
  case OPW16:
  case OPWV216:
    return TTMP_32RegClassID;
  case OPW64:
  case OPWV232:
    return TTMP_64RegClassID;
  case OPW128:
    return TTMP_128RegClassID;
  case OPW256:
    return TTMP_256RegClassID;
  case OPW288:
    return TTMP_288RegClassID;
  case OPW320:
    return TTMP_320RegClassID;
  case OPW352:
    return TTMP_352RegClassID;
  case OPW384:
    return TTMP_384RegClassID;
  case OPW512:
    return TTMP_512RegClassID;
  // getTTmpIdx:
  using namespace AMDGPU::EncValues;

  unsigned TTmpMin = isGFX9Plus() ? TTMP_GFX9PLUS_MIN : TTMP_VI_MIN;
  unsigned TTmpMax = isGFX9Plus() ? TTMP_GFX9PLUS_MAX : TTMP_VI_MAX;

  return (TTmpMin <= Val && Val <= TTmpMax) ? Val - TTmpMin : -1;
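// Note (not part of the upstream file): the ttmp (trap temporary) SGPRs sit
// in a different encoding window on GFX9+ than on VI, so getTTmpIdx
// normalizes an encoding to a zero-based ttmp index and returns -1 for
// values outside [TTmpMin, TTmpMax].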
MCOperand AMDGPUDisassembler::decodeSrcOp(const OpWidthTy Width, unsigned Val,
                                          bool MandatoryLiteral,
                                          unsigned ImmWidth) const {
  using namespace AMDGPU::EncValues;

  bool IsAGPR = Val & 512;

  if (VGPR_MIN <= Val && Val <= VGPR_MAX) {

  static_assert(SGPR_MIN == 0);

  if (INLINE_INTEGER_C_MIN <= Val && Val <= INLINE_INTEGER_C_MAX)

  if (INLINE_FLOATING_C_MIN <= Val && Val <= INLINE_FLOATING_C_MAX)

  if (Val == LITERAL_CONST) {
    if (MandatoryLiteral)
MCOperand AMDGPUDisassembler::decodeVOPDDstYOp(MCInst &Inst,
                                               unsigned Val) const {
  Val |= ~XDstReg & 1;
  // decodeSpecialReg32:
  using namespace AMDGPU;

  // decodeSpecialReg64:
  using namespace AMDGPU;
MCOperand AMDGPUDisassembler::decodeSDWASrc(const OpWidthTy Width,
                                            unsigned Val,
                                            unsigned ImmWidth) const {
  using namespace AMDGPU::SDWA;
  using namespace AMDGPU::EncValues;

    if (int(SDWA9EncValues::SRC_VGPR_MIN) <= int(Val) &&
        Val <= SDWA9EncValues::SRC_VGPR_MAX) {
                              Val - SDWA9EncValues::SRC_VGPR_MIN);
    if (SDWA9EncValues::SRC_SGPR_MIN <= Val &&
        Val <= (isGFX10Plus() ? SDWA9EncValues::SRC_SGPR_MAX_GFX10
                              : SDWA9EncValues::SRC_SGPR_MAX_SI)) {
                               Val - SDWA9EncValues::SRC_SGPR_MIN);
    if (SDWA9EncValues::SRC_TTMP_MIN <= Val &&
        Val <= SDWA9EncValues::SRC_TTMP_MAX) {
                               Val - SDWA9EncValues::SRC_TTMP_MIN);

    const unsigned SVal = Val - SDWA9EncValues::SRC_SGPR_MIN;

    if (INLINE_INTEGER_C_MIN <= SVal && SVal <= INLINE_INTEGER_C_MAX)

    if (INLINE_FLOATING_C_MIN <= SVal && SVal <= INLINE_FLOATING_C_MAX)

  } else if (STI.hasFeature(AMDGPU::FeatureVolcanicIslands)) {
  // decodeSDWAVopcDst:
  using namespace AMDGPU::SDWA;

         "SDWAVopcDst should be present only on GFX9+");
  bool IsWave64 = STI.hasFeature(AMDGPU::FeatureWavefrontSize64);

  if (Val & SDWA9EncValues::VOPC_DST_VCC_MASK) {
    Val &= SDWA9EncValues::VOPC_DST_SGPR_MASK;
  return STI.hasFeature(AMDGPU::FeatureArchitectedFlatScratch);
#define PRINT_DIRECTIVE(DIRECTIVE, MASK)                                       \
  do {                                                                         \
    KdStream << Indent << DIRECTIVE " "                                        \
             << ((FourByteBuffer & MASK) >> (MASK##_SHIFT)) << '\n';           \
  } while (0)
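// Illustration (not part of the upstream file): PRINT_DIRECTIVE assumes each
// amdhsa mask macro has a parallel MASK_SHIFT constant, so
//   PRINT_DIRECTIVE(".amdhsa_fp16_overflow", COMPUTE_PGM_RSRC1_FP16_OVFL);
// extracts (FourByteBuffer & COMPUTE_PGM_RSRC1_FP16_OVFL) >>
// COMPUTE_PGM_RSRC1_FP16_OVFL_SHIFT and streams it after the directive name.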
  // decodeCOMPUTE_PGM_RSRC1:
  using namespace amdhsa;

  uint32_t GranulatedWorkitemVGPRCount =
      (FourByteBuffer & COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT) >>
      COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT_SHIFT;

  uint32_t NextFreeVGPR = (GranulatedWorkitemVGPRCount + 1) *

  KdStream << Indent << ".amdhsa_next_free_vgpr " << NextFreeVGPR << '\n';
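// Worked example (not part of the upstream file): the field stores
// ceil(vgprs / granule) - 1, so with a VGPR encoding granule of 4 a raw
// value of 7 prints ".amdhsa_next_free_vgpr 32" ((7 + 1) * 4).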
  uint32_t GranulatedWavefrontSGPRCount =
      (FourByteBuffer & COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT) >>
      COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT_SHIFT;

  if (isGFX10Plus() && GranulatedWavefrontSGPRCount)

  uint32_t NextFreeSGPR = (GranulatedWavefrontSGPRCount + 1) *

  KdStream << Indent << ".amdhsa_reserve_vcc " << 0 << '\n';
  KdStream << Indent << ".amdhsa_reserve_flat_scratch " << 0 << '\n';
  KdStream << Indent << ".amdhsa_reserve_xnack_mask " << 0 << '\n';
  KdStream << Indent << ".amdhsa_next_free_sgpr " << NextFreeSGPR << "\n";
  if (FourByteBuffer & COMPUTE_PGM_RSRC1_PRIORITY)

                  COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_32);
                  COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_16_64);
                  COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_32);
                  COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_16_64);

  if (FourByteBuffer & COMPUTE_PGM_RSRC1_PRIV)

  PRINT_DIRECTIVE(".amdhsa_dx10_clamp", COMPUTE_PGM_RSRC1_ENABLE_DX10_CLAMP);

  if (FourByteBuffer & COMPUTE_PGM_RSRC1_DEBUG_MODE)

  PRINT_DIRECTIVE(".amdhsa_ieee_mode", COMPUTE_PGM_RSRC1_ENABLE_IEEE_MODE);

  if (FourByteBuffer & COMPUTE_PGM_RSRC1_BULKY)

  if (FourByteBuffer & COMPUTE_PGM_RSRC1_CDBG_USER)

  PRINT_DIRECTIVE(".amdhsa_fp16_overflow", COMPUTE_PGM_RSRC1_FP16_OVFL);

  if (FourByteBuffer & COMPUTE_PGM_RSRC1_RESERVED0)

                  COMPUTE_PGM_RSRC1_WGP_MODE);
  PRINT_DIRECTIVE(".amdhsa_memory_ordered", COMPUTE_PGM_RSRC1_MEM_ORDERED);
  PRINT_DIRECTIVE(".amdhsa_forward_progress", COMPUTE_PGM_RSRC1_FWD_PROGRESS);
  // decodeCOMPUTE_PGM_RSRC2:
  using namespace amdhsa;

                  COMPUTE_PGM_RSRC2_ENABLE_PRIVATE_SEGMENT);
  PRINT_DIRECTIVE(".amdhsa_system_sgpr_private_segment_wavefront_offset",
                  COMPUTE_PGM_RSRC2_ENABLE_PRIVATE_SEGMENT);
                  COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_X);
                  COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Y);
                  COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Z);
                  COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_INFO);
                  COMPUTE_PGM_RSRC2_ENABLE_VGPR_WORKITEM_ID);

  if (FourByteBuffer & COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_ADDRESS_WATCH)

  if (FourByteBuffer & COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_MEMORY)

  if (FourByteBuffer & COMPUTE_PGM_RSRC2_GRANULATED_LDS_SIZE)

  PRINT_DIRECTIVE(
      ".amdhsa_exception_fp_ieee_invalid_op",
      COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INVALID_OPERATION);
                  COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_FP_DENORMAL_SOURCE);
  PRINT_DIRECTIVE(
      ".amdhsa_exception_fp_ieee_div_zero",
      COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_DIVISION_BY_ZERO);
                  COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_OVERFLOW);
                  COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_UNDERFLOW);
                  COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INEXACT);
                  COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_INT_DIVIDE_BY_ZERO);

  if (FourByteBuffer & COMPUTE_PGM_RSRC2_RESERVED0)

#undef PRINT_DIRECTIVE
#define PRINT_DIRECTIVE(DIRECTIVE, MASK)                                       \
  do {                                                                         \
    KdStream << Indent << DIRECTIVE " "                                        \
             << ((TwoByteBuffer & MASK) >> (MASK##_SHIFT)) << '\n';            \
  } while (0)
  // decodeKernelDescriptorDirective:
  switch (Cursor.tell()) {
  case amdhsa::GROUP_SEGMENT_FIXED_SIZE_OFFSET:
    FourByteBuffer = DE.getU32(Cursor);
    KdStream << Indent << ".amdhsa_group_segment_fixed_size " << FourByteBuffer

  case amdhsa::PRIVATE_SEGMENT_FIXED_SIZE_OFFSET:
    FourByteBuffer = DE.getU32(Cursor);
    KdStream << Indent << ".amdhsa_private_segment_fixed_size "
             << FourByteBuffer << '\n';

  case amdhsa::KERNARG_SIZE_OFFSET:
    FourByteBuffer = DE.getU32(Cursor);
    KdStream << Indent << ".amdhsa_kernarg_size "
             << FourByteBuffer << '\n';

  case amdhsa::RESERVED0_OFFSET:
    // 4 reserved bytes, must be 0.
    ReservedBytes = DE.getBytes(Cursor, 4);
    for (int I = 0; I < 4; ++I) {
      if (ReservedBytes[I] != 0) {

  case amdhsa::RESERVED1_OFFSET:
    // 20 reserved bytes, must be 0.
    ReservedBytes = DE.getBytes(Cursor, 20);
    for (int I = 0; I < 20; ++I) {
      if (ReservedBytes[I] != 0) {

  case amdhsa::COMPUTE_PGM_RSRC3_OFFSET:
    FourByteBuffer = DE.getU32(Cursor);

  case amdhsa::COMPUTE_PGM_RSRC1_OFFSET:
    FourByteBuffer = DE.getU32(Cursor);

  case amdhsa::COMPUTE_PGM_RSRC2_OFFSET:
    FourByteBuffer = DE.getU32(Cursor);
  case amdhsa::KERNEL_CODE_PROPERTIES_OFFSET: {
    using namespace amdhsa;
    TwoByteBuffer = DE.getU16(Cursor);

                    KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER);
                    KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR);
                    KERNEL_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR);
                    KERNEL_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR);
                    KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID);
                    KERNEL_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT);
                    KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE);

    if (TwoByteBuffer & KERNEL_CODE_PROPERTY_RESERVED0)

        (TwoByteBuffer & KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32)) {
                      KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32);

                    KERNEL_CODE_PROPERTY_USES_DYNAMIC_STACK);

    if (TwoByteBuffer & KERNEL_CODE_PROPERTY_RESERVED1)

  case amdhsa::RESERVED2_OFFSET:
    // 6 bytes from here are reserved, must be 0.
    ReservedBytes = DE.getBytes(Cursor, 6);
    for (int I = 0; I < 6; ++I) {
      if (ReservedBytes[I] != 0)

#undef PRINT_DIRECTIVE
  // decodeKernelDescriptor:
  if (Bytes.size() != 64 || KdAddress % 64 != 0)

  KdStream << ".amdhsa_kernel " << KdName << '\n';

  while (C && C.tell() < Bytes.size()) {

  KdStream << ".end_amdhsa_kernel\n";
std::optional<MCDisassembler::DecodeStatus>

  return std::nullopt;

  if (Result != Symbols->end()) {

  ReferencedAddresses.push_back(static_cast<uint64_t>(Value));

    std::unique_ptr<MCRelocationInfo> &&RelInfo) {