#define DEBUG_TYPE "amdgpu-disassembler"
  (isGFX10Plus() ? AMDGPU::EncValues::SGPR_MAX_GFX10 \
                 : AMDGPU::EncValues::SGPR_MAX_SI)

      MAI(*Ctx.getAsmInfo()), TargetMaxInstBytes(MAI.getMaxInstLength(&STI)),
      CodeObjectVersion(AMDGPU::getDefaultAMDHSACodeObjectVersion()) {
  std::advance(I, OpIdx);

  APInt SignedOffset(18, Imm * 4, true);
  int64_t Offset = (SignedOffset.sext(64) + 4 + Addr).getSExtValue();

  if (DAsm->tryAddingSymbolicOperand(Inst, Offset, Addr, true, 2, 2, 0))
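// Illustrative note (added; not from the original source): the SOPP branch
// target above is the 16-bit immediate scaled to bytes, sign-extended through
// 18 bits, plus the 4-byte size of the branch instruction itself. For
// example, with Imm == 0xFFFF, Imm * 4 == 0x3FFFC reads back as -4 once
// sign-extended, so Offset == Addr + 4 - 4, i.e. the branch targets the next
// instruction's address.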
  if (DAsm->isGFX12Plus()) { // GFX12 supports 24-bit signed offsets.
    Offset = SignExtend64<24>(Imm);
  } else if (DAsm->isVI()) { // VI supports 20-bit unsigned offsets.
    Offset = Imm & 0xFFFFF;
  } else { // GFX9+ supports 21-bit signed offsets.
    Offset = SignExtend64<21>(Imm);
  }

  return addOperand(Inst, DAsm->decodeBoolReg(Val));

  return addOperand(Inst, DAsm->decodeSplitBarrier(Val));

  return addOperand(Inst, DAsm->decodeDpp8FI(Val));
#define DECODE_OPERAND(StaticDecoderName, DecoderName)                        \
  static DecodeStatus StaticDecoderName(MCInst &Inst, unsigned Imm,           \
                                        uint64_t /*Addr*/,                    \
                                        const MCDisassembler *Decoder) {      \
    auto DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);             \
    return addOperand(Inst, DAsm->DecoderName(Imm));                          \

#define DECODE_OPERAND_REG_8(RegClass)                                        \
  static DecodeStatus Decode##RegClass##RegisterClass(                        \
      MCInst &Inst, unsigned Imm, uint64_t /*Addr*/,                          \
      const MCDisassembler *Decoder) {                                        \
    assert(Imm < (1 << 8) && "8-bit encoding");                               \
    auto DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);             \
    return addOperand(                                                        \
        Inst, DAsm->createRegOperand(AMDGPU::RegClass##RegClassID, Imm));     \
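// Illustrative sketch (added; not part of the original file): an
// instantiation such as DECODE_OPERAND_REG_8(VGPR_32) would expand to a
// decoder of roughly the following form, where the 8-bit encoding is used
// directly as the register index within the named class:
//
//   static DecodeStatus DecodeVGPR_32RegisterClass(
//       MCInst &Inst, unsigned Imm, uint64_t /*Addr*/,
//       const MCDisassembler *Decoder) {
//     assert(Imm < (1 << 8) && "8-bit encoding");
//     auto DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
//     return addOperand(
//         Inst, DAsm->createRegOperand(AMDGPU::VGPR_32RegClassID, Imm));
//   }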
#define DECODE_SrcOp(Name, EncSize, OpWidth, EncImm, MandatoryLiteral,        \
                     ImmWidth)                                                \
  static DecodeStatus Name(MCInst &Inst, unsigned Imm, uint64_t /*Addr*/,     \
                           const MCDisassembler *Decoder) {                   \
    assert(Imm < (1 << EncSize) && #EncSize "-bit encoding");                 \
    auto DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);             \
    return addOperand(Inst,                                                   \
                      DAsm->decodeSrcOp(AMDGPUDisassembler::OpWidth, EncImm,  \
                                        MandatoryLiteral, ImmWidth));         \

                                 unsigned Imm, unsigned EncImm,
                                 bool MandatoryLiteral, unsigned ImmWidth,

  assert(Imm < (1U << EncSize) && "Operand doesn't fit encoding!");

  return addOperand(Inst, DAsm->decodeSrcOp(OpWidth, EncImm, MandatoryLiteral,
#define DECODE_OPERAND_REG_7(RegClass, OpWidth)                               \
  DECODE_SrcOp(Decode##RegClass##RegisterClass, 7, OpWidth, Imm, false, 0)
template <AMDGPUDisassembler::OpWidthTy OpWidth>

template <AMDGPUDisassembler::OpWidthTy OpWidth>

  return decodeSrcOp(Inst, 9, OpWidth, Imm, Imm, false, 0,

template <AMDGPUDisassembler::OpWidthTy OpWidth>

  return decodeSrcOp(Inst, 9, OpWidth, Imm, Imm | 512, false, 0,

template <AMDGPUDisassembler::OpWidthTy OpWidth>

  return decodeSrcOp(Inst, 10, OpWidth, Imm, Imm, false, 0,

  return decodeSrcOp(Inst, 9, OpWidth, Imm, Imm, false, ImmWidth,

  return decodeSrcOp(Inst, 9, OpWidth, Imm, Imm | 512, false, ImmWidth,

  return decodeSrcOp(Inst, 9, OpWidth, Imm, Imm, true, ImmWidth,
  assert(isUInt<10>(Imm) && "10-bit encoding expected");
  assert((Imm & (1 << 8)) == 0 && "Imm{8} should not be used");

  bool IsHi = Imm & (1 << 9);
  unsigned RegIdx = Imm & 0xff;

  return addOperand(Inst, DAsm->createVGPR16Operand(RegIdx, IsHi));

  assert(isUInt<8>(Imm) && "8-bit encoding expected");

  bool IsHi = Imm & (1 << 7);
  unsigned RegIdx = Imm & 0x7f;

  return addOperand(Inst, DAsm->createVGPR16Operand(RegIdx, IsHi));

  assert(isUInt<9>(Imm) && "9-bit encoding expected");

  bool IsVGPR = Imm & (1 << 8);

  bool IsHi = Imm & (1 << 7);
  unsigned RegIdx = Imm & 0x7f;
  return addOperand(Inst, DAsm->createVGPR16Operand(RegIdx, IsHi));

                                          Imm & 0xFF, false, 16));

  assert(isUInt<10>(Imm) && "10-bit encoding expected");

  bool IsVGPR = Imm & (1 << 8);

  bool IsHi = Imm & (1 << 9);
  unsigned RegIdx = Imm & 0xff;
  return addOperand(Inst, DAsm->createVGPR16Operand(RegIdx, IsHi));

                                          Imm & 0xFF, false, 16));
  return addOperand(Inst, DAsm->decodeMandatoryLiteralConstant(Imm));

  return addOperand(Inst, DAsm->decodeVOPDDstYOp(Inst, Val));

  unsigned Sub = MRI->getSubReg(Op.getReg(), AMDGPU::sub0);
  auto Reg = Sub ? Sub : Op.getReg();
  return Reg >= AMDGPU::AGPR0 && Reg <= AMDGPU::AGPR255;

  if (!DAsm->isGFX90A()) {

  uint64_t TSFlags = DAsm->getMCII()->get(Opc).TSFlags;

                        : AMDGPU::OpName::vdata;

  return addOperand(Inst, DAsm->decodeSrcOp(Opw, Imm | 256));
template <AMDGPUDisassembler::OpWidthTy Opw>

  assert(Imm < (1 << 9) && "9-bit encoding");

#define DECODE_SDWA(DecName) \
  DECODE_OPERAND(decodeSDWA##DecName, decodeSDWA##DecName)

#include "AMDGPUGenDisassemblerTables.inc"
      support::endian::read<T, llvm::endianness::little>(Bytes.data());
  Bytes = Bytes.slice(sizeof(T));

      support::endian::read<uint64_t, llvm::endianness::little>(Bytes.data());
  Bytes = Bytes.slice(8);

      support::endian::read<uint32_t, llvm::endianness::little>(Bytes.data());
  Bytes = Bytes.slice(4);
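// Illustrative note (added; not from the original source): these helpers
// consume bytes from the front of the ArrayRef view. Assuming Bytes starts
// as {0x01, 0x02, 0x03, 0x04, 0xAA, ...}, eatBytes<uint32_t>(Bytes) returns
// the little-endian value 0x04030201 and leaves Bytes pointing at 0xAA.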
  unsigned MaxInstBytesNum = std::min((size_t)TargetMaxInstBytes, Bytes_.size());
  Bytes = Bytes_.slice(0, MaxInstBytesNum);

    Size = std::min((size_t)4, Bytes_.size());

    Bytes = Bytes_.slice(0, MaxInstBytesNum);

    if (Bytes.size() >= 8) {
      const uint64_t QW = eatBytes<uint64_t>(Bytes);

    Bytes = Bytes_.slice(0, MaxInstBytesNum);

    if (Bytes.size() >= 4) {
      const uint32_t DW = eatBytes<uint32_t>(Bytes);
                        AMDGPU::OpName::src2_modifiers);

  if (MI.getOpcode() == AMDGPU::V_CVT_SR_BF8_F32_e64_dpp ||
      MI.getOpcode() == AMDGPU::V_CVT_SR_FP8_F32_e64_dpp) {

                          AMDGPU::OpName::src2_modifiers);

  if (MCII->get(MI.getOpcode()).TSFlags &

                                  AMDGPU::OpName::cpol);

    if (MI.getNumOperands() <= (unsigned)CPolPos) {

                            AMDGPU::OpName::cpol);

      MI.getOperand(CPolPos).setImm(MI.getOperand(CPolPos).getImm() | CPol);

  if ((MCII->get(MI.getOpcode()).TSFlags &

    if (TFEOpIdx != -1) {
      auto TFEIter = MI.begin();
      std::advance(TFEIter, TFEOpIdx);

  if (MCII->get(MI.getOpcode()).TSFlags &

    if (SWZOpIdx != -1) {
      auto SWZIter = MI.begin();
      std::advance(SWZIter, SWZOpIdx);
    unsigned NSAArgs = RsrcIdx - VAddr0Idx - 1;
    if (VAddr0Idx >= 0 && NSAArgs > 0) {
      unsigned NSAWords = (NSAArgs + 3) / 4;
      if (Bytes.size() < 4 * NSAWords)

      for (unsigned i = 0; i < NSAArgs; ++i) {
        const unsigned VAddrIdx = VAddr0Idx + 1 + i;

            MCII->get(MI.getOpcode()).operands()[VAddrIdx].RegClass;

      Bytes = Bytes.slice(4 * NSAWords);

  if (MCII->get(MI.getOpcode()).TSFlags &

                                            AMDGPU::OpName::vdst_in);
  if (VDstIn_Idx != -1) {
    int Tied = MCII->get(MI.getOpcode()).getOperandConstraint(VDstIn_Idx,

    if (Tied != -1 && (MI.getNumOperands() <= (unsigned)VDstIn_Idx ||
                       !MI.getOperand(VDstIn_Idx).isReg() ||
                       MI.getOperand(VDstIn_Idx).getReg() !=
                           MI.getOperand(Tied).getReg())) {
      if (MI.getNumOperands() > (unsigned)VDstIn_Idx)
        MI.erase(&MI.getOperand(VDstIn_Idx));

                         AMDGPU::OpName::vdst_in);

  if (ImmLitIdx != -1 && !IsSOPK)

  Size = MaxInstBytesNum - Bytes.size();
  if (MI.getOpcode() == AMDGPU::V_INTERP_P10_F16_F32_inreg_gfx11 ||
      MI.getOpcode() == AMDGPU::V_INTERP_P10_F16_F32_inreg_gfx12 ||
      MI.getOpcode() == AMDGPU::V_INTERP_P10_RTZ_F16_F32_inreg_gfx11 ||
      MI.getOpcode() == AMDGPU::V_INTERP_P10_RTZ_F16_F32_inreg_gfx12 ||
      MI.getOpcode() == AMDGPU::V_INTERP_P2_F16_F32_inreg_gfx11 ||
      MI.getOpcode() == AMDGPU::V_INTERP_P2_F16_F32_inreg_gfx12 ||
      MI.getOpcode() == AMDGPU::V_INTERP_P2_RTZ_F16_F32_inreg_gfx11 ||
      MI.getOpcode() == AMDGPU::V_INTERP_P2_RTZ_F16_F32_inreg_gfx12) {

                         AMDGPU::OpName::sdst);

  unsigned OpSelHi = 0;
                                        bool IsVOP3P = false) {

  unsigned Opc = MI.getOpcode();
  const int ModOps[] = {AMDGPU::OpName::src0_modifiers,
                        AMDGPU::OpName::src1_modifiers,
                        AMDGPU::OpName::src2_modifiers};
  for (int J = 0; J < 3; ++J) {

    unsigned Val = MI.getOperand(OpIdx).getImm();

  const unsigned Opc = MI.getOpcode();

  constexpr std::array<std::tuple<int, int, unsigned>, 4> OpAndOpMods = {
      {{AMDGPU::OpName::src0, AMDGPU::OpName::src0_modifiers,

       {AMDGPU::OpName::src1, AMDGPU::OpName::src1_modifiers,

       {AMDGPU::OpName::src2, AMDGPU::OpName::src2_modifiers,

       {AMDGPU::OpName::vdst, AMDGPU::OpName::src0_modifiers,

  for (const auto &[OpName, OpModsName, OpSelMask] : OpAndOpMods) {

    if (OpIdx == -1 || OpModsIdx == -1)

    const MCOperand &OpMods = MI.getOperand(OpModsIdx);
    unsigned ModVal = OpMods.getImm();
    if (ModVal & OpSelMask) {
  constexpr int DST_IDX = 0;
  auto Opcode = MI.getOpcode();
  const auto &Desc = MCII->get(Opcode);

  if (OldIdx != -1 && Desc.getOperandConstraint(

  assert(MI.getNumOperands() + 1 < MCII->get(MI.getOpcode()).getNumOperands());

                         AMDGPU::OpName::src2_modifiers);
  unsigned Opc = MI.getOpcode();

  unsigned DescNumOps = MCII->get(Opc).getNumOperands();
  if (MI.getNumOperands() < DescNumOps &&

                          AMDGPU::OpName::op_sel);

  if (MI.getNumOperands() < DescNumOps &&

                          AMDGPU::OpName::src0_modifiers);

  if (MI.getNumOperands() < DescNumOps &&

                          AMDGPU::OpName::src1_modifiers);

  unsigned Opc = MI.getOpcode();
  unsigned DescNumOps = MCII->get(Opc).getNumOperands();
  if (MI.getNumOperands() < DescNumOps &&

                          AMDGPU::OpName::op_sel);

  auto TSFlags = MCII->get(MI.getOpcode()).TSFlags;

                                           AMDGPU::OpName::vdst);

                                            AMDGPU::OpName::vdata);

                                : AMDGPU::OpName::rsrc;

                                            AMDGPU::OpName::dmask);

                                          AMDGPU::OpName::tfe);

                                          AMDGPU::OpName::d16);
  if (BaseOpcode->BVH) {

  bool IsAtomic = (VDstIdx != -1);

  bool IsPartialNSA = false;
  unsigned AddrSize = Info->VAddrDwords;

    const bool IsA16 = (A16Idx != -1 && MI.getOperand(A16Idx).getImm());

    IsNSA = Info->MIMGEncoding == AMDGPU::MIMGEncGfx10NSA ||
            Info->MIMGEncoding == AMDGPU::MIMGEncGfx11NSA ||
            Info->MIMGEncoding == AMDGPU::MIMGEncGfx12;

    if (!IsVSample && AddrSize > 12)

    if (AddrSize > Info->VAddrDwords) {

    unsigned DMask = MI.getOperand(DMaskIdx).getImm() & 0xf;
    unsigned DstSize = IsGather4 ? 4 : std::max(llvm::popcount(DMask), 1);

    bool D16 = D16Idx >= 0 && MI.getOperand(D16Idx).getImm();

      DstSize = (DstSize + 1) / 2;

  if (TFEIdx != -1 && MI.getOperand(TFEIdx).getImm())

  if (DstSize == Info->VDataDwords && AddrSize == Info->VAddrDwords)
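// Illustrative note (added; not from the original source): for a non-gather
// image load with DMask == 0b1011, popcount gives DstSize == 3 dwords; if D16
// is set and the target packs 16-bit data, DstSize is halved and rounded up,
// (3 + 1) / 2 == 2 dwords, before the vdata register class is reshaped.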
  if (NewOpcode == -1)

  unsigned NewVdata = AMDGPU::NoRegister;
  if (DstSize != Info->VDataDwords) {
    auto DataRCID = MCII->get(NewOpcode).operands()[VDataIdx].RegClass;

    unsigned Vdata0 = MI.getOperand(VDataIdx).getReg();
    unsigned VdataSub0 = MRI.getSubReg(Vdata0, AMDGPU::sub0);
    Vdata0 = (VdataSub0 != 0) ? VdataSub0 : Vdata0;

    if (NewVdata == AMDGPU::NoRegister) {

  int VAddrSAIdx = IsPartialNSA ? (RsrcIdx - 1) : VAddr0Idx;
  unsigned NewVAddrSA = AMDGPU::NoRegister;
  if (STI.hasFeature(AMDGPU::FeatureNSAEncoding) && (!IsNSA || IsPartialNSA) &&
      AddrSize != Info->VAddrDwords) {
    unsigned VAddrSA = MI.getOperand(VAddrSAIdx).getReg();
    unsigned VAddrSubSA = MRI.getSubReg(VAddrSA, AMDGPU::sub0);
    VAddrSA = VAddrSubSA ? VAddrSubSA : VAddrSA;

    auto AddrRCID = MCII->get(NewOpcode).operands()[VAddrSAIdx].RegClass;

  MI.setOpcode(NewOpcode);

  if (NewVdata != AMDGPU::NoRegister) {

    assert(AddrSize <= Info->VAddrDwords);
    MI.erase(MI.begin() + VAddr0Idx + AddrSize,
             MI.begin() + VAddr0Idx + Info->VAddrDwords);
  unsigned Opc = MI.getOpcode();
  unsigned DescNumOps = MCII->get(Opc).getNumOperands();

  if (MI.getNumOperands() < DescNumOps &&

  if (MI.getNumOperands() < DescNumOps &&

                          AMDGPU::OpName::op_sel);
  if (MI.getNumOperands() < DescNumOps &&

                          AMDGPU::OpName::op_sel_hi);
  if (MI.getNumOperands() < DescNumOps &&

                          AMDGPU::OpName::neg_lo);
  if (MI.getNumOperands() < DescNumOps &&

                          AMDGPU::OpName::neg_hi);

  unsigned Opc = MI.getOpcode();
  unsigned DescNumOps = MCII->get(Opc).getNumOperands();

  if (MI.getNumOperands() < DescNumOps &&

  if (MI.getNumOperands() < DescNumOps &&

                          AMDGPU::OpName::src0_modifiers);

  if (MI.getNumOperands() < DescNumOps &&

                          AMDGPU::OpName::src1_modifiers);
  assert(HasLiteral && "Should have decoded a literal");

  unsigned DescNumOps = Desc.getNumOperands();

                          AMDGPU::OpName::immDeferred);
  assert(DescNumOps == MI.getNumOperands());
  for (unsigned I = 0; I < DescNumOps; ++I) {
    auto &Op = MI.getOperand(I);
    auto OpType = Desc.operands()[I].OperandType;
                                         const Twine& ErrMsg) const {

                                                unsigned Val) const {
  const auto& RegCl = AMDGPUMCRegisterClasses[RegClassID];
  if (Val >= RegCl.getNumRegs())

                      ": unknown register " + Twine(Val));

                                                 unsigned Val) const {

  switch (SRegClassID) {
  case AMDGPU::SGPR_32RegClassID:
  case AMDGPU::TTMP_32RegClassID:

  case AMDGPU::SGPR_64RegClassID:
  case AMDGPU::TTMP_64RegClassID:

  case AMDGPU::SGPR_96RegClassID:
  case AMDGPU::TTMP_96RegClassID:
  case AMDGPU::SGPR_128RegClassID:
  case AMDGPU::TTMP_128RegClassID:

  case AMDGPU::SGPR_256RegClassID:
  case AMDGPU::TTMP_256RegClassID:

  case AMDGPU::SGPR_288RegClassID:
  case AMDGPU::TTMP_288RegClassID:
  case AMDGPU::SGPR_320RegClassID:
  case AMDGPU::TTMP_320RegClassID:
  case AMDGPU::SGPR_352RegClassID:
  case AMDGPU::TTMP_352RegClassID:
  case AMDGPU::SGPR_384RegClassID:
  case AMDGPU::TTMP_384RegClassID:
  case AMDGPU::SGPR_512RegClassID:
  case AMDGPU::TTMP_512RegClassID:

  if (Val % (1 << shift)) {

      << ": scalar reg isn't aligned " << Val;

  unsigned RegIdxInVGPR16 = RegIdx * 2 + (IsHi ? 1 : 0);
1220 "Should only decode multiple kimm with VOPD, check VSrc operand types");
1222 return errOperand(Val,
"More than one unique literal is illegal");
1234 if (Bytes.
size() < 4) {
1235 return errOperand(0,
"cannot read literal, inst bytes left " +
1239 Literal = Literal64 = eatBytes<uint32_t>(Bytes);
  using namespace AMDGPU::EncValues;

  assert(Imm >= INLINE_INTEGER_C_MIN && Imm <= INLINE_INTEGER_C_MAX);

                    (static_cast<int64_t>(Imm) - INLINE_INTEGER_C_MIN) :
                    (INLINE_INTEGER_C_POSITIVE_MAX - static_cast<int64_t>(Imm)));
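// Illustrative note (added; not from the original source), assuming the usual
// EncValues constants INLINE_INTEGER_C_MIN == 128,
// INLINE_INTEGER_C_POSITIVE_MAX == 192 and INLINE_INTEGER_C_MAX == 208: an
// encoding of 129 decodes to the inline integer 1, 192 decodes to 64, 193
// decodes to -1, and 208 decodes to -16.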
    return llvm::bit_cast<uint32_t>(0.5f);
    return llvm::bit_cast<uint32_t>(-0.5f);
    return llvm::bit_cast<uint32_t>(1.0f);
    return llvm::bit_cast<uint32_t>(-1.0f);
    return llvm::bit_cast<uint32_t>(2.0f);
    return llvm::bit_cast<uint32_t>(-2.0f);
    return llvm::bit_cast<uint32_t>(4.0f);
    return llvm::bit_cast<uint32_t>(-4.0f);

    return llvm::bit_cast<uint64_t>(0.5);
    return llvm::bit_cast<uint64_t>(-0.5);
    return llvm::bit_cast<uint64_t>(1.0);
    return llvm::bit_cast<uint64_t>(-1.0);
    return llvm::bit_cast<uint64_t>(2.0);
    return llvm::bit_cast<uint64_t>(-2.0);
    return llvm::bit_cast<uint64_t>(4.0);
    return llvm::bit_cast<uint64_t>(-4.0);

    return 0x3fc45f306dc9c882;
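// Illustrative note (added; not from the original source): these returns are
// the case bodies of the inline floating-point immediate tables; the hardware
// encodes +-0.5, +-1.0, +-2.0 and +-4.0 as single inline constants, and
// 0x3fc45f306dc9c882 is the double bit pattern (~0.15915494309189532) used
// for the 1/(2*pi) inline constant.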
  using namespace AMDGPU;

    return VGPR_32RegClassID;

  case OPWV232: return VReg_64RegClassID;
  case OPW96: return VReg_96RegClassID;
  case OPW128: return VReg_128RegClassID;
  case OPW160: return VReg_160RegClassID;
  case OPW256: return VReg_256RegClassID;
  case OPW288: return VReg_288RegClassID;
  case OPW320: return VReg_320RegClassID;
  case OPW352: return VReg_352RegClassID;
  case OPW384: return VReg_384RegClassID;
  case OPW512: return VReg_512RegClassID;
  case OPW1024: return VReg_1024RegClassID;

  using namespace AMDGPU;

    return AGPR_32RegClassID;

  case OPWV232: return AReg_64RegClassID;
  case OPW96: return AReg_96RegClassID;
  case OPW128: return AReg_128RegClassID;
  case OPW160: return AReg_160RegClassID;
  case OPW256: return AReg_256RegClassID;
  case OPW288: return AReg_288RegClassID;
  case OPW320: return AReg_320RegClassID;
  case OPW352: return AReg_352RegClassID;
  case OPW384: return AReg_384RegClassID;
  case OPW512: return AReg_512RegClassID;
  case OPW1024: return AReg_1024RegClassID;

  using namespace AMDGPU;

    return SGPR_32RegClassID;

  case OPWV232: return SGPR_64RegClassID;
  case OPW96: return SGPR_96RegClassID;
  case OPW128: return SGPR_128RegClassID;
  case OPW160: return SGPR_160RegClassID;
  case OPW256: return SGPR_256RegClassID;
  case OPW288: return SGPR_288RegClassID;
  case OPW320: return SGPR_320RegClassID;
  case OPW352: return SGPR_352RegClassID;
  case OPW384: return SGPR_384RegClassID;
  case OPW512: return SGPR_512RegClassID;

  using namespace AMDGPU;

    return TTMP_32RegClassID;

  case OPWV232: return TTMP_64RegClassID;
  case OPW128: return TTMP_128RegClassID;
  case OPW256: return TTMP_256RegClassID;
  case OPW288: return TTMP_288RegClassID;
  case OPW320: return TTMP_320RegClassID;
  case OPW352: return TTMP_352RegClassID;
  case OPW384: return TTMP_384RegClassID;
  case OPW512: return TTMP_512RegClassID;
  using namespace AMDGPU::EncValues;

  unsigned TTmpMin = isGFX9Plus() ? TTMP_GFX9PLUS_MIN : TTMP_VI_MIN;
  unsigned TTmpMax = isGFX9Plus() ? TTMP_GFX9PLUS_MAX : TTMP_VI_MAX;

  return (TTmpMin <= Val && Val <= TTmpMax) ? Val - TTmpMin : -1;

                                          bool MandatoryLiteral,

  using namespace AMDGPU::EncValues;

  bool IsAGPR = Val & 512;

  if (VGPR_MIN <= Val && Val <= VGPR_MAX) {

                                                 bool MandatoryLiteral,
                                                 unsigned ImmWidth,

  assert(Val < (1 << 8) && "9-bit Src encoding when Val{8} is 0");
  using namespace AMDGPU::EncValues;

  static_assert(SGPR_MIN == 0);

  if (INLINE_INTEGER_C_MIN <= Val && Val <= INLINE_INTEGER_C_MAX)

  if (INLINE_FLOATING_C_MIN <= Val && Val <= INLINE_FLOATING_C_MAX)

  if (Val == LITERAL_CONST) {
    if (MandatoryLiteral)

                                                  unsigned Val) const {

  Val |= ~XDstReg & 1;

  using namespace AMDGPU;

  using namespace AMDGPU;
  using namespace AMDGPU::SDWA;
  using namespace AMDGPU::EncValues;

    if (int(SDWA9EncValues::SRC_VGPR_MIN) <= int(Val) &&
        Val <= SDWA9EncValues::SRC_VGPR_MAX) {

                              Val - SDWA9EncValues::SRC_VGPR_MIN);

    if (SDWA9EncValues::SRC_SGPR_MIN <= Val &&
        Val <= (isGFX10Plus() ? SDWA9EncValues::SRC_SGPR_MAX_GFX10
                              : SDWA9EncValues::SRC_SGPR_MAX_SI)) {

                               Val - SDWA9EncValues::SRC_SGPR_MIN);

    if (SDWA9EncValues::SRC_TTMP_MIN <= Val &&
        Val <= SDWA9EncValues::SRC_TTMP_MAX) {

                               Val - SDWA9EncValues::SRC_TTMP_MIN);

    const unsigned SVal = Val - SDWA9EncValues::SRC_SGPR_MIN;

    if (INLINE_INTEGER_C_MIN <= SVal && SVal <= INLINE_INTEGER_C_MAX)

    if (INLINE_FLOATING_C_MIN <= SVal && SVal <= INLINE_FLOATING_C_MAX)

  } else if (STI.hasFeature(AMDGPU::FeatureVolcanicIslands)) {
  using namespace AMDGPU::SDWA;

         "SDWAVopcDst should be present only on GFX9+");

  bool IsWave64 = STI.hasFeature(AMDGPU::FeatureWavefrontSize64);

  if (Val & SDWA9EncValues::VOPC_DST_VCC_MASK) {
    Val &= SDWA9EncValues::VOPC_DST_SGPR_MASK;

  return STI.hasFeature(AMDGPU::FeatureArchitectedFlatScratch);

  if (PopCount == 1) {
    S << "bit (" << (TrailingZeros + BaseBytes * CHAR_BIT) << ')';

    S << "bits in range ("
      << (TrailingZeros + PopCount - 1 + BaseBytes * CHAR_BIT) << ':'
      << (TrailingZeros + BaseBytes * CHAR_BIT) << ')';
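// Illustrative note (added; not from the original source): assuming PopCount
// and TrailingZeros are derived from Mask, a mask of 0x6 with BaseBytes == 0
// has PopCount == 2 and TrailingZeros == 1, so the helper produces
// "bits in range (2:1)"; a single-bit mask such as 0x10 yields "bit (4)".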
#define GET_FIELD(MASK) (AMDHSA_BITS_GET(FourByteBuffer, MASK))

#define PRINT_DIRECTIVE(DIRECTIVE, MASK) \
  KdStream << Indent << DIRECTIVE " " << GET_FIELD(MASK) << '\n'; \

#define PRINT_PSEUDO_DIRECTIVE_COMMENT(DIRECTIVE, MASK) \
  KdStream << Indent << MAI.getCommentString() << ' ' << DIRECTIVE " " \
           << GET_FIELD(MASK) << '\n'; \

#define CHECK_RESERVED_BITS_IMPL(MASK, DESC, MSG) \
  if (FourByteBuffer & (MASK)) { \
    return createStringError(std::errc::invalid_argument, \
                             "kernel descriptor " DESC \
                             " reserved %s set" MSG, \
                             getBitRangeFromMask((MASK), 0).c_str()); \

#define CHECK_RESERVED_BITS(MASK) CHECK_RESERVED_BITS_IMPL(MASK, #MASK, "")
#define CHECK_RESERVED_BITS_MSG(MASK, MSG) \
  CHECK_RESERVED_BITS_IMPL(MASK, #MASK, ", " MSG)
#define CHECK_RESERVED_BITS_DESC(MASK, DESC) \
  CHECK_RESERVED_BITS_IMPL(MASK, DESC, "")
#define CHECK_RESERVED_BITS_DESC_MSG(MASK, DESC, MSG) \
  CHECK_RESERVED_BITS_IMPL(MASK, DESC, ", " MSG)
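// Illustrative sketch (added; not part of the original file; the mask name
// below is hypothetical): a use such as
//   CHECK_RESERVED_BITS_MSG(SOME_RESERVED_MASK, "must be zero")
// expands through CHECK_RESERVED_BITS_IMPL into an early
//   return createStringError(std::errc::invalid_argument,
//       "kernel descriptor SOME_RESERVED_MASK reserved %s set, must be zero",
//       getBitRangeFromMask((SOME_RESERVED_MASK), 0).c_str());
// taken whenever any of the masked bits are set in FourByteBuffer.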
  using namespace amdhsa;

  uint32_t GranulatedWorkitemVGPRCount =
      GET_FIELD(COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT);

      (GranulatedWorkitemVGPRCount + 1) *

  KdStream << Indent << ".amdhsa_next_free_vgpr " << NextFreeVGPR << '\n';

  uint32_t GranulatedWavefrontSGPRCount =
      GET_FIELD(COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT);

                              "must be zero on gfx10+");

  uint32_t NextFreeSGPR = (GranulatedWavefrontSGPRCount + 1) *

  KdStream << Indent << ".amdhsa_reserve_vcc " << 0 << '\n';

  KdStream << Indent << ".amdhsa_reserve_flat_scratch " << 0 << '\n';
  KdStream << Indent << ".amdhsa_reserve_xnack_mask " << 0 << '\n';
  KdStream << Indent << ".amdhsa_next_free_sgpr " << NextFreeSGPR << "\n";

                  COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_32);

                  COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_16_64);

                  COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_32);

                  COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_16_64);

                    COMPUTE_PGM_RSRC1_GFX6_GFX11_ENABLE_DX10_CLAMP);

                    COMPUTE_PGM_RSRC1_GFX6_GFX11_ENABLE_IEEE_MODE);

    PRINT_DIRECTIVE(".amdhsa_fp16_overflow",
                    COMPUTE_PGM_RSRC1_GFX9_PLUS_FP16_OVFL);

                                  "COMPUTE_PGM_RSRC1", "must be zero pre-gfx9");

                                  "COMPUTE_PGM_RSRC1", "must be zero pre-gfx10");

                    COMPUTE_PGM_RSRC1_GFX10_PLUS_WGP_MODE);
    PRINT_DIRECTIVE(".amdhsa_memory_ordered",
                    COMPUTE_PGM_RSRC1_GFX10_PLUS_MEM_ORDERED);
    PRINT_DIRECTIVE(".amdhsa_forward_progress",
                    COMPUTE_PGM_RSRC1_GFX10_PLUS_FWD_PROGRESS);

                    COMPUTE_PGM_RSRC1_GFX12_PLUS_ENABLE_WG_RR_EN);
  using namespace amdhsa;

                  COMPUTE_PGM_RSRC2_ENABLE_PRIVATE_SEGMENT);

    PRINT_DIRECTIVE(".amdhsa_system_sgpr_private_segment_wavefront_offset",
                    COMPUTE_PGM_RSRC2_ENABLE_PRIVATE_SEGMENT);

                  COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_X);

                  COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Y);

                  COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Z);

                  COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_INFO);

                  COMPUTE_PGM_RSRC2_ENABLE_VGPR_WORKITEM_ID);

      ".amdhsa_exception_fp_ieee_invalid_op",
      COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INVALID_OPERATION);

                  COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_FP_DENORMAL_SOURCE);

      ".amdhsa_exception_fp_ieee_div_zero",
      COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_DIVISION_BY_ZERO);

                  COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_OVERFLOW);

                  COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_UNDERFLOW);

                  COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INEXACT);

                  COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_INT_DIVIDE_BY_ZERO);
  using namespace amdhsa;

    KdStream << Indent << ".amdhsa_accum_offset "
             << (GET_FIELD(COMPUTE_PGM_RSRC3_GFX90A_ACCUM_OFFSET) + 1) * 4

    PRINT_DIRECTIVE(".amdhsa_tg_split", COMPUTE_PGM_RSRC3_GFX90A_TG_SPLIT);

                                  "COMPUTE_PGM_RSRC3", "must be zero on gfx90a");

                                  "COMPUTE_PGM_RSRC3", "must be zero on gfx90a");

    if (!EnableWavefrontSize32 || !*EnableWavefrontSize32) {

                      COMPUTE_PGM_RSRC3_GFX10_GFX11_SHARED_VGPR_COUNT);

          "SHARED_VGPR_COUNT",
          COMPUTE_PGM_RSRC3_GFX10_GFX11_SHARED_VGPR_COUNT);

                                    "COMPUTE_PGM_RSRC3",
                                    "must be zero on gfx12+");

                                      COMPUTE_PGM_RSRC3_GFX11_INST_PREF_SIZE);

                                      COMPUTE_PGM_RSRC3_GFX11_TRAP_ON_START);

                                      COMPUTE_PGM_RSRC3_GFX11_TRAP_ON_END);

          "INST_PREF_SIZE", COMPUTE_PGM_RSRC3_GFX12_PLUS_INST_PREF_SIZE);

                                    "COMPUTE_PGM_RSRC3",
                                    "must be zero on gfx10");

                                  "COMPUTE_PGM_RSRC3", "must be zero on gfx10+");

                                      COMPUTE_PGM_RSRC3_GFX12_PLUS_GLG_EN);

                                    "COMPUTE_PGM_RSRC3",
                                    "must be zero on gfx10 or gfx11");

                                  "COMPUTE_PGM_RSRC3", "must be zero on gfx10+");

                                      COMPUTE_PGM_RSRC3_GFX11_PLUS_IMAGE_OP);

                                    "COMPUTE_PGM_RSRC3",
                                    "must be zero on gfx10");

  } else if (FourByteBuffer) {
        std::errc::invalid_argument,
        "kernel descriptor COMPUTE_PGM_RSRC3 must be all zero before gfx9");
#undef PRINT_PSEUDO_DIRECTIVE_COMMENT
#undef PRINT_DIRECTIVE

#undef CHECK_RESERVED_BITS_IMPL
#undef CHECK_RESERVED_BITS
#undef CHECK_RESERVED_BITS_MSG
#undef CHECK_RESERVED_BITS_DESC
#undef CHECK_RESERVED_BITS_DESC_MSG
                                        const char *Msg = "") {

      std::errc::invalid_argument, "kernel descriptor reserved %s set%s%s",

                                        unsigned WidthInBytes) {

      std::errc::invalid_argument,
      "kernel descriptor reserved bits in range (%u:%u) set",
      (BaseInBytes + WidthInBytes) * CHAR_BIT - 1, BaseInBytes * CHAR_BIT);
#define PRINT_DIRECTIVE(DIRECTIVE, MASK) \
  KdStream << Indent << DIRECTIVE " " \
           << ((TwoByteBuffer & MASK) >> (MASK##_SHIFT)) << '\n'; \
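// Illustrative sketch (added; not part of the original file): with this
// two-byte variant, a use along the lines of
//   PRINT_DIRECTIVE(".amdhsa_user_sgpr_dispatch_ptr",
//                   KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR)
// masks the kernel-code-properties halfword and shifts by the mask's *_SHIFT
// companion constant, so the emitted line is the directive name followed by
// the 0/1 value of that field.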
  switch (Cursor.tell()) {

    FourByteBuffer = DE.getU32(Cursor);
    KdStream << Indent << ".amdhsa_group_segment_fixed_size " << FourByteBuffer

    FourByteBuffer = DE.getU32(Cursor);
    KdStream << Indent << ".amdhsa_private_segment_fixed_size "
             << FourByteBuffer << '\n';

    FourByteBuffer = DE.getU32(Cursor);
    KdStream << Indent << ".amdhsa_kernarg_size "
             << FourByteBuffer << '\n';

    ReservedBytes = DE.getBytes(Cursor, 4);
    for (int I = 0; I < 4; ++I) {
      if (ReservedBytes[I] != 0)

    ReservedBytes = DE.getBytes(Cursor, 20);
    for (int I = 0; I < 20; ++I) {
      if (ReservedBytes[I] != 0)

    FourByteBuffer = DE.getU32(Cursor);

    FourByteBuffer = DE.getU32(Cursor);

    FourByteBuffer = DE.getU32(Cursor);

    using namespace amdhsa;
    TwoByteBuffer = DE.getU16(Cursor);

                    KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER);

                    KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR);

                    KERNEL_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR);

                    KERNEL_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR);

                    KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID);

                    KERNEL_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT);

                    KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE);

    if (TwoByteBuffer & KERNEL_CODE_PROPERTY_RESERVED0)

        (TwoByteBuffer & KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32)) {

          KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32,

                      KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32);

                    KERNEL_CODE_PROPERTY_USES_DYNAMIC_STACK);

    if (TwoByteBuffer & KERNEL_CODE_PROPERTY_RESERVED1) {

    using namespace amdhsa;
    TwoByteBuffer = DE.getU16(Cursor);
    if (TwoByteBuffer & KERNARG_PRELOAD_SPEC_LENGTH) {

                      KERNARG_PRELOAD_SPEC_LENGTH);

    if (TwoByteBuffer & KERNARG_PRELOAD_SPEC_OFFSET) {

                      KERNARG_PRELOAD_SPEC_OFFSET);

    ReservedBytes = DE.getBytes(Cursor, 4);
    for (int I = 0; I < 4; ++I) {
      if (ReservedBytes[I] != 0)

#undef PRINT_DIRECTIVE
  if (Bytes.size() != 64 || KdAddress % 64 != 0)

                        "kernel descriptor must be 64-byte aligned");

  EnableWavefrontSize32 =

      amdhsa::KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32);

  KdStream << ".amdhsa_kernel " << KdName << '\n';

  while (C && C.tell() < Bytes.size()) {

  KdStream << ".end_amdhsa_kernel\n";

                             "code object v2 is not supported");

  if (Result != Symbols->end()) {

    ReferencedAddresses.push_back(static_cast<uint64_t>(Value));

    std::unique_ptr<MCRelocationInfo> &&RelInfo) {