#define DEBUG_TYPE "amdgpu-disassembler"

#define SGPR_MAX                                                               \
  (isGFX10Plus() ? AMDGPU::EncValues::SGPR_MAX_GFX10                           \
                 : AMDGPU::EncValues::SGPR_MAX_SI)
      MAI(*Ctx.getAsmInfo()), TargetMaxInstBytes(MAI.getMaxInstLength(&STI)),
      CodeObjectVersion(AMDGPU::getDefaultAMDHSACodeObjectVersion()) {

  for (auto [Symbol, Code] : AMDGPU::UCVersion::getGFXVersions())
    createConstantSymbolExpr(Symbol, Code);

  UCVersionW64Expr = createConstantSymbolExpr("UC_VERSION_W64_BIT", 0x2000);
  UCVersionW32Expr = createConstantSymbolExpr("UC_VERSION_W32_BIT", 0x4000);
  UCVersionMDPExpr = createConstantSymbolExpr("UC_VERSION_MDP_BIT", 0x8000);
  std::advance(I, OpIdx);

  int64_t Offset = SignExtend64<16>(Imm) * 4 + 4 + Addr;

  if (DAsm->tryAddingSymbolicOperand(Inst, Offset, Addr, true, 2, 2, 0))
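  // Worked example of the branch-target math above: the 16-bit immediate is a
  // signed word offset from the instruction following the branch, so a branch
  // at Addr = 0x100 with Imm = 0xFFFF (-1) gives
  // Offset = -1 * 4 + 4 + 0x100 = 0x100, i.e. the branch targets itself.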
  if (DAsm->isGFX12Plus()) { // GFX12 supports 24-bit signed offsets.
    Offset = SignExtend64<24>(Imm);
  } else if (DAsm->isVI()) { // VI supports 20-bit unsigned offsets.
    Offset = Imm & 0xFFFFF;
  } else { // GFX9+ supports 21-bit signed offsets.
    Offset = SignExtend64<21>(Imm);
  }
  return addOperand(Inst, DAsm->decodeBoolReg(Val));

  return addOperand(Inst, DAsm->decodeSplitBarrier(Val));

  return addOperand(Inst, DAsm->decodeDpp8FI(Val));
#define DECODE_OPERAND(StaticDecoderName, DecoderName)                         \
  static DecodeStatus StaticDecoderName(MCInst &Inst, unsigned Imm,            \
                                        uint64_t /*Addr*/,                     \
                                        const MCDisassembler *Decoder) {       \
    auto DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);              \
    return addOperand(Inst, DAsm->DecoderName(Imm));                           \
  }

#define DECODE_OPERAND_REG_8(RegClass)                                         \
  static DecodeStatus Decode##RegClass##RegisterClass(                         \
      MCInst &Inst, unsigned Imm, uint64_t /*Addr*/,                           \
      const MCDisassembler *Decoder) {                                         \
    assert(Imm < (1 << 8) && "8-bit encoding");                                \
    auto DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);              \
    return addOperand(                                                         \
        Inst, DAsm->createRegOperand(AMDGPU::RegClass##RegClassID, Imm));      \
  }

#define DECODE_SrcOp(Name, EncSize, OpWidth, EncImm, MandatoryLiteral,         \
                     ImmWidth)                                                 \
  static DecodeStatus Name(MCInst &Inst, unsigned Imm, uint64_t /*Addr*/,      \
                           const MCDisassembler *Decoder) {                    \
    assert(Imm < (1 << EncSize) && #EncSize "-bit encoding");                  \
    auto DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);              \
    return addOperand(Inst,                                                    \
                      DAsm->decodeSrcOp(AMDGPUDisassembler::OpWidth, EncImm,   \
                                        MandatoryLiteral, ImmWidth));          \
  }

static DecodeStatus decodeSrcOp(MCInst &Inst, unsigned EncSize,
                                AMDGPUDisassembler::OpWidthTy OpWidth,
                                unsigned Imm, unsigned EncImm,
                                bool MandatoryLiteral, unsigned ImmWidth,
                                AMDGPU::OperandSemantics Sema,
                                const MCDisassembler *Decoder) {
  assert(Imm < (1U << EncSize) && "Operand doesn't fit encoding!");
  auto DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
  return addOperand(Inst, DAsm->decodeSrcOp(OpWidth, EncImm, MandatoryLiteral,
                                            ImmWidth, Sema));
}
#define DECODE_OPERAND_REG_7(RegClass, OpWidth)                                \
  DECODE_SrcOp(Decode##RegClass##RegisterClass, 7, OpWidth, Imm, false, 0)
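// Illustrative (hypothetical) instantiation: DECODE_OPERAND_REG_7(SGPR_32,
// OPW32) expands to a DecodeSGPR_32RegisterClass() that asserts a 7-bit
// encoding and forwards Imm unchanged to decodeSrcOp() with width OPW32.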
template <AMDGPUDisassembler::OpWidthTy OpWidth>
static DecodeStatus decodeSrcReg9(MCInst &Inst, unsigned Imm,
                                  uint64_t /*Addr*/,
                                  const MCDisassembler *Decoder) {
  return decodeSrcOp(Inst, 9, OpWidth, Imm, Imm, false, 0,
                     AMDGPU::OperandSemantics::INT, Decoder);
}

template <AMDGPUDisassembler::OpWidthTy OpWidth>
static DecodeStatus decodeSrcA9(MCInst &Inst, unsigned Imm, uint64_t /*Addr*/,
                                const MCDisassembler *Decoder) {
  return decodeSrcOp(Inst, 9, OpWidth, Imm, Imm | 512, false, 0,
                     AMDGPU::OperandSemantics::INT, Decoder);
}

template <AMDGPUDisassembler::OpWidthTy OpWidth>
static DecodeStatus decodeSrcAV10(MCInst &Inst, unsigned Imm,
                                  uint64_t /*Addr*/,
                                  const MCDisassembler *Decoder) {
  return decodeSrcOp(Inst, 10, OpWidth, Imm, Imm, false, 0,
                     AMDGPU::OperandSemantics::INT, Decoder);
}

  // Body of decodeSrcRegOrImm9:
  return decodeSrcOp(Inst, 9, OpWidth, Imm, Imm, false, ImmWidth,
                     (AMDGPU::OperandSemantics)OperandSemantics, Decoder);

  // Body of decodeSrcRegOrImmA9 (AGPR bank, hence Imm | 512):
  return decodeSrcOp(Inst, 9, OpWidth, Imm, Imm | 512, false, ImmWidth,
                     (AMDGPU::OperandSemantics)OperandSemantics, Decoder);

  // Body of decodeSrcRegOrImmDeferred9 (mandatory literal, decode deferred):
  return decodeSrcOp(Inst, 9, OpWidth, Imm, Imm, true, ImmWidth,
                     (AMDGPU::OperandSemantics)OperandSemantics, Decoder);
  assert(isUInt<10>(Imm) && "10-bit encoding expected");
  assert((Imm & (1 << 8)) == 0 && "Imm{8} should not be used");

  bool IsHi = Imm & (1 << 9);
  unsigned RegIdx = Imm & 0xff;
  return addOperand(Inst, DAsm->createVGPR16Operand(RegIdx, IsHi));
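  // Field layout decoded above: bit 9 selects the high half, bits [7:0] the
  // register index. E.g. Imm = 0x205 (bit 9 set, RegIdx = 5) selects v5.h,
  // while Imm = 0x005 selects v5.l.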
  assert(isUInt<8>(Imm) && "8-bit encoding expected");

  bool IsHi = Imm & (1 << 7);
  unsigned RegIdx = Imm & 0x7f;
  return addOperand(Inst, DAsm->createVGPR16Operand(RegIdx, IsHi));
  // decodeOperand_VSrcT16_Lo128:
  assert(isUInt<9>(Imm) && "9-bit encoding expected");

  if (Imm & AMDGPU::EncValues::IS_VGPR) {
    bool IsHi = Imm & (1 << 7);
    unsigned RegIdx = Imm & 0x7f;
    return addOperand(Inst, DAsm->createVGPR16Operand(RegIdx, IsHi));
  }
  return addOperand(Inst, DAsm->decodeNonVGPRSrcOp(
                              OpWidth, Imm & 0xFF, false, ImmWidth,
                              (AMDGPU::OperandSemantics)OperandSemantics));

  // decodeOperand_VSrcT16_Lo128_Deferred:
  assert(isUInt<9>(Imm) && "9-bit encoding expected");

  if (Imm & AMDGPU::EncValues::IS_VGPR) {
    bool IsHi = Imm & (1 << 7);
    unsigned RegIdx = Imm & 0x7f;
    return addOperand(Inst, DAsm->createVGPR16Operand(RegIdx, IsHi));
  }
  return addOperand(Inst, DAsm->decodeNonVGPRSrcOp(
                              OpWidth, Imm & 0xFF, true, ImmWidth,
                              (AMDGPU::OperandSemantics)OperandSemantics));

  // decodeOperand_VSrcT16:
  assert(isUInt<10>(Imm) && "10-bit encoding expected");

  if (Imm & AMDGPU::EncValues::IS_VGPR) {
    bool IsHi = Imm & (1 << 9);
    unsigned RegIdx = Imm & 0xff;
    return addOperand(Inst, DAsm->createVGPR16Operand(RegIdx, IsHi));
  }
  return addOperand(Inst, DAsm->decodeNonVGPRSrcOp(
                              OpWidth, Imm & 0xFF, false, ImmWidth,
                              (AMDGPU::OperandSemantics)OperandSemantics));

  // decodeOperand_VGPR_16:
  assert(isUInt<10>(Imm) && "10-bit encoding expected");

  bool IsHi = Imm & (1 << 9);
  unsigned RegIdx = Imm & 0xff;
  return addOperand(Inst, DAsm->createVGPR16Operand(RegIdx, IsHi));
  return addOperand(Inst, DAsm->decodeMandatoryLiteralConstant(Imm));

  return addOperand(Inst, DAsm->decodeVOPDDstYOp(Inst, Val));

  auto Reg = Sub ? Sub : Op.getReg();
  return Reg >= AMDGPU::AGPR0 && Reg <= AMDGPU::AGPR255;

  if (!DAsm->isGFX90A()) {

  uint64_t TSFlags = DAsm->getMCII()->get(Opc).TSFlags;
                         : AMDGPU::OpName::vdata;

  return addOperand(Inst, DAsm->decodeSrcOp(Opw, Imm | 256));

template <AMDGPUDisassembler::OpWidthTy Opw>

  assert(Imm < (1 << 9) && "9-bit encoding");

#define DECODE_SDWA(DecName)                                                   \
  DECODE_OPERAND(decodeSDWA##DecName, decodeSDWA##DecName)

  return addOperand(Inst, DAsm->decodeVersionImm(Imm));

#include "AMDGPUGenDisassemblerTables.inc"
  T Res = support::endian::read<T, llvm::endianness::little>(Bytes.data());
  Bytes = Bytes.slice(sizeof(T));

  uint64_t Lo =
      support::endian::read<uint64_t, llvm::endianness::little>(Bytes.data());
  Bytes = Bytes.slice(8);
  uint64_t Hi =
      support::endian::read<uint32_t, llvm::endianness::little>(Bytes.data());
  Bytes = Bytes.slice(4);

  uint64_t Lo =
      support::endian::read<uint64_t, llvm::endianness::little>(Bytes.data());
  Bytes = Bytes.slice(8);
  uint64_t Hi =
      support::endian::read<uint64_t, llvm::endianness::little>(Bytes.data());
  Bytes = Bytes.slice(8);
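// eatBytes() reads little-endian and advances the buffer: for Bytes =
// {0x78, 0x56, 0x34, 0x12}, eatBytes<uint32_t>(Bytes) returns 0x12345678 and
// drops the four consumed bytes. eat12Bytes()/eat16Bytes() above assemble the
// 96- and 128-bit instruction payloads the same way, low quadword first.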
  unsigned MaxInstBytesNum =
      std::min((size_t)TargetMaxInstBytes, Bytes_.size());
  Bytes = Bytes_.slice(0, MaxInstBytesNum);

  Size = std::min((size_t)4, Bytes_.size());

    Bytes = Bytes_.slice(0, MaxInstBytesNum);
  } else if (Bytes.size() >= 16 &&

    Bytes = Bytes_.slice(0, MaxInstBytesNum);

    if (Bytes.size() >= 8) {
      const uint64_t QW = eatBytes<uint64_t>(Bytes);

    Bytes = Bytes_.slice(0, MaxInstBytesNum);

    if (Bytes.size() >= 4) {
      const uint32_t DW = eatBytes<uint32_t>(Bytes);
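    // Decode strategy sketch: each attempt reslices the full window
    // (Bytes = Bytes_.slice(0, MaxInstBytesNum)) and retries against the
    // generated tables, working from the widest candidate encodings
    // (128/96-bit) down through 64-bit quadwords to plain 32-bit words.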
                     AMDGPU::OpName::src2_modifiers);

  if (MI.getOpcode() == AMDGPU::V_CVT_SR_BF8_F32_e64_dpp ||
      MI.getOpcode() == AMDGPU::V_CVT_SR_FP8_F32_e64_dpp) {

                       AMDGPU::OpName::src2_modifiers);

  if (MCII->get(MI.getOpcode()).TSFlags &

                       AMDGPU::OpName::cpol);

    if (MI.getNumOperands() <= (unsigned)CPolPos) {

                           AMDGPU::OpName::cpol);

    MI.getOperand(CPolPos).setImm(MI.getOperand(CPolPos).getImm() | CPol);

  if ((MCII->get(MI.getOpcode()).TSFlags &

    if (TFEOpIdx != -1) {
      auto *TFEIter = MI.begin();
      std::advance(TFEIter, TFEOpIdx);

  if (MCII->get(MI.getOpcode()).TSFlags &

    if (SWZOpIdx != -1) {
      auto *SWZIter = MI.begin();
      std::advance(SWZIter, SWZOpIdx);
    unsigned NSAArgs = RsrcIdx - VAddr0Idx - 1;
    if (VAddr0Idx >= 0 && NSAArgs > 0) {
      unsigned NSAWords = (NSAArgs + 3) / 4;
      if (Bytes.size() < 4 * NSAWords)

      for (unsigned i = 0; i < NSAArgs; ++i) {
        const unsigned VAddrIdx = VAddr0Idx + 1 + i;
        auto VAddrRCID =
            MCII->get(MI.getOpcode()).operands()[VAddrIdx].RegClass;

      Bytes = Bytes.slice(4 * NSAWords);
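      // Rounding example: NSAArgs = 5 extra VGPR addresses gives
      // NSAWords = (5 + 3) / 4 = 2, so 4 * NSAWords = 8 encoded byte slots
      // are consumed and the three unused trailing slots are padding.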
  if (MCII->get(MI.getOpcode()).TSFlags &

                       AMDGPU::OpName::vdst_in);
  if (VDstIn_Idx != -1) {
    int Tied = MCII->get(MI.getOpcode()).getOperandConstraint(VDstIn_Idx,

    if (Tied != -1 && (MI.getNumOperands() <= (unsigned)VDstIn_Idx ||
                       !MI.getOperand(VDstIn_Idx).isReg() ||
                       MI.getOperand(VDstIn_Idx).getReg() !=
                           MI.getOperand(Tied).getReg())) {
      if (MI.getNumOperands() > (unsigned)VDstIn_Idx)
        MI.erase(&MI.getOperand(VDstIn_Idx));

                       AMDGPU::OpName::vdst_in);

  if (ImmLitIdx != -1 && !IsSOPK)

  Size = MaxInstBytesNum - Bytes.size();
  if (MI.getOpcode() == AMDGPU::V_INTERP_P10_F16_F32_inreg_t16_gfx11 ||
      MI.getOpcode() == AMDGPU::V_INTERP_P10_F16_F32_inreg_fake16_gfx11 ||
      MI.getOpcode() == AMDGPU::V_INTERP_P10_F16_F32_inreg_t16_gfx12 ||
      MI.getOpcode() == AMDGPU::V_INTERP_P10_F16_F32_inreg_fake16_gfx12 ||
      MI.getOpcode() == AMDGPU::V_INTERP_P10_RTZ_F16_F32_inreg_t16_gfx11 ||
      MI.getOpcode() == AMDGPU::V_INTERP_P10_RTZ_F16_F32_inreg_fake16_gfx11 ||
      MI.getOpcode() == AMDGPU::V_INTERP_P10_RTZ_F16_F32_inreg_t16_gfx12 ||
      MI.getOpcode() == AMDGPU::V_INTERP_P10_RTZ_F16_F32_inreg_fake16_gfx12 ||
      MI.getOpcode() == AMDGPU::V_INTERP_P2_F16_F32_inreg_t16_gfx11 ||
      MI.getOpcode() == AMDGPU::V_INTERP_P2_F16_F32_inreg_fake16_gfx11 ||
      MI.getOpcode() == AMDGPU::V_INTERP_P2_F16_F32_inreg_t16_gfx12 ||
      MI.getOpcode() == AMDGPU::V_INTERP_P2_F16_F32_inreg_fake16_gfx12 ||
      MI.getOpcode() == AMDGPU::V_INTERP_P2_RTZ_F16_F32_inreg_t16_gfx11 ||
      MI.getOpcode() == AMDGPU::V_INTERP_P2_RTZ_F16_F32_inreg_fake16_gfx11 ||
      MI.getOpcode() == AMDGPU::V_INTERP_P2_RTZ_F16_F32_inreg_t16_gfx12 ||
      MI.getOpcode() == AMDGPU::V_INTERP_P2_RTZ_F16_F32_inreg_fake16_gfx12) {
                     AMDGPU::OpName::sdst);

  case 4:
    return MO.setReg(MRI.getSubReg(MO.getReg(), AMDGPU::sub0_sub1_sub2_sub3));
  case 6:
    return MO.setReg(
        MRI.getSubReg(MO.getReg(), AMDGPU::sub0_sub1_sub2_sub3_sub4_sub5));
  unsigned CBSZ = MI.getOperand(CbszIdx).getImm();
  unsigned BLGP = MI.getOperand(BlgpIdx).getImm();

  if (!AdjustedRegClassOpcode ||
      AdjustedRegClassOpcode->Opcode == MI.getOpcode())

  MI.setOpcode(AdjustedRegClassOpcode->Opcode);
  unsigned OpSelHi = 0;

                                        bool IsVOP3P = false) {

  unsigned Opc = MI.getOpcode();
  const int ModOps[] = {AMDGPU::OpName::src0_modifiers,
                        AMDGPU::OpName::src1_modifiers,
                        AMDGPU::OpName::src2_modifiers};
  for (int J = 0; J < 3; ++J) {

    unsigned Val = MI.getOperand(OpIdx).getImm();
  const unsigned Opc = MI.getOpcode();

  constexpr std::array<std::tuple<int, int, unsigned>, 4> OpAndOpMods = {
      {{AMDGPU::OpName::src0, AMDGPU::OpName::src0_modifiers,
        SISrcMods::OP_SEL_0},
       {AMDGPU::OpName::src1, AMDGPU::OpName::src1_modifiers,
        SISrcMods::OP_SEL_0},
       {AMDGPU::OpName::src2, AMDGPU::OpName::src2_modifiers,
        SISrcMods::OP_SEL_0},
       {AMDGPU::OpName::vdst, AMDGPU::OpName::src0_modifiers,
        SISrcMods::DST_OP_SEL}}};
  for (const auto &[OpName, OpModsName, OpSelMask] : OpAndOpMods) {

    if (OpIdx == -1 || OpModsIdx == -1)

    const MCOperand &OpMods = MI.getOperand(OpModsIdx);
    unsigned ModVal = OpMods.getImm();
    if (ModVal & OpSelMask) {
  constexpr int DST_IDX = 0;
  auto Opcode = MI.getOpcode();
  const auto &Desc = MCII->get(Opcode);

  if (OldIdx != -1 && Desc.getOperandConstraint(

  assert(MI.getNumOperands() + 1 < MCII->get(MI.getOpcode()).getNumOperands());

                     AMDGPU::OpName::src2_modifiers);

  unsigned Opc = MI.getOpcode();

  if (VDstInIdx != -1)

  unsigned DescNumOps = MCII->get(Opc).getNumOperands();
  if (MI.getNumOperands() < DescNumOps &&

                         AMDGPU::OpName::op_sel);

  if (MI.getNumOperands() < DescNumOps &&

                         AMDGPU::OpName::src0_modifiers);

  if (MI.getNumOperands() < DescNumOps &&

                         AMDGPU::OpName::src1_modifiers);

  if (VDstInIdx != -1)

  unsigned Opc = MI.getOpcode();
  unsigned DescNumOps = MCII->get(Opc).getNumOperands();
  if (MI.getNumOperands() < DescNumOps &&

                         AMDGPU::OpName::op_sel);
  auto TSFlags = MCII->get(MI.getOpcode()).TSFlags;

                                            AMDGPU::OpName::vdst);

                                            AMDGPU::OpName::vdata);

                               : AMDGPU::OpName::rsrc;

                                            AMDGPU::OpName::dmask);

                                            AMDGPU::OpName::tfe);
                                            AMDGPU::OpName::d16);

  if (BaseOpcode->BVH) {

  bool IsAtomic = (VDstIdx != -1);

  bool IsPartialNSA = false;
  unsigned AddrSize = Info->VAddrDwords;

  const bool IsA16 = (A16Idx != -1 && MI.getOperand(A16Idx).getImm());

    IsNSA = Info->MIMGEncoding == AMDGPU::MIMGEncGfx10NSA ||
            Info->MIMGEncoding == AMDGPU::MIMGEncGfx11NSA ||
            Info->MIMGEncoding == AMDGPU::MIMGEncGfx12;

    if (!IsVSample && AddrSize > 12)

    if (AddrSize > Info->VAddrDwords) {

      IsPartialNSA = true;
  unsigned DMask = MI.getOperand(DMaskIdx).getImm() & 0xf;
  unsigned DstSize = IsGather4 ? 4 : std::max(llvm::popcount(DMask), 1);

  bool D16 = D16Idx >= 0 && MI.getOperand(D16Idx).getImm();
  if (D16 && AMDGPU::hasPackedD16(STI))
    DstSize = (DstSize + 1) / 2;
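  // Example: DMask = 0b1011 enables three channels, so DstSize =
  // max(popcount(0b1011), 1) = 3 dwords; with packed D16 the result shrinks
  // to (3 + 1) / 2 = 2 dwords.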
  if (TFEIdx != -1 && MI.getOperand(TFEIdx).getImm())

  if (DstSize == Info->VDataDwords && AddrSize == Info->VAddrDwords)

  if (NewOpcode == -1)

  if (DstSize != Info->VDataDwords) {
    auto DataRCID = MCII->get(NewOpcode).operands()[VDataIdx].RegClass;

    Vdata0 = (VdataSub0 != 0) ? VdataSub0 : Vdata0;

  int VAddrSAIdx = IsPartialNSA ? (RsrcIdx - 1) : VAddr0Idx;

  if (STI.hasFeature(AMDGPU::FeatureNSAEncoding) && (!IsNSA || IsPartialNSA) &&
      AddrSize != Info->VAddrDwords) {
    MCRegister VAddrSA = MI.getOperand(VAddrSAIdx).getReg();

    VAddrSA = VAddrSubSA ? VAddrSubSA : VAddrSA;

    auto AddrRCID = MCII->get(NewOpcode).operands()[VAddrSAIdx].RegClass;

  MI.setOpcode(NewOpcode);

  if (NewVdata != AMDGPU::NoRegister) {

    assert(AddrSize <= Info->VAddrDwords);
    MI.erase(MI.begin() + VAddr0Idx + AddrSize,
             MI.begin() + VAddr0Idx + Info->VAddrDwords);
  unsigned Opc = MI.getOpcode();
  unsigned DescNumOps = MCII->get(Opc).getNumOperands();

  if (MI.getNumOperands() < DescNumOps &&

  if (MI.getNumOperands() < DescNumOps &&

                         AMDGPU::OpName::op_sel);
  if (MI.getNumOperands() < DescNumOps &&

                         AMDGPU::OpName::op_sel_hi);
  if (MI.getNumOperands() < DescNumOps &&

                         AMDGPU::OpName::neg_lo);
  if (MI.getNumOperands() < DescNumOps &&

                         AMDGPU::OpName::neg_hi);

  unsigned Opc = MI.getOpcode();
  unsigned DescNumOps = MCII->get(Opc).getNumOperands();

  if (MI.getNumOperands() < DescNumOps &&

  if (MI.getNumOperands() < DescNumOps &&

                         AMDGPU::OpName::src0_modifiers);

  if (MI.getNumOperands() < DescNumOps &&

                         AMDGPU::OpName::src1_modifiers);

  assert(HasLiteral && "Should have decoded a literal");

  unsigned DescNumOps = Desc.getNumOperands();

                         AMDGPU::OpName::immDeferred);
  assert(DescNumOps == MI.getNumOperands());
  for (unsigned I = 0; I < DescNumOps; ++I) {
    auto &Op = MI.getOperand(I);
    auto OpType = Desc.operands()[I].OperandType;
                                          const Twine &ErrMsg) const {

                                                unsigned Val) const {
  const auto &RegCl = AMDGPUMCRegisterClasses[RegClassID];
  if (Val >= RegCl.getNumRegs())
    return errOperand(Val, Twine(getRegClassName(RegClassID)) +
                               ": unknown register " + Twine(Val));

                                                 unsigned Val) const {
  switch (SRegClassID) {
  case AMDGPU::SGPR_32RegClassID:
  case AMDGPU::TTMP_32RegClassID:
    break;
  case AMDGPU::SGPR_64RegClassID:
  case AMDGPU::TTMP_64RegClassID:
    shift = 1;
    break;
  case AMDGPU::SGPR_96RegClassID:
  case AMDGPU::TTMP_96RegClassID:
  case AMDGPU::SGPR_128RegClassID:
  case AMDGPU::TTMP_128RegClassID:
  case AMDGPU::SGPR_256RegClassID:
  case AMDGPU::TTMP_256RegClassID:
  case AMDGPU::SGPR_288RegClassID:
  case AMDGPU::TTMP_288RegClassID:
  case AMDGPU::SGPR_320RegClassID:
  case AMDGPU::TTMP_320RegClassID:
  case AMDGPU::SGPR_352RegClassID:
  case AMDGPU::TTMP_352RegClassID:
  case AMDGPU::SGPR_384RegClassID:
  case AMDGPU::TTMP_384RegClassID:
  case AMDGPU::SGPR_512RegClassID:
  case AMDGPU::TTMP_512RegClassID:
    shift = 2;
    break;

  if (Val % (1 << shift)) {
    *CommentStream << "Warning: " << getRegClassName(SRegClassID)
                   << ": scalar reg isn't aligned " << Val;
  unsigned RegIdxInVGPR16 = RegIdx * 2 + (IsHi ? 1 : 0);
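  // The 16-bit register file interleaves halves: RegIdx * 2 + IsHi maps
  // (RegIdx = 5, IsHi = 0) to entry 10 and (RegIdx = 5, IsHi = 1) to entry 11
  // of the VGPR_16 class.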
1365 "Should only decode multiple kimm with VOPD, check VSrc operand types");
1367 return errOperand(Val,
"More than one unique literal is illegal");
1379 if (Bytes.
size() < 4) {
1380 return errOperand(0,
"cannot read literal, inst bytes left " +
1384 Literal = Literal64 = eatBytes<uint32_t>(Bytes);
  using namespace AMDGPU::EncValues;

  assert(Imm >= INLINE_INTEGER_C_MIN && Imm <= INLINE_INTEGER_C_MAX);
  return (Imm <= INLINE_INTEGER_C_POSITIVE_MAX)
             ? (static_cast<int64_t>(Imm) - INLINE_INTEGER_C_MIN)
             : (INLINE_INTEGER_C_POSITIVE_MAX - static_cast<int64_t>(Imm));
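// With the usual EncValues (INLINE_INTEGER_C_MIN = 128,
// INLINE_INTEGER_C_POSITIVE_MAX = 192), the mapping above decodes 128 -> 0,
// 192 -> 64, 193 -> -1, and 208 -> -16.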
    return llvm::bit_cast<uint32_t>(0.5f);
    return llvm::bit_cast<uint32_t>(-0.5f);
    return llvm::bit_cast<uint32_t>(1.0f);
    return llvm::bit_cast<uint32_t>(-1.0f);
    return llvm::bit_cast<uint32_t>(2.0f);
    return llvm::bit_cast<uint32_t>(-2.0f);
    return llvm::bit_cast<uint32_t>(4.0f);
    return llvm::bit_cast<uint32_t>(-4.0f);

    return llvm::bit_cast<uint64_t>(0.5);
    return llvm::bit_cast<uint64_t>(-0.5);
    return llvm::bit_cast<uint64_t>(1.0);
    return llvm::bit_cast<uint64_t>(-1.0);
    return llvm::bit_cast<uint64_t>(2.0);
    return llvm::bit_cast<uint64_t>(-2.0);
    return llvm::bit_cast<uint64_t>(4.0);
    return llvm::bit_cast<uint64_t>(-4.0);
    return 0x3fc45f306dc9c882;
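    // 0x3fc45f306dc9c882 is the IEEE-754 double bit pattern of 1/(2*pi)
    // (~0.15915494309189535), the one inline constant with no exact short
    // literal spelling.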
  using namespace AMDGPU;

  switch (Width) {
  case OPW32:
  case OPW16:
  case OPWV216:
    return VGPR_32RegClassID;
  case OPW64:
  case OPWV232:
    return VReg_64RegClassID;
  case OPW96:
    return VReg_96RegClassID;
  case OPW128:
    return VReg_128RegClassID;
  case OPW192:
    return VReg_192RegClassID;
  case OPW160:
    return VReg_160RegClassID;
  case OPW256:
    return VReg_256RegClassID;
  case OPW288:
    return VReg_288RegClassID;
  case OPW320:
    return VReg_320RegClassID;
  case OPW352:
    return VReg_352RegClassID;
  case OPW384:
    return VReg_384RegClassID;
  case OPW512:
    return VReg_512RegClassID;
  case OPW1024:
    return VReg_1024RegClassID;
  }

  using namespace AMDGPU;

  switch (Width) {
  case OPW32:
  case OPW16:
  case OPWV216:
    return AGPR_32RegClassID;
  case OPW64:
  case OPWV232:
    return AReg_64RegClassID;
  case OPW96:
    return AReg_96RegClassID;
  case OPW128:
    return AReg_128RegClassID;
  case OPW160:
    return AReg_160RegClassID;
  case OPW256:
    return AReg_256RegClassID;
  case OPW288:
    return AReg_288RegClassID;
  case OPW320:
    return AReg_320RegClassID;
  case OPW352:
    return AReg_352RegClassID;
  case OPW384:
    return AReg_384RegClassID;
  case OPW512:
    return AReg_512RegClassID;
  case OPW1024:
    return AReg_1024RegClassID;
  }

  using namespace AMDGPU;

  switch (Width) {
  case OPW32:
  case OPW16:
  case OPWV216:
    return SGPR_32RegClassID;
  case OPW64:
  case OPWV232:
    return SGPR_64RegClassID;
  case OPW96:
    return SGPR_96RegClassID;
  case OPW128:
    return SGPR_128RegClassID;
  case OPW160:
    return SGPR_160RegClassID;
  case OPW256:
    return SGPR_256RegClassID;
  case OPW288:
    return SGPR_288RegClassID;
  case OPW320:
    return SGPR_320RegClassID;
  case OPW352:
    return SGPR_352RegClassID;
  case OPW384:
    return SGPR_384RegClassID;
  case OPW512:
    return SGPR_512RegClassID;
  }

  using namespace AMDGPU;

  switch (Width) {
  case OPW32:
  case OPW16:
  case OPWV216:
    return TTMP_32RegClassID;
  case OPW64:
  case OPWV232:
    return TTMP_64RegClassID;
  case OPW128:
    return TTMP_128RegClassID;
  case OPW256:
    return TTMP_256RegClassID;
  case OPW288:
    return TTMP_288RegClassID;
  case OPW320:
    return TTMP_320RegClassID;
  case OPW352:
    return TTMP_352RegClassID;
  case OPW384:
    return TTMP_384RegClassID;
  case OPW512:
    return TTMP_512RegClassID;
  }
  using namespace AMDGPU::EncValues;

  unsigned TTmpMin = isGFX9Plus() ? TTMP_GFX9PLUS_MIN : TTMP_VI_MIN;
  unsigned TTmpMax = isGFX9Plus() ? TTMP_GFX9PLUS_MAX : TTMP_VI_MAX;

  return (TTmpMin <= Val && Val <= TTmpMax) ? Val - TTmpMin : -1;
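// Example: on GFX9+, where TTMP_GFX9PLUS_MIN is 108, a source encoding of 110
// maps to trap-temp index 2 (ttmp2); anything outside the range returns -1.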
                                          bool MandatoryLiteral,

  using namespace AMDGPU::EncValues;

  bool IsAGPR = Val & 512;

  if (VGPR_MIN <= Val && Val <= VGPR_MAX) {

                                                 bool MandatoryLiteral,
                                                 unsigned ImmWidth,

  assert(Val < (1 << 8) && "9-bit Src encoding when Val{8} is 0");
  using namespace AMDGPU::EncValues;

  static_assert(SGPR_MIN == 0);

  if (INLINE_INTEGER_C_MIN <= Val && Val <= INLINE_INTEGER_C_MAX)

  if (INLINE_FLOATING_C_MIN <= Val && Val <= INLINE_FLOATING_C_MAX)

  if (Val == LITERAL_CONST) {
    if (MandatoryLiteral)
                                               unsigned Val) const {

  Val |= ~XDstReg & 1;
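  // VOPD encodes vdstY without its LSB: the X and Y destinations must sit in
  // opposite even/odd VGPR banks, so the missing bit is reconstructed as the
  // complement of vdstX's LSB.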
  using namespace AMDGPU;

  using namespace AMDGPU;

  using namespace AMDGPU::SDWA;
  using namespace AMDGPU::EncValues;

  if (int(SDWA9EncValues::SRC_VGPR_MIN) <= int(Val) &&
      Val <= SDWA9EncValues::SRC_VGPR_MAX) {
    return createRegOperand(getVgprClassId(Width),
                            Val - SDWA9EncValues::SRC_VGPR_MIN);
  }
  if (SDWA9EncValues::SRC_SGPR_MIN <= Val &&
      Val <= (isGFX10Plus() ? SDWA9EncValues::SRC_SGPR_MAX_GFX10
                            : SDWA9EncValues::SRC_SGPR_MAX_SI)) {
    return createSRegOperand(getSgprClassId(Width),
                             Val - SDWA9EncValues::SRC_SGPR_MIN);
  }
  if (SDWA9EncValues::SRC_TTMP_MIN <= Val &&
      Val <= SDWA9EncValues::SRC_TTMP_MAX) {
    return createSRegOperand(getTtmpClassId(Width),
                             Val - SDWA9EncValues::SRC_TTMP_MIN);
  }

  const unsigned SVal = Val - SDWA9EncValues::SRC_SGPR_MIN;

  if (INLINE_INTEGER_C_MIN <= SVal && SVal <= INLINE_INTEGER_C_MAX)
    return decodeIntImmed(SVal);

  if (INLINE_FLOATING_C_MIN <= SVal && SVal <= INLINE_FLOATING_C_MAX)
    return decodeFPImmed(ImmWidth, SVal, Sema);
  using namespace AMDGPU::SDWA;

         "SDWAVopcDst should be present only on GFX9+");

  bool IsWave32 = STI.hasFeature(AMDGPU::FeatureWavefrontSize32);

  if (Val & SDWA9EncValues::VOPC_DST_VCC_MASK) {
    Val &= SDWA9EncValues::VOPC_DST_SGPR_MASK;

  auto [Version, W64, W32, MDP] = Encoding::decode(Imm);

  if (Encoding::encode(Version, W64, W32, MDP) != Imm)

  if (I == Versions.end())

  return STI.hasFeature(AMDGPU::FeatureArchitectedFlatScratch);
  if (PopCount == 1) {
    S << "bit (" << (TrailingZeros + BaseBytes * CHAR_BIT) << ')';
  } else {
    S << "bits in range ("
      << (TrailingZeros + PopCount - 1 + BaseBytes * CHAR_BIT) << ':'
      << (TrailingZeros + BaseBytes * CHAR_BIT) << ')';
  }
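// Example output: Mask = 0x00000400 with BaseBytes = 0 prints "bit (10)",
// while Mask = 0x00000C00 prints "bits in range (11:10)".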
#define GET_FIELD(MASK) (AMDHSA_BITS_GET(FourByteBuffer, MASK))
#define PRINT_DIRECTIVE(DIRECTIVE, MASK)                                       \
  KdStream << Indent << DIRECTIVE " " << GET_FIELD(MASK) << '\n';

#define PRINT_PSEUDO_DIRECTIVE_COMMENT(DIRECTIVE, MASK)                        \
  KdStream << Indent << MAI.getCommentString() << ' ' << DIRECTIVE " "         \
           << GET_FIELD(MASK) << '\n';

#define CHECK_RESERVED_BITS_IMPL(MASK, DESC, MSG)                              \
  if (FourByteBuffer & (MASK)) {                                               \
    return createStringError(std::errc::invalid_argument,                      \
                             "kernel descriptor " DESC                         \
                             " reserved %s set" MSG,                           \
                             getBitRangeFromMask((MASK), 0).c_str());          \
  }

#define CHECK_RESERVED_BITS(MASK) CHECK_RESERVED_BITS_IMPL(MASK, #MASK, "")
#define CHECK_RESERVED_BITS_MSG(MASK, MSG)                                     \
  CHECK_RESERVED_BITS_IMPL(MASK, #MASK, ", " MSG)
#define CHECK_RESERVED_BITS_DESC(MASK, DESC)                                   \
  CHECK_RESERVED_BITS_IMPL(MASK, DESC, "")
#define CHECK_RESERVED_BITS_DESC_MSG(MASK, DESC, MSG)                          \
  CHECK_RESERVED_BITS_IMPL(MASK, DESC, ", " MSG)
  using namespace amdhsa;

  uint32_t GranulatedWorkitemVGPRCount =
      GET_FIELD(COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT);

  uint32_t NextFreeVGPR =
      (GranulatedWorkitemVGPRCount + 1) *
      AMDGPU::IsaInfo::getVGPREncodingGranule(&STI, EnableWavefrontSize32);

  KdStream << Indent << ".amdhsa_next_free_vgpr " << NextFreeVGPR << '\n';

  uint32_t GranulatedWavefrontSGPRCount =
      GET_FIELD(COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT);

    CHECK_RESERVED_BITS_MSG(COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT,
                            "must be zero on gfx10+");

  uint32_t NextFreeSGPR = (GranulatedWavefrontSGPRCount + 1) *
                          AMDGPU::IsaInfo::getSGPREncodingGranule(&STI);

  KdStream << Indent << ".amdhsa_reserve_vcc " << 0 << '\n';
  if (!hasArchitectedFlatScratch())
    KdStream << Indent << ".amdhsa_reserve_flat_scratch " << 0 << '\n';
  KdStream << Indent << ".amdhsa_reserve_xnack_mask " << 0 << '\n';
  KdStream << Indent << ".amdhsa_next_free_sgpr " << NextFreeSGPR << "\n";

  PRINT_DIRECTIVE(".amdhsa_float_round_mode_32",
                  COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_32);
  PRINT_DIRECTIVE(".amdhsa_float_round_mode_16_64",
                  COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_16_64);
  PRINT_DIRECTIVE(".amdhsa_float_denorm_mode_32",
                  COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_32);
  PRINT_DIRECTIVE(".amdhsa_float_denorm_mode_16_64",
                  COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_16_64);

    PRINT_DIRECTIVE(".amdhsa_dx10_clamp",
                    COMPUTE_PGM_RSRC1_GFX6_GFX11_ENABLE_DX10_CLAMP);

    PRINT_DIRECTIVE(".amdhsa_ieee_mode",
                    COMPUTE_PGM_RSRC1_GFX6_GFX11_ENABLE_IEEE_MODE);

    PRINT_DIRECTIVE(".amdhsa_fp16_overflow",
                    COMPUTE_PGM_RSRC1_GFX9_PLUS_FP16_OVFL);

                                 "COMPUTE_PGM_RSRC1", "must be zero pre-gfx9");

                                 "COMPUTE_PGM_RSRC1", "must be zero pre-gfx10");

    PRINT_DIRECTIVE(".amdhsa_workgroup_processor_mode",
                    COMPUTE_PGM_RSRC1_GFX10_PLUS_WGP_MODE);
    PRINT_DIRECTIVE(".amdhsa_memory_ordered",
                    COMPUTE_PGM_RSRC1_GFX10_PLUS_MEM_ORDERED);
    PRINT_DIRECTIVE(".amdhsa_forward_progress",
                    COMPUTE_PGM_RSRC1_GFX10_PLUS_FWD_PROGRESS);

    PRINT_DIRECTIVE(".amdhsa_round_robin_scheduling",
                    COMPUTE_PGM_RSRC1_GFX12_PLUS_ENABLE_WG_RR_EN);
  using namespace amdhsa;

                  COMPUTE_PGM_RSRC2_ENABLE_PRIVATE_SEGMENT);

  PRINT_DIRECTIVE(".amdhsa_system_sgpr_private_segment_wavefront_offset",
                  COMPUTE_PGM_RSRC2_ENABLE_PRIVATE_SEGMENT);
  PRINT_DIRECTIVE(".amdhsa_system_sgpr_workgroup_id_x",
                  COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_X);
  PRINT_DIRECTIVE(".amdhsa_system_sgpr_workgroup_id_y",
                  COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Y);
  PRINT_DIRECTIVE(".amdhsa_system_sgpr_workgroup_id_z",
                  COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Z);
  PRINT_DIRECTIVE(".amdhsa_system_sgpr_workgroup_info",
                  COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_INFO);
  PRINT_DIRECTIVE(".amdhsa_system_vgpr_workitem_id",
                  COMPUTE_PGM_RSRC2_ENABLE_VGPR_WORKITEM_ID);

  PRINT_DIRECTIVE(".amdhsa_exception_fp_ieee_invalid_op",
                  COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INVALID_OPERATION);
  PRINT_DIRECTIVE(".amdhsa_exception_fp_denorm_src",
                  COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_FP_DENORMAL_SOURCE);
  PRINT_DIRECTIVE(".amdhsa_exception_fp_ieee_div_zero",
                  COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_DIVISION_BY_ZERO);
  PRINT_DIRECTIVE(".amdhsa_exception_fp_ieee_overflow",
                  COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_OVERFLOW);
  PRINT_DIRECTIVE(".amdhsa_exception_fp_ieee_underflow",
                  COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_UNDERFLOW);
  PRINT_DIRECTIVE(".amdhsa_exception_fp_ieee_inexact",
                  COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INEXACT);
  PRINT_DIRECTIVE(".amdhsa_exception_int_div_zero",
                  COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_INT_DIVIDE_BY_ZERO);
  using namespace amdhsa;

    KdStream << Indent << ".amdhsa_accum_offset "
             << (GET_FIELD(COMPUTE_PGM_RSRC3_GFX90A_ACCUM_OFFSET) + 1) * 4
             << '\n';

    PRINT_DIRECTIVE(".amdhsa_tg_split", COMPUTE_PGM_RSRC3_GFX90A_TG_SPLIT);

                                 "COMPUTE_PGM_RSRC3", "must be zero on gfx90a");
                                 "COMPUTE_PGM_RSRC3", "must be zero on gfx90a");

    if (!EnableWavefrontSize32 || !*EnableWavefrontSize32) {
      PRINT_DIRECTIVE(".amdhsa_shared_vgpr_count",
                      COMPUTE_PGM_RSRC3_GFX10_GFX11_SHARED_VGPR_COUNT);
    } else {
      PRINT_PSEUDO_DIRECTIVE_COMMENT(
          "SHARED_VGPR_COUNT", COMPUTE_PGM_RSRC3_GFX10_GFX11_SHARED_VGPR_COUNT);
    }

                                 "COMPUTE_PGM_RSRC3", "must be zero on gfx12+");

    PRINT_PSEUDO_DIRECTIVE_COMMENT("INST_PREF_SIZE",
                                   COMPUTE_PGM_RSRC3_GFX11_INST_PREF_SIZE);
    PRINT_PSEUDO_DIRECTIVE_COMMENT("TRAP_ON_START",
                                   COMPUTE_PGM_RSRC3_GFX11_TRAP_ON_START);
    PRINT_PSEUDO_DIRECTIVE_COMMENT("TRAP_ON_END",
                                   COMPUTE_PGM_RSRC3_GFX11_TRAP_ON_END);

    PRINT_PSEUDO_DIRECTIVE_COMMENT(
        "INST_PREF_SIZE", COMPUTE_PGM_RSRC3_GFX12_PLUS_INST_PREF_SIZE);

                                 "COMPUTE_PGM_RSRC3", "must be zero on gfx10");

                                 "COMPUTE_PGM_RSRC3", "must be zero on gfx10+");

    PRINT_PSEUDO_DIRECTIVE_COMMENT("GLG_EN",
                                   COMPUTE_PGM_RSRC3_GFX12_PLUS_GLG_EN);

                                 "COMPUTE_PGM_RSRC3",
                                 "must be zero on gfx10 or gfx11");

                                 "COMPUTE_PGM_RSRC3", "must be zero on gfx10+");

    PRINT_PSEUDO_DIRECTIVE_COMMENT("IMAGE_OP",
                                   COMPUTE_PGM_RSRC3_GFX11_PLUS_IMAGE_OP);

                                 "COMPUTE_PGM_RSRC3", "must be zero on gfx10");

  } else if (FourByteBuffer) {
    return createStringError(
        std::errc::invalid_argument,
        "kernel descriptor COMPUTE_PGM_RSRC3 must be all zero before gfx9");
#undef PRINT_PSEUDO_DIRECTIVE_COMMENT
#undef PRINT_DIRECTIVE
#undef GET_FIELD
#undef CHECK_RESERVED_BITS_IMPL
#undef CHECK_RESERVED_BITS
#undef CHECK_RESERVED_BITS_MSG
#undef CHECK_RESERVED_BITS_DESC
#undef CHECK_RESERVED_BITS_DESC_MSG

static Error createReservedKDBitsError(uint32_t Mask, unsigned BaseBytes,
                                       const char *Msg = "") {
  return createStringError(
      std::errc::invalid_argument, "kernel descriptor reserved %s set%s%s",
      getBitRangeFromMask(Mask, BaseBytes).c_str(), *Msg ? ", " : "", Msg);
}

static Error createReservedKDBytesError(unsigned BaseInBytes,
                                        unsigned WidthInBytes) {
  return createStringError(
      std::errc::invalid_argument,
      "kernel descriptor reserved bits in range (%u:%u) set",
      (BaseInBytes + WidthInBytes) * CHAR_BIT - 1, BaseInBytes * CHAR_BIT);
}

#define PRINT_DIRECTIVE(DIRECTIVE, MASK)                                       \
  KdStream << Indent << DIRECTIVE " "                                          \
           << ((TwoByteBuffer & MASK) >> (MASK##_SHIFT)) << '\n';
  switch (Cursor.tell()) {
  case amdhsa::GROUP_SEGMENT_FIXED_SIZE_OFFSET:
    FourByteBuffer = DE.getU32(Cursor);
    KdStream << Indent << ".amdhsa_group_segment_fixed_size " << FourByteBuffer
             << '\n';

  case amdhsa::PRIVATE_SEGMENT_FIXED_SIZE_OFFSET:
    FourByteBuffer = DE.getU32(Cursor);
    KdStream << Indent << ".amdhsa_private_segment_fixed_size "
             << FourByteBuffer << '\n';

  case amdhsa::KERNARG_SIZE_OFFSET:
    FourByteBuffer = DE.getU32(Cursor);
    KdStream << Indent << ".amdhsa_kernarg_size "
             << FourByteBuffer << '\n';

  case amdhsa::RESERVED0_OFFSET:
    ReservedBytes = DE.getBytes(Cursor, 4);
    for (int I = 0; I < 4; ++I) {
      if (ReservedBytes[I] != 0)

  case amdhsa::RESERVED1_OFFSET:
    ReservedBytes = DE.getBytes(Cursor, 20);
    for (int I = 0; I < 20; ++I) {
      if (ReservedBytes[I] != 0)

  case amdhsa::COMPUTE_PGM_RSRC3_OFFSET:
    FourByteBuffer = DE.getU32(Cursor);

  case amdhsa::COMPUTE_PGM_RSRC1_OFFSET:
    FourByteBuffer = DE.getU32(Cursor);

  case amdhsa::COMPUTE_PGM_RSRC2_OFFSET:
    FourByteBuffer = DE.getU32(Cursor);
    using namespace amdhsa;
    TwoByteBuffer = DE.getU16(Cursor);

    PRINT_DIRECTIVE(".amdhsa_user_sgpr_private_segment_buffer",
                    KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER);
    PRINT_DIRECTIVE(".amdhsa_user_sgpr_dispatch_ptr",
                    KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR);
    PRINT_DIRECTIVE(".amdhsa_user_sgpr_queue_ptr",
                    KERNEL_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR);
    PRINT_DIRECTIVE(".amdhsa_user_sgpr_kernarg_segment_ptr",
                    KERNEL_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR);
    PRINT_DIRECTIVE(".amdhsa_user_sgpr_dispatch_id",
                    KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID);
    PRINT_DIRECTIVE(".amdhsa_user_sgpr_flat_scratch_init",
                    KERNEL_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT);
    PRINT_DIRECTIVE(".amdhsa_user_sgpr_private_segment_size",
                    KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE);

    if (TwoByteBuffer & KERNEL_CODE_PROPERTY_RESERVED0)

        (TwoByteBuffer & KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32)) {

                      KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32,

    PRINT_DIRECTIVE(".amdhsa_wavefront_size32",
                    KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32);
    PRINT_DIRECTIVE(".amdhsa_uses_dynamic_stack",
                    KERNEL_CODE_PROPERTY_USES_DYNAMIC_STACK);

    if (TwoByteBuffer & KERNEL_CODE_PROPERTY_RESERVED1) {
    using namespace amdhsa;
    TwoByteBuffer = DE.getU16(Cursor);
    if (TwoByteBuffer & KERNARG_PRELOAD_SPEC_LENGTH) {
      PRINT_DIRECTIVE(".amdhsa_user_sgpr_kernarg_preload_length",
                      KERNARG_PRELOAD_SPEC_LENGTH);
    }
    if (TwoByteBuffer & KERNARG_PRELOAD_SPEC_OFFSET) {
      PRINT_DIRECTIVE(".amdhsa_user_sgpr_kernarg_preload_offset",
                      KERNARG_PRELOAD_SPEC_OFFSET);
    }

    ReservedBytes = DE.getBytes(Cursor, 4);
    for (int I = 0; I < 4; ++I) {
      if (ReservedBytes[I] != 0)
#undef PRINT_DIRECTIVE

  if (Bytes.size() != 64 || KdAddress % 64 != 0)
    return createStringError(std::errc::invalid_argument,
                             "kernel descriptor must be 64-byte aligned");

  EnableWavefrontSize32 =
      AMDHSA_BITS_GET(KernelCodeProperties,
                      amdhsa::KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32);

  KdStream << ".amdhsa_kernel " << KdName << '\n';

  while (C && C.tell() < Bytes.size()) {

  KdStream << ".end_amdhsa_kernel\n";

                             "code object v2 is not supported");

const MCExpr *AMDGPUDisassembler::createConstantSymbolExpr(StringRef Id,
                                                           int64_t Val) {

  if (!Sym->isVariable()) {

    bool Valid = Sym->getVariableValue()->evaluateAsAbsolute(Res);
    if (!Valid || Res != Val)

  if (Result != Symbols->end()) {

  ReferencedAddresses.push_back(static_cast<uint64_t>(Value));

                                       std::unique_ptr<MCRelocationInfo> &&RelInfo) {