#define DEBUG_TYPE "amdgpu-disassembler"
#define SGPR_MAX                                                               \
  (isGFX10Plus() ? AMDGPU::EncValues::SGPR_MAX_GFX10                           \
                 : AMDGPU::EncValues::SGPR_MAX_SI)
      MAI(*Ctx.getAsmInfo()), TargetMaxInstBytes(MAI.getMaxInstLength(&STI)),
      CodeObjectVersion(AMDGPU::getDefaultAMDHSACodeObjectVersion()) {

    createConstantSymbolExpr(Symbol, Code);

  UCVersionW64Expr = createConstantSymbolExpr("UC_VERSION_W64_BIT", 0x2000);
  UCVersionW32Expr = createConstantSymbolExpr("UC_VERSION_W32_BIT", 0x4000);
  UCVersionMDPExpr = createConstantSymbolExpr("UC_VERSION_MDP_BIT", 0x8000);

  std::advance(I, OpIdx);
  int64_t Offset = SignExtend64<16>(Imm) * 4 + 4 + Addr;

  if (DAsm->tryAddingSymbolicOperand(Inst, Offset, Addr, true, 2, 2, 0))
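// Worked example (added for clarity, not in the original file): the SOPP
// branch immediate is a signed 16-bit count of 4-byte words relative to the
// instruction after the branch, so Imm = 0xFFFF (-1) at Addr = 0x100 resolves
// to 0x100 + 4 + (-1 * 4) = 0x100, i.e. a branch to itself.
static_assert(SignExtend64<16>(0xFFFF) * 4 + 4 + 0x100 == 0x100,
              "simm16 branch target arithmetic");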
  if (DAsm->isGFX12Plus()) { // GFX12 supports 24-bit signed offsets.
    Offset = SignExtend64<24>(Imm);
  } else if (DAsm->isVI()) { // VI supports 20-bit unsigned offsets.
    Offset = Imm & 0xFFFFF;
  } else { // GFX9+ supports 21-bit signed offsets.
    Offset = SignExtend64<21>(Imm);
  }
  return addOperand(Inst, DAsm->decodeBoolReg(Val));

  return addOperand(Inst, DAsm->decodeSplitBarrier(Val));

  return addOperand(Inst, DAsm->decodeDpp8FI(Val));
#define DECODE_OPERAND(StaticDecoderName, DecoderName)                         \
  static DecodeStatus StaticDecoderName(MCInst &Inst, unsigned Imm,            \
                                        uint64_t /*Addr*/,                     \
                                        const MCDisassembler *Decoder) {       \
    auto DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);              \
    return addOperand(Inst, DAsm->DecoderName(Imm));                           \
  }
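// Illustration (added): a use such as DECODE_SDWA(Src32) below expands through
// DECODE_OPERAND into a thin trampoline that recovers the target disassembler
// from the generic MCDisassembler pointer and forwards the raw encoding to the
// member decoder, roughly:
//
//   static DecodeStatus decodeSDWASrc32(MCInst &Inst, unsigned Imm,
//                                       uint64_t /*Addr*/,
//                                       const MCDisassembler *Decoder) {
//     auto DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
//     return addOperand(Inst, DAsm->decodeSDWASrc32(Imm));
//   }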
#define DECODE_OPERAND_REG_8(RegClass)                                         \
  static DecodeStatus Decode##RegClass##RegisterClass(                         \
      MCInst &Inst, unsigned Imm, uint64_t /*Addr*/,                           \
      const MCDisassembler *Decoder) {                                         \
    assert(Imm < (1 << 8) && "8-bit encoding");                                \
    auto DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);              \
    return addOperand(                                                         \
        Inst, DAsm->createRegOperand(AMDGPU::RegClass##RegClassID, Imm));      \
  }
#define DECODE_SrcOp(Name, EncSize, OpWidth, EncImm, MandatoryLiteral,         \
                     ImmWidth)                                                 \
  static DecodeStatus Name(MCInst &Inst, unsigned Imm, uint64_t /*Addr*/,      \
                           const MCDisassembler *Decoder) {                    \
    assert(Imm < (1 << EncSize) && #EncSize "-bit encoding");                  \
    auto DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);              \
    return addOperand(Inst,                                                    \
                      DAsm->decodeSrcOp(AMDGPUDisassembler::OpWidth, EncImm,   \
                                        MandatoryLiteral, ImmWidth));          \
  }
static DecodeStatus decodeSrcOp(MCInst &Inst, unsigned EncSize,
                                AMDGPUDisassembler::OpWidthTy OpWidth,
                                unsigned Imm, unsigned EncImm,
                                bool MandatoryLiteral, unsigned ImmWidth,
                                AMDGPU::OperandSemantics Sema,
                                const MCDisassembler *Decoder) {
  assert(Imm < (1U << EncSize) && "Operand doesn't fit encoding!");
  auto DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
  return addOperand(Inst, DAsm->decodeSrcOp(OpWidth, EncImm, MandatoryLiteral,
                                            ImmWidth, Sema));
}
#define DECODE_OPERAND_REG_7(RegClass, OpWidth)                                \
  DECODE_SrcOp(Decode##RegClass##RegisterClass, 7, OpWidth, Imm, false, 0)
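// Orientation note (added): an instantiation like
// DECODE_OPERAND_REG_7(SGPR_32, OPW32) produces DecodeSGPR_32RegisterClass, a
// 7-bit register-only decoder; the trailing Imm/false/0 arguments become
// EncImm, MandatoryLiteral and ImmWidth in the DECODE_SrcOp body above.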
template <AMDGPUDisassembler::OpWidthTy OpWidth>

template <AMDGPUDisassembler::OpWidthTy OpWidth>
static DecodeStatus decodeSrcReg9(MCInst &Inst, unsigned Imm, uint64_t /*Addr*/,
                                  const MCDisassembler *Decoder) {
  return decodeSrcOp(Inst, 9, OpWidth, Imm, Imm, false, 0,
                     AMDGPU::OperandSemantics::INT, Decoder);
}

template <AMDGPUDisassembler::OpWidthTy OpWidth>
static DecodeStatus decodeSrcA9(MCInst &Inst, unsigned Imm, uint64_t /*Addr*/,
                                const MCDisassembler *Decoder) {
  return decodeSrcOp(Inst, 9, OpWidth, Imm, Imm | 512, false, 0,
                     AMDGPU::OperandSemantics::INT, Decoder);
}

template <AMDGPUDisassembler::OpWidthTy OpWidth>
static DecodeStatus decodeSrcAV10(MCInst &Inst, unsigned Imm, uint64_t /*Addr*/,
                                  const MCDisassembler *Decoder) {
  return decodeSrcOp(Inst, 10, OpWidth, Imm, Imm, false, 0,
                     AMDGPU::OperandSemantics::INT, Decoder);
}

template <AMDGPUDisassembler::OpWidthTy OpWidth, unsigned ImmWidth,
          unsigned OperandSemantics>
static DecodeStatus decodeSrcRegOrImm9(MCInst &Inst, unsigned Imm,
                                       uint64_t /*Addr*/,
                                       const MCDisassembler *Decoder) {
  return decodeSrcOp(Inst, 9, OpWidth, Imm, Imm, false, ImmWidth,
                     (AMDGPU::OperandSemantics)OperandSemantics, Decoder);
}

template <AMDGPUDisassembler::OpWidthTy OpWidth, unsigned ImmWidth,
          unsigned OperandSemantics>
static DecodeStatus decodeSrcRegOrImmA9(MCInst &Inst, unsigned Imm,
                                        uint64_t /*Addr*/,
                                        const MCDisassembler *Decoder) {
  return decodeSrcOp(Inst, 9, OpWidth, Imm, Imm | 512, false, ImmWidth,
                     (AMDGPU::OperandSemantics)OperandSemantics, Decoder);
}

template <AMDGPUDisassembler::OpWidthTy OpWidth, unsigned ImmWidth,
          unsigned OperandSemantics>
static DecodeStatus decodeSrcRegOrImmDeferred9(MCInst &Inst, unsigned Imm,
                                               uint64_t /*Addr*/,
                                               const MCDisassembler *Decoder) {
  return decodeSrcOp(Inst, 9, OpWidth, Imm, Imm, true, ImmWidth,
                     (AMDGPU::OperandSemantics)OperandSemantics, Decoder);
}
  assert(isUInt<10>(Imm) && "10-bit encoding expected");
  assert((Imm & (1 << 8)) == 0 && "Imm{8} should not be used");

  bool IsHi = Imm & (1 << 9);
  unsigned RegIdx = Imm & 0xff;
  return addOperand(Inst, DAsm->createVGPR16Operand(RegIdx, IsHi));
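// Reading the 10-bit true16 VGPR encoding above (added note): bit 9 selects
// the high or low 16-bit half and bits 7:0 select the register, so
// Imm = (1 << 9) | 5 decodes to v5.h while Imm = 5 decodes to v5.l.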
  assert(isUInt<8>(Imm) && "8-bit encoding expected");

  bool IsHi = Imm & (1 << 7);
  unsigned RegIdx = Imm & 0x7f;
  return addOperand(Inst, DAsm->createVGPR16Operand(RegIdx, IsHi));
  assert(isUInt<9>(Imm) && "9-bit encoding expected");

    bool IsHi = Imm & (1 << 7);
    unsigned RegIdx = Imm & 0x7f;
    return addOperand(Inst, DAsm->createVGPR16Operand(RegIdx, IsHi));

  return addOperand(Inst, DAsm->decodeNonVGPRSrcOp(
                              OpWidth, Imm & 0xFF, false, ImmWidth,
                              (AMDGPU::OperandSemantics)OperandSemantics));
  assert(isUInt<9>(Imm) && "9-bit encoding expected");

    bool IsHi = Imm & (1 << 7);
    unsigned RegIdx = Imm & 0x7f;
    return addOperand(Inst, DAsm->createVGPR16Operand(RegIdx, IsHi));

  return addOperand(Inst, DAsm->decodeNonVGPRSrcOp(
                              OpWidth, Imm & 0xFF, true, ImmWidth,
                              (AMDGPU::OperandSemantics)OperandSemantics));
  assert(isUInt<10>(Imm) && "10-bit encoding expected");

    bool IsHi = Imm & (1 << 9);
    unsigned RegIdx = Imm & 0xff;
    return addOperand(Inst, DAsm->createVGPR16Operand(RegIdx, IsHi));

  return addOperand(Inst, DAsm->decodeNonVGPRSrcOp(
                              OpWidth, Imm & 0xFF, false, ImmWidth,
                              (AMDGPU::OperandSemantics)OperandSemantics));
  assert(isUInt<10>(Imm) && "10-bit encoding expected");

  bool IsHi = Imm & (1 << 9);
  unsigned RegIdx = Imm & 0xff;
  return addOperand(Inst, DAsm->createVGPR16Operand(RegIdx, IsHi));

  return addOperand(Inst, DAsm->decodeMandatoryLiteralConstant(Imm));

  return addOperand(Inst, DAsm->decodeVOPDDstYOp(Inst, Val));
  auto Reg = Sub ? Sub : Op.getReg();
  return Reg >= AMDGPU::AGPR0 && Reg <= AMDGPU::AGPR255;

  if (!DAsm->isGFX90A()) {

    uint64_t TSFlags = DAsm->getMCII()->get(Opc).TSFlags;
    uint16_t DataNameIdx = (TSFlags & SIInstrFlags::DS) ? AMDGPU::OpName::data0
                                                        : AMDGPU::OpName::vdata;

  return addOperand(Inst, DAsm->decodeSrcOp(Opw, Imm | 256));
template <AMDGPUDisassembler::OpWidthTy Opw>

  assert(Imm < (1 << 9) && "9-bit encoding");

#define DECODE_SDWA(DecName) \
DECODE_OPERAND(decodeSDWA##DecName, decodeSDWA##DecName)

  return addOperand(Inst, DAsm->decodeVersionImm(Imm));
#include "AMDGPUGenDisassemblerTables.inc"
  const auto Res =
      support::endian::read<T, llvm::endianness::little>(Bytes.data());
  Bytes = Bytes.slice(sizeof(T));

  uint64_t Lo =
      support::endian::read<uint64_t, llvm::endianness::little>(Bytes.data());
  Bytes = Bytes.slice(8);
  uint64_t Hi =
      support::endian::read<uint32_t, llvm::endianness::little>(Bytes.data());
  Bytes = Bytes.slice(4);
  uint64_t Lo =
      support::endian::read<uint64_t, llvm::endianness::little>(Bytes.data());
  Bytes = Bytes.slice(8);
  uint64_t Hi =
      support::endian::read<uint64_t, llvm::endianness::little>(Bytes.data());
  Bytes = Bytes.slice(8);
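// Usage sketch (added): the eatBytes helpers consume from the front of the
// ArrayRef, so a caller decoding a 96-bit encoding can write
//   DecoderUInt128 In = eat12Bytes(Bytes);
// after which Bytes covers only the not-yet-decoded tail and In holds bytes
// 0..7 in its low quadword and bytes 8..11 in its high part, matching the
// little-endian instruction stream.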
  unsigned MaxInstBytesNum = std::min((size_t)TargetMaxInstBytes, Bytes_.size());
  Bytes = Bytes_.slice(0, MaxInstBytesNum);

    Size = std::min((size_t)4, Bytes_.size());

      Bytes = Bytes_.slice(0, MaxInstBytesNum);
    } else if (Bytes.size() >= 16 &&

      Bytes = Bytes_.slice(0, MaxInstBytesNum);

    if (Bytes.size() >= 8) {
      const uint64_t QW = eatBytes<uint64_t>(Bytes);

    Bytes = Bytes_.slice(0, MaxInstBytesNum);

    if (Bytes.size() >= 4) {
      const uint32_t DW = eatBytes<uint32_t>(Bytes);
                       AMDGPU::OpName::src2_modifiers);

  if (MI.getOpcode() == AMDGPU::V_CVT_SR_BF8_F32_e64_dpp ||
      MI.getOpcode() == AMDGPU::V_CVT_SR_FP8_F32_e64_dpp) {

                         AMDGPU::OpName::src2_modifiers);

  if (MCII->get(MI.getOpcode()).TSFlags &

                          AMDGPU::OpName::cpol);

      if (MI.getNumOperands() <= (unsigned)CPolPos) {

                            AMDGPU::OpName::cpol);

        MI.getOperand(CPolPos).setImm(MI.getOperand(CPolPos).getImm() | CPol);

  if ((MCII->get(MI.getOpcode()).TSFlags &

    if (TFEOpIdx != -1) {
      auto *TFEIter = MI.begin();
      std::advance(TFEIter, TFEOpIdx);

  if (MCII->get(MI.getOpcode()).TSFlags &

    if (SWZOpIdx != -1) {
      auto *SWZIter = MI.begin();
      std::advance(SWZIter, SWZOpIdx);
    unsigned NSAArgs = RsrcIdx - VAddr0Idx - 1;
    if (VAddr0Idx >= 0 && NSAArgs > 0) {
      unsigned NSAWords = (NSAArgs + 3) / 4;
      if (Bytes.size() < 4 * NSAWords)

      for (unsigned i = 0; i < NSAArgs; ++i) {
        const unsigned VAddrIdx = VAddr0Idx + 1 + i;
        auto VAddrRCID =
            MCII->get(MI.getOpcode()).operands()[VAddrIdx].RegClass;

      Bytes = Bytes.slice(4 * NSAWords);
  if (MCII->get(MI.getOpcode()).TSFlags &

                         AMDGPU::OpName::vdst_in);
  if (VDstIn_Idx != -1) {
    int Tied = MCII->get(MI.getOpcode()).getOperandConstraint(VDstIn_Idx,
                                                              MCOI::TIED_TO);
    if (Tied != -1 && (MI.getNumOperands() <= (unsigned)VDstIn_Idx ||
                       !MI.getOperand(VDstIn_Idx).isReg() ||
                       MI.getOperand(VDstIn_Idx).getReg() !=
                           MI.getOperand(Tied).getReg())) {
      if (MI.getNumOperands() > (unsigned)VDstIn_Idx)
        MI.erase(&MI.getOperand(VDstIn_Idx));

                         AMDGPU::OpName::vdst_in);

  if (ImmLitIdx != -1 && !IsSOPK)

  Size = MaxInstBytesNum - Bytes.size();
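// Note (added): Bytes was narrowed to the instruction window at the top of
// getInstruction, and every eatBytes/slice call consumed from its front, so
// the reported instruction Size is simply the window size minus the leftover.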
  if (MI.getOpcode() == AMDGPU::V_INTERP_P10_F16_F32_inreg_t16_gfx11 ||
      MI.getOpcode() == AMDGPU::V_INTERP_P10_F16_F32_inreg_fake16_gfx11 ||
      MI.getOpcode() == AMDGPU::V_INTERP_P10_F16_F32_inreg_t16_gfx12 ||
      MI.getOpcode() == AMDGPU::V_INTERP_P10_F16_F32_inreg_fake16_gfx12 ||
      MI.getOpcode() == AMDGPU::V_INTERP_P10_RTZ_F16_F32_inreg_t16_gfx11 ||
      MI.getOpcode() == AMDGPU::V_INTERP_P10_RTZ_F16_F32_inreg_fake16_gfx11 ||
      MI.getOpcode() == AMDGPU::V_INTERP_P10_RTZ_F16_F32_inreg_t16_gfx12 ||
      MI.getOpcode() == AMDGPU::V_INTERP_P10_RTZ_F16_F32_inreg_fake16_gfx12 ||
      MI.getOpcode() == AMDGPU::V_INTERP_P2_F16_F32_inreg_t16_gfx11 ||
      MI.getOpcode() == AMDGPU::V_INTERP_P2_F16_F32_inreg_fake16_gfx11 ||
      MI.getOpcode() == AMDGPU::V_INTERP_P2_F16_F32_inreg_t16_gfx12 ||
      MI.getOpcode() == AMDGPU::V_INTERP_P2_F16_F32_inreg_fake16_gfx12 ||
      MI.getOpcode() == AMDGPU::V_INTERP_P2_RTZ_F16_F32_inreg_t16_gfx11 ||
      MI.getOpcode() == AMDGPU::V_INTERP_P2_RTZ_F16_F32_inreg_fake16_gfx11 ||
      MI.getOpcode() == AMDGPU::V_INTERP_P2_RTZ_F16_F32_inreg_t16_gfx12 ||
      MI.getOpcode() == AMDGPU::V_INTERP_P2_RTZ_F16_F32_inreg_fake16_gfx12) {

                       AMDGPU::OpName::sdst);
    return MO.setReg(MRI.getSubReg(MO.getReg(), AMDGPU::sub0_sub1_sub2_sub3));
    return MO.setReg(
        MRI.getSubReg(MO.getReg(), AMDGPU::sub0_sub1_sub2_sub3_sub4_sub5));

  unsigned CBSZ = MI.getOperand(CbszIdx).getImm();
  unsigned BLGP = MI.getOperand(BlgpIdx).getImm();

  if (!AdjustedRegClassOpcode ||
      AdjustedRegClassOpcode->Opcode == MI.getOpcode())

  MI.setOpcode(AdjustedRegClassOpcode->Opcode);
  unsigned OpSelHi = 0;

                                        bool IsVOP3P = false) {

  unsigned Opc = MI.getOpcode();
  const int ModOps[] = {AMDGPU::OpName::src0_modifiers,
                        AMDGPU::OpName::src1_modifiers,
                        AMDGPU::OpName::src2_modifiers};
  for (int J = 0; J < 3; ++J) {

    unsigned Val = MI.getOperand(OpIdx).getImm();
  const unsigned Opc = MI.getOpcode();

  constexpr std::array<std::tuple<int, int, unsigned>, 4> OpAndOpMods = {
      {{AMDGPU::OpName::src0, AMDGPU::OpName::src0_modifiers,
        SISrcMods::OP_SEL_0},
       {AMDGPU::OpName::src1, AMDGPU::OpName::src1_modifiers,
        SISrcMods::OP_SEL_0},
       {AMDGPU::OpName::src2, AMDGPU::OpName::src2_modifiers,
        SISrcMods::OP_SEL_0},
       {AMDGPU::OpName::vdst, AMDGPU::OpName::src0_modifiers,
        SISrcMods::DST_OP_SEL}}};
  for (const auto &[OpName, OpModsName, OpSelMask] : OpAndOpMods) {

    if (OpIdx == -1 || OpModsIdx == -1)

    const MCOperand &OpMods = MI.getOperand(OpModsIdx);
    unsigned ModVal = OpMods.getImm();
    if (ModVal & OpSelMask) {
  constexpr int DST_IDX = 0;
  auto Opcode = MI.getOpcode();
  const auto &Desc = MCII->get(Opcode);

  if (OldIdx != -1 && Desc.getOperandConstraint(

  assert(MI.getNumOperands() + 1 < MCII->get(MI.getOpcode()).getNumOperands());

                       AMDGPU::OpName::src2_modifiers);
  unsigned Opc = MI.getOpcode();

  if (VDstInIdx != -1)

  unsigned DescNumOps = MCII->get(Opc).getNumOperands();
  if (MI.getNumOperands() < DescNumOps &&

                         AMDGPU::OpName::op_sel);

  if (MI.getNumOperands() < DescNumOps &&

                         AMDGPU::OpName::src0_modifiers);

  if (MI.getNumOperands() < DescNumOps &&

                         AMDGPU::OpName::src1_modifiers);

  if (VDstInIdx != -1)

  unsigned Opc = MI.getOpcode();
  unsigned DescNumOps = MCII->get(Opc).getNumOperands();
  if (MI.getNumOperands() < DescNumOps &&

                         AMDGPU::OpName::op_sel);
  auto TSFlags = MCII->get(MI.getOpcode()).TSFlags;

                                            AMDGPU::OpName::vdst);

                                            AMDGPU::OpName::vdata);

                                : AMDGPU::OpName::rsrc;

                                            AMDGPU::OpName::dmask);

                                            AMDGPU::OpName::tfe);

                                            AMDGPU::OpName::d16);
  if (BaseOpcode->BVH) {

  bool IsAtomic = (VDstIdx != -1);

  bool IsPartialNSA = false;
  unsigned AddrSize = Info->VAddrDwords;

  const bool IsA16 = (A16Idx != -1 && MI.getOperand(A16Idx).getImm());

    IsNSA = Info->MIMGEncoding == AMDGPU::MIMGEncGfx10NSA ||
            Info->MIMGEncoding == AMDGPU::MIMGEncGfx11NSA ||
            Info->MIMGEncoding == AMDGPU::MIMGEncGfx12;

    if (!IsVSample && AddrSize > 12)

    if (AddrSize > Info->VAddrDwords) {

      IsPartialNSA = true;

  unsigned DMask = MI.getOperand(DMaskIdx).getImm() & 0xf;
  unsigned DstSize = IsGather4 ? 4 : std::max(llvm::popcount(DMask), 1);

  bool D16 = D16Idx >= 0 && MI.getOperand(D16Idx).getImm();

    DstSize = (DstSize + 1) / 2;

  if (TFEIdx != -1 && MI.getOperand(TFEIdx).getImm())

  if (DstSize == Info->VDataDwords && AddrSize == Info->VAddrDwords)

  if (NewOpcode == -1)

  if (DstSize != Info->VDataDwords) {
    auto DataRCID = MCII->get(NewOpcode).operands()[VDataIdx].RegClass;

    Vdata0 = (VdataSub0 != 0) ? VdataSub0 : Vdata0;
  int VAddrSAIdx = IsPartialNSA ? (RsrcIdx - 1) : VAddr0Idx;

  if (STI.hasFeature(AMDGPU::FeatureNSAEncoding) && (!IsNSA || IsPartialNSA) &&
      AddrSize != Info->VAddrDwords) {
    MCRegister VAddrSA = MI.getOperand(VAddrSAIdx).getReg();

    VAddrSA = VAddrSubSA ? VAddrSubSA : VAddrSA;

    auto AddrRCID = MCII->get(NewOpcode).operands()[VAddrSAIdx].RegClass;

  MI.setOpcode(NewOpcode);

  if (NewVdata != AMDGPU::NoRegister) {

    assert(AddrSize <= Info->VAddrDwords);
    MI.erase(MI.begin() + VAddr0Idx + AddrSize,
             MI.begin() + VAddr0Idx + Info->VAddrDwords);
  unsigned Opc = MI.getOpcode();
  unsigned DescNumOps = MCII->get(Opc).getNumOperands();

  if (MI.getNumOperands() < DescNumOps &&

  if (MI.getNumOperands() < DescNumOps &&

                         AMDGPU::OpName::op_sel);
  if (MI.getNumOperands() < DescNumOps &&

                         AMDGPU::OpName::op_sel_hi);
  if (MI.getNumOperands() < DescNumOps &&

                         AMDGPU::OpName::neg_lo);
  if (MI.getNumOperands() < DescNumOps &&

                         AMDGPU::OpName::neg_hi);

  unsigned Opc = MI.getOpcode();
  unsigned DescNumOps = MCII->get(Opc).getNumOperands();

  if (MI.getNumOperands() < DescNumOps &&

  if (MI.getNumOperands() < DescNumOps &&

                         AMDGPU::OpName::src0_modifiers);

  if (MI.getNumOperands() < DescNumOps &&

                         AMDGPU::OpName::src1_modifiers);
  assert(HasLiteral && "Should have decoded a literal");

  unsigned DescNumOps = Desc.getNumOperands();

                         AMDGPU::OpName::immDeferred);
  assert(DescNumOps == MI.getNumOperands());
  for (unsigned I = 0; I < DescNumOps; ++I) {
    auto &Op = MI.getOperand(I);
    auto OpType = Desc.operands()[I].OperandType;
MCOperand AMDGPUDisassembler::errOperand(unsigned V,
                                         const Twine &ErrMsg) const {

MCOperand AMDGPUDisassembler::createRegOperand(unsigned RegClassID,
                                               unsigned Val) const {
  const auto &RegCl = AMDGPUMCRegisterClasses[RegClassID];
  if (Val >= RegCl.getNumRegs())
    return errOperand(Val, Twine(getRegClassName(RegClassID)) +
                               ": unknown register " + Twine(Val));

MCOperand AMDGPUDisassembler::createSRegOperand(unsigned SRegClassID,
                                                unsigned Val) const {
  switch (SRegClassID) {
  case AMDGPU::SGPR_32RegClassID:
  case AMDGPU::TTMP_32RegClassID:

  case AMDGPU::SGPR_64RegClassID:
  case AMDGPU::TTMP_64RegClassID:

  case AMDGPU::SGPR_96RegClassID:
  case AMDGPU::TTMP_96RegClassID:
  case AMDGPU::SGPR_128RegClassID:
  case AMDGPU::TTMP_128RegClassID:

  case AMDGPU::SGPR_256RegClassID:
  case AMDGPU::TTMP_256RegClassID:

  case AMDGPU::SGPR_288RegClassID:
  case AMDGPU::TTMP_288RegClassID:
  case AMDGPU::SGPR_320RegClassID:
  case AMDGPU::TTMP_320RegClassID:
  case AMDGPU::SGPR_352RegClassID:
  case AMDGPU::TTMP_352RegClassID:
  case AMDGPU::SGPR_384RegClassID:
  case AMDGPU::TTMP_384RegClassID:
  case AMDGPU::SGPR_512RegClassID:
  case AMDGPU::TTMP_512RegClassID:

  if (Val % (1 << shift)) {
        << ": scalar reg isn't aligned " << Val;
  unsigned RegIdxInVGPR16 = RegIdx * 2 + (IsHi ? 1 : 0);

         "Should only decode multiple kimm with VOPD, check VSrc operand types");

    return errOperand(Val, "More than one unique literal is illegal");

  if (Bytes.size() < 4) {
    return errOperand(0, "cannot read literal, inst bytes left " +

  Literal = Literal64 = eatBytes<uint32_t>(Bytes);
  using namespace AMDGPU::EncValues;

  assert(Imm >= INLINE_INTEGER_C_MIN && Imm <= INLINE_INTEGER_C_MAX);
  return MCOperand::createImm((Imm <= INLINE_INTEGER_C_POSITIVE_MAX) ?
    (static_cast<int64_t>(Imm) - INLINE_INTEGER_C_MIN) :
    (INLINE_INTEGER_C_POSITIVE_MAX - static_cast<int64_t>(Imm)));
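// Worked mapping (added): with INLINE_INTEGER_C_MIN = 128 and
// INLINE_INTEGER_C_POSITIVE_MAX = 192, encodings 128..192 decode to the
// integers 0..64 and 193..208 decode to -1..-16; e.g. Imm = 129 yields
// 129 - 128 = 1 and Imm = 193 yields 192 - 193 = -1.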
    return llvm::bit_cast<uint32_t>(0.5f);
    return llvm::bit_cast<uint32_t>(-0.5f);
    return llvm::bit_cast<uint32_t>(1.0f);
    return llvm::bit_cast<uint32_t>(-1.0f);
    return llvm::bit_cast<uint32_t>(2.0f);
    return llvm::bit_cast<uint32_t>(-2.0f);
    return llvm::bit_cast<uint32_t>(4.0f);
    return llvm::bit_cast<uint32_t>(-4.0f);

    return llvm::bit_cast<uint64_t>(0.5);
    return llvm::bit_cast<uint64_t>(-0.5);
    return llvm::bit_cast<uint64_t>(1.0);
    return llvm::bit_cast<uint64_t>(-1.0);
    return llvm::bit_cast<uint64_t>(2.0);
    return llvm::bit_cast<uint64_t>(-2.0);
    return llvm::bit_cast<uint64_t>(4.0);
    return llvm::bit_cast<uint64_t>(-4.0);
    return 0x3fc45f306dc9c882;
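// Note (added): 0x3fc45f306dc9c882 is 1/(2*pi) in IEEE-754 double precision,
// the one hardware inline constant that is not a small power of two.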
  using namespace AMDGPU;

    return VGPR_32RegClassID;
  case OPWV232: return VReg_64RegClassID;
  case OPW96: return VReg_96RegClassID;
  case OPW128: return VReg_128RegClassID;
  case OPW192: return VReg_192RegClassID;
  case OPW160: return VReg_160RegClassID;
  case OPW256: return VReg_256RegClassID;
  case OPW288: return VReg_288RegClassID;
  case OPW320: return VReg_320RegClassID;
  case OPW352: return VReg_352RegClassID;
  case OPW384: return VReg_384RegClassID;
  case OPW512: return VReg_512RegClassID;
  case OPW1024: return VReg_1024RegClassID;
  using namespace AMDGPU;

    return AGPR_32RegClassID;
  case OPWV232: return AReg_64RegClassID;
  case OPW96: return AReg_96RegClassID;
  case OPW128: return AReg_128RegClassID;
  case OPW160: return AReg_160RegClassID;
  case OPW256: return AReg_256RegClassID;
  case OPW288: return AReg_288RegClassID;
  case OPW320: return AReg_320RegClassID;
  case OPW352: return AReg_352RegClassID;
  case OPW384: return AReg_384RegClassID;
  case OPW512: return AReg_512RegClassID;
  case OPW1024: return AReg_1024RegClassID;
  using namespace AMDGPU;

    return SGPR_32RegClassID;
  case OPWV232: return SGPR_64RegClassID;
  case OPW96: return SGPR_96RegClassID;
  case OPW128: return SGPR_128RegClassID;
  case OPW160: return SGPR_160RegClassID;
  case OPW256: return SGPR_256RegClassID;
  case OPW288: return SGPR_288RegClassID;
  case OPW320: return SGPR_320RegClassID;
  case OPW352: return SGPR_352RegClassID;
  case OPW384: return SGPR_384RegClassID;
  case OPW512: return SGPR_512RegClassID;
  using namespace AMDGPU;

    return TTMP_32RegClassID;
  case OPWV232: return TTMP_64RegClassID;
  case OPW128: return TTMP_128RegClassID;
  case OPW256: return TTMP_256RegClassID;
  case OPW288: return TTMP_288RegClassID;
  case OPW320: return TTMP_320RegClassID;
  case OPW352: return TTMP_352RegClassID;
  case OPW384: return TTMP_384RegClassID;
  case OPW512: return TTMP_512RegClassID;
  using namespace AMDGPU::EncValues;

  unsigned TTmpMin = isGFX9Plus() ? TTMP_GFX9PLUS_MIN : TTMP_VI_MIN;
  unsigned TTmpMax = isGFX9Plus() ? TTMP_GFX9PLUS_MAX : TTMP_VI_MAX;

  return (TTmpMin <= Val && Val <= TTmpMax) ? Val - TTmpMin : -1;
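// Example (added): on gfx9+ the trap temporaries start at TTMP_GFX9PLUS_MIN,
// so Val == TTmpMin maps to index 0 (ttmp0), and any Val outside
// [TTmpMin, TTmpMax] returns -1, meaning "not a ttmp register".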
                                          bool MandatoryLiteral,

  using namespace AMDGPU::EncValues;

  bool IsAGPR = Val & 512;

  if (VGPR_MIN <= Val && Val <= VGPR_MAX) {

                                                  bool MandatoryLiteral, unsigned ImmWidth,

  assert(Val < (1 << 8) && "9-bit Src encoding when Val{8} is 0");
  using namespace AMDGPU::EncValues;

  static_assert(SGPR_MIN == 0);

  if (INLINE_INTEGER_C_MIN <= Val && Val <= INLINE_INTEGER_C_MAX)

  if (INLINE_FLOATING_C_MIN <= Val && Val <= INLINE_FLOATING_C_MAX)

  if (Val == LITERAL_CONST) {
    if (MandatoryLiteral)

MCOperand AMDGPUDisassembler::decodeVOPDDstYOp(MCInst &Inst,
                                               unsigned Val) const {

  Val |= ~XDstReg & 1;
  using namespace AMDGPU;

  using namespace AMDGPU;

  using namespace AMDGPU;
  using namespace AMDGPU::SDWA;
  using namespace AMDGPU::EncValues;

    if (int(SDWA9EncValues::SRC_VGPR_MIN) <= int(Val) &&
        Val <= SDWA9EncValues::SRC_VGPR_MAX) {
      return createRegOperand(getVgprClassId(Width),
                              Val - SDWA9EncValues::SRC_VGPR_MIN);
    }
    if (SDWA9EncValues::SRC_SGPR_MIN <= Val &&
        Val <= (isGFX10Plus() ? SDWA9EncValues::SRC_SGPR_MAX_GFX10
                              : SDWA9EncValues::SRC_SGPR_MAX_SI)) {
      return createSRegOperand(getSgprClassId(Width),
                               Val - SDWA9EncValues::SRC_SGPR_MIN);
    }
    if (SDWA9EncValues::SRC_TTMP_MIN <= Val &&
        Val <= SDWA9EncValues::SRC_TTMP_MAX) {
      return createSRegOperand(getTtmpClassId(Width),
                               Val - SDWA9EncValues::SRC_TTMP_MIN);
    }

    const unsigned SVal = Val - SDWA9EncValues::SRC_SGPR_MIN;

    if (INLINE_INTEGER_C_MIN <= SVal && SVal <= INLINE_INTEGER_C_MAX)

    if (INLINE_FLOATING_C_MIN <= SVal && SVal <= INLINE_FLOATING_C_MAX)
  using namespace AMDGPU::SDWA;

         "SDWAVopcDst should be present only on GFX9+");

  bool IsWave32 = STI.hasFeature(AMDGPU::FeatureWavefrontSize32);

  if (Val & SDWA9EncValues::VOPC_DST_VCC_MASK) {
    Val &= SDWA9EncValues::VOPC_DST_SGPR_MASK;
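// Added note: when VOPC_DST_VCC_MASK is set the low bits encode an explicit
// SGPR destination (a 64-bit pair in wave64, a single SGPR in wave32), which
// is why FeatureWavefrontSize32 is consulted above; with the bit clear the
// destination defaults to VCC.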
  auto [Version, W64, W32, MDP] = Encoding::decode(Imm);

  if (Encoding::encode(Version, W64, W32, MDP) != Imm)

  if (I == Versions.end())

  return STI.hasFeature(AMDGPU::FeatureArchitectedFlatScratch);
  if (PopCount == 1) {
    S << "bit (" << (TrailingZeros + BaseBytes * CHAR_BIT) << ')';
  } else {
    S << "bits in range ("
      << (TrailingZeros + PopCount - 1 + BaseBytes * CHAR_BIT) << ':'
      << (TrailingZeros + BaseBytes * CHAR_BIT) << ')';
  }
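// Example outputs (added): getBitRangeFromMask(0x4, 0) renders "bit (2)" and
// getBitRangeFromMask(0x3f, 0) renders "bits in range (5:0)"; a BaseBytes of 2
// shifts both positions by 16 bits, the field's byte offset within the kernel
// descriptor.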
#define GET_FIELD(MASK) (AMDHSA_BITS_GET(FourByteBuffer, MASK))
#define PRINT_DIRECTIVE(DIRECTIVE, MASK)                                       \
  do {                                                                         \
    KdStream << Indent << DIRECTIVE " " << GET_FIELD(MASK) << '\n';            \
  } while (0)
#define PRINT_PSEUDO_DIRECTIVE_COMMENT(DIRECTIVE, MASK)                        \
  do {                                                                         \
    KdStream << Indent << MAI.getCommentString() << ' ' << DIRECTIVE " "       \
             << GET_FIELD(MASK) << '\n';                                       \
  } while (0)

#define CHECK_RESERVED_BITS_IMPL(MASK, DESC, MSG)                              \
  do {                                                                         \
    if (FourByteBuffer & (MASK)) {                                             \
      return createStringError(std::errc::invalid_argument,                    \
                               "kernel descriptor " DESC                       \
                               " reserved %s set" MSG,                         \
                               getBitRangeFromMask((MASK), 0).c_str());        \
    }                                                                          \
  } while (0)

#define CHECK_RESERVED_BITS(MASK) CHECK_RESERVED_BITS_IMPL(MASK, #MASK, "")
#define CHECK_RESERVED_BITS_MSG(MASK, MSG)                                     \
  CHECK_RESERVED_BITS_IMPL(MASK, #MASK, ", " MSG)
#define CHECK_RESERVED_BITS_DESC(MASK, DESC)                                   \
  CHECK_RESERVED_BITS_IMPL(MASK, DESC, "")
#define CHECK_RESERVED_BITS_DESC_MSG(MASK, DESC, MSG)                          \
  CHECK_RESERVED_BITS_IMPL(MASK, DESC, ", " MSG)
  using namespace amdhsa;

  uint32_t GranulatedWorkitemVGPRCount =
      GET_FIELD(COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT);

  uint32_t NextFreeVGPR =
      (GranulatedWorkitemVGPRCount + 1) *
      AMDGPU::IsaInfo::getVGPREncodingGranule(&STI, EnableWavefrontSize32);

  KdStream << Indent << ".amdhsa_next_free_vgpr " << NextFreeVGPR << '\n';

  uint32_t GranulatedWavefrontSGPRCount =
      GET_FIELD(COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT);

    CHECK_RESERVED_BITS_MSG(COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT,
                            "must be zero on gfx10+");

  uint32_t NextFreeSGPR = (GranulatedWavefrontSGPRCount + 1) *
                          AMDGPU::IsaInfo::getSGPREncodingGranule(&STI);

  KdStream << Indent << ".amdhsa_reserve_vcc " << 0 << '\n';
    KdStream << Indent << ".amdhsa_reserve_flat_scratch " << 0 << '\n';
  KdStream << Indent << ".amdhsa_reserve_xnack_mask " << 0 << '\n';
  KdStream << Indent << ".amdhsa_next_free_sgpr " << NextFreeSGPR << "\n";

  PRINT_DIRECTIVE(".amdhsa_float_round_mode_32",
                  COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_32);
  PRINT_DIRECTIVE(".amdhsa_float_round_mode_16_64",
                  COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_16_64);
  PRINT_DIRECTIVE(".amdhsa_float_denorm_mode_32",
                  COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_32);
  PRINT_DIRECTIVE(".amdhsa_float_denorm_mode_16_64",
                  COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_16_64);

    PRINT_DIRECTIVE(".amdhsa_dx10_clamp",
                    COMPUTE_PGM_RSRC1_GFX6_GFX11_ENABLE_DX10_CLAMP);

    PRINT_DIRECTIVE(".amdhsa_ieee_mode",
                    COMPUTE_PGM_RSRC1_GFX6_GFX11_ENABLE_IEEE_MODE);

    PRINT_DIRECTIVE(".amdhsa_fp16_overflow",
                    COMPUTE_PGM_RSRC1_GFX9_PLUS_FP16_OVFL);

                                 "COMPUTE_PGM_RSRC1", "must be zero pre-gfx9");

                                 "COMPUTE_PGM_RSRC1", "must be zero pre-gfx10");

    PRINT_DIRECTIVE(".amdhsa_workgroup_processor_mode",
                    COMPUTE_PGM_RSRC1_GFX10_PLUS_WGP_MODE);
    PRINT_DIRECTIVE(".amdhsa_memory_ordered",
                    COMPUTE_PGM_RSRC1_GFX10_PLUS_MEM_ORDERED);
    PRINT_DIRECTIVE(".amdhsa_forward_progress",
                    COMPUTE_PGM_RSRC1_GFX10_PLUS_FWD_PROGRESS);

    PRINT_DIRECTIVE(".amdhsa_round_robin_scheduling",
                    COMPUTE_PGM_RSRC1_GFX12_PLUS_ENABLE_WG_RR_EN);
  using namespace amdhsa;

    PRINT_DIRECTIVE(".amdhsa_enable_private_segment",
                    COMPUTE_PGM_RSRC2_ENABLE_PRIVATE_SEGMENT);

    PRINT_DIRECTIVE(".amdhsa_system_sgpr_private_segment_wavefront_offset",
                    COMPUTE_PGM_RSRC2_ENABLE_PRIVATE_SEGMENT);
  PRINT_DIRECTIVE(".amdhsa_system_sgpr_workgroup_id_x",
                  COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_X);
  PRINT_DIRECTIVE(".amdhsa_system_sgpr_workgroup_id_y",
                  COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Y);
  PRINT_DIRECTIVE(".amdhsa_system_sgpr_workgroup_id_z",
                  COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Z);
  PRINT_DIRECTIVE(".amdhsa_system_sgpr_workgroup_info",
                  COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_INFO);
  PRINT_DIRECTIVE(".amdhsa_system_vgpr_workitem_id",
                  COMPUTE_PGM_RSRC2_ENABLE_VGPR_WORKITEM_ID);

  PRINT_DIRECTIVE(".amdhsa_exception_fp_ieee_invalid_op",
                  COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INVALID_OPERATION);
  PRINT_DIRECTIVE(".amdhsa_exception_fp_denorm_src",
                  COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_FP_DENORMAL_SOURCE);
  PRINT_DIRECTIVE(".amdhsa_exception_fp_ieee_div_zero",
                  COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_DIVISION_BY_ZERO);
  PRINT_DIRECTIVE(".amdhsa_exception_fp_ieee_overflow",
                  COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_OVERFLOW);
  PRINT_DIRECTIVE(".amdhsa_exception_fp_ieee_underflow",
                  COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_UNDERFLOW);
  PRINT_DIRECTIVE(".amdhsa_exception_fp_ieee_inexact",
                  COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INEXACT);
  PRINT_DIRECTIVE(".amdhsa_exception_int_div_zero",
                  COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_INT_DIVIDE_BY_ZERO);
  using namespace amdhsa;

    KdStream << Indent << ".amdhsa_accum_offset "
             << (GET_FIELD(COMPUTE_PGM_RSRC3_GFX90A_ACCUM_OFFSET) + 1) * 4
             << '\n';

    PRINT_DIRECTIVE(".amdhsa_tg_split", COMPUTE_PGM_RSRC3_GFX90A_TG_SPLIT);

                                 "COMPUTE_PGM_RSRC3", "must be zero on gfx90a");
                                 "COMPUTE_PGM_RSRC3", "must be zero on gfx90a");

    if (!EnableWavefrontSize32 || !*EnableWavefrontSize32) {
      PRINT_DIRECTIVE(".amdhsa_shared_vgpr_count",
                      COMPUTE_PGM_RSRC3_GFX10_GFX11_SHARED_VGPR_COUNT);
    } else {
      PRINT_PSEUDO_DIRECTIVE_COMMENT(
          "SHARED_VGPR_COUNT", COMPUTE_PGM_RSRC3_GFX10_GFX11_SHARED_VGPR_COUNT);
    }

                                 "COMPUTE_PGM_RSRC3", "must be zero on gfx12+");

      PRINT_PSEUDO_DIRECTIVE_COMMENT("INST_PREF_SIZE",
                                     COMPUTE_PGM_RSRC3_GFX11_INST_PREF_SIZE);
      PRINT_PSEUDO_DIRECTIVE_COMMENT("TRAP_ON_START",
                                     COMPUTE_PGM_RSRC3_GFX11_TRAP_ON_START);
      PRINT_PSEUDO_DIRECTIVE_COMMENT("TRAP_ON_END",
                                     COMPUTE_PGM_RSRC3_GFX11_TRAP_ON_END);

      PRINT_PSEUDO_DIRECTIVE_COMMENT(
          "INST_PREF_SIZE", COMPUTE_PGM_RSRC3_GFX12_PLUS_INST_PREF_SIZE);

                                 "COMPUTE_PGM_RSRC3", "must be zero on gfx10");

                                 "COMPUTE_PGM_RSRC3", "must be zero on gfx10+");

      PRINT_PSEUDO_DIRECTIVE_COMMENT("GLG_EN",
                                     COMPUTE_PGM_RSRC3_GFX12_PLUS_GLG_EN);

                                 "COMPUTE_PGM_RSRC3",
                                 "must be zero on gfx10 or gfx11");

                                 "COMPUTE_PGM_RSRC3", "must be zero on gfx10+");

      PRINT_PSEUDO_DIRECTIVE_COMMENT("IMAGE_OP",
                                     COMPUTE_PGM_RSRC3_GFX11_PLUS_IMAGE_OP);

                                 "COMPUTE_PGM_RSRC3", "must be zero on gfx10");

  } else if (FourByteBuffer) {
    return createStringError(
        std::errc::invalid_argument,
        "kernel descriptor COMPUTE_PGM_RSRC3 must be all zero before gfx9");
  }
#undef PRINT_PSEUDO_DIRECTIVE_COMMENT
#undef PRINT_DIRECTIVE

#undef CHECK_RESERVED_BITS_IMPL
#undef CHECK_RESERVED_BITS
#undef CHECK_RESERVED_BITS_MSG
#undef CHECK_RESERVED_BITS_DESC
#undef CHECK_RESERVED_BITS_DESC_MSG
static Error createReservedKDBitsError(uint32_t Mask, unsigned BaseBytes,
                                       const char *Msg = "") {
  return createStringError(
      std::errc::invalid_argument, "kernel descriptor reserved %s set%s%s",
      getBitRangeFromMask(Mask, BaseBytes).c_str(), *Msg ? ", " : "", Msg);
}

static Error createReservedKDBytesError(unsigned BaseInBytes,
                                        unsigned WidthInBytes) {
  return createStringError(
      std::errc::invalid_argument,
      "kernel descriptor reserved bits in range (%u:%u) set",
      (BaseInBytes + WidthInBytes) * CHAR_BIT - 1, BaseInBytes * CHAR_BIT);
}
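// Worked example (added): createReservedKDBytesError(12, 4) produces
// "kernel descriptor reserved bits in range (127:96) set", since byte 12
// starts at bit 96 and a 4-byte range ends at bit 127.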
#define PRINT_DIRECTIVE(DIRECTIVE, MASK)                                       \
  do {                                                                         \
    KdStream << Indent << DIRECTIVE " "                                        \
             << ((TwoByteBuffer & MASK) >> (MASK##_SHIFT)) << '\n';            \
  } while (0)
  switch (Cursor.tell()) {
    FourByteBuffer = DE.getU32(Cursor);
    KdStream << Indent << ".amdhsa_group_segment_fixed_size " << FourByteBuffer
             << '\n';

    FourByteBuffer = DE.getU32(Cursor);
    KdStream << Indent << ".amdhsa_private_segment_fixed_size "
             << FourByteBuffer << '\n';

    FourByteBuffer = DE.getU32(Cursor);
    KdStream << Indent << ".amdhsa_kernarg_size "
             << FourByteBuffer << '\n';

    ReservedBytes = DE.getBytes(Cursor, 4);
    for (int I = 0; I < 4; ++I) {
      if (ReservedBytes[I] != 0)

    ReservedBytes = DE.getBytes(Cursor, 20);
    for (int I = 0; I < 20; ++I) {
      if (ReservedBytes[I] != 0)

    FourByteBuffer = DE.getU32(Cursor);

    FourByteBuffer = DE.getU32(Cursor);

    FourByteBuffer = DE.getU32(Cursor);

    using namespace amdhsa;
    TwoByteBuffer = DE.getU16(Cursor);

    PRINT_DIRECTIVE(".amdhsa_user_sgpr_private_segment_buffer",
                    KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER);
    PRINT_DIRECTIVE(".amdhsa_user_sgpr_dispatch_ptr",
                    KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR);
    PRINT_DIRECTIVE(".amdhsa_user_sgpr_queue_ptr",
                    KERNEL_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR);
    PRINT_DIRECTIVE(".amdhsa_user_sgpr_kernarg_segment_ptr",
                    KERNEL_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR);
    PRINT_DIRECTIVE(".amdhsa_user_sgpr_dispatch_id",
                    KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID);

      PRINT_DIRECTIVE(".amdhsa_user_sgpr_flat_scratch_init",
                      KERNEL_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT);
    PRINT_DIRECTIVE(".amdhsa_user_sgpr_private_segment_size",
                    KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE);

    if (TwoByteBuffer & KERNEL_CODE_PROPERTY_RESERVED0)

        (TwoByteBuffer & KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32)) {

          KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32,

                      KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32);

    PRINT_DIRECTIVE(".amdhsa_uses_dynamic_stack",
                    KERNEL_CODE_PROPERTY_USES_DYNAMIC_STACK);

    if (TwoByteBuffer & KERNEL_CODE_PROPERTY_RESERVED1) {

    using namespace amdhsa;
    TwoByteBuffer = DE.getU16(Cursor);
    if (TwoByteBuffer & KERNARG_PRELOAD_SPEC_LENGTH) {
      PRINT_DIRECTIVE(".amdhsa_user_sgpr_kernarg_preload_length",
                      KERNARG_PRELOAD_SPEC_LENGTH);
    }

    if (TwoByteBuffer & KERNARG_PRELOAD_SPEC_OFFSET) {
      PRINT_DIRECTIVE(".amdhsa_user_sgpr_kernarg_preload_offset",
                      KERNARG_PRELOAD_SPEC_OFFSET);
    }

    ReservedBytes = DE.getBytes(Cursor, 4);
    for (int I = 0; I < 4; ++I) {
      if (ReservedBytes[I] != 0)

#undef PRINT_DIRECTIVE
  if (Bytes.size() != 64 || KdAddress % 64 != 0)
    return createStringError(std::errc::invalid_argument,
                             "kernel descriptor must be 64-byte aligned");

    EnableWavefrontSize32 =

        amdhsa::KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32);

  KdStream << ".amdhsa_kernel " << KdName << '\n';

  while (C && C.tell() < Bytes.size()) {

  KdStream << ".end_amdhsa_kernel\n";
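// Illustrative output shape (added, abbreviated): a successfully decoded
// 64-byte descriptor is re-rendered as assembler directives, e.g.
//   .amdhsa_kernel <name>
//     .amdhsa_group_segment_fixed_size 0
//     .amdhsa_next_free_vgpr 32
//     ...
//   .end_amdhsa_kernel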
2506 "code object v2 is not supported");
2519const MCExpr *AMDGPUDisassembler::createConstantSymbolExpr(
StringRef Id,
2525 if (!
Sym->isVariable()) {
2529 bool Valid =
Sym->getVariableValue()->evaluateAsAbsolute(Res);
2530 if (!Valid || Res != Val)
2558 if (Result != Symbols->end()) {
2565 ReferencedAddresses.push_back(
static_cast<uint64_t>(
Value));
2584 std::unique_ptr<MCRelocationInfo> &&RelInfo) {