#define DEBUG_TYPE "amdgpu-disassembler"
#define SGPR_MAX                                                               \
  (isGFX10Plus() ? AMDGPU::EncValues::SGPR_MAX_GFX10                           \
                 : AMDGPU::EncValues::SGPR_MAX_SI)
      MAI(*Ctx.getAsmInfo()), TargetMaxInstBytes(MAI.getMaxInstLength(&STI)),
      CodeObjectVersion(AMDGPU::getDefaultAMDHSACodeObjectVersion()) {
    createConstantSymbolExpr(Symbol, Code);
  UCVersionW64Expr = createConstantSymbolExpr("UC_VERSION_W64_BIT", 0x2000);
  UCVersionW32Expr = createConstantSymbolExpr("UC_VERSION_W32_BIT", 0x4000);
  UCVersionMDPExpr = createConstantSymbolExpr("UC_VERSION_MDP_BIT", 0x8000);
  std::advance(I, OpIdx);
  int64_t Offset = SignExtend64<16>(Imm) * 4 + 4 + Addr;
  if (DAsm->tryAddingSymbolicOperand(Inst, Offset, Addr, true, 2, 2, 0))
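  // Worked example (illustrative, not from the original source): SOPP branch
  // targets are 16-bit signed word offsets relative to the instruction that
  // follows the branch. For Imm = 0x0030 decoded at Addr = 0x1000:
  //   SignExtend64<16>(0x30) * 4 + 4 + 0x1000 = 0xC0 + 4 + 0x1000 = 0x10C4.
  // An all-ones Imm (0xFFFF) sign-extends to -1 and yields Addr + 0.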
  if (DAsm->isGFX12Plus()) {
    Offset = SignExtend64<24>(Imm);
  } else if (DAsm->isVI()) {
    Offset = SignExtend64<21>(Imm);
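    // Example of the sign extension above: SignExtend64<21>(0x1FFFFF) == -1,
    // so an all-ones 21-bit SMEM offset on VI decodes as -1, while the same
    // bits under the 24-bit GFX12+ rule stay positive (0x1FFFFF).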
  return addOperand(Inst, DAsm->decodeBoolReg(Val));
  return addOperand(Inst, DAsm->decodeSplitBarrier(Val));
  return addOperand(Inst, DAsm->decodeDpp8FI(Val));
#define DECODE_OPERAND(StaticDecoderName, DecoderName)                         \
  static DecodeStatus StaticDecoderName(MCInst &Inst, unsigned Imm,            \
                                        uint64_t /*Addr*/,                     \
                                        const MCDisassembler *Decoder) {       \
    auto DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);              \
    return addOperand(Inst, DAsm->DecoderName(Imm));                           \
  }
#define DECODE_OPERAND_REG_8(RegClass)                                         \
  static DecodeStatus Decode##RegClass##RegisterClass(                         \
      MCInst &Inst, unsigned Imm, uint64_t /*Addr*/,                           \
      const MCDisassembler *Decoder) {                                         \
    assert(Imm < (1 << 8) && "8-bit encoding");                                \
    auto DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);              \
    return addOperand(                                                         \
        Inst, DAsm->createRegOperand(AMDGPU::RegClass##RegClassID, Imm));      \
  }
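// For illustration (an assumed instantiation, not text from this file),
// DECODE_OPERAND_REG_8(VGPR_32) expands to a decoder that validates the
// 8-bit encoding and emits a register operand from AMDGPU::VGPR_32RegClassID:
//
//   static DecodeStatus DecodeVGPR_32RegisterClass(
//       MCInst &Inst, unsigned Imm, uint64_t /*Addr*/,
//       const MCDisassembler *Decoder) {
//     assert(Imm < (1 << 8) && "8-bit encoding");
//     auto DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
//     return addOperand(
//         Inst, DAsm->createRegOperand(AMDGPU::VGPR_32RegClassID, Imm));
//   }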
#define DECODE_SrcOp(Name, EncSize, OpWidth, EncImm, MandatoryLiteral,         \
                     ImmWidth)                                                 \
  static DecodeStatus Name(MCInst &Inst, unsigned Imm, uint64_t /*Addr*/,      \
                           const MCDisassembler *Decoder) {                    \
    assert(Imm < (1 << EncSize) && #EncSize "-bit encoding");                  \
    auto DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);              \
    return addOperand(Inst,                                                    \
                      DAsm->decodeSrcOp(AMDGPUDisassembler::OpWidth, EncImm,   \
                                        MandatoryLiteral, ImmWidth));          \
  }
static DecodeStatus decodeSrcOp(MCInst &Inst, unsigned EncSize,
                                AMDGPUDisassembler::OpWidthTy OpWidth,
                                unsigned Imm, unsigned EncImm,
                                bool MandatoryLiteral, unsigned ImmWidth,
                                AMDGPU::OperandSemantics Sema,
                                const MCDisassembler *Decoder) {
  assert(Imm < (1U << EncSize) && "Operand doesn't fit encoding!");
  auto DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
  return addOperand(Inst, DAsm->decodeSrcOp(OpWidth, EncImm, MandatoryLiteral,
                                            ImmWidth, Sema));
}
#define DECODE_OPERAND_REG_7(RegClass, OpWidth)                                \
  DECODE_SrcOp(Decode##RegClass##RegisterClass, 7, OpWidth, Imm, false, 0)
template <AMDGPUDisassembler::OpWidthTy OpWidth>
template <AMDGPUDisassembler::OpWidthTy OpWidth>
static DecodeStatus decodeSrcReg9(MCInst &Inst, unsigned Imm, uint64_t /*Addr*/,
                                  const MCDisassembler *Decoder) {
  return decodeSrcOp(Inst, 9, OpWidth, Imm, Imm, false, 0,
                     AMDGPU::OperandSemantics::INT, Decoder);
}
template <AMDGPUDisassembler::OpWidthTy OpWidth>
static DecodeStatus decodeSrcA9(MCInst &Inst, unsigned Imm, uint64_t /*Addr*/,
                                const MCDisassembler *Decoder) {
  return decodeSrcOp(Inst, 9, OpWidth, Imm, Imm | 512, false, 0,
                     AMDGPU::OperandSemantics::INT, Decoder);
}
template <AMDGPUDisassembler::OpWidthTy OpWidth>
static DecodeStatus decodeSrcAV10(MCInst &Inst, unsigned Imm, uint64_t /*Addr*/,
                                  const MCDisassembler *Decoder) {
  return decodeSrcOp(Inst, 10, OpWidth, Imm, Imm, false, 0,
                     AMDGPU::OperandSemantics::INT, Decoder);
}
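// Note on the two helpers above: bit 9 of the source encoding selects the
// AGPR bank (compare "bool IsAGPR = Val & 512" in decodeSrcOp below), so
// decodeSrcA9 forces it with Imm | 512, while decodeSrcAV10 forwards the raw
// 10-bit value and lets the decoder choose between VGPR and AGPR.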
  return decodeSrcOp(Inst, 9, OpWidth, Imm, Imm, false, ImmWidth,
                     (AMDGPU::OperandSemantics)OperandSemantics, Decoder);
  return decodeSrcOp(Inst, 9, OpWidth, Imm, Imm | 512, false, ImmWidth,
                     (AMDGPU::OperandSemantics)OperandSemantics, Decoder);
  return decodeSrcOp(Inst, 9, OpWidth, Imm, Imm, true, ImmWidth,
                     (AMDGPU::OperandSemantics)OperandSemantics, Decoder);
  assert(isUInt<10>(Imm) && "10-bit encoding expected");
  assert((Imm & (1 << 8)) == 0 && "Imm{8} should not be used");

  bool IsHi = Imm & (1 << 9);
  unsigned RegIdx = Imm & 0xff;
  auto DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
  return addOperand(Inst, DAsm->createVGPR16Operand(RegIdx, IsHi));
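  // Bit-layout example for the 10-bit VGPR16 encoding above: Imm = 0x2A5 has
  // bit 9 set and low byte 0xA5, so it decodes as the high 16-bit half of
  // VGPR165 (IsHi = true, RegIdx = 0xA5).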
  assert(isUInt<8>(Imm) && "8-bit encoding expected");

  bool IsHi = Imm & (1 << 7);
  unsigned RegIdx = Imm & 0x7f;
  auto DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
  return addOperand(Inst, DAsm->createVGPR16Operand(RegIdx, IsHi));
  assert(isUInt<9>(Imm) && "9-bit encoding expected");

  const auto *DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
  if (Imm & AMDGPU::EncValues::IS_VGPR) {
    bool IsHi = Imm & (1 << 7);
    unsigned RegIdx = Imm & 0x7f;
    return addOperand(Inst, DAsm->createVGPR16Operand(RegIdx, IsHi));
  }
  return addOperand(Inst, DAsm->decodeNonVGPRSrcOp(
                              OpWidth, Imm & 0xFF, false, ImmWidth,
                              (AMDGPU::OperandSemantics)OperandSemantics));
  assert(isUInt<9>(Imm) && "9-bit encoding expected");

  const auto *DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
  if (Imm & AMDGPU::EncValues::IS_VGPR) {
    bool IsHi = Imm & (1 << 7);
    unsigned RegIdx = Imm & 0x7f;
    return addOperand(Inst, DAsm->createVGPR16Operand(RegIdx, IsHi));
  }
  return addOperand(Inst, DAsm->decodeNonVGPRSrcOp(
                              OpWidth, Imm & 0xFF, true, ImmWidth,
                              (AMDGPU::OperandSemantics)OperandSemantics));
  assert(isUInt<10>(Imm) && "10-bit encoding expected");

  const auto *DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
  if (Imm & AMDGPU::EncValues::IS_VGPR) {
    bool IsHi = Imm & (1 << 9);
    unsigned RegIdx = Imm & 0xff;
    return addOperand(Inst, DAsm->createVGPR16Operand(RegIdx, IsHi));
  }
  return addOperand(Inst, DAsm->decodeNonVGPRSrcOp(
                              OpWidth, Imm & 0xFF, false, ImmWidth,
                              (AMDGPU::OperandSemantics)OperandSemantics));
  assert(isUInt<10>(Imm) && "10-bit encoding expected");

  bool IsHi = Imm & (1 << 9);
  unsigned RegIdx = Imm & 0xff;
  return addOperand(Inst, DAsm->createVGPR16Operand(RegIdx, IsHi));
  return addOperand(Inst, DAsm->decodeMandatoryLiteralConstant(Imm));
  return addOperand(Inst, DAsm->decodeVOPDDstYOp(Inst, Val));
  auto Reg = Sub ? Sub : Op.getReg();
  return Reg >= AMDGPU::AGPR0 && Reg <= AMDGPU::AGPR255;
  if (!DAsm->isGFX90A()) {
  uint64_t TSFlags = DAsm->getMCII()->get(Opc).TSFlags;
                        : AMDGPU::OpName::vdata;
  return addOperand(Inst, DAsm->decodeSrcOp(Opw, Imm | 256));
template <AMDGPUDisassembler::OpWidthTy Opw>
  assert(Imm < (1 << 9) && "9-bit encoding");
#define DECODE_SDWA(DecName) \
DECODE_OPERAND(decodeSDWA##DecName, decodeSDWA##DecName)

  return addOperand(Inst, DAsm->decodeVersionImm(Imm));
#include "AMDGPUGenDisassemblerTables.inc"
  const auto Res =
      support::endian::read<T, llvm::endianness::little>(Bytes.data());
  Bytes = Bytes.slice(sizeof(T));
  uint64_t Lo =
      support::endian::read<uint64_t, llvm::endianness::little>(Bytes.data());
  Bytes = Bytes.slice(8);
  uint64_t Hi =
      support::endian::read<uint32_t, llvm::endianness::little>(Bytes.data());
  Bytes = Bytes.slice(4);
  uint64_t Lo =
      support::endian::read<uint64_t, llvm::endianness::little>(Bytes.data());
  Bytes = Bytes.slice(8);
  uint64_t Hi =
      support::endian::read<uint64_t, llvm::endianness::little>(Bytes.data());
  Bytes = Bytes.slice(8);
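// Usage sketch for the helpers above (hypothetical values, not from the
// source): eat12Bytes builds a DecoderUInt128 from a little-endian u64 low
// half and u32 high half, advancing Bytes past the consumed data:
//
//   ArrayRef<uint8_t> Buf(Storage);        // Storage holds >= 12 bytes
//   DecoderUInt128 Insn = eat12Bytes(Buf); // Buf.size() shrinks by 12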
  unsigned MaxInstBytesNum = std::min((size_t)TargetMaxInstBytes, Bytes_.size());
  Bytes = Bytes_.slice(0, MaxInstBytesNum);
  Size = std::min((size_t)4, Bytes_.size());
    Bytes = Bytes_.slice(0, MaxInstBytesNum);
  } else if (Bytes.size() >= 16 &&
    Bytes = Bytes_.slice(0, MaxInstBytesNum);
    if (Bytes.size() >= 8) {
      const uint64_t QW = eatBytes<uint64_t>(Bytes);
    Bytes = Bytes_.slice(0, MaxInstBytesNum);
    if (Bytes.size() >= 4) {
      const uint32_t DW = eatBytes<uint32_t>(Bytes);
                         AMDGPU::OpName::src2_modifiers);

  if (MI.getOpcode() == AMDGPU::V_CVT_SR_BF8_F32_e64_dpp ||
      MI.getOpcode() == AMDGPU::V_CVT_SR_FP8_F32_e64_dpp) {
                         AMDGPU::OpName::src2_modifiers);
  if (MCII->get(MI.getOpcode()).TSFlags &
                          AMDGPU::OpName::cpol);
    if (MI.getNumOperands() <= (unsigned)CPolPos) {
                            AMDGPU::OpName::cpol);
      MI.getOperand(CPolPos).setImm(MI.getOperand(CPolPos).getImm() | CPol);
  if ((MCII->get(MI.getOpcode()).TSFlags &
    if (TFEOpIdx != -1) {
      auto *TFEIter = MI.begin();
      std::advance(TFEIter, TFEOpIdx);
  if (MCII->get(MI.getOpcode()).TSFlags &
    if (SWZOpIdx != -1) {
      auto *SWZIter = MI.begin();
      std::advance(SWZIter, SWZOpIdx);
    unsigned NSAArgs = RsrcIdx - VAddr0Idx - 1;
    if (VAddr0Idx >= 0 && NSAArgs > 0) {
      unsigned NSAWords = (NSAArgs + 3) / 4;
      if (Bytes.size() < 4 * NSAWords)
      for (unsigned i = 0; i < NSAArgs; ++i) {
        const unsigned VAddrIdx = VAddr0Idx + 1 + i;
            MCII->get(MI.getOpcode()).operands()[VAddrIdx].RegClass;
      Bytes = Bytes.slice(4 * NSAWords);
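      // Arithmetic note: (NSAArgs + 3) / 4 is a ceiling division, since each
      // extra NSA dword packs up to four 8-bit VGPR indices; e.g. NSAArgs = 5
      // extra addresses need NSAWords = (5 + 3) / 4 = 2 dwords.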
  if (MCII->get(MI.getOpcode()).TSFlags &
                                        AMDGPU::OpName::vdst_in);
  if (VDstIn_Idx != -1) {
    int Tied = MCII->get(MI.getOpcode()).getOperandConstraint(VDstIn_Idx,
                                                              MCOI::TIED_TO);
    if (Tied != -1 && (MI.getNumOperands() <= (unsigned)VDstIn_Idx ||
                       !MI.getOperand(VDstIn_Idx).isReg() ||
                       MI.getOperand(VDstIn_Idx).getReg() != MI.getOperand(Tied).getReg())) {
      if (MI.getNumOperands() > (unsigned)VDstIn_Idx)
        MI.erase(&MI.getOperand(VDstIn_Idx));
                         AMDGPU::OpName::vdst_in);
  if (ImmLitIdx != -1 && !IsSOPK)
  Size = MaxInstBytesNum - Bytes.size();
  if (MI.getOpcode() == AMDGPU::V_INTERP_P10_F16_F32_inreg_t16_gfx11 ||
      MI.getOpcode() == AMDGPU::V_INTERP_P10_F16_F32_inreg_fake16_gfx11 ||
      MI.getOpcode() == AMDGPU::V_INTERP_P10_F16_F32_inreg_t16_gfx12 ||
      MI.getOpcode() == AMDGPU::V_INTERP_P10_F16_F32_inreg_fake16_gfx12 ||
      MI.getOpcode() == AMDGPU::V_INTERP_P10_RTZ_F16_F32_inreg_t16_gfx11 ||
      MI.getOpcode() == AMDGPU::V_INTERP_P10_RTZ_F16_F32_inreg_fake16_gfx11 ||
      MI.getOpcode() == AMDGPU::V_INTERP_P10_RTZ_F16_F32_inreg_t16_gfx12 ||
      MI.getOpcode() == AMDGPU::V_INTERP_P10_RTZ_F16_F32_inreg_fake16_gfx12 ||
      MI.getOpcode() == AMDGPU::V_INTERP_P2_F16_F32_inreg_t16_gfx11 ||
      MI.getOpcode() == AMDGPU::V_INTERP_P2_F16_F32_inreg_fake16_gfx11 ||
      MI.getOpcode() == AMDGPU::V_INTERP_P2_F16_F32_inreg_t16_gfx12 ||
      MI.getOpcode() == AMDGPU::V_INTERP_P2_F16_F32_inreg_fake16_gfx12 ||
      MI.getOpcode() == AMDGPU::V_INTERP_P2_RTZ_F16_F32_inreg_t16_gfx11 ||
      MI.getOpcode() == AMDGPU::V_INTERP_P2_RTZ_F16_F32_inreg_fake16_gfx11 ||
      MI.getOpcode() == AMDGPU::V_INTERP_P2_RTZ_F16_F32_inreg_t16_gfx12 ||
      MI.getOpcode() == AMDGPU::V_INTERP_P2_RTZ_F16_F32_inreg_fake16_gfx12) {
                         AMDGPU::OpName::sdst);

    return MO.setReg(MRI.getSubReg(MO.getReg(), AMDGPU::sub0_sub1_sub2_sub3));
    return MO.setReg(
        MRI.getSubReg(MO.getReg(), AMDGPU::sub0_sub1_sub2_sub3_sub4_sub5));
  unsigned CBSZ = MI.getOperand(CbszIdx).getImm();
  unsigned BLGP = MI.getOperand(BlgpIdx).getImm();
  if (!AdjustedRegClassOpcode ||
      AdjustedRegClassOpcode->Opcode == MI.getOpcode())
  MI.setOpcode(AdjustedRegClassOpcode->Opcode);
  unsigned OpSelHi = 0;

static VOPModifiers collectVOPModifiers(const MCInst &MI,
                                        bool IsVOP3P = false) {
  unsigned Opc = MI.getOpcode();
  const int ModOps[] = {AMDGPU::OpName::src0_modifiers,
                        AMDGPU::OpName::src1_modifiers,
                        AMDGPU::OpName::src2_modifiers};
  for (int J = 0; J < 3; ++J) {
    unsigned Val = MI.getOperand(OpIdx).getImm();
  const unsigned Opc = MI.getOpcode();
  constexpr std::array<std::tuple<int, int, unsigned>, 4> OpAndOpMods = {
      {{AMDGPU::OpName::src0, AMDGPU::OpName::src0_modifiers,
       {AMDGPU::OpName::src1, AMDGPU::OpName::src1_modifiers,
       {AMDGPU::OpName::src2, AMDGPU::OpName::src2_modifiers,
       {AMDGPU::OpName::vdst, AMDGPU::OpName::src0_modifiers,
  for (const auto &[OpName, OpModsName, OpSelMask] : OpAndOpMods) {
    if (OpIdx == -1 || OpModsIdx == -1)
    const MCOperand &OpMods = MI.getOperand(OpModsIdx);
    unsigned ModVal = OpMods.getImm();
    if (ModVal & OpSelMask) {
  constexpr int DST_IDX = 0;
  auto Opcode = MI.getOpcode();
  const auto &Desc = MCII->get(Opcode);
  if (OldIdx != -1 && Desc.getOperandConstraint(
  assert(MI.getNumOperands() + 1 < MCII->get(MI.getOpcode()).getNumOperands());
                         AMDGPU::OpName::src2_modifiers);
  unsigned Opc = MI.getOpcode();
  if (VDstInIdx != -1)
  unsigned DescNumOps = MCII->get(Opc).getNumOperands();
  if (MI.getNumOperands() < DescNumOps &&
                           AMDGPU::OpName::op_sel);
  if (MI.getNumOperands() < DescNumOps &&
                           AMDGPU::OpName::src0_modifiers);
  if (MI.getNumOperands() < DescNumOps &&
                           AMDGPU::OpName::src1_modifiers);
  if (VDstInIdx != -1)
  unsigned Opc = MI.getOpcode();
  unsigned DescNumOps = MCII->get(Opc).getNumOperands();
  if (MI.getNumOperands() < DescNumOps &&
                           AMDGPU::OpName::op_sel);
  auto TSFlags = MCII->get(MI.getOpcode()).TSFlags;
                           AMDGPU::OpName::vdst);
                           AMDGPU::OpName::vdata);
                    : AMDGPU::OpName::rsrc;
                           AMDGPU::OpName::dmask);
                           AMDGPU::OpName::tfe);
                           AMDGPU::OpName::d16);
  if (BaseOpcode->BVH) {
  bool IsAtomic = (VDstIdx != -1);
  bool IsPartialNSA = false;
  unsigned AddrSize = Info->VAddrDwords;
  const bool IsA16 = (A16Idx != -1 && MI.getOperand(A16Idx).getImm());
    IsNSA = Info->MIMGEncoding == AMDGPU::MIMGEncGfx10NSA ||
            Info->MIMGEncoding == AMDGPU::MIMGEncGfx11NSA ||
            Info->MIMGEncoding == AMDGPU::MIMGEncGfx12;
    if (!IsVSample && AddrSize > 12)
    if (AddrSize > Info->VAddrDwords) {
      IsPartialNSA = true;
  unsigned DMask = MI.getOperand(DMaskIdx).getImm() & 0xf;
  unsigned DstSize = IsGather4 ? 4 : std::max(llvm::popcount(DMask), 1);
  bool D16 = D16Idx >= 0 && MI.getOperand(D16Idx).getImm();
    DstSize = (DstSize + 1) / 2;
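    // Worked example: DMask = 0xF enables four channels, so DstSize =
    // popcount(0xF) = 4 dwords; with D16 two 16-bit results pack per dword
    // and (4 + 1) / 2 = 2 dwords suffice. The +1 rounds odd channel counts
    // up, e.g. DMask = 0x7 gives (3 + 1) / 2 = 2.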
  if (TFEIdx != -1 && MI.getOperand(TFEIdx).getImm())
  if (DstSize == Info->VDataDwords && AddrSize == Info->VAddrDwords)
  if (NewOpcode == -1)
  if (DstSize != Info->VDataDwords) {
    auto DataRCID = MCII->get(NewOpcode).operands()[VDataIdx].RegClass;
    Vdata0 = (VdataSub0 != 0) ? VdataSub0 : Vdata0;
  int VAddrSAIdx = IsPartialNSA ? (RsrcIdx - 1) : VAddr0Idx;
  if (STI.hasFeature(AMDGPU::FeatureNSAEncoding) && (!IsNSA || IsPartialNSA) &&
      AddrSize != Info->VAddrDwords) {
    MCRegister VAddrSA = MI.getOperand(VAddrSAIdx).getReg();
    VAddrSA = VAddrSubSA ? VAddrSubSA : VAddrSA;
    auto AddrRCID = MCII->get(NewOpcode).operands()[VAddrSAIdx].RegClass;
  MI.setOpcode(NewOpcode);
  if (NewVdata != AMDGPU::NoRegister) {
    assert(AddrSize <= Info->VAddrDwords);
    MI.erase(MI.begin() + VAddr0Idx + AddrSize,
             MI.begin() + VAddr0Idx + Info->VAddrDwords);
  unsigned Opc = MI.getOpcode();
  unsigned DescNumOps = MCII->get(Opc).getNumOperands();
  if (MI.getNumOperands() < DescNumOps &&
  if (MI.getNumOperands() < DescNumOps &&
                           AMDGPU::OpName::op_sel);
  if (MI.getNumOperands() < DescNumOps &&
                           AMDGPU::OpName::op_sel_hi);
  if (MI.getNumOperands() < DescNumOps &&
                           AMDGPU::OpName::neg_lo);
  if (MI.getNumOperands() < DescNumOps &&
                           AMDGPU::OpName::neg_hi);
  unsigned Opc = MI.getOpcode();
  unsigned DescNumOps = MCII->get(Opc).getNumOperands();
  if (MI.getNumOperands() < DescNumOps &&
  if (MI.getNumOperands() < DescNumOps &&
                           AMDGPU::OpName::src0_modifiers);
  if (MI.getNumOperands() < DescNumOps &&
                           AMDGPU::OpName::src1_modifiers);
  unsigned Opc = MI.getOpcode();
  unsigned DescNumOps = MCII->get(Opc).getNumOperands();
  if (MI.getNumOperands() < DescNumOps &&
                           AMDGPU::OpName::op_sel);
  assert(HasLiteral && "Should have decoded a literal");
  unsigned DescNumOps = Desc.getNumOperands();
                           AMDGPU::OpName::immDeferred);
  assert(DescNumOps == MI.getNumOperands());
  for (unsigned I = 0; I < DescNumOps; ++I) {
    auto &Op = MI.getOperand(I);
    auto OpType = Desc.operands()[I].OperandType;
                                         const Twine &ErrMsg) const {
                                               unsigned Val) const {
  const auto &RegCl = AMDGPUMCRegisterClasses[RegClassID];
  if (Val >= RegCl.getNumRegs())
                      ": unknown register " + Twine(Val));
                                                unsigned Val) const {
  switch (SRegClassID) {
  case AMDGPU::SGPR_32RegClassID:
  case AMDGPU::TTMP_32RegClassID:
  case AMDGPU::SGPR_64RegClassID:
  case AMDGPU::TTMP_64RegClassID:
  case AMDGPU::SGPR_96RegClassID:
  case AMDGPU::TTMP_96RegClassID:
  case AMDGPU::SGPR_128RegClassID:
  case AMDGPU::TTMP_128RegClassID:
  case AMDGPU::SGPR_256RegClassID:
  case AMDGPU::TTMP_256RegClassID:
  case AMDGPU::SGPR_288RegClassID:
  case AMDGPU::TTMP_288RegClassID:
  case AMDGPU::SGPR_320RegClassID:
  case AMDGPU::TTMP_320RegClassID:
  case AMDGPU::SGPR_352RegClassID:
  case AMDGPU::TTMP_352RegClassID:
  case AMDGPU::SGPR_384RegClassID:
  case AMDGPU::TTMP_384RegClassID:
  case AMDGPU::SGPR_512RegClassID:
  case AMDGPU::TTMP_512RegClassID:
  if (Val % (1 << shift)) {
    *CommentStream << "Warning: " << getRegClassName(SRegClassID)
                   << ": scalar reg isn't aligned " << Val;
  unsigned RegIdxInVGPR16 = RegIdx * 2 + (IsHi ? 1 : 0);
         "Should only decode multiple kimm with VOPD, check VSrc operand types");
    return errOperand(Val, "More than one unique literal is illegal");
  if (Bytes.size() < 4) {
    return errOperand(0, "cannot read literal, inst bytes left " +
  Literal = Literal64 = eatBytes<uint32_t>(Bytes);
  using namespace AMDGPU::EncValues;

  assert(Imm >= INLINE_INTEGER_C_MIN && Imm <= INLINE_INTEGER_C_MAX);
  return MCOperand::createImm((Imm <= INLINE_INTEGER_C_POSITIVE_MAX) ?
    (static_cast<int64_t>(Imm) - INLINE_INTEGER_C_MIN) :
    (INLINE_INTEGER_C_POSITIVE_MAX - static_cast<int64_t>(Imm)));
    return llvm::bit_cast<uint32_t>(0.5f);
    return llvm::bit_cast<uint32_t>(-0.5f);
    return llvm::bit_cast<uint32_t>(1.0f);
    return llvm::bit_cast<uint32_t>(-1.0f);
    return llvm::bit_cast<uint32_t>(2.0f);
    return llvm::bit_cast<uint32_t>(-2.0f);
    return llvm::bit_cast<uint32_t>(4.0f);
    return llvm::bit_cast<uint32_t>(-4.0f);
    return llvm::bit_cast<uint64_t>(0.5);
    return llvm::bit_cast<uint64_t>(-0.5);
    return llvm::bit_cast<uint64_t>(1.0);
    return llvm::bit_cast<uint64_t>(-1.0);
    return llvm::bit_cast<uint64_t>(2.0);
    return llvm::bit_cast<uint64_t>(-2.0);
    return llvm::bit_cast<uint64_t>(4.0);
    return llvm::bit_cast<uint64_t>(-4.0);
    return 0x3fc45f306dc9c882;
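// The raw constant above is the IEEE-754 double bit pattern the hardware
// defines for the 1/(2*pi) inline constant (approximately 0.15915494309),
// which has no exact short literal spelling, so it is returned as raw bits.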
  using namespace AMDGPU;
    return VGPR_32RegClassID;
  case OPWV232:
    return VReg_64RegClassID;
  case OPW96:
    return VReg_96RegClassID;
  case OPW128:
    return VReg_128RegClassID;
  case OPW192:
    return VReg_192RegClassID;
  case OPW160:
    return VReg_160RegClassID;
  case OPW256:
    return VReg_256RegClassID;
  case OPW288:
    return VReg_288RegClassID;
  case OPW320:
    return VReg_320RegClassID;
  case OPW352:
    return VReg_352RegClassID;
  case OPW384:
    return VReg_384RegClassID;
  case OPW512:
    return VReg_512RegClassID;
  case OPW1024:
    return VReg_1024RegClassID;
  using namespace AMDGPU;
    return AGPR_32RegClassID;
  case OPWV232:
    return AReg_64RegClassID;
  case OPW96:
    return AReg_96RegClassID;
  case OPW128:
    return AReg_128RegClassID;
  case OPW160:
    return AReg_160RegClassID;
  case OPW256:
    return AReg_256RegClassID;
  case OPW288:
    return AReg_288RegClassID;
  case OPW320:
    return AReg_320RegClassID;
  case OPW352:
    return AReg_352RegClassID;
  case OPW384:
    return AReg_384RegClassID;
  case OPW512:
    return AReg_512RegClassID;
  case OPW1024:
    return AReg_1024RegClassID;
  using namespace AMDGPU;
    return SGPR_32RegClassID;
  case OPWV232:
    return SGPR_64RegClassID;
  case OPW96:
    return SGPR_96RegClassID;
  case OPW128:
    return SGPR_128RegClassID;
  case OPW160:
    return SGPR_160RegClassID;
  case OPW256:
    return SGPR_256RegClassID;
  case OPW288:
    return SGPR_288RegClassID;
  case OPW320:
    return SGPR_320RegClassID;
  case OPW352:
    return SGPR_352RegClassID;
  case OPW384:
    return SGPR_384RegClassID;
  case OPW512:
    return SGPR_512RegClassID;
  using namespace AMDGPU;
    return TTMP_32RegClassID;
  case OPWV232:
    return TTMP_64RegClassID;
  case OPW128:
    return TTMP_128RegClassID;
  case OPW256:
    return TTMP_256RegClassID;
  case OPW288:
    return TTMP_288RegClassID;
  case OPW320:
    return TTMP_320RegClassID;
  case OPW352:
    return TTMP_352RegClassID;
  case OPW384:
    return TTMP_384RegClassID;
  case OPW512:
    return TTMP_512RegClassID;
  using namespace AMDGPU::EncValues;

  unsigned TTmpMin = isGFX9Plus() ? TTMP_GFX9PLUS_MIN : TTMP_VI_MIN;
  unsigned TTmpMax = isGFX9Plus() ? TTMP_GFX9PLUS_MAX : TTMP_VI_MAX;

  return (TTmpMin <= Val && Val <= TTmpMax) ? Val - TTmpMin : -1;
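// Usage note, derived from the expression above: getTTmpIdx returns the
// zero-based trap-temp index when Val falls inside the subtarget's ttmp
// encoding window, so Val == TTmpMin decodes as ttmp0 and TTmpMin + 5 as
// ttmp5; any Val outside the window reports -1 (not a ttmp register).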
                                          bool MandatoryLiteral,
  using namespace AMDGPU::EncValues;
  bool IsAGPR = Val & 512;
  if (VGPR_MIN <= Val && Val <= VGPR_MAX) {
                                                 bool MandatoryLiteral, unsigned ImmWidth,
  assert(Val < (1 << 8) && "9-bit Src encoding when Val{8} is 0");
  using namespace AMDGPU::EncValues;

  static_assert(SGPR_MIN == 0);
  if (INLINE_INTEGER_C_MIN <= Val && Val <= INLINE_INTEGER_C_MAX)
  if (INLINE_FLOATING_C_MIN <= Val && Val <= INLINE_FLOATING_C_MAX)
  if (Val == LITERAL_CONST) {
    if (MandatoryLiteral)
                                                unsigned Val) const {
  Val |= ~XDstReg & 1;
  using namespace AMDGPU;
  using namespace AMDGPU;
  using namespace AMDGPU;
  using namespace AMDGPU::SDWA;
  using namespace AMDGPU::EncValues;

  if (int(SDWA9EncValues::SRC_VGPR_MIN) <= int(Val) &&
      Val <= SDWA9EncValues::SRC_VGPR_MAX) {
                            Val - SDWA9EncValues::SRC_VGPR_MIN);
  if (SDWA9EncValues::SRC_SGPR_MIN <= Val &&
      Val <= (isGFX10Plus() ? SDWA9EncValues::SRC_SGPR_MAX_GFX10
                            : SDWA9EncValues::SRC_SGPR_MAX_SI)) {
                             Val - SDWA9EncValues::SRC_SGPR_MIN);
  if (SDWA9EncValues::SRC_TTMP_MIN <= Val &&
      Val <= SDWA9EncValues::SRC_TTMP_MAX) {
                             Val - SDWA9EncValues::SRC_TTMP_MIN);

  const unsigned SVal = Val - SDWA9EncValues::SRC_SGPR_MIN;

  if (INLINE_INTEGER_C_MIN <= SVal && SVal <= INLINE_INTEGER_C_MAX)
  if (INLINE_FLOATING_C_MIN <= SVal && SVal <= INLINE_FLOATING_C_MAX)
  using namespace AMDGPU::SDWA;

         "SDWAVopcDst should be present only on GFX9+");
  bool IsWave32 = STI.hasFeature(AMDGPU::FeatureWavefrontSize32);

  if (Val & SDWA9EncValues::VOPC_DST_VCC_MASK) {
    Val &= SDWA9EncValues::VOPC_DST_SGPR_MASK;
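    // Behavior sketch: when the VCC bit of the SDWA VOPC destination encoding
    // is set, the masked Val selects VCC itself (VCC_LO under wave32, the full
    // 64-bit VCC under wave64); otherwise it names an ordinary SGPR destination.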
  auto [Version, W64, W32, MDP] = Encoding::decode(Imm);

  if (Encoding::encode(Version, W64, W32, MDP) != Imm)
  if (I == Versions.end())
  return STI.hasFeature(AMDGPU::FeatureArchitectedFlatScratch);
  if (PopCount == 1) {
    S << "bit (" << (TrailingZeros + BaseBytes * CHAR_BIT) << ')';
  } else {
    S << "bits in range ("
      << (TrailingZeros + PopCount - 1 + BaseBytes * CHAR_BIT) << ':'
      << (TrailingZeros + BaseBytes * CHAR_BIT) << ')';
#define GET_FIELD(MASK) (AMDHSA_BITS_GET(FourByteBuffer, MASK))
#define PRINT_DIRECTIVE(DIRECTIVE, MASK)                                       \
  do {                                                                         \
    KdStream << Indent << DIRECTIVE " " << GET_FIELD(MASK) << '\n';            \
  } while (0)

#define PRINT_PSEUDO_DIRECTIVE_COMMENT(DIRECTIVE, MASK)                        \
  do {                                                                         \
    KdStream << Indent << MAI.getCommentString() << ' ' << DIRECTIVE " "       \
             << GET_FIELD(MASK) << '\n';                                       \
  } while (0)

#define CHECK_RESERVED_BITS_IMPL(MASK, DESC, MSG)                              \
  do {                                                                         \
    if (FourByteBuffer & (MASK)) {                                             \
      return createStringError(std::errc::invalid_argument,                    \
                               "kernel descriptor " DESC                       \
                               " reserved %s set" MSG,                         \
                               getBitRangeFromMask((MASK), 0).c_str());        \
    }                                                                          \
  } while (0)

#define CHECK_RESERVED_BITS(MASK) CHECK_RESERVED_BITS_IMPL(MASK, #MASK, "")
#define CHECK_RESERVED_BITS_MSG(MASK, MSG)                                     \
  CHECK_RESERVED_BITS_IMPL(MASK, #MASK, ", " MSG)
#define CHECK_RESERVED_BITS_DESC(MASK, DESC)                                   \
  CHECK_RESERVED_BITS_IMPL(MASK, DESC, "")
#define CHECK_RESERVED_BITS_DESC_MSG(MASK, DESC, MSG)                          \
  CHECK_RESERVED_BITS_IMPL(MASK, DESC, ", " MSG)
  using namespace amdhsa;
  uint32_t GranulatedWorkitemVGPRCount =
      GET_FIELD(COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT);
      (GranulatedWorkitemVGPRCount + 1) *
  KdStream << Indent << ".amdhsa_next_free_vgpr " << NextFreeVGPR << '\n';
  uint32_t GranulatedWavefrontSGPRCount =
      GET_FIELD(COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT);
                            "must be zero on gfx10+");
  uint32_t NextFreeSGPR = (GranulatedWavefrontSGPRCount + 1) *
  KdStream << Indent << ".amdhsa_reserve_vcc " << 0 << '\n';
    KdStream << Indent << ".amdhsa_reserve_flat_scratch " << 0 << '\n';
  KdStream << Indent << ".amdhsa_reserve_xnack_mask " << 0 << '\n';
  KdStream << Indent << ".amdhsa_next_free_sgpr " << NextFreeSGPR << "\n";
                  COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_32);
                  COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_16_64);
                  COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_32);
                  COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_16_64);
                    COMPUTE_PGM_RSRC1_GFX6_GFX11_ENABLE_DX10_CLAMP);
                    COMPUTE_PGM_RSRC1_GFX6_GFX11_ENABLE_IEEE_MODE);
    PRINT_DIRECTIVE(".amdhsa_fp16_overflow", COMPUTE_PGM_RSRC1_GFX9_PLUS_FP16_OVFL);
                                  "COMPUTE_PGM_RSRC1", "must be zero pre-gfx9");
                                  "COMPUTE_PGM_RSRC1", "must be zero pre-gfx10");
                    COMPUTE_PGM_RSRC1_GFX10_PLUS_WGP_MODE);
    PRINT_DIRECTIVE(".amdhsa_memory_ordered", COMPUTE_PGM_RSRC1_GFX10_PLUS_MEM_ORDERED);
    PRINT_DIRECTIVE(".amdhsa_forward_progress", COMPUTE_PGM_RSRC1_GFX10_PLUS_FWD_PROGRESS);
                    COMPUTE_PGM_RSRC1_GFX12_PLUS_ENABLE_WG_RR_EN);
  using namespace amdhsa;
                    COMPUTE_PGM_RSRC2_ENABLE_PRIVATE_SEGMENT);
    PRINT_DIRECTIVE(".amdhsa_system_sgpr_private_segment_wavefront_offset",
                    COMPUTE_PGM_RSRC2_ENABLE_PRIVATE_SEGMENT);
                  COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_X);
                  COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Y);
                  COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Z);
                  COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_INFO);
                  COMPUTE_PGM_RSRC2_ENABLE_VGPR_WORKITEM_ID);
      ".amdhsa_exception_fp_ieee_invalid_op",
      COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INVALID_OPERATION);
                  COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_FP_DENORMAL_SOURCE);
      ".amdhsa_exception_fp_ieee_div_zero",
      COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_DIVISION_BY_ZERO);
                  COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_OVERFLOW);
                  COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_UNDERFLOW);
                  COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INEXACT);
                  COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_INT_DIVIDE_BY_ZERO);
  using namespace amdhsa;
    KdStream << Indent << ".amdhsa_accum_offset "
             << (GET_FIELD(COMPUTE_PGM_RSRC3_GFX90A_ACCUM_OFFSET) + 1) * 4
    PRINT_DIRECTIVE(".amdhsa_tg_split", COMPUTE_PGM_RSRC3_GFX90A_TG_SPLIT);
                                  "COMPUTE_PGM_RSRC3", "must be zero on gfx90a");
                                  "COMPUTE_PGM_RSRC3", "must be zero on gfx90a");
    if (!EnableWavefrontSize32 || !*EnableWavefrontSize32) {
                      COMPUTE_PGM_RSRC3_GFX10_GFX11_SHARED_VGPR_COUNT);
          "SHARED_VGPR_COUNT", COMPUTE_PGM_RSRC3_GFX10_GFX11_SHARED_VGPR_COUNT);
                                    "COMPUTE_PGM_RSRC3", "must be zero on gfx12+");
                                      COMPUTE_PGM_RSRC3_GFX11_INST_PREF_SIZE);
                                      COMPUTE_PGM_RSRC3_GFX11_TRAP_ON_START);
                                      COMPUTE_PGM_RSRC3_GFX11_TRAP_ON_END);
          "INST_PREF_SIZE", COMPUTE_PGM_RSRC3_GFX12_PLUS_INST_PREF_SIZE);
                                    "COMPUTE_PGM_RSRC3", "must be zero on gfx10");
                                  "COMPUTE_PGM_RSRC3", "must be zero on gfx10+");
                                      COMPUTE_PGM_RSRC3_GFX12_PLUS_GLG_EN);
                                    "COMPUTE_PGM_RSRC3", "must be zero on gfx10 or gfx11");
                                  "COMPUTE_PGM_RSRC3", "must be zero on gfx10+");
                                      COMPUTE_PGM_RSRC3_GFX11_PLUS_IMAGE_OP);
                                    "COMPUTE_PGM_RSRC3", "must be zero on gfx10");
  } else if (FourByteBuffer) {
    return createStringError(
        std::errc::invalid_argument,
        "kernel descriptor COMPUTE_PGM_RSRC3 must be all zero before gfx9");
#undef PRINT_PSEUDO_DIRECTIVE_COMMENT
#undef PRINT_DIRECTIVE
#undef GET_FIELD

#undef CHECK_RESERVED_BITS_IMPL
#undef CHECK_RESERVED_BITS
#undef CHECK_RESERVED_BITS_MSG
#undef CHECK_RESERVED_BITS_DESC
#undef CHECK_RESERVED_BITS_DESC_MSG
static Error createReservedKDBitsError(uint32_t Mask, unsigned BaseBytes,
                                       const char *Msg = "") {
  return createStringError(
      std::errc::invalid_argument, "kernel descriptor reserved %s set%s%s",
      getBitRangeFromMask(Mask, BaseBytes).c_str(), *Msg ? ", " : "", Msg);
}

static Error createReservedKDBytesError(unsigned BaseInBytes,
                                        unsigned WidthInBytes) {
  return createStringError(
      std::errc::invalid_argument,
      "kernel descriptor reserved bits in range (%u:%u) set",
      (BaseInBytes + WidthInBytes) * CHAR_BIT - 1, BaseInBytes * CHAR_BIT);
}
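// Example, following the printf format above: BaseInBytes = 40 with
// WidthInBytes = 4 reports "kernel descriptor reserved bits in range
// (351:320) set", since (40 + 4) * 8 - 1 = 351 and 40 * 8 = 320.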
#define PRINT_DIRECTIVE(DIRECTIVE, MASK)                                       \
  do {                                                                         \
    KdStream << Indent << DIRECTIVE " "                                        \
             << ((TwoByteBuffer & MASK) >> (MASK##_SHIFT)) << '\n';            \
  } while (0)
  switch (Cursor.tell()) {
    FourByteBuffer = DE.getU32(Cursor);
    KdStream << Indent << ".amdhsa_group_segment_fixed_size " << FourByteBuffer
    FourByteBuffer = DE.getU32(Cursor);
    KdStream << Indent << ".amdhsa_private_segment_fixed_size "
             << FourByteBuffer << '\n';
    FourByteBuffer = DE.getU32(Cursor);
    KdStream << Indent << ".amdhsa_kernarg_size "
             << FourByteBuffer << '\n';
    ReservedBytes = DE.getBytes(Cursor, 4);
    for (int I = 0; I < 4; ++I) {
      if (ReservedBytes[I] != 0)
    ReservedBytes = DE.getBytes(Cursor, 20);
    for (int I = 0; I < 20; ++I) {
      if (ReservedBytes[I] != 0)
    FourByteBuffer = DE.getU32(Cursor);
    FourByteBuffer = DE.getU32(Cursor);
    FourByteBuffer = DE.getU32(Cursor);
    using namespace amdhsa;
    TwoByteBuffer = DE.getU16(Cursor);
                    KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER);
                    KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR);
                    KERNEL_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR);
                    KERNEL_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR);
                    KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID);
                    KERNEL_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT);
                    KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE);

    if (TwoByteBuffer & KERNEL_CODE_PROPERTY_RESERVED0)
        (TwoByteBuffer & KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32)) {
                                       KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32,
                      KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32);
                    KERNEL_CODE_PROPERTY_USES_DYNAMIC_STACK);

    if (TwoByteBuffer & KERNEL_CODE_PROPERTY_RESERVED1) {
    using namespace amdhsa;
    TwoByteBuffer = DE.getU16(Cursor);
    if (TwoByteBuffer & KERNARG_PRELOAD_SPEC_LENGTH) {
                      KERNARG_PRELOAD_SPEC_LENGTH);
    if (TwoByteBuffer & KERNARG_PRELOAD_SPEC_OFFSET) {
                      KERNARG_PRELOAD_SPEC_OFFSET);
    ReservedBytes = DE.getBytes(Cursor, 4);
    for (int I = 0; I < 4; ++I) {
      if (ReservedBytes[I] != 0)
#undef PRINT_DIRECTIVE

  if (Bytes.size() != 64 || KdAddress % 64 != 0)
    return createStringError(std::errc::invalid_argument,
                             "kernel descriptor must be 64-byte aligned");

  EnableWavefrontSize32 =
      amdhsa::KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32);

  KdStream << ".amdhsa_kernel " << KdName << '\n';

  while (C && C.tell() < Bytes.size()) {
  KdStream << ".end_amdhsa_kernel\n";

                             "code object v2 is not supported");
const MCExpr *AMDGPUDisassembler::createConstantSymbolExpr(StringRef Id,
  if (!Sym->isVariable()) {
  bool Valid = Sym->getVariableValue()->evaluateAsAbsolute(Res);
  if (!Valid || Res != Val)
  if (Result != Symbols->end()) {
  ReferencedAddresses.push_back(static_cast<uint64_t>(Value));
    std::unique_ptr<MCRelocationInfo> &&RelInfo) {