#define DEBUG_TYPE "amdgpu-disassembler"
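// The maximum SGPR encoding differs between generations: GFX10+ exposes more
// addressable SGPRs than SI-era targets, so the limit is chosen per subtarget.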
#define SGPR_MAX                                                               \
  (isGFX10Plus() ? AMDGPU::EncValues::SGPR_MAX_GFX10                           \
                 : AMDGPU::EncValues::SGPR_MAX_SI)
  if (!STI.hasFeature(AMDGPU::FeatureWavefrontSize64) &&
      !STI.hasFeature(AMDGPU::FeatureWavefrontSize32)) {
      MRI(*Ctx.getRegisterInfo()), MAI(*Ctx.getAsmInfo()),
      TargetMaxInstBytes(MAI.getMaxInstLength(&STI)),
      CodeObjectVersion(AMDGPU::getDefaultAMDHSACodeObjectVersion()) {
  for (auto [Symbol, Code] : AMDGPU::UCVersion::getGFXVersions())
    createConstantSymbolExpr(Symbol, Code);
  UCVersionW64Expr = createConstantSymbolExpr("UC_VERSION_W64_BIT", 0x2000);
  UCVersionW32Expr = createConstantSymbolExpr("UC_VERSION_W32_BIT", 0x4000);
  UCVersionMDPExpr = createConstantSymbolExpr("UC_VERSION_MDP_BIT", 0x8000);
  std::advance(I, OpIdx);
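// SOPP branch targets are 16-bit signed word (4-byte) offsets relative to the
// address of the following instruction; hence the 18-bit APInt holding
// Imm * 4 and the +4 below.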
    APInt SignedOffset(18, Imm * 4, true);
    int64_t Offset = (SignedOffset.sext(64) + 4 + Addr).getSExtValue();

    if (DAsm->tryAddingSymbolicOperand(Inst, Offset, Addr, true, 2, 2, 0))
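// decodeSMEMOffset: the width and signedness of the encoded SMEM immediate
// offset vary by generation, so each target family extends it differently.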
  if (DAsm->isGFX12Plus()) { // GFX12 supports 24-bit signed offsets.
    Offset = SignExtend64<24>(Imm);
  } else if (DAsm->isVI()) { // VI supports 20-bit unsigned offsets.
    Offset = Imm & 0xFFFFF;
  } else { // GFX9+ supports 21-bit signed offsets.
    Offset = SignExtend64<21>(Imm);
  return addOperand(Inst, DAsm->decodeBoolReg(Val));

  return addOperand(Inst, DAsm->decodeSplitBarrier(Val));

  return addOperand(Inst, DAsm->decodeDpp8FI(Val));
#define DECODE_OPERAND(StaticDecoderName, DecoderName)                         \
  static DecodeStatus StaticDecoderName(MCInst &Inst, unsigned Imm,            \
                                        uint64_t /*Addr*/,                     \
                                        const MCDisassembler *Decoder) {       \
    auto DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);              \
    return addOperand(Inst, DAsm->DecoderName(Imm));                           \
  }
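// DECODE_OPERAND stamps out a thin static trampoline that forwards the raw
// encoding to an AMDGPUDisassembler method. For example, DECODE_SDWA(Src32)
// below expands (via this macro) to a static decodeSDWASrc32() wrapper that
// calls DAsm->decodeSDWASrc32(Imm).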
#define DECODE_OPERAND_REG_8(RegClass)                                         \
  static DecodeStatus Decode##RegClass##RegisterClass(                         \
      MCInst &Inst, unsigned Imm, uint64_t /*Addr*/,                           \
      const MCDisassembler *Decoder) {                                         \
    assert(Imm < (1 << 8) && "8-bit encoding");                                \
    auto DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);              \
    return addOperand(                                                         \
        Inst, DAsm->createRegOperand(AMDGPU::RegClass##RegClassID, Imm));      \
  }
#define DECODE_SrcOp(Name, EncSize, OpWidth, EncImm, MandatoryLiteral,         \
                     ImmWidth)                                                 \
  static DecodeStatus Name(MCInst &Inst, unsigned Imm, uint64_t /*Addr*/,      \
                           const MCDisassembler *Decoder) {                    \
    assert(Imm < (1 << EncSize) && #EncSize "-bit encoding");                  \
    auto DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);              \
    return addOperand(Inst,                                                    \
                      DAsm->decodeSrcOp(AMDGPUDisassembler::OpWidth, EncImm,   \
                                        MandatoryLiteral, ImmWidth));          \
  }
static DecodeStatus decodeSrcOp(MCInst &Inst, unsigned EncSize,
                                AMDGPUDisassembler::OpWidthTy OpWidth,
                                unsigned Imm, unsigned EncImm,
                                bool MandatoryLiteral, unsigned ImmWidth,
                                AMDGPU::OperandSemantics Sema,
                                const MCDisassembler *Decoder) {
  assert(Imm < (1U << EncSize) && "Operand doesn't fit encoding!");
  auto DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
  return addOperand(Inst, DAsm->decodeSrcOp(OpWidth, EncImm, MandatoryLiteral,
                                            ImmWidth, Sema));
}
#define DECODE_OPERAND_REG_7(RegClass, OpWidth)                                \
  DECODE_SrcOp(Decode##RegClass##RegisterClass, 7, OpWidth, Imm, false, 0)
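// A 7-bit encoding addresses a register directly: the encoded value is passed
// through decodeSrcOp unchanged, with no mandatory literal and an immediate
// width of zero.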
template <AMDGPUDisassembler::OpWidthTy OpWidth>
template <AMDGPUDisassembler::OpWidthTy OpWidth>
static DecodeStatus decodeSrcReg9(MCInst &Inst, unsigned Imm,
                                  uint64_t /*Addr*/,
                                  const MCDisassembler *Decoder) {
  return decodeSrcOp(Inst, 9, OpWidth, Imm, Imm, false, 0,
                     AMDGPU::OperandSemantics::INT, Decoder);
}
template <AMDGPUDisassembler::OpWidthTy OpWidth>
static DecodeStatus decodeSrcA9(MCInst &Inst, unsigned Imm, uint64_t /*Addr*/,
                                const MCDisassembler *Decoder) {
  return decodeSrcOp(Inst, 9, OpWidth, Imm, Imm | 512, false, 0,
                     AMDGPU::OperandSemantics::INT, Decoder);
}
template <AMDGPUDisassembler::OpWidthTy OpWidth>
static DecodeStatus decodeSrcAV10(MCInst &Inst, unsigned Imm,
                                  uint64_t /*Addr*/,
                                  const MCDisassembler *Decoder) {
  return decodeSrcOp(Inst, 10, OpWidth, Imm, Imm, false, 0,
                     AMDGPU::OperandSemantics::INT, Decoder);
}
  return decodeSrcOp(Inst, 9, OpWidth, Imm, Imm, false, ImmWidth,
                     (AMDGPU::OperandSemantics)OperandSemantics, Decoder);

  return decodeSrcOp(Inst, 9, OpWidth, Imm, Imm | 512, false, ImmWidth,
                     (AMDGPU::OperandSemantics)OperandSemantics, Decoder);

  return decodeSrcOp(Inst, 9, OpWidth, Imm, Imm, true, ImmWidth,
                     (AMDGPU::OperandSemantics)OperandSemantics, Decoder);
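// True16 VGPR operands: the low bits select the 32-bit VGPR and one high bit
// selects its lo/hi 16-bit half (bit 9 in the 10-bit form below, bit 7 in the
// 8-bit Lo128 form).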
  assert(isUInt<10>(Imm) && "10-bit encoding expected");
  assert((Imm & (1 << 8)) == 0 && "Imm{8} should not be used");

  bool IsHi = Imm & (1 << 9);
  unsigned RegIdx = Imm & 0xff;
  auto DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
  return addOperand(Inst, DAsm->createVGPR16Operand(RegIdx, IsHi));
  assert(isUInt<8>(Imm) && "8-bit encoding expected");

  bool IsHi = Imm & (1 << 7);
  unsigned RegIdx = Imm & 0x7f;
  auto DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
  return addOperand(Inst, DAsm->createVGPR16Operand(RegIdx, IsHi));
  assert(isUInt<9>(Imm) && "9-bit encoding expected");

  auto DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
  bool IsVGPR = Imm & (1 << 8);
  if (IsVGPR) {
    bool IsHi = Imm & (1 << 7);
    unsigned RegIdx = Imm & 0x7f;
    return addOperand(Inst, DAsm->createVGPR16Operand(RegIdx, IsHi));
  }
  return addOperand(Inst, DAsm->decodeNonVGPRSrcOp(AMDGPUDisassembler::OPW16,
                                                   Imm & 0xFF, false, 16));
  assert(isUInt<10>(Imm) && "10-bit encoding expected");

  auto DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
  bool IsVGPR = Imm & (1 << 8);
  if (IsVGPR) {
    bool IsHi = Imm & (1 << 9);
    unsigned RegIdx = Imm & 0xff;
    return addOperand(Inst, DAsm->createVGPR16Operand(RegIdx, IsHi));
  }
  return addOperand(Inst, DAsm->decodeNonVGPRSrcOp(AMDGPUDisassembler::OPW16,
                                                   Imm & 0xFF, false, 16));
  return addOperand(Inst, DAsm->decodeMandatoryLiteralConstant(Imm));

  return addOperand(Inst, DAsm->decodeVOPDDstYOp(Inst, Val));
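// IsAGPROperand: registers wider than 32 bits are identified by their sub0
// component; anything in the AGPR0..AGPR255 range is an accumulation VGPR.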
  unsigned Sub = MRI->getSubReg(Op.getReg(), AMDGPU::sub0);
  auto Reg = Sub ? Sub : Op.getReg();
  return Reg >= AMDGPU::AGPR0 && Reg <= AMDGPU::AGPR255;
  if (!DAsm->isGFX90A()) {

  uint64_t TSFlags = DAsm->getMCII()->get(Opc).TSFlags;
  uint16_t DataNameIdx = (TSFlags & SIInstrFlags::DS) ? AMDGPU::OpName::data0
                                                      : AMDGPU::OpName::vdata;

  return addOperand(Inst, DAsm->decodeSrcOp(Opw, Imm | 256));
template <AMDGPUDisassembler::OpWidthTy Opw>

  assert(Imm < (1 << 9) && "9-bit encoding");
#define DECODE_SDWA(DecName)                                                   \
  DECODE_OPERAND(decodeSDWA##DecName, decodeSDWA##DecName)
  return addOperand(Inst, DAsm->decodeVersionImm(Imm));
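// The TableGen-generated decoder tables consumed by tryDecodeInst.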
#include "AMDGPUGenDisassemblerTables.inc"
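// eatBytes / eat12Bytes consume the little-endian instruction words from the
// front of the byte stream, shrinking the ArrayRef as they go.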
  T Res = support::endian::read<T, llvm::endianness::little>(Bytes.data());
  Bytes = Bytes.slice(sizeof(T));

  uint64_t Lo =
      support::endian::read<uint64_t, llvm::endianness::little>(Bytes.data());
  Bytes = Bytes.slice(8);
  uint64_t Hi =
      support::endian::read<uint32_t, llvm::endianness::little>(Bytes.data());
  Bytes = Bytes.slice(4);
  unsigned MaxInstBytesNum =
      std::min((size_t)TargetMaxInstBytes, Bytes_.size());
  Bytes = Bytes_.slice(0, MaxInstBytesNum);
  Size = std::min((size_t)4, Bytes_.size());
  Bytes = Bytes_.slice(0, MaxInstBytesNum);

  if (Bytes.size() >= 8) {
    const uint64_t QW = eatBytes<uint64_t>(Bytes);
  Bytes = Bytes_.slice(0, MaxInstBytesNum);

  if (Bytes.size() >= 4) {
    const uint32_t DW = eatBytes<uint32_t>(Bytes);
                       AMDGPU::OpName::src2_modifiers);

  if (MI.getOpcode() == AMDGPU::V_CVT_SR_BF8_F32_e64_dpp ||
      MI.getOpcode() == AMDGPU::V_CVT_SR_FP8_F32_e64_dpp) {
                         AMDGPU::OpName::src2_modifiers);
  if (MCII->get(MI.getOpcode()).TSFlags &
                       AMDGPU::OpName::cpol);
    if (MI.getNumOperands() <= (unsigned)CPolPos) {
                           AMDGPU::OpName::cpol);
      MI.getOperand(CPolPos).setImm(MI.getOperand(CPolPos).getImm() | CPol);
  if ((MCII->get(MI.getOpcode()).TSFlags &
    if (TFEOpIdx != -1) {
      auto TFEIter = MI.begin();
      std::advance(TFEIter, TFEOpIdx);

  if (MCII->get(MI.getOpcode()).TSFlags &
    if (SWZOpIdx != -1) {
      auto SWZIter = MI.begin();
      std::advance(SWZIter, SWZOpIdx);
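// GFX10 NSA (non-sequential address) MIMG encodings append extra dwords that
// hold individual address VGPRs; they are decoded here, growing the total
// instruction size by 4 * NSAWords bytes.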
    unsigned NSAArgs = RsrcIdx - VAddr0Idx - 1;
    if (VAddr0Idx >= 0 && NSAArgs > 0) {
      unsigned NSAWords = (NSAArgs + 3) / 4;
      if (Bytes.size() < 4 * NSAWords)
      for (unsigned i = 0; i < NSAArgs; ++i) {
        const unsigned VAddrIdx = VAddr0Idx + 1 + i;
            MCII->get(MI.getOpcode()).operands()[VAddrIdx].RegClass;
      Bytes = Bytes.slice(4 * NSAWords);
  if (MCII->get(MI.getOpcode()).TSFlags &
                       AMDGPU::OpName::vdst_in);
  if (VDstIn_Idx != -1) {
    int Tied = MCII->get(MI.getOpcode()).getOperandConstraint(VDstIn_Idx,
    if (Tied != -1 && (MI.getNumOperands() <= (unsigned)VDstIn_Idx ||
                       !MI.getOperand(VDstIn_Idx).isReg() ||
                       MI.getOperand(VDstIn_Idx).getReg() !=
                           MI.getOperand(Tied).getReg())) {
      if (MI.getNumOperands() > (unsigned)VDstIn_Idx)
        MI.erase(&MI.getOperand(VDstIn_Idx));
                         AMDGPU::OpName::vdst_in);
  if (ImmLitIdx != -1 && !IsSOPK)

  Size = MaxInstBytesNum - Bytes.size();
  if (MI.getOpcode() == AMDGPU::V_INTERP_P10_F16_F32_inreg_gfx11 ||
      MI.getOpcode() == AMDGPU::V_INTERP_P10_F16_F32_inreg_gfx12 ||
      MI.getOpcode() == AMDGPU::V_INTERP_P10_RTZ_F16_F32_inreg_gfx11 ||
      MI.getOpcode() == AMDGPU::V_INTERP_P10_RTZ_F16_F32_inreg_gfx12 ||
      MI.getOpcode() == AMDGPU::V_INTERP_P2_F16_F32_inreg_gfx11 ||
      MI.getOpcode() == AMDGPU::V_INTERP_P2_F16_F32_inreg_gfx12 ||
      MI.getOpcode() == AMDGPU::V_INTERP_P2_RTZ_F16_F32_inreg_gfx11 ||
      MI.getOpcode() == AMDGPU::V_INTERP_P2_RTZ_F16_F32_inreg_gfx12) {
                       AMDGPU::OpName::sdst);
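// VOPModifiers collects the op_sel / op_sel_hi (and, for VOP3P, neg_lo /
// neg_hi) bits that are scattered across the per-source *_modifiers operands;
// collectVOPModifiers below folds them into single fields.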
  unsigned OpSelHi = 0;

static VOPModifiers collectVOPModifiers(const MCInst &MI,
                                        bool IsVOP3P = false) {
  unsigned Opc = MI.getOpcode();
  const int ModOps[] = {AMDGPU::OpName::src0_modifiers,
                        AMDGPU::OpName::src1_modifiers,
                        AMDGPU::OpName::src2_modifiers};
  for (int J = 0; J < 3; ++J) {
    unsigned Val = MI.getOperand(OpIdx).getImm();
  const unsigned Opc = MI.getOpcode();

  constexpr std::array<std::tuple<int, int, unsigned>, 4> OpAndOpMods = {
      {{AMDGPU::OpName::src0, AMDGPU::OpName::src0_modifiers,
       {AMDGPU::OpName::src1, AMDGPU::OpName::src1_modifiers,
       {AMDGPU::OpName::src2, AMDGPU::OpName::src2_modifiers,
       {AMDGPU::OpName::vdst, AMDGPU::OpName::src0_modifiers,
  for (const auto &[OpName, OpModsName, OpSelMask] : OpAndOpMods) {
    if (OpIdx == -1 || OpModsIdx == -1)
    const MCOperand &OpMods = MI.getOperand(OpModsIdx);
    unsigned ModVal = OpMods.getImm();
    if (ModVal & OpSelMask) {
  constexpr int DST_IDX = 0;
  auto Opcode = MI.getOpcode();
  const auto &Desc = MCII->get(Opcode);
  if (OldIdx != -1 && Desc.getOperandConstraint(

  assert(MI.getNumOperands() + 1 < MCII->get(MI.getOpcode()).getNumOperands());
                       AMDGPU::OpName::src2_modifiers);
  unsigned Opc = MI.getOpcode();

  unsigned DescNumOps = MCII->get(Opc).getNumOperands();
  if (MI.getNumOperands() < DescNumOps &&
                         AMDGPU::OpName::op_sel);
  if (MI.getNumOperands() < DescNumOps &&
                         AMDGPU::OpName::src0_modifiers);
  if (MI.getNumOperands() < DescNumOps &&
                         AMDGPU::OpName::src1_modifiers);
  unsigned Opc = MI.getOpcode();
  unsigned DescNumOps = MCII->get(Opc).getNumOperands();
  if (MI.getNumOperands() < DescNumOps &&
                         AMDGPU::OpName::op_sel);
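// convertMIMGInst: MIMG encodings fix the vdata/vaddr register tuple sizes,
// but the real sizes depend on dmask, d16 and tfe, so the opcode is
// re-selected for the actual VData/VAddr dword counts and the operand lists
// are trimmed or widened to match.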
  auto TSFlags = MCII->get(MI.getOpcode()).TSFlags;
                       AMDGPU::OpName::vdst);
                       AMDGPU::OpName::vdata);
                 : AMDGPU::OpName::rsrc;
                       AMDGPU::OpName::dmask);
                       AMDGPU::OpName::tfe);
                       AMDGPU::OpName::d16);
  if (BaseOpcode->BVH) {

  bool IsAtomic = (VDstIdx != -1);

  bool IsPartialNSA = false;
  unsigned AddrSize = Info->VAddrDwords;

  const bool IsA16 = (A16Idx != -1 && MI.getOperand(A16Idx).getImm());
    IsNSA = Info->MIMGEncoding == AMDGPU::MIMGEncGfx10NSA ||
            Info->MIMGEncoding == AMDGPU::MIMGEncGfx11NSA ||
            Info->MIMGEncoding == AMDGPU::MIMGEncGfx12;
    if (!IsVSample && AddrSize > 12)
    if (AddrSize > Info->VAddrDwords) {
      IsPartialNSA = true;

  unsigned DMask = MI.getOperand(DMaskIdx).getImm() & 0xf;
  unsigned DstSize = IsGather4 ? 4 : std::max(llvm::popcount(DMask), 1);

  bool D16 = D16Idx >= 0 && MI.getOperand(D16Idx).getImm();
    DstSize = (DstSize + 1) / 2;

  if (TFEIdx != -1 && MI.getOperand(TFEIdx).getImm())

  if (DstSize == Info->VDataDwords && AddrSize == Info->VAddrDwords)
  if (NewOpcode == -1)

  unsigned NewVdata = AMDGPU::NoRegister;
  if (DstSize != Info->VDataDwords) {
    auto DataRCID = MCII->get(NewOpcode).operands()[VDataIdx].RegClass;

    unsigned Vdata0 = MI.getOperand(VDataIdx).getReg();
    unsigned VdataSub0 = MRI.getSubReg(Vdata0, AMDGPU::sub0);
    Vdata0 = (VdataSub0 != 0) ? VdataSub0 : Vdata0;

    if (NewVdata == AMDGPU::NoRegister) {
  int VAddrSAIdx = IsPartialNSA ? (RsrcIdx - 1) : VAddr0Idx;
  unsigned NewVAddrSA = AMDGPU::NoRegister;
  if (STI.hasFeature(AMDGPU::FeatureNSAEncoding) && (!IsNSA || IsPartialNSA) &&
      AddrSize != Info->VAddrDwords) {
    unsigned VAddrSA = MI.getOperand(VAddrSAIdx).getReg();
    unsigned VAddrSubSA = MRI.getSubReg(VAddrSA, AMDGPU::sub0);
    VAddrSA = VAddrSubSA ? VAddrSubSA : VAddrSA;

    auto AddrRCID = MCII->get(NewOpcode).operands()[VAddrSAIdx].RegClass;
  MI.setOpcode(NewOpcode);

  if (NewVdata != AMDGPU::NoRegister) {

    assert(AddrSize <= Info->VAddrDwords);
    MI.erase(MI.begin() + VAddr0Idx + AddrSize,
             MI.begin() + VAddr0Idx + Info->VAddrDwords);
  unsigned Opc = MI.getOpcode();
  unsigned DescNumOps = MCII->get(Opc).getNumOperands();

  if (MI.getNumOperands() < DescNumOps &&
  if (MI.getNumOperands() < DescNumOps &&
                         AMDGPU::OpName::op_sel);
  if (MI.getNumOperands() < DescNumOps &&
                         AMDGPU::OpName::op_sel_hi);
  if (MI.getNumOperands() < DescNumOps &&
                         AMDGPU::OpName::neg_lo);
  if (MI.getNumOperands() < DescNumOps &&
                         AMDGPU::OpName::neg_hi);
  unsigned Opc = MI.getOpcode();
  unsigned DescNumOps = MCII->get(Opc).getNumOperands();

  if (MI.getNumOperands() < DescNumOps &&
  if (MI.getNumOperands() < DescNumOps &&
                         AMDGPU::OpName::src0_modifiers);

  if (MI.getNumOperands() < DescNumOps &&
                         AMDGPU::OpName::src1_modifiers);
  assert(HasLiteral && "Should have decoded a literal");
  unsigned DescNumOps = Desc.getNumOperands();
                       AMDGPU::OpName::immDeferred);
  assert(DescNumOps == MI.getNumOperands());
  for (unsigned I = 0; I < DescNumOps; ++I) {
    auto &Op = MI.getOperand(I);
    auto OpType = Desc.operands()[I].OperandType;
                                       const Twine &ErrMsg) const {

                                               unsigned Val) const {
  const auto &RegCl = AMDGPUMCRegisterClasses[RegClassID];
  if (Val >= RegCl.getNumRegs())
    return errOperand(Val, Twine(getRegClassName(RegClassID)) +
                               ": unknown register " + Twine(Val));
                                                unsigned Val) const {
  switch (SRegClassID) {
  case AMDGPU::SGPR_32RegClassID:
  case AMDGPU::TTMP_32RegClassID:
  case AMDGPU::SGPR_64RegClassID:
  case AMDGPU::TTMP_64RegClassID:
  case AMDGPU::SGPR_96RegClassID:
  case AMDGPU::TTMP_96RegClassID:
  case AMDGPU::SGPR_128RegClassID:
  case AMDGPU::TTMP_128RegClassID:
  case AMDGPU::SGPR_256RegClassID:
  case AMDGPU::TTMP_256RegClassID:
  case AMDGPU::SGPR_288RegClassID:
  case AMDGPU::TTMP_288RegClassID:
  case AMDGPU::SGPR_320RegClassID:
  case AMDGPU::TTMP_320RegClassID:
  case AMDGPU::SGPR_352RegClassID:
  case AMDGPU::TTMP_352RegClassID:
  case AMDGPU::SGPR_384RegClassID:
  case AMDGPU::TTMP_384RegClassID:
  case AMDGPU::SGPR_512RegClassID:
  case AMDGPU::TTMP_512RegClassID:
  if (Val % (1 << shift)) {
    *CommentStream << "Warning: " << getRegClassName(SRegClassID)
                   << ": scalar reg isn't aligned " << Val;
  unsigned RegIdxInVGPR16 = RegIdx * 2 + (IsHi ? 1 : 0);

         "Should only decode multiple kimm with VOPD, check VSrc operand types");

  return errOperand(Val, "More than one unique literal is illegal");
  if (Bytes.size() < 4) {
    return errOperand(0, "cannot read literal, inst bytes left " +

  Literal = Literal64 = eatBytes<uint32_t>(Bytes);
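// Inline integer constants: encodings 128..192 map to 0..64 and 193..208 map
// to -1..-16, hence the two-sided mapping around INLINE_INTEGER_C_POSITIVE_MAX
// below.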
  using namespace AMDGPU::EncValues;

  assert(Imm >= INLINE_INTEGER_C_MIN && Imm <= INLINE_INTEGER_C_MAX);
  return MCOperand::createImm(
      (Imm <= INLINE_INTEGER_C_POSITIVE_MAX)
          ? (static_cast<int64_t>(Imm) - INLINE_INTEGER_C_MIN)
          : (INLINE_INTEGER_C_POSITIVE_MAX - static_cast<int64_t>(Imm)));
  case 240:
    return llvm::bit_cast<uint32_t>(0.5f);
  case 241:
    return llvm::bit_cast<uint32_t>(-0.5f);
  case 242:
    return llvm::bit_cast<uint32_t>(1.0f);
  case 243:
    return llvm::bit_cast<uint32_t>(-1.0f);
  case 244:
    return llvm::bit_cast<uint32_t>(2.0f);
  case 245:
    return llvm::bit_cast<uint32_t>(-2.0f);
  case 246:
    return llvm::bit_cast<uint32_t>(4.0f);
  case 247:
    return llvm::bit_cast<uint32_t>(-4.0f);
  case 240:
    return llvm::bit_cast<uint64_t>(0.5);
  case 241:
    return llvm::bit_cast<uint64_t>(-0.5);
  case 242:
    return llvm::bit_cast<uint64_t>(1.0);
  case 243:
    return llvm::bit_cast<uint64_t>(-1.0);
  case 244:
    return llvm::bit_cast<uint64_t>(2.0);
  case 245:
    return llvm::bit_cast<uint64_t>(-2.0);
  case 246:
    return llvm::bit_cast<uint64_t>(4.0);
  case 247:
    return llvm::bit_cast<uint64_t>(-4.0);
  case 248: // 1 / (2 * PI)
    return 0x3fc45f306dc9c882;
  using namespace AMDGPU;

    return VGPR_32RegClassID;
  case OPWV232:
    return VReg_64RegClassID;
  case OPW96:
    return VReg_96RegClassID;
  case OPW128:
    return VReg_128RegClassID;
  case OPW160:
    return VReg_160RegClassID;
  case OPW256:
    return VReg_256RegClassID;
  case OPW288:
    return VReg_288RegClassID;
  case OPW320:
    return VReg_320RegClassID;
  case OPW352:
    return VReg_352RegClassID;
  case OPW384:
    return VReg_384RegClassID;
  case OPW512:
    return VReg_512RegClassID;
  case OPW1024:
    return VReg_1024RegClassID;
  using namespace AMDGPU;

    return AGPR_32RegClassID;
  case OPWV232:
    return AReg_64RegClassID;
  case OPW96:
    return AReg_96RegClassID;
  case OPW128:
    return AReg_128RegClassID;
  case OPW160:
    return AReg_160RegClassID;
  case OPW256:
    return AReg_256RegClassID;
  case OPW288:
    return AReg_288RegClassID;
  case OPW320:
    return AReg_320RegClassID;
  case OPW352:
    return AReg_352RegClassID;
  case OPW384:
    return AReg_384RegClassID;
  case OPW512:
    return AReg_512RegClassID;
  case OPW1024:
    return AReg_1024RegClassID;
  using namespace AMDGPU;

    return SGPR_32RegClassID;
  case OPWV232:
    return SGPR_64RegClassID;
  case OPW96:
    return SGPR_96RegClassID;
  case OPW128:
    return SGPR_128RegClassID;
  case OPW160:
    return SGPR_160RegClassID;
  case OPW256:
    return SGPR_256RegClassID;
  case OPW288:
    return SGPR_288RegClassID;
  case OPW320:
    return SGPR_320RegClassID;
  case OPW352:
    return SGPR_352RegClassID;
  case OPW384:
    return SGPR_384RegClassID;
  case OPW512:
    return SGPR_512RegClassID;
  using namespace AMDGPU;

    return TTMP_32RegClassID;
  case OPWV232:
    return TTMP_64RegClassID;
  case OPW128:
    return TTMP_128RegClassID;
  case OPW256:
    return TTMP_256RegClassID;
  case OPW288:
    return TTMP_288RegClassID;
  case OPW320:
    return TTMP_320RegClassID;
  case OPW352:
    return TTMP_352RegClassID;
  case OPW384:
    return TTMP_384RegClassID;
  case OPW512:
    return TTMP_512RegClassID;
  using namespace AMDGPU::EncValues;

  unsigned TTmpMin = isGFX9Plus() ? TTMP_GFX9PLUS_MIN : TTMP_VI_MIN;
  unsigned TTmpMax = isGFX9Plus() ? TTMP_GFX9PLUS_MAX : TTMP_VI_MAX;

  return (TTmpMin <= Val && Val <= TTmpMax) ? Val - TTmpMin : -1;
                                          bool MandatoryLiteral,
  using namespace AMDGPU::EncValues;

  bool IsAGPR = Val & 512;

  if (VGPR_MIN <= Val && Val <= VGPR_MAX) {
                                                 bool MandatoryLiteral,
                                                 unsigned ImmWidth,
  assert(Val < (1 << 8) && "9-bit Src encoding when Val{8} is 0");
  using namespace AMDGPU::EncValues;

  static_assert(SGPR_MIN == 0);
  if (INLINE_INTEGER_C_MIN <= Val && Val <= INLINE_INTEGER_C_MAX)

  if (INLINE_FLOATING_C_MIN <= Val && Val <= INLINE_FLOATING_C_MAX)

  if (Val == LITERAL_CONST) {
    if (MandatoryLiteral)
                                              unsigned Val) const {
  Val |= ~XDstReg & 1;
  using namespace AMDGPU;

  using namespace AMDGPU;
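// SDWA9 source operands pack VGPRs, SGPRs, TTMPs and inline constants into a
// single value space; each range below is rebased to its own register file.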
  using namespace AMDGPU::SDWA;
  using namespace AMDGPU::EncValues;

  if (int(SDWA9EncValues::SRC_VGPR_MIN) <= int(Val) &&
      Val <= SDWA9EncValues::SRC_VGPR_MAX) {
    return createRegOperand(getVgprClassId(Width),
                            Val - SDWA9EncValues::SRC_VGPR_MIN);
  }
  if (SDWA9EncValues::SRC_SGPR_MIN <= Val &&
      Val <= (isGFX10Plus() ? SDWA9EncValues::SRC_SGPR_MAX_GFX10
                            : SDWA9EncValues::SRC_SGPR_MAX_SI)) {
    return createSRegOperand(getSgprClassId(Width),
                             Val - SDWA9EncValues::SRC_SGPR_MIN);
  }
  if (SDWA9EncValues::SRC_TTMP_MIN <= Val &&
      Val <= SDWA9EncValues::SRC_TTMP_MAX) {
    return createSRegOperand(getTtmpClassId(Width),
                             Val - SDWA9EncValues::SRC_TTMP_MIN);
  }

  const unsigned SVal = Val - SDWA9EncValues::SRC_SGPR_MIN;

  if (INLINE_INTEGER_C_MIN <= SVal && SVal <= INLINE_INTEGER_C_MAX)

  if (INLINE_FLOATING_C_MIN <= SVal && SVal <= INLINE_FLOATING_C_MAX)
  using namespace AMDGPU::SDWA;

         "SDWAVopcDst should be present only on GFX9+");

  bool IsWave64 = STI.hasFeature(AMDGPU::FeatureWavefrontSize64);

  if (Val & SDWA9EncValues::VOPC_DST_VCC_MASK) {
    Val &= SDWA9EncValues::VOPC_DST_SGPR_MASK;
  auto [Version, W64, W32, MDP] = Encoding::decode(Imm);

  if (Encoding::encode(Version, W64, W32, MDP) != Imm)

  if (I == Versions.end())
  return STI.hasFeature(AMDGPU::FeatureArchitectedFlatScratch);
  if (PopCount == 1) {
    S << "bit (" << (TrailingZeros + BaseBytes * CHAR_BIT) << ')';
  } else {
    S << "bits in range ("
      << (TrailingZeros + PopCount - 1 + BaseBytes * CHAR_BIT) << ':'
      << (TrailingZeros + BaseBytes * CHAR_BIT) << ')';
#define GET_FIELD(MASK) (AMDHSA_BITS_GET(FourByteBuffer, MASK))
#define PRINT_DIRECTIVE(DIRECTIVE, MASK)                                       \
  do {                                                                         \
    KdStream << Indent << DIRECTIVE " " << GET_FIELD(MASK) << '\n';            \
  } while (0)
#define PRINT_PSEUDO_DIRECTIVE_COMMENT(DIRECTIVE, MASK)                        \
  do {                                                                         \
    KdStream << Indent << MAI.getCommentString() << ' ' << DIRECTIVE " "       \
             << GET_FIELD(MASK) << '\n';                                       \
  } while (0)

#define CHECK_RESERVED_BITS_IMPL(MASK, DESC, MSG)                              \
  do {                                                                         \
    if (FourByteBuffer & (MASK)) {                                             \
      return createStringError(std::errc::invalid_argument,                    \
                               "kernel descriptor " DESC                       \
                               " reserved %s set" MSG,                         \
                               getBitRangeFromMask((MASK), 0).c_str());        \
    }                                                                          \
  } while (0)

#define CHECK_RESERVED_BITS(MASK) CHECK_RESERVED_BITS_IMPL(MASK, #MASK, "")
#define CHECK_RESERVED_BITS_MSG(MASK, MSG)                                     \
  CHECK_RESERVED_BITS_IMPL(MASK, #MASK, ", " MSG)
#define CHECK_RESERVED_BITS_DESC(MASK, DESC)                                   \
  CHECK_RESERVED_BITS_IMPL(MASK, DESC, "")
#define CHECK_RESERVED_BITS_DESC_MSG(MASK, DESC, MSG)                          \
  CHECK_RESERVED_BITS_IMPL(MASK, DESC, ", " MSG)
  using namespace amdhsa;

  uint32_t GranulatedWorkitemVGPRCount =
      GET_FIELD(COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT);

  uint32_t NextFreeVGPR =
      (GranulatedWorkitemVGPRCount + 1) *
      AMDGPU::IsaInfo::getVGPREncodingGranule(&STI, EnableWavefrontSize32);

  KdStream << Indent << ".amdhsa_next_free_vgpr " << NextFreeVGPR << '\n';
  uint32_t GranulatedWavefrontSGPRCount =
      GET_FIELD(COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT);

  CHECK_RESERVED_BITS_MSG(COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT,
                          "must be zero on gfx10+");

  uint32_t NextFreeSGPR = (GranulatedWavefrontSGPRCount + 1) *
                          AMDGPU::IsaInfo::getSGPREncodingGranule(&STI);

  KdStream << Indent << ".amdhsa_reserve_vcc " << 0 << '\n';
  KdStream << Indent << ".amdhsa_reserve_flat_scratch " << 0 << '\n';
  KdStream << Indent << ".amdhsa_reserve_xnack_mask " << 0 << '\n';
  KdStream << Indent << ".amdhsa_next_free_sgpr " << NextFreeSGPR << "\n";
  PRINT_DIRECTIVE(".amdhsa_float_round_mode_32",
                  COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_32);
  PRINT_DIRECTIVE(".amdhsa_float_round_mode_16_64",
                  COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_16_64);
  PRINT_DIRECTIVE(".amdhsa_float_denorm_mode_32",
                  COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_32);
  PRINT_DIRECTIVE(".amdhsa_float_denorm_mode_16_64",
                  COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_16_64);

  PRINT_DIRECTIVE(".amdhsa_dx10_clamp",
                  COMPUTE_PGM_RSRC1_GFX6_GFX11_ENABLE_DX10_CLAMP);

  PRINT_DIRECTIVE(".amdhsa_ieee_mode",
                  COMPUTE_PGM_RSRC1_GFX6_GFX11_ENABLE_IEEE_MODE);

  PRINT_DIRECTIVE(".amdhsa_fp16_overflow",
                  COMPUTE_PGM_RSRC1_GFX9_PLUS_FP16_OVFL);

                               "COMPUTE_PGM_RSRC1", "must be zero pre-gfx9");

                               "COMPUTE_PGM_RSRC1", "must be zero pre-gfx10");

  PRINT_DIRECTIVE(".amdhsa_workgroup_processor_mode",
                  COMPUTE_PGM_RSRC1_GFX10_PLUS_WGP_MODE);
  PRINT_DIRECTIVE(".amdhsa_memory_ordered",
                  COMPUTE_PGM_RSRC1_GFX10_PLUS_MEM_ORDERED);
  PRINT_DIRECTIVE(".amdhsa_forward_progress",
                  COMPUTE_PGM_RSRC1_GFX10_PLUS_FWD_PROGRESS);

  PRINT_DIRECTIVE(".amdhsa_round_robin_scheduling",
                  COMPUTE_PGM_RSRC1_GFX12_PLUS_ENABLE_WG_RR_EN);
  using namespace amdhsa;

                      COMPUTE_PGM_RSRC2_ENABLE_PRIVATE_SEGMENT);

  PRINT_DIRECTIVE(".amdhsa_system_sgpr_private_segment_wavefront_offset",
                  COMPUTE_PGM_RSRC2_ENABLE_PRIVATE_SEGMENT);
  PRINT_DIRECTIVE(".amdhsa_system_sgpr_workgroup_id_x",
                  COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_X);
  PRINT_DIRECTIVE(".amdhsa_system_sgpr_workgroup_id_y",
                  COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Y);
  PRINT_DIRECTIVE(".amdhsa_system_sgpr_workgroup_id_z",
                  COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Z);
  PRINT_DIRECTIVE(".amdhsa_system_sgpr_workgroup_info",
                  COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_INFO);
  PRINT_DIRECTIVE(".amdhsa_system_vgpr_workitem_id",
                  COMPUTE_PGM_RSRC2_ENABLE_VGPR_WORKITEM_ID);

  PRINT_DIRECTIVE(
      ".amdhsa_exception_fp_ieee_invalid_op",
      COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INVALID_OPERATION);
  PRINT_DIRECTIVE(".amdhsa_exception_fp_denorm_src",
                  COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_FP_DENORMAL_SOURCE);
  PRINT_DIRECTIVE(
      ".amdhsa_exception_fp_ieee_div_zero",
      COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_DIVISION_BY_ZERO);
  PRINT_DIRECTIVE(".amdhsa_exception_fp_ieee_overflow",
                  COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_OVERFLOW);
  PRINT_DIRECTIVE(".amdhsa_exception_fp_ieee_underflow",
                  COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_UNDERFLOW);
  PRINT_DIRECTIVE(".amdhsa_exception_fp_ieee_inexact",
                  COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INEXACT);
  PRINT_DIRECTIVE(".amdhsa_exception_int_div_zero",
                  COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_INT_DIVIDE_BY_ZERO);
  using namespace amdhsa;

  KdStream << Indent << ".amdhsa_accum_offset "
           << (GET_FIELD(COMPUTE_PGM_RSRC3_GFX90A_ACCUM_OFFSET) + 1) * 4
           << '\n';

  PRINT_DIRECTIVE(".amdhsa_tg_split", COMPUTE_PGM_RSRC3_GFX90A_TG_SPLIT);

                               "COMPUTE_PGM_RSRC3", "must be zero on gfx90a");
                               "COMPUTE_PGM_RSRC3", "must be zero on gfx90a");
  if (!EnableWavefrontSize32 || !*EnableWavefrontSize32) {
    PRINT_DIRECTIVE(".amdhsa_shared_vgpr_count",
                    COMPUTE_PGM_RSRC3_GFX10_GFX11_SHARED_VGPR_COUNT);
  } else {
    PRINT_PSEUDO_DIRECTIVE_COMMENT(
        "SHARED_VGPR_COUNT", COMPUTE_PGM_RSRC3_GFX10_GFX11_SHARED_VGPR_COUNT);
  }

                               "COMPUTE_PGM_RSRC3", "must be zero on gfx12+");

    PRINT_PSEUDO_DIRECTIVE_COMMENT("INST_PREF_SIZE",
                                   COMPUTE_PGM_RSRC3_GFX11_INST_PREF_SIZE);
    PRINT_PSEUDO_DIRECTIVE_COMMENT("TRAP_ON_START",
                                   COMPUTE_PGM_RSRC3_GFX11_TRAP_ON_START);
    PRINT_PSEUDO_DIRECTIVE_COMMENT("TRAP_ON_END",
                                   COMPUTE_PGM_RSRC3_GFX11_TRAP_ON_END);

    PRINT_PSEUDO_DIRECTIVE_COMMENT(
        "INST_PREF_SIZE", COMPUTE_PGM_RSRC3_GFX12_PLUS_INST_PREF_SIZE);

                               "COMPUTE_PGM_RSRC3", "must be zero on gfx10");

                               "COMPUTE_PGM_RSRC3", "must be zero on gfx10+");

    PRINT_PSEUDO_DIRECTIVE_COMMENT("GLG_EN",
                                   COMPUTE_PGM_RSRC3_GFX12_PLUS_GLG_EN);

                               "COMPUTE_PGM_RSRC3",
                               "must be zero on gfx10 or gfx11");

                               "COMPUTE_PGM_RSRC3", "must be zero on gfx10+");

    PRINT_PSEUDO_DIRECTIVE_COMMENT("IMAGE_OP",
                                   COMPUTE_PGM_RSRC3_GFX11_PLUS_IMAGE_OP);

                               "COMPUTE_PGM_RSRC3", "must be zero on gfx10");
  } else if (FourByteBuffer) {
    return createStringError(
        std::errc::invalid_argument,
        "kernel descriptor COMPUTE_PGM_RSRC3 must be all zero before gfx9");
#undef PRINT_PSEUDO_DIRECTIVE_COMMENT
#undef PRINT_DIRECTIVE
#undef GET_FIELD
#undef CHECK_RESERVED_BITS_IMPL
#undef CHECK_RESERVED_BITS
#undef CHECK_RESERVED_BITS_MSG
#undef CHECK_RESERVED_BITS_DESC
#undef CHECK_RESERVED_BITS_DESC_MSG
static Error createReservedKDBitsError(uint32_t Mask, unsigned BaseBytes,
                                       const char *Msg = "") {
  return createStringError(std::errc::invalid_argument,
                           "kernel descriptor reserved %s set%s%s",
static Error createReservedKDBytesError(unsigned BaseInBytes,
                                        unsigned WidthInBytes) {
  return createStringError(
      std::errc::invalid_argument,
      "kernel descriptor reserved bits in range (%u:%u) set",
      (BaseInBytes + WidthInBytes) * CHAR_BIT - 1, BaseInBytes * CHAR_BIT);
}
#define PRINT_DIRECTIVE(DIRECTIVE, MASK)                                       \
  do {                                                                         \
    KdStream << Indent << DIRECTIVE " "                                        \
             << ((TwoByteBuffer & MASK) >> (MASK##_SHIFT)) << '\n';            \
  } while (0)
  switch (Cursor.tell()) {
  case amdhsa::GROUP_SEGMENT_FIXED_SIZE_OFFSET:
    FourByteBuffer = DE.getU32(Cursor);
    KdStream << Indent << ".amdhsa_group_segment_fixed_size " << FourByteBuffer
             << '\n';
    return true;

  case amdhsa::PRIVATE_SEGMENT_FIXED_SIZE_OFFSET:
    FourByteBuffer = DE.getU32(Cursor);
    KdStream << Indent << ".amdhsa_private_segment_fixed_size "
             << FourByteBuffer << '\n';
    return true;

  case amdhsa::KERNARG_SIZE_OFFSET:
    FourByteBuffer = DE.getU32(Cursor);
    KdStream << Indent << ".amdhsa_kernarg_size "
             << FourByteBuffer << '\n';
    return true;
    ReservedBytes = DE.getBytes(Cursor, 4);
    for (int I = 0; I < 4; ++I) {
      if (ReservedBytes[I] != 0)

    ReservedBytes = DE.getBytes(Cursor, 20);
    for (int I = 0; I < 20; ++I) {
      if (ReservedBytes[I] != 0)
    FourByteBuffer = DE.getU32(Cursor);

    FourByteBuffer = DE.getU32(Cursor);

    FourByteBuffer = DE.getU32(Cursor);
    using namespace amdhsa;
    TwoByteBuffer = DE.getU16(Cursor);

    PRINT_DIRECTIVE(".amdhsa_user_sgpr_private_segment_buffer",
                    KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER);
    PRINT_DIRECTIVE(".amdhsa_user_sgpr_dispatch_ptr",
                    KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR);
    PRINT_DIRECTIVE(".amdhsa_user_sgpr_queue_ptr",
                    KERNEL_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR);
    PRINT_DIRECTIVE(".amdhsa_user_sgpr_kernarg_segment_ptr",
                    KERNEL_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR);
    PRINT_DIRECTIVE(".amdhsa_user_sgpr_dispatch_id",
                    KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID);
    PRINT_DIRECTIVE(".amdhsa_user_sgpr_flat_scratch_init",
                    KERNEL_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT);
    PRINT_DIRECTIVE(".amdhsa_user_sgpr_private_segment_size",
                    KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE);

    if (TwoByteBuffer & KERNEL_CODE_PROPERTY_RESERVED0)

        (TwoByteBuffer & KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32)) {
                    KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32,
                    KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32);
    PRINT_DIRECTIVE(".amdhsa_uses_dynamic_stack",
                    KERNEL_CODE_PROPERTY_USES_DYNAMIC_STACK);
    if (TwoByteBuffer & KERNEL_CODE_PROPERTY_RESERVED1) {
    using namespace amdhsa;
    TwoByteBuffer = DE.getU16(Cursor);
    if (TwoByteBuffer & KERNARG_PRELOAD_SPEC_LENGTH) {
      PRINT_DIRECTIVE(".amdhsa_user_sgpr_kernarg_preload_length",
                      KERNARG_PRELOAD_SPEC_LENGTH);
    }

    if (TwoByteBuffer & KERNARG_PRELOAD_SPEC_OFFSET) {
      PRINT_DIRECTIVE(".amdhsa_user_sgpr_kernarg_preload_offset",
                      KERNARG_PRELOAD_SPEC_OFFSET);
    }
    ReservedBytes = DE.getBytes(Cursor, 4);
    for (int I = 0; I < 4; ++I) {
      if (ReservedBytes[I] != 0)
#undef PRINT_DIRECTIVE
  if (Bytes.size() != 64 || KdAddress % 64 != 0)
    return createStringError(std::errc::invalid_argument,
                             "kernel descriptor must be 64-byte aligned");
  EnableWavefrontSize32 =
          amdhsa::KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32);
  KdStream << ".amdhsa_kernel " << KdName << '\n';

  while (C && C.tell() < Bytes.size()) {

  KdStream << ".end_amdhsa_kernel\n";
2366 "code object v2 is not supported");
const MCExpr *AMDGPUDisassembler::createConstantSymbolExpr(StringRef Id,
                                                           int64_t Val) {

  if (!Sym->isVariable()) {

    bool Valid = Sym->getVariableValue()->evaluateAsAbsolute(Res);
    if (!Valid || Res != Val)
  if (Result != Symbols->end()) {

  ReferencedAddresses.push_back(static_cast<uint64_t>(Value));
    std::unique_ptr<MCRelocationInfo> &&RelInfo) {