#define DEBUG_TYPE "amdgpu-disassembler"

#define SGPR_MAX                                                               \
  (isGFX10Plus() ? AMDGPU::EncValues::SGPR_MAX_GFX10                           \
                 : AMDGPU::EncValues::SGPR_MAX_SI)
AMDGPUDisassembler::AMDGPUDisassembler(const MCSubtargetInfo &STI,
                                       MCContext &Ctx, MCInstrInfo const *MCII)
    : MCDisassembler(STI, Ctx), MCII(MCII), MRI(*Ctx.getRegisterInfo()),
      MAI(*Ctx.getAsmInfo()), TargetMaxInstBytes(MAI.getMaxInstLength(&STI)),
      CodeObjectVersion(AMDGPU::getDefaultAMDHSACodeObjectVersion()) {
  if (!STI.hasFeature(AMDGPU::FeatureGCN3Encoding) && !isGFX10Plus())
    reportFatalUsageError("disassembly not yet supported for subtarget");

  for (auto [Symbol, Code] : AMDGPU::UCVersion::getGFXVersions())
    createConstantSymbolExpr(Symbol, Code);

  UCVersionW64Expr = createConstantSymbolExpr("UC_VERSION_W64_BIT", 0x2000);
  UCVersionW32Expr = createConstantSymbolExpr("UC_VERSION_W32_BIT", 0x4000);
  UCVersionMDPExpr = createConstantSymbolExpr("UC_VERSION_MDP_BIT", 0x8000);
}
static int insertNamedMCOperand(MCInst &MI, const MCOperand &Op,
                                AMDGPU::OpName Name) {
  int OpIdx = AMDGPU::getNamedOperandIdx(MI.getOpcode(), Name);
  if (DAsm->tryAddingSymbolicOperand(Inst, Offset, Addr, true, 2, 2, 0))
  if (DAsm->isGFX12Plus()) { // GFX12 supports 24-bit signed offsets.
    Offset = SignExtend64<24>(Imm);
  } else if (DAsm->isVI()) { // VI supports 20-bit unsigned offsets.
  return addOperand(Inst, DAsm->decodeBoolReg(Inst, Val));

  return addOperand(Inst, DAsm->decodeSplitBarrier(Inst, Val));

  return addOperand(Inst, DAsm->decodeDpp8FI(Val));
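// The DECODE_OPERAND* macros below stamp out small static trampolines for the
// TableGen'erated decoder tables; each one forwards the raw encoded value to
// the matching AMDGPUDisassembler method.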
#define DECODE_OPERAND(StaticDecoderName, DecoderName)                         \
  static DecodeStatus StaticDecoderName(MCInst &Inst, unsigned Imm,            \
                                        uint64_t /*Addr*/,                     \
                                        const MCDisassembler *Decoder) {       \
    auto DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);              \
    return addOperand(Inst, DAsm->DecoderName(Imm));                           \
  }
#define DECODE_OPERAND_REG_8(RegClass)                                         \
  static DecodeStatus Decode##RegClass##RegisterClass(                         \
      MCInst &Inst, unsigned Imm, uint64_t /*Addr*/,                           \
      const MCDisassembler *Decoder) {                                         \
    assert(Imm < (1 << 8) && "8-bit encoding");                                \
    auto DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);              \
    return addOperand(                                                         \
        Inst, DAsm->createRegOperand(AMDGPU::RegClass##RegClassID, Imm));      \
  }
#define DECODE_SrcOp(Name, EncSize, OpWidth, EncImm)                           \
  static DecodeStatus Name(MCInst &Inst, unsigned Imm, uint64_t /*Addr*/,      \
                           const MCDisassembler *Decoder) {                    \
    assert(Imm < (1 << EncSize) && #EncSize "-bit encoding");                  \
    auto DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);              \
    return addOperand(Inst, DAsm->decodeSrcOp(Inst, OpWidth, EncImm));         \
  }
static DecodeStatus decodeSrcOp(MCInst &Inst, unsigned EncSize,
                                unsigned OpWidth, unsigned Imm, unsigned EncImm,
                                const MCDisassembler *Decoder) {
  assert(Imm < (1U << EncSize) && "Operand doesn't fit encoding!");
  const auto *DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
  return addOperand(Inst, DAsm->decodeSrcOp(Inst, OpWidth, EncImm));
}
#define DECODE_OPERAND_SREG_7(RegClass, OpWidth)                               \
  DECODE_SrcOp(Decode##RegClass##RegisterClass, 7, OpWidth, Imm)

#define DECODE_OPERAND_SREG_8(RegClass, OpWidth)                               \
  DECODE_SrcOp(Decode##RegClass##RegisterClass, 8, OpWidth, Imm)
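// Templated decoders for the 9- and 10-bit source operand encodings. Bit 9 is
// the "acc" bit that selects between the VGPR and AGPR banks; OR-ing 512 into
// the value forces the AGPR bank before handing off to decodeSrcOp().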
template <unsigned OpWidth>
static DecodeStatus decodeAV10(MCInst &Inst, unsigned Imm, uint64_t /*Addr*/,
                               const MCDisassembler *Decoder) {
  return decodeSrcOp(Inst, 10, OpWidth, Imm, Imm | 256 /*IS_VGPR*/, Decoder);
}
template <unsigned OpWidth>
static DecodeStatus decodeSrcReg9(MCInst &Inst, unsigned Imm,
                                  uint64_t /*Addr*/,
                                  const MCDisassembler *Decoder) {
  return decodeSrcOp(Inst, 9, OpWidth, Imm, Imm, Decoder);
}
template <unsigned OpWidth>
static DecodeStatus decodeSrcA9(MCInst &Inst, unsigned Imm, uint64_t /*Addr*/,
                                const MCDisassembler *Decoder) {
  return decodeSrcOp(Inst, 9, OpWidth, Imm, Imm | 512, Decoder);
}
template <unsigned OpWidth>
static DecodeStatus decodeSrcAV10(MCInst &Inst, unsigned Imm,
                                  uint64_t /*Addr*/,
                                  const MCDisassembler *Decoder) {
  return decodeSrcOp(Inst, 10, OpWidth, Imm, Imm, Decoder);
}
template <unsigned OpWidth>
static DecodeStatus decodeSrcRegOrImm9(MCInst &Inst, unsigned Imm,
                                       uint64_t /*Addr*/,
                                       const MCDisassembler *Decoder) {
  return decodeSrcOp(Inst, 9, OpWidth, Imm, Imm, Decoder);
}
template <unsigned OpWidth>
static DecodeStatus decodeSrcRegOrImmA9(MCInst &Inst, unsigned Imm,
                                        uint64_t /*Addr*/,
                                        const MCDisassembler *Decoder) {
  return decodeSrcOp(Inst, 9, OpWidth, Imm, Imm | 512, Decoder);
}
  assert((Imm & (1 << 8)) == 0 && "Imm{8} should not be used");

  bool IsHi = Imm & (1 << 9);
  unsigned RegIdx = Imm & 0xff;
  return addOperand(Inst, DAsm->createVGPR16Operand(RegIdx, IsHi));
  bool IsHi = Imm & (1 << 7);
  unsigned RegIdx = Imm & 0x7f;
  return addOperand(Inst, DAsm->createVGPR16Operand(RegIdx, IsHi));
template <unsigned OpWidth>
static DecodeStatus decodeOperand_VSrcT16_Lo128(MCInst &Inst, unsigned Imm,
                                                uint64_t /*Addr*/,
                                                const MCDisassembler *Decoder) {
  const auto *DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
  if (Imm & 256) { // IS_VGPR
    bool IsHi = Imm & (1 << 7);
    unsigned RegIdx = Imm & 0x7f;
    return addOperand(Inst, DAsm->createVGPR16Operand(RegIdx, IsHi));
  }
  return addOperand(Inst, DAsm->decodeNonVGPRSrcOp(Inst, OpWidth, Imm & 0xFF));
}
template <unsigned OpWidth>
static DecodeStatus decodeOperand_VSrcT16(MCInst &Inst, unsigned Imm,
                                          uint64_t /*Addr*/,
                                          const MCDisassembler *Decoder) {
  const auto *DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
  if (Imm & 256) { // IS_VGPR
    bool IsHi = Imm & (1 << 9);
    unsigned RegIdx = Imm & 0xff;
    return addOperand(Inst, DAsm->createVGPR16Operand(RegIdx, IsHi));
  }
  return addOperand(Inst, DAsm->decodeNonVGPRSrcOp(Inst, OpWidth, Imm & 0xFF));
}
  bool IsHi = Imm & (1 << 9);
  unsigned RegIdx = Imm & 0xff;
  return addOperand(Inst, DAsm->createVGPR16Operand(RegIdx, IsHi));
  return addOperand(Inst, DAsm->decodeMandatoryLiteralConstant(Imm));

  return addOperand(Inst, DAsm->decodeMandatoryLiteral64Constant(Imm));
static DecodeStatus decodeOperandVOPDDstY(MCInst &Inst, unsigned Val,
                                          uint64_t Addr, const void *Decoder) {
  const auto *DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
  return addOperand(Inst, DAsm->decodeVOPDDstYOp(Inst, Val));
}
  return addOperand(Inst, DAsm->decodeSrcOp(Inst, Opw, Imm | 256));
template <unsigned Opw>
static DecodeStatus decodeAVLdSt(MCInst &Inst, unsigned Imm, uint64_t /*Addr*/,
                                 const MCDisassembler *Decoder) {
  return decodeAVLdSt(Inst, Imm, Opw, Decoder);
}

static DecodeStatus decodeOperand_VSrc_f64(MCInst &Inst, unsigned Imm,
                                           uint64_t Addr,
                                           const MCDisassembler *Decoder) {
  assert(Imm < (1 << 9) && "9-bit encoding");
  const auto *DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
  return addOperand(Inst, DAsm->decodeSrcOp(Inst, 64, Imm));
}
#define DECODE_SDWA(DecName) \
  DECODE_OPERAND(decodeSDWA##DecName, decodeSDWA##DecName)
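// For illustration, DECODE_SDWA(Src32) expands (via DECODE_OPERAND above) to
// roughly:
//   static DecodeStatus decodeSDWASrc32(MCInst &Inst, unsigned Imm, uint64_t,
//                                       const MCDisassembler *Decoder) {
//     auto DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
//     return addOperand(Inst, DAsm->decodeSDWASrc32(Imm));
//   }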
  return addOperand(Inst, DAsm->decodeVersionImm(Imm));

#include "AMDGPUGenDisassemblerTables.inc"
template <> constexpr uint32_t InsnBitWidth<uint32_t> = 32;
template <> constexpr uint32_t InsnBitWidth<uint64_t> = 64;
template <> constexpr uint32_t InsnBitWidth<std::bitset<96>> = 96;
template <> constexpr uint32_t InsnBitWidth<std::bitset<128>> = 128;
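// 96- and 128-bit encodings do not fit in a native integer type, so
// std::bitset serves as the instruction container for those widths.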
template <typename InsnType>
DecodeStatus AMDGPUDisassembler::tryDecodeInst(const uint8_t *Table, MCInst &MI,
                                               InsnType Inst, uint64_t Address,
                                               raw_ostream &Comments) const {
  MCInst TmpInst;
  const auto SavedBytes = Bytes;

  SmallString<64> LocalComments;
  raw_svector_ostream LocalCommentStream(LocalComments);
  CommentStream = &LocalCommentStream;

  DecodeStatus Res =
      decodeInstruction(Table, TmpInst, Inst, Address, this, STI);

  CommentStream = nullptr;

  if (Res != MCDisassembler::Fail) {
    MI = TmpInst;
    Comments << LocalComments;
    return MCDisassembler::Success;
  }
  Bytes = SavedBytes;
  return MCDisassembler::Fail;
}
template <typename InsnType>
DecodeStatus AMDGPUDisassembler::tryDecodeInst(const uint8_t *Table1,
                                               const uint8_t *Table2,
                                               MCInst &MI, InsnType Inst,
                                               uint64_t Address,
                                               raw_ostream &Comments) const {
  for (const uint8_t *T : {Table1, Table2}) {
    if (DecodeStatus Res = tryDecodeInst(T, MI, Inst, Address, Comments))
      return Res;
  }
  return MCDisassembler::Fail;
}
  Bytes = Bytes.slice(sizeof(T));
static std::bitset<96> eat12Bytes(ArrayRef<uint8_t> &Bytes) {
  assert(Bytes.size() >= 12);
  std::bitset<96> Lo(support::endian::read64le(Bytes.data()));
  Bytes = Bytes.slice(8);
  std::bitset<96> Hi(support::endian::read32le(Bytes.data()));
  Bytes = Bytes.slice(4);
  return (Hi << 64) | Lo;
}
static std::bitset<128> eat16Bytes(ArrayRef<uint8_t> &Bytes) {
  assert(Bytes.size() >= 16);
  std::bitset<128> Lo(support::endian::read64le(Bytes.data()));
  Bytes = Bytes.slice(8);
  std::bitset<128> Hi(support::endian::read64le(Bytes.data()));
  Bytes = Bytes.slice(8);
  return (Hi << 64) | Lo;
}
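// eatBytes/eat12Bytes/eat16Bytes consume little-endian encoded words from the
// front of the byte buffer and advance Bytes past what was read.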
void AMDGPUDisassembler::decodeImmOperands(MCInst &MI,
                                           const MCInstrInfo &MCII) const {
  const MCInstrDesc &Desc = MCII.get(MI.getOpcode());
  for (auto [OpNo, OpDesc] : enumerate(Desc.operands())) {
    if (OpNo >= MI.getNumOperands())
      continue;
    // ...
    MCOperand &Op = MI.getOperand(OpNo);
    int64_t Imm = Op.getImm();
    switch (OpDesc.OperandType) {
  unsigned MaxInstBytesNum = std::min((size_t)TargetMaxInstBytes, Bytes_.size());
  Bytes = Bytes_.slice(0, MaxInstBytesNum);

  // In case the opcode is not recognized, assume a size of 4 bytes (unless
  // there are fewer bytes left).
  Size = std::min((size_t)4, Bytes_.size());

  Bytes = Bytes_.slice(0, MaxInstBytesNum);
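  // getInstruction probes decoder tables from the widest candidate encoding
  // down to 32 bits, restoring Bytes from Bytes_ after each failed attempt.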
    if (STI.hasFeature(AMDGPU::Feature64BitLiterals)) {
      Bytes = Bytes_.slice(4, MaxInstBytesNum - 4);
      // ...
      Bytes = Bytes_.slice(0, MaxInstBytesNum);
    } else if (Bytes.size() >= 16 &&
               STI.hasFeature(AMDGPU::FeatureGFX950Insts)) {
      // ...
      Bytes = Bytes_.slice(0, MaxInstBytesNum);
    }
  if (Bytes.size() >= 8) {
    const uint64_t QW = eatBytes<uint64_t>(Bytes);

    // Each condition below continues into a tryDecodeInst call against the
    // matching 64-bit decoder table; those continuations are elided in this
    // excerpt.
    if (STI.hasFeature(AMDGPU::FeatureGFX10_BEncoding) &&
    if (STI.hasFeature(AMDGPU::FeatureUnpackedD16VMem) &&
    if (STI.hasFeature(AMDGPU::FeatureGFX950Insts) &&
    if (STI.hasFeature(AMDGPU::FeatureFmaMixInsts) &&
    if (STI.hasFeature(AMDGPU::FeatureGFX940Insts) &&
    if (STI.hasFeature(AMDGPU::FeatureGFX90AInsts) &&
  Bytes = Bytes_.slice(0, MaxInstBytesNum);

  if (Bytes.size() >= 4) {
    const uint32_t DW = eatBytes<uint32_t>(Bytes);

    // 32-bit fallback tables, again feature-gated (the paired tryDecodeInst
    // continuations are elided in this excerpt).
    if (STI.hasFeature(AMDGPU::FeatureGFX950Insts) &&
    if (STI.hasFeature(AMDGPU::FeatureGFX90AInsts) &&
    if (STI.hasFeature(AMDGPU::FeatureGFX10_BEncoding) &&
  decodeImmOperands(MI, *MCII);
  else if (AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::dpp8) !=
           -1)
    convertDPP8Inst(MI);

  // ...
    insertNamedMCOperand(MI, MCOperand::createImm(0),
                         AMDGPU::OpName::src2_modifiers);

  if (MI.getOpcode() == AMDGPU::V_CVT_SR_BF8_F32_e64_dpp ||
      MI.getOpcode() == AMDGPU::V_CVT_SR_FP8_F32_e64_dpp) {
    // Insert dummy unused src2_modifiers.
    insertNamedMCOperand(MI, MCOperand::createImm(0),
                         AMDGPU::OpName::src2_modifiers);
  }
  if (MCII->get(MI.getOpcode()).TSFlags &
      (SIInstrFlags::MUBUF | SIInstrFlags::FLAT | SIInstrFlags::SMRD)) {
    int CPolPos = AMDGPU::getNamedOperandIdx(MI.getOpcode(),
                                             AMDGPU::OpName::cpol);
      if (MI.getNumOperands() <= (unsigned)CPolPos) {
        insertNamedMCOperand(MI, MCOperand::createImm(CPol),
                             AMDGPU::OpName::cpol);
      } else if (CPol) {
        MI.getOperand(CPolPos).setImm(MI.getOperand(CPolPos).getImm() | CPol);
      }
  if ((MCII->get(MI.getOpcode()).TSFlags &
       (SIInstrFlags::MTBUF | SIInstrFlags::MUBUF)) &&
      (STI.hasFeature(AMDGPU::FeatureGFX90AInsts))) {
    // GFX90A lost TFE, its place is occupied by ACC.
    int TFEOpIdx =
        AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::tfe);
    if (TFEOpIdx != -1) {
      auto *TFEIter = MI.begin();
      std::advance(TFEIter, TFEOpIdx);
      MI.insert(TFEIter, MCOperand::createImm(0));
    }
  }
    int OffsetIdx =
        AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::offset);
    if (OffsetIdx != -1) {
      uint32_t Imm = MI.getOperand(OffsetIdx).getImm();
      // ...
      if (SignedOffset < 0)
  if (MCII->get(MI.getOpcode()).TSFlags &
      (SIInstrFlags::MTBUF | SIInstrFlags::MUBUF)) {
    int SWZOpIdx =
        AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::swz);
    if (SWZOpIdx != -1) {
      auto *SWZIter = MI.begin();
      std::advance(SWZIter, SWZOpIdx);
      MI.insert(SWZIter, MCOperand::createImm(0));
    }
  }
    int VAddr0Idx =
        AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::vaddr0);
    int RsrcIdx =
        AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::srsrc);
    unsigned NSAArgs = RsrcIdx - VAddr0Idx - 1;
    if (VAddr0Idx >= 0 && NSAArgs > 0) {
      unsigned NSAWords = (NSAArgs + 3) / 4;
      if (Bytes.size() < 4 * NSAWords)
        return MCDisassembler::Fail;
      for (unsigned i = 0; i < NSAArgs; ++i) {
        const unsigned VAddrIdx = VAddr0Idx + 1 + i;
        auto VAddrRCID =
            MCII->get(MI.getOpcode()).operands()[VAddrIdx].RegClass;
        MI.insert(MI.begin() + VAddrIdx, createRegOperand(VAddrRCID, Bytes[i]));
      }
      Bytes = Bytes.slice(4 * NSAWords);
    }
  if (MCII->get(MI.getOpcode()).TSFlags &
      (SIInstrFlags::MIMG | SIInstrFlags::VIMAGE | SIInstrFlags::VSAMPLE))
    convertMIMGInst(MI);
  int VDstIn_Idx = AMDGPU::getNamedOperandIdx(MI.getOpcode(),
                                              AMDGPU::OpName::vdst_in);
  if (VDstIn_Idx != -1) {
    int Tied = MCII->get(MI.getOpcode()).getOperandConstraint(VDstIn_Idx,
                                                              MCOI::TIED_TO);
    if (Tied != -1 && (MI.getNumOperands() <= (unsigned)VDstIn_Idx ||
                       !MI.getOperand(VDstIn_Idx).isReg() ||
                       MI.getOperand(VDstIn_Idx).getReg() !=
                           MI.getOperand(Tied).getReg())) {
      if (MI.getNumOperands() > (unsigned)VDstIn_Idx)
        MI.erase(&MI.getOperand(VDstIn_Idx));
      insertNamedMCOperand(MI,
                           MCOperand::createReg(MI.getOperand(Tied).getReg()),
                           AMDGPU::OpName::vdst_in);
    }
  }
  // (Additional guard elided.)
  if (MCII->get(MI.getOpcode()).hasImplicitDefOfPhysReg(AMDGPU::EXEC)) {
    auto ExecEncoding = MRI.getEncodingValue(AMDGPU::EXEC_LO);
    if (Bytes_[0] != ExecEncoding)
      return MCDisassembler::Fail;
  }

  Size = MaxInstBytesNum - Bytes.size();
  return MCDisassembler::Success;
void AMDGPUDisassembler::convertEXPInst(MCInst &MI) const {
  if (STI.hasFeature(AMDGPU::FeatureGFX11Insts)) {
  if (MI.getOpcode() == AMDGPU::V_INTERP_P10_F16_F32_inreg_t16_gfx11 ||
      MI.getOpcode() == AMDGPU::V_INTERP_P10_F16_F32_inreg_fake16_gfx11 ||
      MI.getOpcode() == AMDGPU::V_INTERP_P10_F16_F32_inreg_t16_gfx12 ||
      MI.getOpcode() == AMDGPU::V_INTERP_P10_F16_F32_inreg_fake16_gfx12 ||
      MI.getOpcode() == AMDGPU::V_INTERP_P10_RTZ_F16_F32_inreg_t16_gfx11 ||
      MI.getOpcode() == AMDGPU::V_INTERP_P10_RTZ_F16_F32_inreg_fake16_gfx11 ||
      MI.getOpcode() == AMDGPU::V_INTERP_P10_RTZ_F16_F32_inreg_t16_gfx12 ||
      MI.getOpcode() == AMDGPU::V_INTERP_P10_RTZ_F16_F32_inreg_fake16_gfx12 ||
      MI.getOpcode() == AMDGPU::V_INTERP_P2_F16_F32_inreg_t16_gfx11 ||
      MI.getOpcode() == AMDGPU::V_INTERP_P2_F16_F32_inreg_fake16_gfx11 ||
      MI.getOpcode() == AMDGPU::V_INTERP_P2_F16_F32_inreg_t16_gfx12 ||
      MI.getOpcode() == AMDGPU::V_INTERP_P2_F16_F32_inreg_fake16_gfx12 ||
      MI.getOpcode() == AMDGPU::V_INTERP_P2_RTZ_F16_F32_inreg_t16_gfx11 ||
      MI.getOpcode() == AMDGPU::V_INTERP_P2_RTZ_F16_F32_inreg_fake16_gfx11 ||
      MI.getOpcode() == AMDGPU::V_INTERP_P2_RTZ_F16_F32_inreg_t16_gfx12 ||
      MI.getOpcode() == AMDGPU::V_INTERP_P2_RTZ_F16_F32_inreg_fake16_gfx12) {
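    // These VINTERP variants carry an MCInst operand that is not directly
    // encoded in the instruction; the convert step inserts it with a default
    // value so the operand list matches the instruction description.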
  if (STI.hasFeature(AMDGPU::FeatureGFX9) ||
      STI.hasFeature(AMDGPU::FeatureGFX10)) {
    if (AMDGPU::hasNamedOperand(MI.getOpcode(), AMDGPU::OpName::sdst))
      // VOPC - insert clamp
      insertNamedMCOperand(MI, MCOperand::createImm(0), AMDGPU::OpName::clamp);
  } else if (STI.hasFeature(AMDGPU::FeatureVolcanicIslands)) {
    int SDst = AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::sdst);
    if (SDst != -1) {
      // VOPC - insert VCC register as sdst
      insertNamedMCOperand(MI, createRegOperand(AMDGPU::VCC),
                           AMDGPU::OpName::sdst);
    }
  }
  case 4:
    return MO.setReg(MRI.getSubReg(MO.getReg(), AMDGPU::sub0_sub1_sub2_sub3));
  case 6:
    return MO.setReg(
        MRI.getSubReg(MO.getReg(), AMDGPU::sub0_sub1_sub2_sub3_sub4_sub5));
  case 8:
    if (MCRegister NewReg = MRI.getSubReg(
            MO.getReg(), AMDGPU::sub0_sub1_sub2_sub3_sub4_sub5_sub6_sub7)) {
      MO.setReg(NewReg);
    }
    return;
  case 12: {
    // ...
    MCRegister NewReg = MRI.getMatchingSuperReg(
        BaseReg, AMDGPU::sub0, &MRI.getRegClass(AMDGPU::VReg_384RegClassID));
  int BlgpIdx =
      AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::blgp);
  // ...
  int CbszIdx =
      AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::cbsz);

  unsigned CBSZ = MI.getOperand(CbszIdx).getImm();
  unsigned BLGP = MI.getOperand(BlgpIdx).getImm();
  const AMDGPU::MFMA_F8F6F4_Info *AdjustedRegClassOpcode =
      AMDGPU::getMFMA_F8F6F4_WithFormatArgs(CBSZ, BLGP, MI.getOpcode());
  if (!AdjustedRegClassOpcode ||
      AdjustedRegClassOpcode->Opcode == MI.getOpcode())
    return;

  MI.setOpcode(AdjustedRegClassOpcode->Opcode);
  int Src0Idx =
      AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::src0);
  int Src1Idx =
      AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::src1);
  int FmtAIdx =
      AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::matrix_a_fmt);
  // ...
  int FmtBIdx =
      AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::matrix_b_fmt);

  unsigned FmtA = MI.getOperand(FmtAIdx).getImm();
  unsigned FmtB = MI.getOperand(FmtBIdx).getImm();

  const AMDGPU::MFMA_F8F6F4_Info *AdjustedRegClassOpcode =
      AMDGPU::getWMMA_F8F6F4_WithFormatArgs(FmtA, FmtB, MI.getOpcode());
  if (!AdjustedRegClassOpcode ||
      AdjustedRegClassOpcode->Opcode == MI.getOpcode())
    return;

  MI.setOpcode(AdjustedRegClassOpcode->Opcode);
  int Src0Idx =
      AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::src0);
  int Src1Idx =
      AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::src1);
static VOPModifiers collectVOPModifiers(const MCInst &MI,
                                        bool IsVOP3P = false) {
  VOPModifiers Modifiers;
  unsigned Opc = MI.getOpcode();
  const AMDGPU::OpName ModOps[] = {AMDGPU::OpName::src0_modifiers,
                                   AMDGPU::OpName::src1_modifiers,
                                   AMDGPU::OpName::src2_modifiers};
  for (int J = 0; J < 3; ++J) {
    int OpIdx = AMDGPU::getNamedOperandIdx(Opc, ModOps[J]);
    if (OpIdx == -1)
      continue;

    unsigned Val = MI.getOperand(OpIdx).getImm();
    // ...
    } else if (J == 0) {
  const unsigned Opc = MI.getOpcode();
  const MCRegisterClass &ConversionRC =
      MRI.getRegClass(AMDGPU::VGPR_16RegClassID);
  constexpr std::array<std::tuple<AMDGPU::OpName, AMDGPU::OpName, unsigned>, 4>
      OpAndOpMods = {{{AMDGPU::OpName::src0, AMDGPU::OpName::src0_modifiers,
                       SISrcMods::OP_SEL_0},
                      {AMDGPU::OpName::src1, AMDGPU::OpName::src1_modifiers,
                       SISrcMods::OP_SEL_0},
                      {AMDGPU::OpName::src2, AMDGPU::OpName::src2_modifiers,
                       SISrcMods::OP_SEL_0},
                      {AMDGPU::OpName::vdst, AMDGPU::OpName::src0_modifiers,
                       SISrcMods::DST_OP_SEL}}};
  for (const auto &[OpName, OpModsName, OpSelMask] : OpAndOpMods) {
    int OpIdx = AMDGPU::getNamedOperandIdx(Opc, OpName);
    int OpModsIdx = AMDGPU::getNamedOperandIdx(Opc, OpModsName);
    if (OpIdx == -1 || OpModsIdx == -1)
      continue;
    // ...
    unsigned OpEnc = MRI.getEncodingValue(Op.getReg());
    const MCOperand &OpMods = MI.getOperand(OpModsIdx);
    unsigned ModVal = OpMods.getImm();
    if (ModVal & OpSelMask) { // isHi
  constexpr int DST_IDX = 0;
  auto Opcode = MI.getOpcode();
  const auto &Desc = MCII->get(Opcode);
  auto OldIdx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::old);

  if (OldIdx != -1 && Desc.getOperandConstraint(
                          OldIdx, MCOI::OperandConstraint::TIED_TO) == DST_IDX) {
    assert(Desc.getOperandConstraint(
               AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2),
               MCOI::OperandConstraint::TIED_TO) == DST_IDX);
    // ...
  }

  // ...
  assert(MI.getNumOperands() + 1 < MCII->get(MI.getOpcode()).getNumOperands());
  insertNamedMCOperand(MI, MCOperand::createImm(0),
                       AMDGPU::OpName::src2_modifiers);
  unsigned Opc = MI.getOpcode();

  int VDstInIdx =
      AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::vdst_in);
  if (VDstInIdx != -1)
    insertNamedMCOperand(MI, MI.getOperand(0), AMDGPU::OpName::vdst_in);

  unsigned DescNumOps = MCII->get(Opc).getNumOperands();
  if (MI.getNumOperands() < DescNumOps &&
      AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::op_sel)) {
    convertTrue16OpSel(MI);
    auto Mods = collectVOPModifiers(MI);
    insertNamedMCOperand(MI, MCOperand::createImm(Mods.OpSel),
                         AMDGPU::OpName::op_sel);
  }

  if (MI.getNumOperands() < DescNumOps &&
      AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::src0_modifiers))
    insertNamedMCOperand(MI, MCOperand::createImm(0),
                         AMDGPU::OpName::src0_modifiers);

  if (MI.getNumOperands() < DescNumOps &&
      AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::src1_modifiers))
    insertNamedMCOperand(MI, MCOperand::createImm(0),
                         AMDGPU::OpName::src1_modifiers);
  int VDstInIdx =
      AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::vdst_in);
  if (VDstInIdx != -1)
    insertNamedMCOperand(MI, MI.getOperand(0), AMDGPU::OpName::vdst_in);

  unsigned Opc = MI.getOpcode();
  unsigned DescNumOps = MCII->get(Opc).getNumOperands();
  if (MI.getNumOperands() < DescNumOps &&
      AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::op_sel)) {
    auto Mods = collectVOPModifiers(MI);
    insertNamedMCOperand(MI, MCOperand::createImm(Mods.OpSel),
                         AMDGPU::OpName::op_sel);
  }
  if (MRI.getRegClass(AMDGPU::VGPR_32RegClassID).contains(Sub0))
    BaseReg = AMDGPU::VGPR0;
  else if (MRI.getRegClass(AMDGPU::AGPR_32RegClassID).contains(Sub0))
    BaseReg = AMDGPU::AGPR0;

  assert(BaseReg && "Only vector registers expected");

  return (Sub0 - BaseReg + NumRegs <= 256) ? Reg : AMDGPU::NoRegister;
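// A register tuple must not extend past the last architectural vector
// register (index 255); out-of-range tuples decode to NoRegister.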
  auto TSFlags = MCII->get(MI.getOpcode()).TSFlags;

  int VDstIdx = AMDGPU::getNamedOperandIdx(MI.getOpcode(),
                                           AMDGPU::OpName::vdst);

  int VDataIdx = AMDGPU::getNamedOperandIdx(MI.getOpcode(),
                                            AMDGPU::OpName::vdata);

  int VAddr0Idx =
      AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::vaddr0);
  AMDGPU::OpName RsrcOpName = (TSFlags & SIInstrFlags::MIMG)
                                  ? AMDGPU::OpName::srsrc
                                  : AMDGPU::OpName::rsrc;
  int RsrcIdx = AMDGPU::getNamedOperandIdx(MI.getOpcode(), RsrcOpName);
  int DMaskIdx = AMDGPU::getNamedOperandIdx(MI.getOpcode(),
                                            AMDGPU::OpName::dmask);

  int TFEIdx = AMDGPU::getNamedOperandIdx(MI.getOpcode(),
                                          AMDGPU::OpName::tfe);
  int D16Idx = AMDGPU::getNamedOperandIdx(MI.getOpcode(),
                                          AMDGPU::OpName::d16);
  if (BaseOpcode->BVH) {
  bool IsAtomic = (VDstIdx != -1);

  bool IsPartialNSA = false;
  unsigned AddrSize = Info->VAddrDwords;
    int DimIdx =
        AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::dim);
    int A16Idx =
        AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::a16);
    // ...
    const bool IsA16 = (A16Idx != -1 && MI.getOperand(A16Idx).getImm());
    // ...
    IsNSA = Info->MIMGEncoding == AMDGPU::MIMGEncGfx10NSA ||
            Info->MIMGEncoding == AMDGPU::MIMGEncGfx11NSA ||
            Info->MIMGEncoding == AMDGPU::MIMGEncGfx12;
    if (!IsVSample && AddrSize > 12)
      AddrSize = 16;

    if (AddrSize > Info->VAddrDwords) {
      if (!STI.hasFeature(AMDGPU::FeaturePartialNSAEncoding)) {
        // The NSA encoding does not contain enough operands.
        return;
      }
      IsPartialNSA = true;
    }
  unsigned DMask = MI.getOperand(DMaskIdx).getImm() & 0xf;
  unsigned DstSize = IsGather4 ? 4 : std::max(llvm::popcount(DMask), 1);

  bool D16 = D16Idx >= 0 && MI.getOperand(D16Idx).getImm();
  if (D16 && AMDGPU::hasPackedD16(STI))
    DstSize = (DstSize + 1) / 2;

  if (TFEIdx != -1 && MI.getOperand(TFEIdx).getImm())
    DstSize += 1;

  if (DstSize == Info->VDataDwords && AddrSize == Info->VAddrDwords)
    return;

  int NewOpcode = AMDGPU::getMIMGOpcode(Info->BaseOpcode, Info->MIMGEncoding,
                                        DstSize, AddrSize);
  if (NewOpcode == -1)
    return;
  // Widen the register to the correct number of enabled channels.
  MCRegister NewVdata = AMDGPU::NoRegister;
  if (DstSize != Info->VDataDwords) {
    auto DataRCID = MCII->get(NewOpcode).operands()[VDataIdx].RegClass;

    // Get the first subregister of vdata.
    MCRegister Vdata0 = MI.getOperand(VDataIdx).getReg();
    MCRegister VdataSub0 = MRI.getSubReg(Vdata0, AMDGPU::sub0);
    Vdata0 = (VdataSub0 != 0) ? VdataSub0 : Vdata0;

    const MCRegisterClass &NewRC = MRI.getRegClass(DataRCID);
    NewVdata = MRI.getMatchingSuperReg(Vdata0, AMDGPU::sub0, &NewRC);
  int VAddrSAIdx = IsPartialNSA ? (RsrcIdx - 1) : VAddr0Idx;
  MCRegister NewVAddrSA = AMDGPU::NoRegister;
  if (STI.hasFeature(AMDGPU::FeatureNSAEncoding) && (!IsNSA || IsPartialNSA) &&
      AddrSize != Info->VAddrDwords) {
    MCRegister VAddrSA = MI.getOperand(VAddrSAIdx).getReg();
    MCRegister VAddrSubSA = MRI.getSubReg(VAddrSA, AMDGPU::sub0);
    VAddrSA = VAddrSubSA ? VAddrSubSA : VAddrSA;

    auto AddrRCID = MCII->get(NewOpcode).operands()[VAddrSAIdx].RegClass;
    const MCRegisterClass &NewRC = MRI.getRegClass(AddrRCID);
    NewVAddrSA = MRI.getMatchingSuperReg(VAddrSA, AMDGPU::sub0, &NewRC);
  MI.setOpcode(NewOpcode);

  if (NewVdata != AMDGPU::NoRegister) {
    MI.getOperand(VDataIdx) = MCOperand::createReg(NewVdata);
    // ...
  }

  // Trim excess vaddr operands beyond the real address size.
  assert(AddrSize <= Info->VAddrDwords);
  MI.erase(MI.begin() + VAddr0Idx + AddrSize,
           MI.begin() + VAddr0Idx + Info->VAddrDwords);
  unsigned Opc = MI.getOpcode();
  unsigned DescNumOps = MCII->get(Opc).getNumOperands();
  auto Mods = collectVOPModifiers(MI, /*IsVOP3P=*/true);

  if (MI.getNumOperands() < DescNumOps &&
      AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::vdst_in))
    insertNamedMCOperand(MI, MCOperand::createImm(0), AMDGPU::OpName::vdst_in);

  if (MI.getNumOperands() < DescNumOps &&
      AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::op_sel))
    insertNamedMCOperand(MI, MCOperand::createImm(Mods.OpSel),
                         AMDGPU::OpName::op_sel);
  if (MI.getNumOperands() < DescNumOps &&
      AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::op_sel_hi))
    insertNamedMCOperand(MI, MCOperand::createImm(Mods.OpSelHi),
                         AMDGPU::OpName::op_sel_hi);
  if (MI.getNumOperands() < DescNumOps &&
      AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::neg_lo))
    insertNamedMCOperand(MI, MCOperand::createImm(Mods.NegLo),
                         AMDGPU::OpName::neg_lo);
  if (MI.getNumOperands() < DescNumOps &&
      AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::neg_hi))
    insertNamedMCOperand(MI, MCOperand::createImm(Mods.NegHi),
                         AMDGPU::OpName::neg_hi);
  unsigned Opc = MI.getOpcode();
  unsigned DescNumOps = MCII->get(Opc).getNumOperands();

  if (MI.getNumOperands() < DescNumOps &&
      AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::old))
    insertNamedMCOperand(MI, MCOperand::createReg(MCRegister()),
                         AMDGPU::OpName::old);

  if (MI.getNumOperands() < DescNumOps &&
      AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::src0_modifiers))
    insertNamedMCOperand(MI, MCOperand::createImm(0),
                         AMDGPU::OpName::src0_modifiers);

  if (MI.getNumOperands() < DescNumOps &&
      AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::src1_modifiers))
    insertNamedMCOperand(MI, MCOperand::createImm(0),
                         AMDGPU::OpName::src1_modifiers);
  unsigned Opc = MI.getOpcode();
  unsigned DescNumOps = MCII->get(Opc).getNumOperands();

  if (MI.getNumOperands() < DescNumOps &&
      AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::op_sel)) {
    auto Mods = collectVOPModifiers(MI);
    insertNamedMCOperand(MI, MCOperand::createImm(Mods.OpSel),
                         AMDGPU::OpName::op_sel);
  }
  assert(HasLiteral && "Should have decoded a literal");
MCOperand AMDGPUDisassembler::errOperand(unsigned V,
                                         const Twine &ErrMsg) const {
MCOperand AMDGPUDisassembler::createRegOperand(unsigned RegClassID,
                                               unsigned Val) const {
  const auto &RegCl = AMDGPUMCRegisterClasses[RegClassID];
  if (Val >= RegCl.getNumRegs())
    return errOperand(Val, Twine(getRegClassName(RegClassID)) +
                               ": unknown register " + Twine(Val));
MCOperand AMDGPUDisassembler::createSRegOperand(unsigned SRegClassID,
                                                unsigned Val) const {
  switch (SRegClassID) {
  case AMDGPU::SGPR_32RegClassID:
  case AMDGPU::TTMP_32RegClassID:
    break;
  case AMDGPU::SGPR_64RegClassID:
  case AMDGPU::TTMP_64RegClassID:
    shift = 1;
    break;
  case AMDGPU::SGPR_96RegClassID:
  case AMDGPU::TTMP_96RegClassID:
  case AMDGPU::SGPR_128RegClassID:
  case AMDGPU::TTMP_128RegClassID:
  case AMDGPU::SGPR_256RegClassID:
  case AMDGPU::TTMP_256RegClassID:
  case AMDGPU::SGPR_288RegClassID:
  case AMDGPU::TTMP_288RegClassID:
  case AMDGPU::SGPR_320RegClassID:
  case AMDGPU::TTMP_320RegClassID:
  case AMDGPU::SGPR_352RegClassID:
  case AMDGPU::TTMP_352RegClassID:
  case AMDGPU::SGPR_384RegClassID:
  case AMDGPU::TTMP_384RegClassID:
  case AMDGPU::SGPR_512RegClassID:
  case AMDGPU::TTMP_512RegClassID:
    shift = 2;
    break;
  default:
    llvm_unreachable("unhandled register class");
  }
  if (Val % (1 << shift)) {
    *CommentStream << "Warning: " << getRegClassName(SRegClassID)
                   << ": scalar reg isn't aligned " << Val;
  }
  unsigned RegIdxInVGPR16 = RegIdx * 2 + (IsHi ? 1 : 0);
1530 "Should only decode multiple kimm with VOPD, check VSrc operand types");
1532 return errOperand(Val,
"More than one unique literal is illegal");
  if (HasLiteral) {
    if (Literal64 != Val)
      return errOperand(Val, "More than one unique literal is illegal");
  }
  HasLiteral = true;
  Literal = Literal64 = Val;

  bool UseLit64 = Lo_32(Literal64) != 0;
MCOperand AMDGPUDisassembler::decodeLiteralConstant(const MCInstrDesc &Desc,
                                                    const MCOperandInfo &OpDesc,
                                                    bool ExtendFP64) const {
  // For now all literal constants are supposed to be unsigned integer.
  // ToDo: deal with signed/unsigned 64-bit integer constants.
  // ToDo: deal with float/double constants.
  if (!HasLiteral) {
    if (Bytes.size() < 4) {
      return errOperand(0, "cannot read literal, inst bytes left " +
                               Twine(Bytes.size()));
    }
  int64_t Val = ExtendFP64 ? Literal64 : Literal;

  bool CanUse64BitLiterals =
      STI.hasFeature(AMDGPU::Feature64BitLiterals) &&
      !(Desc.TSFlags & (SIInstrFlags::VOP3 | SIInstrFlags::VOP3P));

  bool UseLit64 = false;
  if (CanUse64BitLiterals) {
    // ...
    UseLit64 = Lo_32(Val) != 0;
MCOperand AMDGPUDisassembler::decodeLiteral64Constant(const MCInst &Inst) const {
  assert(STI.hasFeature(AMDGPU::Feature64BitLiterals));

  if (!HasLiteral) {
    if (Bytes.size() < 8) {
      return errOperand(0, "cannot read literal64, inst bytes left " +
                               Twine(Bytes.size()));
    }
    HasLiteral = true;
    Literal64 = eatBytes<uint64_t>(Bytes);
  }

  bool UseLit64 = false;
  // ...
  UseLit64 = Lo_32(Literal64) != 0;
MCOperand AMDGPUDisassembler::decodeIntImmed(unsigned Imm) {
  assert(Imm >= INLINE_INTEGER_C_MIN && Imm <= INLINE_INTEGER_C_MAX);
  return MCOperand::createImm(
      (Imm <= INLINE_INTEGER_C_POSITIVE_MAX)
          ? (static_cast<int64_t>(Imm) - INLINE_INTEGER_C_MIN)
          : (INLINE_INTEGER_C_POSITIVE_MAX - static_cast<int64_t>(Imm)));
}
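// Encodings at or below INLINE_INTEGER_C_POSITIVE_MAX map to non-negative
// integers (Imm - INLINE_INTEGER_C_MIN); larger encodings map into the
// negative inline range.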
    return 0x3fc45f306dc9c882; // 1/(2*pi)
  switch (Width) {
  case 16:
  case 32: return VGPR_32RegClassID;
  case 64: return VReg_64RegClassID;
  case 96: return VReg_96RegClassID;
  case 128: return VReg_128RegClassID;
  case 160: return VReg_160RegClassID;
  case 192: return VReg_192RegClassID;
  case 256: return VReg_256RegClassID;
  case 288: return VReg_288RegClassID;
  case 320: return VReg_320RegClassID;
  case 352: return VReg_352RegClassID;
  case 384: return VReg_384RegClassID;
  case 512: return VReg_512RegClassID;
  case 1024: return VReg_1024RegClassID;
  default: llvm_unreachable("Invalid register width!");
  }
  switch (Width) {
  case 16:
  case 32: return AGPR_32RegClassID;
  case 64: return AReg_64RegClassID;
  case 96: return AReg_96RegClassID;
  case 128: return AReg_128RegClassID;
  case 160: return AReg_160RegClassID;
  case 256: return AReg_256RegClassID;
  case 288: return AReg_288RegClassID;
  case 320: return AReg_320RegClassID;
  case 352: return AReg_352RegClassID;
  case 384: return AReg_384RegClassID;
  case 512: return AReg_512RegClassID;
  case 1024: return AReg_1024RegClassID;
  default: llvm_unreachable("Invalid register width!");
  }
  switch (Width) {
  case 16:
  case 32: return SGPR_32RegClassID;
  case 64: return SGPR_64RegClassID;
  case 96: return SGPR_96RegClassID;
  case 128: return SGPR_128RegClassID;
  case 160: return SGPR_160RegClassID;
  case 256: return SGPR_256RegClassID;
  case 288: return SGPR_288RegClassID;
  case 320: return SGPR_320RegClassID;
  case 352: return SGPR_352RegClassID;
  case 384: return SGPR_384RegClassID;
  case 512: return SGPR_512RegClassID;
  default: llvm_unreachable("Invalid register width!");
  }
  switch (Width) {
  case 16:
  case 32: return TTMP_32RegClassID;
  case 64: return TTMP_64RegClassID;
  case 128: return TTMP_128RegClassID;
  case 256: return TTMP_256RegClassID;
  case 288: return TTMP_288RegClassID;
  case 320: return TTMP_320RegClassID;
  case 352: return TTMP_352RegClassID;
  case 384: return TTMP_384RegClassID;
  case 512: return TTMP_512RegClassID;
  default: llvm_unreachable("Invalid register width!");
  }
int AMDGPUDisassembler::getTTmpIdx(unsigned Val) const {
  using namespace AMDGPU::EncValues;

  unsigned TTmpMin = isGFX9Plus() ? TTMP_GFX9PLUS_MIN : TTMP_VI_MIN;
  unsigned TTmpMax = isGFX9Plus() ? TTMP_GFX9PLUS_MAX : TTMP_VI_MAX;

  return (TTmpMin <= Val && Val <= TTmpMax) ? Val - TTmpMin : -1;
}
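// Maps a raw source operand value to a trap-temp (TTMP) index, or -1 when the
// value falls outside the TTMP range; the range moved between VI and GFX9+.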
MCOperand AMDGPUDisassembler::decodeSrcOp(const MCInst &Inst, unsigned Width,
                                          unsigned Val) const {
  using namespace AMDGPU::EncValues;

  bool IsAGPR = Val & 512;
  Val &= 511;

  if (VGPR_MIN <= Val && Val <= VGPR_MAX) {
MCOperand AMDGPUDisassembler::decodeNonVGPRSrcOp(const MCInst &Inst,
                                                 unsigned Width,
                                                 unsigned Val) const {
  // Cases when Val{8} is 1 (vgpr, agpr or true16 vgpr) should have been
  // decoded earlier.
  assert(Val < (1 << 8) && "9-bit Src encoding when Val{8} is 0");
  using namespace AMDGPU::EncValues;

  if (Val <= SGPR_MAX) {
    // "SGPR_MIN <= Val" is always true and causes compilation warning.
    static_assert(SGPR_MIN == 0);
    return createSRegOperand(getSgprClassId(Width), Val - SGPR_MIN);
  }

  int TTmpIdx = getTTmpIdx(Val);
  if (TTmpIdx >= 0)
    return createSRegOperand(getTtmpClassId(Width), TTmpIdx);

  if ((INLINE_INTEGER_C_MIN <= Val && Val <= INLINE_INTEGER_C_MAX) ||
      (INLINE_FLOATING_C_MIN <= Val && Val <= INLINE_FLOATING_C_MAX) ||
      Val == LITERAL_CONST)
    return MCOperand::createImm(Val);

  if (Val == LITERAL64_CONST && STI.hasFeature(AMDGPU::Feature64BitLiterals)) {
MCOperand AMDGPUDisassembler::decodeVOPDDstYOp(MCInst &Inst,
                                               unsigned Val) const {
  int VDstXInd =
      AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::vdstX);
  assert(VDstXInd != -1);
  assert(Inst.getOperand(VDstXInd).isReg());
  unsigned XDstReg = MRI.getEncodingValue(Inst.getOperand(VDstXInd).getReg());
  Val |= ~XDstReg & 1;
MCOperand AMDGPUDisassembler::decodeSDWASrc(unsigned Width,
                                            const unsigned Val) const {
  using namespace AMDGPU::SDWA;
  using namespace AMDGPU::EncValues;

  if (STI.hasFeature(AMDGPU::FeatureGFX9) ||
      STI.hasFeature(AMDGPU::FeatureGFX10)) {
    // The cast to int avoids a "comparison with unsigned is always true"
    // warning.
    if (int(SDWA9EncValues::SRC_VGPR_MIN) <= int(Val) &&
        Val <= SDWA9EncValues::SRC_VGPR_MAX) {
      return createRegOperand(getVgprClassId(Width),
                              Val - SDWA9EncValues::SRC_VGPR_MIN);
    }
    if (SDWA9EncValues::SRC_SGPR_MIN <= Val &&
        Val <= (isGFX10Plus() ? SDWA9EncValues::SRC_SGPR_MAX_GFX10
                              : SDWA9EncValues::SRC_SGPR_MAX_SI)) {
      return createSRegOperand(getSgprClassId(Width),
                               Val - SDWA9EncValues::SRC_SGPR_MIN);
    }
    if (SDWA9EncValues::SRC_TTMP_MIN <= Val &&
        Val <= SDWA9EncValues::SRC_TTMP_MAX) {
      return createSRegOperand(getTtmpClassId(Width),
                               Val - SDWA9EncValues::SRC_TTMP_MIN);
    }

    const unsigned SVal = Val - SDWA9EncValues::SRC_SGPR_MIN;

    if ((INLINE_INTEGER_C_MIN <= SVal && SVal <= INLINE_INTEGER_C_MAX) ||
        (INLINE_FLOATING_C_MIN <= SVal && SVal <= INLINE_FLOATING_C_MAX))
      return MCOperand::createImm(SVal);
    // ...
  }

  if (STI.hasFeature(AMDGPU::FeatureVolcanicIslands))
    return createRegOperand(getVgprClassId(Width), Val);
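// On VI an SDWA source is always a VGPR; the SDWA9 form used on GFX9/GFX10
// additionally distinguishes SGPR, TTMP and inline-constant ranges above.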
  assert((STI.hasFeature(AMDGPU::FeatureGFX9) ||
          STI.hasFeature(AMDGPU::FeatureGFX10)) &&
         "SDWAVopcDst should be present only on GFX9+");

  bool IsWave32 = STI.hasFeature(AMDGPU::FeatureWavefrontSize32);

  if (Val & SDWA9EncValues::VOPC_DST_VCC_MASK) {
    Val &= SDWA9EncValues::VOPC_DST_SGPR_MASK;
MCOperand AMDGPUDisassembler::decodeBoolReg(const MCInst &Inst,
                                            unsigned Val) const {
  return STI.hasFeature(AMDGPU::FeatureWavefrontSize32)
             ? decodeSrcOp(Inst, 32, Val)
             : decodeSrcOp(Inst, 64, Val);
}

MCOperand AMDGPUDisassembler::decodeSplitBarrier(const MCInst &Inst,
                                                 unsigned Val) const {
  auto [Version, W64, W32, MDP] = Encoding::decode(Imm);

  // Decode of an invalid value produces 0, so validate by re-encoding.
  if (Encoding::encode(Version, W64, W32, MDP) != Imm)
    return errOperand(Imm, "invalid version immediate");
  // ...
  if (I == Versions.end())
bool AMDGPUDisassembler::isVI() const {
  return STI.hasFeature(AMDGPU::FeatureVolcanicIslands);
}
bool AMDGPUDisassembler::isGFX90A() const {
  return STI.hasFeature(AMDGPU::FeatureGFX90AInsts);
}
bool AMDGPUDisassembler::isGFX11() const {
  return STI.hasFeature(AMDGPU::FeatureGFX11);
}
bool AMDGPUDisassembler::isGFX12() const {
  return STI.hasFeature(AMDGPU::FeatureGFX12);
}
bool AMDGPUDisassembler::hasArchitectedFlatScratch() const {
  return STI.hasFeature(AMDGPU::FeatureArchitectedFlatScratch);
}
  if (PopCount == 1) {
    S << "bit (" << (TrailingZeros + BaseBytes * CHAR_BIT) << ')';
  } else {
    S << "bits in range ("
      << (TrailingZeros + PopCount - 1 + BaseBytes * CHAR_BIT) << ':'
      << (TrailingZeros + BaseBytes * CHAR_BIT) << ')';
  }
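// Macros used while disassembling the HSA kernel descriptor: GET_FIELD extracts
// a bitfield from the current four-byte word, PRINT_DIRECTIVE and
// PRINT_PSEUDO_DIRECTIVE_COMMENT emit .amdhsa_* directives or comments, and the
// CHECK_RESERVED_BITS* family rejects descriptors with reserved bits set.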
#define GET_FIELD(MASK) (AMDHSA_BITS_GET(FourByteBuffer, MASK))
#define PRINT_DIRECTIVE(DIRECTIVE, MASK)                                       \
  do {                                                                         \
    KdStream << Indent << DIRECTIVE " " << GET_FIELD(MASK) << '\n';            \
  } while (0)
#define PRINT_PSEUDO_DIRECTIVE_COMMENT(DIRECTIVE, MASK)                        \
  do {                                                                         \
    KdStream << Indent << MAI.getCommentString() << ' ' << DIRECTIVE " "       \
             << GET_FIELD(MASK) << '\n';                                       \
  } while (0)

#define CHECK_RESERVED_BITS_IMPL(MASK, DESC, MSG)                              \
  do {                                                                         \
    if (FourByteBuffer & (MASK)) {                                             \
      return createStringError(std::errc::invalid_argument,                    \
                               "kernel descriptor " DESC                       \
                               " reserved %s set" MSG,                         \
                               getBitRangeFromMask((MASK), 0).c_str());        \
    }                                                                          \
  } while (0)

#define CHECK_RESERVED_BITS(MASK) CHECK_RESERVED_BITS_IMPL(MASK, #MASK, "")
#define CHECK_RESERVED_BITS_MSG(MASK, MSG)                                     \
  CHECK_RESERVED_BITS_IMPL(MASK, #MASK, ", " MSG)
#define CHECK_RESERVED_BITS_DESC(MASK, DESC)                                   \
  CHECK_RESERVED_BITS_IMPL(MASK, DESC, "")
#define CHECK_RESERVED_BITS_DESC_MSG(MASK, DESC, MSG)                          \
  CHECK_RESERVED_BITS_IMPL(MASK, DESC, ", " MSG)
  uint32_t GranulatedWorkitemVGPRCount =
      GET_FIELD(COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT);

  uint32_t NextFreeVGPR =
      (GranulatedWorkitemVGPRCount + 1) *
      AMDGPU::IsaInfo::getVGPREncodingGranule(&STI, EnableWavefrontSize32);

  KdStream << Indent << ".amdhsa_next_free_vgpr " << NextFreeVGPR << '\n';
  uint32_t GranulatedWavefrontSGPRCount =
      GET_FIELD(COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT);

  if (isGFX10Plus())
    CHECK_RESERVED_BITS_MSG(COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT,
                            "must be zero on gfx10+");

  uint32_t NextFreeSGPR = (GranulatedWavefrontSGPRCount + 1) *
                          AMDGPU::IsaInfo::getSGPREncodingGranule(&STI);

  KdStream << Indent << ".amdhsa_reserve_vcc " << 0 << '\n';
  if (!hasArchitectedFlatScratch())
    KdStream << Indent << ".amdhsa_reserve_flat_scratch " << 0 << '\n';
  KdStream << Indent << ".amdhsa_reserve_xnack_mask " << 0 << '\n';
  KdStream << Indent << ".amdhsa_next_free_sgpr " << NextFreeSGPR << "\n";
  PRINT_DIRECTIVE(".amdhsa_float_round_mode_32",
                  COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_32);
  PRINT_DIRECTIVE(".amdhsa_float_round_mode_16_64",
                  COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_16_64);
  PRINT_DIRECTIVE(".amdhsa_float_denorm_mode_32",
                  COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_32);
  PRINT_DIRECTIVE(".amdhsa_float_denorm_mode_16_64",
                  COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_16_64);

  if (!isGFX12Plus())
    PRINT_DIRECTIVE(".amdhsa_dx10_clamp",
                    COMPUTE_PGM_RSRC1_GFX6_GFX11_ENABLE_DX10_CLAMP);

  if (!isGFX12Plus())
    PRINT_DIRECTIVE(".amdhsa_ieee_mode",
                    COMPUTE_PGM_RSRC1_GFX6_GFX11_ENABLE_IEEE_MODE);

  if (isGFX9Plus())
    PRINT_DIRECTIVE(".amdhsa_fp16_overflow",
                    COMPUTE_PGM_RSRC1_GFX9_PLUS_FP16_OVFL);

  if (!isGFX9Plus())
    CHECK_RESERVED_BITS_DESC_MSG(COMPUTE_PGM_RSRC1_PRE_GFX9_RESERVED0,
                                 "COMPUTE_PGM_RSRC1", "must be zero pre-gfx9");

  if (isGFX1250())
    PRINT_PSEUDO_DIRECTIVE_COMMENT(
        "FLAT_SCRATCH_IS_NV", COMPUTE_PGM_RSRC1_GFX125_FLAT_SCRATCH_IS_NV);

  CHECK_RESERVED_BITS_DESC(COMPUTE_PGM_RSRC1_RESERVED1, "COMPUTE_PGM_RSRC1");

  if (isGFX10Plus()) {
    PRINT_DIRECTIVE(".amdhsa_workgroup_processor_mode",
                    COMPUTE_PGM_RSRC1_GFX10_PLUS_WGP_MODE);
    PRINT_DIRECTIVE(".amdhsa_memory_ordered",
                    COMPUTE_PGM_RSRC1_GFX10_PLUS_MEM_ORDERED);
    PRINT_DIRECTIVE(".amdhsa_forward_progress",
                    COMPUTE_PGM_RSRC1_GFX10_PLUS_FWD_PROGRESS);
  }
  CHECK_RESERVED_BITS_DESC(COMPUTE_PGM_RSRC1_RESERVED2, "COMPUTE_PGM_RSRC1");

  if (isGFX12Plus())
    PRINT_DIRECTIVE(".amdhsa_round_robin_scheduling",
                    COMPUTE_PGM_RSRC1_GFX12_PLUS_ENABLE_WG_RR_EN);
  if (hasArchitectedFlatScratch())
    PRINT_DIRECTIVE(".amdhsa_enable_private_segment",
                    COMPUTE_PGM_RSRC2_ENABLE_PRIVATE_SEGMENT);
  else
    PRINT_DIRECTIVE(".amdhsa_system_sgpr_private_segment_wavefront_offset",
                    COMPUTE_PGM_RSRC2_ENABLE_PRIVATE_SEGMENT);
  PRINT_DIRECTIVE(".amdhsa_system_sgpr_workgroup_id_x",
                  COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_X);
  PRINT_DIRECTIVE(".amdhsa_system_sgpr_workgroup_id_y",
                  COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Y);
  PRINT_DIRECTIVE(".amdhsa_system_sgpr_workgroup_id_z",
                  COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Z);
  PRINT_DIRECTIVE(".amdhsa_system_sgpr_workgroup_info",
                  COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_INFO);
  PRINT_DIRECTIVE(".amdhsa_system_vgpr_workitem_id",
                  COMPUTE_PGM_RSRC2_ENABLE_VGPR_WORKITEM_ID);

  PRINT_DIRECTIVE(
      ".amdhsa_exception_fp_ieee_invalid_op",
      COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INVALID_OPERATION);
  PRINT_DIRECTIVE(".amdhsa_exception_fp_denorm_src",
                  COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_FP_DENORMAL_SOURCE);
  PRINT_DIRECTIVE(
      ".amdhsa_exception_fp_ieee_div_zero",
      COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_DIVISION_BY_ZERO);
  PRINT_DIRECTIVE(".amdhsa_exception_fp_ieee_overflow",
                  COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_OVERFLOW);
  PRINT_DIRECTIVE(".amdhsa_exception_fp_ieee_underflow",
                  COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_UNDERFLOW);
  PRINT_DIRECTIVE(".amdhsa_exception_fp_ieee_inexact",
                  COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INEXACT);
  PRINT_DIRECTIVE(".amdhsa_exception_int_div_zero",
                  COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_INT_DIVIDE_BY_ZERO);
    KdStream << Indent << ".amdhsa_accum_offset "
             << (GET_FIELD(COMPUTE_PGM_RSRC3_GFX90A_ACCUM_OFFSET) + 1) * 4
             << '\n';

    PRINT_DIRECTIVE(".amdhsa_tg_split", COMPUTE_PGM_RSRC3_GFX90A_TG_SPLIT);

    CHECK_RESERVED_BITS_DESC_MSG(COMPUTE_PGM_RSRC3_GFX90A_RESERVED0,
                                 "COMPUTE_PGM_RSRC3", "must be zero on gfx90a");
    CHECK_RESERVED_BITS_DESC_MSG(COMPUTE_PGM_RSRC3_GFX90A_RESERVED1,
                                 "COMPUTE_PGM_RSRC3", "must be zero on gfx90a");
  } else if (isGFX10Plus()) {
    // SHARED_VGPR_COUNT.
    if (!isGFX12Plus()) {
      if (!EnableWavefrontSize32 || !*EnableWavefrontSize32) {
        PRINT_DIRECTIVE(".amdhsa_shared_vgpr_count",
                        COMPUTE_PGM_RSRC3_GFX10_GFX11_SHARED_VGPR_COUNT);
      } else {
        PRINT_PSEUDO_DIRECTIVE_COMMENT(
            "SHARED_VGPR_COUNT",
            COMPUTE_PGM_RSRC3_GFX10_GFX11_SHARED_VGPR_COUNT);
      }
    } else {
      CHECK_RESERVED_BITS_DESC_MSG(COMPUTE_PGM_RSRC3_GFX12_PLUS_RESERVED0,
                                   "COMPUTE_PGM_RSRC3",
                                   "must be zero on gfx12+");
    }

    // INST_PREF_SIZE and trap handler controls.
    if (isGFX11()) {
      PRINT_PSEUDO_DIRECTIVE_COMMENT("INST_PREF_SIZE",
                                     COMPUTE_PGM_RSRC3_GFX11_INST_PREF_SIZE);
      PRINT_PSEUDO_DIRECTIVE_COMMENT("TRAP_ON_START",
                                     COMPUTE_PGM_RSRC3_GFX11_TRAP_ON_START);
      PRINT_PSEUDO_DIRECTIVE_COMMENT("TRAP_ON_END",
                                     COMPUTE_PGM_RSRC3_GFX11_TRAP_ON_END);
    } else if (isGFX12Plus()) {
      PRINT_PSEUDO_DIRECTIVE_COMMENT(
          "INST_PREF_SIZE", COMPUTE_PGM_RSRC3_GFX12_PLUS_INST_PREF_SIZE);
    } else {
      CHECK_RESERVED_BITS_DESC_MSG(COMPUTE_PGM_RSRC3_GFX10_RESERVED1,
                                   "COMPUTE_PGM_RSRC3",
                                   "must be zero on gfx10");
    }

    CHECK_RESERVED_BITS_DESC_MSG(COMPUTE_PGM_RSRC3_GFX10_GFX11_RESERVED2,
                                 "COMPUTE_PGM_RSRC3", "must be zero on gfx10+");

    // GLG_EN.
    if (isGFX12Plus())
      PRINT_PSEUDO_DIRECTIVE_COMMENT("GLG_EN",
                                     COMPUTE_PGM_RSRC3_GFX12_PLUS_GLG_EN);
    else
      CHECK_RESERVED_BITS_DESC_MSG(COMPUTE_PGM_RSRC3_GFX10_GFX11_RESERVED3,
                                   "COMPUTE_PGM_RSRC3",
                                   "must be zero on gfx10 or gfx11");

    if (isGFX1250()) {
      PRINT_PSEUDO_DIRECTIVE_COMMENT("NAMED_BAR_CNT",
                                     COMPUTE_PGM_RSRC3_GFX125_NAMED_BAR_CNT);
      PRINT_PSEUDO_DIRECTIVE_COMMENT(
          "ENABLE_DYNAMIC_VGPR", COMPUTE_PGM_RSRC3_GFX125_ENABLE_DYNAMIC_VGPR);
      PRINT_PSEUDO_DIRECTIVE_COMMENT("TCP_SPLIT",
                                     COMPUTE_PGM_RSRC3_GFX125_TCP_SPLIT);
      PRINT_PSEUDO_DIRECTIVE_COMMENT(
          "ENABLE_DIDT_THROTTLE",
          COMPUTE_PGM_RSRC3_GFX125_ENABLE_DIDT_THROTTLE);
    } else {
      CHECK_RESERVED_BITS_DESC_MSG(COMPUTE_PGM_RSRC3_GFX10_GFX11_RESERVED4,
                                   "COMPUTE_PGM_RSRC3",
                                   "must be zero on gfx10+");
    }

    // IMAGE_OP.
    if (isGFX11Plus())
      PRINT_PSEUDO_DIRECTIVE_COMMENT("IMAGE_OP",
                                     COMPUTE_PGM_RSRC3_GFX11_PLUS_IMAGE_OP);
    else
      CHECK_RESERVED_BITS_DESC_MSG(COMPUTE_PGM_RSRC3_GFX10_RESERVED5,
                                   "COMPUTE_PGM_RSRC3",
                                   "must be zero on gfx10");
  } else if (FourByteBuffer) {
    return createStringError(
        std::errc::invalid_argument,
        "kernel descriptor COMPUTE_PGM_RSRC3 must be all zero before gfx9");
  }
#undef PRINT_PSEUDO_DIRECTIVE_COMMENT
#undef PRINT_DIRECTIVE
#undef GET_FIELD
#undef CHECK_RESERVED_BITS_IMPL
#undef CHECK_RESERVED_BITS
#undef CHECK_RESERVED_BITS_MSG
#undef CHECK_RESERVED_BITS_DESC
#undef CHECK_RESERVED_BITS_DESC_MSG
static Error createReservedKDBitsError(uint32_t Mask, unsigned BaseBytes,
                                       const char *Msg = "") {
  return createStringError(
      std::errc::invalid_argument, "kernel descriptor reserved %s set%s%s",
      getBitRangeFromMask(Mask, BaseBytes).c_str(), *Msg ? ", " : "", Msg);
}
static Error createReservedKDBytesError(unsigned BaseInBytes,
                                        unsigned WidthInBytes) {
  return createStringError(
      std::errc::invalid_argument,
      "kernel descriptor reserved bits in range (%u:%u) set",
      (BaseInBytes + WidthInBytes) * CHAR_BIT - 1, BaseInBytes * CHAR_BIT);
}
#define PRINT_DIRECTIVE(DIRECTIVE, MASK)                                       \
  do {                                                                         \
    KdStream << Indent << DIRECTIVE " "                                        \
             << ((TwoByteBuffer & MASK) >> (MASK##_SHIFT)) << '\n';            \
  } while (0)
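// This redefinition of PRINT_DIRECTIVE reads its field out of the two-byte
// kernel-code-properties buffer rather than out of FourByteBuffer.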
  assert(Bytes.size() == 64);

  switch (Cursor.tell()) {
  case amdhsa::GROUP_SEGMENT_FIXED_SIZE_OFFSET:
    FourByteBuffer = DE.getU32(Cursor);
    KdStream << Indent << ".amdhsa_group_segment_fixed_size " << FourByteBuffer
             << '\n';
    return true;

  case amdhsa::PRIVATE_SEGMENT_FIXED_SIZE_OFFSET:
    FourByteBuffer = DE.getU32(Cursor);
    KdStream << Indent << ".amdhsa_private_segment_fixed_size "
             << FourByteBuffer << '\n';
    return true;

  case amdhsa::KERNARG_SIZE_OFFSET:
    FourByteBuffer = DE.getU32(Cursor);
    KdStream << Indent << ".amdhsa_kernarg_size "
             << FourByteBuffer << '\n';
    return true;
    // 4 reserved bytes; all must be zero.
    ReservedBytes = DE.getBytes(Cursor, 4);
    for (int I = 0; I < 4; ++I) {
      if (ReservedBytes[I] != 0)
        return createReservedKDBytesError(amdhsa::RESERVED0_OFFSET, 4);
    }
    return true;

    // ...

    // 20 reserved bytes; all must be zero.
    ReservedBytes = DE.getBytes(Cursor, 20);
    for (int I = 0; I < 20; ++I) {
      if (ReservedBytes[I] != 0)
        return createReservedKDBytesError(amdhsa::RESERVED1_OFFSET, 20);
    }
    return true;
  case amdhsa::COMPUTE_PGM_RSRC3_OFFSET:
    FourByteBuffer = DE.getU32(Cursor);
    return decodeCOMPUTE_PGM_RSRC3(FourByteBuffer, KdStream);

  case amdhsa::COMPUTE_PGM_RSRC1_OFFSET:
    FourByteBuffer = DE.getU32(Cursor);
    return decodeCOMPUTE_PGM_RSRC1(FourByteBuffer, KdStream);

  case amdhsa::COMPUTE_PGM_RSRC2_OFFSET:
    FourByteBuffer = DE.getU32(Cursor);
    return decodeCOMPUTE_PGM_RSRC2(FourByteBuffer, KdStream);
  case amdhsa::KERNEL_CODE_PROPERTIES_OFFSET:
    TwoByteBuffer = DE.getU16(Cursor);

    PRINT_DIRECTIVE(".amdhsa_user_sgpr_private_segment_buffer",
                    KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER);
    PRINT_DIRECTIVE(".amdhsa_user_sgpr_dispatch_ptr",
                    KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR);
    PRINT_DIRECTIVE(".amdhsa_user_sgpr_queue_ptr",
                    KERNEL_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR);
    PRINT_DIRECTIVE(".amdhsa_user_sgpr_kernarg_segment_ptr",
                    KERNEL_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR);
    PRINT_DIRECTIVE(".amdhsa_user_sgpr_dispatch_id",
                    KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID);
    if (!hasArchitectedFlatScratch())
      PRINT_DIRECTIVE(".amdhsa_user_sgpr_flat_scratch_init",
                      KERNEL_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT);
    PRINT_DIRECTIVE(".amdhsa_user_sgpr_private_segment_size",
                    KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE);

    if (TwoByteBuffer & KERNEL_CODE_PROPERTY_RESERVED0)
      return createReservedKDBitsError(KERNEL_CODE_PROPERTY_RESERVED0,
                                       amdhsa::KERNEL_CODE_PROPERTIES_OFFSET);

    // ENABLE_WAVEFRONT_SIZE32 is only valid on gfx10+.
    if (!isGFX10Plus() &&
        (TwoByteBuffer & KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32)) {
      return createReservedKDBitsError(
          KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32,
          amdhsa::KERNEL_CODE_PROPERTIES_OFFSET, "must be zero on gfx9");
    }
    if (isGFX10Plus())
      PRINT_DIRECTIVE(".amdhsa_wavefront_size32",
                      KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32);

    PRINT_DIRECTIVE(".amdhsa_uses_dynamic_stack",
                    KERNEL_CODE_PROPERTY_USES_DYNAMIC_STACK);
    if (TwoByteBuffer & KERNEL_CODE_PROPERTY_RESERVED1) {
    TwoByteBuffer = DE.getU16(Cursor);
    if (TwoByteBuffer & KERNARG_PRELOAD_SPEC_LENGTH) {
      PRINT_DIRECTIVE(".amdhsa_user_sgpr_kernarg_preload_length",
                      KERNARG_PRELOAD_SPEC_LENGTH);
    }
    if (TwoByteBuffer & KERNARG_PRELOAD_SPEC_OFFSET) {
      PRINT_DIRECTIVE(".amdhsa_user_sgpr_kernarg_preload_offset",
                      KERNARG_PRELOAD_SPEC_OFFSET);
    }
    return true;
    // 4 reserved bytes; all must be zero.
    ReservedBytes = DE.getBytes(Cursor, 4);
    for (int I = 0; I < 4; ++I) {
      if (ReservedBytes[I] != 0)
        return createReservedKDBytesError(amdhsa::RESERVED2_OFFSET, 4);
    }
    return true;
#undef PRINT_DIRECTIVE
  if (Bytes.size() != 64 || KdAddress % 64 != 0)
    return createStringError(std::errc::invalid_argument,
                             "kernel descriptor must be 64-byte aligned");
  // The wavefront-size bit must be known before decoding the VGPR granule.
  if (isGFX10Plus()) {
    uint16_t KernelCodeProperties =
        support::endian::read16(&Bytes[amdhsa::KERNEL_CODE_PROPERTIES_OFFSET],
                                llvm::endianness::little);
    EnableWavefrontSize32 =
        AMDHSA_BITS_GET(KernelCodeProperties,
                        amdhsa::KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32);
  }
  KdStream << ".amdhsa_kernel " << KdName << '\n';

  DataExtractor::Cursor C(0);
  while (C && C.tell() < Bytes.size()) {
    Expected<bool> Res = decodeKernelDescriptorDirective(C, Bytes, KdStream);
    cantFail(C.takeError());
    if (!Res)
      return Res;
  }

  KdStream << ".end_amdhsa_kernel\n";
2761 "code object v2 is not supported");
const MCExpr *AMDGPUDisassembler::createConstantSymbolExpr(StringRef Id,
                                                           int64_t Val) {
  MCContext &Ctx = getContext();
  MCSymbol *Sym = Ctx.getOrCreateSymbol(Id);
  if (!Sym->isVariable()) {
    Sym->setVariableValue(MCConstantExpr::create(Val, Ctx));
  } else {
    int64_t Res = ~Val;
    bool Valid = Sym->getVariableValue()->evaluateAsAbsolute(Res);
    if (!Valid || Res != Val)
      Ctx.reportWarning(SMLoc(), "unsupported redefinition of " + Id);
  }
  return MCSymbolRefExpr::create(Sym, Ctx);
}
  const uint64_t TSFlags = MCII->get(MI.getOpcode()).TSFlags;
  if (Result != Symbols->end()) {
    auto *Sym = Ctx.getOrCreateSymbol(Result->Name);
    Inst.addOperand(MCOperand::createExpr(MCSymbolRefExpr::create(Sym, Ctx)));
    return true;
  }

  ReferencedAddresses.push_back(static_cast<uint64_t>(Value));
static MCSymbolizer *
createAMDGPUSymbolizer(const Triple &, LLVMOpInfoCallback /*GetOpInfo*/,
                       LLVMSymbolLookupCallback /*SymbolLookUp*/, void *DisInfo,
                       MCContext *Ctx,
                       std::unique_ptr<MCRelocationInfo> &&RelInfo) {