1//===- AMDGPUDisassembler.cpp - Disassembler for AMDGPU ISA ---------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9//===----------------------------------------------------------------------===//
10//
11/// \file
12///
13/// This file contains the definition of the AMDGPU ISA disassembler.
14//
15//===----------------------------------------------------------------------===//
16
17// ToDo: What to do with instruction suffixes (v_mov_b32 vs v_mov_b32_e32)?
18
19#include "Disassembler/AMDGPUDisassembler.h"
20#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
21#include "SIDefines.h"
22#include "SIRegisterInfo.h"
23#include "TargetInfo/AMDGPUTargetInfo.h"
24#include "Utils/AMDGPUBaseInfo.h"
25#include "llvm-c/DisassemblerTypes.h"
26#include "llvm/BinaryFormat/ELF.h"
27#include "llvm/MC/MCAsmInfo.h"
28#include "llvm/MC/MCContext.h"
29#include "llvm/MC/MCDecoderOps.h"
30#include "llvm/MC/MCExpr.h"
31#include "llvm/MC/MCInstrDesc.h"
32#include "llvm/MC/MCRegisterInfo.h"
33#include "llvm/MC/MCSubtargetInfo.h"
34#include "llvm/MC/TargetRegistry.h"
35#include "llvm/Support/AMDHSAKernelDescriptor.h"
36
37using namespace llvm;
38
39#define DEBUG_TYPE "amdgpu-disassembler"
40
41#define SGPR_MAX \
42 (isGFX10Plus() ? AMDGPU::EncValues::SGPR_MAX_GFX10 \
43 : AMDGPU::EncValues::SGPR_MAX_SI)
44
45using DecodeStatus = llvm::MCDisassembler::DecodeStatus;
46
47AMDGPUDisassembler::AMDGPUDisassembler(const MCSubtargetInfo &STI,
48 MCContext &Ctx, MCInstrInfo const *MCII)
49 : MCDisassembler(STI, Ctx), MCII(MCII), MRI(*Ctx.getRegisterInfo()),
50 MAI(*Ctx.getAsmInfo()), TargetMaxInstBytes(MAI.getMaxInstLength(&STI)) {
51 // ToDo: AMDGPUDisassembler supports only VI ISA.
52 if (!STI.hasFeature(AMDGPU::FeatureGCN3Encoding) && !isGFX10Plus())
53 report_fatal_error("Disassembly not yet supported for subtarget");
54}
55
56inline static MCDisassembler::DecodeStatus
57addOperand(MCInst &Inst, const MCOperand& Opnd) {
58 Inst.addOperand(Opnd);
59 return Opnd.isValid() ?
60 MCDisassembler::Success :
61 MCDisassembler::Fail;
62}
63
64static int insertNamedMCOperand(MCInst &MI, const MCOperand &Op,
65 uint16_t NameIdx) {
66 int OpIdx = AMDGPU::getNamedOperandIdx(MI.getOpcode(), NameIdx);
67 if (OpIdx != -1) {
68 auto I = MI.begin();
69 std::advance(I, OpIdx);
70 MI.insert(I, Op);
71 }
72 return OpIdx;
73}
74
75static DecodeStatus decodeSOPPBrTarget(MCInst &Inst, unsigned Imm,
76 uint64_t Addr,
77 const MCDisassembler *Decoder) {
78 auto DAsm = static_cast<const AMDGPUDisassembler*>(Decoder);
79
80 // Our branches take a simm16, but we need two extra bits to account for the
81 // factor of 4.
82 APInt SignedOffset(18, Imm * 4, true);
83 int64_t Offset = (SignedOffset.sext(64) + 4 + Addr).getSExtValue();
84
85 if (DAsm->tryAddingSymbolicOperand(Inst, Offset, Addr, true, 2, 2, 0))
86 return MCDisassembler::Success;
87 return addOperand(Inst, MCOperand::createImm(Imm));
88}
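// A minimal standalone sketch of the branch-target arithmetic above, assuming
// only what the comment states: the encoded simm16 counts dwords relative to
// the instruction following the 4-byte branch. The name is illustrative.
static int64_t sketchSOPPBrTarget(int16_t Simm16, uint64_t Addr) {
  // Scale the dword delta to bytes, then bias past the branch itself.
  return static_cast<int64_t>(Simm16) * 4 + 4 + static_cast<int64_t>(Addr);
}
// e.g. sketchSOPPBrTarget(-1, 0x100) == 0x100: a branch to its own address.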
89
90static DecodeStatus decodeSMEMOffset(MCInst &Inst, unsigned Imm, uint64_t Addr,
91 const MCDisassembler *Decoder) {
92 auto DAsm = static_cast<const AMDGPUDisassembler*>(Decoder);
93 int64_t Offset;
94 if (DAsm->isGFX12Plus()) { // GFX12 supports 24-bit signed offsets.
95 Offset = SignExtend64<24>(Imm);
96 } else if (DAsm->isVI()) { // VI supports 20-bit unsigned offsets.
97 Offset = Imm & 0xFFFFF;
98 } else { // GFX9+ supports 21-bit signed offsets.
99 Offset = SignExtend64<21>(Imm);
100 }
101 return addOperand(Inst, MCOperand::createImm(Offset));
102}
103
104static DecodeStatus decodeBoolReg(MCInst &Inst, unsigned Val, uint64_t Addr,
105 const MCDisassembler *Decoder) {
106 auto DAsm = static_cast<const AMDGPUDisassembler*>(Decoder);
107 return addOperand(Inst, DAsm->decodeBoolReg(Val));
108}
109
110#define DECODE_OPERAND(StaticDecoderName, DecoderName) \
111 static DecodeStatus StaticDecoderName(MCInst &Inst, unsigned Imm, \
112 uint64_t /*Addr*/, \
113 const MCDisassembler *Decoder) { \
114 auto DAsm = static_cast<const AMDGPUDisassembler *>(Decoder); \
115 return addOperand(Inst, DAsm->DecoderName(Imm)); \
116 }
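// For illustration, DECODE_SDWA(Src32) later in this file expands through
// DECODE_OPERAND to roughly the following callback (a sketch of the
// preprocessor output, not additional code):
//
//   static DecodeStatus decodeSDWASrc32(MCInst &Inst, unsigned Imm,
//                                       uint64_t /*Addr*/,
//                                       const MCDisassembler *Decoder) {
//     auto DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
//     return addOperand(Inst, DAsm->decodeSDWASrc32(Imm));
//   }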
117
118// Decoder for registers; decodes directly using RegClassID. Imm (8-bit) is
119// the register number. Used by VGPR-only and AGPR-only operands.
120#define DECODE_OPERAND_REG_8(RegClass) \
121 static DecodeStatus Decode##RegClass##RegisterClass( \
122 MCInst &Inst, unsigned Imm, uint64_t /*Addr*/, \
123 const MCDisassembler *Decoder) { \
124 assert(Imm < (1 << 8) && "8-bit encoding"); \
125 auto DAsm = static_cast<const AMDGPUDisassembler *>(Decoder); \
126 return addOperand( \
127 Inst, DAsm->createRegOperand(AMDGPU::RegClass##RegClassID, Imm)); \
128 }
129
130#define DECODE_SrcOp(Name, EncSize, OpWidth, EncImm, MandatoryLiteral, \
131 ImmWidth) \
132 static DecodeStatus Name(MCInst &Inst, unsigned Imm, uint64_t /*Addr*/, \
133 const MCDisassembler *Decoder) { \
134 assert(Imm < (1 << EncSize) && #EncSize "-bit encoding"); \
135 auto DAsm = static_cast<const AMDGPUDisassembler *>(Decoder); \
136 return addOperand(Inst, \
137 DAsm->decodeSrcOp(AMDGPUDisassembler::OpWidth, EncImm, \
138 MandatoryLiteral, ImmWidth)); \
139 }
140
141// Decoder for registers. Imm (7-bit) is the register number; uses decodeSrcOp
142// to resolve the register class. Used by SGPR-only operands.
143#define DECODE_OPERAND_REG_7(RegClass, OpWidth) \
144 DECODE_SrcOp(Decode##RegClass##RegisterClass, 7, OpWidth, Imm, false, 0)
145
146// Decoder for registers. Imm (10-bit): Imm{7-0} is the register number,
147// Imm{9} is acc (agpr or vgpr), and Imm{8} should be 0 (see VOP3Pe_SMFMAC).
148// Set Imm{8} to 1 (IS_VGPR) to decode using 'enum10' from decodeSrcOp.
149// Used by AV_ register classes (AGPR or VGPR only register operands).
150#define DECODE_OPERAND_REG_AV10(RegClass, OpWidth) \
151 DECODE_SrcOp(Decode##RegClass##RegisterClass, 10, OpWidth, \
152 Imm | AMDGPU::EncValues::IS_VGPR, false, 0)
153
154// Decoder for Src(9-bit encoding) registers only.
155#define DECODE_OPERAND_SRC_REG_9(RegClass, OpWidth) \
156 DECODE_SrcOp(decodeOperand_##RegClass, 9, OpWidth, Imm, false, 0)
157
158// Decoder for Src(9-bit encoding) AGPR, register number encoded in 9bits, set
159// Imm{9} to 1 (set acc) and decode using 'enum10' from decodeSrcOp, registers
160// only.
161#define DECODE_OPERAND_SRC_REG_A9(RegClass, OpWidth) \
162 DECODE_SrcOp(decodeOperand_##RegClass, 9, OpWidth, Imm | 512, false, 0)
163
164// Decoder for 'enum10' from decodeSrcOp, Imm{0-8} is 9-bit Src encoding
165// Imm{9} is acc, registers only.
166#define DECODE_SRC_OPERAND_REG_AV10(RegClass, OpWidth) \
167 DECODE_SrcOp(decodeOperand_##RegClass, 10, OpWidth, Imm, false, 0)
168
169// Decoder for RegisterOperands using 9-bit Src encoding. The operand can be a
170// register from RegClass or an immediate. Registers outside RegClass are still
171// decoded, and the InstPrinter will report a warning. An immediate is decoded
172// into a constant of size ImmWidth, which should match the width of the
173// immediate used by the OperandType (important for floating point types).
174#define DECODE_OPERAND_SRC_REG_OR_IMM_9(RegClass, OpWidth, ImmWidth) \
175 DECODE_SrcOp(decodeOperand_##RegClass##_Imm##ImmWidth, 9, OpWidth, Imm, \
176 false, ImmWidth)
177
178// Decoder for Src(9-bit encoding) AGPR or immediate. Set Imm{9} to 1 (set acc)
179// and decode using 'enum10' from decodeSrcOp.
180#define DECODE_OPERAND_SRC_REG_OR_IMM_A9(RegClass, OpWidth, ImmWidth) \
181 DECODE_SrcOp(decodeOperand_##RegClass##_Imm##ImmWidth, 9, OpWidth, \
182 Imm | 512, false, ImmWidth)
183
184#define DECODE_OPERAND_SRC_REG_OR_IMM_DEFERRED_9(RegClass, OpWidth, ImmWidth) \
185 DECODE_SrcOp(decodeOperand_##RegClass##_Deferred##_Imm##ImmWidth, 9, \
186 OpWidth, Imm, true, ImmWidth)
187
188// Default decoders generated by tablegen: 'Decode<RegClass>RegisterClass'
189// when RegisterClass is used as an operand. Most often used for destination
190// operands.
191
192DECODE_OPERAND_REG_8(VGPR_32)
193DECODE_OPERAND_REG_8(VGPR_32_Lo128)
194DECODE_OPERAND_REG_8(VReg_64)
195DECODE_OPERAND_REG_8(VReg_96)
196DECODE_OPERAND_REG_8(VReg_128)
197DECODE_OPERAND_REG_8(VReg_256)
198DECODE_OPERAND_REG_8(VReg_288)
199DECODE_OPERAND_REG_8(VReg_352)
200DECODE_OPERAND_REG_8(VReg_384)
201DECODE_OPERAND_REG_8(VReg_512)
202DECODE_OPERAND_REG_8(VReg_1024)
203
204DECODE_OPERAND_REG_7(SReg_32, OPW32)
205DECODE_OPERAND_REG_7(SReg_32_XM0_XEXEC, OPW32)
206DECODE_OPERAND_REG_7(SReg_32_XEXEC_HI, OPW32)
207DECODE_OPERAND_REG_7(SReg_64, OPW64)
208DECODE_OPERAND_REG_7(SReg_64_XEXEC, OPW64)
209DECODE_OPERAND_REG_7(SReg_128, OPW128)
210DECODE_OPERAND_REG_7(SReg_256, OPW256)
211DECODE_OPERAND_REG_7(SReg_512, OPW512)
212
213DECODE_OPERAND_REG_8(AGPR_32)
214DECODE_OPERAND_REG_8(AReg_64)
215DECODE_OPERAND_REG_8(AReg_128)
216DECODE_OPERAND_REG_8(AReg_256)
217DECODE_OPERAND_REG_8(AReg_512)
218DECODE_OPERAND_REG_8(AReg_1024)
219
220DECODE_OPERAND_REG_AV10(AVDst_128, OPW128)
221DECODE_OPERAND_REG_AV10(AVDst_512, OPW512)
222
223// Decoders for register-only source RegisterOperands that use 9-bit Src
224// encoding: 'decodeOperand_<RegClass>'.
225
226DECODE_OPERAND_SRC_REG_9(VGPR_32, OPW32)
227DECODE_OPERAND_SRC_REG_9(VReg_64, OPW64)
228DECODE_OPERAND_SRC_REG_9(VReg_128, OPW128)
229DECODE_OPERAND_SRC_REG_9(VReg_256, OPW256)
230DECODE_OPERAND_SRC_REG_9(VRegOrLds_32, OPW32)
231
232DECODE_OPERAND_SRC_REG_A9(AGPR_32, OPW32)
233
234DECODE_SRC_OPERAND_REG_AV10(AV_32, OPW32)
235DECODE_SRC_OPERAND_REG_AV10(AV_64, OPW64)
236DECODE_SRC_OPERAND_REG_AV10(AV_128, OPW128)
237
238// Decoders for register or immediate RegisterOperands that use 9-bit Src
239// encoding: 'decodeOperand_<RegClass>_Imm<ImmWidth>'.
240
241DECODE_OPERAND_SRC_REG_OR_IMM_9(SReg_64, OPW64, 64)
242DECODE_OPERAND_SRC_REG_OR_IMM_9(SReg_32, OPW32, 32)
243DECODE_OPERAND_SRC_REG_OR_IMM_9(SReg_32, OPW32, 16)
244DECODE_OPERAND_SRC_REG_OR_IMM_9(SRegOrLds_32, OPW32, 32)
245DECODE_OPERAND_SRC_REG_OR_IMM_9(VS_32_Lo128, OPW16, 16)
246DECODE_OPERAND_SRC_REG_OR_IMM_9(VS_32, OPW32, 16)
247DECODE_OPERAND_SRC_REG_OR_IMM_9(VS_32, OPW32, 32)
248DECODE_OPERAND_SRC_REG_OR_IMM_9(VS_64, OPW64, 64)
249DECODE_OPERAND_SRC_REG_OR_IMM_9(VS_64, OPW64, 32)
250DECODE_OPERAND_SRC_REG_OR_IMM_9(VReg_64, OPW64, 64)
251DECODE_OPERAND_SRC_REG_OR_IMM_9(VReg_128, OPW128, 32)
252DECODE_OPERAND_SRC_REG_OR_IMM_9(VReg_256, OPW256, 64)
253DECODE_OPERAND_SRC_REG_OR_IMM_9(VReg_512, OPW512, 32)
254DECODE_OPERAND_SRC_REG_OR_IMM_9(VReg_1024, OPW1024, 32)
255
256DECODE_OPERAND_SRC_REG_OR_IMM_A9(AReg_64, OPW64, 64)
257DECODE_OPERAND_SRC_REG_OR_IMM_A9(AReg_128, OPW128, 32)
258DECODE_OPERAND_SRC_REG_OR_IMM_A9(AReg_256, OPW256, 64)
259DECODE_OPERAND_SRC_REG_OR_IMM_A9(AReg_512, OPW512, 32)
260DECODE_OPERAND_SRC_REG_OR_IMM_A9(AReg_1024, OPW1024, 32)
261
262DECODE_OPERAND_SRC_REG_OR_IMM_DEFERRED_9(VS_32_Lo128, OPW16, 16)
263DECODE_OPERAND_SRC_REG_OR_IMM_DEFERRED_9(VS_32, OPW16, 16)
264DECODE_OPERAND_SRC_REG_OR_IMM_DEFERRED_9(VS_32, OPW32, 32)
265DECODE_OPERAND_SRC_REG_OR_IMM_DEFERRED_9(SReg_32, OPW32, 32)
266
267static DecodeStatus DecodeVGPR_16RegisterClass(MCInst &Inst, unsigned Imm,
268 uint64_t /*Addr*/,
269 const MCDisassembler *Decoder) {
270 assert(isUInt<10>(Imm) && "10-bit encoding expected");
271 assert((Imm & (1 << 8)) == 0 && "Imm{8} should not be used");
272
273 bool IsHi = Imm & (1 << 9);
274 unsigned RegIdx = Imm & 0xff;
275 auto DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
276 return addOperand(Inst, DAsm->createVGPR16Operand(RegIdx, IsHi));
277}
278
279static DecodeStatus
280DecodeVGPR_16_Lo128RegisterClass(MCInst &Inst, unsigned Imm, uint64_t /*Addr*/,
281 const MCDisassembler *Decoder) {
282 assert(isUInt<8>(Imm) && "8-bit encoding expected");
283
284 bool IsHi = Imm & (1 << 7);
285 unsigned RegIdx = Imm & 0x7f;
286 auto DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
287 return addOperand(Inst, DAsm->createVGPR16Operand(RegIdx, IsHi));
288}
289
290static DecodeStatus decodeOperand_VSrcT16_Lo128(MCInst &Inst, unsigned Imm,
291 uint64_t /*Addr*/,
292 const MCDisassembler *Decoder) {
293 assert(isUInt<9>(Imm) && "9-bit encoding expected");
294
295 const auto *DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
296 bool IsVGPR = Imm & (1 << 8);
297 if (IsVGPR) {
298 bool IsHi = Imm & (1 << 7);
299 unsigned RegIdx = Imm & 0x7f;
300 return addOperand(Inst, DAsm->createVGPR16Operand(RegIdx, IsHi));
301 }
302 return addOperand(Inst, DAsm->decodeNonVGPRSrcOp(AMDGPUDisassembler::OPW16,
303 Imm & 0xFF, false, 16));
304}
305
306static DecodeStatus decodeOperand_VSrcT16(MCInst &Inst, unsigned Imm,
307 uint64_t /*Addr*/,
308 const MCDisassembler *Decoder) {
309 assert(isUInt<10>(Imm) && "10-bit encoding expected");
310
311 const auto *DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
312 bool IsVGPR = Imm & (1 << 8);
313 if (IsVGPR) {
314 bool IsHi = Imm & (1 << 9);
315 unsigned RegIdx = Imm & 0xff;
316 return addOperand(Inst, DAsm->createVGPR16Operand(RegIdx, IsHi));
317 }
318 return addOperand(Inst, DAsm->decodeNonVGPRSrcOp(AMDGPUDisassembler::OPW16,
319 Imm & 0xFF, false, 16));
320}
321
322static DecodeStatus decodeOperand_KImmFP(MCInst &Inst, unsigned Imm,
323 uint64_t /*Addr*/,
324 const MCDisassembler *Decoder) {
325 const auto *DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
326 return addOperand(Inst, DAsm->decodeMandatoryLiteralConstant(Imm));
327}
328
329static DecodeStatus decodeOperandVOPDDstY(MCInst &Inst, unsigned Val,
330 uint64_t Addr, const void *Decoder) {
331 const auto *DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
332 return addOperand(Inst, DAsm->decodeVOPDDstYOp(Inst, Val));
333}
334
335static bool IsAGPROperand(const MCInst &Inst, int OpIdx,
336 const MCRegisterInfo *MRI) {
337 if (OpIdx < 0)
338 return false;
339
340 const MCOperand &Op = Inst.getOperand(OpIdx);
341 if (!Op.isReg())
342 return false;
343
344 unsigned Sub = MRI->getSubReg(Op.getReg(), AMDGPU::sub0);
345 auto Reg = Sub ? Sub : Op.getReg();
346 return Reg >= AMDGPU::AGPR0 && Reg <= AMDGPU::AGPR255;
347}
348
349static DecodeStatus decodeOperand_AVLdSt_Any(MCInst &Inst, unsigned Imm,
350 AMDGPUDisassembler::OpWidthTy Opw,
351 const MCDisassembler *Decoder) {
352 auto DAsm = static_cast<const AMDGPUDisassembler*>(Decoder);
353 if (!DAsm->isGFX90A()) {
354 Imm &= 511;
355 } else {
356 // If atomic has both vdata and vdst their register classes are tied.
357 // The bit is decoded along with the vdst, first operand. We need to
358 // change register class to AGPR if vdst was AGPR.
359 // If a DS instruction has both data0 and data1 their register classes
360 // are also tied.
361 unsigned Opc = Inst.getOpcode();
362 uint64_t TSFlags = DAsm->getMCII()->get(Opc).TSFlags;
363 uint16_t DataNameIdx = (TSFlags & SIInstrFlags::DS) ? AMDGPU::OpName::data0
364 : AMDGPU::OpName::vdata;
365 const MCRegisterInfo *MRI = DAsm->getContext().getRegisterInfo();
366 int DataIdx = AMDGPU::getNamedOperandIdx(Opc, DataNameIdx);
367 if ((int)Inst.getNumOperands() == DataIdx) {
368 int DstIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdst);
369 if (IsAGPROperand(Inst, DstIdx, MRI))
370 Imm |= 512;
371 }
372
373 if (TSFlags & SIInstrFlags::DS) {
374 int Data2Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::data1);
375 if ((int)Inst.getNumOperands() == Data2Idx &&
376 IsAGPROperand(Inst, DataIdx, MRI))
377 Imm |= 512;
378 }
379 }
380 return addOperand(Inst, DAsm->decodeSrcOp(Opw, Imm | 256));
381}
382
383static DecodeStatus decodeOperand_VSrc_f64(MCInst &Inst, unsigned Imm,
384 uint64_t /*Addr*/,
385 const MCDisassembler *Decoder) {
386 assert(Imm < (1 << 9) && "9-bit encoding");
387 auto DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
388 return addOperand(
389 Inst, DAsm->decodeSrcOp(AMDGPUDisassembler::OPW64, Imm, false, 64, true));
390}
391
392static DecodeStatus
393DecodeAVLdSt_32RegisterClass(MCInst &Inst, unsigned Imm, uint64_t Addr,
394 const MCDisassembler *Decoder) {
395 return decodeOperand_AVLdSt_Any(Inst, Imm,
396 AMDGPUDisassembler::OPW32, Decoder);
397}
398
399static DecodeStatus
400DecodeAVLdSt_64RegisterClass(MCInst &Inst, unsigned Imm, uint64_t Addr,
401 const MCDisassembler *Decoder) {
402 return decodeOperand_AVLdSt_Any(Inst, Imm,
403 AMDGPUDisassembler::OPW64, Decoder);
404}
405
406static DecodeStatus
407DecodeAVLdSt_96RegisterClass(MCInst &Inst, unsigned Imm, uint64_t Addr,
408 const MCDisassembler *Decoder) {
409 return decodeOperand_AVLdSt_Any(Inst, Imm,
410 AMDGPUDisassembler::OPW96, Decoder);
411}
412
413static DecodeStatus
414DecodeAVLdSt_128RegisterClass(MCInst &Inst, unsigned Imm, uint64_t Addr,
415 const MCDisassembler *Decoder) {
416 return decodeOperand_AVLdSt_Any(Inst, Imm,
417 AMDGPUDisassembler::OPW128, Decoder);
418}
419
420static DecodeStatus
421DecodeAVLdSt_160RegisterClass(MCInst &Inst, unsigned Imm, uint64_t Addr,
422 const MCDisassembler *Decoder) {
423 return decodeOperand_AVLdSt_Any(Inst, Imm, AMDGPUDisassembler::OPW160,
424 Decoder);
425}
426
427#define DECODE_SDWA(DecName) \
428DECODE_OPERAND(decodeSDWA##DecName, decodeSDWA##DecName)
429
430DECODE_SDWA(Src32)
431DECODE_SDWA(Src16)
432DECODE_SDWA(VopcDst)
433
434#include "AMDGPUGenDisassemblerTables.inc"
435
436//===----------------------------------------------------------------------===//
437//
438//===----------------------------------------------------------------------===//
439
440template <typename T> static inline T eatBytes(ArrayRef<uint8_t>& Bytes) {
441 assert(Bytes.size() >= sizeof(T));
442 const auto Res =
443 support::endian::read<T, llvm::endianness::little>(Bytes.data());
444 Bytes = Bytes.slice(sizeof(T));
445 return Res;
446}
447
448static inline DecoderUInt128 eat12Bytes(ArrayRef<uint8_t> &Bytes) {
449 assert(Bytes.size() >= 12);
450 uint64_t Lo =
451 support::endian::read<uint64_t, llvm::endianness::little>(Bytes.data());
452 Bytes = Bytes.slice(8);
453 uint64_t Hi =
454 support::endian::read<uint32_t, llvm::endianness::little>(Bytes.data());
455 Bytes = Bytes.slice(4);
456 return DecoderUInt128(Lo, Hi);
457}
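// A standalone sketch of the byte order used above, with plain fixed-width
// integers (the name is illustrative): the 96-bit encoding is a little-endian
// 64-bit low part followed by a 32-bit high part, so stream byte 0 lands in
// bit 0 of Lo.
static void sketchEat12(const uint8_t *B, uint64_t &Lo, uint64_t &Hi) {
  Lo = Hi = 0;
  for (int I = 7; I >= 0; --I)  // low 8 bytes, LSB first in memory
    Lo = (Lo << 8) | B[I];
  for (int I = 11; I >= 8; --I) // high 4 bytes
    Hi = (Hi << 8) | B[I];
}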
458
459// The disassembler is greedy, so we need to check the FI operand value to
460// avoid parsing a dpp8 instruction when the correct literal is not set. For
461// dpp16, the autogenerated decoder checks the dpp literal.
462static bool isValidDPP8(const MCInst &MI) {
463 using namespace llvm::AMDGPU::DPP;
464 int FiIdx = AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::fi);
465 assert(FiIdx != -1);
466 if ((unsigned)FiIdx >= MI.getNumOperands())
467 return false;
468 unsigned Fi = MI.getOperand(FiIdx).getImm();
469 return Fi == DPP8_FI_0 || Fi == DPP8_FI_1;
470}
471
472DecodeStatus AMDGPUDisassembler::getInstruction(MCInst &MI, uint64_t &Size,
473 ArrayRef<uint8_t> Bytes_,
474 uint64_t Address,
475 raw_ostream &CS) const {
476 bool IsSDWA = false;
477
478 unsigned MaxInstBytesNum = std::min((size_t)TargetMaxInstBytes, Bytes_.size());
479 Bytes = Bytes_.slice(0, MaxInstBytesNum);
480
481 DecodeStatus Res = MCDisassembler::Fail;
482 do {
483 // ToDo: better to switch encoding length using some bit predicate
484 // but it is unknown yet, so try all we can
485
486 // Try to decode DPP and SDWA first to solve conflict with VOP1 and VOP2
487 // encodings
488 if (isGFX11Plus() && Bytes.size() >= 12 ) {
489 DecoderUInt128 DecW = eat12Bytes(Bytes);
490 Res =
491 tryDecodeInst(DecoderTableDPP8GFX1196, DecoderTableDPP8GFX11_FAKE1696,
492 MI, DecW, Address, CS);
493 if (Res && convertDPP8Inst(MI) == MCDisassembler::Success)
494 break;
495 MI = MCInst(); // clear
496 Res = tryDecodeInst(DecoderTableDPPGFX1196, DecoderTableDPPGFX11_FAKE1696,
497 MI, DecW, Address, CS);
498 if (Res) {
499 if (MCII->get(MI.getOpcode()).TSFlags & SIInstrFlags::VOP3P)
500 convertVOP3PDPPInst(MI);
501 else if (AMDGPU::isVOPC64DPP(MI.getOpcode()))
502 convertVOPCDPPInst(MI); // Special VOP3 case
503 else {
504 assert(MCII->get(MI.getOpcode()).TSFlags & SIInstrFlags::VOP3);
505 convertVOP3DPPInst(MI); // Regular VOP3 case
506 }
507 break;
508 }
509 Res = tryDecodeInst(DecoderTableGFX1196, MI, DecW, Address, CS);
510 if (Res)
511 break;
512
513 Res = tryDecodeInst(DecoderTableGFX1296, MI, DecW, Address, CS);
514 if (Res)
515 break;
516 }
517 // Reinitialize Bytes
518 Bytes = Bytes_.slice(0, MaxInstBytesNum);
519
520 if (Bytes.size() >= 8) {
521 const uint64_t QW = eatBytes<uint64_t>(Bytes);
522
523 if (STI.hasFeature(AMDGPU::FeatureGFX10_BEncoding)) {
524 Res = tryDecodeInst(DecoderTableGFX10_B64, MI, QW, Address, CS);
525 if (Res) {
526 if (AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::dpp8)
527 == -1)
528 break;
529 if (convertDPP8Inst(MI) == MCDisassembler::Success)
530 break;
531 MI = MCInst(); // clear
532 }
533 }
534
535 Res = tryDecodeInst(DecoderTableDPP864, MI, QW, Address, CS);
536 if (Res && convertDPP8Inst(MI) == MCDisassembler::Success)
537 break;
538 MI = MCInst(); // clear
539
540 Res = tryDecodeInst(DecoderTableDPP8GFX1164,
541 DecoderTableDPP8GFX11_FAKE1664, MI, QW, Address, CS);
542 if (Res && convertDPP8Inst(MI) == MCDisassembler::Success)
543 break;
544 MI = MCInst(); // clear
545
546 Res = tryDecodeInst(DecoderTableDPP64, MI, QW, Address, CS);
547 if (Res) break;
548
549 Res = tryDecodeInst(DecoderTableDPPGFX1164, DecoderTableDPPGFX11_FAKE1664,
550 MI, QW, Address, CS);
551 if (Res) {
552 if (MCII->get(MI.getOpcode()).TSFlags & SIInstrFlags::VOPC)
553 convertVOPCDPPInst(MI);
554 break;
555 }
556
557 Res = tryDecodeInst(DecoderTableSDWA64, MI, QW, Address, CS);
558 if (Res) { IsSDWA = true; break; }
559
560 Res = tryDecodeInst(DecoderTableSDWA964, MI, QW, Address, CS);
561 if (Res) { IsSDWA = true; break; }
562
563 Res = tryDecodeInst(DecoderTableSDWA1064, MI, QW, Address, CS);
564 if (Res) { IsSDWA = true; break; }
565
566 if (STI.hasFeature(AMDGPU::FeatureUnpackedD16VMem)) {
567 Res = tryDecodeInst(DecoderTableGFX80_UNPACKED64, MI, QW, Address, CS);
568 if (Res)
569 break;
570 }
571
572 // Some GFX9 subtargets repurposed the v_mad_mix_f32, v_mad_mixlo_f16 and
573 // v_mad_mixhi_f16 for FMA variants. Try to decode using this special
574 // table first so we print the correct name.
575 if (STI.hasFeature(AMDGPU::FeatureFmaMixInsts)) {
576 Res = tryDecodeInst(DecoderTableGFX9_DL64, MI, QW, Address, CS);
577 if (Res)
578 break;
579 }
580 }
581
582 // Reinitialize Bytes as DPP64 could have eaten too much
583 Bytes = Bytes_.slice(0, MaxInstBytesNum);
584
585 // Try decode 32-bit instruction
586 if (Bytes.size() < 4) break;
587 const uint32_t DW = eatBytes<uint32_t>(Bytes);
588 Res = tryDecodeInst(DecoderTableGFX832, MI, DW, Address, CS);
589 if (Res) break;
590
591 Res = tryDecodeInst(DecoderTableAMDGPU32, MI, DW, Address, CS);
592 if (Res) break;
593
594 Res = tryDecodeInst(DecoderTableGFX932, MI, DW, Address, CS);
595 if (Res) break;
596
597 if (STI.hasFeature(AMDGPU::FeatureGFX90AInsts)) {
598 Res = tryDecodeInst(DecoderTableGFX90A32, MI, DW, Address, CS);
599 if (Res)
600 break;
601 }
602
603 if (STI.hasFeature(AMDGPU::FeatureGFX10_BEncoding)) {
604 Res = tryDecodeInst(DecoderTableGFX10_B32, MI, DW, Address, CS);
605 if (Res) break;
606 }
607
608 Res = tryDecodeInst(DecoderTableGFX1032, MI, DW, Address, CS);
609 if (Res) break;
610
611 Res = tryDecodeInst(DecoderTableGFX1132, DecoderTableGFX11_FAKE1632, MI, DW,
612 Address, CS);
613 if (Res) break;
614
615 Res = tryDecodeInst(DecoderTableGFX1232, MI, DW, Address, CS);
616 if (Res)
617 break;
618
619 if (Bytes.size() < 4) break;
620 const uint64_t QW = ((uint64_t)eatBytes<uint32_t>(Bytes) << 32) | DW;
621
622 if (STI.hasFeature(AMDGPU::FeatureGFX940Insts)) {
623 Res = tryDecodeInst(DecoderTableGFX94064, MI, QW, Address, CS);
624 if (Res)
625 break;
626 }
627
628 if (STI.hasFeature(AMDGPU::FeatureGFX90AInsts)) {
629 Res = tryDecodeInst(DecoderTableGFX90A64, MI, QW, Address, CS);
630 if (Res)
631 break;
632 }
633
634 Res = tryDecodeInst(DecoderTableGFX864, MI, QW, Address, CS);
635 if (Res) break;
636
637 Res = tryDecodeInst(DecoderTableAMDGPU64, MI, QW, Address, CS);
638 if (Res) break;
639
640 Res = tryDecodeInst(DecoderTableGFX964, MI, QW, Address, CS);
641 if (Res) break;
642
643 Res = tryDecodeInst(DecoderTableGFX1064, MI, QW, Address, CS);
644 if (Res) break;
645
646 Res = tryDecodeInst(DecoderTableGFX1264, MI, QW, Address, CS);
647 if (Res)
648 break;
649
650 Res = tryDecodeInst(DecoderTableGFX1164, DecoderTableGFX11_FAKE1664, MI, QW,
651 Address, CS);
652 if (Res)
653 break;
654
655 Res = tryDecodeInst(DecoderTableWMMAGFX1164, MI, QW, Address, CS);
656 } while (false);
657
658 if (Res && AMDGPU::isMAC(MI.getOpcode())) {
659 // Insert dummy unused src2_modifiers.
660 insertNamedMCOperand(MI, MCOperand::createImm(0),
661 AMDGPU::OpName::src2_modifiers);
662 }
663
664 if (Res && (MCII->get(MI.getOpcode()).TSFlags &
665 (SIInstrFlags::MUBUF | SIInstrFlags::FLAT | SIInstrFlags::SMRD))) {
666 int CPolPos = AMDGPU::getNamedOperandIdx(MI.getOpcode(),
667 AMDGPU::OpName::cpol);
668 if (CPolPos != -1) {
669 unsigned CPol =
670 (MCII->get(MI.getOpcode()).TSFlags & SIInstrFlags::IsAtomicRet) ?
671 AMDGPU::CPol::GLC : 0;
672 if (MI.getNumOperands() <= (unsigned)CPolPos) {
673 insertNamedMCOperand(MI, MCOperand::createImm(CPol),
674 AMDGPU::OpName::cpol);
675 } else if (CPol) {
676 MI.getOperand(CPolPos).setImm(MI.getOperand(CPolPos).getImm() | CPol);
677 }
678 }
679 }
680
681 if (Res && (MCII->get(MI.getOpcode()).TSFlags &
682 (SIInstrFlags::MTBUF | SIInstrFlags::MUBUF)) &&
683 (STI.hasFeature(AMDGPU::FeatureGFX90AInsts))) {
684 // GFX90A lost TFE, its place is occupied by ACC.
685 int TFEOpIdx =
686 AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::tfe);
687 if (TFEOpIdx != -1) {
688 auto TFEIter = MI.begin();
689 std::advance(TFEIter, TFEOpIdx);
690 MI.insert(TFEIter, MCOperand::createImm(0));
691 }
692 }
693
694 if (Res && (MCII->get(MI.getOpcode()).TSFlags &
695 (SIInstrFlags::MTBUF | SIInstrFlags::MUBUF))) {
696 int SWZOpIdx =
697 AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::swz);
698 if (SWZOpIdx != -1) {
699 auto SWZIter = MI.begin();
700 std::advance(SWZIter, SWZOpIdx);
701 MI.insert(SWZIter, MCOperand::createImm(0));
702 }
703 }
704
705 if (Res && (MCII->get(MI.getOpcode()).TSFlags & SIInstrFlags::MIMG)) {
706 int VAddr0Idx =
707 AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::vaddr0);
708 int RsrcIdx =
709 AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::srsrc);
710 unsigned NSAArgs = RsrcIdx - VAddr0Idx - 1;
711 if (VAddr0Idx >= 0 && NSAArgs > 0) {
712 unsigned NSAWords = (NSAArgs + 3) / 4;
713 if (Bytes.size() < 4 * NSAWords) {
714 Res = MCDisassembler::Fail;
715 } else {
716 for (unsigned i = 0; i < NSAArgs; ++i) {
717 const unsigned VAddrIdx = VAddr0Idx + 1 + i;
718 auto VAddrRCID =
719 MCII->get(MI.getOpcode()).operands()[VAddrIdx].RegClass;
720 MI.insert(MI.begin() + VAddrIdx,
721 createRegOperand(VAddrRCID, Bytes[i]));
722 }
723 Bytes = Bytes.slice(4 * NSAWords);
724 }
725 }
726
727 if (Res)
728 Res = convertMIMGInst(MI);
729 }
730
731 if (Res && (MCII->get(MI.getOpcode()).TSFlags &
732 (SIInstrFlags::VIMAGE | SIInstrFlags::VSAMPLE)))
733 Res = convertMIMGInst(MI);
734
735 if (Res && (MCII->get(MI.getOpcode()).TSFlags & SIInstrFlags::EXP))
736 Res = convertEXPInst(MI);
737
738 if (Res && (MCII->get(MI.getOpcode()).TSFlags & SIInstrFlags::VINTERP))
739 Res = convertVINTERPInst(MI);
740
741 if (Res && IsSDWA)
742 Res = convertSDWAInst(MI);
743
744 int VDstIn_Idx = AMDGPU::getNamedOperandIdx(MI.getOpcode(),
745 AMDGPU::OpName::vdst_in);
746 if (VDstIn_Idx != -1) {
747 int Tied = MCII->get(MI.getOpcode()).getOperandConstraint(VDstIn_Idx,
748 MCOI::OperandConstraint::TIED_TO);
749 if (Tied != -1 && (MI.getNumOperands() <= (unsigned)VDstIn_Idx ||
750 !MI.getOperand(VDstIn_Idx).isReg() ||
751 MI.getOperand(VDstIn_Idx).getReg() != MI.getOperand(Tied).getReg())) {
752 if (MI.getNumOperands() > (unsigned)VDstIn_Idx)
753 MI.erase(&MI.getOperand(VDstIn_Idx));
754 insertNamedMCOperand(MI,
755 MCOperand::createReg(MI.getOperand(Tied).getReg()),
756 AMDGPU::OpName::vdst_in);
757 }
758 }
759
760 int ImmLitIdx =
761 AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::imm);
762 bool IsSOPK = MCII->get(MI.getOpcode()).TSFlags & SIInstrFlags::SOPK;
763 if (Res && ImmLitIdx != -1 && !IsSOPK)
764 Res = convertFMAanyK(MI, ImmLitIdx);
765
766 // if the opcode was not recognized we'll assume a Size of 4 bytes
767 // (unless there are fewer bytes left)
768 Size = Res ? (MaxInstBytesNum - Bytes.size())
769 : std::min((size_t)4, Bytes_.size());
770 return Res;
771}
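// Note on the loop above: decoding is greedy and longest-first. Each width
// (96-bit on GFX11+, then 64-bit, then 32-bit) re-slices Bytes from Bytes_
// before trying its tables, so a failed attempt at one width consumes no
// input, and the first successful tryDecodeInst wins via break. DPP and SDWA
// tables are tried before the plain VOP tables because those encodings
// conflict, as noted at the top of the loop.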
772
773DecodeStatus AMDGPUDisassembler::convertEXPInst(MCInst &MI) const {
774 if (STI.hasFeature(AMDGPU::FeatureGFX11)) {
775 // The MCInst still has these fields even though they are no longer encoded
776 // in the GFX11 instruction.
777 insertNamedMCOperand(MI, MCOperand::createImm(0), AMDGPU::OpName::vm);
778 insertNamedMCOperand(MI, MCOperand::createImm(0), AMDGPU::OpName::compr);
779 }
780 return MCDisassembler::Success;
781}
782
783DecodeStatus AMDGPUDisassembler::convertVINTERPInst(MCInst &MI) const {
784 if (MI.getOpcode() == AMDGPU::V_INTERP_P10_F16_F32_inreg_gfx11 ||
785 MI.getOpcode() == AMDGPU::V_INTERP_P10_RTZ_F16_F32_inreg_gfx11 ||
786 MI.getOpcode() == AMDGPU::V_INTERP_P2_F16_F32_inreg_gfx11 ||
787 MI.getOpcode() == AMDGPU::V_INTERP_P2_RTZ_F16_F32_inreg_gfx11) {
788 // The MCInst has this field that is not directly encoded in the
789 // instruction.
790 insertNamedMCOperand(MI, MCOperand::createImm(0), AMDGPU::OpName::op_sel);
791 }
792 return MCDisassembler::Success;
793}
794
795DecodeStatus AMDGPUDisassembler::convertSDWAInst(MCInst &MI) const {
796 if (STI.hasFeature(AMDGPU::FeatureGFX9) ||
797 STI.hasFeature(AMDGPU::FeatureGFX10)) {
798 if (AMDGPU::hasNamedOperand(MI.getOpcode(), AMDGPU::OpName::sdst))
799 // VOPC - insert clamp
800 insertNamedMCOperand(MI, MCOperand::createImm(0), AMDGPU::OpName::clamp);
801 } else if (STI.hasFeature(AMDGPU::FeatureVolcanicIslands)) {
802 int SDst = AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::sdst);
803 if (SDst != -1) {
804 // VOPC - insert VCC register as sdst
805 insertNamedMCOperand(MI, createRegOperand(AMDGPU::VCC),
806 AMDGPU::OpName::sdst);
807 } else {
808 // VOP1/2 - insert omod if present in instruction
809 insertNamedMCOperand(MI, MCOperand::createImm(0), AMDGPU::OpName::omod);
810 }
811 }
812 return MCDisassembler::Success;
813}
814
815struct VOPModifiers {
816 unsigned OpSel = 0;
817 unsigned OpSelHi = 0;
818 unsigned NegLo = 0;
819 unsigned NegHi = 0;
820};
821
822// Reconstruct values of VOP3/VOP3P operands such as op_sel.
823// Note that these values do not affect disassembler output,
824// so this is only necessary for consistency with src_modifiers.
825static VOPModifiers collectVOPModifiers(const MCInst &MI,
826 bool IsVOP3P = false) {
827 VOPModifiers Modifiers;
828 unsigned Opc = MI.getOpcode();
829 const int ModOps[] = {AMDGPU::OpName::src0_modifiers,
830 AMDGPU::OpName::src1_modifiers,
831 AMDGPU::OpName::src2_modifiers};
832 for (int J = 0; J < 3; ++J) {
833 int OpIdx = AMDGPU::getNamedOperandIdx(Opc, ModOps[J]);
834 if (OpIdx == -1)
835 continue;
836
837 unsigned Val = MI.getOperand(OpIdx).getImm();
838
839 Modifiers.OpSel |= !!(Val & SISrcMods::OP_SEL_0) << J;
840 if (IsVOP3P) {
841 Modifiers.OpSelHi |= !!(Val & SISrcMods::OP_SEL_1) << J;
842 Modifiers.NegLo |= !!(Val & SISrcMods::NEG) << J;
843 Modifiers.NegHi |= !!(Val & SISrcMods::NEG_HI) << J;
844 } else if (J == 0) {
845 Modifiers.OpSel |= !!(Val & SISrcMods::DST_OP_SEL) << 3;
846 }
847 }
848
849 return Modifiers;
850}
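// Worked example of the packing above: if OP_SEL_0 is set in src0_modifiers
// and src2_modifiers but not in src1_modifiers, the loop yields OpSel = 0b101;
// for non-VOP3P instructions a DST_OP_SEL bit on src0_modifiers additionally
// sets OpSel bit 3.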
851
852// MAC opcodes have special old and src2 operands.
853// src2 is tied to dst, while old is not tied (but assumed to be).
854bool AMDGPUDisassembler::isMacDPP(MCInst &MI) const {
855 constexpr int DST_IDX = 0;
856 auto Opcode = MI.getOpcode();
857 const auto &Desc = MCII->get(Opcode);
858 auto OldIdx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::old);
859
860 if (OldIdx != -1 && Desc.getOperandConstraint(
861 OldIdx, MCOI::OperandConstraint::TIED_TO) == -1) {
862 assert(AMDGPU::hasNamedOperand(Opcode, AMDGPU::OpName::src2));
863 assert(Desc.getOperandConstraint(
864 AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2),
865 MCOI::OperandConstraint::TIED_TO) == DST_IDX);
866 (void)DST_IDX;
867 return true;
868 }
869
870 return false;
871}
872
873// Create dummy old operand and insert dummy unused src2_modifiers
874void AMDGPUDisassembler::convertMacDPPInst(MCInst &MI) const {
875 assert(MI.getNumOperands() + 1 < MCII->get(MI.getOpcode()).getNumOperands());
876 insertNamedMCOperand(MI, MCOperand::createReg(0), AMDGPU::OpName::old);
877 insertNamedMCOperand(MI, MCOperand::createImm(0),
878 AMDGPU::OpName::src2_modifiers);
879}
880
881// We must check FI == literal to reject non-genuine dpp8 insts, and we must
882// first add the optional MI operands in order to check FI
883DecodeStatus AMDGPUDisassembler::convertDPP8Inst(MCInst &MI) const {
884 unsigned Opc = MI.getOpcode();
885 if (MCII->get(Opc).TSFlags & SIInstrFlags::VOP3P) {
886 convertVOP3PDPPInst(MI);
887 } else if ((MCII->get(Opc).TSFlags & SIInstrFlags::VOPC) ||
888 AMDGPU::isVOPC64DPP(Opc)) {
889 convertVOPCDPPInst(MI);
890 } else {
891 if (isMacDPP(MI))
892 convertMacDPPInst(MI);
893
894 unsigned DescNumOps = MCII->get(Opc).getNumOperands();
895 if (MI.getNumOperands() < DescNumOps &&
896 AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::op_sel)) {
897 auto Mods = collectVOPModifiers(MI);
898 insertNamedMCOperand(MI, MCOperand::createImm(Mods.OpSel),
899 AMDGPU::OpName::op_sel);
900 } else {
901 // Insert dummy unused src modifiers.
902 if (MI.getNumOperands() < DescNumOps &&
903 AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::src0_modifiers))
904 insertNamedMCOperand(MI, MCOperand::createImm(0),
905 AMDGPU::OpName::src0_modifiers);
906
907 if (MI.getNumOperands() < DescNumOps &&
908 AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::src1_modifiers))
909 insertNamedMCOperand(MI, MCOperand::createImm(0),
910 AMDGPU::OpName::src1_modifiers);
911 }
912 }
913 return isValidDPP8(MI) ? MCDisassembler::Success : MCDisassembler::SoftFail;
914}
915
916DecodeStatus AMDGPUDisassembler::convertVOP3DPPInst(MCInst &MI) const {
917 if (isMacDPP(MI))
918 convertMacDPPInst(MI);
919
920 unsigned Opc = MI.getOpcode();
921 unsigned DescNumOps = MCII->get(Opc).getNumOperands();
922 if (MI.getNumOperands() < DescNumOps &&
923 AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::op_sel)) {
924 auto Mods = collectVOPModifiers(MI);
925 insertNamedMCOperand(MI, MCOperand::createImm(Mods.OpSel),
926 AMDGPU::OpName::op_sel);
927 }
928 return MCDisassembler::Success;
929}
930
931// Note that before gfx10, the MIMG encoding provided no information about
932// VADDR size. Consequently, decoded instructions always show the address as
933// if it has 1 dword, which may not actually be the case.
934DecodeStatus AMDGPUDisassembler::convertMIMGInst(MCInst &MI) const {
935 auto TSFlags = MCII->get(MI.getOpcode()).TSFlags;
936
937 int VDstIdx = AMDGPU::getNamedOperandIdx(MI.getOpcode(),
938 AMDGPU::OpName::vdst);
939
940 int VDataIdx = AMDGPU::getNamedOperandIdx(MI.getOpcode(),
941 AMDGPU::OpName::vdata);
942 int VAddr0Idx =
943 AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::vaddr0);
944 int RsrcOpName = TSFlags & SIInstrFlags::MIMG ? AMDGPU::OpName::srsrc
945 : AMDGPU::OpName::rsrc;
946 int RsrcIdx = AMDGPU::getNamedOperandIdx(MI.getOpcode(), RsrcOpName);
947 int DMaskIdx = AMDGPU::getNamedOperandIdx(MI.getOpcode(),
948 AMDGPU::OpName::dmask);
949
950 int TFEIdx = AMDGPU::getNamedOperandIdx(MI.getOpcode(),
951 AMDGPU::OpName::tfe);
952 int D16Idx = AMDGPU::getNamedOperandIdx(MI.getOpcode(),
953 AMDGPU::OpName::d16);
954
955 const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(MI.getOpcode());
956 const AMDGPU::MIMGBaseOpcodeInfo *BaseOpcode =
957 AMDGPU::getMIMGBaseOpcodeInfo(Info->BaseOpcode);
958
959 assert(VDataIdx != -1);
960 if (BaseOpcode->BVH) {
961 // Add A16 operand for intersect_ray instructions
962 addOperand(MI, MCOperand::createImm(BaseOpcode->A16));
963 return MCDisassembler::Success;
964 }
965
966 bool IsAtomic = (VDstIdx != -1);
967 bool IsGather4 = TSFlags & SIInstrFlags::Gather4;
968 bool IsVSample = TSFlags & SIInstrFlags::VSAMPLE;
969 bool IsNSA = false;
970 bool IsPartialNSA = false;
971 unsigned AddrSize = Info->VAddrDwords;
972
973 if (isGFX10Plus()) {
974 unsigned DimIdx =
975 AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::dim);
976 int A16Idx =
977 AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::a16);
978 const AMDGPU::MIMGDimInfo *Dim =
979 AMDGPU::getMIMGDimInfoByEncoding(MI.getOperand(DimIdx).getImm());
980 const bool IsA16 = (A16Idx != -1 && MI.getOperand(A16Idx).getImm());
981
982 AddrSize =
983 AMDGPU::getAddrSizeMIMGOp(BaseOpcode, Dim, IsA16, AMDGPU::hasG16(STI));
984
985 // VSAMPLE insts that do not use vaddr3 behave the same as NSA forms.
986 // VIMAGE insts other than BVH never use vaddr4.
987 IsNSA = Info->MIMGEncoding == AMDGPU::MIMGEncGfx10NSA ||
988 Info->MIMGEncoding == AMDGPU::MIMGEncGfx11NSA ||
989 Info->MIMGEncoding == AMDGPU::MIMGEncGfx12;
990 if (!IsNSA) {
991 if (!IsVSample && AddrSize > 12)
992 AddrSize = 16;
993 } else {
994 if (AddrSize > Info->VAddrDwords) {
995 if (!STI.hasFeature(AMDGPU::FeaturePartialNSAEncoding)) {
996 // The NSA encoding does not contain enough operands for the
997 // combination of base opcode / dimension. Should this be an error?
998 return MCDisassembler::SoftFail;
999 }
1000 IsPartialNSA = true;
1001 }
1002 }
1003 }
1004
1005 unsigned DMask = MI.getOperand(DMaskIdx).getImm() & 0xf;
1006 unsigned DstSize = IsGather4 ? 4 : std::max(llvm::popcount(DMask), 1);
1007
1008 bool D16 = D16Idx >= 0 && MI.getOperand(D16Idx).getImm();
1009 if (D16 && AMDGPU::hasPackedD16(STI)) {
1010 DstSize = (DstSize + 1) / 2;
1011 }
1012
1013 if (TFEIdx != -1 && MI.getOperand(TFEIdx).getImm())
1014 DstSize += 1;
1015
1016 if (DstSize == Info->VDataDwords && AddrSize == Info->VAddrDwords)
1017 return MCDisassembler::Success;
1018
1019 int NewOpcode =
1020 AMDGPU::getMIMGOpcode(Info->BaseOpcode, Info->MIMGEncoding, DstSize, AddrSize);
1021 if (NewOpcode == -1)
1022 return MCDisassembler::Success;
1023
1024 // Widen the register to the correct number of enabled channels.
1025 unsigned NewVdata = AMDGPU::NoRegister;
1026 if (DstSize != Info->VDataDwords) {
1027 auto DataRCID = MCII->get(NewOpcode).operands()[VDataIdx].RegClass;
1028
1029 // Get first subregister of VData
1030 unsigned Vdata0 = MI.getOperand(VDataIdx).getReg();
1031 unsigned VdataSub0 = MRI.getSubReg(Vdata0, AMDGPU::sub0);
1032 Vdata0 = (VdataSub0 != 0)? VdataSub0 : Vdata0;
1033
1034 NewVdata = MRI.getMatchingSuperReg(Vdata0, AMDGPU::sub0,
1035 &MRI.getRegClass(DataRCID));
1036 if (NewVdata == AMDGPU::NoRegister) {
1037 // It's possible to encode this such that the low register + enabled
1038 // components exceeds the register count.
1039 return MCDisassembler::SoftFail;
1040 }
1041 }
1042
1043 // If not using NSA on GFX10+, widen vaddr0 address register to correct size.
1044 // If using partial NSA on GFX11+ widen last address register.
1045 int VAddrSAIdx = IsPartialNSA ? (RsrcIdx - 1) : VAddr0Idx;
1046 unsigned NewVAddrSA = AMDGPU::NoRegister;
1047 if (STI.hasFeature(AMDGPU::FeatureNSAEncoding) && (!IsNSA || IsPartialNSA) &&
1048 AddrSize != Info->VAddrDwords) {
1049 unsigned VAddrSA = MI.getOperand(VAddrSAIdx).getReg();
1050 unsigned VAddrSubSA = MRI.getSubReg(VAddrSA, AMDGPU::sub0);
1051 VAddrSA = VAddrSubSA ? VAddrSubSA : VAddrSA;
1052
1053 auto AddrRCID = MCII->get(NewOpcode).operands()[VAddrSAIdx].RegClass;
1054 NewVAddrSA = MRI.getMatchingSuperReg(VAddrSA, AMDGPU::sub0,
1055 &MRI.getRegClass(AddrRCID));
1056 if (!NewVAddrSA)
1057 return MCDisassembler::SoftFail;
1058
1059
1060 MI.setOpcode(NewOpcode);
1061
1062 if (NewVdata != AMDGPU::NoRegister) {
1063 MI.getOperand(VDataIdx) = MCOperand::createReg(NewVdata);
1064
1065 if (IsAtomic) {
1066 // Atomic operations have an additional operand (a copy of data)
1067 MI.getOperand(VDstIdx) = MCOperand::createReg(NewVdata);
1068 }
1069 }
1070
1071 if (NewVAddrSA) {
1072 MI.getOperand(VAddrSAIdx) = MCOperand::createReg(NewVAddrSA);
1073 } else if (IsNSA) {
1074 assert(AddrSize <= Info->VAddrDwords);
1075 MI.erase(MI.begin() + VAddr0Idx + AddrSize,
1076 MI.begin() + VAddr0Idx + Info->VAddrDwords);
1077 }
1078
1079 return MCDisassembler::Success;
1080}
1081
1082// Opsel and neg bits are used in src_modifiers and standalone operands. Autogen
1083// decoder only adds to src_modifiers, so manually add the bits to the other
1084// operands.
1085DecodeStatus AMDGPUDisassembler::convertVOP3PDPPInst(MCInst &MI) const {
1086 unsigned Opc = MI.getOpcode();
1087 unsigned DescNumOps = MCII->get(Opc).getNumOperands();
1088 auto Mods = collectVOPModifiers(MI, true);
1089
1090 if (MI.getNumOperands() < DescNumOps &&
1091 AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::vdst_in))
1092 insertNamedMCOperand(MI, MCOperand::createImm(0), AMDGPU::OpName::vdst_in);
1093
1094 if (MI.getNumOperands() < DescNumOps &&
1095 AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::op_sel))
1096 insertNamedMCOperand(MI, MCOperand::createImm(Mods.OpSel),
1097 AMDGPU::OpName::op_sel);
1098 if (MI.getNumOperands() < DescNumOps &&
1099 AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::op_sel_hi))
1100 insertNamedMCOperand(MI, MCOperand::createImm(Mods.OpSelHi),
1101 AMDGPU::OpName::op_sel_hi);
1102 if (MI.getNumOperands() < DescNumOps &&
1103 AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::neg_lo))
1104 insertNamedMCOperand(MI, MCOperand::createImm(Mods.NegLo),
1105 AMDGPU::OpName::neg_lo);
1106 if (MI.getNumOperands() < DescNumOps &&
1107 AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::neg_hi))
1108 insertNamedMCOperand(MI, MCOperand::createImm(Mods.NegHi),
1109 AMDGPU::OpName::neg_hi);
1110
1111 return MCDisassembler::Success;
1112}
1113
1114// Create dummy old operand and insert optional operands
1115DecodeStatus AMDGPUDisassembler::convertVOPCDPPInst(MCInst &MI) const {
1116 unsigned Opc = MI.getOpcode();
1117 unsigned DescNumOps = MCII->get(Opc).getNumOperands();
1118
1119 if (MI.getNumOperands() < DescNumOps &&
1120 AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::old))
1121 insertNamedMCOperand(MI, MCOperand::createReg(0), AMDGPU::OpName::old);
1122
1123 if (MI.getNumOperands() < DescNumOps &&
1124 AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::src0_modifiers))
1125 insertNamedMCOperand(MI, MCOperand::createImm(0),
1126 AMDGPU::OpName::src0_modifiers);
1127
1128 if (MI.getNumOperands() < DescNumOps &&
1129 AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::src1_modifiers))
1130 insertNamedMCOperand(MI, MCOperand::createImm(0),
1131 AMDGPU::OpName::src1_modifiers);
1132 return MCDisassembler::Success;
1133}
1134
1135DecodeStatus AMDGPUDisassembler::convertFMAanyK(MCInst &MI,
1136 int ImmLitIdx) const {
1137 assert(HasLiteral && "Should have decoded a literal");
1138 const MCInstrDesc &Desc = MCII->get(MI.getOpcode());
1139 unsigned DescNumOps = Desc.getNumOperands();
1140 insertNamedMCOperand(MI, MCOperand::createImm(Literal),
1141 AMDGPU::OpName::immDeferred);
1142 assert(DescNumOps == MI.getNumOperands());
1143 for (unsigned I = 0; I < DescNumOps; ++I) {
1144 auto &Op = MI.getOperand(I);
1145 auto OpType = Desc.operands()[I].OperandType;
1146 bool IsDeferredOp = (OpType == AMDGPU::OPERAND_REG_IMM_FP32_DEFERRED ||
1147 OpType == AMDGPU::OPERAND_REG_IMM_FP16_DEFERRED);
1148 if (Op.isImm() && Op.getImm() == AMDGPU::EncValues::LITERAL_CONST &&
1149 IsDeferredOp)
1150 Op.setImm(Literal);
1151 }
1152 return MCDisassembler::Success;
1153}
1154
1155const char* AMDGPUDisassembler::getRegClassName(unsigned RegClassID) const {
1156 return getContext().getRegisterInfo()->
1157 getRegClassName(&AMDGPUMCRegisterClasses[RegClassID]);
1158}
1159
1160inline
1161MCOperand AMDGPUDisassembler::errOperand(unsigned V,
1162 const Twine& ErrMsg) const {
1163 *CommentStream << "Error: " + ErrMsg;
1164
1165 // ToDo: add support for error operands to MCInst.h
1166 // return MCOperand::createError(V);
1167 return MCOperand();
1168}
1169
1170inline
1171MCOperand AMDGPUDisassembler::createRegOperand(unsigned int RegId) const {
1172 return MCOperand::createReg(AMDGPU::getMCReg(RegId, STI));
1173}
1174
1175inline
1176MCOperand AMDGPUDisassembler::createRegOperand(unsigned RegClassID,
1177 unsigned Val) const {
1178 const auto& RegCl = AMDGPUMCRegisterClasses[RegClassID];
1179 if (Val >= RegCl.getNumRegs())
1180 return errOperand(Val, Twine(getRegClassName(RegClassID)) +
1181 ": unknown register " + Twine(Val));
1182 return createRegOperand(RegCl.getRegister(Val));
1183}
1184
1185inline
1186MCOperand AMDGPUDisassembler::createSRegOperand(unsigned SRegClassID,
1187 unsigned Val) const {
1188 // ToDo: SI/CI have 104 SGPRs, VI - 102
1189 // Valery: here we accept as much as we can; let the assembler sort it out
1190 int shift = 0;
1191 switch (SRegClassID) {
1192 case AMDGPU::SGPR_32RegClassID:
1193 case AMDGPU::TTMP_32RegClassID:
1194 break;
1195 case AMDGPU::SGPR_64RegClassID:
1196 case AMDGPU::TTMP_64RegClassID:
1197 shift = 1;
1198 break;
1199 case AMDGPU::SGPR_128RegClassID:
1200 case AMDGPU::TTMP_128RegClassID:
1201 // ToDo: unclear if s[100:104] is available on VI. Can we use VCC as SGPR in
1202 // this bundle?
1203 case AMDGPU::SGPR_256RegClassID:
1204 case AMDGPU::TTMP_256RegClassID:
1205 // ToDo: unclear if s[96:104] is available on VI. Can we use VCC as SGPR in
1206 // this bundle?
1207 case AMDGPU::SGPR_288RegClassID:
1208 case AMDGPU::TTMP_288RegClassID:
1209 case AMDGPU::SGPR_320RegClassID:
1210 case AMDGPU::TTMP_320RegClassID:
1211 case AMDGPU::SGPR_352RegClassID:
1212 case AMDGPU::TTMP_352RegClassID:
1213 case AMDGPU::SGPR_384RegClassID:
1214 case AMDGPU::TTMP_384RegClassID:
1215 case AMDGPU::SGPR_512RegClassID:
1216 case AMDGPU::TTMP_512RegClassID:
1217 shift = 2;
1218 break;
1219 // ToDo: unclear if s[88:104] is available on VI. Can we use VCC as SGPR in
1220 // this bundle?
1221 default:
1222 llvm_unreachable("unhandled register class");
1223 }
1224
1225 if (Val % (1 << shift)) {
1226 *CommentStream << "Warning: " << getRegClassName(SRegClassID)
1227 << ": scalar reg isn't aligned " << Val;
1228 }
1229
1230 return createRegOperand(SRegClassID, Val >> shift);
1231}
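// Example of the shift logic above: a 64-bit scalar operand encodes the first
// register of an even-aligned pair, so Val = 6 with shift = 1 selects SGPR_64
// register number 3, i.e. s[6:7]; an odd Val only triggers the "isn't aligned"
// warning and is then rounded down by the shift.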
1232
1233MCOperand AMDGPUDisassembler::createVGPR16Operand(unsigned RegIdx,
1234 bool IsHi) const {
1235 unsigned RCID =
1236 IsHi ? AMDGPU::VGPR_HI16RegClassID : AMDGPU::VGPR_LO16RegClassID;
1237 return createRegOperand(RCID, RegIdx);
1238}
1239
1240// Decode Literals for insts which always have a literal in the encoding
1241MCOperand
1242AMDGPUDisassembler::decodeMandatoryLiteralConstant(unsigned Val) const {
1243 if (HasLiteral) {
1244 assert(
1246 "Should only decode multiple kimm with VOPD, check VSrc operand types");
1247 if (Literal != Val)
1248 return errOperand(Val, "More than one unique literal is illegal");
1249 }
1250 HasLiteral = true;
1251 Literal = Val;
1252 return MCOperand::createImm(Literal);
1253}
1254
1255MCOperand AMDGPUDisassembler::decodeLiteralConstant(bool ExtendFP64) const {
1256 // For now all literal constants are supposed to be unsigned integer
1257 // ToDo: deal with signed/unsigned 64-bit integer constants
1258 // ToDo: deal with float/double constants
1259 if (!HasLiteral) {
1260 if (Bytes.size() < 4) {
1261 return errOperand(0, "cannot read literal, inst bytes left " +
1262 Twine(Bytes.size()));
1263 }
1264 HasLiteral = true;
1265 Literal = Literal64 = eatBytes<uint32_t>(Bytes);
1266 if (ExtendFP64)
1267 Literal64 <<= 32;
1268 }
1269 return MCOperand::createImm(ExtendFP64 ? Literal64 : Literal);
1270}
1271
1272MCOperand AMDGPUDisassembler::decodeIntImmed(unsigned Imm) {
1273 using namespace AMDGPU::EncValues;
1274
1275 assert(Imm >= INLINE_INTEGER_C_MIN && Imm <= INLINE_INTEGER_C_MAX);
1276 return MCOperand::createImm((Imm <= INLINE_INTEGER_C_POSITIVE_MAX) ?
1277 (static_cast<int64_t>(Imm) - INLINE_INTEGER_C_MIN) :
1278 (INLINE_INTEGER_C_POSITIVE_MAX - static_cast<int64_t>(Imm)));
1279 // Cast prevents negative overflow.
1280}
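// Worked examples of the mapping above, using the AMDGPU encoding constants
// (INLINE_INTEGER_C_MIN = 128, INLINE_INTEGER_C_POSITIVE_MAX = 192,
// INLINE_INTEGER_C_MAX = 208): encodings 128..192 decode to 0..64 and
// 193..208 decode to -1..-16, e.g. 193 -> 192 - 193 = -1.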
1281
1282static int64_t getInlineImmVal32(unsigned Imm) {
1283 switch (Imm) {
1284 case 240:
1285 return llvm::bit_cast<uint32_t>(0.5f);
1286 case 241:
1287 return llvm::bit_cast<uint32_t>(-0.5f);
1288 case 242:
1289 return llvm::bit_cast<uint32_t>(1.0f);
1290 case 243:
1291 return llvm::bit_cast<uint32_t>(-1.0f);
1292 case 244:
1293 return llvm::bit_cast<uint32_t>(2.0f);
1294 case 245:
1295 return llvm::bit_cast<uint32_t>(-2.0f);
1296 case 246:
1297 return llvm::bit_cast<uint32_t>(4.0f);
1298 case 247:
1299 return llvm::bit_cast<uint32_t>(-4.0f);
1300 case 248: // 1 / (2 * PI)
1301 return 0x3e22f983;
1302 default:
1303 llvm_unreachable("invalid fp inline imm");
1304 }
1305}
1306
1307static int64_t getInlineImmVal64(unsigned Imm) {
1308 switch (Imm) {
1309 case 240:
1310 return llvm::bit_cast<uint64_t>(0.5);
1311 case 241:
1312 return llvm::bit_cast<uint64_t>(-0.5);
1313 case 242:
1314 return llvm::bit_cast<uint64_t>(1.0);
1315 case 243:
1316 return llvm::bit_cast<uint64_t>(-1.0);
1317 case 244:
1318 return llvm::bit_cast<uint64_t>(2.0);
1319 case 245:
1320 return llvm::bit_cast<uint64_t>(-2.0);
1321 case 246:
1322 return llvm::bit_cast<uint64_t>(4.0);
1323 case 247:
1324 return llvm::bit_cast<uint64_t>(-4.0);
1325 case 248: // 1 / (2 * PI)
1326 return 0x3fc45f306dc9c882;
1327 default:
1328 llvm_unreachable("invalid fp inline imm");
1329 }
1330}
1331
1332static int64_t getInlineImmVal16(unsigned Imm) {
1333 switch (Imm) {
1334 case 240:
1335 return 0x3800;
1336 case 241:
1337 return 0xB800;
1338 case 242:
1339 return 0x3C00;
1340 case 243:
1341 return 0xBC00;
1342 case 244:
1343 return 0x4000;
1344 case 245:
1345 return 0xC000;
1346 case 246:
1347 return 0x4400;
1348 case 247:
1349 return 0xC400;
1350 case 248: // 1 / (2 * PI)
1351 return 0x3118;
1352 default:
1353 llvm_unreachable("invalid fp inline imm");
1354 }
1355}
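// The 16-bit cases above are IEEE half-precision bit patterns: 0x3800 is 0.5,
// 0x3C00 is 1.0, 0x4000 is 2.0 and 0x4400 is 4.0; OR-ing in the sign bit
// (0x8000) gives the negated constants, and 0x3118 approximates 1/(2*pi).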
1356
1357MCOperand AMDGPUDisassembler::decodeFPImmed(unsigned ImmWidth, unsigned Imm) {
1358 assert(Imm >= AMDGPU::EncValues::INLINE_FLOATING_C_MIN &&
1359 Imm <= AMDGPU::EncValues::INLINE_FLOATING_C_MAX);
1360
1361 // ToDo: case 248: 1/(2*PI) - is allowed only on VI
1362 // ImmWidth 0 is a default case where operand should not allow immediates.
1363 // Imm value is still decoded into 32 bit immediate operand, inst printer will
1364 // use it to print verbose error message.
1365 switch (ImmWidth) {
1366 case 0:
1367 case 32:
1368 return MCOperand::createImm(getInlineImmVal32(Imm));
1369 case 64:
1370 return MCOperand::createImm(getInlineImmVal64(Imm));
1371 case 16:
1372 return MCOperand::createImm(getInlineImmVal16(Imm));
1373 default:
1374 llvm_unreachable("implement me");
1375 }
1376}
1377
1378unsigned AMDGPUDisassembler::getVgprClassId(const OpWidthTy Width) const {
1379 using namespace AMDGPU;
1380
1381 assert(OPW_FIRST_ <= Width && Width < OPW_LAST_);
1382 switch (Width) {
1383 default: // fall
1384 case OPW32:
1385 case OPW16:
1386 case OPWV216:
1387 return VGPR_32RegClassID;
1388 case OPW64:
1389 case OPWV232: return VReg_64RegClassID;
1390 case OPW96: return VReg_96RegClassID;
1391 case OPW128: return VReg_128RegClassID;
1392 case OPW160: return VReg_160RegClassID;
1393 case OPW256: return VReg_256RegClassID;
1394 case OPW288: return VReg_288RegClassID;
1395 case OPW320: return VReg_320RegClassID;
1396 case OPW352: return VReg_352RegClassID;
1397 case OPW384: return VReg_384RegClassID;
1398 case OPW512: return VReg_512RegClassID;
1399 case OPW1024: return VReg_1024RegClassID;
1400 }
1401}
1402
1403unsigned AMDGPUDisassembler::getAgprClassId(const OpWidthTy Width) const {
1404 using namespace AMDGPU;
1405
1406 assert(OPW_FIRST_ <= Width && Width < OPW_LAST_);
1407 switch (Width) {
1408 default: // fall
1409 case OPW32:
1410 case OPW16:
1411 case OPWV216:
1412 return AGPR_32RegClassID;
1413 case OPW64:
1414 case OPWV232: return AReg_64RegClassID;
1415 case OPW96: return AReg_96RegClassID;
1416 case OPW128: return AReg_128RegClassID;
1417 case OPW160: return AReg_160RegClassID;
1418 case OPW256: return AReg_256RegClassID;
1419 case OPW288: return AReg_288RegClassID;
1420 case OPW320: return AReg_320RegClassID;
1421 case OPW352: return AReg_352RegClassID;
1422 case OPW384: return AReg_384RegClassID;
1423 case OPW512: return AReg_512RegClassID;
1424 case OPW1024: return AReg_1024RegClassID;
1425 }
1426}
1427
1428
1429unsigned AMDGPUDisassembler::getSgprClassId(const OpWidthTy Width) const {
1430 using namespace AMDGPU;
1431
1432 assert(OPW_FIRST_ <= Width && Width < OPW_LAST_);
1433 switch (Width) {
1434 default: // fall
1435 case OPW32:
1436 case OPW16:
1437 case OPWV216:
1438 return SGPR_32RegClassID;
1439 case OPW64:
1440 case OPWV232: return SGPR_64RegClassID;
1441 case OPW96: return SGPR_96RegClassID;
1442 case OPW128: return SGPR_128RegClassID;
1443 case OPW160: return SGPR_160RegClassID;
1444 case OPW256: return SGPR_256RegClassID;
1445 case OPW288: return SGPR_288RegClassID;
1446 case OPW320: return SGPR_320RegClassID;
1447 case OPW352: return SGPR_352RegClassID;
1448 case OPW384: return SGPR_384RegClassID;
1449 case OPW512: return SGPR_512RegClassID;
1450 }
1451}
1452
1453unsigned AMDGPUDisassembler::getTtmpClassId(const OpWidthTy Width) const {
1454 using namespace AMDGPU;
1455
1456 assert(OPW_FIRST_ <= Width && Width < OPW_LAST_);
1457 switch (Width) {
1458 default: // fall
1459 case OPW32:
1460 case OPW16:
1461 case OPWV216:
1462 return TTMP_32RegClassID;
1463 case OPW64:
1464 case OPWV232: return TTMP_64RegClassID;
1465 case OPW128: return TTMP_128RegClassID;
1466 case OPW256: return TTMP_256RegClassID;
1467 case OPW288: return TTMP_288RegClassID;
1468 case OPW320: return TTMP_320RegClassID;
1469 case OPW352: return TTMP_352RegClassID;
1470 case OPW384: return TTMP_384RegClassID;
1471 case OPW512: return TTMP_512RegClassID;
1472 }
1473}
1474
1475int AMDGPUDisassembler::getTTmpIdx(unsigned Val) const {
1476 using namespace AMDGPU::EncValues;
1477
1478 unsigned TTmpMin = isGFX9Plus() ? TTMP_GFX9PLUS_MIN : TTMP_VI_MIN;
1479 unsigned TTmpMax = isGFX9Plus() ? TTMP_GFX9PLUS_MAX : TTMP_VI_MAX;
1480
1481 return (TTmpMin <= Val && Val <= TTmpMax)? Val - TTmpMin : -1;
1482}
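// Example: on GFX9+ the trap temporaries occupy source encodings
// TTMP_GFX9PLUS_MIN..TTMP_GFX9PLUS_MAX (108..123), so Val = 110 yields index 2,
// i.e. ttmp2; on VI the window is 112..123. Anything outside the window
// returns -1.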
1483
1484MCOperand AMDGPUDisassembler::decodeSrcOp(const OpWidthTy Width, unsigned Val,
1485 bool MandatoryLiteral,
1486 unsigned ImmWidth, bool IsFP) const {
1487 using namespace AMDGPU::EncValues;
1488
1489 assert(Val < 1024); // enum10
1490
1491 bool IsAGPR = Val & 512;
1492 Val &= 511;
1493
1494 if (VGPR_MIN <= Val && Val <= VGPR_MAX) {
1495 return createRegOperand(IsAGPR ? getAgprClassId(Width)
1496 : getVgprClassId(Width), Val - VGPR_MIN);
1497 }
1498 return decodeNonVGPRSrcOp(Width, Val & 0xFF, MandatoryLiteral, ImmWidth,
1499 IsFP);
1500}
1501
1502MCOperand AMDGPUDisassembler::decodeNonVGPRSrcOp(const OpWidthTy Width,
1503 unsigned Val,
1504 bool MandatoryLiteral,
1505 unsigned ImmWidth,
1506 bool IsFP) const {
1507 // Cases when Val{8} is 1 (vgpr, agpr or true 16 vgpr) should have been
1508 // decoded earlier.
1509 assert(Val < (1 << 8) && "9-bit Src encoding when Val{8} is 0");
1510 using namespace AMDGPU::EncValues;
1511
1512 if (Val <= SGPR_MAX) {
1513 // "SGPR_MIN <= Val" is always true and causes compilation warning.
1514 static_assert(SGPR_MIN == 0);
1515 return createSRegOperand(getSgprClassId(Width), Val - SGPR_MIN);
1516 }
1517
1518 int TTmpIdx = getTTmpIdx(Val);
1519 if (TTmpIdx >= 0) {
1520 return createSRegOperand(getTtmpClassId(Width), TTmpIdx);
1521 }
1522
1523 if (INLINE_INTEGER_C_MIN <= Val && Val <= INLINE_INTEGER_C_MAX)
1524 return decodeIntImmed(Val);
1525
1526 if (INLINE_FLOATING_C_MIN <= Val && Val <= INLINE_FLOATING_C_MAX)
1527 return decodeFPImmed(ImmWidth, Val);
1528
1529 if (Val == LITERAL_CONST) {
1530 if (MandatoryLiteral)
1531 // Keep a sentinel value for deferred setting
1532 return MCOperand::createImm(LITERAL_CONST);
1533 else
1534 return decodeLiteralConstant(IsFP && ImmWidth == 64);
1535 }
1536
1537 switch (Width) {
1538 case OPW32:
1539 case OPW16:
1540 case OPWV216:
1541 return decodeSpecialReg32(Val);
1542 case OPW64:
1543 case OPWV232:
1544 return decodeSpecialReg64(Val);
1545 default:
1546 llvm_unreachable("unexpected immediate type");
1547 }
1548}
1549
1550// Bit 0 of DstY isn't stored in the instruction, because it's always the
1551// opposite of bit 0 of DstX.
1552MCOperand AMDGPUDisassembler::decodeVOPDDstYOp(MCInst &Inst,
1553 unsigned Val) const {
1554 int VDstXInd =
1555 AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::vdstX);
1556 assert(VDstXInd != -1);
1557 assert(Inst.getOperand(VDstXInd).isReg());
1558 unsigned XDstReg = MRI.getEncodingValue(Inst.getOperand(VDstXInd).getReg());
1559 Val |= ~XDstReg & 1;
1560 auto Width = llvm::AMDGPUDisassembler::OPW32;
1561 return createRegOperand(getVgprClassId(Width), Val);
1562}
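// Example: if vdstX decoded to an even VGPR (encoding bit 0 clear), the OR
// above sets bit 0 of Val so DstY selects an odd VGPR, and vice versa; the two
// VOPD destinations therefore always have opposite parity.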
1563
1564MCOperand AMDGPUDisassembler::decodeSpecialReg32(unsigned Val) const {
1565 using namespace AMDGPU;
1566
1567 switch (Val) {
1568 // clang-format off
1569 case 102: return createRegOperand(FLAT_SCR_LO);
1570 case 103: return createRegOperand(FLAT_SCR_HI);
1571 case 104: return createRegOperand(XNACK_MASK_LO);
1572 case 105: return createRegOperand(XNACK_MASK_HI);
1573 case 106: return createRegOperand(VCC_LO);
1574 case 107: return createRegOperand(VCC_HI);
1575 case 108: return createRegOperand(TBA_LO);
1576 case 109: return createRegOperand(TBA_HI);
1577 case 110: return createRegOperand(TMA_LO);
1578 case 111: return createRegOperand(TMA_HI);
1579 case 124:
1580 return isGFX11Plus() ? createRegOperand(SGPR_NULL) : createRegOperand(M0);
1581 case 125:
1582 return isGFX11Plus() ? createRegOperand(M0) : createRegOperand(SGPR_NULL);
1583 case 126: return createRegOperand(EXEC_LO);
1584 case 127: return createRegOperand(EXEC_HI);
1585 case 235: return createRegOperand(SRC_SHARED_BASE_LO);
1586 case 236: return createRegOperand(SRC_SHARED_LIMIT_LO);
1587 case 237: return createRegOperand(SRC_PRIVATE_BASE_LO);
1588 case 238: return createRegOperand(SRC_PRIVATE_LIMIT_LO);
1589 case 239: return createRegOperand(SRC_POPS_EXITING_WAVE_ID);
1590 case 251: return createRegOperand(SRC_VCCZ);
1591 case 252: return createRegOperand(SRC_EXECZ);
1592 case 253: return createRegOperand(SRC_SCC);
1593 case 254: return createRegOperand(LDS_DIRECT);
1594 default: break;
1595 // clang-format on
1596 }
1597 return errOperand(Val, "unknown operand encoding " + Twine(Val));
1598}
1599
1600MCOperand AMDGPUDisassembler::decodeSpecialReg64(unsigned Val) const {
1601 using namespace AMDGPU;
1602
1603 switch (Val) {
1604 case 102: return createRegOperand(FLAT_SCR);
1605 case 104: return createRegOperand(XNACK_MASK);
1606 case 106: return createRegOperand(VCC);
1607 case 108: return createRegOperand(TBA);
1608 case 110: return createRegOperand(TMA);
1609 case 124:
1610 if (isGFX11Plus())
1611 return createRegOperand(SGPR_NULL);
1612 break;
1613 case 125:
1614 if (!isGFX11Plus())
1615 return createRegOperand(SGPR_NULL);
1616 break;
1617 case 126: return createRegOperand(EXEC);
1618 case 235: return createRegOperand(SRC_SHARED_BASE);
1619 case 236: return createRegOperand(SRC_SHARED_LIMIT);
1620 case 237: return createRegOperand(SRC_PRIVATE_BASE);
1621 case 238: return createRegOperand(SRC_PRIVATE_LIMIT);
1622 case 239: return createRegOperand(SRC_POPS_EXITING_WAVE_ID);
1623 case 251: return createRegOperand(SRC_VCCZ);
1624 case 252: return createRegOperand(SRC_EXECZ);
1625 case 253: return createRegOperand(SRC_SCC);
1626 default: break;
1627 }
1628 return errOperand(Val, "unknown operand encoding " + Twine(Val));
1629}
1630
1631MCOperand AMDGPUDisassembler::decodeSDWASrc(const OpWidthTy Width,
1632 const unsigned Val,
1633 unsigned ImmWidth) const {
1634 using namespace AMDGPU::SDWA;
1635 using namespace AMDGPU::EncValues;
1636
1637 if (STI.hasFeature(AMDGPU::FeatureGFX9) ||
1638 STI.hasFeature(AMDGPU::FeatureGFX10)) {
1639 // XXX: cast to int is needed to avoid stupid warning:
1640 // compare with unsigned is always true
1641 if (int(SDWA9EncValues::SRC_VGPR_MIN) <= int(Val) &&
1642 Val <= SDWA9EncValues::SRC_VGPR_MAX) {
1643 return createRegOperand(getVgprClassId(Width),
1644 Val - SDWA9EncValues::SRC_VGPR_MIN);
1645 }
1646 if (SDWA9EncValues::SRC_SGPR_MIN <= Val &&
1647 Val <= (isGFX10Plus() ? SDWA9EncValues::SRC_SGPR_MAX_GFX10
1648 : SDWA9EncValues::SRC_SGPR_MAX_SI)) {
1649 return createSRegOperand(getSgprClassId(Width),
1650 Val - SDWA9EncValues::SRC_SGPR_MIN);
1651 }
1652 if (SDWA9EncValues::SRC_TTMP_MIN <= Val &&
1653 Val <= SDWA9EncValues::SRC_TTMP_MAX) {
1654 return createSRegOperand(getTtmpClassId(Width),
1655 Val - SDWA9EncValues::SRC_TTMP_MIN);
1656 }
1657
1658 const unsigned SVal = Val - SDWA9EncValues::SRC_SGPR_MIN;
1659
1660 if (INLINE_INTEGER_C_MIN <= SVal && SVal <= INLINE_INTEGER_C_MAX)
1661 return decodeIntImmed(SVal);
1662
1663 if (INLINE_FLOATING_C_MIN <= SVal && SVal <= INLINE_FLOATING_C_MAX)
1664 return decodeFPImmed(ImmWidth, SVal);
1665
1666 return decodeSpecialReg32(SVal);
1667 } else if (STI.hasFeature(AMDGPU::FeatureVolcanicIslands)) {
1668 return createRegOperand(getVgprClassId(Width), Val);
1669 }
1670 llvm_unreachable("unsupported target");
1671}
1672
1673MCOperand AMDGPUDisassembler::decodeSDWASrc16(unsigned Val) const {
1674 return decodeSDWASrc(OPW16, Val, 16);
1675}
1676
1677MCOperand AMDGPUDisassembler::decodeSDWASrc32(unsigned Val) const {
1678 return decodeSDWASrc(OPW32, Val, 32);
1679}
1680
1681MCOperand AMDGPUDisassembler::decodeSDWAVopcDst(unsigned Val) const {
1682 using namespace AMDGPU::SDWA;
1683
1684 assert((STI.hasFeature(AMDGPU::FeatureGFX9) ||
1685 STI.hasFeature(AMDGPU::FeatureGFX10)) &&
1686 "SDWAVopcDst should be present only on GFX9+");
1687
1688 bool IsWave64 = STI.hasFeature(AMDGPU::FeatureWavefrontSize64);
1689
1690 if (Val & SDWA9EncValues::VOPC_DST_VCC_MASK) {
1691 Val &= SDWA9EncValues::VOPC_DST_SGPR_MASK;
1692
1693 int TTmpIdx = getTTmpIdx(Val);
1694 if (TTmpIdx >= 0) {
1695 auto TTmpClsId = getTtmpClassId(IsWave64 ? OPW64 : OPW32);
1696 return createSRegOperand(TTmpClsId, TTmpIdx);
1697 } else if (Val > SGPR_MAX) {
1698 return IsWave64 ? decodeSpecialReg64(Val)
1699 : decodeSpecialReg32(Val);
1700 } else {
1701 return createSRegOperand(getSgprClassId(IsWave64 ? OPW64 : OPW32), Val);
1702 }
1703 } else {
1704 return createRegOperand(IsWave64 ? AMDGPU::VCC : AMDGPU::VCC_LO);
1705 }
1706}
1707
1708MCOperand AMDGPUDisassembler::decodeBoolReg(unsigned Val) const {
1709 return STI.hasFeature(AMDGPU::FeatureWavefrontSize64)
1710 ? decodeSrcOp(OPW64, Val)
1711 : decodeSrcOp(OPW32, Val);
1712}
1713
1714bool AMDGPUDisassembler::isVI() const {
1715 return STI.hasFeature(AMDGPU::FeatureVolcanicIslands);
1716}
1717
1718bool AMDGPUDisassembler::isGFX9() const { return AMDGPU::isGFX9(STI); }
1719
1720bool AMDGPUDisassembler::isGFX90A() const {
1721 return STI.hasFeature(AMDGPU::FeatureGFX90AInsts);
1722}
1723
1724bool AMDGPUDisassembler::isGFX9Plus() const { return AMDGPU::isGFX9Plus(STI); }
1725
1726bool AMDGPUDisassembler::isGFX10() const { return AMDGPU::isGFX10(STI); }
1727
1728bool AMDGPUDisassembler::isGFX10Plus() const {
1729 return AMDGPU::isGFX10Plus(STI);
1730}
1731
1732bool AMDGPUDisassembler::isGFX11() const {
1733 return STI.hasFeature(AMDGPU::FeatureGFX11);
1734}
1735
1736bool AMDGPUDisassembler::isGFX11Plus() const {
1737 return AMDGPU::isGFX11Plus(STI);
1738}
1739
1740bool AMDGPUDisassembler::isGFX12Plus() const {
1741 return AMDGPU::isGFX12Plus(STI);
1742}
1743
1744bool AMDGPUDisassembler::hasArchitectedFlatScratch() const {
1745 return STI.hasFeature(AMDGPU::FeatureArchitectedFlatScratch);
1746}
1747
1748bool AMDGPUDisassembler::hasKernargPreload() const {
1749 return AMDGPU::hasKernargPreload(STI);
1750}
1751
1752//===----------------------------------------------------------------------===//
1753// AMDGPU specific symbol handling
1754//===----------------------------------------------------------------------===//
1755#define GET_FIELD(MASK) (AMDHSA_BITS_GET(FourByteBuffer, MASK))
1756#define PRINT_DIRECTIVE(DIRECTIVE, MASK) \
1757 do { \
1758 KdStream << Indent << DIRECTIVE " " << GET_FIELD(MASK) << '\n'; \
1759 } while (0)
1760#define PRINT_PSEUDO_DIRECTIVE_COMMENT(DIRECTIVE, MASK) \
1761 do { \
1762 KdStream << Indent << MAI.getCommentString() << ' ' << DIRECTIVE " " \
1763 << GET_FIELD(MASK) << '\n'; \
1764 } while (0)
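// For example, PRINT_DIRECTIVE(".amdhsa_ieee_mode",
// COMPUTE_PGM_RSRC1_ENABLE_IEEE_MODE) extracts the field from FourByteBuffer
// via AMDHSA_BITS_GET and emits a line such as
//     .amdhsa_ieee_mode 1
// into KdStream, indented by one tab.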
1765
1766// NOLINTNEXTLINE(readability-identifier-naming)
1767MCDisassembler::DecodeStatus AMDGPUDisassembler::decodeCOMPUTE_PGM_RSRC1(
1768 uint32_t FourByteBuffer, raw_string_ostream &KdStream) const {
1769 using namespace amdhsa;
1770 StringRef Indent = "\t";
1771
1772 // We cannot accurately backward compute #VGPRs used from
1773 // GRANULATED_WORKITEM_VGPR_COUNT. But we are concerned with getting the same
1774 // value of GRANULATED_WORKITEM_VGPR_COUNT in the reassembled binary. So we
1775 // simply calculate the inverse of what the assembler does.
1776
1777 uint32_t GranulatedWorkitemVGPRCount =
1778 GET_FIELD(COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT);
1779
1780 uint32_t NextFreeVGPR =
1781 (GranulatedWorkitemVGPRCount + 1) *
1782 AMDGPU::IsaInfo::getVGPREncodingGranule(&STI, EnableWavefrontSize32);
1783
1784 KdStream << Indent << ".amdhsa_next_free_vgpr " << NextFreeVGPR << '\n';
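// Worked example of the inverse computation above: assuming a VGPR encoding
// granule of 4, an encoded GRANULATED_WORKITEM_VGPR_COUNT of 3 reconstructs
// .amdhsa_next_free_vgpr (3 + 1) * 4 = 16.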
1785
1786 // We cannot backward compute values used to calculate
1787 // GRANULATED_WAVEFRONT_SGPR_COUNT. Hence the original values for following
1788 // directives can't be computed:
1789 // .amdhsa_reserve_vcc
1790 // .amdhsa_reserve_flat_scratch
1791 // .amdhsa_reserve_xnack_mask
1792 // They take their respective default values if not specified in the assembly.
1793 //
1794 // GRANULATED_WAVEFRONT_SGPR_COUNT
1795 // = f(NEXT_FREE_SGPR + VCC + FLAT_SCRATCH + XNACK_MASK)
1796 //
1797 // We compute the inverse as though all directives apart from NEXT_FREE_SGPR
1798 // are set to 0. So while disassembling we consider that:
1799 //
1800 // GRANULATED_WAVEFRONT_SGPR_COUNT
1801 // = f(NEXT_FREE_SGPR + 0 + 0 + 0)
1802 //
1803 // The disassembler cannot recover the original values of those 3 directives.
1804
1805 uint32_t GranulatedWavefrontSGPRCount =
1806 GET_FIELD(COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT);
1807
1808 if (isGFX10Plus() && GranulatedWavefrontSGPRCount)
1809 return MCDisassembler::Fail;
1810
1811 uint32_t NextFreeSGPR = (GranulatedWavefrontSGPRCount + 1) *
1812 AMDGPU::IsaInfo::getSGPREncodingGranule(&STI);
1813
1814 KdStream << Indent << ".amdhsa_reserve_vcc " << 0 << '\n';
1815 if (!hasArchitectedFlatScratch())
1816 KdStream << Indent << ".amdhsa_reserve_flat_scratch " << 0 << '\n';
1817 KdStream << Indent << ".amdhsa_reserve_xnack_mask " << 0 << '\n';
1818 KdStream << Indent << ".amdhsa_next_free_sgpr " << NextFreeSGPR << "\n";
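 // Worked example: with the pre-GFX10 SGPR encoding granule of 8, an
 // encoded GRANULATED_WAVEFRONT_SGPR_COUNT of 11 is emitted as
 // .amdhsa_next_free_sgpr (11 + 1) * 8 = 96, with the three reserved-unit
 // directives above pinned to 0.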
1819
1820 if (FourByteBuffer & COMPUTE_PGM_RSRC1_PRIORITY)
1821 return MCDisassembler::Fail;
1822
1823 PRINT_DIRECTIVE(".amdhsa_float_round_mode_32",
1824 COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_32);
1825 PRINT_DIRECTIVE(".amdhsa_float_round_mode_16_64",
1826 COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_16_64);
1827 PRINT_DIRECTIVE(".amdhsa_float_denorm_mode_32",
1828 COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_32);
1829 PRINT_DIRECTIVE(".amdhsa_float_denorm_mode_16_64",
1830 COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_16_64);
1831
1832 if (FourByteBuffer & COMPUTE_PGM_RSRC1_PRIV)
1833 return MCDisassembler::Fail;
1834
1835 PRINT_DIRECTIVE(".amdhsa_dx10_clamp", COMPUTE_PGM_RSRC1_ENABLE_DX10_CLAMP);
1836
1837 if (FourByteBuffer & COMPUTE_PGM_RSRC1_DEBUG_MODE)
1838 return MCDisassembler::Fail;
1839
1840 PRINT_DIRECTIVE(".amdhsa_ieee_mode", COMPUTE_PGM_RSRC1_ENABLE_IEEE_MODE);
1841
1842 if (FourByteBuffer & COMPUTE_PGM_RSRC1_BULKY)
1843 return MCDisassembler::Fail;
1844
1845 if (FourByteBuffer & COMPUTE_PGM_RSRC1_CDBG_USER)
1846 return MCDisassembler::Fail;
1847
1848 if (isGFX9Plus())
1849 PRINT_DIRECTIVE(".amdhsa_fp16_overflow", COMPUTE_PGM_RSRC1_GFX9_PLUS_FP16_OVFL);
1850
1851 if (!isGFX9Plus())
1852 if (FourByteBuffer & COMPUTE_PGM_RSRC1_GFX6_GFX8_RESERVED0)
1853 return MCDisassembler::Fail;
1854 if (FourByteBuffer & COMPUTE_PGM_RSRC1_RESERVED1)
1855 return MCDisassembler::Fail;
1856 if (!isGFX10Plus())
1857 if (FourByteBuffer & COMPUTE_PGM_RSRC1_GFX6_GFX9_RESERVED2)
1858 return MCDisassembler::Fail;
1859
1860 if (isGFX10Plus()) {
1861 PRINT_DIRECTIVE(".amdhsa_workgroup_processor_mode",
1862 COMPUTE_PGM_RSRC1_GFX10_PLUS_WGP_MODE);
1863 PRINT_DIRECTIVE(".amdhsa_memory_ordered", COMPUTE_PGM_RSRC1_GFX10_PLUS_MEM_ORDERED);
1864 PRINT_DIRECTIVE(".amdhsa_forward_progress", COMPUTE_PGM_RSRC1_GFX10_PLUS_FWD_PROGRESS);
1865 }
1866 return MCDisassembler::Success;
1867}
1868
1869// NOLINTNEXTLINE(readability-identifier-naming)
1870MCDisassembler::DecodeStatus AMDGPUDisassembler::decodeCOMPUTE_PGM_RSRC2(
1871 uint32_t FourByteBuffer, raw_string_ostream &KdStream) const {
1872 using namespace amdhsa;
1873 StringRef Indent = "\t";
1874 if (hasArchitectedFlatScratch())
1875 PRINT_DIRECTIVE(".amdhsa_enable_private_segment",
1876 COMPUTE_PGM_RSRC2_ENABLE_PRIVATE_SEGMENT);
1877 else
1878 PRINT_DIRECTIVE(".amdhsa_system_sgpr_private_segment_wavefront_offset",
1879 COMPUTE_PGM_RSRC2_ENABLE_PRIVATE_SEGMENT);
1880 PRINT_DIRECTIVE(".amdhsa_system_sgpr_workgroup_id_x",
1881 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_X);
1882 PRINT_DIRECTIVE(".amdhsa_system_sgpr_workgroup_id_y",
1883 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Y);
1884 PRINT_DIRECTIVE(".amdhsa_system_sgpr_workgroup_id_z",
1885 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Z);
1886 PRINT_DIRECTIVE(".amdhsa_system_sgpr_workgroup_info",
1887 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_INFO);
1888 PRINT_DIRECTIVE(".amdhsa_system_vgpr_workitem_id",
1889 COMPUTE_PGM_RSRC2_ENABLE_VGPR_WORKITEM_ID);
1890
1891 if (FourByteBuffer & COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_ADDRESS_WATCH)
1892 return MCDisassembler::Fail;
1893
1894 if (FourByteBuffer & COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_MEMORY)
1895 return MCDisassembler::Fail;
1896
1897 if (FourByteBuffer & COMPUTE_PGM_RSRC2_GRANULATED_LDS_SIZE)
1898 return MCDisassembler::Fail;
1899
1901 ".amdhsa_exception_fp_ieee_invalid_op",
1902 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INVALID_OPERATION);
1903 PRINT_DIRECTIVE(".amdhsa_exception_fp_denorm_src",
1904 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_FP_DENORMAL_SOURCE);
1906 ".amdhsa_exception_fp_ieee_div_zero",
1907 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_DIVISION_BY_ZERO);
1908 PRINT_DIRECTIVE(".amdhsa_exception_fp_ieee_overflow",
1909 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_OVERFLOW);
1910 PRINT_DIRECTIVE(".amdhsa_exception_fp_ieee_underflow",
1911 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_UNDERFLOW);
1912 PRINT_DIRECTIVE(".amdhsa_exception_fp_ieee_inexact",
1913 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INEXACT);
1914 PRINT_DIRECTIVE(".amdhsa_exception_int_div_zero",
1915 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_INT_DIVIDE_BY_ZERO);
1916
1917 if (FourByteBuffer & COMPUTE_PGM_RSRC2_RESERVED0)
1918 return MCDisassembler::Fail;
1919
1920 return MCDisassembler::Success;
1921}
1922
1923// NOLINTNEXTLINE(readability-identifier-naming)
1924MCDisassembler::DecodeStatus AMDGPUDisassembler::decodeCOMPUTE_PGM_RSRC3(
1925 uint32_t FourByteBuffer, raw_string_ostream &KdStream) const {
1926 using namespace amdhsa;
1927 StringRef Indent = "\t";
1928 if (isGFX90A()) {
1929 KdStream << Indent << ".amdhsa_accum_offset "
1930 << (GET_FIELD(COMPUTE_PGM_RSRC3_GFX90A_ACCUM_OFFSET) + 1) * 4
1931 << '\n';
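 // The ACCUM_OFFSET field stores (offset / 4) - 1, so e.g. an encoded
 // field value of 15 round-trips as .amdhsa_accum_offset 64.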
1932 if (FourByteBuffer & COMPUTE_PGM_RSRC3_GFX90A_RESERVED0)
1933 return MCDisassembler::Fail;
1934 PRINT_DIRECTIVE(".amdhsa_tg_split", COMPUTE_PGM_RSRC3_GFX90A_TG_SPLIT);
1935 if (FourByteBuffer & COMPUTE_PGM_RSRC3_GFX90A_RESERVED1)
1936 return MCDisassembler::Fail;
1937 } else if (isGFX10Plus()) {
1938 if (!EnableWavefrontSize32 || !*EnableWavefrontSize32) {
1939 PRINT_DIRECTIVE(".amdhsa_shared_vgpr_count",
1940 COMPUTE_PGM_RSRC3_GFX10_PLUS_SHARED_VGPR_COUNT);
1941 } else {
1943 "SHARED_VGPR_COUNT", COMPUTE_PGM_RSRC3_GFX10_PLUS_SHARED_VGPR_COUNT);
1944 }
1945
1946 if (isGFX11Plus()) {
1947 PRINT_PSEUDO_DIRECTIVE_COMMENT("INST_PREF_SIZE",
1948 COMPUTE_PGM_RSRC3_GFX11_PLUS_INST_PREF_SIZE);
1949 PRINT_PSEUDO_DIRECTIVE_COMMENT("TRAP_ON_START",
1950 COMPUTE_PGM_RSRC3_GFX11_PLUS_TRAP_ON_START);
1951 PRINT_PSEUDO_DIRECTIVE_COMMENT("TRAP_ON_END",
1952 COMPUTE_PGM_RSRC3_GFX11_PLUS_TRAP_ON_END);
1953 } else {
1954 if (FourByteBuffer & COMPUTE_PGM_RSRC3_GFX10_RESERVED0)
1955 return MCDisassembler::Fail;
1956 }
1957
1958 if (FourByteBuffer & COMPUTE_PGM_RSRC3_GFX10_PLUS_RESERVED1)
1959 return MCDisassembler::Fail;
1960
1961 if (isGFX11Plus()) {
1962 PRINT_PSEUDO_DIRECTIVE_COMMENT("IMAGE_OP",
1963 COMPUTE_PGM_RSRC3_GFX11_PLUS_TRAP_ON_START);
1964 } else {
1965 if (FourByteBuffer & COMPUTE_PGM_RSRC3_GFX10_RESERVED2)
1966 return MCDisassembler::Fail;
1967 }
1968 } else if (FourByteBuffer) {
1969 return MCDisassembler::Fail;
1970 }
1971 return MCDisassembler::Success;
1972}
1973#undef PRINT_PSEUDO_DIRECTIVE_COMMENT
1974#undef PRINT_DIRECTIVE
1975#undef GET_FIELD
1976
1977MCDisassembler::DecodeStatus
1978AMDGPUDisassembler::decodeKernelDescriptorDirective(
1979 DataExtractor::Cursor &Cursor, ArrayRef<uint8_t> Bytes,
1980 raw_string_ostream &KdStream) const {
1981#define PRINT_DIRECTIVE(DIRECTIVE, MASK) \
1982 do { \
1983 KdStream << Indent << DIRECTIVE " " \
1984 << ((TwoByteBuffer & MASK) >> (MASK##_SHIFT)) << '\n'; \
1985 } while (0)
1986
1987 uint16_t TwoByteBuffer = 0;
1988 uint32_t FourByteBuffer = 0;
1989
1990 StringRef ReservedBytes;
1991 StringRef Indent = "\t";
1992
1993 assert(Bytes.size() == 64);
1994 DataExtractor DE(Bytes, /*IsLittleEndian=*/true, /*AddressSize=*/8);
1995
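 // The cursor advances monotonically through the 64-byte descriptor; each
 // case below handles the field that starts at that byte offset, from the
 // fixed segment sizes through the COMPUTE_PGM_RSRC words to the kernel
 // code properties.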
1996 switch (Cursor.tell()) {
1997 case amdhsa::GROUP_SEGMENT_FIXED_SIZE_OFFSET:
1998 FourByteBuffer = DE.getU32(Cursor);
1999 KdStream << Indent << ".amdhsa_group_segment_fixed_size " << FourByteBuffer
2000 << '\n';
2001 return MCDisassembler::Success;
2002
2003 case amdhsa::PRIVATE_SEGMENT_FIXED_SIZE_OFFSET:
2004 FourByteBuffer = DE.getU32(Cursor);
2005 KdStream << Indent << ".amdhsa_private_segment_fixed_size "
2006 FourByteBuffer << '\n';
2007 return MCDisassembler::Success;
2008
2009 case amdhsa::KERNARG_SIZE_OFFSET:
2010 FourByteBuffer = DE.getU32(Cursor);
2011 KdStream << Indent << ".amdhsa_kernarg_size "
2012 << FourByteBuffer << '\n';
2013 return MCDisassembler::Success;
2014
2015 case amdhsa::RESERVED0_OFFSET:
2016 // 4 reserved bytes, must be 0.
2017 ReservedBytes = DE.getBytes(Cursor, 4);
2018 for (int I = 0; I < 4; ++I) {
2019 if (ReservedBytes[I] != 0) {
2020 return MCDisassembler::Fail;
2021 }
2022 }
2023 return MCDisassembler::Success;
2024
2025 case amdhsa::KERNEL_CODE_ENTRY_BYTE_OFFSET_OFFSET:
2026 // KERNEL_CODE_ENTRY_BYTE_OFFSET
2027 // So far no directive controls this for Code Object V3, so simply skip for
2028 // disassembly.
2029 DE.skip(Cursor, 8);
2030 return MCDisassembler::Success;
2031
2032 case amdhsa::RESERVED1_OFFSET:
2033 // 20 reserved bytes, must be 0.
2034 ReservedBytes = DE.getBytes(Cursor, 20);
2035 for (int I = 0; I < 20; ++I) {
2036 if (ReservedBytes[I] != 0) {
2037 return MCDisassembler::Fail;
2038 }
2039 }
2040 return MCDisassembler::Success;
2041
2042 case amdhsa::COMPUTE_PGM_RSRC3_OFFSET:
2043 FourByteBuffer = DE.getU32(Cursor);
2044 return decodeCOMPUTE_PGM_RSRC3(FourByteBuffer, KdStream);
2045
2046 case amdhsa::COMPUTE_PGM_RSRC1_OFFSET:
2047 FourByteBuffer = DE.getU32(Cursor);
2048 return decodeCOMPUTE_PGM_RSRC1(FourByteBuffer, KdStream);
2049
2050 case amdhsa::COMPUTE_PGM_RSRC2_OFFSET:
2051 FourByteBuffer = DE.getU32(Cursor);
2052 return decodeCOMPUTE_PGM_RSRC2(FourByteBuffer, KdStream);
2053
2054 case amdhsa::KERNEL_CODE_PROPERTIES_OFFSET:
2055 using namespace amdhsa;
2056 TwoByteBuffer = DE.getU16(Cursor);
2057
2058 if (!hasArchitectedFlatScratch())
2059 PRINT_DIRECTIVE(".amdhsa_user_sgpr_private_segment_buffer",
2060 KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER);
2061 PRINT_DIRECTIVE(".amdhsa_user_sgpr_dispatch_ptr",
2062 KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR);
2063 PRINT_DIRECTIVE(".amdhsa_user_sgpr_queue_ptr",
2064 KERNEL_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR);
2065 PRINT_DIRECTIVE(".amdhsa_user_sgpr_kernarg_segment_ptr",
2066 KERNEL_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR);
2067 PRINT_DIRECTIVE(".amdhsa_user_sgpr_dispatch_id",
2068 KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID);
2069 if (!hasArchitectedFlatScratch())
2070 PRINT_DIRECTIVE(".amdhsa_user_sgpr_flat_scratch_init",
2071 KERNEL_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT);
2072 PRINT_DIRECTIVE(".amdhsa_user_sgpr_private_segment_size",
2073 KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE);
2074
2075 if (TwoByteBuffer & KERNEL_CODE_PROPERTY_RESERVED0)
2076 return MCDisassembler::Fail;
2077
2078 // Reserved for GFX9
2079 if (isGFX9() &&
2080 (TwoByteBuffer & KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32)) {
2081 return MCDisassembler::Fail;
2082 } else if (isGFX10Plus()) {
2083 PRINT_DIRECTIVE(".amdhsa_wavefront_size32",
2084 KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32);
2085 }
2086
2087 if (AMDGPU::getAmdhsaCodeObjectVersion() >= AMDGPU::AMDHSA_COV5)
2088 PRINT_DIRECTIVE(".amdhsa_uses_dynamic_stack",
2089 KERNEL_CODE_PROPERTY_USES_DYNAMIC_STACK);
2090
2091 if (TwoByteBuffer & KERNEL_CODE_PROPERTY_RESERVED1)
2092 return MCDisassembler::Fail;
2093
2094 return MCDisassembler::Success;
2095
2096 case amdhsa::KERNARG_PRELOAD_OFFSET:
2097 using namespace amdhsa;
2098 TwoByteBuffer = DE.getU16(Cursor);
2099 if (TwoByteBuffer & KERNARG_PRELOAD_SPEC_LENGTH) {
2100 PRINT_DIRECTIVE(".amdhsa_user_sgpr_kernarg_preload_length",
2101 KERNARG_PRELOAD_SPEC_LENGTH);
2102 }
2103
2104 if (TwoByteBuffer & KERNARG_PRELOAD_SPEC_OFFSET) {
2105 PRINT_DIRECTIVE(".amdhsa_user_sgpr_kernarg_preload_offset",
2106 KERNARG_PRELOAD_SPEC_OFFSET);
2107 }
2108 return MCDisassembler::Success;
2109
2110 case amdhsa::RESERVED3_OFFSET:
2111 // 4 bytes from here are reserved, must be 0.
2112 ReservedBytes = DE.getBytes(Cursor, 4);
2113 for (int I = 0; I < 4; ++I) {
2114 if (ReservedBytes[I] != 0)
2115 return MCDisassembler::Fail;
2116 }
2117 return MCDisassembler::Success;
2118
2119 default:
2120 llvm_unreachable("Unhandled index. Case statements cover everything.");
2121 return MCDisassembler::Fail;
2122 }
2123#undef PRINT_DIRECTIVE
2124}
2125
2126MCDisassembler::DecodeStatus AMDGPUDisassembler::decodeKernelDescriptor(
2127 StringRef KdName, ArrayRef<uint8_t> Bytes, uint64_t KdAddress) const {
2128 // CP microcode requires the kernel descriptor to be 64 aligned.
2129 if (Bytes.size() != 64 || KdAddress % 64 != 0)
2130 return MCDisassembler::Fail;
2131
2132 // FIXME: We can't actually decode "in order" as is done below, as e.g. GFX10
2133 // requires us to know the setting of .amdhsa_wavefront_size32 in order to
2134 // accurately produce .amdhsa_next_free_vgpr, and they appear in the wrong
2135 // order. Workaround this by first looking up .amdhsa_wavefront_size32 here
2136 // when required.
2137 if (isGFX10Plus()) {
2138 uint16_t KernelCodeProperties =
2139 support::endian::read16(&Bytes[amdhsa::KERNEL_CODE_PROPERTIES_OFFSET],
2140 llvm::endianness::little);
2141 EnableWavefrontSize32 =
2142 AMDHSA_BITS_GET(KernelCodeProperties,
2143 amdhsa::KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32);
2144 }
2145
2146 std::string Kd;
2147 raw_string_ostream KdStream(Kd);
2148 KdStream << ".amdhsa_kernel " << KdName << '\n';
2149
2150 DataExtractor::Cursor C(0);
2151 while (C && C.tell() < Bytes.size()) {
2152 MCDisassembler::DecodeStatus Status =
2153 decodeKernelDescriptorDirective(C, Bytes, KdStream);
2154
2155 cantFail(C.takeError());
2156
2157 if (Status == MCDisassembler::Fail)
2158 return MCDisassembler::Fail;
2159 }
2160 KdStream << ".end_amdhsa_kernel\n";
2161 outs() << KdStream.str();
2162 return MCDisassembler::Success;
2163}
2164
2165std::optional<MCDisassembler::DecodeStatus>
2166AMDGPUDisassembler::onSymbolStart(SymbolInfoTy &Symbol, uint64_t &Size,
2167 ArrayRef<uint8_t> Bytes, uint64_t Address,
2168 raw_ostream &CStream) const {
2169 // Right now only kernel descriptor needs to be handled.
2170 // We ignore all other symbols for target specific handling.
2171 // TODO:
2172 // Fix the spurious symbol issue for AMDGPU kernels. Exists for both Code
2173 // Object V2 and V3 when symbols are marked protected.
2174
2175 // amd_kernel_code_t for Code Object V2.
2176 if (Symbol.Type == ELF::STT_AMDGPU_HSA_KERNEL) {
2177 Size = 256;
2178 return MCDisassembler::Fail;
2179 }
2180
2181 // Code Object V3 kernel descriptors.
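 // For example, a 64-byte STT_OBJECT symbol named "my_kernel.kd" is
 // decoded into an .amdhsa_kernel my_kernel ... .end_amdhsa_kernel block.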
2182 StringRef Name = Symbol.Name;
2183 if (Symbol.Type == ELF::STT_OBJECT && Name.endswith(StringRef(".kd"))) {
2184 Size = 64; // Size = 64 regardless of success or failure.
2185 return decodeKernelDescriptor(Name.drop_back(3), Bytes, Address);
2186 }
2187 return std::nullopt;
2188}
2189
2190//===----------------------------------------------------------------------===//
2191// AMDGPUSymbolizer
2192//===----------------------------------------------------------------------===//
2193
2194// Try to find symbol name for specified label
2195bool AMDGPUSymbolizer::tryAddingSymbolicOperand(
2196 MCInst &Inst, raw_ostream & /*cStream*/, int64_t Value,
2197 uint64_t /*Address*/, bool IsBranch, uint64_t /*Offset*/,
2198 uint64_t /*OpSize*/, uint64_t /*InstSize*/) {
2199
2200 if (!IsBranch) {
2201 return false;
2202 }
2203
2204 auto *Symbols = static_cast<SectionSymbolsTy *>(DisInfo);
2205 if (!Symbols)
2206 return false;
2207
2208 auto Result = llvm::find_if(*Symbols, [Value](const SymbolInfoTy &Val) {
2209 return Val.Addr == static_cast<uint64_t>(Value) &&
2210 Val.Type == ELF::STT_NOTYPE;
2211 });
2212 if (Result != Symbols->end()) {
2213 auto *Sym = Ctx.getOrCreateSymbol(Result->Name);
2214 const auto *Add = MCSymbolRefExpr::create(Sym, Ctx);
2215 Inst.addOperand(MCOperand::createExpr(Add));
2216 return true;
2217 }
2218 // Add to list of referenced addresses, so caller can synthesize a label.
2219 ReferencedAddresses.push_back(static_cast<uint64_t>(Value));
2220 return false;
2221}
2222
2223void AMDGPUSymbolizer::tryAddingPcLoadReferenceComment(raw_ostream &cStream,
2224 int64_t Value,
2225 uint64_t Address) {
2226 llvm_unreachable("unimplemented");
2227}
2228
2229//===----------------------------------------------------------------------===//
2230// Initialization
2231//===----------------------------------------------------------------------===//
2232
2233static MCSymbolizer *createAMDGPUSymbolizer(const Triple &/*TT*/,
2234 LLVMOpInfoCallback /*GetOpInfo*/,
2235 LLVMSymbolLookupCallback /*SymbolLookUp*/,
2236 void *DisInfo,
2237 MCContext *Ctx,
2238 std::unique_ptr<MCRelocationInfo> &&RelInfo) {
2239 return new AMDGPUSymbolizer(*Ctx, std::move(RelInfo), DisInfo);
2240}
2241
2242static MCDisassembler *createAMDGPUDisassembler(const Target &T,
2243 const MCSubtargetInfo &STI,
2244 MCContext &Ctx) {
2245 return new AMDGPUDisassembler(STI, Ctx, T.createMCInstrInfo());
2246}
2247
2248extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeAMDGPUDisassembler() {
2249 TargetRegistry::RegisterMCDisassembler(getTheGCNTarget(),
2250 createAMDGPUDisassembler);
2251 TargetRegistry::RegisterMCSymbolizer(getTheGCNTarget(),
2252 createAMDGPUSymbolizer);
2253}