//===- AMDGPUDisassembler.cpp - Disassembler for AMDGPU ISA ---------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
//===----------------------------------------------------------------------===//
//
/// \file
///
/// This file contains the definition of the AMDGPU ISA disassembler.
//
//===----------------------------------------------------------------------===//

// ToDo: What to do with instruction suffixes (v_mov_b32 vs v_mov_b32_e32)?
#include "Disassembler/AMDGPUDisassembler.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "SIDefines.h"
#include "SIRegisterInfo.h"
#include "Utils/AMDGPUBaseInfo.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCDecoderOps.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCInstrDesc.h"
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/Support/AMDHSAKernelDescriptor.h"

using namespace llvm;

#define DEBUG_TYPE "amdgpu-disassembler"

#define SGPR_MAX                                                               \
  (isGFX10Plus() ? AMDGPU::EncValues::SGPR_MAX_GFX10                           \
                 : AMDGPU::EncValues::SGPR_MAX_SI)

AMDGPUDisassembler::AMDGPUDisassembler(const MCSubtargetInfo &STI,
                                       MCContext &Ctx, MCInstrInfo const *MCII)
    : MCDisassembler(STI, Ctx), MCII(MCII), MRI(*Ctx.getRegisterInfo()),
      MAI(*Ctx.getAsmInfo()), TargetMaxInstBytes(MAI.getMaxInstLength(&STI)),
      CodeObjectVersion(AMDGPU::getDefaultAMDHSACodeObjectVersion()) {
  // ToDo: AMDGPUDisassembler supports only VI ISA.
  if (!STI.hasFeature(AMDGPU::FeatureGCN3Encoding) && !isGFX10Plus())
    report_fatal_error("Disassembly not yet supported for subtarget");
}

void AMDGPUDisassembler::setABIVersion(unsigned Version) {
  CodeObjectVersion = AMDGPU::getAMDHSACodeObjectVersion(Version);
}

static DecodeStatus addOperand(MCInst &Inst, const MCOperand &Opnd) {
  Inst.addOperand(Opnd);
  return Opnd.isValid() ?
    MCDisassembler::Success :
    MCDisassembler::Fail;
}

static int insertNamedMCOperand(MCInst &MI, const MCOperand &Op,
                                uint16_t NameIdx) {
  int OpIdx = AMDGPU::getNamedOperandIdx(MI.getOpcode(), NameIdx);
  if (OpIdx != -1) {
    auto I = MI.begin();
    std::advance(I, OpIdx);
    MI.insert(I, Op);
  }
  return OpIdx;
}

static DecodeStatus decodeSOPPBrTarget(MCInst &Inst, unsigned Imm,
                                       uint64_t Addr,
                                       const MCDisassembler *Decoder) {
  auto DAsm = static_cast<const AMDGPUDisassembler*>(Decoder);

  // Our branches take a simm16, but we need two extra bits to account for the
  // factor of 4.
  APInt SignedOffset(18, Imm * 4, true);
  int64_t Offset = (SignedOffset.sext(64) + 4 + Addr).getSExtValue();

  if (DAsm->tryAddingSymbolicOperand(Inst, Offset, Addr, true, 2, 2, 0))
    return MCDisassembler::Success;
  return addOperand(Inst, MCOperand::createImm(Imm));
}
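
// Illustrative note (not from the upstream source): a worked example of the
// branch-target arithmetic above. For Imm = 0xFFFF (simm16 = -1), Imm * 4
// truncated to 18 signed bits is -4, so Offset = -4 + 4 + Addr = Addr; an
// encoded simm16 of -1 branches back to the branch instruction itself.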

static DecodeStatus decodeSMEMOffset(MCInst &Inst, unsigned Imm, uint64_t Addr,
                                     const MCDisassembler *Decoder) {
  auto DAsm = static_cast<const AMDGPUDisassembler*>(Decoder);
  int64_t Offset;
  if (DAsm->isGFX12Plus()) { // GFX12 supports 24-bit signed offsets.
    Offset = SignExtend64<24>(Imm);
  } else if (DAsm->isVI()) { // VI supports 20-bit unsigned offsets.
    Offset = Imm & 0xFFFFF;
  } else { // GFX9+ supports 21-bit signed offsets.
    Offset = SignExtend64<21>(Imm);
  }
  return addOperand(Inst, MCOperand::createImm(Offset));
}
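
// Illustrative note (not from the upstream source): the same encoded bits can
// decode differently per subtarget. Imm = 0x1FFFFF yields -1 under the GFX9+
// 21-bit signed rule, 0xFFFFF under the VI 20-bit unsigned mask, and stays
// positive (0x1FFFFF) under the GFX12 24-bit signed rule.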

static DecodeStatus decodeBoolReg(MCInst &Inst, unsigned Val, uint64_t Addr,
                                  const MCDisassembler *Decoder) {
  auto DAsm = static_cast<const AMDGPUDisassembler*>(Decoder);
  return addOperand(Inst, DAsm->decodeBoolReg(Val));
}

static DecodeStatus decodeSplitBarrier(MCInst &Inst, unsigned Val,
                                       uint64_t Addr,
                                       const MCDisassembler *Decoder) {
  auto DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
  return addOperand(Inst, DAsm->decodeSplitBarrier(Val));
}

static DecodeStatus decodeDpp8FI(MCInst &Inst, unsigned Val, uint64_t Addr,
                                 const MCDisassembler *Decoder) {
  auto DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
  return addOperand(Inst, DAsm->decodeDpp8FI(Val));
}

#define DECODE_OPERAND(StaticDecoderName, DecoderName)                         \
  static DecodeStatus StaticDecoderName(MCInst &Inst, unsigned Imm,            \
                                        uint64_t /*Addr*/,                     \
                                        const MCDisassembler *Decoder) {       \
    auto DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);              \
    return addOperand(Inst, DAsm->DecoderName(Imm));                           \
  }

// Decoder for registers, decoding directly by RegClassID. Imm (8 bits) is the
// register number. Used by VGPR-only and AGPR-only operands.
#define DECODE_OPERAND_REG_8(RegClass)                                         \
  static DecodeStatus Decode##RegClass##RegisterClass(                         \
      MCInst &Inst, unsigned Imm, uint64_t /*Addr*/,                           \
      const MCDisassembler *Decoder) {                                         \
    assert(Imm < (1 << 8) && "8-bit encoding");                                \
    auto DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);              \
    return addOperand(                                                         \
        Inst, DAsm->createRegOperand(AMDGPU::RegClass##RegClassID, Imm));      \
  }

#define DECODE_SrcOp(Name, EncSize, OpWidth, EncImm, MandatoryLiteral,         \
                     ImmWidth)                                                 \
  static DecodeStatus Name(MCInst &Inst, unsigned Imm, uint64_t /*Addr*/,      \
                           const MCDisassembler *Decoder) {                    \
    assert(Imm < (1 << EncSize) && #EncSize "-bit encoding");                  \
    auto DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);              \
    return addOperand(Inst,                                                    \
                      DAsm->decodeSrcOp(AMDGPUDisassembler::OpWidth, EncImm,   \
                                        MandatoryLiteral, ImmWidth));          \
  }
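
// Illustrative note (not from the upstream source): for example,
// DECODE_OPERAND_REG_7(SReg_32, OPW32) below expands via DECODE_SrcOp to
//
//   static DecodeStatus DecodeSReg_32RegisterClass(
//       MCInst &Inst, unsigned Imm, uint64_t /*Addr*/,
//       const MCDisassembler *Decoder) {
//     assert(Imm < (1 << 7) && "7-bit encoding");
//     auto DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
//     return addOperand(Inst, DAsm->decodeSrcOp(AMDGPUDisassembler::OPW32,
//                                               Imm, false, 0));
//   }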

static DecodeStatus decodeSrcOp(MCInst &Inst, unsigned EncSize,
                                AMDGPUDisassembler::OpWidthTy OpWidth,
                                unsigned Imm, unsigned EncImm,
                                bool MandatoryLiteral, unsigned ImmWidth,
                                AMDGPU::OperandSemantics Sema,
                                const MCDisassembler *Decoder) {
  assert(Imm < (1U << EncSize) && "Operand doesn't fit encoding!");
  auto DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
  return addOperand(Inst, DAsm->decodeSrcOp(OpWidth, EncImm, MandatoryLiteral,
                                            ImmWidth, Sema));
}

// Decoder for registers. Imm (7 bits) is the register number; uses decodeSrcOp
// to get the register class. Used by SGPR-only operands.
#define DECODE_OPERAND_REG_7(RegClass, OpWidth)                                \
  DECODE_SrcOp(Decode##RegClass##RegisterClass, 7, OpWidth, Imm, false, 0)

// Decoder for registers. Imm (10 bits): Imm{7-0} is the register number,
// Imm{9} is acc (AGPR or VGPR), and Imm{8} should be 0 (see VOP3Pe_SMFMAC).
// Set Imm{8} to 1 (IS_VGPR) to decode using 'enum10' from decodeSrcOp.
// Used by AV_ register classes (AGPR-or-VGPR-only register operands).
template <AMDGPUDisassembler::OpWidthTy OpWidth>
static DecodeStatus decodeAV10(MCInst &Inst, unsigned Imm, uint64_t /* Addr */,
                               const MCDisassembler *Decoder) {
  return decodeSrcOp(Inst, 10, OpWidth, Imm, Imm | AMDGPU::EncValues::IS_VGPR,
                     false, 0, AMDGPU::OperandSemantics::INT, Decoder);
}
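
// Illustrative note (not from the upstream source): layout of the 'enum10'
// value consumed by decodeSrcOp, as used by the decoders above and below:
//
//   Imm{9}   - acc bit (1 selects an AGPR, 0 selects a VGPR)
//   Imm{8-0} - 9-bit Src encoding (register number or inline constant)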

// Decoder for Src(9-bit encoding) registers only.
template <AMDGPUDisassembler::OpWidthTy OpWidth>
static DecodeStatus decodeSrcReg9(MCInst &Inst, unsigned Imm,
                                  uint64_t /* Addr */,
                                  const MCDisassembler *Decoder) {
  return decodeSrcOp(Inst, 9, OpWidth, Imm, Imm, false, 0,
                     AMDGPU::OperandSemantics::INT, Decoder);
}

// Decoder for Src(9-bit encoding) AGPR only. The register number is encoded in
// 9 bits; set Imm{9} to 1 (set acc) and decode using 'enum10' from
// decodeSrcOp.
template <AMDGPUDisassembler::OpWidthTy OpWidth>
static DecodeStatus decodeSrcA9(MCInst &Inst, unsigned Imm, uint64_t /* Addr */,
                                const MCDisassembler *Decoder) {
  return decodeSrcOp(Inst, 9, OpWidth, Imm, Imm | 512, false, 0,
                     AMDGPU::OperandSemantics::INT, Decoder);
}

// Decoder for 'enum10' from decodeSrcOp: Imm{8-0} is the 9-bit Src encoding,
// Imm{9} is acc. Registers only.
template <AMDGPUDisassembler::OpWidthTy OpWidth>
static DecodeStatus decodeSrcAV10(MCInst &Inst, unsigned Imm,
                                  uint64_t /* Addr */,
                                  const MCDisassembler *Decoder) {
  return decodeSrcOp(Inst, 10, OpWidth, Imm, Imm, false, 0,
                     AMDGPU::OperandSemantics::INT, Decoder);
}

// Decoder for RegisterOperands using 9-bit Src encoding. The operand can be a
// register from RegClass or an immediate. Registers that don't belong to
// RegClass will still be decoded, and InstPrinter will report a warning. An
// immediate is decoded into a constant of size ImmWidth, which should match
// the width of the immediate used by the OperandType (important for
// floating-point types).
template <AMDGPUDisassembler::OpWidthTy OpWidth, unsigned ImmWidth,
          unsigned OperandSemantics>
static DecodeStatus decodeSrcRegOrImm9(MCInst &Inst, unsigned Imm,
                                       uint64_t /* Addr */,
                                       const MCDisassembler *Decoder) {
  return decodeSrcOp(Inst, 9, OpWidth, Imm, Imm, false, ImmWidth,
                     (AMDGPU::OperandSemantics)OperandSemantics, Decoder);
}

// Decoder for Src(9-bit encoding) AGPR or immediate. Set Imm{9} to 1 (set acc)
// and decode using 'enum10' from decodeSrcOp.
template <AMDGPUDisassembler::OpWidthTy OpWidth, unsigned ImmWidth,
          unsigned OperandSemantics>
static DecodeStatus decodeSrcRegOrImmA9(MCInst &Inst, unsigned Imm,
                                        uint64_t /* Addr */,
                                        const MCDisassembler *Decoder) {
  return decodeSrcOp(Inst, 9, OpWidth, Imm, Imm | 512, false, ImmWidth,
                     (AMDGPU::OperandSemantics)OperandSemantics, Decoder);
}

template <AMDGPUDisassembler::OpWidthTy OpWidth, unsigned ImmWidth,
          unsigned OperandSemantics>
static DecodeStatus decodeSrcRegOrImmDeferred9(MCInst &Inst, unsigned Imm,
                                               uint64_t /* Addr */,
                                               const MCDisassembler *Decoder) {
  return decodeSrcOp(Inst, 9, OpWidth, Imm, Imm, true, ImmWidth,
                     (AMDGPU::OperandSemantics)OperandSemantics, Decoder);
}

// Default decoders generated by tablegen: 'Decode<RegClass>RegisterClass'
// when RegisterClass is used as an operand. Most often used for destination
// operands.

DECODE_OPERAND_REG_8(VGPR_32)
DECODE_OPERAND_REG_8(VGPR_32_Lo128)
DECODE_OPERAND_REG_8(VReg_64)
DECODE_OPERAND_REG_8(VReg_96)
DECODE_OPERAND_REG_8(VReg_128)
DECODE_OPERAND_REG_8(VReg_256)
DECODE_OPERAND_REG_8(VReg_288)
DECODE_OPERAND_REG_8(VReg_352)
DECODE_OPERAND_REG_8(VReg_384)
DECODE_OPERAND_REG_8(VReg_512)
DECODE_OPERAND_REG_8(VReg_1024)

DECODE_OPERAND_REG_7(SReg_32, OPW32)
DECODE_OPERAND_REG_7(SReg_32_XEXEC, OPW32)
DECODE_OPERAND_REG_7(SReg_32_XM0_XEXEC, OPW32)
DECODE_OPERAND_REG_7(SReg_32_XEXEC_HI, OPW32)
DECODE_OPERAND_REG_7(SReg_64, OPW64)
DECODE_OPERAND_REG_7(SReg_64_XEXEC, OPW64)
DECODE_OPERAND_REG_7(SReg_96, OPW96)
DECODE_OPERAND_REG_7(SReg_128, OPW128)
DECODE_OPERAND_REG_7(SReg_256, OPW256)
DECODE_OPERAND_REG_7(SReg_512, OPW512)

DECODE_OPERAND_REG_8(AGPR_32)
DECODE_OPERAND_REG_8(AReg_64)
DECODE_OPERAND_REG_8(AReg_128)
DECODE_OPERAND_REG_8(AReg_256)
DECODE_OPERAND_REG_8(AReg_512)
DECODE_OPERAND_REG_8(AReg_1024)

static DecodeStatus DecodeVGPR_16RegisterClass(MCInst &Inst, unsigned Imm,
                                               uint64_t /*Addr*/,
                                               const MCDisassembler *Decoder) {
  assert(isUInt<10>(Imm) && "10-bit encoding expected");
  assert((Imm & (1 << 8)) == 0 && "Imm{8} should not be used");

  bool IsHi = Imm & (1 << 9);
  unsigned RegIdx = Imm & 0xff;
  auto DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
  return addOperand(Inst, DAsm->createVGPR16Operand(RegIdx, IsHi));
}

static DecodeStatus
DecodeVGPR_16_Lo128RegisterClass(MCInst &Inst, unsigned Imm, uint64_t /*Addr*/,
                                 const MCDisassembler *Decoder) {
  assert(isUInt<8>(Imm) && "8-bit encoding expected");

  bool IsHi = Imm & (1 << 7);
  unsigned RegIdx = Imm & 0x7f;
  auto DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
  return addOperand(Inst, DAsm->createVGPR16Operand(RegIdx, IsHi));
}
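
// Illustrative note (not from the upstream source): in the 10-bit true16
// encoding handled above, Imm{9} selects the high 16-bit half of the VGPR and
// Imm{7-0} is the VGPR number, so Imm = 0x205 decodes to the high half of v5
// (v5.h) while Imm = 0x005 decodes to v5.l.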

static DecodeStatus decodeOperand_VSrcT16_Lo128(MCInst &Inst, unsigned Imm,
                                                uint64_t /*Addr*/,
                                                const MCDisassembler *Decoder) {
  assert(isUInt<9>(Imm) && "9-bit encoding expected");

  const auto *DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
  bool IsVGPR = Imm & (1 << 8);
  if (IsVGPR) {
    bool IsHi = Imm & (1 << 7);
    unsigned RegIdx = Imm & 0x7f;
    return addOperand(Inst, DAsm->createVGPR16Operand(RegIdx, IsHi));
  }
  return addOperand(Inst, DAsm->decodeNonVGPRSrcOp(AMDGPUDisassembler::OPW16,
                                                   Imm & 0xFF, false, 16));
}

static DecodeStatus decodeOperand_VSrcT16(MCInst &Inst, unsigned Imm,
                                          uint64_t /*Addr*/,
                                          const MCDisassembler *Decoder) {
  assert(isUInt<10>(Imm) && "10-bit encoding expected");

  const auto *DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
  bool IsVGPR = Imm & (1 << 8);
  if (IsVGPR) {
    bool IsHi = Imm & (1 << 9);
    unsigned RegIdx = Imm & 0xff;
    return addOperand(Inst, DAsm->createVGPR16Operand(RegIdx, IsHi));
  }
  return addOperand(Inst, DAsm->decodeNonVGPRSrcOp(AMDGPUDisassembler::OPW16,
                                                   Imm & 0xFF, false, 16));
}

static DecodeStatus decodeOperand_KImmFP(MCInst &Inst, unsigned Imm,
                                         uint64_t Addr,
                                         const MCDisassembler *Decoder) {
  const auto *DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
  return addOperand(Inst, DAsm->decodeMandatoryLiteralConstant(Imm));
}

static DecodeStatus decodeOperandVOPDDstY(MCInst &Inst, unsigned Val,
                                          uint64_t Addr, const void *Decoder) {
  const auto *DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
  return addOperand(Inst, DAsm->decodeVOPDDstYOp(Inst, Val));
}

static bool IsAGPROperand(const MCInst &Inst, int OpIdx,
                          const MCRegisterInfo *MRI) {
  if (OpIdx < 0)
    return false;

  const MCOperand &Op = Inst.getOperand(OpIdx);
  if (!Op.isReg())
    return false;

  unsigned Sub = MRI->getSubReg(Op.getReg(), AMDGPU::sub0);
  auto Reg = Sub ? Sub : Op.getReg();
  return Reg >= AMDGPU::AGPR0 && Reg <= AMDGPU::AGPR255;
}

static DecodeStatus decodeAVLdSt(MCInst &Inst, unsigned Imm,
                                 AMDGPUDisassembler::OpWidthTy Opw,
                                 const MCDisassembler *Decoder) {
  auto DAsm = static_cast<const AMDGPUDisassembler*>(Decoder);
  if (!DAsm->isGFX90A()) {
    Imm &= 511;
  } else {
    // If an atomic has both vdata and vdst, their register classes are tied.
    // The bit is decoded along with the vdst, the first operand. We need to
    // change the register class to AGPR if vdst was an AGPR.
    // If a DS instruction has both data0 and data1, their register classes
    // are also tied.
    unsigned Opc = Inst.getOpcode();
    uint64_t TSFlags = DAsm->getMCII()->get(Opc).TSFlags;
    uint16_t DataNameIdx = (TSFlags & SIInstrFlags::DS) ? AMDGPU::OpName::data0
                                                        : AMDGPU::OpName::vdata;
    const MCRegisterInfo *MRI = DAsm->getContext().getRegisterInfo();
    int DataIdx = AMDGPU::getNamedOperandIdx(Opc, DataNameIdx);
    if ((int)Inst.getNumOperands() == DataIdx) {
      int DstIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdst);
      if (IsAGPROperand(Inst, DstIdx, MRI))
        Imm |= 512;
    }

    if (TSFlags & SIInstrFlags::DS) {
      int Data2Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::data1);
      if ((int)Inst.getNumOperands() == Data2Idx &&
          IsAGPROperand(Inst, DataIdx, MRI))
        Imm |= 512;
    }
  }
  return addOperand(Inst, DAsm->decodeSrcOp(Opw, Imm | 256));
}

template <AMDGPUDisassembler::OpWidthTy Opw>
static DecodeStatus decodeAVLdSt(MCInst &Inst, unsigned Imm,
                                 uint64_t /* Addr */,
                                 const MCDisassembler *Decoder) {
  return decodeAVLdSt(Inst, Imm, Opw, Decoder);
}

static DecodeStatus decodeOperand_VSrc_f64(MCInst &Inst, unsigned Imm,
                                           uint64_t Addr,
                                           const MCDisassembler *Decoder) {
  assert(Imm < (1 << 9) && "9-bit encoding");
  auto DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
  return addOperand(Inst,
                    DAsm->decodeSrcOp(AMDGPUDisassembler::OPW64, Imm, false, 64,
                                      AMDGPU::OperandSemantics::FP64));
}

#define DECODE_SDWA(DecName) \
DECODE_OPERAND(decodeSDWA##DecName, decodeSDWA##DecName)

DECODE_SDWA(Src32)
DECODE_SDWA(Src16)
DECODE_SDWA(VopcDst)

#include "AMDGPUGenDisassemblerTables.inc"

//===----------------------------------------------------------------------===//
//
//===----------------------------------------------------------------------===//

template <typename T> static inline T eatBytes(ArrayRef<uint8_t>& Bytes) {
  assert(Bytes.size() >= sizeof(T));
  const auto Res =
      support::endian::read<T, llvm::endianness::little>(Bytes.data());
  Bytes = Bytes.slice(sizeof(T));
  return Res;
}

static inline DecoderUInt128 eat12Bytes(ArrayRef<uint8_t> &Bytes) {
  assert(Bytes.size() >= 12);
  uint64_t Lo =
      support::endian::read<uint64_t, llvm::endianness::little>(Bytes.data());
  Bytes = Bytes.slice(8);
  uint64_t Hi =
      support::endian::read<uint32_t, llvm::endianness::little>(Bytes.data());
  Bytes = Bytes.slice(4);
  return DecoderUInt128(Lo, Hi);
}
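
// Illustrative note (not from the upstream source): for a 96-bit encoding the
// bytes B0..B11 are read little-endian as Lo = B7..B0 (64 bits) and
// Hi = B11..B8 (32 bits), i.e. the first dword of the instruction stream ends
// up in the low bits of the DecoderUInt128.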

DecodeStatus AMDGPUDisassembler::getInstruction(MCInst &MI, uint64_t &Size,
                                                ArrayRef<uint8_t> Bytes_,
                                                uint64_t Address,
                                                raw_ostream &CS) const {
  unsigned MaxInstBytesNum = std::min((size_t)TargetMaxInstBytes, Bytes_.size());
  Bytes = Bytes_.slice(0, MaxInstBytesNum);

  // In case the opcode is not recognized we'll assume a Size of 4 bytes (unless
  // there are fewer bytes left). This will be overridden on success.
  Size = std::min((size_t)4, Bytes_.size());

  do {
    // ToDo: it would be better to switch the encoding length using some bit
    // predicate, but it is unknown yet, so try everything we can.

    // Try to decode DPP and SDWA first to solve conflict with VOP1 and VOP2
    // encodings.
    if (isGFX11Plus() && Bytes.size() >= 12) {
      DecoderUInt128 DecW = eat12Bytes(Bytes);

      if (isGFX11() &&
          tryDecodeInst(DecoderTableGFX1196, DecoderTableGFX11_FAKE1696, MI,
                        DecW, Address, CS))
        break;

      if (isGFX12() &&
          tryDecodeInst(DecoderTableGFX1296, DecoderTableGFX12_FAKE1696, MI,
                        DecW, Address, CS))
        break;

      if (isGFX12() &&
          tryDecodeInst(DecoderTableGFX12W6496, MI, DecW, Address, CS))
        break;
    }

    // Reinitialize Bytes
    Bytes = Bytes_.slice(0, MaxInstBytesNum);

    if (Bytes.size() >= 8) {
      const uint64_t QW = eatBytes<uint64_t>(Bytes);

      if (STI.hasFeature(AMDGPU::FeatureGFX10_BEncoding) &&
          tryDecodeInst(DecoderTableGFX10_B64, MI, QW, Address, CS))
        break;

      if (STI.hasFeature(AMDGPU::FeatureUnpackedD16VMem) &&
          tryDecodeInst(DecoderTableGFX80_UNPACKED64, MI, QW, Address, CS))
        break;

      // Some GFX9 subtargets repurposed the v_mad_mix_f32, v_mad_mixlo_f16 and
      // v_mad_mixhi_f16 for FMA variants. Try to decode using this special
      // table first so we print the correct name.
      if (STI.hasFeature(AMDGPU::FeatureFmaMixInsts) &&
          tryDecodeInst(DecoderTableGFX9_DL64, MI, QW, Address, CS))
        break;

      if (STI.hasFeature(AMDGPU::FeatureGFX940Insts) &&
          tryDecodeInst(DecoderTableGFX94064, MI, QW, Address, CS))
        break;

      if (STI.hasFeature(AMDGPU::FeatureGFX90AInsts) &&
          tryDecodeInst(DecoderTableGFX90A64, MI, QW, Address, CS))
        break;

      if ((isVI() || isGFX9()) &&
          tryDecodeInst(DecoderTableGFX864, MI, QW, Address, CS))
        break;

      if (isGFX9() && tryDecodeInst(DecoderTableGFX964, MI, QW, Address, CS))
        break;

      if (isGFX10() && tryDecodeInst(DecoderTableGFX1064, MI, QW, Address, CS))
        break;

      if (isGFX12() &&
          tryDecodeInst(DecoderTableGFX1264, DecoderTableGFX12_FAKE1664, MI, QW,
                        Address, CS))
        break;

      if (isGFX11() &&
          tryDecodeInst(DecoderTableGFX1164, DecoderTableGFX11_FAKE1664, MI, QW,
                        Address, CS))
        break;

      if (isGFX11() &&
          tryDecodeInst(DecoderTableGFX11W6464, MI, QW, Address, CS))
        break;

      if (isGFX12() &&
          tryDecodeInst(DecoderTableGFX12W6464, MI, QW, Address, CS))
        break;
    }

    // Reinitialize Bytes
    Bytes = Bytes_.slice(0, MaxInstBytesNum);

    // Try to decode a 32-bit instruction.
    if (Bytes.size() >= 4) {
      const uint32_t DW = eatBytes<uint32_t>(Bytes);

      if ((isVI() || isGFX9()) &&
          tryDecodeInst(DecoderTableGFX832, MI, DW, Address, CS))
        break;

      if (tryDecodeInst(DecoderTableAMDGPU32, MI, DW, Address, CS))
        break;

      if (isGFX9() && tryDecodeInst(DecoderTableGFX932, MI, DW, Address, CS))
        break;

      if (STI.hasFeature(AMDGPU::FeatureGFX90AInsts) &&
          tryDecodeInst(DecoderTableGFX90A32, MI, DW, Address, CS))
        break;

      if (STI.hasFeature(AMDGPU::FeatureGFX10_BEncoding) &&
          tryDecodeInst(DecoderTableGFX10_B32, MI, DW, Address, CS))
        break;

      if (isGFX10() && tryDecodeInst(DecoderTableGFX1032, MI, DW, Address, CS))
        break;

      if (isGFX11() &&
          tryDecodeInst(DecoderTableGFX1132, DecoderTableGFX11_FAKE1632, MI, DW,
                        Address, CS))
        break;

      if (isGFX12() &&
          tryDecodeInst(DecoderTableGFX1232, DecoderTableGFX12_FAKE1632, MI, DW,
                        Address, CS))
        break;
    }

    return MCDisassembler::Fail;
  } while (false);

  if (MCII->get(MI.getOpcode()).TSFlags & SIInstrFlags::DPP) {
    if (isMacDPP(MI))
      convertMacDPPInst(MI);

    if (MCII->get(MI.getOpcode()).TSFlags & SIInstrFlags::VOP3P)
      convertVOP3PDPPInst(MI);
    else if ((MCII->get(MI.getOpcode()).TSFlags & SIInstrFlags::VOPC) ||
             AMDGPU::isVOPC64DPP(MI.getOpcode()))
      convertVOPCDPPInst(MI); // Special VOP3 case
    else if (AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::dpp8) !=
             -1)
      convertDPP8Inst(MI);
    else if (MCII->get(MI.getOpcode()).TSFlags & SIInstrFlags::VOP3)
      convertVOP3DPPInst(MI); // Regular VOP3 case
  }

  if (AMDGPU::isMAC(MI.getOpcode())) {
    // Insert a dummy unused src2_modifiers operand.
    insertNamedMCOperand(MI, MCOperand::createImm(0),
                         AMDGPU::OpName::src2_modifiers);
  }

  if (MI.getOpcode() == AMDGPU::V_CVT_SR_BF8_F32_e64_dpp ||
      MI.getOpcode() == AMDGPU::V_CVT_SR_FP8_F32_e64_dpp) {
    // Insert a dummy unused src2_modifiers operand.
    insertNamedMCOperand(MI, MCOperand::createImm(0),
                         AMDGPU::OpName::src2_modifiers);
  }

  if ((MCII->get(MI.getOpcode()).TSFlags & SIInstrFlags::DS) &&
      !AMDGPU::hasGDS(STI)) {
    insertNamedMCOperand(MI, MCOperand::createImm(0), AMDGPU::OpName::gds);
  }

  if (MCII->get(MI.getOpcode()).TSFlags &
      (SIInstrFlags::MUBUF | SIInstrFlags::FLAT | SIInstrFlags::SMRD)) {
    int CPolPos = AMDGPU::getNamedOperandIdx(MI.getOpcode(),
                                             AMDGPU::OpName::cpol);
    if (CPolPos != -1) {
      unsigned CPol =
          (MCII->get(MI.getOpcode()).TSFlags & SIInstrFlags::IsAtomicRet) ?
              AMDGPU::CPol::GLC : 0;
      if (MI.getNumOperands() <= (unsigned)CPolPos) {
        insertNamedMCOperand(MI, MCOperand::createImm(CPol),
                             AMDGPU::OpName::cpol);
      } else if (CPol) {
        MI.getOperand(CPolPos).setImm(MI.getOperand(CPolPos).getImm() | CPol);
      }
    }
  }

  if ((MCII->get(MI.getOpcode()).TSFlags &
       (SIInstrFlags::MTBUF | SIInstrFlags::MUBUF)) &&
      (STI.hasFeature(AMDGPU::FeatureGFX90AInsts))) {
    // GFX90A lost TFE, its place is occupied by ACC.
    int TFEOpIdx =
        AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::tfe);
    if (TFEOpIdx != -1) {
      auto TFEIter = MI.begin();
      std::advance(TFEIter, TFEOpIdx);
      MI.insert(TFEIter, MCOperand::createImm(0));
    }
  }

  if (MCII->get(MI.getOpcode()).TSFlags &
      (SIInstrFlags::MTBUF | SIInstrFlags::MUBUF)) {
    int SWZOpIdx =
        AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::swz);
    if (SWZOpIdx != -1) {
      auto SWZIter = MI.begin();
      std::advance(SWZIter, SWZOpIdx);
      MI.insert(SWZIter, MCOperand::createImm(0));
    }
  }

  if (MCII->get(MI.getOpcode()).TSFlags & SIInstrFlags::MIMG) {
    int VAddr0Idx =
        AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::vaddr0);
    int RsrcIdx =
        AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::srsrc);
    unsigned NSAArgs = RsrcIdx - VAddr0Idx - 1;
    if (VAddr0Idx >= 0 && NSAArgs > 0) {
      unsigned NSAWords = (NSAArgs + 3) / 4;
      if (Bytes.size() < 4 * NSAWords)
        return MCDisassembler::Fail;
      for (unsigned i = 0; i < NSAArgs; ++i) {
        const unsigned VAddrIdx = VAddr0Idx + 1 + i;
        auto VAddrRCID =
            MCII->get(MI.getOpcode()).operands()[VAddrIdx].RegClass;
        MI.insert(MI.begin() + VAddrIdx, createRegOperand(VAddrRCID, Bytes[i]));
      }
      Bytes = Bytes.slice(4 * NSAWords);
    }

    convertMIMGInst(MI);
  }

  if (MCII->get(MI.getOpcode()).TSFlags &
      (SIInstrFlags::VIMAGE | SIInstrFlags::VSAMPLE))
    convertMIMGInst(MI);

  if (MCII->get(MI.getOpcode()).TSFlags & SIInstrFlags::EXP)
    convertEXPInst(MI);

  if (MCII->get(MI.getOpcode()).TSFlags & SIInstrFlags::VINTERP)
    convertVINTERPInst(MI);

  if (MCII->get(MI.getOpcode()).TSFlags & SIInstrFlags::SDWA)
    convertSDWAInst(MI);

  int VDstIn_Idx = AMDGPU::getNamedOperandIdx(MI.getOpcode(),
                                              AMDGPU::OpName::vdst_in);
  if (VDstIn_Idx != -1) {
    int Tied = MCII->get(MI.getOpcode()).getOperandConstraint(VDstIn_Idx,
                                                              MCOI::TIED_TO);
    if (Tied != -1 && (MI.getNumOperands() <= (unsigned)VDstIn_Idx ||
                       !MI.getOperand(VDstIn_Idx).isReg() ||
                       MI.getOperand(VDstIn_Idx).getReg() !=
                           MI.getOperand(Tied).getReg())) {
      if (MI.getNumOperands() > (unsigned)VDstIn_Idx)
        MI.erase(&MI.getOperand(VDstIn_Idx));
      insertNamedMCOperand(MI,
                           MCOperand::createReg(MI.getOperand(Tied).getReg()),
                           AMDGPU::OpName::vdst_in);
    }
  }

  int ImmLitIdx =
      AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::imm);
  bool IsSOPK = MCII->get(MI.getOpcode()).TSFlags & SIInstrFlags::SOPK;
  if (ImmLitIdx != -1 && !IsSOPK)
    convertFMAanyK(MI, ImmLitIdx);

  Size = MaxInstBytesNum - Bytes.size();
  return MCDisassembler::Success;
}

void AMDGPUDisassembler::convertEXPInst(MCInst &MI) const {
  if (STI.hasFeature(AMDGPU::FeatureGFX11Insts)) {
    // The MCInst still has these fields even though they are no longer encoded
    // in the GFX11 instruction.
    insertNamedMCOperand(MI, MCOperand::createImm(0), AMDGPU::OpName::vm);
    insertNamedMCOperand(MI, MCOperand::createImm(0), AMDGPU::OpName::compr);
  }
}

void AMDGPUDisassembler::convertVINTERPInst(MCInst &MI) const {
  if (MI.getOpcode() == AMDGPU::V_INTERP_P10_F16_F32_inreg_gfx11 ||
      MI.getOpcode() == AMDGPU::V_INTERP_P10_F16_F32_inreg_gfx12 ||
      MI.getOpcode() == AMDGPU::V_INTERP_P10_RTZ_F16_F32_inreg_gfx11 ||
      MI.getOpcode() == AMDGPU::V_INTERP_P10_RTZ_F16_F32_inreg_gfx12 ||
      MI.getOpcode() == AMDGPU::V_INTERP_P2_F16_F32_inreg_gfx11 ||
      MI.getOpcode() == AMDGPU::V_INTERP_P2_F16_F32_inreg_gfx12 ||
      MI.getOpcode() == AMDGPU::V_INTERP_P2_RTZ_F16_F32_inreg_gfx11 ||
      MI.getOpcode() == AMDGPU::V_INTERP_P2_RTZ_F16_F32_inreg_gfx12) {
    // The MCInst has this field that is not directly encoded in the
    // instruction.
    insertNamedMCOperand(MI, MCOperand::createImm(0), AMDGPU::OpName::op_sel);
  }
}

void AMDGPUDisassembler::convertSDWAInst(MCInst &MI) const {
  if (STI.hasFeature(AMDGPU::FeatureGFX9) ||
      STI.hasFeature(AMDGPU::FeatureGFX10)) {
    if (AMDGPU::hasNamedOperand(MI.getOpcode(), AMDGPU::OpName::sdst))
      // VOPC - insert clamp
      insertNamedMCOperand(MI, MCOperand::createImm(0), AMDGPU::OpName::clamp);
  } else if (STI.hasFeature(AMDGPU::FeatureVolcanicIslands)) {
    int SDst = AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::sdst);
    if (SDst != -1) {
      // VOPC - insert VCC register as sdst
      insertNamedMCOperand(MI, createRegOperand(AMDGPU::VCC),
                           AMDGPU::OpName::sdst);
    } else {
      // VOP1/2 - insert omod if present in instruction
      insertNamedMCOperand(MI, MCOperand::createImm(0), AMDGPU::OpName::omod);
    }
  }
}

struct VOPModifiers {
  unsigned OpSel = 0;
  unsigned OpSelHi = 0;
  unsigned NegLo = 0;
  unsigned NegHi = 0;
};

// Reconstruct values of VOP3/VOP3P operands such as op_sel.
// Note that these values do not affect disassembler output,
// so this is only necessary for consistency with src_modifiers.
static VOPModifiers collectVOPModifiers(const MCInst &MI,
                                        bool IsVOP3P = false) {
  VOPModifiers Modifiers;
  unsigned Opc = MI.getOpcode();
  const int ModOps[] = {AMDGPU::OpName::src0_modifiers,
                        AMDGPU::OpName::src1_modifiers,
                        AMDGPU::OpName::src2_modifiers};
  for (int J = 0; J < 3; ++J) {
    int OpIdx = AMDGPU::getNamedOperandIdx(Opc, ModOps[J]);
    if (OpIdx == -1)
      continue;

    unsigned Val = MI.getOperand(OpIdx).getImm();

    Modifiers.OpSel |= !!(Val & SISrcMods::OP_SEL_0) << J;
    if (IsVOP3P) {
      Modifiers.OpSelHi |= !!(Val & SISrcMods::OP_SEL_1) << J;
      Modifiers.NegLo |= !!(Val & SISrcMods::NEG) << J;
      Modifiers.NegHi |= !!(Val & SISrcMods::NEG_HI) << J;
    } else if (J == 0) {
      Modifiers.OpSel |= !!(Val & SISrcMods::DST_OP_SEL) << 3;
    }
  }

  return Modifiers;
}
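
// Illustrative note (not from the upstream source): for VOP3, if
// src1_modifiers has OP_SEL_0 set and src0_modifiers has DST_OP_SEL set, the
// loop above yields Modifiers.OpSel = (1 << 1) | (1 << 3) = 0b1010.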

// Instructions decode the op_sel/suffix bits into the src_modifier
// operands. Copy those bits into the src operands for true16 VGPRs.
void AMDGPUDisassembler::convertTrue16OpSel(MCInst &MI) const {
  const unsigned Opc = MI.getOpcode();
  const MCRegisterClass &ConversionRC =
      MRI.getRegClass(AMDGPU::VGPR_16RegClassID);
  constexpr std::array<std::tuple<int, int, unsigned>, 4> OpAndOpMods = {
      {{AMDGPU::OpName::src0, AMDGPU::OpName::src0_modifiers,
        SISrcMods::OP_SEL_0},
       {AMDGPU::OpName::src1, AMDGPU::OpName::src1_modifiers,
        SISrcMods::OP_SEL_0},
       {AMDGPU::OpName::src2, AMDGPU::OpName::src2_modifiers,
        SISrcMods::OP_SEL_0},
       {AMDGPU::OpName::vdst, AMDGPU::OpName::src0_modifiers,
        SISrcMods::DST_OP_SEL}}};
  for (const auto &[OpName, OpModsName, OpSelMask] : OpAndOpMods) {
    int OpIdx = AMDGPU::getNamedOperandIdx(Opc, OpName);
    int OpModsIdx = AMDGPU::getNamedOperandIdx(Opc, OpModsName);
    if (OpIdx == -1 || OpModsIdx == -1)
      continue;
    MCOperand &Op = MI.getOperand(OpIdx);
    if (!Op.isReg())
      continue;
    if (!ConversionRC.contains(Op.getReg()))
      continue;
    unsigned OpEnc = MRI.getEncodingValue(Op.getReg());
    const MCOperand &OpMods = MI.getOperand(OpModsIdx);
    unsigned ModVal = OpMods.getImm();
    if (ModVal & OpSelMask) { // isHi
      unsigned RegIdx = OpEnc & AMDGPU::HWEncoding::REG_IDX_MASK;
      Op.setReg(ConversionRC.getRegister(RegIdx * 2 + 1));
    }
  }
}

// MAC opcodes have special old and src2 operands.
// src2 is tied to dst, while old is not tied (but assumed to be).
bool AMDGPUDisassembler::isMacDPP(MCInst &MI) const {
  constexpr int DST_IDX = 0;
  auto Opcode = MI.getOpcode();
  const auto &Desc = MCII->get(Opcode);
  auto OldIdx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::old);

  if (OldIdx != -1 && Desc.getOperandConstraint(
                          OldIdx, MCOI::OperandConstraint::TIED_TO) == -1) {
    assert(AMDGPU::hasNamedOperand(Opcode, AMDGPU::OpName::src2));
    assert(Desc.getOperandConstraint(
               AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2),
               MCOI::OperandConstraint::TIED_TO) == DST_IDX);
    (void)DST_IDX;
    return true;
  }

  return false;
}

// Create a dummy old operand and insert a dummy unused src2_modifiers.
void AMDGPUDisassembler::convertMacDPPInst(MCInst &MI) const {
  assert(MI.getNumOperands() + 1 < MCII->get(MI.getOpcode()).getNumOperands());
  insertNamedMCOperand(MI, MCOperand::createReg(0), AMDGPU::OpName::old);
  insertNamedMCOperand(MI, MCOperand::createImm(0),
                       AMDGPU::OpName::src2_modifiers);
}

void AMDGPUDisassembler::convertDPP8Inst(MCInst &MI) const {
  unsigned Opc = MI.getOpcode();

  int VDstInIdx =
      AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::vdst_in);
  if (VDstInIdx != -1)
    insertNamedMCOperand(MI, MI.getOperand(0), AMDGPU::OpName::vdst_in);

  if (MI.getOpcode() == AMDGPU::V_CVT_SR_BF8_F32_e64_dpp8_gfx12 ||
      MI.getOpcode() == AMDGPU::V_CVT_SR_FP8_F32_e64_dpp8_gfx12)
    insertNamedMCOperand(MI, MI.getOperand(0), AMDGPU::OpName::src2);

  unsigned DescNumOps = MCII->get(Opc).getNumOperands();
  if (MI.getNumOperands() < DescNumOps &&
      AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::op_sel)) {
    convertTrue16OpSel(MI);
    auto Mods = collectVOPModifiers(MI);
    insertNamedMCOperand(MI, MCOperand::createImm(Mods.OpSel),
                         AMDGPU::OpName::op_sel);
  } else {
    // Insert dummy unused src modifiers.
    if (MI.getNumOperands() < DescNumOps &&
        AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::src0_modifiers))
      insertNamedMCOperand(MI, MCOperand::createImm(0),
                           AMDGPU::OpName::src0_modifiers);

    if (MI.getNumOperands() < DescNumOps &&
        AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::src1_modifiers))
      insertNamedMCOperand(MI, MCOperand::createImm(0),
                           AMDGPU::OpName::src1_modifiers);
  }
}

void AMDGPUDisassembler::convertVOP3DPPInst(MCInst &MI) const {
  convertTrue16OpSel(MI);

  int VDstInIdx =
      AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::vdst_in);
  if (VDstInIdx != -1)
    insertNamedMCOperand(MI, MI.getOperand(0), AMDGPU::OpName::vdst_in);

  if (MI.getOpcode() == AMDGPU::V_CVT_SR_BF8_F32_e64_dpp_gfx12 ||
      MI.getOpcode() == AMDGPU::V_CVT_SR_FP8_F32_e64_dpp_gfx12)
    insertNamedMCOperand(MI, MI.getOperand(0), AMDGPU::OpName::src2);

  unsigned Opc = MI.getOpcode();
  unsigned DescNumOps = MCII->get(Opc).getNumOperands();
  if (MI.getNumOperands() < DescNumOps &&
      AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::op_sel)) {
    auto Mods = collectVOPModifiers(MI);
    insertNamedMCOperand(MI, MCOperand::createImm(Mods.OpSel),
                         AMDGPU::OpName::op_sel);
  }
}

// Note that before gfx10, the MIMG encoding provided no information about
// VADDR size. Consequently, decoded instructions always show the address as if
// it has 1 dword, which is not necessarily the case.
void AMDGPUDisassembler::convertMIMGInst(MCInst &MI) const {
  auto TSFlags = MCII->get(MI.getOpcode()).TSFlags;

  int VDstIdx = AMDGPU::getNamedOperandIdx(MI.getOpcode(),
                                           AMDGPU::OpName::vdst);

  int VDataIdx = AMDGPU::getNamedOperandIdx(MI.getOpcode(),
                                            AMDGPU::OpName::vdata);
  int VAddr0Idx =
      AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::vaddr0);
  int RsrcOpName = TSFlags & SIInstrFlags::MIMG ? AMDGPU::OpName::srsrc
                                                : AMDGPU::OpName::rsrc;
  int RsrcIdx = AMDGPU::getNamedOperandIdx(MI.getOpcode(), RsrcOpName);
  int DMaskIdx = AMDGPU::getNamedOperandIdx(MI.getOpcode(),
                                            AMDGPU::OpName::dmask);

  int TFEIdx = AMDGPU::getNamedOperandIdx(MI.getOpcode(),
                                          AMDGPU::OpName::tfe);
  int D16Idx = AMDGPU::getNamedOperandIdx(MI.getOpcode(),
                                          AMDGPU::OpName::d16);

  const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(MI.getOpcode());
  const AMDGPU::MIMGBaseOpcodeInfo *BaseOpcode =
      AMDGPU::getMIMGBaseOpcodeInfo(Info->BaseOpcode);

  assert(VDataIdx != -1);
  if (BaseOpcode->BVH) {
    // Add A16 operand for intersect_ray instructions
    addOperand(MI, MCOperand::createImm(BaseOpcode->A16));
    return;
  }

  bool IsAtomic = (VDstIdx != -1);
  bool IsGather4 = TSFlags & SIInstrFlags::Gather4;
  bool IsVSample = TSFlags & SIInstrFlags::VSAMPLE;
  bool IsNSA = false;
  bool IsPartialNSA = false;
  unsigned AddrSize = Info->VAddrDwords;

  if (isGFX10Plus()) {
    unsigned DimIdx =
        AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::dim);
    int A16Idx =
        AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::a16);
    const AMDGPU::MIMGDimInfo *Dim =
        AMDGPU::getMIMGDimInfoByEncoding(MI.getOperand(DimIdx).getImm());
    const bool IsA16 = (A16Idx != -1 && MI.getOperand(A16Idx).getImm());

    AddrSize =
        AMDGPU::getAddrSizeMIMGOp(BaseOpcode, Dim, IsA16, AMDGPU::hasG16(STI));

    // VSAMPLE insts that do not use vaddr3 behave the same as NSA forms.
    // VIMAGE insts other than BVH never use vaddr4.
    IsNSA = Info->MIMGEncoding == AMDGPU::MIMGEncGfx10NSA ||
            Info->MIMGEncoding == AMDGPU::MIMGEncGfx11NSA ||
            Info->MIMGEncoding == AMDGPU::MIMGEncGfx12;
    if (!IsNSA) {
      if (!IsVSample && AddrSize > 12)
        AddrSize = 16;
    } else {
      if (AddrSize > Info->VAddrDwords) {
        if (!STI.hasFeature(AMDGPU::FeaturePartialNSAEncoding)) {
          // The NSA encoding does not contain enough operands for the
          // combination of base opcode / dimension. Should this be an error?
          return;
        }
        IsPartialNSA = true;
      }
    }
  }

  unsigned DMask = MI.getOperand(DMaskIdx).getImm() & 0xf;
  unsigned DstSize = IsGather4 ? 4 : std::max(llvm::popcount(DMask), 1);

  bool D16 = D16Idx >= 0 && MI.getOperand(D16Idx).getImm();
  if (D16 && AMDGPU::hasPackedD16(STI)) {
    DstSize = (DstSize + 1) / 2;
  }

  if (TFEIdx != -1 && MI.getOperand(TFEIdx).getImm())
    DstSize += 1;

  if (DstSize == Info->VDataDwords && AddrSize == Info->VAddrDwords)
    return;

  int NewOpcode =
      AMDGPU::getMIMGOpcode(Info->BaseOpcode, Info->MIMGEncoding, DstSize, AddrSize);
  if (NewOpcode == -1)
    return;

  // Widen the register to the correct number of enabled channels.
  unsigned NewVdata = AMDGPU::NoRegister;
  if (DstSize != Info->VDataDwords) {
    auto DataRCID = MCII->get(NewOpcode).operands()[VDataIdx].RegClass;

    // Get first subregister of VData
    unsigned Vdata0 = MI.getOperand(VDataIdx).getReg();
    unsigned VdataSub0 = MRI.getSubReg(Vdata0, AMDGPU::sub0);
    Vdata0 = (VdataSub0 != 0) ? VdataSub0 : Vdata0;

    NewVdata = MRI.getMatchingSuperReg(Vdata0, AMDGPU::sub0,
                                       &MRI.getRegClass(DataRCID));
    if (NewVdata == AMDGPU::NoRegister) {
      // It's possible to encode this such that the low register + enabled
      // components exceeds the register count.
      return;
    }
  }

  // If not using NSA on GFX10+, widen the vaddr0 address register to the
  // correct size. If using partial NSA on GFX11+, widen the last address
  // register.
  int VAddrSAIdx = IsPartialNSA ? (RsrcIdx - 1) : VAddr0Idx;
  unsigned NewVAddrSA = AMDGPU::NoRegister;
  if (STI.hasFeature(AMDGPU::FeatureNSAEncoding) && (!IsNSA || IsPartialNSA) &&
      AddrSize != Info->VAddrDwords) {
    unsigned VAddrSA = MI.getOperand(VAddrSAIdx).getReg();
    unsigned VAddrSubSA = MRI.getSubReg(VAddrSA, AMDGPU::sub0);
    VAddrSA = VAddrSubSA ? VAddrSubSA : VAddrSA;

    auto AddrRCID = MCII->get(NewOpcode).operands()[VAddrSAIdx].RegClass;
    NewVAddrSA = MRI.getMatchingSuperReg(VAddrSA, AMDGPU::sub0,
                                         &MRI.getRegClass(AddrRCID));
    if (!NewVAddrSA)
      return;
  }

  MI.setOpcode(NewOpcode);

  if (NewVdata != AMDGPU::NoRegister) {
    MI.getOperand(VDataIdx) = MCOperand::createReg(NewVdata);

    if (IsAtomic) {
      // Atomic operations have an additional operand (a copy of data)
      MI.getOperand(VDstIdx) = MCOperand::createReg(NewVdata);
    }
  }

  if (NewVAddrSA) {
    MI.getOperand(VAddrSAIdx) = MCOperand::createReg(NewVAddrSA);
  } else if (IsNSA) {
    assert(AddrSize <= Info->VAddrDwords);
    MI.erase(MI.begin() + VAddr0Idx + AddrSize,
             MI.begin() + VAddr0Idx + Info->VAddrDwords);
  }
}

// Opsel and neg bits are used in src_modifiers and standalone operands. The
// autogenerated decoder only adds to src_modifiers, so manually add the bits
// to the other operands.
void AMDGPUDisassembler::convertVOP3PDPPInst(MCInst &MI) const {
  unsigned Opc = MI.getOpcode();
  unsigned DescNumOps = MCII->get(Opc).getNumOperands();
  auto Mods = collectVOPModifiers(MI, true);

  if (MI.getNumOperands() < DescNumOps &&
      AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::vdst_in))
    insertNamedMCOperand(MI, MCOperand::createImm(0), AMDGPU::OpName::vdst_in);

  if (MI.getNumOperands() < DescNumOps &&
      AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::op_sel))
    insertNamedMCOperand(MI, MCOperand::createImm(Mods.OpSel),
                         AMDGPU::OpName::op_sel);
  if (MI.getNumOperands() < DescNumOps &&
      AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::op_sel_hi))
    insertNamedMCOperand(MI, MCOperand::createImm(Mods.OpSelHi),
                         AMDGPU::OpName::op_sel_hi);
  if (MI.getNumOperands() < DescNumOps &&
      AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::neg_lo))
    insertNamedMCOperand(MI, MCOperand::createImm(Mods.NegLo),
                         AMDGPU::OpName::neg_lo);
  if (MI.getNumOperands() < DescNumOps &&
      AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::neg_hi))
    insertNamedMCOperand(MI, MCOperand::createImm(Mods.NegHi),
                         AMDGPU::OpName::neg_hi);
}

// Create a dummy old operand and insert optional operands.
void AMDGPUDisassembler::convertVOPCDPPInst(MCInst &MI) const {
  unsigned Opc = MI.getOpcode();
  unsigned DescNumOps = MCII->get(Opc).getNumOperands();

  if (MI.getNumOperands() < DescNumOps &&
      AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::old))
    insertNamedMCOperand(MI, MCOperand::createReg(0), AMDGPU::OpName::old);

  if (MI.getNumOperands() < DescNumOps &&
      AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::src0_modifiers))
    insertNamedMCOperand(MI, MCOperand::createImm(0),
                         AMDGPU::OpName::src0_modifiers);

  if (MI.getNumOperands() < DescNumOps &&
      AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::src1_modifiers))
    insertNamedMCOperand(MI, MCOperand::createImm(0),
                         AMDGPU::OpName::src1_modifiers);
}

void AMDGPUDisassembler::convertFMAanyK(MCInst &MI, int ImmLitIdx) const {
  assert(HasLiteral && "Should have decoded a literal");
  const MCInstrDesc &Desc = MCII->get(MI.getOpcode());
  unsigned DescNumOps = Desc.getNumOperands();
  insertNamedMCOperand(MI, MCOperand::createImm(Literal),
                       AMDGPU::OpName::immDeferred);
  assert(DescNumOps == MI.getNumOperands());
  for (unsigned I = 0; I < DescNumOps; ++I) {
    auto &Op = MI.getOperand(I);
    auto OpType = Desc.operands()[I].OperandType;
    bool IsDeferredOp = (OpType == AMDGPU::OPERAND_REG_IMM_FP32_DEFERRED ||
                         OpType == AMDGPU::OPERAND_REG_IMM_FP16_DEFERRED);
    if (Op.isImm() && Op.getImm() == AMDGPU::EncValues::LITERAL_CONST &&
        IsDeferredOp)
      Op.setImm(Literal);
  }
}

const char* AMDGPUDisassembler::getRegClassName(unsigned RegClassID) const {
  return getContext().getRegisterInfo()->
    getRegClassName(&AMDGPUMCRegisterClasses[RegClassID]);
}

inline
MCOperand AMDGPUDisassembler::errOperand(unsigned V,
                                         const Twine& ErrMsg) const {
  *CommentStream << "Error: " + ErrMsg;

  // ToDo: add support for error operands to MCInst.h
  // return MCOperand::createError(V);
  return MCOperand();
}

inline
MCOperand AMDGPUDisassembler::createRegOperand(unsigned int RegId) const {
  return MCOperand::createReg(AMDGPU::getMCReg(RegId, STI));
}

inline
MCOperand AMDGPUDisassembler::createRegOperand(unsigned RegClassID,
                                               unsigned Val) const {
  const auto& RegCl = AMDGPUMCRegisterClasses[RegClassID];
  if (Val >= RegCl.getNumRegs())
    return errOperand(Val, Twine(getRegClassName(RegClassID)) +
                           ": unknown register " + Twine(Val));
  return createRegOperand(RegCl.getRegister(Val));
}

inline
MCOperand AMDGPUDisassembler::createSRegOperand(unsigned SRegClassID,
                                                unsigned Val) const {
  // ToDo: SI/CI have 104 SGPRs, VI - 102
  // Valery: here we accept as much as we can, and let the assembler sort it
  // out.
  int shift = 0;
  switch (SRegClassID) {
  case AMDGPU::SGPR_32RegClassID:
  case AMDGPU::TTMP_32RegClassID:
    break;
  case AMDGPU::SGPR_64RegClassID:
  case AMDGPU::TTMP_64RegClassID:
    shift = 1;
    break;
  case AMDGPU::SGPR_96RegClassID:
  case AMDGPU::TTMP_96RegClassID:
  case AMDGPU::SGPR_128RegClassID:
  case AMDGPU::TTMP_128RegClassID:
  // ToDo: unclear if s[100:104] is available on VI. Can we use VCC as SGPR in
  // this bundle?
  case AMDGPU::SGPR_256RegClassID:
  case AMDGPU::TTMP_256RegClassID:
  // ToDo: unclear if s[96:104] is available on VI. Can we use VCC as SGPR in
  // this bundle?
  case AMDGPU::SGPR_288RegClassID:
  case AMDGPU::TTMP_288RegClassID:
  case AMDGPU::SGPR_320RegClassID:
  case AMDGPU::TTMP_320RegClassID:
  case AMDGPU::SGPR_352RegClassID:
  case AMDGPU::TTMP_352RegClassID:
  case AMDGPU::SGPR_384RegClassID:
  case AMDGPU::TTMP_384RegClassID:
  case AMDGPU::SGPR_512RegClassID:
  case AMDGPU::TTMP_512RegClassID:
    shift = 2;
    break;
  // ToDo: unclear if s[88:104] is available on VI. Can we use VCC as SGPR in
  // this bundle?
  default:
    llvm_unreachable("unhandled register class");
  }

  if (Val % (1 << shift)) {
    *CommentStream << "Warning: " << getRegClassName(SRegClassID)
                   << ": scalar reg isn't aligned " << Val;
  }

  return createRegOperand(SRegClassID, Val >> shift);
}

MCOperand AMDGPUDisassembler::createVGPR16Operand(unsigned RegIdx,
                                                  bool IsHi) const {
  unsigned RegIdxInVGPR16 = RegIdx * 2 + (IsHi ? 1 : 0);
  return createRegOperand(AMDGPU::VGPR_16RegClassID, RegIdxInVGPR16);
}

// Decode literals for insts which always have a literal in the encoding.
MCOperand
AMDGPUDisassembler::decodeMandatoryLiteralConstant(unsigned Val) const {
  if (HasLiteral) {
    assert(
        AMDGPU::hasVOPD(STI) &&
        "Should only decode multiple kimm with VOPD, check VSrc operand types");
    if (Literal != Val)
      return errOperand(Val, "More than one unique literal is illegal");
  }
  HasLiteral = true;
  Literal = Val;
  return MCOperand::createImm(Literal);
}

MCOperand AMDGPUDisassembler::decodeLiteralConstant(bool ExtendFP64) const {
  // For now all literal constants are supposed to be unsigned integers.
  // ToDo: deal with signed/unsigned 64-bit integer constants
  // ToDo: deal with float/double constants
  if (!HasLiteral) {
    if (Bytes.size() < 4) {
      return errOperand(0, "cannot read literal, inst bytes left " +
                           Twine(Bytes.size()));
    }
    HasLiteral = true;
    Literal = Literal64 = eatBytes<uint32_t>(Bytes);
    if (ExtendFP64)
      Literal64 <<= 32;
  }
  return MCOperand::createImm(ExtendFP64 ? Literal64 : Literal);
}

MCOperand AMDGPUDisassembler::decodeIntImmed(unsigned Imm) {
  using namespace AMDGPU::EncValues;

  assert(Imm >= INLINE_INTEGER_C_MIN && Imm <= INLINE_INTEGER_C_MAX);
  return MCOperand::createImm((Imm <= INLINE_INTEGER_C_POSITIVE_MAX) ?
    (static_cast<int64_t>(Imm) - INLINE_INTEGER_C_MIN) :
    (INLINE_INTEGER_C_POSITIVE_MAX - static_cast<int64_t>(Imm)));
      // Cast prevents negative overflow.
}
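
// Illustrative note (not from the upstream source): assuming the usual AMDGPU
// encoding values (INLINE_INTEGER_C_MIN = 128, INLINE_INTEGER_C_POSITIVE_MAX
// = 192, INLINE_INTEGER_C_MAX = 208), the mapping above gives 128 -> 0,
// 192 -> 64, 193 -> -1, and 208 -> -16.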

static int64_t getInlineImmVal32(unsigned Imm) {
  switch (Imm) {
  case 240:
    return llvm::bit_cast<uint32_t>(0.5f);
  case 241:
    return llvm::bit_cast<uint32_t>(-0.5f);
  case 242:
    return llvm::bit_cast<uint32_t>(1.0f);
  case 243:
    return llvm::bit_cast<uint32_t>(-1.0f);
  case 244:
    return llvm::bit_cast<uint32_t>(2.0f);
  case 245:
    return llvm::bit_cast<uint32_t>(-2.0f);
  case 246:
    return llvm::bit_cast<uint32_t>(4.0f);
  case 247:
    return llvm::bit_cast<uint32_t>(-4.0f);
  case 248: // 1 / (2 * PI)
    return 0x3e22f983;
  default:
    llvm_unreachable("invalid fp inline imm");
  }
}

static int64_t getInlineImmVal64(unsigned Imm) {
  switch (Imm) {
  case 240:
    return llvm::bit_cast<uint64_t>(0.5);
  case 241:
    return llvm::bit_cast<uint64_t>(-0.5);
  case 242:
    return llvm::bit_cast<uint64_t>(1.0);
  case 243:
    return llvm::bit_cast<uint64_t>(-1.0);
  case 244:
    return llvm::bit_cast<uint64_t>(2.0);
  case 245:
    return llvm::bit_cast<uint64_t>(-2.0);
  case 246:
    return llvm::bit_cast<uint64_t>(4.0);
  case 247:
    return llvm::bit_cast<uint64_t>(-4.0);
  case 248: // 1 / (2 * PI)
    return 0x3fc45f306dc9c882;
  default:
    llvm_unreachable("invalid fp inline imm");
  }
}

static int64_t getInlineImmValF16(unsigned Imm) {
  switch (Imm) {
  case 240:
    return 0x3800;
  case 241:
    return 0xB800;
  case 242:
    return 0x3C00;
  case 243:
    return 0xBC00;
  case 244:
    return 0x4000;
  case 245:
    return 0xC000;
  case 246:
    return 0x4400;
  case 247:
    return 0xC400;
  case 248: // 1 / (2 * PI)
    return 0x3118;
  default:
    llvm_unreachable("invalid fp inline imm");
  }
}

static int64_t getInlineImmValBF16(unsigned Imm) {
  switch (Imm) {
  case 240:
    return 0x3F00;
  case 241:
    return 0xBF00;
  case 242:
    return 0x3F80;
  case 243:
    return 0xBF80;
  case 244:
    return 0x4000;
  case 245:
    return 0xC000;
  case 246:
    return 0x4080;
  case 247:
    return 0xC080;
  case 248: // 1 / (2 * PI)
    return 0x3E22;
  default:
    llvm_unreachable("invalid fp inline imm");
  }
}

static int64_t getInlineImmVal16(unsigned Imm, AMDGPU::OperandSemantics Sema) {
  return (Sema == AMDGPU::OperandSemantics::BF16) ? getInlineImmValBF16(Imm)
                                                  : getInlineImmValF16(Imm);
}

MCOperand AMDGPUDisassembler::decodeFPImmed(unsigned ImmWidth, unsigned Imm,
                                            AMDGPU::OperandSemantics Sema) {
  assert(Imm >= AMDGPU::EncValues::INLINE_FLOATING_C_MIN &&
         Imm <= AMDGPU::EncValues::INLINE_FLOATING_C_MAX);

  // ToDo: case 248: 1/(2*PI) - is allowed only on VI
  // ImmWidth 0 is a default case where the operand should not allow
  // immediates. The Imm value is still decoded into a 32-bit immediate
  // operand; the inst printer will use it to print a verbose error message.
  switch (ImmWidth) {
  case 0:
  case 32:
    return MCOperand::createImm(getInlineImmVal32(Imm));
  case 64:
    return MCOperand::createImm(getInlineImmVal64(Imm));
  case 16:
    return MCOperand::createImm(getInlineImmVal16(Imm, Sema));
  default:
    llvm_unreachable("implement me");
  }
}

unsigned AMDGPUDisassembler::getVgprClassId(const OpWidthTy Width) const {
  using namespace AMDGPU;

  assert(OPW_FIRST_ <= Width && Width < OPW_LAST_);
  switch (Width) {
  default: // fall
  case OPW32:
  case OPW16:
  case OPWV216:
    return VGPR_32RegClassID;
  case OPW64:
  case OPWV232: return VReg_64RegClassID;
  case OPW96: return VReg_96RegClassID;
  case OPW128: return VReg_128RegClassID;
  case OPW160: return VReg_160RegClassID;
  case OPW256: return VReg_256RegClassID;
  case OPW288: return VReg_288RegClassID;
  case OPW320: return VReg_320RegClassID;
  case OPW352: return VReg_352RegClassID;
  case OPW384: return VReg_384RegClassID;
  case OPW512: return VReg_512RegClassID;
  case OPW1024: return VReg_1024RegClassID;
  }
}

unsigned AMDGPUDisassembler::getAgprClassId(const OpWidthTy Width) const {
  using namespace AMDGPU;

  assert(OPW_FIRST_ <= Width && Width < OPW_LAST_);
  switch (Width) {
  default: // fall
  case OPW32:
  case OPW16:
  case OPWV216:
    return AGPR_32RegClassID;
  case OPW64:
  case OPWV232: return AReg_64RegClassID;
  case OPW96: return AReg_96RegClassID;
  case OPW128: return AReg_128RegClassID;
  case OPW160: return AReg_160RegClassID;
  case OPW256: return AReg_256RegClassID;
  case OPW288: return AReg_288RegClassID;
  case OPW320: return AReg_320RegClassID;
  case OPW352: return AReg_352RegClassID;
  case OPW384: return AReg_384RegClassID;
  case OPW512: return AReg_512RegClassID;
  case OPW1024: return AReg_1024RegClassID;
  }
}

unsigned AMDGPUDisassembler::getSgprClassId(const OpWidthTy Width) const {
  using namespace AMDGPU;

  assert(OPW_FIRST_ <= Width && Width < OPW_LAST_);
  switch (Width) {
  default: // fall
  case OPW32:
  case OPW16:
  case OPWV216:
    return SGPR_32RegClassID;
  case OPW64:
  case OPWV232: return SGPR_64RegClassID;
  case OPW96: return SGPR_96RegClassID;
  case OPW128: return SGPR_128RegClassID;
  case OPW160: return SGPR_160RegClassID;
  case OPW256: return SGPR_256RegClassID;
  case OPW288: return SGPR_288RegClassID;
  case OPW320: return SGPR_320RegClassID;
  case OPW352: return SGPR_352RegClassID;
  case OPW384: return SGPR_384RegClassID;
  case OPW512: return SGPR_512RegClassID;
  }
}

unsigned AMDGPUDisassembler::getTtmpClassId(const OpWidthTy Width) const {
  using namespace AMDGPU;

  assert(OPW_FIRST_ <= Width && Width < OPW_LAST_);
  switch (Width) {
  default: // fall
  case OPW32:
  case OPW16:
  case OPWV216:
    return TTMP_32RegClassID;
  case OPW64:
  case OPWV232: return TTMP_64RegClassID;
  case OPW128: return TTMP_128RegClassID;
  case OPW256: return TTMP_256RegClassID;
  case OPW288: return TTMP_288RegClassID;
  case OPW320: return TTMP_320RegClassID;
  case OPW352: return TTMP_352RegClassID;
  case OPW384: return TTMP_384RegClassID;
  case OPW512: return TTMP_512RegClassID;
  }
}

int AMDGPUDisassembler::getTTmpIdx(unsigned Val) const {
  using namespace AMDGPU::EncValues;

  unsigned TTmpMin = isGFX9Plus() ? TTMP_GFX9PLUS_MIN : TTMP_VI_MIN;
  unsigned TTmpMax = isGFX9Plus() ? TTMP_GFX9PLUS_MAX : TTMP_VI_MAX;

  return (TTmpMin <= Val && Val <= TTmpMax)? Val - TTmpMin : -1;
}
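
// Illustrative note (not from the upstream source): assuming the usual
// encoding ranges (ttmp registers start at 108 on GFX9+ and at 112 on VI),
// getTTmpIdx(108) is 0 on GFX9+ but -1 on VI, where 108 is not a ttmp.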

MCOperand AMDGPUDisassembler::decodeSrcOp(const OpWidthTy Width, unsigned Val,
                                          bool MandatoryLiteral,
                                          unsigned ImmWidth,
                                          AMDGPU::OperandSemantics Sema) const {
  using namespace AMDGPU::EncValues;

  assert(Val < 1024); // enum10

  bool IsAGPR = Val & 512;
  Val &= 511;

  if (VGPR_MIN <= Val && Val <= VGPR_MAX) {
    return createRegOperand(IsAGPR ? getAgprClassId(Width)
                                   : getVgprClassId(Width), Val - VGPR_MIN);
  }
  return decodeNonVGPRSrcOp(Width, Val & 0xFF, MandatoryLiteral, ImmWidth,
                            Sema);
}

MCOperand
AMDGPUDisassembler::decodeNonVGPRSrcOp(const OpWidthTy Width, unsigned Val,
                                       bool MandatoryLiteral, unsigned ImmWidth,
                                       AMDGPU::OperandSemantics Sema) const {
  // Cases when Val{8} is 1 (vgpr, agpr or true 16 vgpr) should have been
  // decoded earlier.
  assert(Val < (1 << 8) && "9-bit Src encoding when Val{8} is 0");
  using namespace AMDGPU::EncValues;

  if (Val <= SGPR_MAX) {
    // "SGPR_MIN <= Val" is always true and causes compilation warning.
    static_assert(SGPR_MIN == 0);
    return createSRegOperand(getSgprClassId(Width), Val - SGPR_MIN);
  }

  int TTmpIdx = getTTmpIdx(Val);
  if (TTmpIdx >= 0) {
    return createSRegOperand(getTtmpClassId(Width), TTmpIdx);
  }

  if (INLINE_INTEGER_C_MIN <= Val && Val <= INLINE_INTEGER_C_MAX)
    return decodeIntImmed(Val);

  if (INLINE_FLOATING_C_MIN <= Val && Val <= INLINE_FLOATING_C_MAX)
    return decodeFPImmed(ImmWidth, Val, Sema);

  if (Val == LITERAL_CONST) {
    if (MandatoryLiteral)
      // Keep a sentinel value for deferred setting
      return MCOperand::createImm(LITERAL_CONST);
    else
      return decodeLiteralConstant(Sema == AMDGPU::OperandSemantics::FP64);
  }

  switch (Width) {
  case OPW32:
  case OPW16:
  case OPWV216:
    return decodeSpecialReg32(Val);
  case OPW64:
  case OPWV232:
    return decodeSpecialReg64(Val);
  default:
    llvm_unreachable("unexpected immediate type");
  }
}
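
// Illustrative note (not from the upstream source): summary of how the 9-bit
// source-operand space is carved up by decodeSrcOp/decodeNonVGPRSrcOp above:
// SGPRs from 0 up to SGPR_MAX, then ttmps, inline integer constants
// (INLINE_INTEGER_C_MIN..MAX), inline floating-point constants
// (INLINE_FLOATING_C_MIN..MAX), the literal marker (LITERAL_CONST), special
// registers such as VCC/EXEC/M0, and finally 256..511 for VGPRs (with bit 9
// of the wider enum10 form selecting AGPRs).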

// Bit 0 of DstY isn't stored in the instruction, because it's always the
// opposite of bit 0 of DstX.
MCOperand AMDGPUDisassembler::decodeVOPDDstYOp(MCInst &Inst,
                                               unsigned Val) const {
  int VDstXInd =
      AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::vdstX);
  assert(VDstXInd != -1);
  assert(Inst.getOperand(VDstXInd).isReg());
  unsigned XDstReg = MRI.getEncodingValue(Inst.getOperand(VDstXInd).getReg());
  Val |= ~XDstReg & 1;
  auto Width = llvm::AMDGPUDisassembler::OPW32;
  return createRegOperand(getVgprClassId(Width), Val);
}
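
// Illustrative note (not from the upstream source): the computation above sets
// bit 0 of Val exactly when the vdstX encoding is even, so DstY always ends up
// with the opposite parity of DstX, e.g. an even vdstX paired with a stored
// DstY of v2 yields v3.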

MCOperand AMDGPUDisassembler::decodeSpecialReg32(unsigned Val) const {
  using namespace AMDGPU;

  switch (Val) {
  // clang-format off
  case 102: return createRegOperand(FLAT_SCR_LO);
  case 103: return createRegOperand(FLAT_SCR_HI);
  case 104: return createRegOperand(XNACK_MASK_LO);
  case 105: return createRegOperand(XNACK_MASK_HI);
  case 106: return createRegOperand(VCC_LO);
  case 107: return createRegOperand(VCC_HI);
  case 108: return createRegOperand(TBA_LO);
  case 109: return createRegOperand(TBA_HI);
  case 110: return createRegOperand(TMA_LO);
  case 111: return createRegOperand(TMA_HI);
  case 124:
    return isGFX11Plus() ? createRegOperand(SGPR_NULL) : createRegOperand(M0);
  case 125:
    return isGFX11Plus() ? createRegOperand(M0) : createRegOperand(SGPR_NULL);
  case 126: return createRegOperand(EXEC_LO);
  case 127: return createRegOperand(EXEC_HI);
  case 235: return createRegOperand(SRC_SHARED_BASE_LO);
  case 236: return createRegOperand(SRC_SHARED_LIMIT_LO);
  case 237: return createRegOperand(SRC_PRIVATE_BASE_LO);
  case 238: return createRegOperand(SRC_PRIVATE_LIMIT_LO);
  case 239: return createRegOperand(SRC_POPS_EXITING_WAVE_ID);
  case 251: return createRegOperand(SRC_VCCZ);
  case 252: return createRegOperand(SRC_EXECZ);
  case 253: return createRegOperand(SRC_SCC);
  case 254: return createRegOperand(LDS_DIRECT);
  default: break;
  // clang-format on
  }
  return errOperand(Val, "unknown operand encoding " + Twine(Val));
}

MCOperand AMDGPUDisassembler::decodeSpecialReg64(unsigned Val) const {
  using namespace AMDGPU;

  switch (Val) {
  case 102: return createRegOperand(FLAT_SCR);
  case 104: return createRegOperand(XNACK_MASK);
  case 106: return createRegOperand(VCC);
  case 108: return createRegOperand(TBA);
  case 110: return createRegOperand(TMA);
  case 124:
    if (isGFX11Plus())
      return createRegOperand(SGPR_NULL);
    break;
  case 125:
    if (!isGFX11Plus())
      return createRegOperand(SGPR_NULL);
    break;
  case 126: return createRegOperand(EXEC);
  case 235: return createRegOperand(SRC_SHARED_BASE);
  case 236: return createRegOperand(SRC_SHARED_LIMIT);
  case 237: return createRegOperand(SRC_PRIVATE_BASE);
  case 238: return createRegOperand(SRC_PRIVATE_LIMIT);
  case 239: return createRegOperand(SRC_POPS_EXITING_WAVE_ID);
  case 251: return createRegOperand(SRC_VCCZ);
  case 252: return createRegOperand(SRC_EXECZ);
  case 253: return createRegOperand(SRC_SCC);
  default: break;
  }
  return errOperand(Val, "unknown operand encoding " + Twine(Val));
}

MCOperand
AMDGPUDisassembler::decodeSDWASrc(const OpWidthTy Width, const unsigned Val,
                                  unsigned ImmWidth,
                                  AMDGPU::OperandSemantics Sema) const {
  using namespace AMDGPU::SDWA;
  using namespace AMDGPU::EncValues;

  if (STI.hasFeature(AMDGPU::FeatureGFX9) ||
      STI.hasFeature(AMDGPU::FeatureGFX10)) {
    // XXX: cast to int is needed to avoid a spurious warning:
    // comparison with unsigned is always true
    if (int(SDWA9EncValues::SRC_VGPR_MIN) <= int(Val) &&
        Val <= SDWA9EncValues::SRC_VGPR_MAX) {
      return createRegOperand(getVgprClassId(Width),
                              Val - SDWA9EncValues::SRC_VGPR_MIN);
    }
    if (SDWA9EncValues::SRC_SGPR_MIN <= Val &&
        Val <= (isGFX10Plus() ? SDWA9EncValues::SRC_SGPR_MAX_GFX10
                              : SDWA9EncValues::SRC_SGPR_MAX_SI)) {
      return createSRegOperand(getSgprClassId(Width),
                               Val - SDWA9EncValues::SRC_SGPR_MIN);
    }
    if (SDWA9EncValues::SRC_TTMP_MIN <= Val &&
        Val <= SDWA9EncValues::SRC_TTMP_MAX) {
      return createSRegOperand(getTtmpClassId(Width),
                               Val - SDWA9EncValues::SRC_TTMP_MIN);
    }

    const unsigned SVal = Val - SDWA9EncValues::SRC_SGPR_MIN;

    if (INLINE_INTEGER_C_MIN <= SVal && SVal <= INLINE_INTEGER_C_MAX)
      return decodeIntImmed(SVal);

    if (INLINE_FLOATING_C_MIN <= SVal && SVal <= INLINE_FLOATING_C_MAX)
      return decodeFPImmed(ImmWidth, SVal, Sema);

    return decodeSpecialReg32(SVal);
  } else if (STI.hasFeature(AMDGPU::FeatureVolcanicIslands)) {
    return createRegOperand(getVgprClassId(Width), Val);
  }
  llvm_unreachable("unsupported target");
}

MCOperand AMDGPUDisassembler::decodeSDWASrc16(unsigned Val) const {
  return decodeSDWASrc(OPW16, Val, 16, AMDGPU::OperandSemantics::FP16);
}

MCOperand AMDGPUDisassembler::decodeSDWASrc32(unsigned Val) const {
  return decodeSDWASrc(OPW32, Val, 32, AMDGPU::OperandSemantics::FP32);
}

MCOperand AMDGPUDisassembler::decodeSDWAVopcDst(unsigned Val) const {
  using namespace AMDGPU::SDWA;

  assert((STI.hasFeature(AMDGPU::FeatureGFX9) ||
          STI.hasFeature(AMDGPU::FeatureGFX10)) &&
         "SDWAVopcDst should be present only on GFX9+");

  bool IsWave64 = STI.hasFeature(AMDGPU::FeatureWavefrontSize64);

  if (Val & SDWA9EncValues::VOPC_DST_VCC_MASK) {
    Val &= SDWA9EncValues::VOPC_DST_SGPR_MASK;

    int TTmpIdx = getTTmpIdx(Val);
    if (TTmpIdx >= 0) {
      auto TTmpClsId = getTtmpClassId(IsWave64 ? OPW64 : OPW32);
      return createSRegOperand(TTmpClsId, TTmpIdx);
    } else if (Val > SGPR_MAX) {
      return IsWave64 ? decodeSpecialReg64(Val)
                      : decodeSpecialReg32(Val);
    } else {
      return createSRegOperand(getSgprClassId(IsWave64 ? OPW64 : OPW32), Val);
    }
  } else {
    return createRegOperand(IsWave64 ? AMDGPU::VCC : AMDGPU::VCC_LO);
  }
}

MCOperand AMDGPUDisassembler::decodeBoolReg(unsigned Val) const {
  return STI.hasFeature(AMDGPU::FeatureWavefrontSize64)
             ? decodeSrcOp(OPW64, Val)
             : decodeSrcOp(OPW32, Val);
}

MCOperand AMDGPUDisassembler::decodeSplitBarrier(unsigned Val) const {
  return decodeSrcOp(OPW32, Val);
}

MCOperand AMDGPUDisassembler::decodeDpp8FI(unsigned Val) const {
  if (Val != AMDGPU::DPP::DPP8_FI_0 && Val != AMDGPU::DPP::DPP8_FI_1)
    return MCOperand();
  return MCOperand::createImm(Val);
}

bool AMDGPUDisassembler::isVI() const {
  return STI.hasFeature(AMDGPU::FeatureVolcanicIslands);
}

bool AMDGPUDisassembler::isGFX9() const { return AMDGPU::isGFX9(STI); }

bool AMDGPUDisassembler::isGFX90A() const {
  return STI.hasFeature(AMDGPU::FeatureGFX90AInsts);
}

bool AMDGPUDisassembler::isGFX9Plus() const { return AMDGPU::isGFX9Plus(STI); }

bool AMDGPUDisassembler::isGFX10() const { return AMDGPU::isGFX10(STI); }

bool AMDGPUDisassembler::isGFX10Plus() const {
  return AMDGPU::isGFX10Plus(STI);
}

bool AMDGPUDisassembler::isGFX11() const {
  return STI.hasFeature(AMDGPU::FeatureGFX11);
}

bool AMDGPUDisassembler::isGFX11Plus() const {
  return AMDGPU::isGFX11Plus(STI);
}

bool AMDGPUDisassembler::isGFX12() const {
  return STI.hasFeature(AMDGPU::FeatureGFX12);
}

bool AMDGPUDisassembler::isGFX12Plus() const {
  return AMDGPU::isGFX12Plus(STI);
}

bool AMDGPUDisassembler::hasArchitectedFlatScratch() const {
  return STI.hasFeature(AMDGPU::FeatureArchitectedFlatScratch);
}

bool AMDGPUDisassembler::hasKernargPreload() const {
  return AMDGPU::hasKernargPreload(STI);
}

//===----------------------------------------------------------------------===//
// AMDGPU specific symbol handling
//===----------------------------------------------------------------------===//
1783#define GET_FIELD(MASK) (AMDHSA_BITS_GET(FourByteBuffer, MASK))
1784#define PRINT_DIRECTIVE(DIRECTIVE, MASK) \
1785 do { \
1786 KdStream << Indent << DIRECTIVE " " << GET_FIELD(MASK) << '\n'; \
1787 } while (0)
1788#define PRINT_PSEUDO_DIRECTIVE_COMMENT(DIRECTIVE, MASK) \
1789 do { \
1790 KdStream << Indent << MAI.getCommentString() << ' ' << DIRECTIVE " " \
1791 << GET_FIELD(MASK) << '\n'; \
1792 } while (0)
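// Illustrative sketch (field values hypothetical): for a field decoded as 0,
// PRINT_DIRECTIVE(".amdhsa_float_round_mode_32", MASK) appends
//   .amdhsa_float_round_mode_32 0
// to KdStream, while PRINT_PSEUDO_DIRECTIVE_COMMENT("SHARED_VGPR_COUNT", MASK)
// emits fields with no matching directive behind the assembler comment
// string, e.g.:
//   ; SHARED_VGPR_COUNT 0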
1793
1794// NOLINTNEXTLINE(readability-identifier-naming)
1795MCDisassembler::DecodeStatus AMDGPUDisassembler::decodeCOMPUTE_PGM_RSRC1(
1796 uint32_t FourByteBuffer, raw_string_ostream &KdStream) const {
1797 using namespace amdhsa;
1798 StringRef Indent = "\t";
1799
1800 // We cannot accurately backward compute #VGPRs used from
1801 // GRANULATED_WORKITEM_VGPR_COUNT. But we are concerned with getting the same
1802 // value of GRANULATED_WORKITEM_VGPR_COUNT in the reassembled binary. So we
1803 // simply calculate the inverse of what the assembler does.
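// Worked example (granule value hypothetical): with a VGPR encoding granule
// of 4, an encoded GRANULATED_WORKITEM_VGPR_COUNT of 5 prints
// ".amdhsa_next_free_vgpr 24" since (5 + 1) * 4 == 24; the assembler then
// granulates 24 back to the same encoded value of 5.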
1804
1805 uint32_t GranulatedWorkitemVGPRCount =
1806 GET_FIELD(COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT);
1807
1808 uint32_t NextFreeVGPR =
1809 (GranulatedWorkitemVGPRCount + 1) *
1810 AMDGPU::IsaInfo::getVGPREncodingGranule(&STI, EnableWavefrontSize32);
1811
1812 KdStream << Indent << ".amdhsa_next_free_vgpr " << NextFreeVGPR << '\n';
1813
1814 // We cannot backward compute values used to calculate
1815 // GRANULATED_WAVEFRONT_SGPR_COUNT. Hence the original values for the
1816 // following directives cannot be computed:
1817 // .amdhsa_reserve_vcc
1818 // .amdhsa_reserve_flat_scratch
1819 // .amdhsa_reserve_xnack_mask
1820 // They take their respective default values if not specified in the assembly.
1821 //
1822 // GRANULATED_WAVEFRONT_SGPR_COUNT
1823 // = f(NEXT_FREE_SGPR + VCC + FLAT_SCRATCH + XNACK_MASK)
1824 //
1825 // We compute the inverse as though all directives apart from NEXT_FREE_SGPR
1826 // are set to 0. So while disassembling we consider that:
1827 //
1828 // GRANULATED_WAVEFRONT_SGPR_COUNT
1829 // = f(NEXT_FREE_SGPR + 0 + 0 + 0)
1830 //
1831 // The disassembler cannot recover the original values of those 3 directives.
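// Worked example (granule value hypothetical): with an SGPR encoding granule
// of 8, an encoded GRANULATED_WAVEFRONT_SGPR_COUNT of 3 prints
// ".amdhsa_next_free_sgpr 32" since (3 + 1) * 8 == 32, together with the
// three reserve directives printed as 0, so reassembly yields the same
// encoded value.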
1832
1833 uint32_t GranulatedWavefrontSGPRCount =
1834 GET_FIELD(COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT);
1835
1836 if (isGFX10Plus() && GranulatedWavefrontSGPRCount)
1837 return MCDisassembler::Fail;
1838
1839 uint32_t NextFreeSGPR = (GranulatedWavefrontSGPRCount + 1) *
1840 AMDGPU::IsaInfo::getSGPREncodingGranule(&STI);
1841
1842 KdStream << Indent << ".amdhsa_reserve_vcc " << 0 << '\n';
1843 if (!hasArchitectedFlatScratch())
1844 KdStream << Indent << ".amdhsa_reserve_flat_scratch " << 0 << '\n';
1845 KdStream << Indent << ".amdhsa_reserve_xnack_mask " << 0 << '\n';
1846 KdStream << Indent << ".amdhsa_next_free_sgpr " << NextFreeSGPR << "\n";
1847
1848 if (FourByteBuffer & COMPUTE_PGM_RSRC1_PRIORITY)
1849 return MCDisassembler::Fail;
1850
1851 PRINT_DIRECTIVE(".amdhsa_float_round_mode_32",
1852 COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_32);
1853 PRINT_DIRECTIVE(".amdhsa_float_round_mode_16_64",
1854 COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_16_64);
1855 PRINT_DIRECTIVE(".amdhsa_float_denorm_mode_32",
1856 COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_32);
1857 PRINT_DIRECTIVE(".amdhsa_float_denorm_mode_16_64",
1858 COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_16_64);
1859
1860 if (FourByteBuffer & COMPUTE_PGM_RSRC1_PRIV)
1861 return MCDisassembler::Fail;
1862
1863 if (!isGFX12Plus())
1864 PRINT_DIRECTIVE(".amdhsa_dx10_clamp",
1865 COMPUTE_PGM_RSRC1_GFX6_GFX11_ENABLE_DX10_CLAMP);
1866
1867 if (FourByteBuffer & COMPUTE_PGM_RSRC1_DEBUG_MODE)
1868 return MCDisassembler::Fail;
1869
1870 if (!isGFX12Plus())
1871 PRINT_DIRECTIVE(".amdhsa_ieee_mode",
1872 COMPUTE_PGM_RSRC1_GFX6_GFX11_ENABLE_IEEE_MODE);
1873
1874 if (FourByteBuffer & COMPUTE_PGM_RSRC1_BULKY)
1875 return MCDisassembler::Fail;
1876
1877 if (FourByteBuffer & COMPUTE_PGM_RSRC1_CDBG_USER)
1878 return MCDisassembler::Fail;
1879
1880 if (isGFX9Plus())
1881 PRINT_DIRECTIVE(".amdhsa_fp16_overflow", COMPUTE_PGM_RSRC1_GFX9_PLUS_FP16_OVFL);
1882
1883 if (!isGFX9Plus())
1884 if (FourByteBuffer & COMPUTE_PGM_RSRC1_GFX6_GFX8_RESERVED0)
1885 return MCDisassembler::Fail;
1886 if (FourByteBuffer & COMPUTE_PGM_RSRC1_RESERVED1)
1887 return MCDisassembler::Fail;
1888 if (!isGFX10Plus())
1889 if (FourByteBuffer & COMPUTE_PGM_RSRC1_GFX6_GFX9_RESERVED2)
1890 return MCDisassembler::Fail;
1891
1892 if (isGFX10Plus()) {
1893 PRINT_DIRECTIVE(".amdhsa_workgroup_processor_mode",
1894 COMPUTE_PGM_RSRC1_GFX10_PLUS_WGP_MODE);
1895 PRINT_DIRECTIVE(".amdhsa_memory_ordered", COMPUTE_PGM_RSRC1_GFX10_PLUS_MEM_ORDERED);
1896 PRINT_DIRECTIVE(".amdhsa_forward_progress", COMPUTE_PGM_RSRC1_GFX10_PLUS_FWD_PROGRESS);
1897 }
1898
1899 if (isGFX12Plus())
1900 PRINT_DIRECTIVE(".amdhsa_round_robin_scheduling",
1901 COMPUTE_PGM_RSRC1_GFX12_PLUS_ENABLE_WG_RR_EN);
1902
1902
1903 return MCDisassembler::Success;
1904}
1905
1906// NOLINTNEXTLINE(readability-identifier-naming)
1907MCDisassembler::DecodeStatus AMDGPUDisassembler::decodeCOMPUTE_PGM_RSRC2(
1908 uint32_t FourByteBuffer, raw_string_ostream &KdStream) const {
1909 using namespace amdhsa;
1910 StringRef Indent = "\t";
1911 if (hasArchitectedFlatScratch())
1912 PRINT_DIRECTIVE(".amdhsa_enable_private_segment",
1913 COMPUTE_PGM_RSRC2_ENABLE_PRIVATE_SEGMENT);
1914 else
1915 PRINT_DIRECTIVE(".amdhsa_system_sgpr_private_segment_wavefront_offset",
1916 COMPUTE_PGM_RSRC2_ENABLE_PRIVATE_SEGMENT);
1917 PRINT_DIRECTIVE(".amdhsa_system_sgpr_workgroup_id_x",
1918 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_X);
1919 PRINT_DIRECTIVE(".amdhsa_system_sgpr_workgroup_id_y",
1920 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Y);
1921 PRINT_DIRECTIVE(".amdhsa_system_sgpr_workgroup_id_z",
1922 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Z);
1923 PRINT_DIRECTIVE(".amdhsa_system_sgpr_workgroup_info",
1924 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_INFO);
1925 PRINT_DIRECTIVE(".amdhsa_system_vgpr_workitem_id",
1926 COMPUTE_PGM_RSRC2_ENABLE_VGPR_WORKITEM_ID);
1927
1928 if (FourByteBuffer & COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_ADDRESS_WATCH)
1929 return MCDisassembler::Fail;
1930
1931 if (FourByteBuffer & COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_MEMORY)
1932 return MCDisassembler::Fail;
1933
1934 if (FourByteBuffer & COMPUTE_PGM_RSRC2_GRANULATED_LDS_SIZE)
1935 return MCDisassembler::Fail;
1936
1937 PRINT_DIRECTIVE(
1938 ".amdhsa_exception_fp_ieee_invalid_op",
1939 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INVALID_OPERATION);
1940 PRINT_DIRECTIVE(".amdhsa_exception_fp_denorm_src",
1941 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_FP_DENORMAL_SOURCE);
1942 PRINT_DIRECTIVE(
1943 ".amdhsa_exception_fp_ieee_div_zero",
1944 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_DIVISION_BY_ZERO);
1945 PRINT_DIRECTIVE(".amdhsa_exception_fp_ieee_overflow",
1946 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_OVERFLOW);
1947 PRINT_DIRECTIVE(".amdhsa_exception_fp_ieee_underflow",
1948 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_UNDERFLOW);
1949 PRINT_DIRECTIVE(".amdhsa_exception_fp_ieee_inexact",
1950 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INEXACT);
1951 PRINT_DIRECTIVE(".amdhsa_exception_int_div_zero",
1952 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_INT_DIVIDE_BY_ZERO);
1953
1954 if (FourByteBuffer & COMPUTE_PGM_RSRC2_RESERVED0)
1955 return MCDisassembler::Fail;
1956
1957 return MCDisassembler::Success;
1958}
1959
1960// NOLINTNEXTLINE(readability-identifier-naming)
1961MCDisassembler::DecodeStatus AMDGPUDisassembler::decodeCOMPUTE_PGM_RSRC3(
1962 uint32_t FourByteBuffer, raw_string_ostream &KdStream) const {
1963 using namespace amdhsa;
1964 StringRef Indent = "\t";
1965 if (isGFX90A()) {
1966 KdStream << Indent << ".amdhsa_accum_offset "
1967 << (GET_FIELD(COMPUTE_PGM_RSRC3_GFX90A_ACCUM_OFFSET) + 1) * 4
1968 << '\n';
1969 if (FourByteBuffer & COMPUTE_PGM_RSRC3_GFX90A_RESERVED0)
1970 return MCDisassembler::Fail;
1971 PRINT_DIRECTIVE(".amdhsa_tg_split", COMPUTE_PGM_RSRC3_GFX90A_TG_SPLIT);
1972 if (FourByteBuffer & COMPUTE_PGM_RSRC3_GFX90A_RESERVED1)
1973 return MCDisassembler::Fail;
1974 } else if (isGFX10Plus()) {
1975 // Bits [0-3].
1976 if (!isGFX12Plus()) {
1977 if (!EnableWavefrontSize32 || !*EnableWavefrontSize32) {
1978 PRINT_DIRECTIVE(".amdhsa_shared_vgpr_count",
1979 COMPUTE_PGM_RSRC3_GFX10_GFX11_SHARED_VGPR_COUNT);
1980 } else {
1981 PRINT_PSEUDO_DIRECTIVE_COMMENT(
1982 "SHARED_VGPR_COUNT",
1983 COMPUTE_PGM_RSRC3_GFX10_GFX11_SHARED_VGPR_COUNT);
1984 }
1985 } else {
1986 if (FourByteBuffer & COMPUTE_PGM_RSRC3_GFX12_PLUS_RESERVED0)
1987 return MCDisassembler::Fail;
1988 }
1989
1990 // Bits [4-11].
1991 if (isGFX11()) {
1992 PRINT_PSEUDO_DIRECTIVE_COMMENT("INST_PREF_SIZE",
1993 COMPUTE_PGM_RSRC3_GFX11_INST_PREF_SIZE);
1994 PRINT_PSEUDO_DIRECTIVE_COMMENT("TRAP_ON_START",
1995 COMPUTE_PGM_RSRC3_GFX11_TRAP_ON_START);
1996 PRINT_PSEUDO_DIRECTIVE_COMMENT("TRAP_ON_END",
1997 COMPUTE_PGM_RSRC3_GFX11_TRAP_ON_END);
1998 } else if (isGFX12Plus()) {
1999 PRINT_PSEUDO_DIRECTIVE_COMMENT(
2000 "INST_PREF_SIZE", COMPUTE_PGM_RSRC3_GFX12_PLUS_INST_PREF_SIZE);
2001 } else {
2002 if (FourByteBuffer & COMPUTE_PGM_RSRC3_GFX10_RESERVED1)
2003 return MCDisassembler::Fail;
2004 }
2005
2006 // Bits [12].
2007 if (FourByteBuffer & COMPUTE_PGM_RSRC3_GFX10_PLUS_RESERVED2)
2008 return MCDisassembler::Fail;
2009
2010 // Bits [13].
2011 if (isGFX12Plus()) {
2012 PRINT_PSEUDO_DIRECTIVE_COMMENT("GLG_EN",
2013 COMPUTE_PGM_RSRC3_GFX12_PLUS_GLG_EN);
2014 } else {
2015 if (FourByteBuffer & COMPUTE_PGM_RSRC3_GFX10_GFX11_RESERVED3)
2016 return MCDisassembler::Fail;
2017 }
2018
2019 // Bits [14-30].
2020 if (FourByteBuffer & COMPUTE_PGM_RSRC3_GFX10_PLUS_RESERVED4)
2021 return MCDisassembler::Fail;
2022
2023 // Bits [31].
2024 if (isGFX11Plus()) {
2025 PRINT_PSEUDO_DIRECTIVE_COMMENT("IMAGE_OP",
2026 COMPUTE_PGM_RSRC3_GFX11_PLUS_IMAGE_OP);
2027 } else {
2028 if (FourByteBuffer & COMPUTE_PGM_RSRC3_GFX10_RESERVED5)
2029 return MCDisassembler::Fail;
2030 }
2031 } else if (FourByteBuffer) {
2032 return MCDisassembler::Fail;
2033 }
2034 return MCDisassembler::Success;
2035}
2036#undef PRINT_PSEUDO_DIRECTIVE_COMMENT
2037#undef PRINT_DIRECTIVE
2038#undef GET_FIELD
2039
2040MCDisassembler::DecodeStatus
2041AMDGPUDisassembler::decodeKernelDescriptorDirective(
2042 DataExtractor::Cursor &Cursor, ArrayRef<uint8_t> Bytes,
2043 raw_string_ostream &KdStream) const {
2044#define PRINT_DIRECTIVE(DIRECTIVE, MASK) \
2045 do { \
2046 KdStream << Indent << DIRECTIVE " " \
2047 << ((TwoByteBuffer & MASK) >> (MASK##_SHIFT)) << '\n'; \
2048 } while (0)
2049
2050 uint16_t TwoByteBuffer = 0;
2051 uint32_t FourByteBuffer = 0;
2052
2053 StringRef ReservedBytes;
2054 StringRef Indent = "\t";
2055
2056 assert(Bytes.size() == 64);
2057 DataExtractor DE(Bytes, /*IsLittleEndian=*/true, /*AddressSize=*/8);
2058
2059 switch (Cursor.tell()) {
2060 case amdhsa::GROUP_SEGMENT_FIXED_SIZE_OFFSET:
2061 FourByteBuffer = DE.getU32(Cursor);
2062 KdStream << Indent << ".amdhsa_group_segment_fixed_size " << FourByteBuffer
2063 << '\n';
2064 return MCDisassembler::Success;
2065
2066 case amdhsa::PRIVATE_SEGMENT_FIXED_SIZE_OFFSET:
2067 FourByteBuffer = DE.getU32(Cursor);
2068 KdStream << Indent << ".amdhsa_private_segment_fixed_size "
2069 << FourByteBuffer << '\n';
2070 return MCDisassembler::Success;
2071
2072 case amdhsa::KERNARG_SIZE_OFFSET:
2073 FourByteBuffer = DE.getU32(Cursor);
2074 KdStream << Indent << ".amdhsa_kernarg_size "
2075 << FourByteBuffer << '\n';
2076 return MCDisassembler::Success;
2077
2078 case amdhsa::RESERVED0_OFFSET:
2079 // 4 reserved bytes, must be 0.
2080 ReservedBytes = DE.getBytes(Cursor, 4);
2081 for (int I = 0; I < 4; ++I) {
2082 if (ReservedBytes[I] != 0) {
2083 return MCDisassembler::Fail;
2084 }
2085 }
2086 return MCDisassembler::Success;
2087
2088 case amdhsa::KERNEL_CODE_ENTRY_BYTE_OFFSET_OFFSET:
2089 // KERNEL_CODE_ENTRY_BYTE_OFFSET
2090 // So far no directive controls this for Code Object V3, so simply skip for
2091 // disassembly.
2092 DE.skip(Cursor, 8);
2093 return MCDisassembler::Success;
2094
2095 case amdhsa::RESERVED1_OFFSET:
2096 // 20 reserved bytes, must be 0.
2097 ReservedBytes = DE.getBytes(Cursor, 20);
2098 for (int I = 0; I < 20; ++I) {
2099 if (ReservedBytes[I] != 0) {
2100 return MCDisassembler::Fail;
2101 }
2102 }
2103 return MCDisassembler::Success;
2104
2105 case amdhsa::COMPUTE_PGM_RSRC3_OFFSET:
2106 FourByteBuffer = DE.getU32(Cursor);
2107 return decodeCOMPUTE_PGM_RSRC3(FourByteBuffer, KdStream);
2108
2109 case amdhsa::COMPUTE_PGM_RSRC1_OFFSET:
2110 FourByteBuffer = DE.getU32(Cursor);
2111 return decodeCOMPUTE_PGM_RSRC1(FourByteBuffer, KdStream);
2112
2113 case amdhsa::COMPUTE_PGM_RSRC2_OFFSET:
2114 FourByteBuffer = DE.getU32(Cursor);
2115 return decodeCOMPUTE_PGM_RSRC2(FourByteBuffer, KdStream);
2116
2117 case amdhsa::KERNEL_CODE_PROPERTIES_OFFSET:
2118 using namespace amdhsa;
2119 TwoByteBuffer = DE.getU16(Cursor);
2120
2121 if (!hasArchitectedFlatScratch())
2122 PRINT_DIRECTIVE(".amdhsa_user_sgpr_private_segment_buffer",
2123 KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER);
2124 PRINT_DIRECTIVE(".amdhsa_user_sgpr_dispatch_ptr",
2125 KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR);
2126 PRINT_DIRECTIVE(".amdhsa_user_sgpr_queue_ptr",
2127 KERNEL_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR);
2128 PRINT_DIRECTIVE(".amdhsa_user_sgpr_kernarg_segment_ptr",
2129 KERNEL_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR);
2130 PRINT_DIRECTIVE(".amdhsa_user_sgpr_dispatch_id",
2131 KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID);
2132 if (!hasArchitectedFlatScratch())
2133 PRINT_DIRECTIVE(".amdhsa_user_sgpr_flat_scratch_init",
2134 KERNEL_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT);
2135 PRINT_DIRECTIVE(".amdhsa_user_sgpr_private_segment_size",
2136 KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE);
2137
2138 if (TwoByteBuffer & KERNEL_CODE_PROPERTY_RESERVED0)
2139 return MCDisassembler::Fail;
2140
2141 // Reserved for GFX9
2142 if (isGFX9() &&
2143 (TwoByteBuffer & KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32)) {
2144 return MCDisassembler::Fail;
2145 } else if (isGFX10Plus()) {
2146 PRINT_DIRECTIVE(".amdhsa_wavefront_size32",
2147 KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32);
2148 }
2149
2150 if (CodeObjectVersion >= AMDGPU::AMDHSA_COV5)
2151 PRINT_DIRECTIVE(".amdhsa_uses_dynamic_stack",
2152 KERNEL_CODE_PROPERTY_USES_DYNAMIC_STACK);
2153
2154 if (TwoByteBuffer & KERNEL_CODE_PROPERTY_RESERVED1)
2155 return MCDisassembler::Fail;
2156
2157 return MCDisassembler::Success;
2158
2159 case amdhsa::KERNARG_PRELOAD_OFFSET:
2160 using namespace amdhsa;
2161 TwoByteBuffer = DE.getU16(Cursor);
2162 if (TwoByteBuffer & KERNARG_PRELOAD_SPEC_LENGTH) {
2163 PRINT_DIRECTIVE(".amdhsa_user_sgpr_kernarg_preload_length",
2164 KERNARG_PRELOAD_SPEC_LENGTH);
2165 }
2166
2167 if (TwoByteBuffer & KERNARG_PRELOAD_SPEC_OFFSET) {
2168 PRINT_DIRECTIVE(".amdhsa_user_sgpr_kernarg_preload_offset",
2169 KERNARG_PRELOAD_SPEC_OFFSET);
2170 }
2171 return MCDisassembler::Success;
2172
2173 case amdhsa::RESERVED3_OFFSET:
2174 // 4 bytes from here are reserved, must be 0.
2175 ReservedBytes = DE.getBytes(Cursor, 4);
2176 for (int I = 0; I < 4; ++I) {
2177 if (ReservedBytes[I] != 0)
2178 return MCDisassembler::Fail;
2179 }
2180 return MCDisassembler::Success;
2181
2182 default:
2183 llvm_unreachable("Unhandled index. Case statements cover everything.");
2184 return MCDisassembler::Fail;
2185 }
2186#undef PRINT_DIRECTIVE
2187}
2188
2189MCDisassembler::DecodeStatus AMDGPUDisassembler::decodeKernelDescriptor(
2190 StringRef KdName, ArrayRef<uint8_t> Bytes, uint64_t KdAddress) const {
2191 // CP microcode requires the kernel descriptor to be 64-byte aligned.
2192 if (Bytes.size() != 64 || KdAddress % 64 != 0)
2193 return MCDisassembler::Fail;
2194
2195 // FIXME: We can't actually decode "in order" as is done below, as e.g. GFX10
2196 // requires us to know the setting of .amdhsa_wavefront_size32 in order to
2197 // accurately produce .amdhsa_next_free_vgpr, and they appear in the wrong
2198 // order. Work around this by first looking up .amdhsa_wavefront_size32 here
2199 // when required.
2200 if (isGFX10Plus()) {
2201 uint16_t KernelCodeProperties =
2202 support::endian::read16(&Bytes[amdhsa::KERNEL_CODE_PROPERTIES_OFFSET],
2203 llvm::endianness::little);
2204 EnableWavefrontSize32 =
2205 AMDHSA_BITS_GET(KernelCodeProperties,
2206 amdhsa::KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32);
2207 }
2208
2209 std::string Kd;
2210 raw_string_ostream KdStream(Kd);
2211 KdStream << ".amdhsa_kernel " << KdName << '\n';
2212
2213 DataExtractor::Cursor C(0);
2214 while (C && C.tell() < Bytes.size()) {
2215 MCDisassembler::DecodeStatus Status =
2216 decodeKernelDescriptorDirective(C, Bytes, KdStream);
2217
2218 cantFail(C.takeError());
2219
2220 if (Status == MCDisassembler::Fail)
2221 return MCDisassembler::Fail;
2222 }
2223 KdStream << ".end_amdhsa_kernel\n";
2224 outs() << KdStream.str();
2225 return MCDisassembler::Success;
2226}
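// On success, the directives printed to stdout form a reassemblable block of
// the shape (directive values hypothetical):
//   .amdhsa_kernel my_kernel
//       .amdhsa_group_segment_fixed_size 0
//       .amdhsa_next_free_vgpr 24
//       ...
//   .end_amdhsa_kernel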
2227
2228std::optional<MCDisassembler::DecodeStatus>
2229AMDGPUDisassembler::onSymbolStart(SymbolInfoTy &Symbol, uint64_t &Size,
2230 ArrayRef<uint8_t> Bytes, uint64_t Address,
2231 raw_ostream &CStream) const {
2232 // Right now only kernel descriptor needs to be handled.
2233 // We ignore all other symbols for target specific handling.
2234 // TODO:
2235 // Fix the spurious symbol issue for AMDGPU kernels. Exists for both Code
2236 // Object V2 and V3 when symbols are marked protected.
2237
2238 // amd_kernel_code_t for Code Object V2.
2239 if (Symbol.Type == ELF::STT_AMDGPU_HSA_KERNEL) {
2240 Size = 256;
2241 return MCDisassembler::Fail;
2242 }
2243
2244 // Code Object V3 kernel descriptors.
2245 StringRef Name = Symbol.Name;
2246 if (Symbol.Type == ELF::STT_OBJECT && Name.ends_with(StringRef(".kd"))) {
2247 Size = 64; // Size = 64 regardless of success or failure.
2248 return decodeKernelDescriptor(Name.drop_back(3), Bytes, Address);
2249 }
2250 return std::nullopt;
2251}
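// Example (symbol name hypothetical): an STT_OBJECT symbol named
// "my_kernel.kd" is decoded as the 64-byte kernel descriptor for
// "my_kernel"; Size is set to 64 whether or not the decode succeeds.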
2252
2253//===----------------------------------------------------------------------===//
2254// AMDGPUSymbolizer
2255//===----------------------------------------------------------------------===//
2256
2257// Try to find symbol name for specified label
2258bool AMDGPUSymbolizer::tryAddingSymbolicOperand(
2259 MCInst &Inst, raw_ostream & /*cStream*/, int64_t Value,
2260 uint64_t /*Address*/, bool IsBranch, uint64_t /*Offset*/,
2261 uint64_t /*OpSize*/, uint64_t /*InstSize*/) {
2262
2263 if (!IsBranch) {
2264 return false;
2265 }
2266
2267 auto *Symbols = static_cast<SectionSymbolsTy *>(DisInfo);
2268 if (!Symbols)
2269 return false;
2270
2271 auto Result = llvm::find_if(*Symbols, [Value](const SymbolInfoTy &Val) {
2272 return Val.Addr == static_cast<uint64_t>(Value) &&
2273 Val.Type == ELF::STT_NOTYPE;
2274 });
2275 if (Result != Symbols->end()) {
2276 auto *Sym = Ctx.getOrCreateSymbol(Result->Name);
2277 const auto *Add = MCSymbolRefExpr::create(Sym, Ctx);
2278 Inst.addOperand(MCOperand::createExpr(Add));
2279 return true;
2280 }
2281 // Add to list of referenced addresses, so caller can synthesize a label.
2282 ReferencedAddresses.push_back(static_cast<uint64_t>(Value));
2283 return false;
2284}
2285
2286void AMDGPUSymbolizer::tryAddingPcLoadReferenceComment(raw_ostream &cStream,
2287 int64_t Value,
2288 uint64_t Address) {
2289 llvm_unreachable("unimplemented");
2290}
2291
2292//===----------------------------------------------------------------------===//
2293// Initialization
2294//===----------------------------------------------------------------------===//
2295
2296static MCSymbolizer *createAMDGPUSymbolizer(const Triple &/*TT*/,
2297 LLVMOpInfoCallback /*GetOpInfo*/,
2298 LLVMSymbolLookupCallback /*SymbolLookUp*/,
2299 void *DisInfo,
2300 MCContext *Ctx,
2301 std::unique_ptr<MCRelocationInfo> &&RelInfo) {
2302 return new AMDGPUSymbolizer(*Ctx, std::move(RelInfo), DisInfo);
2303}
2304
2305static MCDisassembler *createAMDGPUDisassembler(const Target &T,
2306 const MCSubtargetInfo &STI,
2307 MCContext &Ctx) {
2308 return new AMDGPUDisassembler(STI, Ctx, T.createMCInstrInfo());
2309}
2310
2311extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeAMDGPUDisassembler() {
2312 TargetRegistry::RegisterMCDisassembler(getTheGCNTarget(),
2313 createAMDGPUDisassembler);
2314 TargetRegistry::RegisterMCSymbolizer(getTheGCNTarget(),
2315 createAMDGPUSymbolizer);
2316}