LLVM 20.0.0git
AMDGPUDisassembler.cpp
Go to the documentation of this file.
1//===- AMDGPUDisassembler.cpp - Disassembler for AMDGPU ISA ---------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9//===----------------------------------------------------------------------===//
10//
11/// \file
12///
13/// This file contains the definition of the AMDGPU ISA disassembler.
14//
15//===----------------------------------------------------------------------===//
16
17// ToDo: What to do with instruction suffixes (v_mov_b32 vs v_mov_b32_e32)?
18
21#include "SIDefines.h"
22#include "SIRegisterInfo.h"
28#include "llvm/MC/MCAsmInfo.h"
29#include "llvm/MC/MCContext.h"
31#include "llvm/MC/MCExpr.h"
32#include "llvm/MC/MCInstrDesc.h"
37
38using namespace llvm;
39
40#define DEBUG_TYPE "amdgpu-disassembler"
41
// Highest SGPR encoding value for the current subtarget: the GFX10 limit when
// isGFX10Plus() is true, otherwise the SI limit. The expansion calls
// isGFX10Plus(), so the macro is only usable where that name resolves.
42#define SGPR_MAX \
43 (isGFX10Plus() ? AMDGPU::EncValues::SGPR_MAX_GFX10 \
44 : AMDGPU::EncValues::SGPR_MAX_SI)
45
47
// Constructor. Caches the register info and asm info taken from Ctx, the
// maximum instruction length reported by the asm info for STI, and the default
// AMDHSA code object version.
// NOTE(review): the first line of the signature is absent from this listing.
49 MCContext &Ctx, MCInstrInfo const *MCII)
50 : MCDisassembler(STI, Ctx), MCII(MCII), MRI(*Ctx.getRegisterInfo()),
51 MAI(*Ctx.getAsmInfo()), TargetMaxInstBytes(MAI.getMaxInstLength(&STI)),
52 CodeObjectVersion(AMDGPU::getDefaultAMDHSACodeObjectVersion()) {
53 // ToDo: AMDGPUDisassembler supports only VI ISA.
// The check below is fatal unless the subtarget has FeatureGCN3Encoding or
// isGFX10Plus() holds.
54 if (!STI.hasFeature(AMDGPU::FeatureGCN3Encoding) && !isGFX10Plus())
55 report_fatal_error("Disassembly not yet supported for subtarget");
56
// Create one constant symbol per (name, code) pair returned by
// getGFXVersions().
57 for (auto [Symbol, Code] : AMDGPU::UCVersion::getGFXVersions())
58 createConstantSymbolExpr(Symbol, Code);
59
// Keep the expressions for the three UC_VERSION_* bit symbols in members.
60 UCVersionW64Expr = createConstantSymbolExpr("UC_VERSION_W64_BIT", 0x2000);
61 UCVersionW32Expr = createConstantSymbolExpr("UC_VERSION_W32_BIT", 0x4000);
62 UCVersionMDPExpr = createConstantSymbolExpr("UC_VERSION_MDP_BIT", 0x8000);
63}
64
67}
68
// Appends Opnd to Inst unconditionally. The returned status is chosen by
// Opnd.isValid().
// NOTE(review): the return-type line and the two result values of the
// conditional expression are absent from this listing.
70addOperand(MCInst &Inst, const MCOperand& Opnd) {
71 Inst.addOperand(Opnd);
72 return Opnd.isValid() ?
75}
76
// Inserts Op into MI at the operand index that the opcode assigns to the named
// operand NameIdx. When the opcode has no such operand (index -1), MI is left
// unchanged. Returns the looked-up index in both cases.
// NOTE(review): the first line of the signature is absent from this listing.
78 uint16_t NameIdx) {
79 int OpIdx = AMDGPU::getNamedOperandIdx(MI.getOpcode(), NameIdx);
80 if (OpIdx != -1) {
81 auto *I = MI.begin();
82 std::advance(I, OpIdx);
83 MI.insert(I, Op);
84 }
85 return OpIdx;
86}
87
// Decodes a SOPP branch target. The byte address of the target is computed as
// sign_extend16(Imm) * 4 + 4 + Addr and offered to
// tryAddingSymbolicOperand(); the raw Imm is what gets added as an immediate
// operand by the final return.
// NOTE(review): one signature line and the statement controlled by the `if`
// are absent from this listing, so the visible `return` is most likely the
// fallback path rather than the body of the `if` - confirm against the real
// source.
88static DecodeStatus decodeSOPPBrTarget(MCInst &Inst, unsigned Imm,
90 const MCDisassembler *Decoder) {
91 const auto *DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
92
93 // Our branches take a simm16.
94 int64_t Offset = SignExtend64<16>(Imm) * 4 + 4 + Addr;
95
96 if (DAsm->tryAddingSymbolicOperand(Inst, Offset, Addr, true, 2, 2, 0))
98 return addOperand(Inst, MCOperand::createImm(Imm));
99}
100
// Computes an offset from Imm whose width and signedness depend on the
// subtarget: 24-bit signed on GFX12+, 20-bit unsigned on VI, 21-bit signed
// otherwise.
// NOTE(review): the first signature line and the statement that consumes
// Offset are absent from this listing.
102 const MCDisassembler *Decoder) {
103 const auto *DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
104 int64_t Offset;
105 if (DAsm->isGFX12Plus()) { // GFX12 supports 24-bit signed offsets.
106 Offset = SignExtend64<24>(Imm);
107 } else if (DAsm->isVI()) { // VI supports 20-bit unsigned offsets.
108 Offset = Imm & 0xFFFFF;
109 } else { // GFX9+ supports 21-bit signed offsets.
110 Offset = SignExtend64<21>(Imm);
111 }
113}
114
115static DecodeStatus decodeBoolReg(MCInst &Inst, unsigned Val, uint64_t Addr,
116 const MCDisassembler *Decoder) {
117 const auto *DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
118 return addOperand(Inst, DAsm->decodeBoolReg(Val));
119}
120
// Static decoder callback: appends to Inst the operand produced by
// AMDGPUDisassembler::decodeSplitBarrier(Val).
// NOTE(review): one signature line is absent from this listing.
121static DecodeStatus decodeSplitBarrier(MCInst &Inst, unsigned Val,
123 const MCDisassembler *Decoder) {
124 const auto *DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
125 return addOperand(Inst, DAsm->decodeSplitBarrier(Val));
126}
127
128static DecodeStatus decodeDpp8FI(MCInst &Inst, unsigned Val, uint64_t Addr,
129 const MCDisassembler *Decoder) {
130 const auto *DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
131 return addOperand(Inst, DAsm->decodeDpp8FI(Val));
132}
133
// Defines a static decoder callback named StaticDecoderName that forwards Imm
// to the AMDGPUDisassembler member DecoderName and appends the resulting
// operand to Inst. The address argument is ignored.
134#define DECODE_OPERAND(StaticDecoderName, DecoderName) \
135 static DecodeStatus StaticDecoderName(MCInst &Inst, unsigned Imm, \
136 uint64_t /*Addr*/, \
137 const MCDisassembler *Decoder) { \
138 auto DAsm = static_cast<const AMDGPUDisassembler *>(Decoder); \
139 return addOperand(Inst, DAsm->DecoderName(Imm)); \
140 }
141
142// Decoder for registers, decode directly using RegClassID. Imm(8-bit) is
143// number of register. Used by VGPR only and AGPR only operands.
// The generated function is named Decode<RegClass>RegisterClass; it asserts
// that Imm fits in 8 bits and builds the operand with
// createRegOperand(AMDGPU::<RegClass>RegClassID, Imm).
144#define DECODE_OPERAND_REG_8(RegClass) \
145 static DecodeStatus Decode##RegClass##RegisterClass( \
146 MCInst &Inst, unsigned Imm, uint64_t /*Addr*/, \
147 const MCDisassembler *Decoder) { \
148 assert(Imm < (1 << 8) && "8-bit encoding"); \
149 auto DAsm = static_cast<const AMDGPUDisassembler *>(Decoder); \
150 return addOperand( \
151 Inst, DAsm->createRegOperand(AMDGPU::RegClass##RegClassID, Imm)); \
152 }
153
// Defines a static decoder callback called Name. The callback asserts that Imm
// fits in EncSize bits and appends the operand returned by
// decodeSrcOp(AMDGPUDisassembler::OpWidth, EncImm, MandatoryLiteral, ImmWidth).
// EncImm is an expression evaluated inside the callback, so it may refer to
// Imm.
154#define DECODE_SrcOp(Name, EncSize, OpWidth, EncImm, MandatoryLiteral, \
155 ImmWidth) \
156 static DecodeStatus Name(MCInst &Inst, unsigned Imm, uint64_t /*Addr*/, \
157 const MCDisassembler *Decoder) { \
158 assert(Imm < (1 << EncSize) && #EncSize "-bit encoding"); \
159 auto DAsm = static_cast<const AMDGPUDisassembler *>(Decoder); \
160 return addOperand(Inst, \
161 DAsm->decodeSrcOp(AMDGPUDisassembler::OpWidth, EncImm, \
162 MandatoryLiteral, ImmWidth)); \
163 }
164
// Shared implementation behind the templated source-operand decoders in this
// file. Asserts that Imm fits in EncSize bits, then appends the operand the
// disassembler builds from EncImm (not from Imm); callers may pass an EncImm
// that has extra selector bits OR-ed in.
// NOTE(review): the parameter lines that declare OpWidth and Sema are absent
// from this listing.
165static DecodeStatus decodeSrcOp(MCInst &Inst, unsigned EncSize,
167 unsigned Imm, unsigned EncImm,
168 bool MandatoryLiteral, unsigned ImmWidth,
170 const MCDisassembler *Decoder) {
171 assert(Imm < (1U << EncSize) && "Operand doesn't fit encoding!");
172 const auto *DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
173 return addOperand(Inst, DAsm->decodeSrcOp(OpWidth, EncImm, MandatoryLiteral,
174 ImmWidth, Sema));
175}
176
177// Decoder for registers. Imm(7-bit) is number of register, uses decodeSrcOp to
178// get register class. Used by SGPR only operands.
// Expands to DECODE_SrcOp with the name Decode<RegClass>RegisterClass, a 7-bit
// encoding, Imm passed through unchanged, no mandatory literal and an
// immediate width of 0.
179#define DECODE_OPERAND_REG_7(RegClass, OpWidth) \
180 DECODE_SrcOp(Decode##RegClass##RegisterClass, 7, OpWidth, Imm, false, 0)
181
182// Decoder for registers. Imm(10-bit): Imm{7-0} is number of register,
183// Imm{9} is acc(agpr or vgpr) Imm{8} should be 0 (see VOP3Pe_SMFMAC).
184// Set Imm{8} to 1 (IS_VGPR) to decode using 'enum10' from decodeSrcOp.
185// Used by AV_ register classes (AGPR or VGPR only register operands).
186template <AMDGPUDisassembler::OpWidthTy OpWidth>
187static DecodeStatus decodeAV10(MCInst &Inst, unsigned Imm, uint64_t /* Addr */,
188 const MCDisassembler *Decoder) {
189 return decodeSrcOp(Inst, 10, OpWidth, Imm, Imm | AMDGPU::EncValues::IS_VGPR,
190 false, 0, AMDGPU::OperandSemantics::INT, Decoder);
191}
192
193// Decoder for Src(9-bit encoding) registers only.
// Forwards to decodeSrcOp with EncSize 9, EncImm equal to Imm, no mandatory
// literal and an immediate width of 0.
// NOTE(review): the line carrying the remaining call arguments is absent from
// this listing.
194template <AMDGPUDisassembler::OpWidthTy OpWidth>
195static DecodeStatus decodeSrcReg9(MCInst &Inst, unsigned Imm,
196 uint64_t /* Addr */,
197 const MCDisassembler *Decoder) {
198 return decodeSrcOp(Inst, 9, OpWidth, Imm, Imm, false, 0,
200}
201
202// Decoder for Src(9-bit encoding) AGPR, register number encoded in 9bits, set
203// Imm{9} to 1 (set acc) and decode using 'enum10' from decodeSrcOp, registers
204// only.
// The value 512 OR-ed into EncImm below is bit 9.
// NOTE(review): the line carrying the remaining call arguments is absent from
// this listing.
205template <AMDGPUDisassembler::OpWidthTy OpWidth>
206static DecodeStatus decodeSrcA9(MCInst &Inst, unsigned Imm, uint64_t /* Addr */,
207 const MCDisassembler *Decoder) {
208 return decodeSrcOp(Inst, 9, OpWidth, Imm, Imm | 512, false, 0,
210}
211
212// Decoder for 'enum10' from decodeSrcOp, Imm{0-8} is 9-bit Src encoding
213// Imm{9} is acc, registers only.
// Imm is passed through unchanged as EncImm with an EncSize of 10.
// NOTE(review): the line carrying the remaining call arguments is absent from
// this listing.
214template <AMDGPUDisassembler::OpWidthTy OpWidth>
215static DecodeStatus decodeSrcAV10(MCInst &Inst, unsigned Imm,
216 uint64_t /* Addr */,
217 const MCDisassembler *Decoder) {
218 return decodeSrcOp(Inst, 10, OpWidth, Imm, Imm, false, 0,
220}
221
222// Decoder for RegisterOperands using 9-bit Src encoding. Operand can be
223// register from RegClass or immediate. Registers that don't belong to RegClass
224// will be decoded and InstPrinter will report warning. Immediate will be
225// decoded into constant of size ImmWidth, should match width of immediate used
226// by OperandType (important for floating point types).
227template <AMDGPUDisassembler::OpWidthTy OpWidth, unsigned ImmWidth,
228 unsigned OperandSemantics>
229static DecodeStatus decodeSrcRegOrImm9(MCInst &Inst, unsigned Imm,
230 uint64_t /* Addr */,
231 const MCDisassembler *Decoder) {
232 return decodeSrcOp(Inst, 9, OpWidth, Imm, Imm, false, ImmWidth,
233 (AMDGPU::OperandSemantics)OperandSemantics, Decoder);
234}
235
236// Decoder for Src(9-bit encoding) AGPR or immediate. Set Imm{9} to 1 (set acc)
237// and decode using 'enum10' from decodeSrcOp.
238template <AMDGPUDisassembler::OpWidthTy OpWidth, unsigned ImmWidth,
239 unsigned OperandSemantics>
240static DecodeStatus decodeSrcRegOrImmA9(MCInst &Inst, unsigned Imm,
241 uint64_t /* Addr */,
242 const MCDisassembler *Decoder) {
243 return decodeSrcOp(Inst, 9, OpWidth, Imm, Imm | 512, false, ImmWidth,
244 (AMDGPU::OperandSemantics)OperandSemantics, Decoder);
245}
246
// 9-bit register-or-immediate decoder that passes true for decodeSrcOp's
// MandatoryLiteral argument; Imm is passed through unchanged as EncImm.
// NOTE(review): the line carrying the function name is absent from this
// listing.
247template <AMDGPUDisassembler::OpWidthTy OpWidth, unsigned ImmWidth,
248 unsigned OperandSemantics>
250 uint64_t /* Addr */,
251 const MCDisassembler *Decoder) {
252 return decodeSrcOp(Inst, 9, OpWidth, Imm, Imm, true, ImmWidth,
253 (AMDGPU::OperandSemantics)OperandSemantics, Decoder);
254}
255
256// Default decoders generated by tablegen: 'Decode<RegClass>RegisterClass'
257// when RegisterClass is used as an operand. Most often used for destination
258// operands.
259
// 8-bit register number, decoded directly from the register class ID.
// NOTE(review): some instantiation lines are absent from this listing.
261DECODE_OPERAND_REG_8(VGPR_32_Lo128)
264DECODE_OPERAND_REG_8(VReg_128)
265DECODE_OPERAND_REG_8(VReg_192)
266DECODE_OPERAND_REG_8(VReg_256)
267DECODE_OPERAND_REG_8(VReg_288)
268DECODE_OPERAND_REG_8(VReg_352)
269DECODE_OPERAND_REG_8(VReg_384)
270DECODE_OPERAND_REG_8(VReg_512)
271DECODE_OPERAND_REG_8(VReg_1024)
272
// 7-bit register number, decoded through decodeSrcOp with the given operand
// width.
273DECODE_OPERAND_REG_7(SReg_32, OPW32)
274DECODE_OPERAND_REG_7(SReg_32_XEXEC, OPW32)
275DECODE_OPERAND_REG_7(SReg_32_XM0_XEXEC, OPW32)
276DECODE_OPERAND_REG_7(SReg_32_XEXEC_HI, OPW32)
277DECODE_OPERAND_REG_7(SReg_64, OPW64)
278DECODE_OPERAND_REG_7(SReg_64_XEXEC, OPW64)
279DECODE_OPERAND_REG_7(SReg_64_XEXEC_XNULL, OPW64)
280DECODE_OPERAND_REG_7(SReg_96, OPW96)
281DECODE_OPERAND_REG_7(SReg_128, OPW128)
282DECODE_OPERAND_REG_7(SReg_256, OPW256)
283DECODE_OPERAND_REG_7(SReg_512, OPW512)
284
// 8-bit register number again, for the AReg_* classes.
287DECODE_OPERAND_REG_8(AReg_128)
288DECODE_OPERAND_REG_8(AReg_256)
289DECODE_OPERAND_REG_8(AReg_512)
290DECODE_OPERAND_REG_8(AReg_1024)
291
// Builds a 16-bit VGPR operand from a 10-bit value: Imm{7-0} is the register
// index, Imm{9} is passed to createVGPR16Operand as the IsHi flag, and Imm{8}
// is asserted to be clear.
// NOTE(review): the line carrying the function name is absent from this
// listing.
293 uint64_t /*Addr*/,
294 const MCDisassembler *Decoder) {
295 assert(isUInt<10>(Imm) && "10-bit encoding expected");
296 assert((Imm & (1 << 8)) == 0 && "Imm{8} should not be used");
297
298 bool IsHi = Imm & (1 << 9);
299 unsigned RegIdx = Imm & 0xff;
300 const auto *DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
301 return addOperand(Inst, DAsm->createVGPR16Operand(RegIdx, IsHi));
302}
303
// Builds a 16-bit VGPR operand from an 8-bit value: Imm{6-0} is the register
// index and Imm{7} is passed to createVGPR16Operand as the IsHi flag.
// NOTE(review): the line carrying the function name is absent from this
// listing.
304static DecodeStatus
306 const MCDisassembler *Decoder) {
307 assert(isUInt<8>(Imm) && "8-bit encoding expected");
308
309 bool IsHi = Imm & (1 << 7);
310 unsigned RegIdx = Imm & 0x7f;
311 const auto *DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
312 return addOperand(Inst, DAsm->createVGPR16Operand(RegIdx, IsHi));
313}
314
// Decodes a 9-bit source operand. When the IS_VGPR bit is set, Imm{6-0} is the
// register index and Imm{7} the IsHi flag of a 16-bit VGPR. Otherwise the low
// 8 bits are handed to decodeNonVGPRSrcOp with MandatoryLiteral = false.
// NOTE(review): the line carrying the function name is absent from this
// listing.
315template <AMDGPUDisassembler::OpWidthTy OpWidth, unsigned ImmWidth,
316 unsigned OperandSemantics>
318 uint64_t /*Addr*/,
319 const MCDisassembler *Decoder) {
320 assert(isUInt<9>(Imm) && "9-bit encoding expected");
321
322 const auto *DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
323 if (Imm & AMDGPU::EncValues::IS_VGPR) {
324 bool IsHi = Imm & (1 << 7);
325 unsigned RegIdx = Imm & 0x7f;
326 return addOperand(Inst, DAsm->createVGPR16Operand(RegIdx, IsHi));
327 }
328 return addOperand(Inst, DAsm->decodeNonVGPRSrcOp(
329 OpWidth, Imm & 0xFF, false, ImmWidth,
330 (AMDGPU::OperandSemantics)OperandSemantics));
331}
332
// Decodes a 9-bit source operand the same way as the preceding decoder, except
// that the non-VGPR path passes MandatoryLiteral = true to
// decodeNonVGPRSrcOp.
// NOTE(review): the line carrying the function name is absent from this
// listing.
333template <AMDGPUDisassembler::OpWidthTy OpWidth, unsigned ImmWidth,
334 unsigned OperandSemantics>
335static DecodeStatus
337 uint64_t /*Addr*/,
338 const MCDisassembler *Decoder) {
339 const auto *DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
340 assert(isUInt<9>(Imm) && "9-bit encoding expected");
341
342 if (Imm & AMDGPU::EncValues::IS_VGPR) {
343 bool IsHi = Imm & (1 << 7);
344 unsigned RegIdx = Imm & 0x7f;
345 return addOperand(Inst, DAsm->createVGPR16Operand(RegIdx, IsHi));
346 }
347 return addOperand(Inst, DAsm->decodeNonVGPRSrcOp(
348 OpWidth, Imm & 0xFF, true, ImmWidth,
349 (AMDGPU::OperandSemantics)OperandSemantics));
350}
351
352template <AMDGPUDisassembler::OpWidthTy OpWidth, unsigned ImmWidth,
353 unsigned OperandSemantics>
354static DecodeStatus decodeOperand_VSrcT16(MCInst &Inst, unsigned Imm,
355 uint64_t /*Addr*/,
356 const MCDisassembler *Decoder) {
357 assert(isUInt<10>(Imm) && "10-bit encoding expected");
358
359 const auto *DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
360 if (Imm & AMDGPU::EncValues::IS_VGPR) {
361 bool IsHi = Imm & (1 << 9);
362 unsigned RegIdx = Imm & 0xff;
363 return addOperand(Inst, DAsm->createVGPR16Operand(RegIdx, IsHi));
364 }
365 return addOperand(Inst, DAsm->decodeNonVGPRSrcOp(
366 OpWidth, Imm & 0xFF, false, ImmWidth,
367 (AMDGPU::OperandSemantics)OperandSemantics));
368}
369
370static DecodeStatus decodeOperand_VGPR_16(MCInst &Inst, unsigned Imm,
371 uint64_t /*Addr*/,
372 const MCDisassembler *Decoder) {
373 assert(isUInt<10>(Imm) && "10-bit encoding expected");
374 assert(Imm & AMDGPU::EncValues::IS_VGPR && "VGPR expected");
375
376 const auto *DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
377
378 bool IsHi = Imm & (1 << 9);
379 unsigned RegIdx = Imm & 0xff;
380 return addOperand(Inst, DAsm->createVGPR16Operand(RegIdx, IsHi));
381}
382
// Static decoder callback: appends to Inst the operand produced by
// AMDGPUDisassembler::decodeMandatoryLiteralConstant(Imm).
// NOTE(review): one signature line is absent from this listing.
383static DecodeStatus decodeOperand_KImmFP(MCInst &Inst, unsigned Imm,
385 const MCDisassembler *Decoder) {
386 const auto *DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
387 return addOperand(Inst, DAsm->decodeMandatoryLiteralConstant(Imm));
388}
389
390static DecodeStatus decodeOperandVOPDDstY(MCInst &Inst, unsigned Val,
391 uint64_t Addr, const void *Decoder) {
392 const auto *DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
393 return addOperand(Inst, DAsm->decodeVOPDDstYOp(Inst, Val));
394}
395
396static bool IsAGPROperand(const MCInst &Inst, int OpIdx,
397 const MCRegisterInfo *MRI) {
398 if (OpIdx < 0)
399 return false;
400
401 const MCOperand &Op = Inst.getOperand(OpIdx);
402 if (!Op.isReg())
403 return false;
404
405 MCRegister Sub = MRI->getSubReg(Op.getReg(), AMDGPU::sub0);
406 auto Reg = Sub ? Sub : Op.getReg();
407 return Reg >= AMDGPU::AGPR0 && Reg <= AMDGPU::AGPR255;
408}
409
// Decodes a load/store data register operand and appends it to Inst. Bit 8
// (256) is always set in the value passed to decodeSrcOp. On subtargets other
// than GFX90A only the low 9 bits of Imm are kept. On GFX90A, bit 9 (512) is
// additionally set when the operand about to be appended is the data operand
// and the already decoded vdst is an AGPR, or (DS instructions only) when it
// is data1 and the already decoded data0 is an AGPR.
// NOTE(review): the parameter line declaring Opw is absent from this listing.
410static DecodeStatus decodeAVLdSt(MCInst &Inst, unsigned Imm,
412 const MCDisassembler *Decoder) {
413 const auto *DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
414 if (!DAsm->isGFX90A()) {
415 Imm &= 511;
416 } else {
417 // If atomic has both vdata and vdst their register classes are tied.
418 // The bit is decoded along with the vdst, first operand. We need to
419 // change register class to AGPR if vdst was AGPR.
420 // If a DS instruction has both data0 and data1 their register classes
421 // are also tied.
422 unsigned Opc = Inst.getOpcode();
423 uint64_t TSFlags = DAsm->getMCII()->get(Opc).TSFlags;
424 uint16_t DataNameIdx = (TSFlags & SIInstrFlags::DS) ? AMDGPU::OpName::data0
425 : AMDGPU::OpName::vdata;
426 const MCRegisterInfo *MRI = DAsm->getContext().getRegisterInfo();
427 int DataIdx = AMDGPU::getNamedOperandIdx(Opc, DataNameIdx);
// The number of operands decoded so far equals the index of the operand that
// is about to be appended.
428 if ((int)Inst.getNumOperands() == DataIdx) {
429 int DstIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdst);
430 if (IsAGPROperand(Inst, DstIdx, MRI))
431 Imm |= 512;
432 }
433
434 if (TSFlags & SIInstrFlags::DS) {
435 int Data2Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::data1);
436 if ((int)Inst.getNumOperands() == Data2Idx &&
437 IsAGPROperand(Inst, DataIdx, MRI))
438 Imm |= 512;
439 }
440 }
441 return addOperand(Inst, DAsm->decodeSrcOp(Opw, Imm | 256));
442}
443
444template <AMDGPUDisassembler::OpWidthTy Opw>
445static DecodeStatus decodeAVLdSt(MCInst &Inst, unsigned Imm,
446 uint64_t /* Addr */,
447 const MCDisassembler *Decoder) {
448 return decodeAVLdSt(Inst, Imm, Opw, Decoder);
449}
450
// Decodes a 9-bit source operand with operand width OPW64 and a 64-bit
// immediate width, without a mandatory literal.
// NOTE(review): one signature line and the line with the remaining call
// arguments are absent from this listing.
451static DecodeStatus decodeOperand_VSrc_f64(MCInst &Inst, unsigned Imm,
453 const MCDisassembler *Decoder) {
454 assert(Imm < (1 << 9) && "9-bit encoding");
455 const auto *DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
456 return addOperand(Inst,
457 DAsm->decodeSrcOp(AMDGPUDisassembler::OPW64, Imm, false, 64,
459}
460
// DECODE_SDWA(X) defines the static callback decodeSDWAX, which forwards to the
// AMDGPUDisassembler member of the same name (see DECODE_OPERAND).
461#define DECODE_SDWA(DecName) \
462DECODE_OPERAND(decodeSDWA##DecName, decodeSDWA##DecName)
463
464DECODE_SDWA(Src32)
465DECODE_SDWA(Src16)
466DECODE_SDWA(VopcDst)
467
468static DecodeStatus decodeVersionImm(MCInst &Inst, unsigned Imm,
469 uint64_t /* Addr */,
470 const MCDisassembler *Decoder) {
471 const auto *DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
472 return addOperand(Inst, DAsm->decodeVersionImm(Imm));
473}
474
475#include "AMDGPUGenDisassemblerTables.inc"
476
477//===----------------------------------------------------------------------===//
478//
479//===----------------------------------------------------------------------===//
480
481template <typename T> static inline T eatBytes(ArrayRef<uint8_t>& Bytes) {
482 assert(Bytes.size() >= sizeof(T));
483 const auto Res =
484 support::endian::read<T, llvm::endianness::little>(Bytes.data());
485 Bytes = Bytes.slice(sizeof(T));
486 return Res;
487}
488
// Consumes 12 bytes from the front of Bytes: a little-endian 64-bit low part
// followed by a little-endian 32-bit high part (zero-extended to 64 bits), and
// returns them as a DecoderUInt128(Lo, Hi).
// NOTE(review): the signature line is absent from this listing.
490 assert(Bytes.size() >= 12);
491 uint64_t Lo =
492 support::endian::read<uint64_t, llvm::endianness::little>(Bytes.data());
493 Bytes = Bytes.slice(8);
494 uint64_t Hi =
495 support::endian::read<uint32_t, llvm::endianness::little>(Bytes.data());
496 Bytes = Bytes.slice(4);
497 return DecoderUInt128(Lo, Hi);
498}
499
// Consumes 16 bytes from the front of Bytes: two little-endian 64-bit words,
// low part first, and returns them as a DecoderUInt128(Lo, Hi).
// NOTE(review): the signature line is absent from this listing.
501 assert(Bytes.size() >= 16);
502 uint64_t Lo =
503 support::endian::read<uint64_t, llvm::endianness::little>(Bytes.data());
504 Bytes = Bytes.slice(8);
505 uint64_t Hi =
506 support::endian::read<uint64_t, llvm::endianness::little>(Bytes.data());
507 Bytes = Bytes.slice(8);
508 return DecoderUInt128(Lo, Hi);
509}
510
// Decodes one instruction from the front of Bytes_. At most TargetMaxInstBytes
// bytes are considered. Wider encodings are tried first (96-bit on GFX11+,
// otherwise 128-bit when FeatureGFX950Insts is present), then 64-bit, then
// 32-bit; the first decoder table that matches ends the search. The decoded
// MCInst is then patched up according to its opcode's TSFlags and named
// operands, and Size is set to the number of bytes consumed.
// NOTE(review): the first signature line and a number of statements of this
// function are absent from this listing.
512 ArrayRef<uint8_t> Bytes_,
514 raw_ostream &CS) const {
515 unsigned MaxInstBytesNum = std::min((size_t)TargetMaxInstBytes, Bytes_.size());
516 Bytes = Bytes_.slice(0, MaxInstBytesNum);
517
518 // In case the opcode is not recognized we'll assume a Size of 4 bytes (unless
519 // there are fewer bytes left). This will be overridden on success.
520 Size = std::min((size_t)4, Bytes_.size());
521
// Single-pass do/while(false): `break` leaves the table search as soon as one
// table decodes the instruction.
522 do {
523 // ToDo: better to switch encoding length using some bit predicate
524 // but it is unknown yet, so try all we can
525
526 // Try to decode DPP and SDWA first to solve conflict with VOP1 and VOP2
527 // encodings
528 if (isGFX11Plus() && Bytes.size() >= 12 ) {
529 DecoderUInt128 DecW = eat12Bytes(Bytes);
530
531 if (isGFX11() &&
532 tryDecodeInst(DecoderTableGFX1196, DecoderTableGFX11_FAKE1696, MI,
533 DecW, Address, CS))
534 break;
535
536 if (isGFX12() &&
537 tryDecodeInst(DecoderTableGFX1296, DecoderTableGFX12_FAKE1696, MI,
538 DecW, Address, CS))
539 break;
540
541 if (isGFX12() &&
542 tryDecodeInst(DecoderTableGFX12W6496, MI, DecW, Address, CS))
543 break;
544
545 // Reinitialize Bytes
546 Bytes = Bytes_.slice(0, MaxInstBytesNum);
547
548 } else if (Bytes.size() >= 16 &&
549 STI.hasFeature(AMDGPU::FeatureGFX950Insts)) {
550 DecoderUInt128 DecW = eat16Bytes(Bytes);
551 if (tryDecodeInst(DecoderTableGFX940128, MI, DecW, Address, CS))
552 break;
553
554 // Reinitialize Bytes
555 Bytes = Bytes_.slice(0, MaxInstBytesNum);
556 }
557
// Try decode 64-bit instruction
558 if (Bytes.size() >= 8) {
559 const uint64_t QW = eatBytes<uint64_t>(Bytes);
560
561 if (STI.hasFeature(AMDGPU::FeatureGFX10_BEncoding) &&
562 tryDecodeInst(DecoderTableGFX10_B64, MI, QW, Address, CS))
563 break;
564
565 if (STI.hasFeature(AMDGPU::FeatureUnpackedD16VMem) &&
566 tryDecodeInst(DecoderTableGFX80_UNPACKED64, MI, QW, Address, CS))
567 break;
568
569 if (STI.hasFeature(AMDGPU::FeatureGFX950Insts) &&
570 tryDecodeInst(DecoderTableGFX95064, MI, QW, Address, CS))
571 break;
572
573 // Some GFX9 subtargets repurposed the v_mad_mix_f32, v_mad_mixlo_f16 and
574 // v_mad_mixhi_f16 for FMA variants. Try to decode using this special
575 // table first so we print the correct name.
576 if (STI.hasFeature(AMDGPU::FeatureFmaMixInsts) &&
577 tryDecodeInst(DecoderTableGFX9_DL64, MI, QW, Address, CS))
578 break;
579
580 if (STI.hasFeature(AMDGPU::FeatureGFX940Insts) &&
581 tryDecodeInst(DecoderTableGFX94064, MI, QW, Address, CS))
582 break;
583
584 if (STI.hasFeature(AMDGPU::FeatureGFX90AInsts) &&
585 tryDecodeInst(DecoderTableGFX90A64, MI, QW, Address, CS))
586 break;
587
588 if ((isVI() || isGFX9()) &&
589 tryDecodeInst(DecoderTableGFX864, MI, QW, Address, CS))
590 break;
591
592 if (isGFX9() && tryDecodeInst(DecoderTableGFX964, MI, QW, Address, CS))
593 break;
594
595 if (isGFX10() && tryDecodeInst(DecoderTableGFX1064, MI, QW, Address, CS))
596 break;
597
598 if (isGFX12() &&
599 tryDecodeInst(DecoderTableGFX1264, DecoderTableGFX12_FAKE1664, MI, QW,
600 Address, CS))
601 break;
602
603 if (isGFX11() &&
604 tryDecodeInst(DecoderTableGFX1164, DecoderTableGFX11_FAKE1664, MI, QW,
605 Address, CS))
606 break;
607
608 if (isGFX11() &&
609 tryDecodeInst(DecoderTableGFX11W6464, MI, QW, Address, CS))
610 break;
611
612 if (isGFX12() &&
613 tryDecodeInst(DecoderTableGFX12W6464, MI, QW, Address, CS))
614 break;
615
616 // Reinitialize Bytes
617 Bytes = Bytes_.slice(0, MaxInstBytesNum);
618 }
619
620 // Try decode 32-bit instruction
621 if (Bytes.size() >= 4) {
622 const uint32_t DW = eatBytes<uint32_t>(Bytes);
623
624 if ((isVI() || isGFX9()) &&
625 tryDecodeInst(DecoderTableGFX832, MI, DW, Address, CS))
626 break;
627
628 if (tryDecodeInst(DecoderTableAMDGPU32, MI, DW, Address, CS))
629 break;
630
631 if (isGFX9() && tryDecodeInst(DecoderTableGFX932, MI, DW, Address, CS))
632 break;
633
634 if (STI.hasFeature(AMDGPU::FeatureGFX950Insts) &&
635 tryDecodeInst(DecoderTableGFX95032, MI, DW, Address, CS))
636 break;
637
638 if (STI.hasFeature(AMDGPU::FeatureGFX90AInsts) &&
639 tryDecodeInst(DecoderTableGFX90A32, MI, DW, Address, CS))
640 break;
641
642 if (STI.hasFeature(AMDGPU::FeatureGFX10_BEncoding) &&
643 tryDecodeInst(DecoderTableGFX10_B32, MI, DW, Address, CS))
644 break;
645
646 if (isGFX10() && tryDecodeInst(DecoderTableGFX1032, MI, DW, Address, CS))
647 break;
648
649 if (isGFX11() &&
650 tryDecodeInst(DecoderTableGFX1132, DecoderTableGFX11_FAKE1632, MI, DW,
651 Address, CS))
652 break;
653
654 if (isGFX12() &&
655 tryDecodeInst(DecoderTableGFX1232, DecoderTableGFX12_FAKE1632, MI, DW,
656 Address, CS))
657 break;
658 }
659
661 } while (false);
662
// Post-decode fix-ups, selected by the opcode's TSFlags and named operands.
// NOTE(review): the statements controlled by several of the conditions below
// are absent from this listing.
663 if (MCII->get(MI.getOpcode()).TSFlags & SIInstrFlags::DPP) {
664 if (isMacDPP(MI))
666
667 if (MCII->get(MI.getOpcode()).TSFlags & SIInstrFlags::VOP3P)
669 else if ((MCII->get(MI.getOpcode()).TSFlags & SIInstrFlags::VOPC) ||
670 AMDGPU::isVOPC64DPP(MI.getOpcode()))
671 convertVOPCDPPInst(MI); // Special VOP3 case
672 else if (AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::dpp8) !=
673 -1)
675 else if (MCII->get(MI.getOpcode()).TSFlags & SIInstrFlags::VOP3)
676 convertVOP3DPPInst(MI); // Regular VOP3 case
677 }
678
680
681 if (AMDGPU::isMAC(MI.getOpcode())) {
682 // Insert dummy unused src2_modifiers.
684 AMDGPU::OpName::src2_modifiers);
685 }
686
687 if (MI.getOpcode() == AMDGPU::V_CVT_SR_BF8_F32_e64_dpp ||
688 MI.getOpcode() == AMDGPU::V_CVT_SR_FP8_F32_e64_dpp) {
689 // Insert dummy unused src2_modifiers.
691 AMDGPU::OpName::src2_modifiers);
692 }
693
694 if ((MCII->get(MI.getOpcode()).TSFlags & SIInstrFlags::DS) &&
696 insertNamedMCOperand(MI, MCOperand::createImm(0), AMDGPU::OpName::gds);
697 }
698
699 if (MCII->get(MI.getOpcode()).TSFlags &
701 int CPolPos = AMDGPU::getNamedOperandIdx(MI.getOpcode(),
702 AMDGPU::OpName::cpol);
703 if (CPolPos != -1) {
704 unsigned CPol =
705 (MCII->get(MI.getOpcode()).TSFlags & SIInstrFlags::IsAtomicRet) ?
// If the cpol operand has not been decoded yet it is inserted; otherwise a
// non-zero CPol is OR-ed into the existing immediate.
707 if (MI.getNumOperands() <= (unsigned)CPolPos) {
709 AMDGPU::OpName::cpol);
710 } else if (CPol) {
711 MI.getOperand(CPolPos).setImm(MI.getOperand(CPolPos).getImm() | CPol);
712 }
713 }
714 }
715
716 if ((MCII->get(MI.getOpcode()).TSFlags &
718 (STI.hasFeature(AMDGPU::FeatureGFX90AInsts))) {
719 // GFX90A lost TFE, its place is occupied by ACC.
720 int TFEOpIdx =
721 AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::tfe);
722 if (TFEOpIdx != -1) {
723 auto *TFEIter = MI.begin();
724 std::advance(TFEIter, TFEOpIdx);
725 MI.insert(TFEIter, MCOperand::createImm(0));
726 }
727 }
728
729 if (MCII->get(MI.getOpcode()).TSFlags &
731 int SWZOpIdx =
732 AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::swz);
733 if (SWZOpIdx != -1) {
734 auto *SWZIter = MI.begin();
735 std::advance(SWZIter, SWZOpIdx);
736 MI.insert(SWZIter, MCOperand::createImm(0));
737 }
738 }
739
740 if (MCII->get(MI.getOpcode()).TSFlags & SIInstrFlags::MIMG) {
741 int VAddr0Idx =
742 AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::vaddr0);
743 int RsrcIdx =
744 AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::srsrc);
// Operand slots between vaddr0 and srsrc are filled from the bytes that
// follow the decoded encoding: one byte per register, consumed in whole
// 4-byte words.
745 unsigned NSAArgs = RsrcIdx - VAddr0Idx - 1;
746 if (VAddr0Idx >= 0 && NSAArgs > 0) {
747 unsigned NSAWords = (NSAArgs + 3) / 4;
748 if (Bytes.size() < 4 * NSAWords)
750 for (unsigned i = 0; i < NSAArgs; ++i) {
751 const unsigned VAddrIdx = VAddr0Idx + 1 + i;
752 auto VAddrRCID =
753 MCII->get(MI.getOpcode()).operands()[VAddrIdx].RegClass;
754 MI.insert(MI.begin() + VAddrIdx, createRegOperand(VAddrRCID, Bytes[i]));
755 }
756 Bytes = Bytes.slice(4 * NSAWords);
757 }
758
760 }
761
762 if (MCII->get(MI.getOpcode()).TSFlags &
765
766 if (MCII->get(MI.getOpcode()).TSFlags & SIInstrFlags::EXP)
768
769 if (MCII->get(MI.getOpcode()).TSFlags & SIInstrFlags::VINTERP)
771
772 if (MCII->get(MI.getOpcode()).TSFlags & SIInstrFlags::SDWA)
774
775 if (MCII->get(MI.getOpcode()).TSFlags & SIInstrFlags::IsMAI)
777
// If vdst_in is tied to another operand but is missing, is not a register, or
// names a different register than the tied operand, replace it with a copy of
// the tied operand's register.
778 int VDstIn_Idx = AMDGPU::getNamedOperandIdx(MI.getOpcode(),
779 AMDGPU::OpName::vdst_in);
780 if (VDstIn_Idx != -1) {
781 int Tied = MCII->get(MI.getOpcode()).getOperandConstraint(VDstIn_Idx,
783 if (Tied != -1 && (MI.getNumOperands() <= (unsigned)VDstIn_Idx ||
784 !MI.getOperand(VDstIn_Idx).isReg() ||
785 MI.getOperand(VDstIn_Idx).getReg() != MI.getOperand(Tied).getReg())) {
786 if (MI.getNumOperands() > (unsigned)VDstIn_Idx)
787 MI.erase(&MI.getOperand(VDstIn_Idx));
789 MCOperand::createReg(MI.getOperand(Tied).getReg()),
790 AMDGPU::OpName::vdst_in);
791 }
792 }
793
794 int ImmLitIdx =
795 AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::imm);
796 bool IsSOPK = MCII->get(MI.getOpcode()).TSFlags & SIInstrFlags::SOPK;
797 if (ImmLitIdx != -1 && !IsSOPK)
798 convertFMAanyK(MI, ImmLitIdx);
799
// Bytes consumed = bytes offered to the decoders minus what is left in Bytes.
800 Size = MaxInstBytesNum - Bytes.size();
802}
803
// On subtargets with FeatureGFX11Insts, inserts zero-valued vm and compr
// operands at their named positions.
// NOTE(review): the signature line is absent from this listing.
805 if (STI.hasFeature(AMDGPU::FeatureGFX11Insts)) {
806 // The MCInst still has these fields even though they are no longer encoded
807 // in the GFX11 instruction.
808 insertNamedMCOperand(MI, MCOperand::createImm(0), AMDGPU::OpName::vm);
809 insertNamedMCOperand(MI, MCOperand::createImm(0), AMDGPU::OpName::compr);
810 }
811}
812
// For the listed V_INTERP_P10/P2 (plain and RTZ) F16_F32 "inreg" opcodes, in
// their t16 and fake16, gfx11 and gfx12 flavours, inserts a zero-valued op_sel
// operand at its named position.
// NOTE(review): the signature line is absent from this listing.
815 if (MI.getOpcode() == AMDGPU::V_INTERP_P10_F16_F32_inreg_t16_gfx11 ||
816 MI.getOpcode() == AMDGPU::V_INTERP_P10_F16_F32_inreg_fake16_gfx11 ||
817 MI.getOpcode() == AMDGPU::V_INTERP_P10_F16_F32_inreg_t16_gfx12 ||
818 MI.getOpcode() == AMDGPU::V_INTERP_P10_F16_F32_inreg_fake16_gfx12 ||
819 MI.getOpcode() == AMDGPU::V_INTERP_P10_RTZ_F16_F32_inreg_t16_gfx11 ||
820 MI.getOpcode() == AMDGPU::V_INTERP_P10_RTZ_F16_F32_inreg_fake16_gfx11 ||
821 MI.getOpcode() == AMDGPU::V_INTERP_P10_RTZ_F16_F32_inreg_t16_gfx12 ||
822 MI.getOpcode() == AMDGPU::V_INTERP_P10_RTZ_F16_F32_inreg_fake16_gfx12 ||
823 MI.getOpcode() == AMDGPU::V_INTERP_P2_F16_F32_inreg_t16_gfx11 ||
824 MI.getOpcode() == AMDGPU::V_INTERP_P2_F16_F32_inreg_fake16_gfx11 ||
825 MI.getOpcode() == AMDGPU::V_INTERP_P2_F16_F32_inreg_t16_gfx12 ||
826 MI.getOpcode() == AMDGPU::V_INTERP_P2_F16_F32_inreg_fake16_gfx12 ||
827 MI.getOpcode() == AMDGPU::V_INTERP_P2_RTZ_F16_F32_inreg_t16_gfx11 ||
828 MI.getOpcode() == AMDGPU::V_INTERP_P2_RTZ_F16_F32_inreg_fake16_gfx11 ||
829 MI.getOpcode() == AMDGPU::V_INTERP_P2_RTZ_F16_F32_inreg_t16_gfx12 ||
830 MI.getOpcode() == AMDGPU::V_INTERP_P2_RTZ_F16_F32_inreg_fake16_gfx12) {
831 // The MCInst has this field that is not directly encoded in the
832 // instruction.
833 insertNamedMCOperand(MI, MCOperand::createImm(0), AMDGPU::OpName::op_sel);
834 }
835}
836
// Adds operands that the decoded instruction does not carry, depending on the
// subtarget: with FeatureGFX9 or FeatureGFX10 a zero clamp is inserted when
// the opcode has an sdst operand; with FeatureVolcanicIslands either an sdst
// operand or a zero omod is inserted.
// NOTE(review): the signature line and the start of the sdst insertion call
// are absent from this listing.
838 if (STI.hasFeature(AMDGPU::FeatureGFX9) ||
839 STI.hasFeature(AMDGPU::FeatureGFX10)) {
840 if (AMDGPU::hasNamedOperand(MI.getOpcode(), AMDGPU::OpName::sdst))
841 // VOPC - insert clamp
842 insertNamedMCOperand(MI, MCOperand::createImm(0), AMDGPU::OpName::clamp);
843 } else if (STI.hasFeature(AMDGPU::FeatureVolcanicIslands)) {
844 int SDst = AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::sdst);
845 if (SDst != -1) {
846 // VOPC - insert VCC register as sdst
848 AMDGPU::OpName::sdst);
849 } else {
850 // VOP1/2 - insert omod if present in instruction
851 insertNamedMCOperand(MI, MCOperand::createImm(0), AMDGPU::OpName::omod);
852 }
853 }
854}
855
856/// Adjust the register values used by V_MFMA_F8F6F4_f8_f8 instructions to the
857/// appropriate subregister for the used format width.
/// For NumRegs of 4 or 6 the register in MO is replaced by its sub0..sub3 or
/// sub0..sub5 sub-register respectively; 8 leaves MO untouched; any other
/// value is treated as unreachable.
// NOTE(review): the first signature line is absent from this listing.
859 MCOperand &MO, uint8_t NumRegs) {
860 switch (NumRegs) {
861 case 4:
862 return MO.setReg(MRI.getSubReg(MO.getReg(), AMDGPU::sub0_sub1_sub2_sub3));
863 case 6:
864 return MO.setReg(
865 MRI.getSubReg(MO.getReg(), AMDGPU::sub0_sub1_sub2_sub3_sub4_sub5));
866 case 8:
867 // No-op in cases where one operand is still f8/bf8.
868 return;
869 default:
870 llvm_unreachable("Unexpected size for mfma f8f6f4 operand");
871 }
872}
873
874/// f8f6f4 instructions have different pseudos depending on the used formats. In
875/// the disassembler table, we only have the variants with the largest register
876/// classes which assume using an fp8/bf8 format for both operands. The actual
877/// register class depends on the format in blgp and cbsz operands. Adjust the
878/// register classes depending on the used format.
// NOTE(review): the signature line is absent from this listing.
880 int BlgpIdx =
881 AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::blgp);
// Nothing to do for opcodes without a blgp operand.
882 if (BlgpIdx == -1)
883 return;
884
// NOTE(review): CbszIdx is used below without a -1 check; this presumably
// relies on every opcode with blgp also having cbsz - confirm.
885 int CbszIdx =
886 AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::cbsz);
887
888 unsigned CBSZ = MI.getOperand(CbszIdx).getImm();
889 unsigned BLGP = MI.getOperand(BlgpIdx).getImm();
890
891 const AMDGPU::MFMA_F8F6F4_Info *AdjustedRegClassOpcode =
892 AMDGPU::getMFMA_F8F6F4_WithFormatArgs(CBSZ, BLGP, MI.getOpcode());
// Keep the instruction as decoded when no variant is found or the variant is
// the opcode MI already has.
893 if (!AdjustedRegClassOpcode ||
894 AdjustedRegClassOpcode->Opcode == MI.getOpcode())
895 return;
896
// Switch to the variant opcode, then narrow src0/src1 to the register counts
// the variant reports.
897 MI.setOpcode(AdjustedRegClassOpcode->Opcode);
898 int Src0Idx =
899 AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::src0);
900 int Src1Idx =
901 AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::src1);
902 adjustMFMA_F8F6F4OpRegClass(MRI, MI.getOperand(Src0Idx),
903 AdjustedRegClassOpcode->NumRegsSrcA);
904 adjustMFMA_F8F6F4OpRegClass(MRI, MI.getOperand(Src1Idx),
905 AdjustedRegClassOpcode->NumRegsSrcB);
906}
907
// Bit masks gathered from the srcN_modifiers operands of an instruction. All
// four masks start out as 0.
// NOTE(review): the line that opens this aggregate is absent from this
// listing.
909 unsigned OpSel = 0;
910 unsigned OpSelHi = 0;
911 unsigned NegLo = 0;
912 unsigned NegHi = 0;
913};
914
915// Reconstruct values of VOP3/VOP3P operands such as op_sel.
916// Note that these values do not affect disassembler output,
917// so this is only necessary for consistency with src_modifiers.
// Bit J of each returned mask comes from the srcJ_modifiers operand; source
// operands the opcode does not have are skipped. For non-VOP3P instructions,
// bit 3 of OpSel is taken from DST_OP_SEL in src0_modifiers.
// NOTE(review): the first signature line is absent from this listing.
919 bool IsVOP3P = false) {
920 VOPModifiers Modifiers;
921 unsigned Opc = MI.getOpcode();
922 const int ModOps[] = {AMDGPU::OpName::src0_modifiers,
923 AMDGPU::OpName::src1_modifiers,
924 AMDGPU::OpName::src2_modifiers};
925 for (int J = 0; J < 3; ++J) {
926 int OpIdx = AMDGPU::getNamedOperandIdx(Opc, ModOps[J]);
927 if (OpIdx == -1)
928 continue;
929
930 unsigned Val = MI.getOperand(OpIdx).getImm();
931
932 Modifiers.OpSel |= !!(Val & SISrcMods::OP_SEL_0) << J;
933 if (IsVOP3P) {
934 Modifiers.OpSelHi |= !!(Val & SISrcMods::OP_SEL_1) << J;
935 Modifiers.NegLo |= !!(Val & SISrcMods::NEG) << J;
936 Modifiers.NegHi |= !!(Val & SISrcMods::NEG_HI) << J;
937 } else if (J == 0) {
938 Modifiers.OpSel |= !!(Val & SISrcMods::DST_OP_SEL) << 3;
939 }
940 }
941
942 return Modifiers;
943}
944
945// Instructions decode the op_sel/suffix bits into the src_modifier
946// operands. Copy those bits into the src operands for true16 VGPRs.
948 const unsigned Opc = MI.getOpcode();
949 const MCRegisterClass &ConversionRC =
950 MRI.getRegClass(AMDGPU::VGPR_16RegClassID);
951 constexpr std::array<std::tuple<int, int, unsigned>, 4> OpAndOpMods = {
952 {{AMDGPU::OpName::src0, AMDGPU::OpName::src0_modifiers,
954 {AMDGPU::OpName::src1, AMDGPU::OpName::src1_modifiers,
956 {AMDGPU::OpName::src2, AMDGPU::OpName::src2_modifiers,
958 {AMDGPU::OpName::vdst, AMDGPU::OpName::src0_modifiers,
960 for (const auto &[OpName, OpModsName, OpSelMask] : OpAndOpMods) {
961 int OpIdx = AMDGPU::getNamedOperandIdx(Opc, OpName);
962 int OpModsIdx = AMDGPU::getNamedOperandIdx(Opc, OpModsName);
963 if (OpIdx == -1 || OpModsIdx == -1)
964 continue;
965 MCOperand &Op = MI.getOperand(OpIdx);
966 if (!Op.isReg())
967 continue;
968 if (!ConversionRC.contains(Op.getReg()))
969 continue;
970 unsigned OpEnc = MRI.getEncodingValue(Op.getReg());
971 const MCOperand &OpMods = MI.getOperand(OpModsIdx);
972 unsigned ModVal = OpMods.getImm();
973 if (ModVal & OpSelMask) { // isHi
974 unsigned RegIdx = OpEnc & AMDGPU::HWEncoding::REG_IDX_MASK;
975 Op.setReg(ConversionRC.getRegister(RegIdx * 2 + 1));
976 }
977 }
978}
979
980// MAC opcodes have special old and src2 operands.
981// src2 is tied to dst, while old is not tied (but assumed to be).
983 constexpr int DST_IDX = 0;
984 auto Opcode = MI.getOpcode();
985 const auto &Desc = MCII->get(Opcode);
986 auto OldIdx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::old);
987
988 if (OldIdx != -1 && Desc.getOperandConstraint(
989 OldIdx, MCOI::OperandConstraint::TIED_TO) == -1) {
990 assert(AMDGPU::hasNamedOperand(Opcode, AMDGPU::OpName::src2));
991 assert(Desc.getOperandConstraint(
992 AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2),
994 (void)DST_IDX;
995 return true;
996 }
997
998 return false;
999}
1000
1001// Create dummy old operand and insert dummy unused src2_modifiers
1003 assert(MI.getNumOperands() + 1 < MCII->get(MI.getOpcode()).getNumOperands());
1004 insertNamedMCOperand(MI, MCOperand::createReg(0), AMDGPU::OpName::old);
1006 AMDGPU::OpName::src2_modifiers);
1007}
1008
1010 unsigned Opc = MI.getOpcode();
1011
1012 int VDstInIdx =
1013 AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::vdst_in);
1014 if (VDstInIdx != -1)
1015 insertNamedMCOperand(MI, MI.getOperand(0), AMDGPU::OpName::vdst_in);
1016
1017 unsigned DescNumOps = MCII->get(Opc).getNumOperands();
1018 if (MI.getNumOperands() < DescNumOps &&
1019 AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::op_sel)) {
1021 auto Mods = collectVOPModifiers(MI);
1023 AMDGPU::OpName::op_sel);
1024 } else {
1025 // Insert dummy unused src modifiers.
1026 if (MI.getNumOperands() < DescNumOps &&
1027 AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::src0_modifiers))
1029 AMDGPU::OpName::src0_modifiers);
1030
1031 if (MI.getNumOperands() < DescNumOps &&
1032 AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::src1_modifiers))
1034 AMDGPU::OpName::src1_modifiers);
1035 }
1036}
1037
1040
1041 int VDstInIdx =
1042 AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::vdst_in);
1043 if (VDstInIdx != -1)
1044 insertNamedMCOperand(MI, MI.getOperand(0), AMDGPU::OpName::vdst_in);
1045
1046 unsigned Opc = MI.getOpcode();
1047 unsigned DescNumOps = MCII->get(Opc).getNumOperands();
1048 if (MI.getNumOperands() < DescNumOps &&
1049 AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::op_sel)) {
1050 auto Mods = collectVOPModifiers(MI);
1052 AMDGPU::OpName::op_sel);
1053 }
1054}
1055
// Note that before gfx10, the MIMG encoding provided no information about
// VADDR size. Consequently, decoded instructions always show the address as
// if it has 1 dword, which may not match the actual address size.
1060 auto TSFlags = MCII->get(MI.getOpcode()).TSFlags;
1061
1062 int VDstIdx = AMDGPU::getNamedOperandIdx(MI.getOpcode(),
1063 AMDGPU::OpName::vdst);
1064
1065 int VDataIdx = AMDGPU::getNamedOperandIdx(MI.getOpcode(),
1066 AMDGPU::OpName::vdata);
1067 int VAddr0Idx =
1068 AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::vaddr0);
1069 int RsrcOpName = (TSFlags & SIInstrFlags::MIMG) ? AMDGPU::OpName::srsrc
1070 : AMDGPU::OpName::rsrc;
1071 int RsrcIdx = AMDGPU::getNamedOperandIdx(MI.getOpcode(), RsrcOpName);
1072 int DMaskIdx = AMDGPU::getNamedOperandIdx(MI.getOpcode(),
1073 AMDGPU::OpName::dmask);
1074
1075 int TFEIdx = AMDGPU::getNamedOperandIdx(MI.getOpcode(),
1076 AMDGPU::OpName::tfe);
1077 int D16Idx = AMDGPU::getNamedOperandIdx(MI.getOpcode(),
1078 AMDGPU::OpName::d16);
1079
1080 const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(MI.getOpcode());
1081 const AMDGPU::MIMGBaseOpcodeInfo *BaseOpcode =
1083
1084 assert(VDataIdx != -1);
1085 if (BaseOpcode->BVH) {
1086 // Add A16 operand for intersect_ray instructions
1087 addOperand(MI, MCOperand::createImm(BaseOpcode->A16));
1088 return;
1089 }
1090
1091 bool IsAtomic = (VDstIdx != -1);
1092 bool IsGather4 = TSFlags & SIInstrFlags::Gather4;
1093 bool IsVSample = TSFlags & SIInstrFlags::VSAMPLE;
1094 bool IsNSA = false;
1095 bool IsPartialNSA = false;
1096 unsigned AddrSize = Info->VAddrDwords;
1097
1098 if (isGFX10Plus()) {
1099 unsigned DimIdx =
1100 AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::dim);
1101 int A16Idx =
1102 AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::a16);
1103 const AMDGPU::MIMGDimInfo *Dim =
1104 AMDGPU::getMIMGDimInfoByEncoding(MI.getOperand(DimIdx).getImm());
1105 const bool IsA16 = (A16Idx != -1 && MI.getOperand(A16Idx).getImm());
1106
1107 AddrSize =
1108 AMDGPU::getAddrSizeMIMGOp(BaseOpcode, Dim, IsA16, AMDGPU::hasG16(STI));
1109
1110 // VSAMPLE insts that do not use vaddr3 behave the same as NSA forms.
1111 // VIMAGE insts other than BVH never use vaddr4.
1112 IsNSA = Info->MIMGEncoding == AMDGPU::MIMGEncGfx10NSA ||
1113 Info->MIMGEncoding == AMDGPU::MIMGEncGfx11NSA ||
1114 Info->MIMGEncoding == AMDGPU::MIMGEncGfx12;
1115 if (!IsNSA) {
1116 if (!IsVSample && AddrSize > 12)
1117 AddrSize = 16;
1118 } else {
1119 if (AddrSize > Info->VAddrDwords) {
1120 if (!STI.hasFeature(AMDGPU::FeaturePartialNSAEncoding)) {
1121 // The NSA encoding does not contain enough operands for the
1122 // combination of base opcode / dimension. Should this be an error?
1123 return;
1124 }
1125 IsPartialNSA = true;
1126 }
1127 }
1128 }
1129
1130 unsigned DMask = MI.getOperand(DMaskIdx).getImm() & 0xf;
1131 unsigned DstSize = IsGather4 ? 4 : std::max(llvm::popcount(DMask), 1);
1132
1133 bool D16 = D16Idx >= 0 && MI.getOperand(D16Idx).getImm();
1134 if (D16 && AMDGPU::hasPackedD16(STI)) {
1135 DstSize = (DstSize + 1) / 2;
1136 }
1137
1138 if (TFEIdx != -1 && MI.getOperand(TFEIdx).getImm())
1139 DstSize += 1;
1140
1141 if (DstSize == Info->VDataDwords && AddrSize == Info->VAddrDwords)
1142 return;
1143
1144 int NewOpcode =
1145 AMDGPU::getMIMGOpcode(Info->BaseOpcode, Info->MIMGEncoding, DstSize, AddrSize);
1146 if (NewOpcode == -1)
1147 return;
1148
1149 // Widen the register to the correct number of enabled channels.
1150 MCRegister NewVdata;
1151 if (DstSize != Info->VDataDwords) {
1152 auto DataRCID = MCII->get(NewOpcode).operands()[VDataIdx].RegClass;
1153
1154 // Get first subregister of VData
1155 MCRegister Vdata0 = MI.getOperand(VDataIdx).getReg();
1156 MCRegister VdataSub0 = MRI.getSubReg(Vdata0, AMDGPU::sub0);
1157 Vdata0 = (VdataSub0 != 0)? VdataSub0 : Vdata0;
1158
1159 NewVdata = MRI.getMatchingSuperReg(Vdata0, AMDGPU::sub0,
1160 &MRI.getRegClass(DataRCID));
1161 if (!NewVdata) {
1162 // It's possible to encode this such that the low register + enabled
1163 // components exceeds the register count.
1164 return;
1165 }
1166 }
1167
1168 // If not using NSA on GFX10+, widen vaddr0 address register to correct size.
1169 // If using partial NSA on GFX11+ widen last address register.
1170 int VAddrSAIdx = IsPartialNSA ? (RsrcIdx - 1) : VAddr0Idx;
1171 MCRegister NewVAddrSA;
1172 if (STI.hasFeature(AMDGPU::FeatureNSAEncoding) && (!IsNSA || IsPartialNSA) &&
1173 AddrSize != Info->VAddrDwords) {
1174 MCRegister VAddrSA = MI.getOperand(VAddrSAIdx).getReg();
1175 MCRegister VAddrSubSA = MRI.getSubReg(VAddrSA, AMDGPU::sub0);
1176 VAddrSA = VAddrSubSA ? VAddrSubSA : VAddrSA;
1177
1178 auto AddrRCID = MCII->get(NewOpcode).operands()[VAddrSAIdx].RegClass;
1179 NewVAddrSA = MRI.getMatchingSuperReg(VAddrSA, AMDGPU::sub0,
1180 &MRI.getRegClass(AddrRCID));
1181 if (!NewVAddrSA)
1182 return;
1183 }
1184
1185 MI.setOpcode(NewOpcode);
1186
1187 if (NewVdata != AMDGPU::NoRegister) {
1188 MI.getOperand(VDataIdx) = MCOperand::createReg(NewVdata);
1189
1190 if (IsAtomic) {
1191 // Atomic operations have an additional operand (a copy of data)
1192 MI.getOperand(VDstIdx) = MCOperand::createReg(NewVdata);
1193 }
1194 }
1195
1196 if (NewVAddrSA) {
1197 MI.getOperand(VAddrSAIdx) = MCOperand::createReg(NewVAddrSA);
1198 } else if (IsNSA) {
1199 assert(AddrSize <= Info->VAddrDwords);
1200 MI.erase(MI.begin() + VAddr0Idx + AddrSize,
1201 MI.begin() + VAddr0Idx + Info->VAddrDwords);
1202 }
1203}
1204
1205// Opsel and neg bits are used in src_modifiers and standalone operands. Autogen
1206// decoder only adds to src_modifiers, so manually add the bits to the other
1207// operands.
1209 unsigned Opc = MI.getOpcode();
1210 unsigned DescNumOps = MCII->get(Opc).getNumOperands();
1211 auto Mods = collectVOPModifiers(MI, true);
1212
1213 if (MI.getNumOperands() < DescNumOps &&
1214 AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::vdst_in))
1215 insertNamedMCOperand(MI, MCOperand::createImm(0), AMDGPU::OpName::vdst_in);
1216
1217 if (MI.getNumOperands() < DescNumOps &&
1218 AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::op_sel))
1220 AMDGPU::OpName::op_sel);
1221 if (MI.getNumOperands() < DescNumOps &&
1222 AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::op_sel_hi))
1224 AMDGPU::OpName::op_sel_hi);
1225 if (MI.getNumOperands() < DescNumOps &&
1226 AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::neg_lo))
1228 AMDGPU::OpName::neg_lo);
1229 if (MI.getNumOperands() < DescNumOps &&
1230 AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::neg_hi))
1232 AMDGPU::OpName::neg_hi);
1233}
1234
1235// Create dummy old operand and insert optional operands
1237 unsigned Opc = MI.getOpcode();
1238 unsigned DescNumOps = MCII->get(Opc).getNumOperands();
1239
1240 if (MI.getNumOperands() < DescNumOps &&
1241 AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::old))
1242 insertNamedMCOperand(MI, MCOperand::createReg(0), AMDGPU::OpName::old);
1243
1244 if (MI.getNumOperands() < DescNumOps &&
1245 AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::src0_modifiers))
1247 AMDGPU::OpName::src0_modifiers);
1248
1249 if (MI.getNumOperands() < DescNumOps &&
1250 AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::src1_modifiers))
1252 AMDGPU::OpName::src1_modifiers);
1253}
1254
1255void AMDGPUDisassembler::convertFMAanyK(MCInst &MI, int ImmLitIdx) const {
1256 assert(HasLiteral && "Should have decoded a literal");
1257 const MCInstrDesc &Desc = MCII->get(MI.getOpcode());
1258 unsigned DescNumOps = Desc.getNumOperands();
1260 AMDGPU::OpName::immDeferred);
1261 assert(DescNumOps == MI.getNumOperands());
1262 for (unsigned I = 0; I < DescNumOps; ++I) {
1263 auto &Op = MI.getOperand(I);
1264 auto OpType = Desc.operands()[I].OperandType;
1265 bool IsDeferredOp = (OpType == AMDGPU::OPERAND_REG_IMM_FP32_DEFERRED ||
1267 if (Op.isImm() && Op.getImm() == AMDGPU::EncValues::LITERAL_CONST &&
1268 IsDeferredOp)
1269 Op.setImm(Literal);
1270 }
1271}
1272
1273const char* AMDGPUDisassembler::getRegClassName(unsigned RegClassID) const {
1274 return getContext().getRegisterInfo()->
1275 getRegClassName(&AMDGPUMCRegisterClasses[RegClassID]);
1276}
1277
1278inline
1280 const Twine& ErrMsg) const {
1281 *CommentStream << "Error: " + ErrMsg;
1282
1283 // ToDo: add support for error operands to MCInst.h
1284 // return MCOperand::createError(V);
1285 return MCOperand();
1286}
1287
1288inline
1291}
1292
1293inline
1295 unsigned Val) const {
1296 const auto& RegCl = AMDGPUMCRegisterClasses[RegClassID];
1297 if (Val >= RegCl.getNumRegs())
1298 return errOperand(Val, Twine(getRegClassName(RegClassID)) +
1299 ": unknown register " + Twine(Val));
1300 return createRegOperand(RegCl.getRegister(Val));
1301}
1302
1303inline
1305 unsigned Val) const {
1306 // ToDo: SI/CI have 104 SGPRs, VI - 102
1307 // Valery: here we accepting as much as we can, let assembler sort it out
1308 int shift = 0;
1309 switch (SRegClassID) {
1310 case AMDGPU::SGPR_32RegClassID:
1311 case AMDGPU::TTMP_32RegClassID:
1312 break;
1313 case AMDGPU::SGPR_64RegClassID:
1314 case AMDGPU::TTMP_64RegClassID:
1315 shift = 1;
1316 break;
1317 case AMDGPU::SGPR_96RegClassID:
1318 case AMDGPU::TTMP_96RegClassID:
1319 case AMDGPU::SGPR_128RegClassID:
1320 case AMDGPU::TTMP_128RegClassID:
1321 // ToDo: unclear if s[100:104] is available on VI. Can we use VCC as SGPR in
1322 // this bundle?
1323 case AMDGPU::SGPR_256RegClassID:
1324 case AMDGPU::TTMP_256RegClassID:
1325 // ToDo: unclear if s[96:104] is available on VI. Can we use VCC as SGPR in
1326 // this bundle?
1327 case AMDGPU::SGPR_288RegClassID:
1328 case AMDGPU::TTMP_288RegClassID:
1329 case AMDGPU::SGPR_320RegClassID:
1330 case AMDGPU::TTMP_320RegClassID:
1331 case AMDGPU::SGPR_352RegClassID:
1332 case AMDGPU::TTMP_352RegClassID:
1333 case AMDGPU::SGPR_384RegClassID:
1334 case AMDGPU::TTMP_384RegClassID:
1335 case AMDGPU::SGPR_512RegClassID:
1336 case AMDGPU::TTMP_512RegClassID:
1337 shift = 2;
1338 break;
1339 // ToDo: unclear if s[88:104] is available on VI. Can we use VCC as SGPR in
1340 // this bundle?
1341 default:
1342 llvm_unreachable("unhandled register class");
1343 }
1344
1345 if (Val % (1 << shift)) {
1346 *CommentStream << "Warning: " << getRegClassName(SRegClassID)
1347 << ": scalar reg isn't aligned " << Val;
1348 }
1349
1350 return createRegOperand(SRegClassID, Val >> shift);
1351}
1352
1354 bool IsHi) const {
1355 unsigned RegIdxInVGPR16 = RegIdx * 2 + (IsHi ? 1 : 0);
1356 return createRegOperand(AMDGPU::VGPR_16RegClassID, RegIdxInVGPR16);
1357}
1358
1359// Decode Literals for insts which always have a literal in the encoding
1362 if (HasLiteral) {
1363 assert(
1365 "Should only decode multiple kimm with VOPD, check VSrc operand types");
1366 if (Literal != Val)
1367 return errOperand(Val, "More than one unique literal is illegal");
1368 }
1369 HasLiteral = true;
1370 Literal = Val;
1371 return MCOperand::createImm(Literal);
1372}
1373
1375 // For now all literal constants are supposed to be unsigned integer
1376 // ToDo: deal with signed/unsigned 64-bit integer constants
1377 // ToDo: deal with float/double constants
1378 if (!HasLiteral) {
1379 if (Bytes.size() < 4) {
1380 return errOperand(0, "cannot read literal, inst bytes left " +
1381 Twine(Bytes.size()));
1382 }
1383 HasLiteral = true;
1384 Literal = Literal64 = eatBytes<uint32_t>(Bytes);
1385 if (ExtendFP64)
1386 Literal64 <<= 32;
1387 }
1388 return MCOperand::createImm(ExtendFP64 ? Literal64 : Literal);
1389}
1390
1392 using namespace AMDGPU::EncValues;
1393
1394 assert(Imm >= INLINE_INTEGER_C_MIN && Imm <= INLINE_INTEGER_C_MAX);
1395 return MCOperand::createImm((Imm <= INLINE_INTEGER_C_POSITIVE_MAX) ?
1396 (static_cast<int64_t>(Imm) - INLINE_INTEGER_C_MIN) :
1397 (INLINE_INTEGER_C_POSITIVE_MAX - static_cast<int64_t>(Imm)));
1398 // Cast prevents negative overflow.
1399}
1400
1401static int64_t getInlineImmVal32(unsigned Imm) {
1402 switch (Imm) {
1403 case 240:
1404 return llvm::bit_cast<uint32_t>(0.5f);
1405 case 241:
1406 return llvm::bit_cast<uint32_t>(-0.5f);
1407 case 242:
1408 return llvm::bit_cast<uint32_t>(1.0f);
1409 case 243:
1410 return llvm::bit_cast<uint32_t>(-1.0f);
1411 case 244:
1412 return llvm::bit_cast<uint32_t>(2.0f);
1413 case 245:
1414 return llvm::bit_cast<uint32_t>(-2.0f);
1415 case 246:
1416 return llvm::bit_cast<uint32_t>(4.0f);
1417 case 247:
1418 return llvm::bit_cast<uint32_t>(-4.0f);
1419 case 248: // 1 / (2 * PI)
1420 return 0x3e22f983;
1421 default:
1422 llvm_unreachable("invalid fp inline imm");
1423 }
1424}
1425
1426static int64_t getInlineImmVal64(unsigned Imm) {
1427 switch (Imm) {
1428 case 240:
1429 return llvm::bit_cast<uint64_t>(0.5);
1430 case 241:
1431 return llvm::bit_cast<uint64_t>(-0.5);
1432 case 242:
1433 return llvm::bit_cast<uint64_t>(1.0);
1434 case 243:
1435 return llvm::bit_cast<uint64_t>(-1.0);
1436 case 244:
1437 return llvm::bit_cast<uint64_t>(2.0);
1438 case 245:
1439 return llvm::bit_cast<uint64_t>(-2.0);
1440 case 246:
1441 return llvm::bit_cast<uint64_t>(4.0);
1442 case 247:
1443 return llvm::bit_cast<uint64_t>(-4.0);
1444 case 248: // 1 / (2 * PI)
1445 return 0x3fc45f306dc9c882;
1446 default:
1447 llvm_unreachable("invalid fp inline imm");
1448 }
1449}
1450
1451static int64_t getInlineImmValF16(unsigned Imm) {
1452 switch (Imm) {
1453 case 240:
1454 return 0x3800;
1455 case 241:
1456 return 0xB800;
1457 case 242:
1458 return 0x3C00;
1459 case 243:
1460 return 0xBC00;
1461 case 244:
1462 return 0x4000;
1463 case 245:
1464 return 0xC000;
1465 case 246:
1466 return 0x4400;
1467 case 247:
1468 return 0xC400;
1469 case 248: // 1 / (2 * PI)
1470 return 0x3118;
1471 default:
1472 llvm_unreachable("invalid fp inline imm");
1473 }
1474}
1475
1476static int64_t getInlineImmValBF16(unsigned Imm) {
1477 switch (Imm) {
1478 case 240:
1479 return 0x3F00;
1480 case 241:
1481 return 0xBF00;
1482 case 242:
1483 return 0x3F80;
1484 case 243:
1485 return 0xBF80;
1486 case 244:
1487 return 0x4000;
1488 case 245:
1489 return 0xC000;
1490 case 246:
1491 return 0x4080;
1492 case 247:
1493 return 0xC080;
1494 case 248: // 1 / (2 * PI)
1495 return 0x3E22;
1496 default:
1497 llvm_unreachable("invalid fp inline imm");
1498 }
1499}
1500
1501static int64_t getInlineImmVal16(unsigned Imm, AMDGPU::OperandSemantics Sema) {
1503 : getInlineImmValF16(Imm);
1504}
1505
1506MCOperand AMDGPUDisassembler::decodeFPImmed(unsigned ImmWidth, unsigned Imm,
1510
1511 // ToDo: case 248: 1/(2*PI) - is allowed only on VI
1512 // ImmWidth 0 is a default case where operand should not allow immediates.
1513 // Imm value is still decoded into 32 bit immediate operand, inst printer will
1514 // use it to print verbose error message.
1515 switch (ImmWidth) {
1516 case 0:
1517 case 32:
1519 case 64:
1521 case 16:
1522 return MCOperand::createImm(getInlineImmVal16(Imm, Sema));
1523 default:
1524 llvm_unreachable("implement me");
1525 }
1526}
1527
1529 using namespace AMDGPU;
1530
1531 assert(OPW_FIRST_ <= Width && Width < OPW_LAST_);
1532 switch (Width) {
1533 default: // fall
1534 case OPW32:
1535 case OPW16:
1536 case OPWV216:
1537 return VGPR_32RegClassID;
1538 case OPW64:
1539 case OPWV232: return VReg_64RegClassID;
1540 case OPW96: return VReg_96RegClassID;
1541 case OPW128: return VReg_128RegClassID;
1542 case OPW192: return VReg_192RegClassID;
1543 case OPW160: return VReg_160RegClassID;
1544 case OPW256: return VReg_256RegClassID;
1545 case OPW288: return VReg_288RegClassID;
1546 case OPW320: return VReg_320RegClassID;
1547 case OPW352: return VReg_352RegClassID;
1548 case OPW384: return VReg_384RegClassID;
1549 case OPW512: return VReg_512RegClassID;
1550 case OPW1024: return VReg_1024RegClassID;
1551 }
1552}
1553
1555 using namespace AMDGPU;
1556
1557 assert(OPW_FIRST_ <= Width && Width < OPW_LAST_);
1558 switch (Width) {
1559 default: // fall
1560 case OPW32:
1561 case OPW16:
1562 case OPWV216:
1563 return AGPR_32RegClassID;
1564 case OPW64:
1565 case OPWV232: return AReg_64RegClassID;
1566 case OPW96: return AReg_96RegClassID;
1567 case OPW128: return AReg_128RegClassID;
1568 case OPW160: return AReg_160RegClassID;
1569 case OPW256: return AReg_256RegClassID;
1570 case OPW288: return AReg_288RegClassID;
1571 case OPW320: return AReg_320RegClassID;
1572 case OPW352: return AReg_352RegClassID;
1573 case OPW384: return AReg_384RegClassID;
1574 case OPW512: return AReg_512RegClassID;
1575 case OPW1024: return AReg_1024RegClassID;
1576 }
1577}
1578
1579
1581 using namespace AMDGPU;
1582
1583 assert(OPW_FIRST_ <= Width && Width < OPW_LAST_);
1584 switch (Width) {
1585 default: // fall
1586 case OPW32:
1587 case OPW16:
1588 case OPWV216:
1589 return SGPR_32RegClassID;
1590 case OPW64:
1591 case OPWV232: return SGPR_64RegClassID;
1592 case OPW96: return SGPR_96RegClassID;
1593 case OPW128: return SGPR_128RegClassID;
1594 case OPW160: return SGPR_160RegClassID;
1595 case OPW256: return SGPR_256RegClassID;
1596 case OPW288: return SGPR_288RegClassID;
1597 case OPW320: return SGPR_320RegClassID;
1598 case OPW352: return SGPR_352RegClassID;
1599 case OPW384: return SGPR_384RegClassID;
1600 case OPW512: return SGPR_512RegClassID;
1601 }
1602}
1603
1605 using namespace AMDGPU;
1606
1607 assert(OPW_FIRST_ <= Width && Width < OPW_LAST_);
1608 switch (Width) {
1609 default: // fall
1610 case OPW32:
1611 case OPW16:
1612 case OPWV216:
1613 return TTMP_32RegClassID;
1614 case OPW64:
1615 case OPWV232: return TTMP_64RegClassID;
1616 case OPW128: return TTMP_128RegClassID;
1617 case OPW256: return TTMP_256RegClassID;
1618 case OPW288: return TTMP_288RegClassID;
1619 case OPW320: return TTMP_320RegClassID;
1620 case OPW352: return TTMP_352RegClassID;
1621 case OPW384: return TTMP_384RegClassID;
1622 case OPW512: return TTMP_512RegClassID;
1623 }
1624}
1625
1626int AMDGPUDisassembler::getTTmpIdx(unsigned Val) const {
1627 using namespace AMDGPU::EncValues;
1628
1629 unsigned TTmpMin = isGFX9Plus() ? TTMP_GFX9PLUS_MIN : TTMP_VI_MIN;
1630 unsigned TTmpMax = isGFX9Plus() ? TTMP_GFX9PLUS_MAX : TTMP_VI_MAX;
1631
1632 return (TTmpMin <= Val && Val <= TTmpMax)? Val - TTmpMin : -1;
1633}
1634
1636 bool MandatoryLiteral,
1637 unsigned ImmWidth,
1638 AMDGPU::OperandSemantics Sema) const {
1639 using namespace AMDGPU::EncValues;
1640
1641 assert(Val < 1024); // enum10
1642
1643 bool IsAGPR = Val & 512;
1644 Val &= 511;
1645
1646 if (VGPR_MIN <= Val && Val <= VGPR_MAX) {
1647 return createRegOperand(IsAGPR ? getAgprClassId(Width)
1648 : getVgprClassId(Width), Val - VGPR_MIN);
1649 }
1650 return decodeNonVGPRSrcOp(Width, Val & 0xFF, MandatoryLiteral, ImmWidth,
1651 Sema);
1652}
1653
1656 bool MandatoryLiteral, unsigned ImmWidth,
1657 AMDGPU::OperandSemantics Sema) const {
1658 // Cases when Val{8} is 1 (vgpr, agpr or true 16 vgpr) should have been
1659 // decoded earlier.
1660 assert(Val < (1 << 8) && "9-bit Src encoding when Val{8} is 0");
1661 using namespace AMDGPU::EncValues;
1662
1663 if (Val <= SGPR_MAX) {
1664 // "SGPR_MIN <= Val" is always true and causes compilation warning.
1665 static_assert(SGPR_MIN == 0);
1666 return createSRegOperand(getSgprClassId(Width), Val - SGPR_MIN);
1667 }
1668
1669 int TTmpIdx = getTTmpIdx(Val);
1670 if (TTmpIdx >= 0) {
1671 return createSRegOperand(getTtmpClassId(Width), TTmpIdx);
1672 }
1673
1674 if (INLINE_INTEGER_C_MIN <= Val && Val <= INLINE_INTEGER_C_MAX)
1675 return decodeIntImmed(Val);
1676
1677 if (INLINE_FLOATING_C_MIN <= Val && Val <= INLINE_FLOATING_C_MAX)
1678 return decodeFPImmed(ImmWidth, Val, Sema);
1679
1680 if (Val == LITERAL_CONST) {
1681 if (MandatoryLiteral)
1682 // Keep a sentinel value for deferred setting
1683 return MCOperand::createImm(LITERAL_CONST);
1685 }
1686
1687 switch (Width) {
1688 case OPW32:
1689 case OPW16:
1690 case OPWV216:
1691 return decodeSpecialReg32(Val);
1692 case OPW64:
1693 case OPWV232:
1694 return decodeSpecialReg64(Val);
1695 default:
1696 llvm_unreachable("unexpected immediate type");
1697 }
1698}
1699
1700// Bit 0 of DstY isn't stored in the instruction, because it's always the
1701// opposite of bit 0 of DstX.
1703 unsigned Val) const {
1704 int VDstXInd =
1705 AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::vdstX);
1706 assert(VDstXInd != -1);
1707 assert(Inst.getOperand(VDstXInd).isReg());
1708 unsigned XDstReg = MRI.getEncodingValue(Inst.getOperand(VDstXInd).getReg());
1709 Val |= ~XDstReg & 1;
1711 return createRegOperand(getVgprClassId(Width), Val);
1712}
1713
1715 using namespace AMDGPU;
1716
1717 switch (Val) {
1718 // clang-format off
1719 case 102: return createRegOperand(FLAT_SCR_LO);
1720 case 103: return createRegOperand(FLAT_SCR_HI);
1721 case 104: return createRegOperand(XNACK_MASK_LO);
1722 case 105: return createRegOperand(XNACK_MASK_HI);
1723 case 106: return createRegOperand(VCC_LO);
1724 case 107: return createRegOperand(VCC_HI);
1725 case 108: return createRegOperand(TBA_LO);
1726 case 109: return createRegOperand(TBA_HI);
1727 case 110: return createRegOperand(TMA_LO);
1728 case 111: return createRegOperand(TMA_HI);
1729 case 124:
1730 return isGFX11Plus() ? createRegOperand(SGPR_NULL) : createRegOperand(M0);
1731 case 125:
1732 return isGFX11Plus() ? createRegOperand(M0) : createRegOperand(SGPR_NULL);
1733 case 126: return createRegOperand(EXEC_LO);
1734 case 127: return createRegOperand(EXEC_HI);
1735 case 235: return createRegOperand(SRC_SHARED_BASE_LO);
1736 case 236: return createRegOperand(SRC_SHARED_LIMIT_LO);
1737 case 237: return createRegOperand(SRC_PRIVATE_BASE_LO);
1738 case 238: return createRegOperand(SRC_PRIVATE_LIMIT_LO);
1739 case 239: return createRegOperand(SRC_POPS_EXITING_WAVE_ID);
1740 case 251: return createRegOperand(SRC_VCCZ);
1741 case 252: return createRegOperand(SRC_EXECZ);
1742 case 253: return createRegOperand(SRC_SCC);
1743 case 254: return createRegOperand(LDS_DIRECT);
1744 default: break;
1745 // clang-format on
1746 }
1747 return errOperand(Val, "unknown operand encoding " + Twine(Val));
1748}
1749
1751 using namespace AMDGPU;
1752
1753 switch (Val) {
1754 case 102: return createRegOperand(FLAT_SCR);
1755 case 104: return createRegOperand(XNACK_MASK);
1756 case 106: return createRegOperand(VCC);
1757 case 108: return createRegOperand(TBA);
1758 case 110: return createRegOperand(TMA);
1759 case 124:
1760 if (isGFX11Plus())
1761 return createRegOperand(SGPR_NULL);
1762 break;
1763 case 125:
1764 if (!isGFX11Plus())
1765 return createRegOperand(SGPR_NULL);
1766 break;
1767 case 126: return createRegOperand(EXEC);
1768 case 235: return createRegOperand(SRC_SHARED_BASE);
1769 case 236: return createRegOperand(SRC_SHARED_LIMIT);
1770 case 237: return createRegOperand(SRC_PRIVATE_BASE);
1771 case 238: return createRegOperand(SRC_PRIVATE_LIMIT);
1772 case 239: return createRegOperand(SRC_POPS_EXITING_WAVE_ID);
1773 case 251: return createRegOperand(SRC_VCCZ);
1774 case 252: return createRegOperand(SRC_EXECZ);
1775 case 253: return createRegOperand(SRC_SCC);
1776 default: break;
1777 }
1778 return errOperand(Val, "unknown operand encoding " + Twine(Val));
1779}
1780
1782AMDGPUDisassembler::decodeSDWASrc(const OpWidthTy Width, const unsigned Val,
1783 unsigned ImmWidth,
1784 AMDGPU::OperandSemantics Sema) const {
1785 using namespace AMDGPU::SDWA;
1786 using namespace AMDGPU::EncValues;
1787
1788 if (STI.hasFeature(AMDGPU::FeatureGFX9) ||
1789 STI.hasFeature(AMDGPU::FeatureGFX10)) {
1790 // XXX: cast to int is needed to avoid stupid warning:
1791 // compare with unsigned is always true
1792 if (int(SDWA9EncValues::SRC_VGPR_MIN) <= int(Val) &&
1793 Val <= SDWA9EncValues::SRC_VGPR_MAX) {
1794 return createRegOperand(getVgprClassId(Width),
1795 Val - SDWA9EncValues::SRC_VGPR_MIN);
1796 }
1797 if (SDWA9EncValues::SRC_SGPR_MIN <= Val &&
1798 Val <= (isGFX10Plus() ? SDWA9EncValues::SRC_SGPR_MAX_GFX10
1799 : SDWA9EncValues::SRC_SGPR_MAX_SI)) {
1800 return createSRegOperand(getSgprClassId(Width),
1801 Val - SDWA9EncValues::SRC_SGPR_MIN);
1802 }
1803 if (SDWA9EncValues::SRC_TTMP_MIN <= Val &&
1804 Val <= SDWA9EncValues::SRC_TTMP_MAX) {
1805 return createSRegOperand(getTtmpClassId(Width),
1806 Val - SDWA9EncValues::SRC_TTMP_MIN);
1807 }
1808
1809 const unsigned SVal = Val - SDWA9EncValues::SRC_SGPR_MIN;
1810
1811 if (INLINE_INTEGER_C_MIN <= SVal && SVal <= INLINE_INTEGER_C_MAX)
1812 return decodeIntImmed(SVal);
1813
1814 if (INLINE_FLOATING_C_MIN <= SVal && SVal <= INLINE_FLOATING_C_MAX)
1815 return decodeFPImmed(ImmWidth, SVal, Sema);
1816
1817 return decodeSpecialReg32(SVal);
1818 }
1819 if (STI.hasFeature(AMDGPU::FeatureVolcanicIslands))
1820 return createRegOperand(getVgprClassId(Width), Val);
1821 llvm_unreachable("unsupported target");
1822}
1823
1826}
1827
1830}
1831
1833 using namespace AMDGPU::SDWA;
1834
1835 assert((STI.hasFeature(AMDGPU::FeatureGFX9) ||
1836 STI.hasFeature(AMDGPU::FeatureGFX10)) &&
1837 "SDWAVopcDst should be present only on GFX9+");
1838
1839 bool IsWave32 = STI.hasFeature(AMDGPU::FeatureWavefrontSize32);
1840
1841 if (Val & SDWA9EncValues::VOPC_DST_VCC_MASK) {
1842 Val &= SDWA9EncValues::VOPC_DST_SGPR_MASK;
1843
1844 int TTmpIdx = getTTmpIdx(Val);
1845 if (TTmpIdx >= 0) {
1846 auto TTmpClsId = getTtmpClassId(IsWave32 ? OPW32 : OPW64);
1847 return createSRegOperand(TTmpClsId, TTmpIdx);
1848 }
1849 if (Val > SGPR_MAX) {
1850 return IsWave32 ? decodeSpecialReg32(Val) : decodeSpecialReg64(Val);
1851 }
1852 return createSRegOperand(getSgprClassId(IsWave32 ? OPW32 : OPW64), Val);
1853 }
1854 return createRegOperand(IsWave32 ? AMDGPU::VCC_LO : AMDGPU::VCC);
1855}
1856
1858 return STI.hasFeature(AMDGPU::FeatureWavefrontSize32)
1859 ? decodeSrcOp(OPW32, Val)
1860 : decodeSrcOp(OPW64, Val);
1861}
1862
1864 return decodeSrcOp(OPW32, Val);
1865}
1866
1869 return MCOperand();
1870 return MCOperand::createImm(Val);
1871}
1872
1874 using VersionField = AMDGPU::EncodingField<7, 0>;
1875 using W64Bit = AMDGPU::EncodingBit<13>;
1876 using W32Bit = AMDGPU::EncodingBit<14>;
1877 using MDPBit = AMDGPU::EncodingBit<15>;
1879
1880 auto [Version, W64, W32, MDP] = Encoding::decode(Imm);
1881
1882 // Decode into a plain immediate if any unused bits are raised.
1883 if (Encoding::encode(Version, W64, W32, MDP) != Imm)
1884 return MCOperand::createImm(Imm);
1885
1886 const auto &Versions = AMDGPU::UCVersion::getGFXVersions();
1887 const auto *I = find_if(
1888 Versions, [Version = Version](const AMDGPU::UCVersion::GFXVersion &V) {
1889 return V.Code == Version;
1890 });
1891 MCContext &Ctx = getContext();
1892 const MCExpr *E;
1893 if (I == Versions.end())
1895 else
1896 E = MCSymbolRefExpr::create(Ctx.getOrCreateSymbol(I->Symbol), Ctx);
1897
1898 if (W64)
1899 E = MCBinaryExpr::createOr(E, UCVersionW64Expr, Ctx);
1900 if (W32)
1901 E = MCBinaryExpr::createOr(E, UCVersionW32Expr, Ctx);
1902 if (MDP)
1903 E = MCBinaryExpr::createOr(E, UCVersionMDPExpr, Ctx);
1904
1905 return MCOperand::createExpr(E);
1906}
1907
1909 return STI.hasFeature(AMDGPU::FeatureVolcanicIslands);
1910}
1911
1913
1915 return STI.hasFeature(AMDGPU::FeatureGFX90AInsts);
1916}
1917
1919
1921
1923 return AMDGPU::isGFX10Plus(STI);
1924}
1925
1927 return STI.hasFeature(AMDGPU::FeatureGFX11);
1928}
1929
1931 return AMDGPU::isGFX11Plus(STI);
1932}
1933
1935 return STI.hasFeature(AMDGPU::FeatureGFX12);
1936}
1937
1939 return AMDGPU::isGFX12Plus(STI);
1940}
1941
1943 return STI.hasFeature(AMDGPU::FeatureArchitectedFlatScratch);
1944}
1945
1948}
1949
1950//===----------------------------------------------------------------------===//
1951// AMDGPU specific symbol handling
1952//===----------------------------------------------------------------------===//
1953
1954/// Print a string describing the reserved bit range specified by Mask with
1955/// offset BaseBytes for use in error comments. Mask is a single continuous
1956/// range of 1s surrounded by zeros. The format here is meant to align with the
1957/// tables that describe these bits in llvm.org/docs/AMDGPUUsage.html.
1958static SmallString<32> getBitRangeFromMask(uint32_t Mask, unsigned BaseBytes) {
1959 SmallString<32> Result;
1960 raw_svector_ostream S(Result);
1961
1962 int TrailingZeros = llvm::countr_zero(Mask);
1963 int PopCount = llvm::popcount(Mask);
1964
1965 if (PopCount == 1) {
1966 S << "bit (" << (TrailingZeros + BaseBytes * CHAR_BIT) << ')';
1967 } else {
1968 S << "bits in range ("
1969 << (TrailingZeros + PopCount - 1 + BaseBytes * CHAR_BIT) << ':'
1970 << (TrailingZeros + BaseBytes * CHAR_BIT) << ')';
1971 }
1972
1973 return Result;
1974}
1975
// Helper macros for the decodeCOMPUTE_PGM_RSRC* functions. They expand in a
// scope that provides FourByteBuffer, KdStream and Indent (and MAI for the
// pseudo-directive variant).

/// Extract the field selected by MASK from FourByteBuffer.
#define GET_FIELD(MASK) (AMDHSA_BITS_GET(FourByteBuffer, MASK))

/// Write one indented line to KdStream: DIRECTIVE, a space, and the value of
/// the field selected by MASK. DIRECTIVE must be a string literal because it
/// is joined with the separator by literal concatenation.
#define PRINT_DIRECTIVE(DIRECTIVE, MASK)                                       \
  do {                                                                         \
    KdStream << Indent << DIRECTIVE " ";                                       \
    KdStream << GET_FIELD(MASK) << '\n';                                       \
  } while (0)

/// Same as PRINT_DIRECTIVE, but the line is prefixed with the target's comment
/// string and a space.
#define PRINT_PSEUDO_DIRECTIVE_COMMENT(DIRECTIVE, MASK)                        \
  do {                                                                         \
    KdStream << Indent << MAI.getCommentString() << ' ';                       \
    KdStream << DIRECTIVE " " << GET_FIELD(MASK) << '\n';                      \
  } while (0)
1986
/// If any bit of MASK is set in FourByteBuffer, return an invalid_argument
/// string error from the enclosing function. The message reads
/// "kernel descriptor <DESC> reserved <bit range> set<MSG>", where the bit
/// range comes from getBitRangeFromMask with a zero byte offset. DESC and MSG
/// must be string literals because they are spliced into the format string.
#define CHECK_RESERVED_BITS_IMPL(MASK, DESC, MSG)                              \
  do {                                                                         \
    if ((FourByteBuffer & (MASK)) != 0)                                        \
      return createStringError(                                                \
          std::errc::invalid_argument,                                         \
          "kernel descriptor " DESC " reserved %s set" MSG,                    \
          getBitRangeFromMask((MASK), 0).c_str());                             \
  } while (0)

/// Describe the bits by the spelling of MASK itself; no extra message.
#define CHECK_RESERVED_BITS(MASK) CHECK_RESERVED_BITS_IMPL(MASK, #MASK, "")

/// Describe the bits by the spelling of MASK and append ", MSG".
#define CHECK_RESERVED_BITS_MSG(MASK, MSG)                                     \
  CHECK_RESERVED_BITS_IMPL(MASK, #MASK, ", " MSG)

/// Describe the bits by the caller-supplied DESC; no extra message.
#define CHECK_RESERVED_BITS_DESC(MASK, DESC)                                   \
  CHECK_RESERVED_BITS_IMPL(MASK, DESC, "")

/// Describe the bits by the caller-supplied DESC and append ", MSG".
#define CHECK_RESERVED_BITS_DESC_MSG(MASK, DESC, MSG)                          \
  CHECK_RESERVED_BITS_IMPL(MASK, DESC, ", " MSG)
2004
2005// NOLINTNEXTLINE(readability-identifier-naming)
2007 uint32_t FourByteBuffer, raw_string_ostream &KdStream) const {
2008 using namespace amdhsa;
2009 StringRef Indent = "\t";
2010
2011 // We cannot accurately backward compute #VGPRs used from
2012 // GRANULATED_WORKITEM_VGPR_COUNT. But we are concerned with getting the same
2013 // value of GRANULATED_WORKITEM_VGPR_COUNT in the reassembled binary. So we
2014 // simply calculate the inverse of what the assembler does.
2015
2016 uint32_t GranulatedWorkitemVGPRCount =
2017 GET_FIELD(COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT);
2018
2019 uint32_t NextFreeVGPR =
2020 (GranulatedWorkitemVGPRCount + 1) *
2021 AMDGPU::IsaInfo::getVGPREncodingGranule(&STI, EnableWavefrontSize32);
2022
2023 KdStream << Indent << ".amdhsa_next_free_vgpr " << NextFreeVGPR << '\n';
2024
2025 // We cannot backward compute values used to calculate
2026 // GRANULATED_WAVEFRONT_SGPR_COUNT. Hence the original values for following
2027 // directives can't be computed:
2028 // .amdhsa_reserve_vcc
2029 // .amdhsa_reserve_flat_scratch
2030 // .amdhsa_reserve_xnack_mask
2031 // They take their respective default values if not specified in the assembly.
2032 //
2033 // GRANULATED_WAVEFRONT_SGPR_COUNT
2034 // = f(NEXT_FREE_SGPR + VCC + FLAT_SCRATCH + XNACK_MASK)
2035 //
2036 // We compute the inverse as though all directives apart from NEXT_FREE_SGPR
2037 // are set to 0. So while disassembling we consider that:
2038 //
2039 // GRANULATED_WAVEFRONT_SGPR_COUNT
2040 // = f(NEXT_FREE_SGPR + 0 + 0 + 0)
2041 //
2042 // The disassembler cannot recover the original values of those 3 directives.
2043
2044 uint32_t GranulatedWavefrontSGPRCount =
2045 GET_FIELD(COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT);
2046
2047 if (isGFX10Plus())
2048 CHECK_RESERVED_BITS_MSG(COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT,
2049 "must be zero on gfx10+");
2050
2051 uint32_t NextFreeSGPR = (GranulatedWavefrontSGPRCount + 1) *
2053
2054 KdStream << Indent << ".amdhsa_reserve_vcc " << 0 << '\n';
2056 KdStream << Indent << ".amdhsa_reserve_flat_scratch " << 0 << '\n';
2057 KdStream << Indent << ".amdhsa_reserve_xnack_mask " << 0 << '\n';
2058 KdStream << Indent << ".amdhsa_next_free_sgpr " << NextFreeSGPR << "\n";
2059
2060 CHECK_RESERVED_BITS(COMPUTE_PGM_RSRC1_PRIORITY);
2061
2062 PRINT_DIRECTIVE(".amdhsa_float_round_mode_32",
2063 COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_32);
2064 PRINT_DIRECTIVE(".amdhsa_float_round_mode_16_64",
2065 COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_16_64);
2066 PRINT_DIRECTIVE(".amdhsa_float_denorm_mode_32",
2067 COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_32);
2068 PRINT_DIRECTIVE(".amdhsa_float_denorm_mode_16_64",
2069 COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_16_64);
2070
2071 CHECK_RESERVED_BITS(COMPUTE_PGM_RSRC1_PRIV);
2072
2073 if (!isGFX12Plus())
2074 PRINT_DIRECTIVE(".amdhsa_dx10_clamp",
2075 COMPUTE_PGM_RSRC1_GFX6_GFX11_ENABLE_DX10_CLAMP);
2076
2077 CHECK_RESERVED_BITS(COMPUTE_PGM_RSRC1_DEBUG_MODE);
2078
2079 if (!isGFX12Plus())
2080 PRINT_DIRECTIVE(".amdhsa_ieee_mode",
2081 COMPUTE_PGM_RSRC1_GFX6_GFX11_ENABLE_IEEE_MODE);
2082
2083 CHECK_RESERVED_BITS(COMPUTE_PGM_RSRC1_BULKY);
2084 CHECK_RESERVED_BITS(COMPUTE_PGM_RSRC1_CDBG_USER);
2085
2086 if (isGFX9Plus())
2087 PRINT_DIRECTIVE(".amdhsa_fp16_overflow", COMPUTE_PGM_RSRC1_GFX9_PLUS_FP16_OVFL);
2088
2089 if (!isGFX9Plus())
2090 CHECK_RESERVED_BITS_DESC_MSG(COMPUTE_PGM_RSRC1_GFX6_GFX8_RESERVED0,
2091 "COMPUTE_PGM_RSRC1", "must be zero pre-gfx9");
2092
2093 CHECK_RESERVED_BITS_DESC(COMPUTE_PGM_RSRC1_RESERVED1, "COMPUTE_PGM_RSRC1");
2094
2095 if (!isGFX10Plus())
2096 CHECK_RESERVED_BITS_DESC_MSG(COMPUTE_PGM_RSRC1_GFX6_GFX9_RESERVED2,
2097 "COMPUTE_PGM_RSRC1", "must be zero pre-gfx10");
2098
2099 if (isGFX10Plus()) {
2100 PRINT_DIRECTIVE(".amdhsa_workgroup_processor_mode",
2101 COMPUTE_PGM_RSRC1_GFX10_PLUS_WGP_MODE);
2102 PRINT_DIRECTIVE(".amdhsa_memory_ordered", COMPUTE_PGM_RSRC1_GFX10_PLUS_MEM_ORDERED);
2103 PRINT_DIRECTIVE(".amdhsa_forward_progress", COMPUTE_PGM_RSRC1_GFX10_PLUS_FWD_PROGRESS);
2104 }
2105
2106 if (isGFX12Plus())
2107 PRINT_DIRECTIVE(".amdhsa_round_robin_scheduling",
2108 COMPUTE_PGM_RSRC1_GFX12_PLUS_ENABLE_WG_RR_EN);
2109
2110 return true;
2111}
2112
2113// NOLINTNEXTLINE(readability-identifier-naming)
2115 uint32_t FourByteBuffer, raw_string_ostream &KdStream) const {
2116 using namespace amdhsa;
2117 StringRef Indent = "\t";
2119 PRINT_DIRECTIVE(".amdhsa_enable_private_segment",
2120 COMPUTE_PGM_RSRC2_ENABLE_PRIVATE_SEGMENT);
2121 else
2122 PRINT_DIRECTIVE(".amdhsa_system_sgpr_private_segment_wavefront_offset",
2123 COMPUTE_PGM_RSRC2_ENABLE_PRIVATE_SEGMENT);
2124 PRINT_DIRECTIVE(".amdhsa_system_sgpr_workgroup_id_x",
2125 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_X);
2126 PRINT_DIRECTIVE(".amdhsa_system_sgpr_workgroup_id_y",
2127 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Y);
2128 PRINT_DIRECTIVE(".amdhsa_system_sgpr_workgroup_id_z",
2129 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Z);
2130 PRINT_DIRECTIVE(".amdhsa_system_sgpr_workgroup_info",
2131 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_INFO);
2132 PRINT_DIRECTIVE(".amdhsa_system_vgpr_workitem_id",
2133 COMPUTE_PGM_RSRC2_ENABLE_VGPR_WORKITEM_ID);
2134
2135 CHECK_RESERVED_BITS(COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_ADDRESS_WATCH);
2136 CHECK_RESERVED_BITS(COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_MEMORY);
2137 CHECK_RESERVED_BITS(COMPUTE_PGM_RSRC2_GRANULATED_LDS_SIZE);
2138
2140 ".amdhsa_exception_fp_ieee_invalid_op",
2141 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INVALID_OPERATION);
2142 PRINT_DIRECTIVE(".amdhsa_exception_fp_denorm_src",
2143 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_FP_DENORMAL_SOURCE);
2145 ".amdhsa_exception_fp_ieee_div_zero",
2146 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_DIVISION_BY_ZERO);
2147 PRINT_DIRECTIVE(".amdhsa_exception_fp_ieee_overflow",
2148 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_OVERFLOW);
2149 PRINT_DIRECTIVE(".amdhsa_exception_fp_ieee_underflow",
2150 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_UNDERFLOW);
2151 PRINT_DIRECTIVE(".amdhsa_exception_fp_ieee_inexact",
2152 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INEXACT);
2153 PRINT_DIRECTIVE(".amdhsa_exception_int_div_zero",
2154 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_INT_DIVIDE_BY_ZERO);
2155
2156 CHECK_RESERVED_BITS_DESC(COMPUTE_PGM_RSRC2_RESERVED0, "COMPUTE_PGM_RSRC2");
2157
2158 return true;
2159}
2160
2161// NOLINTNEXTLINE(readability-identifier-naming)
2163 uint32_t FourByteBuffer, raw_string_ostream &KdStream) const {
2164 using namespace amdhsa;
2165 StringRef Indent = "\t";
2166 if (isGFX90A()) {
2167 KdStream << Indent << ".amdhsa_accum_offset "
2168 << (GET_FIELD(COMPUTE_PGM_RSRC3_GFX90A_ACCUM_OFFSET) + 1) * 4
2169 << '\n';
2170
2171 PRINT_DIRECTIVE(".amdhsa_tg_split", COMPUTE_PGM_RSRC3_GFX90A_TG_SPLIT);
2172
2173 CHECK_RESERVED_BITS_DESC_MSG(COMPUTE_PGM_RSRC3_GFX90A_RESERVED0,
2174 "COMPUTE_PGM_RSRC3", "must be zero on gfx90a");
2175 CHECK_RESERVED_BITS_DESC_MSG(COMPUTE_PGM_RSRC3_GFX90A_RESERVED1,
2176 "COMPUTE_PGM_RSRC3", "must be zero on gfx90a");
2177 } else if (isGFX10Plus()) {
2178 // Bits [0-3].
2179 if (!isGFX12Plus()) {
2180 if (!EnableWavefrontSize32 || !*EnableWavefrontSize32) {
2181 PRINT_DIRECTIVE(".amdhsa_shared_vgpr_count",
2182 COMPUTE_PGM_RSRC3_GFX10_GFX11_SHARED_VGPR_COUNT);
2183 } else {
2185 "SHARED_VGPR_COUNT",
2186 COMPUTE_PGM_RSRC3_GFX10_GFX11_SHARED_VGPR_COUNT);
2187 }
2188 } else {
2189 CHECK_RESERVED_BITS_DESC_MSG(COMPUTE_PGM_RSRC3_GFX12_PLUS_RESERVED0,
2190 "COMPUTE_PGM_RSRC3",
2191 "must be zero on gfx12+");
2192 }
2193
2194 // Bits [4-11].
2195 if (isGFX11()) {
2196 PRINT_PSEUDO_DIRECTIVE_COMMENT("INST_PREF_SIZE",
2197 COMPUTE_PGM_RSRC3_GFX11_INST_PREF_SIZE);
2198 PRINT_PSEUDO_DIRECTIVE_COMMENT("TRAP_ON_START",
2199 COMPUTE_PGM_RSRC3_GFX11_TRAP_ON_START);
2200 PRINT_PSEUDO_DIRECTIVE_COMMENT("TRAP_ON_END",
2201 COMPUTE_PGM_RSRC3_GFX11_TRAP_ON_END);
2202 } else if (isGFX12Plus()) {
2204 "INST_PREF_SIZE", COMPUTE_PGM_RSRC3_GFX12_PLUS_INST_PREF_SIZE);
2205 } else {
2206 CHECK_RESERVED_BITS_DESC_MSG(COMPUTE_PGM_RSRC3_GFX10_RESERVED1,
2207 "COMPUTE_PGM_RSRC3",
2208 "must be zero on gfx10");
2209 }
2210
2211 // Bits [12].
2212 CHECK_RESERVED_BITS_DESC_MSG(COMPUTE_PGM_RSRC3_GFX10_PLUS_RESERVED2,
2213 "COMPUTE_PGM_RSRC3", "must be zero on gfx10+");
2214
2215 // Bits [13].
2216 if (isGFX12Plus()) {
2218 COMPUTE_PGM_RSRC3_GFX12_PLUS_GLG_EN);
2219 } else {
2220 CHECK_RESERVED_BITS_DESC_MSG(COMPUTE_PGM_RSRC3_GFX10_GFX11_RESERVED3,
2221 "COMPUTE_PGM_RSRC3",
2222 "must be zero on gfx10 or gfx11");
2223 }
2224
2225 // Bits [14-30].
2226 CHECK_RESERVED_BITS_DESC_MSG(COMPUTE_PGM_RSRC3_GFX10_PLUS_RESERVED4,
2227 "COMPUTE_PGM_RSRC3", "must be zero on gfx10+");
2228
2229 // Bits [31].
2230 if (isGFX11Plus()) {
2232 COMPUTE_PGM_RSRC3_GFX11_PLUS_IMAGE_OP);
2233 } else {
2234 CHECK_RESERVED_BITS_DESC_MSG(COMPUTE_PGM_RSRC3_GFX10_RESERVED5,
2235 "COMPUTE_PGM_RSRC3",
2236 "must be zero on gfx10");
2237 }
2238 } else if (FourByteBuffer) {
2239 return createStringError(
2240 std::errc::invalid_argument,
2241 "kernel descriptor COMPUTE_PGM_RSRC3 must be all zero before gfx9");
2242 }
2243 return true;
2244}
2245#undef PRINT_PSEUDO_DIRECTIVE_COMMENT
2246#undef PRINT_DIRECTIVE
2247#undef GET_FIELD
2248#undef CHECK_RESERVED_BITS_IMPL
2249#undef CHECK_RESERVED_BITS
2250#undef CHECK_RESERVED_BITS_MSG
2251#undef CHECK_RESERVED_BITS_DESC
2252#undef CHECK_RESERVED_BITS_DESC_MSG
2253
2254/// Create an error object to return from onSymbolStart for reserved kernel
2255/// descriptor bits being set.
2256static Error createReservedKDBitsError(uint32_t Mask, unsigned BaseBytes,
2257 const char *Msg = "") {
2258 return createStringError(
2259 std::errc::invalid_argument, "kernel descriptor reserved %s set%s%s",
2260 getBitRangeFromMask(Mask, BaseBytes).c_str(), *Msg ? ", " : "", Msg);
2261}
2262
2263/// Create an error object to return from onSymbolStart for reserved kernel
2264/// descriptor bytes being set.
2265static Error createReservedKDBytesError(unsigned BaseInBytes,
2266 unsigned WidthInBytes) {
2267 // Create an error comment in the same format as the "Kernel Descriptor"
2268 // table here: https://llvm.org/docs/AMDGPUUsage.html#kernel-descriptor .
2269 return createStringError(
2270 std::errc::invalid_argument,
2271 "kernel descriptor reserved bits in range (%u:%u) set",
2272 (BaseInBytes + WidthInBytes) * CHAR_BIT - 1, BaseInBytes * CHAR_BIT);
2273}
2274
2277 raw_string_ostream &KdStream) const {
2278#define PRINT_DIRECTIVE(DIRECTIVE, MASK) \
2279 do { \
2280 KdStream << Indent << DIRECTIVE " " \
2281 << ((TwoByteBuffer & MASK) >> (MASK##_SHIFT)) << '\n'; \
2282 } while (0)
2283
2284 uint16_t TwoByteBuffer = 0;
2285 uint32_t FourByteBuffer = 0;
2286
2287 StringRef ReservedBytes;
2288 StringRef Indent = "\t";
2289
2290 assert(Bytes.size() == 64);
2291 DataExtractor DE(Bytes, /*IsLittleEndian=*/true, /*AddressSize=*/8);
2292
2293 switch (Cursor.tell()) {
2295 FourByteBuffer = DE.getU32(Cursor);
2296 KdStream << Indent << ".amdhsa_group_segment_fixed_size " << FourByteBuffer
2297 << '\n';
2298 return true;
2299
2301 FourByteBuffer = DE.getU32(Cursor);
2302 KdStream << Indent << ".amdhsa_private_segment_fixed_size "
2303 << FourByteBuffer << '\n';
2304 return true;
2305
2307 FourByteBuffer = DE.getU32(Cursor);
2308 KdStream << Indent << ".amdhsa_kernarg_size "
2309 << FourByteBuffer << '\n';
2310 return true;
2311
2313 // 4 reserved bytes, must be 0.
2314 ReservedBytes = DE.getBytes(Cursor, 4);
2315 for (int I = 0; I < 4; ++I) {
2316 if (ReservedBytes[I] != 0)
2318 }
2319 return true;
2320
2322 // KERNEL_CODE_ENTRY_BYTE_OFFSET
2323 // So far no directive controls this for Code Object V3, so simply skip for
2324 // disassembly.
2325 DE.skip(Cursor, 8);
2326 return true;
2327
2329 // 20 reserved bytes, must be 0.
2330 ReservedBytes = DE.getBytes(Cursor, 20);
2331 for (int I = 0; I < 20; ++I) {
2332 if (ReservedBytes[I] != 0)
2334 }
2335 return true;
2336
2338 FourByteBuffer = DE.getU32(Cursor);
2339 return decodeCOMPUTE_PGM_RSRC3(FourByteBuffer, KdStream);
2340
2342 FourByteBuffer = DE.getU32(Cursor);
2343 return decodeCOMPUTE_PGM_RSRC1(FourByteBuffer, KdStream);
2344
2346 FourByteBuffer = DE.getU32(Cursor);
2347 return decodeCOMPUTE_PGM_RSRC2(FourByteBuffer, KdStream);
2348
2350 using namespace amdhsa;
2351 TwoByteBuffer = DE.getU16(Cursor);
2352
2354 PRINT_DIRECTIVE(".amdhsa_user_sgpr_private_segment_buffer",
2355 KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER);
2356 PRINT_DIRECTIVE(".amdhsa_user_sgpr_dispatch_ptr",
2357 KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR);
2358 PRINT_DIRECTIVE(".amdhsa_user_sgpr_queue_ptr",
2359 KERNEL_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR);
2360 PRINT_DIRECTIVE(".amdhsa_user_sgpr_kernarg_segment_ptr",
2361 KERNEL_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR);
2362 PRINT_DIRECTIVE(".amdhsa_user_sgpr_dispatch_id",
2363 KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID);
2365 PRINT_DIRECTIVE(".amdhsa_user_sgpr_flat_scratch_init",
2366 KERNEL_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT);
2367 PRINT_DIRECTIVE(".amdhsa_user_sgpr_private_segment_size",
2368 KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE);
2369
2370 if (TwoByteBuffer & KERNEL_CODE_PROPERTY_RESERVED0)
2371 return createReservedKDBitsError(KERNEL_CODE_PROPERTY_RESERVED0,
2373
2374 // Reserved for GFX9
2375 if (isGFX9() &&
2376 (TwoByteBuffer & KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32)) {
2378 KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32,
2379 amdhsa::KERNEL_CODE_PROPERTIES_OFFSET, "must be zero on gfx9");
2380 }
2381 if (isGFX10Plus()) {
2382 PRINT_DIRECTIVE(".amdhsa_wavefront_size32",
2383 KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32);
2384 }
2385
2386 if (CodeObjectVersion >= AMDGPU::AMDHSA_COV5)
2387 PRINT_DIRECTIVE(".amdhsa_uses_dynamic_stack",
2388 KERNEL_CODE_PROPERTY_USES_DYNAMIC_STACK);
2389
2390 if (TwoByteBuffer & KERNEL_CODE_PROPERTY_RESERVED1) {
2391 return createReservedKDBitsError(KERNEL_CODE_PROPERTY_RESERVED1,
2393 }
2394
2395 return true;
2396
2398 using namespace amdhsa;
2399 TwoByteBuffer = DE.getU16(Cursor);
2400 if (TwoByteBuffer & KERNARG_PRELOAD_SPEC_LENGTH) {
2401 PRINT_DIRECTIVE(".amdhsa_user_sgpr_kernarg_preload_length",
2402 KERNARG_PRELOAD_SPEC_LENGTH);
2403 }
2404
2405 if (TwoByteBuffer & KERNARG_PRELOAD_SPEC_OFFSET) {
2406 PRINT_DIRECTIVE(".amdhsa_user_sgpr_kernarg_preload_offset",
2407 KERNARG_PRELOAD_SPEC_OFFSET);
2408 }
2409 return true;
2410
2412 // 4 bytes from here are reserved, must be 0.
2413 ReservedBytes = DE.getBytes(Cursor, 4);
2414 for (int I = 0; I < 4; ++I) {
2415 if (ReservedBytes[I] != 0)
2417 }
2418 return true;
2419
2420 default:
2421 llvm_unreachable("Unhandled index. Case statements cover everything.");
2422 return true;
2423 }
2424#undef PRINT_DIRECTIVE
2425}
2426
2428 StringRef KdName, ArrayRef<uint8_t> Bytes, uint64_t KdAddress) const {
2429
2430 // CP microcode requires the kernel descriptor to be 64 aligned.
2431 if (Bytes.size() != 64 || KdAddress % 64 != 0)
2432 return createStringError(std::errc::invalid_argument,
2433 "kernel descriptor must be 64-byte aligned");
2434
2435 // FIXME: We can't actually decode "in order" as is done below, as e.g. GFX10
2436 // requires us to know the setting of .amdhsa_wavefront_size32 in order to
2437 // accurately produce .amdhsa_next_free_vgpr, and they appear in the wrong
2438 // order. Workaround this by first looking up .amdhsa_wavefront_size32 here
2439 // when required.
2440 if (isGFX10Plus()) {
2441 uint16_t KernelCodeProperties =
2444 EnableWavefrontSize32 =
2445 AMDHSA_BITS_GET(KernelCodeProperties,
2446 amdhsa::KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32);
2447 }
2448
2449 std::string Kd;
2450 raw_string_ostream KdStream(Kd);
2451 KdStream << ".amdhsa_kernel " << KdName << '\n';
2452
2454 while (C && C.tell() < Bytes.size()) {
2455 Expected<bool> Res = decodeKernelDescriptorDirective(C, Bytes, KdStream);
2456
2457 cantFail(C.takeError());
2458
2459 if (!Res)
2460 return Res;
2461 }
2462 KdStream << ".end_amdhsa_kernel\n";
2463 outs() << KdStream.str();
2464 return true;
2465}
2466
2468 uint64_t &Size,
2469 ArrayRef<uint8_t> Bytes,
2470 uint64_t Address) const {
2471 // Right now only kernel descriptor needs to be handled.
2472 // We ignore all other symbols for target specific handling.
2473 // TODO:
2474 // Fix the spurious symbol issue for AMDGPU kernels. Exists for both Code
2475 // Object V2 and V3 when symbols are marked protected.
2476
2477 // amd_kernel_code_t for Code Object V2.
2478 if (Symbol.Type == ELF::STT_AMDGPU_HSA_KERNEL) {
2479 Size = 256;
2480 return createStringError(std::errc::invalid_argument,
2481 "code object v2 is not supported");
2482 }
2483
2484 // Code Object V3 kernel descriptors.
2485 StringRef Name = Symbol.Name;
2486 if (Symbol.Type == ELF::STT_OBJECT && Name.ends_with(StringRef(".kd"))) {
2487 Size = 64; // Size = 64 regardless of success or failure.
2488 return decodeKernelDescriptor(Name.drop_back(3), Bytes, Address);
2489 }
2490
2491 return false;
2492}
2493
2494const MCExpr *AMDGPUDisassembler::createConstantSymbolExpr(StringRef Id,
2495 int64_t Val) {
2496 MCContext &Ctx = getContext();
2497 MCSymbol *Sym = Ctx.getOrCreateSymbol(Id);
2498 // Note: only set value to Val on a new symbol in case an dissassembler
2499 // has already been initialized in this context.
2500 if (!Sym->isVariable()) {
2501 Sym->setVariableValue(MCConstantExpr::create(Val, Ctx));
2502 } else {
2503 int64_t Res = ~Val;
2504 bool Valid = Sym->getVariableValue()->evaluateAsAbsolute(Res);
2505 if (!Valid || Res != Val)
2506 Ctx.reportWarning(SMLoc(), "unsupported redefinition of " + Id);
2507 }
2508 return MCSymbolRefExpr::create(Sym, Ctx);
2509}
2510
2511//===----------------------------------------------------------------------===//
2512// AMDGPUSymbolizer
2513//===----------------------------------------------------------------------===//
2514
2515// Try to find symbol name for specified label
2517 MCInst &Inst, raw_ostream & /*cStream*/, int64_t Value,
2518 uint64_t /*Address*/, bool IsBranch, uint64_t /*Offset*/,
2519 uint64_t /*OpSize*/, uint64_t /*InstSize*/) {
2520
2521 if (!IsBranch) {
2522 return false;
2523 }
2524
2525 auto *Symbols = static_cast<SectionSymbolsTy *>(DisInfo);
2526 if (!Symbols)
2527 return false;
2528
2529 auto Result = llvm::find_if(*Symbols, [Value](const SymbolInfoTy &Val) {
2530 return Val.Addr == static_cast<uint64_t>(Value) &&
2531 Val.Type == ELF::STT_NOTYPE;
2532 });
2533 if (Result != Symbols->end()) {
2534 auto *Sym = Ctx.getOrCreateSymbol(Result->Name);
2535 const auto *Add = MCSymbolRefExpr::create(Sym, Ctx);
2537 return true;
2538 }
2539 // Add to list of referenced addresses, so caller can synthesize a label.
2540 ReferencedAddresses.push_back(static_cast<uint64_t>(Value));
2541 return false;
2542}
2543
2545 int64_t Value,
2546 uint64_t Address) {
2547 llvm_unreachable("unimplemented");
2548}
2549
2550//===----------------------------------------------------------------------===//
2551// Initialization
2552//===----------------------------------------------------------------------===//
2553
2555 LLVMOpInfoCallback /*GetOpInfo*/,
2556 LLVMSymbolLookupCallback /*SymbolLookUp*/,
2557 void *DisInfo,
2558 MCContext *Ctx,
2559 std::unique_ptr<MCRelocationInfo> &&RelInfo) {
2560 return new AMDGPUSymbolizer(*Ctx, std::move(RelInfo), DisInfo);
2561}
2562
2564 const MCSubtargetInfo &STI,
2565 MCContext &Ctx) {
2566 return new AMDGPUDisassembler(STI, Ctx, T.createMCInstrInfo());
2567}
2568
2574}
unsigned const MachineRegisterInfo * MRI
aarch64 promote const
static int IsAGPROperand(const MCInst &Inst, uint16_t NameIdx, const MCRegisterInfo *MRI)
#define CHECK_RESERVED_BITS_DESC(MASK, DESC)
static VOPModifiers collectVOPModifiers(const MCInst &MI, bool IsVOP3P=false)
static DecodeStatus decodeSrcRegOrImm9(MCInst &Inst, unsigned Imm, uint64_t, const MCDisassembler *Decoder)
static SmallString< 32 > getBitRangeFromMask(uint32_t Mask, unsigned BaseBytes)
Print a string describing the reserved bit range specified by Mask with offset BaseBytes for use in e...
static DecodeStatus decodeSMEMOffset(MCInst &Inst, unsigned Imm, uint64_t Addr, const MCDisassembler *Decoder)
static DecodeStatus decodeVersionImm(MCInst &Inst, unsigned Imm, uint64_t, const MCDisassembler *Decoder)
static DecodeStatus decodeSrcA9(MCInst &Inst, unsigned Imm, uint64_t, const MCDisassembler *Decoder)
static DecodeStatus decodeOperand_VGPR_16(MCInst &Inst, unsigned Imm, uint64_t, const MCDisassembler *Decoder)
static int insertNamedMCOperand(MCInst &MI, const MCOperand &Op, uint16_t NameIdx)
static DecoderUInt128 eat12Bytes(ArrayRef< uint8_t > &Bytes)
#define PRINT_PSEUDO_DIRECTIVE_COMMENT(DIRECTIVE, MASK)
static DecodeStatus decodeSrcRegOrImmDeferred9(MCInst &Inst, unsigned Imm, uint64_t, const MCDisassembler *Decoder)
static DecodeStatus decodeDpp8FI(MCInst &Inst, unsigned Val, uint64_t Addr, const MCDisassembler *Decoder)
static DecodeStatus decodeOperand_VSrcT16_Lo128(MCInst &Inst, unsigned Imm, uint64_t, const MCDisassembler *Decoder)
static DecodeStatus decodeOperand_VSrc_f64(MCInst &Inst, unsigned Imm, uint64_t Addr, const MCDisassembler *Decoder)
static int64_t getInlineImmVal16(unsigned Imm, AMDGPU::OperandSemantics Sema)
static int64_t getInlineImmValBF16(unsigned Imm)
#define DECODE_SDWA(DecName)
static DecodeStatus decodeSOPPBrTarget(MCInst &Inst, unsigned Imm, uint64_t Addr, const MCDisassembler *Decoder)
static DecodeStatus decodeSrcRegOrImmA9(MCInst &Inst, unsigned Imm, uint64_t, const MCDisassembler *Decoder)
#define DECODE_OPERAND_REG_8(RegClass)
static DecodeStatus decodeSrcOp(MCInst &Inst, unsigned EncSize, AMDGPUDisassembler::OpWidthTy OpWidth, unsigned Imm, unsigned EncImm, bool MandatoryLiteral, unsigned ImmWidth, AMDGPU::OperandSemantics Sema, const MCDisassembler *Decoder)
#define PRINT_DIRECTIVE(DIRECTIVE, MASK)
static DecoderUInt128 eat16Bytes(ArrayRef< uint8_t > &Bytes)
static DecodeStatus decodeOperand_VSrcT16_Lo128_Deferred(MCInst &Inst, unsigned Imm, uint64_t, const MCDisassembler *Decoder)
static DecodeStatus DecodeVGPR_16RegisterClass(MCInst &Inst, unsigned Imm, uint64_t, const MCDisassembler *Decoder)
static DecodeStatus decodeSrcReg9(MCInst &Inst, unsigned Imm, uint64_t, const MCDisassembler *Decoder)
static int64_t getInlineImmVal32(unsigned Imm)
#define DECODE_OPERAND_REG_7(RegClass, OpWidth)
static MCDisassembler::DecodeStatus addOperand(MCInst &Inst, const MCOperand &Opnd)
#define CHECK_RESERVED_BITS(MASK)
static DecodeStatus decodeSrcAV10(MCInst &Inst, unsigned Imm, uint64_t, const MCDisassembler *Decoder)
#define SGPR_MAX
static int64_t getInlineImmVal64(unsigned Imm)
static T eatBytes(ArrayRef< uint8_t > &Bytes)
static DecodeStatus decodeOperand_KImmFP(MCInst &Inst, unsigned Imm, uint64_t Addr, const MCDisassembler *Decoder)
static DecodeStatus decodeOperand_VSrcT16(MCInst &Inst, unsigned Imm, uint64_t, const MCDisassembler *Decoder)
static MCDisassembler * createAMDGPUDisassembler(const Target &T, const MCSubtargetInfo &STI, MCContext &Ctx)
LLVM_EXTERNAL_VISIBILITY void LLVMInitializeAMDGPUDisassembler()
static DecodeStatus decodeAVLdSt(MCInst &Inst, unsigned Imm, AMDGPUDisassembler::OpWidthTy Opw, const MCDisassembler *Decoder)
static DecodeStatus DecodeVGPR_16_Lo128RegisterClass(MCInst &Inst, unsigned Imm, uint64_t, const MCDisassembler *Decoder)
#define CHECK_RESERVED_BITS_MSG(MASK, MSG)
static DecodeStatus decodeOperandVOPDDstY(MCInst &Inst, unsigned Val, uint64_t Addr, const void *Decoder)
static MCSymbolizer * createAMDGPUSymbolizer(const Triple &, LLVMOpInfoCallback, LLVMSymbolLookupCallback, void *DisInfo, MCContext *Ctx, std::unique_ptr< MCRelocationInfo > &&RelInfo)
static DecodeStatus decodeBoolReg(MCInst &Inst, unsigned Val, uint64_t Addr, const MCDisassembler *Decoder)
static int64_t getInlineImmValF16(unsigned Imm)
#define GET_FIELD(MASK)
static Error createReservedKDBytesError(unsigned BaseInBytes, unsigned WidthInBytes)
Create an error object to return from onSymbolStart for reserved kernel descriptor bytes being set.
static DecodeStatus decodeSplitBarrier(MCInst &Inst, unsigned Val, uint64_t Addr, const MCDisassembler *Decoder)
static DecodeStatus decodeAV10(MCInst &Inst, unsigned Imm, uint64_t, const MCDisassembler *Decoder)
#define CHECK_RESERVED_BITS_DESC_MSG(MASK, DESC, MSG)
static Error createReservedKDBitsError(uint32_t Mask, unsigned BaseBytes, const char *Msg="")
Create an error object to return from onSymbolStart for reserved kernel descriptor bits being set.
static void adjustMFMA_F8F6F4OpRegClass(const MCRegisterInfo &MRI, MCOperand &MO, uint8_t NumRegs)
Adjust the register values used by V_MFMA_F8F6F4_f8_f8 instructions to the appropriate subregister fo...
This file contains declaration for AMDGPU ISA disassembler.
Provides AMDGPU specific target descriptions.
AMDHSA kernel descriptor definitions.
#define AMDHSA_BITS_GET(SRC, MSK)
Analysis containing CSE Info
Definition: CSEInfo.cpp:27
#define LLVM_EXTERNAL_VISIBILITY
Definition: Compiler.h:128
uint64_t Addr
std::string Name
uint64_t Size
Symbol * Sym
Definition: ELF_riscv.cpp:479
IRTranslator LLVM IR MI
#define I(x, y, z)
Definition: MD5.cpp:58
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
Interface definition for SIRegisterInfo.
void convertEXPInst(MCInst &MI) const
MCOperand createRegOperand(unsigned int RegId) const
MCOperand decodeSpecialReg64(unsigned Val) const
const char * getRegClassName(unsigned RegClassID) const
Expected< bool > decodeCOMPUTE_PGM_RSRC1(uint32_t FourByteBuffer, raw_string_ostream &KdStream) const
Decode as directives that handle COMPUTE_PGM_RSRC1.
Expected< bool > decodeKernelDescriptorDirective(DataExtractor::Cursor &Cursor, ArrayRef< uint8_t > Bytes, raw_string_ostream &KdStream) const
void convertVOPCDPPInst(MCInst &MI) const
unsigned getVgprClassId(const OpWidthTy Width) const
unsigned getAgprClassId(const OpWidthTy Width) const
MCOperand decodeSDWASrc32(unsigned Val) const
void setABIVersion(unsigned Version) override
ELF-specific, set the ABI version from the object header.
Expected< bool > decodeCOMPUTE_PGM_RSRC2(uint32_t FourByteBuffer, raw_string_ostream &KdStream) const
Decode as directives that handle COMPUTE_PGM_RSRC2.
MCOperand decodeDpp8FI(unsigned Val) const
void convertMacDPPInst(MCInst &MI) const
MCOperand decodeVOPDDstYOp(MCInst &Inst, unsigned Val) const
MCOperand decodeBoolReg(unsigned Val) const
void convertDPP8Inst(MCInst &MI) const
MCOperand createVGPR16Operand(unsigned RegIdx, bool IsHi) const
MCOperand errOperand(unsigned V, const Twine &ErrMsg) const
MCOperand decodeVersionImm(unsigned Imm) const
MCOperand decodeSDWASrc(const OpWidthTy Width, unsigned Val, unsigned ImmWidth, AMDGPU::OperandSemantics Sema) const
Expected< bool > decodeKernelDescriptor(StringRef KdName, ArrayRef< uint8_t > Bytes, uint64_t KdAddress) const
MCOperand decodeSplitBarrier(unsigned Val) const
void convertVOP3DPPInst(MCInst &MI) const
void convertTrue16OpSel(MCInst &MI) const
void convertFMAanyK(MCInst &MI, int ImmLitIdx) const
MCOperand decodeMandatoryLiteralConstant(unsigned Imm) const
MCOperand decodeNonVGPRSrcOp(const OpWidthTy Width, unsigned Val, bool MandatoryLiteral=false, unsigned ImmWidth=0, AMDGPU::OperandSemantics Sema=AMDGPU::OperandSemantics::INT) const
Expected< bool > decodeCOMPUTE_PGM_RSRC3(uint32_t FourByteBuffer, raw_string_ostream &KdStream) const
Decode as directives that handle COMPUTE_PGM_RSRC3.
static MCOperand decodeFPImmed(unsigned ImmWidth, unsigned Imm, AMDGPU::OperandSemantics Sema)
MCOperand decodeSrcOp(const OpWidthTy Width, unsigned Val, bool MandatoryLiteral=false, unsigned ImmWidth=0, AMDGPU::OperandSemantics Sema=AMDGPU::OperandSemantics::INT) const
AMDGPUDisassembler(const MCSubtargetInfo &STI, MCContext &Ctx, MCInstrInfo const *MCII)
MCOperand decodeSpecialReg32(unsigned Val) const
MCOperand decodeLiteralConstant(bool ExtendFP64) const
MCOperand decodeSDWAVopcDst(unsigned Val) const
void convertVINTERPInst(MCInst &MI) const
void convertSDWAInst(MCInst &MI) const
DecodeStatus tryDecodeInst(const uint8_t *Table, MCInst &MI, InsnType Inst, uint64_t Address, raw_ostream &Comments) const
unsigned getSgprClassId(const OpWidthTy Width) const
static MCOperand decodeIntImmed(unsigned Imm)
void convertMAIInst(MCInst &MI) const
f8f6f4 instructions have different pseudos depending on the used formats.
DecodeStatus getInstruction(MCInst &MI, uint64_t &Size, ArrayRef< uint8_t > Bytes, uint64_t Address, raw_ostream &CS) const override
Returns the disassembly of a single instruction.
unsigned getTtmpClassId(const OpWidthTy Width) const
void convertMIMGInst(MCInst &MI) const
bool isMacDPP(MCInst &MI) const
int getTTmpIdx(unsigned Val) const
void convertVOP3PDPPInst(MCInst &MI) const
MCOperand createSRegOperand(unsigned SRegClassID, unsigned Val) const
MCOperand decodeSDWASrc16(unsigned Val) const
Expected< bool > onSymbolStart(SymbolInfoTy &Symbol, uint64_t &Size, ArrayRef< uint8_t > Bytes, uint64_t Address) const override
Used to perform separate target specific disassembly for a particular symbol.
bool tryAddingSymbolicOperand(MCInst &Inst, raw_ostream &cStream, int64_t Value, uint64_t Address, bool IsBranch, uint64_t Offset, uint64_t OpSize, uint64_t InstSize) override
Try to add a symbolic operand instead of Value to the MCInst.
void tryAddingPcLoadReferenceComment(raw_ostream &cStream, int64_t Value, uint64_t Address) override
Try to add a comment on the PC-relative load.
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition: ArrayRef.h:41
size_t size() const
size - Get the array size.
Definition: ArrayRef.h:168
const T * data() const
Definition: ArrayRef.h:165
ArrayRef< T > slice(size_t N, size_t M) const
slice(n, m) - Chop off the first N elements of the array, and keep M elements in the array.
Definition: ArrayRef.h:198
This class represents an Operation in the Expression.
A class representing a position in a DataExtractor, as well as any error encountered during extractio...
Definition: DataExtractor.h:54
uint64_t tell() const
Return the current position of this Cursor.
Definition: DataExtractor.h:71
uint32_t getU32(uint64_t *offset_ptr, Error *Err=nullptr) const
Extract a uint32_t value from *offset_ptr.
uint16_t getU16(uint64_t *offset_ptr, Error *Err=nullptr) const
Extract a uint16_t value from *offset_ptr.
void skip(Cursor &C, uint64_t Length) const
Advance the Cursor position by the given number of bytes.
StringRef getBytes(uint64_t *OffsetPtr, uint64_t Length, Error *Err=nullptr) const
Extract a fixed number of bytes from the specified offset.
Lightweight error class with error context and mandatory checking.
Definition: Error.h:160
Tagged union holding either a T or a Error.
Definition: Error.h:481
static const MCBinaryExpr * createOr(const MCExpr *LHS, const MCExpr *RHS, MCContext &Ctx)
Definition: MCExpr.h:602
static const MCConstantExpr * create(int64_t Value, MCContext &Ctx, bool PrintInHex=false, unsigned SizeInBytes=0)
Definition: MCExpr.cpp:222
Context object for machine code objects.
Definition: MCContext.h:83
const MCRegisterInfo * getRegisterInfo() const
Definition: MCContext.h:414
void reportWarning(SMLoc L, const Twine &Msg)
Definition: MCContext.cpp:1079
MCSymbol * getOrCreateSymbol(const Twine &Name)
Look up the symbol with the specified Name in this context, creating it if it does not already exist.
Definition: MCContext.cpp:212
Superclass for all disassemblers.
MCContext & getContext() const
const MCSubtargetInfo & STI
raw_ostream * CommentStream
DecodeStatus
Ternary decode status.
Base class for the full range of assembler expressions which are needed for parsing.
Definition: MCExpr.h:34
Instances of this class represent a single low-level machine instruction.
Definition: MCInst.h:185
unsigned getNumOperands() const
Definition: MCInst.h:209
unsigned getOpcode() const
Definition: MCInst.h:199
void addOperand(const MCOperand Op)
Definition: MCInst.h:211
const MCOperand & getOperand(unsigned i) const
Definition: MCInst.h:207
Describe properties that are true of each instruction in the target description file.
Definition: MCInstrDesc.h:198
Interface to description of machine instruction set.
Definition: MCInstrInfo.h:26
Instances of this class represent operands of the MCInst class.
Definition: MCInst.h:37
static MCOperand createExpr(const MCExpr *Val)
Definition: MCInst.h:163
int64_t getImm() const
Definition: MCInst.h:81
static MCOperand createReg(MCRegister Reg)
Definition: MCInst.h:135
static MCOperand createImm(int64_t Val)
Definition: MCInst.h:142
void setReg(MCRegister Reg)
Set the register number.
Definition: MCInst.h:76
bool isReg() const
Definition: MCInst.h:62
MCRegister getReg() const
Returns the register number.
Definition: MCInst.h:70
bool isValid() const
Definition: MCInst.h:61
MCRegisterClass - Base class of TargetRegisterClass.
unsigned getRegister(unsigned i) const
getRegister - Return the specified register in the class.
bool contains(MCRegister Reg) const
contains - Return true if the specified register is included in this register class.
MCRegisterInfo base class - We assume that the target defines a static array of MCRegisterDesc object...
MCRegister getMatchingSuperReg(MCRegister Reg, unsigned SubIdx, const MCRegisterClass *RC) const
Return a super-register of the specified register Reg so its sub-register of index SubIdx is Reg.
uint16_t getEncodingValue(MCRegister Reg) const
Returns the encoding for Reg.
const MCRegisterClass & getRegClass(unsigned i) const
Returns the register class associated with the enumeration value.
MCRegister getSubReg(MCRegister Reg, unsigned Idx) const
Returns the physical register number of sub-register Idx for physical register Reg.
Wrapper class representing physical registers. Should be passed by value.
Definition: MCRegister.h:33
Generic base class for all target subtargets.
bool hasFeature(unsigned Feature) const
static const MCSymbolRefExpr * create(const MCSymbol *Symbol, MCContext &Ctx)
Definition: MCExpr.h:398
MCSymbol - Instances of this class represent a symbol name in the MC file, and MCSymbols are created ...
Definition: MCSymbol.h:41
Symbolize and annotate disassembled instructions.
Definition: MCSymbolizer.h:39
MCContext & Ctx
Definition: MCSymbolizer.h:41
Represents a location in source code.
Definition: SMLoc.h:23
SmallString - A SmallString is just a SmallVector with methods and accessors that make it work better...
Definition: SmallString.h:26
StringRef - Represent a constant reference to a string, i.e. a character array and a length, which need not be null terminated.
Definition: StringRef.h:51
Target - Wrapper for Target specific information.
Triple - Helper class for working with autoconf configuration names.
Definition: Triple.h:44
Twine - A lightweight data structure for efficiently representing the concatenation of temporary values as strings.
Definition: Twine.h:81
LLVM Value Representation.
Definition: Value.h:74
This class implements an extremely fast bulk output stream that can only output to a stream.
Definition: raw_ostream.h:52
A raw_ostream that writes to an std::string.
Definition: raw_ostream.h:661
std::string & str()
Returns the string's reference.
Definition: raw_ostream.h:679
A raw_ostream that writes to a SmallVector or SmallString.
Definition: raw_ostream.h:691
const char *(* LLVMSymbolLookupCallback)(void *DisInfo, uint64_t ReferenceValue, uint64_t *ReferenceType, uint64_t ReferencePC, const char **ReferenceName)
The type for the symbol lookup function.
int(* LLVMOpInfoCallback)(void *DisInfo, uint64_t PC, uint64_t Offset, uint64_t OpSize, uint64_t InstSize, int TagType, void *TagBuf)
The type for the operand information callback function.
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
unsigned getVGPREncodingGranule(const MCSubtargetInfo *STI, std::optional< bool > EnableWavefrontSize32)
unsigned getSGPREncodingGranule(const MCSubtargetInfo *STI)
ArrayRef< GFXVersion > getGFXVersions()
LLVM_READONLY const MIMGInfo * getMIMGInfo(unsigned Opc)
MCRegister getMCReg(MCRegister Reg, const MCSubtargetInfo &STI)
If Reg is a pseudo reg, return the correct hardware register given STI; otherwise, return Reg.
int getMIMGOpcode(unsigned BaseOpcode, unsigned MIMGEncoding, unsigned VDataDwords, unsigned VAddrDwords)
bool isGFX10(const MCSubtargetInfo &STI)
LLVM_READONLY int16_t getNamedOperandIdx(uint16_t Opcode, uint16_t NamedIdx)
bool isGFX12Plus(const MCSubtargetInfo &STI)
bool hasPackedD16(const MCSubtargetInfo &STI)
bool isVOPC64DPP(unsigned Opc)
unsigned getAMDHSACodeObjectVersion(const Module &M)
bool isGFX9(const MCSubtargetInfo &STI)
LLVM_READONLY const MIMGDimInfo * getMIMGDimInfoByEncoding(uint8_t DimEnc)
LLVM_READONLY bool hasNamedOperand(uint64_t Opcode, uint64_t NamedIdx)
bool hasG16(const MCSubtargetInfo &STI)
unsigned getAddrSizeMIMGOp(const MIMGBaseOpcodeInfo *BaseOpcode, const MIMGDimInfo *Dim, bool IsA16, bool IsG16Supported)
bool isGFX11Plus(const MCSubtargetInfo &STI)
bool isGFX10Plus(const MCSubtargetInfo &STI)
@ OPERAND_REG_IMM_FP32_DEFERRED
Definition: SIDefines.h:209
@ OPERAND_REG_IMM_FP16_DEFERRED
Definition: SIDefines.h:208
bool hasGDS(const MCSubtargetInfo &STI)
bool isGFX9Plus(const MCSubtargetInfo &STI)
unsigned hasKernargPreload(const MCSubtargetInfo &STI)
bool isMAC(unsigned Opc)
LLVM_READONLY const MIMGBaseOpcodeInfo * getMIMGBaseOpcodeInfo(unsigned BaseOpcode)
bool hasVOPD(const MCSubtargetInfo &STI)
const MFMA_F8F6F4_Info * getMFMA_F8F6F4_WithFormatArgs(unsigned CBSZ, unsigned BLGP, unsigned F8F8Opcode)
@ C
The default llvm calling convention, compatible with C.
Definition: CallingConv.h:34
@ STT_NOTYPE
Definition: ELF.h:1358
@ STT_AMDGPU_HSA_KERNEL
Definition: ELF.h:1372
@ STT_OBJECT
Definition: ELF.h:1359
uint16_t read16(const void *P, endianness E)
Definition: Endian.h:402
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
@ Offset
Definition: DWP.cpp:480
int popcount(T Value) noexcept
Count the number of set bits in a value.
Definition: bit.h:385
raw_fd_ostream & outs()
This returns a reference to a raw_fd_ostream for standard output.
SmallVectorImpl< T >::const_pointer c_str(SmallVectorImpl< T > &str)
Error createStringError(std::error_code EC, char const *Fmt, const Ts &... Vals)
Create formatted StringError object.
Definition: Error.h:1291
int countr_zero(T Val)
Count the number of 0's from the least significant bit toward the most significant bit, stopping at the first 1.
Definition: bit.h:215
void report_fatal_error(Error Err, bool gen_crash_diag=true)
Report a serious error, calling any installed error handler.
Definition: Error.cpp:167
void cantFail(Error Err, const char *Msg=nullptr)
Report a fatal error if Err is a failure value.
Definition: Error.h:756
Target & getTheGCNTarget()
The target for GCN GPUs.
@ Add
Sum of integers.
std::vector< SymbolInfoTy > SectionSymbolsTy
unsigned M0(unsigned Val)
Definition: VE.h:375
auto find_if(R &&Range, UnaryPredicate P)
Provide wrappers to std::find_if which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1766
Description of the encoding of one expression Op.
static void RegisterMCSymbolizer(Target &T, Target::MCSymbolizerCtorTy Fn)
RegisterMCSymbolizer - Register an MCSymbolizer implementation for the given target.
static void RegisterMCDisassembler(Target &T, Target::MCDisassemblerCtorTy Fn)
RegisterMCDisassembler - Register a MCDisassembler implementation for the given target.