LLVM 23.0.0git
AMDGPUDisassembler.cpp
Go to the documentation of this file.
1//===- AMDGPUDisassembler.cpp - Disassembler for AMDGPU ISA ---------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9//===----------------------------------------------------------------------===//
10//
11/// \file
12///
13/// This file contains the definition of the AMDGPU ISA disassembler.
14//
15//===----------------------------------------------------------------------===//
16
17// ToDo: What to do with instruction suffixes (v_mov_b32 vs v_mov_b32_e32)?
18
22#include "SIDefines.h"
23#include "SIRegisterInfo.h"
29#include "llvm/MC/MCAsmInfo.h"
30#include "llvm/MC/MCContext.h"
31#include "llvm/MC/MCDecoder.h"
33#include "llvm/MC/MCExpr.h"
34#include "llvm/MC/MCInstrDesc.h"
40
41using namespace llvm;
42using namespace llvm::MCD;
43
44#define DEBUG_TYPE "amdgpu-disassembler"
45
46#define SGPR_MAX \
47 (isGFX10Plus() ? AMDGPU::EncValues::SGPR_MAX_GFX10 \
48 : AMDGPU::EncValues::SGPR_MAX_SI)
49
51
52static int64_t getInlineImmValF16(unsigned Imm);
53static int64_t getInlineImmValBF16(unsigned Imm);
54static int64_t getInlineImmVal32(unsigned Imm);
55static int64_t getInlineImmVal64(unsigned Imm);
56
58 MCContext &Ctx, MCInstrInfo const *MCII)
59 : MCDisassembler(STI, Ctx), MCII(MCII), MRI(*Ctx.getRegisterInfo()),
60 MAI(Ctx.getAsmInfo()),
61 HwModeRegClass(STI.getHwMode(MCSubtargetInfo::HwMode_RegInfo)),
62 TargetMaxInstBytes(MAI.getMaxInstLength(&STI)),
63 CodeObjectVersion(AMDGPU::getDefaultAMDHSACodeObjectVersion()) {
64 // ToDo: AMDGPUDisassembler supports only VI ISA.
65 if (!STI.hasFeature(AMDGPU::FeatureGCN3Encoding) && !isGFX10Plus())
66 reportFatalUsageError("disassembly not yet supported for subtarget");
67
68 for (auto [Symbol, Code] : AMDGPU::UCVersion::getGFXVersions())
69 createConstantSymbolExpr(Symbol, Code);
70
71 UCVersionW64Expr = createConstantSymbolExpr("UC_VERSION_W64_BIT", 0x2000);
72 UCVersionW32Expr = createConstantSymbolExpr("UC_VERSION_W32_BIT", 0x4000);
73 UCVersionMDPExpr = createConstantSymbolExpr("UC_VERSION_MDP_BIT", 0x8000);
74}
75
79
81 unsigned EFlags) const {
82 OS << "\t.amdgcn_target \""
83 << STI.getTargetTriple().normalize(Triple::CanonicalForm::FOUR_IDENT)
84 << '-';
85
86 // Get CPU name from ELF e_flags MACH field
87 unsigned MACH = EFlags & ELF::EF_AMDGPU_MACH;
88
89#define X(NUM, ENUM, NAME) \
90 case ELF::ENUM: \
91 OS << NAME; \
92 break;
93 switch (MACH) {
95 default:
96 OS << "unknown";
97 break;
98 }
99#undef X
100
101 // Add xnack and sramecc from ELF flags (v4 format)
102 if (CodeObjectVersion >= AMDGPU::AMDHSA_COV4) {
103 unsigned SrameccSetting = EFlags & ELF::EF_AMDGPU_FEATURE_SRAMECC_V4;
104 switch (SrameccSetting) {
107 break;
109 OS << ":sramecc-";
110 break;
112 OS << ":sramecc+";
113 break;
114 }
115
116 unsigned XnackSetting = EFlags & ELF::EF_AMDGPU_FEATURE_XNACK_V4;
117 switch (XnackSetting) {
120 break;
122 OS << ":xnack-";
123 break;
125 OS << ":xnack+";
126 break;
127 }
128 }
129
130 OS << "\"\n";
131}
132
134addOperand(MCInst &Inst, const MCOperand& Opnd) {
135 Inst.addOperand(Opnd);
136 return Opnd.isValid() ?
139}
140
142 AMDGPU::OpName Name) {
143 int OpIdx = AMDGPU::getNamedOperandIdx(MI.getOpcode(), Name);
144 if (OpIdx != -1) {
145 auto *I = MI.begin();
146 std::advance(I, OpIdx);
147 MI.insert(I, Op);
148 }
149 return OpIdx;
150}
151
152static DecodeStatus decodeSOPPBrTarget(MCInst &Inst, unsigned Imm,
153 uint64_t Addr,
154 const MCDisassembler *Decoder) {
155 const auto *DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
156
157 // Our branches take a simm16.
158 int64_t Offset = SignExtend64<16>(Imm) * 4 + 4 + Addr;
159
160 if (DAsm->tryAddingSymbolicOperand(Inst, Offset, Addr, true, 2, 2, 0))
162 return addOperand(Inst, MCOperand::createImm(Imm));
163}
164
165static DecodeStatus decodeSMEMOffset(MCInst &Inst, unsigned Imm, uint64_t Addr,
166 const MCDisassembler *Decoder) {
167 const auto *DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
168 int64_t Offset;
169 if (DAsm->isGFX12Plus()) { // GFX12 supports 24-bit signed offsets.
171 } else if (DAsm->isVI()) { // VI supports 20-bit unsigned offsets.
172 Offset = Imm & 0xFFFFF;
173 } else { // GFX9+ supports 21-bit signed offsets.
175 }
177}
178
179static DecodeStatus decodeBoolReg(MCInst &Inst, unsigned Val, uint64_t Addr,
180 const MCDisassembler *Decoder) {
181 const auto *DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
182 return addOperand(Inst, DAsm->decodeBoolReg(Inst, Val));
183}
184
185static DecodeStatus decodeSplitBarrier(MCInst &Inst, unsigned Val,
186 uint64_t Addr,
187 const MCDisassembler *Decoder) {
188 const auto *DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
189 return addOperand(Inst, DAsm->decodeSplitBarrier(Inst, Val));
190}
191
192static DecodeStatus decodeDpp8FI(MCInst &Inst, unsigned Val, uint64_t Addr,
193 const MCDisassembler *Decoder) {
194 const auto *DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
195 return addOperand(Inst, DAsm->decodeDpp8FI(Val));
196}
197
// Defines a static decoder callback named StaticDecoderName. The callback
// casts Decoder to AMDGPUDisassembler, calls its DecoderName member with Imm,
// and appends the returned operand to Inst through addOperand. The address
// argument is ignored.
198#define DECODE_OPERAND(StaticDecoderName, DecoderName) \
199 static DecodeStatus StaticDecoderName(MCInst &Inst, unsigned Imm, \
200 uint64_t /*Addr*/, \
201 const MCDisassembler *Decoder) { \
202 auto DAsm = static_cast<const AMDGPUDisassembler *>(Decoder); \
203 return addOperand(Inst, DAsm->DecoderName(Imm)); \
204 }
205
206// Decoder for registers, decode directly using RegClassID. Imm(8-bit) is
207// number of register. Used by VGPR only and AGPR only operands.
// Expands to a static function named Decode<RegClass>RegisterClass. It
// asserts that Imm fits in 8 bits, then appends the operand returned by
// createRegOperand(AMDGPU::<RegClass>RegClassID, Imm) to Inst.
208#define DECODE_OPERAND_REG_8(RegClass) \
209 static DecodeStatus Decode##RegClass##RegisterClass( \
210 MCInst &Inst, unsigned Imm, uint64_t /*Addr*/, \
211 const MCDisassembler *Decoder) { \
212 assert(Imm < (1 << 8) && "8-bit encoding"); \
213 auto DAsm = static_cast<const AMDGPUDisassembler *>(Decoder); \
214 return addOperand( \
215 Inst, DAsm->createRegOperand(AMDGPU::RegClass##RegClassID, Imm)); \
216 }
217
// Defines a static decoder callback called Name. The callback asserts that
// Imm fits in EncSize bits and appends the result of
// AMDGPUDisassembler::decodeSrcOp(Inst, OpWidth, EncImm) to Inst. EncImm is
// pasted textually into the body, so it may be an expression that refers to
// the callback's Imm parameter.
218#define DECODE_SrcOp(Name, EncSize, OpWidth, EncImm) \
219 static DecodeStatus Name(MCInst &Inst, unsigned Imm, uint64_t /*Addr*/, \
220 const MCDisassembler *Decoder) { \
221 assert(Imm < (1 << EncSize) && #EncSize "-bit encoding"); \
222 auto DAsm = static_cast<const AMDGPUDisassembler *>(Decoder); \
223 return addOperand(Inst, DAsm->decodeSrcOp(Inst, OpWidth, EncImm)); \
224 }
225
226static DecodeStatus decodeSrcOp(MCInst &Inst, unsigned EncSize,
227 unsigned OpWidth, unsigned Imm, unsigned EncImm,
228 const MCDisassembler *Decoder) {
229 assert(Imm < (1U << EncSize) && "Operand doesn't fit encoding!");
230 const auto *DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
231 return addOperand(Inst, DAsm->decodeSrcOp(Inst, OpWidth, EncImm));
232}
233
234// Decoder for registers. Imm(7-bit) is number of register, uses decodeSrcOp to
235// get register class. Used by SGPR only operands.
236#define DECODE_OPERAND_SREG_7(RegClass, OpWidth) \
237 DECODE_SrcOp(Decode##RegClass##RegisterClass, 7, OpWidth, Imm)
238
// Same expansion as DECODE_OPERAND_SREG_7, except that the generated decoder
// asserts an 8-bit encoding instead of a 7-bit one.
239#define DECODE_OPERAND_SREG_8(RegClass, OpWidth) \
240 DECODE_SrcOp(Decode##RegClass##RegisterClass, 8, OpWidth, Imm)
241
242// Decoder for registers. Imm(10-bit): Imm{7-0} is number of register,
243// Imm{9} is acc(agpr or vgpr) Imm{8} should be 0 (see VOP3Pe_SMFMAC).
244// Set Imm{8} to 1 (IS_VGPR) to decode using 'enum10' from decodeSrcOp.
245// Used by AV_ register classes (AGPR or VGPR only register operands).
246template <unsigned OpWidth>
247static DecodeStatus decodeAV10(MCInst &Inst, unsigned Imm, uint64_t /* Addr */,
248 const MCDisassembler *Decoder) {
249 return decodeSrcOp(Inst, 10, OpWidth, Imm, Imm | AMDGPU::EncValues::IS_VGPR,
250 Decoder);
251}
252
253// Decoder for Src(9-bit encoding) registers only.
254template <unsigned OpWidth>
255static DecodeStatus decodeSrcReg9(MCInst &Inst, unsigned Imm,
256 uint64_t /* Addr */,
257 const MCDisassembler *Decoder) {
258 return decodeSrcOp(Inst, 9, OpWidth, Imm, Imm, Decoder);
259}
260
261// Decoder for Src(9-bit encoding) AGPR, register number encoded in 9bits, set
262// Imm{9} to 1 (set acc) and decode using 'enum10' from decodeSrcOp, registers
263// only.
264template <unsigned OpWidth>
265static DecodeStatus decodeSrcA9(MCInst &Inst, unsigned Imm, uint64_t /* Addr */,
266 const MCDisassembler *Decoder) {
267 return decodeSrcOp(Inst, 9, OpWidth, Imm, Imm | 512, Decoder);
268}
269
270// Decoder for 'enum10' from decodeSrcOp, Imm{0-8} is 9-bit Src encoding
271// Imm{9} is acc, registers only.
272template <unsigned OpWidth>
273static DecodeStatus decodeSrcAV10(MCInst &Inst, unsigned Imm,
274 uint64_t /* Addr */,
275 const MCDisassembler *Decoder) {
276 return decodeSrcOp(Inst, 10, OpWidth, Imm, Imm, Decoder);
277}
278
279// Decoder for RegisterOperands using 9-bit Src encoding. Operand can be
280// register from RegClass or immediate. Registers that don't belong to RegClass
281// will be decoded and InstPrinter will report warning. Immediate will be
282// decoded into constant matching the OperandType (important for floating point
283// types).
284template <unsigned OpWidth>
285static DecodeStatus decodeSrcRegOrImm9(MCInst &Inst, unsigned Imm,
286 uint64_t /* Addr */,
287 const MCDisassembler *Decoder) {
288 return decodeSrcOp(Inst, 9, OpWidth, Imm, Imm, Decoder);
289}
290
291// Decoder for Src(9-bit encoding) AGPR or immediate. Set Imm{9} to 1 (set acc)
292// and decode using 'enum10' from decodeSrcOp.
293template <unsigned OpWidth>
294static DecodeStatus decodeSrcRegOrImmA9(MCInst &Inst, unsigned Imm,
295 uint64_t /* Addr */,
296 const MCDisassembler *Decoder) {
297 return decodeSrcOp(Inst, 9, OpWidth, Imm, Imm | 512, Decoder);
298}
299
300// Default decoders generated by tablegen: 'Decode<RegClass>RegisterClass'
301// when RegisterClass is used as an operand. Most often used for destination
302// operands.
303
305DECODE_OPERAND_REG_8(VGPR_32_Lo128)
308DECODE_OPERAND_REG_8(VReg_128)
309DECODE_OPERAND_REG_8(VReg_192)
310DECODE_OPERAND_REG_8(VReg_256)
311DECODE_OPERAND_REG_8(VReg_288)
312DECODE_OPERAND_REG_8(VReg_320)
313DECODE_OPERAND_REG_8(VReg_352)
314DECODE_OPERAND_REG_8(VReg_384)
315DECODE_OPERAND_REG_8(VReg_512)
316DECODE_OPERAND_REG_8(VReg_1024)
317
318DECODE_OPERAND_SREG_7(SReg_32, 32)
319DECODE_OPERAND_SREG_7(SReg_32_XM0, 32)
320DECODE_OPERAND_SREG_7(SReg_32_XEXEC, 32)
321DECODE_OPERAND_SREG_7(SReg_32_XM0_XEXEC, 32)
322DECODE_OPERAND_SREG_7(SReg_32_XEXEC_HI, 32)
323DECODE_OPERAND_SREG_7(SReg_64_XEXEC, 64)
324DECODE_OPERAND_SREG_7(SReg_64_XEXEC_XNULL, 64)
325DECODE_OPERAND_SREG_7(SReg_96, 96)
326DECODE_OPERAND_SREG_7(SReg_128, 128)
327DECODE_OPERAND_SREG_7(SReg_128_XNULL, 128)
328DECODE_OPERAND_SREG_7(SReg_256, 256)
329DECODE_OPERAND_SREG_7(SReg_256_XNULL, 256)
330DECODE_OPERAND_SREG_7(SReg_512, 512)
331
332DECODE_OPERAND_SREG_8(SReg_64, 64)
333
336DECODE_OPERAND_REG_8(AReg_128)
337DECODE_OPERAND_REG_8(AReg_256)
338DECODE_OPERAND_REG_8(AReg_512)
339DECODE_OPERAND_REG_8(AReg_1024)
340
342 uint64_t /*Addr*/,
343 const MCDisassembler *Decoder) {
344 assert(isUInt<10>(Imm) && "10-bit encoding expected");
345 assert((Imm & (1 << 8)) == 0 && "Imm{8} should not be used");
346
347 bool IsHi = Imm & (1 << 9);
348 unsigned RegIdx = Imm & 0xff;
349 const auto *DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
350 return addOperand(Inst, DAsm->createVGPR16Operand(RegIdx, IsHi));
351}
352
353static DecodeStatus
355 const MCDisassembler *Decoder) {
356 assert(isUInt<8>(Imm) && "8-bit encoding expected");
357
358 bool IsHi = Imm & (1 << 7);
359 unsigned RegIdx = Imm & 0x7f;
360 const auto *DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
361 return addOperand(Inst, DAsm->createVGPR16Operand(RegIdx, IsHi));
362}
363
364template <unsigned OpWidth>
366 uint64_t /*Addr*/,
367 const MCDisassembler *Decoder) {
368 assert(isUInt<9>(Imm) && "9-bit encoding expected");
369
370 const auto *DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
371 if (Imm & AMDGPU::EncValues::IS_VGPR) {
372 bool IsHi = Imm & (1 << 7);
373 unsigned RegIdx = Imm & 0x7f;
374 return addOperand(Inst, DAsm->createVGPR16Operand(RegIdx, IsHi));
375 }
376 return addOperand(Inst, DAsm->decodeNonVGPRSrcOp(Inst, OpWidth, Imm & 0xFF));
377}
378
379template <unsigned OpWidth>
380static DecodeStatus decodeOperand_VSrcT16(MCInst &Inst, unsigned Imm,
381 uint64_t /*Addr*/,
382 const MCDisassembler *Decoder) {
383 assert(isUInt<10>(Imm) && "10-bit encoding expected");
384
385 const auto *DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
386 if (Imm & AMDGPU::EncValues::IS_VGPR) {
387 bool IsHi = Imm & (1 << 9);
388 unsigned RegIdx = Imm & 0xff;
389 return addOperand(Inst, DAsm->createVGPR16Operand(RegIdx, IsHi));
390 }
391 return addOperand(Inst, DAsm->decodeNonVGPRSrcOp(Inst, OpWidth, Imm & 0xFF));
392}
393
394static DecodeStatus decodeOperand_VGPR_16(MCInst &Inst, unsigned Imm,
395 uint64_t /*Addr*/,
396 const MCDisassembler *Decoder) {
397 assert(isUInt<10>(Imm) && "10-bit encoding expected");
398 assert(Imm & AMDGPU::EncValues::IS_VGPR && "VGPR expected");
399
400 const auto *DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
401
402 bool IsHi = Imm & (1 << 9);
403 unsigned RegIdx = Imm & 0xff;
404 return addOperand(Inst, DAsm->createVGPR16Operand(RegIdx, IsHi));
405}
406
407static DecodeStatus decodeOperand_KImmFP(MCInst &Inst, unsigned Imm,
408 uint64_t Addr,
409 const MCDisassembler *Decoder) {
410 const auto *DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
411 return addOperand(Inst, DAsm->decodeMandatoryLiteralConstant(Imm));
412}
413
415 uint64_t Addr,
416 const MCDisassembler *Decoder) {
417 const auto *DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
418 return addOperand(Inst, DAsm->decodeMandatoryLiteral64Constant(Imm));
419}
420
421static DecodeStatus decodeOperandVOPDDstY(MCInst &Inst, unsigned Val,
422 uint64_t Addr, const void *Decoder) {
423 const auto *DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
424 return addOperand(Inst, DAsm->decodeVOPDDstYOp(Inst, Val));
425}
426
427static DecodeStatus decodeAVLdSt(MCInst &Inst, unsigned Imm, unsigned Opw,
428 const MCDisassembler *Decoder) {
429 const auto *DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
430 return addOperand(Inst, DAsm->decodeSrcOp(Inst, Opw, Imm | 256));
431}
432
433template <unsigned Opw>
434static DecodeStatus decodeAVLdSt(MCInst &Inst, unsigned Imm,
435 uint64_t /* Addr */,
436 const MCDisassembler *Decoder) {
437 return decodeAVLdSt(Inst, Imm, Opw, Decoder);
438}
439
440static DecodeStatus decodeOperand_VSrc_f64(MCInst &Inst, unsigned Imm,
441 uint64_t Addr,
442 const MCDisassembler *Decoder) {
443 assert(Imm < (1 << 9) && "9-bit encoding");
444 const auto *DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
445 return addOperand(Inst, DAsm->decodeSrcOp(Inst, 64, Imm));
446}
447
// Shorthand for DECODE_OPERAND where the static callback and the
// AMDGPUDisassembler member share the same name, decodeSDWA<DecName>.
448#define DECODE_SDWA(DecName) \
449DECODE_OPERAND(decodeSDWA##DecName, decodeSDWA##DecName)
450
// Instantiate the static callbacks decodeSDWASrc32, decodeSDWASrc16 and
// decodeSDWAVopcDst.
451DECODE_SDWA(Src32)
452DECODE_SDWA(Src16)
453DECODE_SDWA(VopcDst)
454
455static DecodeStatus decodeVersionImm(MCInst &Inst, unsigned Imm,
456 uint64_t /* Addr */,
457 const MCDisassembler *Decoder) {
458 const auto *DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
459 return addOperand(Inst, DAsm->decodeVersionImm(Imm));
460}
461
462#include "AMDGPUGenDisassemblerTables.inc"
463
464namespace {
465// Define bitwidths for various types used to instantiate the decoder.
// Each line is an explicit specialization of the InsnBitWidth variable
// template for one instruction-word type used in this file: uint32_t and
// uint64_t for 32/64-bit words, std::bitset<96> and std::bitset<128> for the
// wider encodings.
// NOTE(review): the primary InsnBitWidth template is declared outside this
// view - presumably by the MC decoder support pulled in above; confirm.
466template <> constexpr uint32_t InsnBitWidth<uint32_t> = 32;
467template <> constexpr uint32_t InsnBitWidth<uint64_t> = 64;
468template <> constexpr uint32_t InsnBitWidth<std::bitset<96>> = 96;
469template <> constexpr uint32_t InsnBitWidth<std::bitset<128>> = 128;
470} // namespace
471
472//===----------------------------------------------------------------------===//
473//
474//===----------------------------------------------------------------------===//
475
476template <typename InsnType>
478 InsnType Inst, uint64_t Address,
479 raw_ostream &Comments) const {
480 assert(MI.getOpcode() == 0);
481 assert(MI.getNumOperands() == 0);
482 MCInst TmpInst;
483 HasLiteral = false;
484 const auto SavedBytes = Bytes;
485
486 SmallString<64> LocalComments;
487 raw_svector_ostream LocalCommentStream(LocalComments);
488 CommentStream = &LocalCommentStream;
489
490 DecodeStatus Res =
491 decodeInstruction(Table, TmpInst, Inst, Address, this, STI);
492
493 CommentStream = nullptr;
494
495 if (Res != MCDisassembler::Fail) {
496 MI = TmpInst;
497 Comments << LocalComments;
499 }
500 Bytes = SavedBytes;
502}
503
504template <typename InsnType>
507 MCInst &MI, InsnType Inst, uint64_t Address,
508 raw_ostream &Comments) const {
509 for (const uint8_t *T : {Table1, Table2}) {
510 if (DecodeStatus Res = tryDecodeInst(T, MI, Inst, Address, Comments))
511 return Res;
512 }
514}
515
516template <typename T> static inline T eatBytes(ArrayRef<uint8_t>& Bytes) {
517 assert(Bytes.size() >= sizeof(T));
518 const auto Res =
520 Bytes = Bytes.slice(sizeof(T));
521 return Res;
522}
523
524static inline std::bitset<96> eat12Bytes(ArrayRef<uint8_t> &Bytes) {
525 using namespace llvm::support::endian;
526 assert(Bytes.size() >= 12);
527 std::bitset<96> Lo(read<uint64_t, endianness::little>(Bytes.data()));
528 Bytes = Bytes.slice(8);
529 std::bitset<96> Hi(read<uint32_t, endianness::little>(Bytes.data()));
530 Bytes = Bytes.slice(4);
531 return (Hi << 64) | Lo;
532}
533
534static inline std::bitset<128> eat16Bytes(ArrayRef<uint8_t> &Bytes) {
535 using namespace llvm::support::endian;
536 assert(Bytes.size() >= 16);
537 std::bitset<128> Lo(read<uint64_t, endianness::little>(Bytes.data()));
538 Bytes = Bytes.slice(8);
539 std::bitset<128> Hi(read<uint64_t, endianness::little>(Bytes.data()));
540 Bytes = Bytes.slice(8);
541 return (Hi << 64) | Lo;
542}
543
544void AMDGPUDisassembler::decodeImmOperands(MCInst &MI,
545 const MCInstrInfo &MCII) const {
546 const MCInstrDesc &Desc = MCII.get(MI.getOpcode());
547 for (auto [OpNo, OpDesc] : enumerate(Desc.operands())) {
548 if (OpNo >= MI.getNumOperands())
549 continue;
550
551 // TODO: Fix V_DUAL_FMAMK_F32_X_FMAAK_F32_gfx12 vsrc operands,
552 // defined to take VGPR_32, but in reality allowing inline constants.
553 bool IsSrc = AMDGPU::OPERAND_SRC_FIRST <= OpDesc.OperandType &&
554 OpDesc.OperandType <= AMDGPU::OPERAND_SRC_LAST;
555 if (!IsSrc && OpDesc.OperandType != MCOI::OPERAND_REGISTER)
556 continue;
557
558 MCOperand &Op = MI.getOperand(OpNo);
559 if (!Op.isImm())
560 continue;
561 int64_t Imm = Op.getImm();
564 Op = decodeIntImmed(Imm);
565 continue;
566 }
567
569 Op = decodeLiteralConstant(Desc, OpDesc);
570 continue;
571 }
572
575 switch (OpDesc.OperandType) {
581 break;
584 Imm = getInlineImmValF16(Imm);
585 break;
588 Imm = getInlineImmValF16(Imm);
589 break;
591 // V_PK_FMAC_F16 on GFX11+ duplicates the f16 inline constant to both
592 // halves, so we need to produce the duplicated value for correct
593 // round-trip.
594 if (isGFX11Plus()) {
595 int64_t F16Val = getInlineImmValF16(Imm);
596 Imm = (F16Val << 16) | (F16Val & 0xFFFF);
597 } else {
598 Imm = getInlineImmValF16(Imm);
599 }
600 break;
601 }
609 Imm = getInlineImmVal64(Imm);
610 break;
611 default:
612 Imm = getInlineImmVal32(Imm);
613 }
614 Op.setImm(Imm);
615 }
616 }
617}
618
620 ArrayRef<uint8_t> Bytes_,
622 raw_ostream &CS) const {
623 unsigned MaxInstBytesNum = std::min((size_t)TargetMaxInstBytes, Bytes_.size());
624 Bytes = Bytes_.slice(0, MaxInstBytesNum);
625
626 // In case the opcode is not recognized we'll assume a Size of 4 bytes (unless
627 // there are fewer bytes left). This will be overridden on success.
628 Size = std::min((size_t)4, Bytes_.size());
629
630 do {
631 // ToDo: better to switch encoding length using some bit predicate
632 // but it is unknown yet, so try all we can
633
634 // Try to decode DPP and SDWA first to solve conflict with VOP1 and VOP2
635 // encodings
636 if (isGFX1250Plus() && Bytes.size() >= 16) {
637 std::bitset<128> DecW = eat16Bytes(Bytes);
638 if (tryDecodeInst(DecoderTableGFX1250128, MI, DecW, Address, CS))
639 break;
640 Bytes = Bytes_.slice(0, MaxInstBytesNum);
641 }
642
643 if (isGFX11Plus() && Bytes.size() >= 12) {
644 std::bitset<96> DecW = eat12Bytes(Bytes);
645
646 if (isGFX1170() &&
647 tryDecodeInst(DecoderTableGFX117096, DecoderTableGFX1170_FAKE1696, MI,
648 DecW, Address, CS))
649 break;
650
651 if (isGFX11() &&
652 tryDecodeInst(DecoderTableGFX1196, DecoderTableGFX11_FAKE1696, MI,
653 DecW, Address, CS))
654 break;
655
656 if (isGFX1250() &&
657 tryDecodeInst(DecoderTableGFX125096, DecoderTableGFX1250_FAKE1696, MI,
658 DecW, Address, CS))
659 break;
660
661 if (isGFX12() &&
662 tryDecodeInst(DecoderTableGFX1296, DecoderTableGFX12_FAKE1696, MI,
663 DecW, Address, CS))
664 break;
665
666 if (isGFX12() &&
667 tryDecodeInst(DecoderTableGFX12W6496, MI, DecW, Address, CS))
668 break;
669
670 if (isGFX13() &&
671 tryDecodeInst(DecoderTableGFX1396, DecoderTableGFX13_FAKE1696, MI,
672 DecW, Address, CS))
673 break;
674
675 if (STI.hasFeature(AMDGPU::Feature64BitLiterals)) {
676 // Return 8 bytes for a potential literal.
677 Bytes = Bytes_.slice(4, MaxInstBytesNum - 4);
678
679 if (isGFX1250() &&
680 tryDecodeInst(DecoderTableGFX125096, MI, DecW, Address, CS))
681 break;
682 }
683
684 // Reinitialize Bytes
685 Bytes = Bytes_.slice(0, MaxInstBytesNum);
686
687 } else if (Bytes.size() >= 16 &&
688 STI.hasFeature(AMDGPU::FeatureGFX950Insts)) {
689 std::bitset<128> DecW = eat16Bytes(Bytes);
690 if (tryDecodeInst(DecoderTableGFX940128, MI, DecW, Address, CS))
691 break;
692
693 // Reinitialize Bytes
694 Bytes = Bytes_.slice(0, MaxInstBytesNum);
695 }
696
697 if (Bytes.size() >= 8) {
698 const uint64_t QW = eatBytes<uint64_t>(Bytes);
699
700 if (STI.hasFeature(AMDGPU::FeatureGFX10_BEncoding) &&
701 tryDecodeInst(DecoderTableGFX10_B64, MI, QW, Address, CS))
702 break;
703
704 if (STI.hasFeature(AMDGPU::FeatureUnpackedD16VMem) &&
705 tryDecodeInst(DecoderTableGFX80_UNPACKED64, MI, QW, Address, CS))
706 break;
707
708 if (STI.hasFeature(AMDGPU::FeatureGFX950Insts) &&
709 tryDecodeInst(DecoderTableGFX95064, MI, QW, Address, CS))
710 break;
711
712 // Some GFX9 subtargets repurposed the v_mad_mix_f32, v_mad_mixlo_f16 and
713 // v_mad_mixhi_f16 for FMA variants. Try to decode using this special
714 // table first so we print the correct name.
715 if (STI.hasFeature(AMDGPU::FeatureFmaMixInsts) &&
716 tryDecodeInst(DecoderTableGFX9_DL64, MI, QW, Address, CS))
717 break;
718
719 if (STI.hasFeature(AMDGPU::FeatureGFX940Insts) &&
720 tryDecodeInst(DecoderTableGFX94064, MI, QW, Address, CS))
721 break;
722
723 if (STI.hasFeature(AMDGPU::FeatureGFX90AInsts) &&
724 tryDecodeInst(DecoderTableGFX90A64, MI, QW, Address, CS))
725 break;
726
727 if ((isVI() || isGFX9()) &&
728 tryDecodeInst(DecoderTableGFX864, MI, QW, Address, CS))
729 break;
730
731 if (isGFX9() && tryDecodeInst(DecoderTableGFX964, MI, QW, Address, CS))
732 break;
733
734 if (isGFX10() && tryDecodeInst(DecoderTableGFX1064, MI, QW, Address, CS))
735 break;
736
737 if (isGFX1250() &&
738 tryDecodeInst(DecoderTableGFX125064, DecoderTableGFX1250_FAKE1664, MI,
739 QW, Address, CS))
740 break;
741
742 if (isGFX12() &&
743 tryDecodeInst(DecoderTableGFX1264, DecoderTableGFX12_FAKE1664, MI, QW,
744 Address, CS))
745 break;
746
747 if (isGFX1170() &&
748 tryDecodeInst(DecoderTableGFX117064, DecoderTableGFX1170_FAKE1664, MI,
749 QW, Address, CS))
750 break;
751
752 if (isGFX11() &&
753 tryDecodeInst(DecoderTableGFX1164, DecoderTableGFX11_FAKE1664, MI, QW,
754 Address, CS))
755 break;
756
757 if (isGFX1170() &&
758 tryDecodeInst(DecoderTableGFX1170W6464, MI, QW, Address, CS))
759 break;
760
761 if (isGFX11() &&
762 tryDecodeInst(DecoderTableGFX11W6464, MI, QW, Address, CS))
763 break;
764
765 if (isGFX12() &&
766 tryDecodeInst(DecoderTableGFX12W6464, MI, QW, Address, CS))
767 break;
768
769 if (isGFX13() &&
770 tryDecodeInst(DecoderTableGFX1364, DecoderTableGFX13_FAKE1664, MI, QW,
771 Address, CS))
772 break;
773
774 // Reinitialize Bytes
775 Bytes = Bytes_.slice(0, MaxInstBytesNum);
776 }
777
778 // Try decode 32-bit instruction
779 if (Bytes.size() >= 4) {
780 const uint32_t DW = eatBytes<uint32_t>(Bytes);
781
782 if ((isVI() || isGFX9()) &&
783 tryDecodeInst(DecoderTableGFX832, MI, DW, Address, CS))
784 break;
785
786 if (tryDecodeInst(DecoderTableAMDGPU32, MI, DW, Address, CS))
787 break;
788
789 if (isGFX9() && tryDecodeInst(DecoderTableGFX932, MI, DW, Address, CS))
790 break;
791
792 if (STI.hasFeature(AMDGPU::FeatureGFX950Insts) &&
793 tryDecodeInst(DecoderTableGFX95032, MI, DW, Address, CS))
794 break;
795
796 if (STI.hasFeature(AMDGPU::FeatureGFX90AInsts) &&
797 tryDecodeInst(DecoderTableGFX90A32, MI, DW, Address, CS))
798 break;
799
800 if (STI.hasFeature(AMDGPU::FeatureGFX10_BEncoding) &&
801 tryDecodeInst(DecoderTableGFX10_B32, MI, DW, Address, CS))
802 break;
803
804 if (isGFX10() && tryDecodeInst(DecoderTableGFX1032, MI, DW, Address, CS))
805 break;
806
807 if (isGFX1170() &&
808 tryDecodeInst(DecoderTableGFX117032, DecoderTableGFX1170_FAKE1632, MI,
809 DW, Address, CS))
810 break;
811
812 if (isGFX11() &&
813 tryDecodeInst(DecoderTableGFX1132, DecoderTableGFX11_FAKE1632, MI, DW,
814 Address, CS))
815 break;
816
817 if (isGFX1250() &&
818 tryDecodeInst(DecoderTableGFX125032, DecoderTableGFX1250_FAKE1632, MI,
819 DW, Address, CS))
820 break;
821
822 if (isGFX12() &&
823 tryDecodeInst(DecoderTableGFX1232, DecoderTableGFX12_FAKE1632, MI, DW,
824 Address, CS))
825 break;
826
827 if (isGFX13() &&
828 tryDecodeInst(DecoderTableGFX1332, DecoderTableGFX13_FAKE1632, MI, DW,
829 Address, CS))
830 break;
831 }
832
834 } while (false);
835
837
838 decodeImmOperands(MI, *MCII);
839
840 if (MCII->get(MI.getOpcode()).TSFlags & SIInstrFlags::DPP) {
841 if (isMacDPP(MI))
843
844 if (MCII->get(MI.getOpcode()).TSFlags & SIInstrFlags::VOP3P)
846 else if (MCII->get(MI.getOpcode()).TSFlags & SIInstrFlags::VOPC)
847 convertVOPCDPPInst(MI); // Special VOP3 case
848 else if (AMDGPU::isVOPC64DPP(MI.getOpcode()))
849 convertVOPC64DPPInst(MI); // Special VOP3 case
850 else if (AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::dpp8) !=
851 -1)
853 else if (MCII->get(MI.getOpcode()).TSFlags & SIInstrFlags::VOP3)
854 convertVOP3DPPInst(MI); // Regular VOP3 case
855 }
856
858
859 if (AMDGPU::isMAC(MI.getOpcode())) {
860 // Insert dummy unused src2_modifiers.
862 AMDGPU::OpName::src2_modifiers);
863 }
864
865 if (MI.getOpcode() == AMDGPU::V_CVT_SR_BF8_F32_e64_dpp ||
866 MI.getOpcode() == AMDGPU::V_CVT_SR_FP8_F32_e64_dpp) {
867 // Insert dummy unused src2_modifiers.
869 AMDGPU::OpName::src2_modifiers);
870 }
871
872 if ((MCII->get(MI.getOpcode()).TSFlags & SIInstrFlags::DS) &&
874 insertNamedMCOperand(MI, MCOperand::createImm(0), AMDGPU::OpName::gds);
875 }
876
877 if (MCII->get(MI.getOpcode()).TSFlags &
879 int CPolPos = AMDGPU::getNamedOperandIdx(MI.getOpcode(),
880 AMDGPU::OpName::cpol);
881 if (CPolPos != -1) {
882 unsigned CPol =
883 (MCII->get(MI.getOpcode()).TSFlags & SIInstrFlags::IsAtomicRet) ?
885 if (MI.getNumOperands() <= (unsigned)CPolPos) {
887 AMDGPU::OpName::cpol);
888 } else if (CPol) {
889 MI.getOperand(CPolPos).setImm(MI.getOperand(CPolPos).getImm() | CPol);
890 }
891 }
892 }
893
894 if ((MCII->get(MI.getOpcode()).TSFlags &
896 (STI.hasFeature(AMDGPU::FeatureGFX90AInsts))) {
897 // GFX90A lost TFE, its place is occupied by ACC.
898 int TFEOpIdx =
899 AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::tfe);
900 if (TFEOpIdx != -1) {
901 auto *TFEIter = MI.begin();
902 std::advance(TFEIter, TFEOpIdx);
903 MI.insert(TFEIter, MCOperand::createImm(0));
904 }
905 }
906
907 // Validate buffer instruction offsets for GFX12+ - must not be a negative.
909 int OffsetIdx =
910 AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::offset);
911 if (OffsetIdx != -1) {
912 uint32_t Imm = MI.getOperand(OffsetIdx).getImm();
913 int64_t SignedOffset = SignExtend64<24>(Imm);
914 if (SignedOffset < 0)
916 }
917 }
918
919 if (MCII->get(MI.getOpcode()).TSFlags &
921 int SWZOpIdx =
922 AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::swz);
923 if (SWZOpIdx != -1) {
924 auto *SWZIter = MI.begin();
925 std::advance(SWZIter, SWZOpIdx);
926 MI.insert(SWZIter, MCOperand::createImm(0));
927 }
928 }
929
930 const MCInstrDesc &Desc = MCII->get(MI.getOpcode());
931 if (Desc.TSFlags & SIInstrFlags::MIMG) {
932 int VAddr0Idx =
933 AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::vaddr0);
934 int RsrcIdx =
935 AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::srsrc);
936 unsigned NSAArgs = RsrcIdx - VAddr0Idx - 1;
937 if (VAddr0Idx >= 0 && NSAArgs > 0) {
938 unsigned NSAWords = (NSAArgs + 3) / 4;
939 if (Bytes.size() < 4 * NSAWords)
941 for (unsigned i = 0; i < NSAArgs; ++i) {
942 const unsigned VAddrIdx = VAddr0Idx + 1 + i;
943 auto VAddrRCID =
944 MCII->getOpRegClassID(Desc.operands()[VAddrIdx], HwModeRegClass);
945 MI.insert(MI.begin() + VAddrIdx, createRegOperand(VAddrRCID, Bytes[i]));
946 }
947 Bytes = Bytes.slice(4 * NSAWords);
948 }
949
951 }
952
953 if (MCII->get(MI.getOpcode()).TSFlags &
956
957 if (MCII->get(MI.getOpcode()).TSFlags & SIInstrFlags::EXP)
959
960 if (MCII->get(MI.getOpcode()).TSFlags & SIInstrFlags::VINTERP)
962
963 if (MCII->get(MI.getOpcode()).TSFlags & SIInstrFlags::SDWA)
965
966 if (MCII->get(MI.getOpcode()).TSFlags & SIInstrFlags::IsMAI)
968
969 if (MCII->get(MI.getOpcode()).TSFlags & SIInstrFlags::IsWMMA)
971
972 int VDstIn_Idx = AMDGPU::getNamedOperandIdx(MI.getOpcode(),
973 AMDGPU::OpName::vdst_in);
974 if (VDstIn_Idx != -1) {
975 int Tied = MCII->get(MI.getOpcode()).getOperandConstraint(VDstIn_Idx,
977 if (Tied != -1 && (MI.getNumOperands() <= (unsigned)VDstIn_Idx ||
978 !MI.getOperand(VDstIn_Idx).isReg() ||
979 MI.getOperand(VDstIn_Idx).getReg() != MI.getOperand(Tied).getReg())) {
980 if (MI.getNumOperands() > (unsigned)VDstIn_Idx)
981 MI.erase(&MI.getOperand(VDstIn_Idx));
983 MCOperand::createReg(MI.getOperand(Tied).getReg()),
984 AMDGPU::OpName::vdst_in);
985 }
986 }
987
988 bool IsSOPK = MCII->get(MI.getOpcode()).TSFlags & SIInstrFlags::SOPK;
989 if (AMDGPU::hasNamedOperand(MI.getOpcode(), AMDGPU::OpName::imm) && !IsSOPK)
991
992 // Some VOPC instructions, e.g., v_cmpx_f_f64, use VOP3 encoding and
993 // have EXEC as implicit destination. Issue a warning if encoding for
994 // vdst is not EXEC.
995 if ((MCII->get(MI.getOpcode()).TSFlags & SIInstrFlags::VOP3) &&
996 MCII->get(MI.getOpcode()).getNumDefs() == 0 &&
997 MCII->get(MI.getOpcode()).hasImplicitDefOfPhysReg(AMDGPU::EXEC)) {
998 auto ExecEncoding = MRI.getEncodingValue(AMDGPU::EXEC_LO);
999 if (Bytes_[0] != ExecEncoding)
1001 }
1002
1003 Size = MaxInstBytesNum - Bytes.size();
1004 return Status;
1005}
1006
1008 if (STI.hasFeature(AMDGPU::FeatureGFX11Insts)) {
1009 // The MCInst still has these fields even though they are no longer encoded
1010 // in the GFX11 instruction.
1011 insertNamedMCOperand(MI, MCOperand::createImm(0), AMDGPU::OpName::vm);
1012 insertNamedMCOperand(MI, MCOperand::createImm(0), AMDGPU::OpName::compr);
1013 }
1014}
1015
1018 if (MI.getOpcode() == AMDGPU::V_INTERP_P10_F16_F32_inreg_t16_gfx11 ||
1019 MI.getOpcode() == AMDGPU::V_INTERP_P10_F16_F32_inreg_fake16_gfx11 ||
1020 MI.getOpcode() == AMDGPU::V_INTERP_P10_F16_F32_inreg_t16_gfx12 ||
1021 MI.getOpcode() == AMDGPU::V_INTERP_P10_F16_F32_inreg_fake16_gfx12 ||
1022 MI.getOpcode() == AMDGPU::V_INTERP_P10_F16_F32_inreg_t16_gfx13 ||
1023 MI.getOpcode() == AMDGPU::V_INTERP_P10_F16_F32_inreg_fake16_gfx13 ||
1024 MI.getOpcode() == AMDGPU::V_INTERP_P10_RTZ_F16_F32_inreg_t16_gfx11 ||
1025 MI.getOpcode() == AMDGPU::V_INTERP_P10_RTZ_F16_F32_inreg_fake16_gfx11 ||
1026 MI.getOpcode() == AMDGPU::V_INTERP_P10_RTZ_F16_F32_inreg_t16_gfx12 ||
1027 MI.getOpcode() == AMDGPU::V_INTERP_P10_RTZ_F16_F32_inreg_fake16_gfx12 ||
1028 MI.getOpcode() == AMDGPU::V_INTERP_P10_RTZ_F16_F32_inreg_t16_gfx13 ||
1029 MI.getOpcode() == AMDGPU::V_INTERP_P10_RTZ_F16_F32_inreg_fake16_gfx13 ||
1030 MI.getOpcode() == AMDGPU::V_INTERP_P2_F16_F32_inreg_t16_gfx11 ||
1031 MI.getOpcode() == AMDGPU::V_INTERP_P2_F16_F32_inreg_fake16_gfx11 ||
1032 MI.getOpcode() == AMDGPU::V_INTERP_P2_F16_F32_inreg_t16_gfx12 ||
1033 MI.getOpcode() == AMDGPU::V_INTERP_P2_F16_F32_inreg_fake16_gfx12 ||
1034 MI.getOpcode() == AMDGPU::V_INTERP_P2_F16_F32_inreg_t16_gfx13 ||
1035 MI.getOpcode() == AMDGPU::V_INTERP_P2_F16_F32_inreg_fake16_gfx13 ||
1036 MI.getOpcode() == AMDGPU::V_INTERP_P2_RTZ_F16_F32_inreg_t16_gfx11 ||
1037 MI.getOpcode() == AMDGPU::V_INTERP_P2_RTZ_F16_F32_inreg_fake16_gfx11 ||
1038 MI.getOpcode() == AMDGPU::V_INTERP_P2_RTZ_F16_F32_inreg_t16_gfx12 ||
1039 MI.getOpcode() == AMDGPU::V_INTERP_P2_RTZ_F16_F32_inreg_fake16_gfx12 ||
1040 MI.getOpcode() == AMDGPU::V_INTERP_P2_RTZ_F16_F32_inreg_t16_gfx13 ||
1041 MI.getOpcode() == AMDGPU::V_INTERP_P2_RTZ_F16_F32_inreg_fake16_gfx13) {
1042 // The MCInst has this field that is not directly encoded in the
1043 // instruction.
1044 insertNamedMCOperand(MI, MCOperand::createImm(0), AMDGPU::OpName::op_sel);
1045 }
1046}
1047
1049 if (STI.hasFeature(AMDGPU::FeatureGFX9) ||
1050 STI.hasFeature(AMDGPU::FeatureGFX10)) {
1051 if (AMDGPU::hasNamedOperand(MI.getOpcode(), AMDGPU::OpName::sdst))
1052 // VOPC - insert clamp
1053 insertNamedMCOperand(MI, MCOperand::createImm(0), AMDGPU::OpName::clamp);
1054 } else if (STI.hasFeature(AMDGPU::FeatureVolcanicIslands)) {
1055 int SDst = AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::sdst);
1056 if (SDst != -1) {
1057 // VOPC - insert VCC register as sdst
1059 AMDGPU::OpName::sdst);
1060 } else {
1061 // VOP1/2 - insert omod if present in instruction
1062 insertNamedMCOperand(MI, MCOperand::createImm(0), AMDGPU::OpName::omod);
1063 }
1064 }
1065}
1066
1067/// Adjust the register values used by V_MFMA_F8F6F4_f8_f8 instructions to the
1068/// appropriate subregister for the used format width.
///
/// \p MO is rewritten in place. \p NumRegs selects the target tuple:
/// 4, 6 and 8 pick the leading sub0..sub3 / sub0..sub5 / sub0..sub7
/// sub-register of the current register, 12 rebuilds a VReg_384 tuple that
/// starts at the current sub0, and 16 leaves the operand untouched. Any other
/// value hits llvm_unreachable.
1070 MCOperand &MO, uint8_t NumRegs) {
1071 switch (NumRegs) {
1072 case 4:
1073 return MO.setReg(MRI.getSubReg(MO.getReg(), AMDGPU::sub0_sub1_sub2_sub3));
1074 case 6:
1075 return MO.setReg(
1076 MRI.getSubReg(MO.getReg(), AMDGPU::sub0_sub1_sub2_sub3_sub4_sub5));
1077 case 8:
// Unlike the 4 and 6 cases, the operand is only replaced when the
// sub-register lookup yields a non-null register.
1078 if (MCRegister NewReg = MRI.getSubReg(
1079 MO.getReg(), AMDGPU::sub0_sub1_sub2_sub3_sub4_sub5_sub6_sub7)) {
1080 MO.setReg(NewReg);
1081 }
1082 return;
1083 case 12: {
1084 // There is no 384-bit subreg index defined.
// Instead, take the first 32-bit lane and look up the VReg_384 tuple that
// has it as sub0.
1085 MCRegister BaseReg = MRI.getSubReg(MO.getReg(), AMDGPU::sub0);
1086 MCRegister NewReg = MRI.getMatchingSuperReg(
1087 BaseReg, AMDGPU::sub0, &MRI.getRegClass(AMDGPU::VReg_384RegClassID));
1088 return MO.setReg(NewReg);
1089 }
1090 case 16:
1091 // No-op in cases where one operand is still f8/bf8.
1092 return;
1093 default:
1094 llvm_unreachable("Unexpected size for mfma/wmma f8f6f4 operand");
1095 }
1096}
1097
1098/// f8f6f4 instructions have different pseudos depending on the used formats. In
1099/// the disassembler table, we only have the variants with the largest register
1100/// classes which assume using an fp8/bf8 format for both operands. The actual
1101/// register class depends on the format in blgp and cbsz operands. Adjust the
1102/// register classes depending on the used format.
1104 int BlgpIdx =
1105 AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::blgp);
1106 if (BlgpIdx == -1)
1107 return;
1108
1109 int CbszIdx =
1110 AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::cbsz);
1111
1112 unsigned CBSZ = MI.getOperand(CbszIdx).getImm();
1113 unsigned BLGP = MI.getOperand(BlgpIdx).getImm();
1114
1115 const AMDGPU::MFMA_F8F6F4_Info *AdjustedRegClassOpcode =
1116 AMDGPU::getMFMA_F8F6F4_WithFormatArgs(CBSZ, BLGP, MI.getOpcode());
1117 if (!AdjustedRegClassOpcode ||
1118 AdjustedRegClassOpcode->Opcode == MI.getOpcode())
1119 return;
1120
1121 MI.setOpcode(AdjustedRegClassOpcode->Opcode);
1122 int Src0Idx =
1123 AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::src0);
1124 int Src1Idx =
1125 AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::src1);
1126 adjustMFMA_F8F6F4OpRegClass(MRI, MI.getOperand(Src0Idx),
1127 AdjustedRegClassOpcode->NumRegsSrcA);
1128 adjustMFMA_F8F6F4OpRegClass(MRI, MI.getOperand(Src1Idx),
1129 AdjustedRegClassOpcode->NumRegsSrcB);
1130}
1131
1133 int FmtAIdx =
1134 AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::matrix_a_fmt);
1135 if (FmtAIdx == -1)
1136 return;
1137
1138 int FmtBIdx =
1139 AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::matrix_b_fmt);
1140
1141 unsigned FmtA = MI.getOperand(FmtAIdx).getImm();
1142 unsigned FmtB = MI.getOperand(FmtBIdx).getImm();
1143
1144 const AMDGPU::MFMA_F8F6F4_Info *AdjustedRegClassOpcode =
1145 AMDGPU::getWMMA_F8F6F4_WithFormatArgs(FmtA, FmtB, MI.getOpcode());
1146 if (!AdjustedRegClassOpcode ||
1147 AdjustedRegClassOpcode->Opcode == MI.getOpcode())
1148 return;
1149
1150 MI.setOpcode(AdjustedRegClassOpcode->Opcode);
1151 int Src0Idx =
1152 AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::src0);
1153 int Src1Idx =
1154 AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::src1);
1155 adjustMFMA_F8F6F4OpRegClass(MRI, MI.getOperand(Src0Idx),
1156 AdjustedRegClassOpcode->NumRegsSrcA);
1157 adjustMFMA_F8F6F4OpRegClass(MRI, MI.getOperand(Src1Idx),
1158 AdjustedRegClassOpcode->NumRegsSrcB);
1159}
1160
// Bit masks gathered from the srcN_modifiers operands of an instruction.
// collectVOPModifiers() sets bit J of a field from the modifiers of source
// operand J; bit 3 of OpSel additionally carries DST_OP_SEL taken from
// src0_modifiers in the non-VOP3P case. All fields start at zero.
1162 unsigned OpSel = 0;
1163 unsigned OpSelHi = 0;
1164 unsigned NegLo = 0;
1165 unsigned NegHi = 0;
1166};
1167
1168// Reconstruct values of VOP3/VOP3P operands such as op_sel.
1169// Note that these values do not affect disassembler output,
1170// so this is only necessary for consistency with src_modifiers.
//
// Walks src0/src1/src2_modifiers of \p MI (skipping those the opcode does not
// have) and returns the combined masks. With \p IsVOP3P set, op_sel_hi,
// neg_lo and neg_hi are collected as well; otherwise only op_sel is filled,
// including the destination bit described below.
1172 bool IsVOP3P = false) {
1173 VOPModifiers Modifiers;
1174 unsigned Opc = MI.getOpcode();
1175 const AMDGPU::OpName ModOps[] = {AMDGPU::OpName::src0_modifiers,
1176 AMDGPU::OpName::src1_modifiers,
1177 AMDGPU::OpName::src2_modifiers};
1178 for (int J = 0; J < 3; ++J) {
1179 int OpIdx = AMDGPU::getNamedOperandIdx(Opc, ModOps[J]);
1180 if (OpIdx == -1)
1181 continue;
1182
1183 unsigned Val = MI.getOperand(OpIdx).getImm();
1184
// `!!` collapses the masked value to 0 or 1 before shifting it to bit J.
1185 Modifiers.OpSel |= !!(Val & SISrcMods::OP_SEL_0) << J;
1186 if (IsVOP3P) {
1187 Modifiers.OpSelHi |= !!(Val & SISrcMods::OP_SEL_1) << J;
1188 Modifiers.NegLo |= !!(Val & SISrcMods::NEG) << J;
1189 Modifiers.NegHi |= !!(Val & SISrcMods::NEG_HI) << J;
1190 } else if (J == 0) {
// The destination op_sel bit is read from src0_modifiers and stored as
// bit 3 of OpSel, above the three source bits.
1191 Modifiers.OpSel |= !!(Val & SISrcMods::DST_OP_SEL) << 3;
1192 }
1193 }
1194
1195 return Modifiers;
1196}
1197
1198// Instructions decode the op_sel/suffix bits into the src_modifier
1199// operands. Copy those bits into the src operands for true16 VGPRs.
1201 const unsigned Opc = MI.getOpcode();
1202 const MCRegisterClass &ConversionRC =
1203 MRI.getRegClass(AMDGPU::VGPR_16RegClassID);
1204 constexpr std::array<std::tuple<AMDGPU::OpName, AMDGPU::OpName, unsigned>, 4>
1205 OpAndOpMods = {{{AMDGPU::OpName::src0, AMDGPU::OpName::src0_modifiers,
1207 {AMDGPU::OpName::src1, AMDGPU::OpName::src1_modifiers,
1209 {AMDGPU::OpName::src2, AMDGPU::OpName::src2_modifiers,
1211 {AMDGPU::OpName::vdst, AMDGPU::OpName::src0_modifiers,
1213 for (const auto &[OpName, OpModsName, OpSelMask] : OpAndOpMods) {
1214 int OpIdx = AMDGPU::getNamedOperandIdx(Opc, OpName);
1215 int OpModsIdx = AMDGPU::getNamedOperandIdx(Opc, OpModsName);
1216 if (OpIdx == -1 || OpModsIdx == -1)
1217 continue;
1218 MCOperand &Op = MI.getOperand(OpIdx);
1219 if (!Op.isReg())
1220 continue;
1221 if (!ConversionRC.contains(Op.getReg()))
1222 continue;
1223 unsigned OpEnc = MRI.getEncodingValue(Op.getReg());
1224 const MCOperand &OpMods = MI.getOperand(OpModsIdx);
1225 unsigned ModVal = OpMods.getImm();
1226 if (ModVal & OpSelMask) { // isHi
1227 unsigned RegIdx = OpEnc & AMDGPU::HWEncoding::REG_IDX_MASK;
1228 Op.setReg(ConversionRC.getRegister(RegIdx * 2 + 1));
1229 }
1230 }
1231}
1232
1233// MAC opcodes have special old and src2 operands.
1234// src2 is tied to dst, while old is not tied (but assumed to be).
1236 constexpr int DST_IDX = 0;
1237 auto Opcode = MI.getOpcode();
1238 const auto &Desc = MCII->get(Opcode);
1239 auto OldIdx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::old);
1240
1241 if (OldIdx != -1 && Desc.getOperandConstraint(
1242 OldIdx, MCOI::OperandConstraint::TIED_TO) == -1) {
1243 assert(AMDGPU::hasNamedOperand(Opcode, AMDGPU::OpName::src2));
1244 assert(Desc.getOperandConstraint(
1245 AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2),
1247 (void)DST_IDX;
1248 return true;
1249 }
1250
1251 return false;
1252}
1253
1254// Create dummy old operand and insert dummy unused src2_modifiers
1256 assert(MI.getNumOperands() + 1 < MCII->get(MI.getOpcode()).getNumOperands());
1257 insertNamedMCOperand(MI, MCOperand::createReg(0), AMDGPU::OpName::old);
1259 AMDGPU::OpName::src2_modifiers);
1260}
1261
1263 unsigned Opc = MI.getOpcode();
1264
1265 int VDstInIdx =
1266 AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::vdst_in);
1267 if (VDstInIdx != -1)
1268 insertNamedMCOperand(MI, MI.getOperand(0), AMDGPU::OpName::vdst_in);
1269
1270 unsigned DescNumOps = MCII->get(Opc).getNumOperands();
1271 if (MI.getNumOperands() < DescNumOps &&
1272 AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::op_sel)) {
1274 auto Mods = collectVOPModifiers(MI);
1276 AMDGPU::OpName::op_sel);
1277 } else {
1278 // Insert dummy unused src modifiers.
1279 if (MI.getNumOperands() < DescNumOps &&
1280 AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::src0_modifiers))
1282 AMDGPU::OpName::src0_modifiers);
1283
1284 if (MI.getNumOperands() < DescNumOps &&
1285 AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::src1_modifiers))
1287 AMDGPU::OpName::src1_modifiers);
1288 }
1289}
1290
1293
1294 int VDstInIdx =
1295 AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::vdst_in);
1296 if (VDstInIdx != -1)
1297 insertNamedMCOperand(MI, MI.getOperand(0), AMDGPU::OpName::vdst_in);
1298
1299 unsigned Opc = MI.getOpcode();
1300 unsigned DescNumOps = MCII->get(Opc).getNumOperands();
1301 if (MI.getNumOperands() < DescNumOps &&
1302 AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::op_sel)) {
1303 auto Mods = collectVOPModifiers(MI);
1305 AMDGPU::OpName::op_sel);
1306 }
1307}
1308
1309// Given a wide tuple \p Reg check if it will overflow 256 registers.
1310// \returns \p Reg on success or NoRegister otherwise.
//
// The tuple length is derived from the size of \p RC in 32-bit units. A
// register without a sub0 sub-register is returned unchanged.
1312 const MCRegisterInfo &MRI) {
1313 unsigned NumRegs = RC.getSizeInBits() / 32;
1314 MCRegister Sub0 = MRI.getSubReg(Reg, AMDGPU::sub0);
1315 if (!Sub0)
1316 return Reg;
1317
// Pick the first register of the bank (VGPR or AGPR) that Sub0 belongs to so
// that Sub0 - BaseReg below is the index of the tuple's first lane.
1318 MCRegister BaseReg;
1319 if (MRI.getRegClass(AMDGPU::VGPR_32RegClassID).contains(Sub0))
1320 BaseReg = AMDGPU::VGPR0;
1321 else if (MRI.getRegClass(AMDGPU::AGPR_32RegClassID).contains(Sub0))
1322 BaseReg = AMDGPU::AGPR0;
1323
// NOTE(review): in builds without assertions a Sub0 from neither bank leaves
// BaseReg null and the range check below is still evaluated.
1324 assert(BaseReg && "Only vector registers expected");
1325
// The tuple fits when its last lane index stays below 256.
1326 return (Sub0 - BaseReg + NumRegs <= 256) ? Reg : MCRegister();
1327}
1328
1329// Note that before gfx10, the MIMG encoding provided no information about
1330// VADDR size. Consequently, decoded instructions always show the address as
1331// if it has 1 dword, which may not be the actual size.
1333 auto TSFlags = MCII->get(MI.getOpcode()).TSFlags;
1334
1335 int VDstIdx = AMDGPU::getNamedOperandIdx(MI.getOpcode(),
1336 AMDGPU::OpName::vdst);
1337
1338 int VDataIdx = AMDGPU::getNamedOperandIdx(MI.getOpcode(),
1339 AMDGPU::OpName::vdata);
1340 int VAddr0Idx =
1341 AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::vaddr0);
1342 AMDGPU::OpName RsrcOpName = (TSFlags & SIInstrFlags::MIMG)
1343 ? AMDGPU::OpName::srsrc
1344 : AMDGPU::OpName::rsrc;
1345 int RsrcIdx = AMDGPU::getNamedOperandIdx(MI.getOpcode(), RsrcOpName);
1346 int DMaskIdx = AMDGPU::getNamedOperandIdx(MI.getOpcode(),
1347 AMDGPU::OpName::dmask);
1348
1349 int TFEIdx = AMDGPU::getNamedOperandIdx(MI.getOpcode(),
1350 AMDGPU::OpName::tfe);
1351 int D16Idx = AMDGPU::getNamedOperandIdx(MI.getOpcode(),
1352 AMDGPU::OpName::d16);
1353
1354 const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(MI.getOpcode());
1355 const AMDGPU::MIMGBaseOpcodeInfo *BaseOpcode =
1356 AMDGPU::getMIMGBaseOpcodeInfo(Info->BaseOpcode);
1357
1358 assert(VDataIdx != -1);
1359 if (BaseOpcode->BVH) {
1360 // Add A16 operand for intersect_ray instructions
1361 addOperand(MI, MCOperand::createImm(BaseOpcode->A16));
1362 return;
1363 }
1364
1365 bool IsAtomic = (VDstIdx != -1);
1366 bool IsGather4 = TSFlags & SIInstrFlags::Gather4;
1367 bool IsVSample = TSFlags & SIInstrFlags::VSAMPLE;
1368 bool IsNSA = false;
1369 bool IsPartialNSA = false;
1370 unsigned AddrSize = Info->VAddrDwords;
1371
1372 if (isGFX10Plus()) {
1373 unsigned DimIdx =
1374 AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::dim);
1375 int A16Idx =
1376 AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::a16);
1377 const AMDGPU::MIMGDimInfo *Dim =
1378 AMDGPU::getMIMGDimInfoByEncoding(MI.getOperand(DimIdx).getImm());
1379 const bool IsA16 = (A16Idx != -1 && MI.getOperand(A16Idx).getImm());
1380
1381 AddrSize =
1382 AMDGPU::getAddrSizeMIMGOp(BaseOpcode, Dim, IsA16, AMDGPU::hasG16(STI));
1383
1384 // VSAMPLE insts that do not use vaddr3 behave the same as NSA forms.
1385 // VIMAGE insts other than BVH never use vaddr4.
1386 IsNSA = Info->MIMGEncoding == AMDGPU::MIMGEncGfx10NSA ||
1387 Info->MIMGEncoding == AMDGPU::MIMGEncGfx11NSA ||
1388 Info->MIMGEncoding == AMDGPU::MIMGEncGfx12 ||
1389 Info->MIMGEncoding == AMDGPU::MIMGEncGfx13;
1390 if (!IsNSA) {
1391 if (!IsVSample && AddrSize > 12)
1392 AddrSize = 16;
1393 } else {
1394 if (AddrSize > Info->VAddrDwords) {
1395 if (!STI.hasFeature(AMDGPU::FeaturePartialNSAEncoding)) {
1396 // The NSA encoding does not contain enough operands for the
1397 // combination of base opcode / dimension. Should this be an error?
1398 return;
1399 }
1400 IsPartialNSA = true;
1401 }
1402 }
1403 }
1404
1405 unsigned DMask = MI.getOperand(DMaskIdx).getImm() & 0xf;
1406 unsigned DstSize = IsGather4 ? 4 : std::max(llvm::popcount(DMask), 1);
1407
1408 bool D16 = D16Idx >= 0 && MI.getOperand(D16Idx).getImm();
1409 if (D16 && AMDGPU::hasPackedD16(STI)) {
1410 DstSize = (DstSize + 1) / 2;
1411 }
1412
1413 if (TFEIdx != -1 && MI.getOperand(TFEIdx).getImm())
1414 DstSize += 1;
1415
1416 if (DstSize == Info->VDataDwords && AddrSize == Info->VAddrDwords)
1417 return;
1418
1419 int NewOpcode =
1420 AMDGPU::getMIMGOpcode(Info->BaseOpcode, Info->MIMGEncoding, DstSize, AddrSize);
1421 if (NewOpcode == -1)
1422 return;
1423
1424 // Widen the register to the correct number of enabled channels.
1425 MCRegister NewVdata;
1426 if (DstSize != Info->VDataDwords) {
1427 auto DataRCID = MCII->getOpRegClassID(
1428 MCII->get(NewOpcode).operands()[VDataIdx], HwModeRegClass);
1429
1430 // Get first subregister of VData
1431 MCRegister Vdata0 = MI.getOperand(VDataIdx).getReg();
1432 MCRegister VdataSub0 = MRI.getSubReg(Vdata0, AMDGPU::sub0);
1433 Vdata0 = (VdataSub0 != 0)? VdataSub0 : Vdata0;
1434
1435 const MCRegisterClass &NewRC = MRI.getRegClass(DataRCID);
1436 NewVdata = MRI.getMatchingSuperReg(Vdata0, AMDGPU::sub0, &NewRC);
1437 NewVdata = CheckVGPROverflow(NewVdata, NewRC, MRI);
1438 if (!NewVdata) {
1439 // It's possible to encode this such that the low register + enabled
1440 // components exceeds the register count.
1441 return;
1442 }
1443 }
1444
1445 // If not using NSA on GFX10+, widen vaddr0 address register to correct size.
1446 // If using partial NSA on GFX11+ widen last address register.
1447 int VAddrSAIdx = IsPartialNSA ? (RsrcIdx - 1) : VAddr0Idx;
1448 MCRegister NewVAddrSA;
1449 if (STI.hasFeature(AMDGPU::FeatureNSAEncoding) && (!IsNSA || IsPartialNSA) &&
1450 AddrSize != Info->VAddrDwords) {
1451 MCRegister VAddrSA = MI.getOperand(VAddrSAIdx).getReg();
1452 MCRegister VAddrSubSA = MRI.getSubReg(VAddrSA, AMDGPU::sub0);
1453 VAddrSA = VAddrSubSA ? VAddrSubSA : VAddrSA;
1454
1455 auto AddrRCID = MCII->getOpRegClassID(
1456 MCII->get(NewOpcode).operands()[VAddrSAIdx], HwModeRegClass);
1457
1458 const MCRegisterClass &NewRC = MRI.getRegClass(AddrRCID);
1459 NewVAddrSA = MRI.getMatchingSuperReg(VAddrSA, AMDGPU::sub0, &NewRC);
1460 NewVAddrSA = CheckVGPROverflow(NewVAddrSA, NewRC, MRI);
1461 if (!NewVAddrSA)
1462 return;
1463 }
1464
1465 MI.setOpcode(NewOpcode);
1466
1467 if (NewVdata != AMDGPU::NoRegister) {
1468 MI.getOperand(VDataIdx) = MCOperand::createReg(NewVdata);
1469
1470 if (IsAtomic) {
1471 // Atomic operations have an additional operand (a copy of data)
1472 MI.getOperand(VDstIdx) = MCOperand::createReg(NewVdata);
1473 }
1474 }
1475
1476 if (NewVAddrSA) {
1477 MI.getOperand(VAddrSAIdx) = MCOperand::createReg(NewVAddrSA);
1478 } else if (IsNSA) {
1479 assert(AddrSize <= Info->VAddrDwords);
1480 MI.erase(MI.begin() + VAddr0Idx + AddrSize,
1481 MI.begin() + VAddr0Idx + Info->VAddrDwords);
1482 }
1483}
1484
1485// Opsel and neg bits are used in src_modifiers and standalone operands. Autogen
1486// decoder only adds to src_modifiers, so manually add the bits to the other
1487// operands.
1489 unsigned Opc = MI.getOpcode();
1490 unsigned DescNumOps = MCII->get(Opc).getNumOperands();
1491 auto Mods = collectVOPModifiers(MI, true);
1492
1493 if (MI.getNumOperands() < DescNumOps &&
1494 AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::vdst_in))
1495 insertNamedMCOperand(MI, MCOperand::createImm(0), AMDGPU::OpName::vdst_in);
1496
1497 if (MI.getNumOperands() < DescNumOps &&
1498 AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::op_sel))
1500 AMDGPU::OpName::op_sel);
1501 if (MI.getNumOperands() < DescNumOps &&
1502 AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::op_sel_hi))
1504 AMDGPU::OpName::op_sel_hi);
1505 if (MI.getNumOperands() < DescNumOps &&
1506 AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::neg_lo))
1508 AMDGPU::OpName::neg_lo);
1509 if (MI.getNumOperands() < DescNumOps &&
1510 AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::neg_hi))
1512 AMDGPU::OpName::neg_hi);
1513}
1514
1515// Create dummy old operand and insert optional operands
1517 unsigned Opc = MI.getOpcode();
1518 unsigned DescNumOps = MCII->get(Opc).getNumOperands();
1519
1520 if (MI.getNumOperands() < DescNumOps &&
1521 AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::old))
1522 insertNamedMCOperand(MI, MCOperand::createReg(0), AMDGPU::OpName::old);
1523
1524 if (MI.getNumOperands() < DescNumOps &&
1525 AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::src0_modifiers))
1527 AMDGPU::OpName::src0_modifiers);
1528
1529 if (MI.getNumOperands() < DescNumOps &&
1530 AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::src1_modifiers))
1532 AMDGPU::OpName::src1_modifiers);
1533}
1534
1536 unsigned Opc = MI.getOpcode();
1537 unsigned DescNumOps = MCII->get(Opc).getNumOperands();
1538
1540
1541 if (MI.getNumOperands() < DescNumOps &&
1542 AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::op_sel)) {
1545 AMDGPU::OpName::op_sel);
1546 }
1547}
1548
1550 assert(HasLiteral && "Should have decoded a literal");
1551 insertNamedMCOperand(MI, MCOperand::createImm(Literal), AMDGPU::OpName::immX);
1552}
1553
1554const char* AMDGPUDisassembler::getRegClassName(unsigned RegClassID) const {
1555 return getContext().getRegisterInfo()->
1556 getRegClassName(&AMDGPUMCRegisterClasses[RegClassID]);
1557}
1558
// Report a decoding problem: appends "Error: " followed by \p ErrMsg to the
// comment stream and returns a default-constructed MCOperand. The value
// passed by callers as the first argument is not used by the visible body; it
// only appears in the commented-out createError call below.
1559inline
1561 const Twine& ErrMsg) const {
1562 *CommentStream << "Error: " + ErrMsg;
1563
1564 // ToDo: add support for error operands to MCInst.h
1565 // return MCOperand::createError(V);
1566 return MCOperand();
1567}
1568
1572
// Build a register operand for the \p Val-th register of class
// \p RegClassID. An index at or beyond the class size is reported through
// errOperand() instead of being decoded.
1573inline
1575 unsigned Val) const {
1576 const auto& RegCl = AMDGPUMCRegisterClasses[RegClassID];
1577 if (Val >= RegCl.getNumRegs())
1578 return errOperand(Val, Twine(getRegClassName(RegClassID)) +
1579 ": unknown register " + Twine(Val));
1580 return createRegOperand(RegCl.getRegister(Val));
1581}
1582
// Build a scalar (SGPR/TTMP) register operand from the encoded register
// number \p Val. The index inside \p SRegClassID is Val >> shift, where shift
// is 0 for 32-bit classes, 1 for 64-bit classes and 2 for every wider class
// listed below. A \p Val that is not a multiple of 1 << shift only produces a
// warning on the comment stream; the operand is still created from the
// truncated index. Classes not listed hit llvm_unreachable.
1583inline
1585 unsigned Val) const {
1586 // ToDo: SI/CI have 104 SGPRs, VI - 102
1587 // Valery: here we are accepting as much as we can; let the assembler sort it out
1588 int shift = 0;
1589 switch (SRegClassID) {
1590 case AMDGPU::SGPR_32RegClassID:
1591 case AMDGPU::TTMP_32RegClassID:
1592 break;
1593 case AMDGPU::SGPR_64RegClassID:
1594 case AMDGPU::TTMP_64RegClassID:
1595 shift = 1;
1596 break;
1597 case AMDGPU::SGPR_96RegClassID:
1598 case AMDGPU::TTMP_96RegClassID:
1599 case AMDGPU::SGPR_128RegClassID:
1600 case AMDGPU::TTMP_128RegClassID:
1601 // ToDo: unclear if s[100:104] is available on VI. Can we use VCC as SGPR in
1602 // this bundle?
1603 case AMDGPU::SGPR_256RegClassID:
1604 case AMDGPU::TTMP_256RegClassID:
1605 // ToDo: unclear if s[96:104] is available on VI. Can we use VCC as SGPR in
1606 // this bundle?
1607 case AMDGPU::SGPR_288RegClassID:
1608 case AMDGPU::TTMP_288RegClassID:
1609 case AMDGPU::SGPR_320RegClassID:
1610 case AMDGPU::TTMP_320RegClassID:
1611 case AMDGPU::SGPR_352RegClassID:
1612 case AMDGPU::TTMP_352RegClassID:
1613 case AMDGPU::SGPR_384RegClassID:
1614 case AMDGPU::TTMP_384RegClassID:
1615 case AMDGPU::SGPR_512RegClassID:
1616 case AMDGPU::TTMP_512RegClassID:
1617 shift = 2;
1618 break;
1619 // ToDo: unclear if s[88:104] is available on VI. Can we use VCC as SGPR in
1620 // this bundle?
1621 default:
1622 llvm_unreachable("unhandled register class");
1623 }
1624
// Non-zero low bits mean the encoded number is not aligned for this class.
1625 if (Val % (1 << shift)) {
1626 *CommentStream << "Warning: " << getRegClassName(SRegClassID)
1627 << ": scalar reg isn't aligned " << Val;
1628 }
1629
1630 return createRegOperand(SRegClassID, Val >> shift);
1631}
1632
1634 bool IsHi) const {
1635 unsigned RegIdxInVGPR16 = RegIdx * 2 + (IsHi ? 1 : 0);
1636 return createRegOperand(AMDGPU::VGPR_16RegClassID, RegIdxInVGPR16);
1637}
1638
1639// Decode Literals for insts which always have a literal in the encoding
1642 if (HasLiteral) {
1643 assert(
1645 "Should only decode multiple kimm with VOPD, check VSrc operand types");
1646 if (Literal != Val)
1647 return errOperand(Val, "More than one unique literal is illegal");
1648 }
1649 HasLiteral = true;
1650 Literal = Val;
1651 return MCOperand::createImm(Literal);
1652}
1653
1656 if (HasLiteral) {
1657 if (Literal != Val)
1658 return errOperand(Val, "More than one unique literal is illegal");
1659 }
1660 HasLiteral = true;
1661 Literal = Val;
1662
1663 bool UseLit64 = Hi_32(Literal) == 0;
1665 LitModifier::Lit64, Literal, getContext()))
1666 : MCOperand::createImm(Literal);
1667}
1668
1671 const MCOperandInfo &OpDesc) const {
1672 // For now all literal constants are supposed to be unsigned integer
1673 // ToDo: deal with signed/unsigned 64-bit integer constants
1674 // ToDo: deal with float/double constants
1675 if (!HasLiteral) {
1676 if (Bytes.size() < 4) {
1677 return errOperand(0, "cannot read literal, inst bytes left " +
1678 Twine(Bytes.size()));
1679 }
1680 HasLiteral = true;
1681 Literal = eatBytes<uint32_t>(Bytes);
1682 }
1683
1684 // For disassembling always assume all inline constants are available.
1685 bool HasInv2Pi = true;
1686
1687 // Invalid instruction codes may contain literals for inline-only
1688 // operands, so we support them here as well.
1689 int64_t Val = Literal;
1690 bool UseLit = false;
1691 switch (OpDesc.OperandType) {
1692 default:
1693 llvm_unreachable("Unexpected operand type!");
1697 UseLit = AMDGPU::isInlinableLiteralBF16(Val, HasInv2Pi);
1698 break;
1701 break;
1705 UseLit = AMDGPU::isInlinableLiteralFP16(Val, HasInv2Pi);
1706 break;
1708 UseLit = AMDGPU::isInlinableLiteralV2F16(Val);
1709 break;
1712 break;
1714 break;
1718 UseLit = AMDGPU::isInlinableLiteralI16(Val, HasInv2Pi);
1719 break;
1721 UseLit = AMDGPU::isInlinableLiteralV2I16(Val);
1722 break;
1732 UseLit = AMDGPU::isInlinableLiteral32(Val, HasInv2Pi);
1733 break;
1738 UseLit = AMDGPU::isInlinableLiteral64(Val << 32, HasInv2Pi);
1739 if (!UseLit)
1740 Val <<= 32;
1741 break;
1745 UseLit = AMDGPU::isInlinableLiteral64(Val, HasInv2Pi);
1746 break;
1748 // TODO: Disassembling V_DUAL_FMAMK_F32_X_FMAMK_F32_gfx11 hits
1749 // decoding a literal in a position of a register operand. Give
1750 // it special handling in the caller, decodeImmOperands(), instead
1751 // of quietly allowing it here.
1752 break;
1753 }
1754
1757 : MCOperand::createImm(Val);
1758}
1759
1761 assert(STI.hasFeature(AMDGPU::Feature64BitLiterals));
1762
1763 if (!HasLiteral) {
1764 if (Bytes.size() < 8) {
1765 return errOperand(0, "cannot read literal64, inst bytes left " +
1766 Twine(Bytes.size()));
1767 }
1768 HasLiteral = true;
1769 Literal = eatBytes<uint64_t>(Bytes);
1770 }
1771
1772 bool UseLit64 = Hi_32(Literal) == 0;
1773
1774 UseLit64 |= AMDGPU::isInlinableLiteral64(
1775 Literal, STI.hasFeature(AMDGPU::FeatureInv2PiInlineImm));
1776
1778 LitModifier::Lit64, Literal, getContext()))
1779 : MCOperand::createImm(Literal);
1780}
1781
1783 using namespace AMDGPU::EncValues;
1784
1785 assert(Imm >= INLINE_INTEGER_C_MIN && Imm <= INLINE_INTEGER_C_MAX);
1786 return MCOperand::createImm((Imm <= INLINE_INTEGER_C_POSITIVE_MAX) ?
1787 (static_cast<int64_t>(Imm) - INLINE_INTEGER_C_MIN) :
1788 (INLINE_INTEGER_C_POSITIVE_MAX - static_cast<int64_t>(Imm)));
1789 // Cast prevents negative overflow.
1790}
1791
1792static int64_t getInlineImmVal32(unsigned Imm) {
1793 switch (Imm) {
1794 case 240:
1795 return llvm::bit_cast<uint32_t>(0.5f);
1796 case 241:
1797 return llvm::bit_cast<uint32_t>(-0.5f);
1798 case 242:
1799 return llvm::bit_cast<uint32_t>(1.0f);
1800 case 243:
1801 return llvm::bit_cast<uint32_t>(-1.0f);
1802 case 244:
1803 return llvm::bit_cast<uint32_t>(2.0f);
1804 case 245:
1805 return llvm::bit_cast<uint32_t>(-2.0f);
1806 case 246:
1807 return llvm::bit_cast<uint32_t>(4.0f);
1808 case 247:
1809 return llvm::bit_cast<uint32_t>(-4.0f);
1810 case 248: // 1 / (2 * PI)
1811 return 0x3e22f983;
1812 default:
1813 llvm_unreachable("invalid fp inline imm");
1814 }
1815}
1816
1817static int64_t getInlineImmVal64(unsigned Imm) {
1818 switch (Imm) {
1819 case 240:
1820 return llvm::bit_cast<uint64_t>(0.5);
1821 case 241:
1822 return llvm::bit_cast<uint64_t>(-0.5);
1823 case 242:
1824 return llvm::bit_cast<uint64_t>(1.0);
1825 case 243:
1826 return llvm::bit_cast<uint64_t>(-1.0);
1827 case 244:
1828 return llvm::bit_cast<uint64_t>(2.0);
1829 case 245:
1830 return llvm::bit_cast<uint64_t>(-2.0);
1831 case 246:
1832 return llvm::bit_cast<uint64_t>(4.0);
1833 case 247:
1834 return llvm::bit_cast<uint64_t>(-4.0);
1835 case 248: // 1 / (2 * PI)
1836 return 0x3fc45f306dc9c882;
1837 default:
1838 llvm_unreachable("invalid fp inline imm");
1839 }
1840}
1841
1842static int64_t getInlineImmValF16(unsigned Imm) {
1843 switch (Imm) {
1844 case 240:
1845 return 0x3800;
1846 case 241:
1847 return 0xB800;
1848 case 242:
1849 return 0x3C00;
1850 case 243:
1851 return 0xBC00;
1852 case 244:
1853 return 0x4000;
1854 case 245:
1855 return 0xC000;
1856 case 246:
1857 return 0x4400;
1858 case 247:
1859 return 0xC400;
1860 case 248: // 1 / (2 * PI)
1861 return 0x3118;
1862 default:
1863 llvm_unreachable("invalid fp inline imm");
1864 }
1865}
1866
1867static int64_t getInlineImmValBF16(unsigned Imm) {
1868 switch (Imm) {
1869 case 240:
1870 return 0x3F00;
1871 case 241:
1872 return 0xBF00;
1873 case 242:
1874 return 0x3F80;
1875 case 243:
1876 return 0xBF80;
1877 case 244:
1878 return 0x4000;
1879 case 245:
1880 return 0xC000;
1881 case 246:
1882 return 0x4080;
1883 case 247:
1884 return 0xC080;
1885 case 248: // 1 / (2 * PI)
1886 return 0x3E22;
1887 default:
1888 llvm_unreachable("invalid fp inline imm");
1889 }
1890}
1891
1892unsigned AMDGPUDisassembler::getVgprClassId(unsigned Width) const {
1893 using namespace AMDGPU;
1894
1895 switch (Width) {
1896 case 16:
1897 case 32:
1898 return VGPR_32RegClassID;
1899 case 64:
1900 return VReg_64RegClassID;
1901 case 96:
1902 return VReg_96RegClassID;
1903 case 128:
1904 return VReg_128RegClassID;
1905 case 160:
1906 return VReg_160RegClassID;
1907 case 192:
1908 return VReg_192RegClassID;
1909 case 256:
1910 return VReg_256RegClassID;
1911 case 288:
1912 return VReg_288RegClassID;
1913 case 320:
1914 return VReg_320RegClassID;
1915 case 352:
1916 return VReg_352RegClassID;
1917 case 384:
1918 return VReg_384RegClassID;
1919 case 512:
1920 return VReg_512RegClassID;
1921 case 1024:
1922 return VReg_1024RegClassID;
1923 }
1924 llvm_unreachable("Invalid register width!");
1925}
1926
1927unsigned AMDGPUDisassembler::getAgprClassId(unsigned Width) const {
1928 using namespace AMDGPU;
1929
1930 switch (Width) {
1931 case 16:
1932 case 32:
1933 return AGPR_32RegClassID;
1934 case 64:
1935 return AReg_64RegClassID;
1936 case 96:
1937 return AReg_96RegClassID;
1938 case 128:
1939 return AReg_128RegClassID;
1940 case 160:
1941 return AReg_160RegClassID;
1942 case 256:
1943 return AReg_256RegClassID;
1944 case 288:
1945 return AReg_288RegClassID;
1946 case 320:
1947 return AReg_320RegClassID;
1948 case 352:
1949 return AReg_352RegClassID;
1950 case 384:
1951 return AReg_384RegClassID;
1952 case 512:
1953 return AReg_512RegClassID;
1954 case 1024:
1955 return AReg_1024RegClassID;
1956 }
1957 llvm_unreachable("Invalid register width!");
1958}
1959
1960unsigned AMDGPUDisassembler::getSgprClassId(unsigned Width) const {
1961 using namespace AMDGPU;
1962
1963 switch (Width) {
1964 case 16:
1965 case 32:
1966 return SGPR_32RegClassID;
1967 case 64:
1968 return SGPR_64RegClassID;
1969 case 96:
1970 return SGPR_96RegClassID;
1971 case 128:
1972 return SGPR_128RegClassID;
1973 case 160:
1974 return SGPR_160RegClassID;
1975 case 256:
1976 return SGPR_256RegClassID;
1977 case 288:
1978 return SGPR_288RegClassID;
1979 case 320:
1980 return SGPR_320RegClassID;
1981 case 352:
1982 return SGPR_352RegClassID;
1983 case 384:
1984 return SGPR_384RegClassID;
1985 case 512:
1986 return SGPR_512RegClassID;
1987 }
1988 llvm_unreachable("Invalid register width!");
1989}
1990
1991unsigned AMDGPUDisassembler::getTtmpClassId(unsigned Width) const {
1992 using namespace AMDGPU;
1993
1994 switch (Width) {
1995 case 16:
1996 case 32:
1997 return TTMP_32RegClassID;
1998 case 64:
1999 return TTMP_64RegClassID;
2000 case 128:
2001 return TTMP_128RegClassID;
2002 case 256:
2003 return TTMP_256RegClassID;
2004 case 288:
2005 return TTMP_288RegClassID;
2006 case 320:
2007 return TTMP_320RegClassID;
2008 case 352:
2009 return TTMP_352RegClassID;
2010 case 384:
2011 return TTMP_384RegClassID;
2012 case 512:
2013 return TTMP_512RegClassID;
2014 }
2015 llvm_unreachable("Invalid register width!");
2016}
2017
2018int AMDGPUDisassembler::getTTmpIdx(unsigned Val) const {
2019 using namespace AMDGPU::EncValues;
2020
2021 unsigned TTmpMin = isGFX9Plus() ? TTMP_GFX9PLUS_MIN : TTMP_VI_MIN;
2022 unsigned TTmpMax = isGFX9Plus() ? TTMP_GFX9PLUS_MAX : TTMP_VI_MAX;
2023
2024 return (TTmpMin <= Val && Val <= TTmpMax)? Val - TTmpMin : -1;
2025}
2026
2028 unsigned Val) const {
2029 using namespace AMDGPU::EncValues;
2030
2031 assert(Val < 1024); // enum10
2032
2033 bool IsAGPR = Val & 512;
2034 Val &= 511;
2035
2036 if (VGPR_MIN <= Val && Val <= VGPR_MAX) {
2037 return createRegOperand(IsAGPR ? getAgprClassId(Width)
2038 : getVgprClassId(Width), Val - VGPR_MIN);
2039 }
2040 return decodeNonVGPRSrcOp(Inst, Width, Val & 0xFF);
2041}
2042
// Decode an 8-bit source encoding \p Val that does not name a vector
// register, for an operand of \p Width bits. The checks run in this order:
// SGPR range, TTMP range, inline-constant / 32-bit-literal codes, the 64-bit
// literal code (only with Feature64BitLiterals), and finally the special
// registers for the given width.
2044 unsigned Width,
2045 unsigned Val) const {
2046 // Cases when Val{8} is 1 (vgpr, agpr or true 16 vgpr) should have been
2047 // decoded earlier.
2048 assert(Val < (1 << 8) && "9-bit Src encoding when Val{8} is 0");
2049 using namespace AMDGPU::EncValues;
2050
// SGPR_MAX is the file-level macro that picks the GFX10+ or SI limit.
2051 if (Val <= SGPR_MAX) {
2052 // "SGPR_MIN <= Val" is always true and causes compilation warning.
2053 static_assert(SGPR_MIN == 0);
2054 return createSRegOperand(getSgprClassId(Width), Val - SGPR_MIN);
2055 }
2056
2057 int TTmpIdx = getTTmpIdx(Val);
2058 if (TTmpIdx >= 0) {
2059 return createSRegOperand(getTtmpClassId(Width), TTmpIdx);
2060 }
2061
// Inline constants and the 32-bit literal marker are returned as an
// immediate holding the raw encoding value, not the decoded constant.
// NOTE(review): presumably a later step replaces it with the real value;
// confirm against the caller.
2062 if ((INLINE_INTEGER_C_MIN <= Val && Val <= INLINE_INTEGER_C_MAX) ||
2063 (INLINE_FLOATING_C_MIN <= Val && Val <= INLINE_FLOATING_C_MAX) ||
2064 Val == LITERAL_CONST)
2065 return MCOperand::createImm(Val);
2066
2067 if (Val == LITERAL64_CONST && STI.hasFeature(AMDGPU::Feature64BitLiterals)) {
2068 return decodeLiteral64Constant();
2069 }
2070
// Everything left is treated as a special register; only the widths listed
// here are expected at this point.
2071 switch (Width) {
2072 case 32:
2073 case 16:
2074 return decodeSpecialReg32(Val);
2075 case 64:
2076 return decodeSpecialReg64(Val);
2077 case 96:
2078 case 128:
2079 case 256:
2080 case 512:
2081 return decodeSpecialReg96Plus(Val);
2082 default:
2083 llvm_unreachable("unexpected immediate type");
2084 }
2085}
2086
2087// Bit 0 of DstY isn't stored in the instruction, because it's always the
2088// opposite of bit 0 of DstX.
2090 unsigned Val) const {
2091 int VDstXInd =
2092 AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::vdstX);
2093 assert(VDstXInd != -1);
2094 assert(Inst.getOperand(VDstXInd).isReg());
2095 unsigned XDstReg = MRI.getEncodingValue(Inst.getOperand(VDstXInd).getReg());
2096 Val |= ~XDstReg & 1;
2097 return createRegOperand(getVgprClassId(32), Val);
2098}
2099
2101 using namespace AMDGPU;
2102
2103 switch (Val) {
2104 // clang-format off
2105 case 102: return createRegOperand(FLAT_SCR_LO);
2106 case 103: return createRegOperand(FLAT_SCR_HI);
2107 case 104: return createRegOperand(XNACK_MASK_LO);
2108 case 105: return createRegOperand(XNACK_MASK_HI);
2109 case 106: return createRegOperand(VCC_LO);
2110 case 107: return createRegOperand(VCC_HI);
2111 case 108: return createRegOperand(TBA_LO);
2112 case 109: return createRegOperand(TBA_HI);
2113 case 110: return createRegOperand(TMA_LO);
2114 case 111: return createRegOperand(TMA_HI);
2115 case 124:
2116 return isGFX11Plus() ? createRegOperand(SGPR_NULL) : createRegOperand(M0);
2117 case 125:
2118 return isGFX11Plus() ? createRegOperand(M0) : createRegOperand(SGPR_NULL);
2119 case 126: return createRegOperand(EXEC_LO);
2120 case 127: return createRegOperand(EXEC_HI);
2121 case 230: return createRegOperand(SRC_FLAT_SCRATCH_BASE_LO);
2122 case 231: return createRegOperand(SRC_FLAT_SCRATCH_BASE_HI);
2123 case 235: return createRegOperand(SRC_SHARED_BASE_LO);
2124 case 236: return createRegOperand(SRC_SHARED_LIMIT_LO);
2125 case 237: return createRegOperand(SRC_PRIVATE_BASE_LO);
2126 case 238: return createRegOperand(SRC_PRIVATE_LIMIT_LO);
2127 case 239: return createRegOperand(SRC_POPS_EXITING_WAVE_ID);
2128 case 251: return createRegOperand(SRC_VCCZ);
2129 case 252: return createRegOperand(SRC_EXECZ);
2130 case 253: return createRegOperand(SRC_SCC);
2131 case 254: return createRegOperand(LDS_DIRECT);
2132 default: break;
2133 // clang-format on
2134 }
2135 return errOperand(Val, "unknown operand encoding " + Twine(Val));
2136}
2137
2139 using namespace AMDGPU;
2140
2141 switch (Val) {
2142 case 102: return createRegOperand(FLAT_SCR);
2143 case 104: return createRegOperand(XNACK_MASK);
2144 case 106: return createRegOperand(VCC);
2145 case 108: return createRegOperand(TBA);
2146 case 110: return createRegOperand(TMA);
2147 case 124:
2148 if (isGFX11Plus())
2149 return createRegOperand(SGPR_NULL);
2150 break;
2151 case 125:
2152 if (!isGFX11Plus())
2153 return createRegOperand(SGPR_NULL);
2154 break;
2155 case 126: return createRegOperand(EXEC);
2156 case 230: return createRegOperand(SRC_FLAT_SCRATCH_BASE_LO);
2157 case 235: return createRegOperand(SRC_SHARED_BASE);
2158 case 236: return createRegOperand(SRC_SHARED_LIMIT);
2159 case 237: return createRegOperand(SRC_PRIVATE_BASE);
2160 case 238: return createRegOperand(SRC_PRIVATE_LIMIT);
2161 case 239: return createRegOperand(SRC_POPS_EXITING_WAVE_ID);
2162 case 251: return createRegOperand(SRC_VCCZ);
2163 case 252: return createRegOperand(SRC_EXECZ);
2164 case 253: return createRegOperand(SRC_SCC);
2165 default: break;
2166 }
2167 return errOperand(Val, "unknown operand encoding " + Twine(Val));
2168}
2169
2171 using namespace AMDGPU;
2172
2173 switch (Val) {
2174 case 124:
2175 if (isGFX11Plus())
2176 return createRegOperand(SGPR_NULL);
2177 break;
2178 case 125:
2179 if (!isGFX11Plus())
2180 return createRegOperand(SGPR_NULL);
2181 break;
2182 default:
2183 break;
2184 }
2185 return errOperand(Val, "unknown operand encoding " + Twine(Val));
2186}
2187
2189 const unsigned Val) const {
2190 using namespace AMDGPU::SDWA;
2191 using namespace AMDGPU::EncValues;
2192
2193 if (STI.hasFeature(AMDGPU::FeatureGFX9) ||
2194 STI.hasFeature(AMDGPU::FeatureGFX10)) {
2195 // XXX: cast to int is needed to avoid stupid warning:
2196 // compare with unsigned is always true
2197 if (int(SDWA9EncValues::SRC_VGPR_MIN) <= int(Val) &&
2198 Val <= SDWA9EncValues::SRC_VGPR_MAX) {
2199 return createRegOperand(getVgprClassId(Width),
2200 Val - SDWA9EncValues::SRC_VGPR_MIN);
2201 }
2202 if (SDWA9EncValues::SRC_SGPR_MIN <= Val &&
2203 Val <= (isGFX10Plus() ? SDWA9EncValues::SRC_SGPR_MAX_GFX10
2204 : SDWA9EncValues::SRC_SGPR_MAX_SI)) {
2205 return createSRegOperand(getSgprClassId(Width),
2206 Val - SDWA9EncValues::SRC_SGPR_MIN);
2207 }
2208 if (SDWA9EncValues::SRC_TTMP_MIN <= Val &&
2209 Val <= SDWA9EncValues::SRC_TTMP_MAX) {
2210 return createSRegOperand(getTtmpClassId(Width),
2211 Val - SDWA9EncValues::SRC_TTMP_MIN);
2212 }
2213
2214 const unsigned SVal = Val - SDWA9EncValues::SRC_SGPR_MIN;
2215
2216 if ((INLINE_INTEGER_C_MIN <= SVal && SVal <= INLINE_INTEGER_C_MAX) ||
2217 (INLINE_FLOATING_C_MIN <= SVal && SVal <= INLINE_FLOATING_C_MAX))
2218 return MCOperand::createImm(SVal);
2219
2220 return decodeSpecialReg32(SVal);
2221 }
2222 if (STI.hasFeature(AMDGPU::FeatureVolcanicIslands))
2223 return createRegOperand(getVgprClassId(Width), Val);
2224 llvm_unreachable("unsupported target");
2225}
2226
2228 return decodeSDWASrc(16, Val);
2229}
2230
2232 return decodeSDWASrc(32, Val);
2233}
2234
2236 using namespace AMDGPU::SDWA;
2237
2238 assert((STI.hasFeature(AMDGPU::FeatureGFX9) ||
2239 STI.hasFeature(AMDGPU::FeatureGFX10)) &&
2240 "SDWAVopcDst should be present only on GFX9+");
2241
2242 bool IsWave32 = STI.hasFeature(AMDGPU::FeatureWavefrontSize32);
2243
2244 if (Val & SDWA9EncValues::VOPC_DST_VCC_MASK) {
2245 Val &= SDWA9EncValues::VOPC_DST_SGPR_MASK;
2246
2247 int TTmpIdx = getTTmpIdx(Val);
2248 if (TTmpIdx >= 0) {
2249 auto TTmpClsId = getTtmpClassId(IsWave32 ? 32 : 64);
2250 return createSRegOperand(TTmpClsId, TTmpIdx);
2251 }
2252 if (Val > SGPR_MAX) {
2253 return IsWave32 ? decodeSpecialReg32(Val) : decodeSpecialReg64(Val);
2254 }
2255 return createSRegOperand(getSgprClassId(IsWave32 ? 32 : 64), Val);
2256 }
2257 return createRegOperand(IsWave32 ? AMDGPU::VCC_LO : AMDGPU::VCC);
2258}
2259
2261 unsigned Val) const {
2262 return STI.hasFeature(AMDGPU::FeatureWavefrontSize32)
2263 ? decodeSrcOp(Inst, 32, Val)
2264 : decodeSrcOp(Inst, 64, Val);
2265}
2266
2268 unsigned Val) const {
2269 return decodeSrcOp(Inst, 32, Val);
2270}
2271
2274 return MCOperand();
2275 return MCOperand::createImm(Val);
2276}
2277
2279 using VersionField = AMDGPU::EncodingField<7, 0>;
2280 using W64Bit = AMDGPU::EncodingBit<13>;
2281 using W32Bit = AMDGPU::EncodingBit<14>;
2282 using MDPBit = AMDGPU::EncodingBit<15>;
2284
2285 auto [Version, W64, W32, MDP] = Encoding::decode(Imm);
2286
2287 // Decode into a plain immediate if any unused bits are raised.
2288 if (Encoding::encode(Version, W64, W32, MDP) != Imm)
2289 return MCOperand::createImm(Imm);
2290
2291 const auto &Versions = AMDGPU::UCVersion::getGFXVersions();
2292 const auto *I = find_if(
2293 Versions, [Version = Version](const AMDGPU::UCVersion::GFXVersion &V) {
2294 return V.Code == Version;
2295 });
2296 MCContext &Ctx = getContext();
2297 const MCExpr *E;
2298 if (I == Versions.end())
2300 else
2301 E = MCSymbolRefExpr::create(Ctx.getOrCreateSymbol(I->Symbol), Ctx);
2302
2303 if (W64)
2304 E = MCBinaryExpr::createOr(E, UCVersionW64Expr, Ctx);
2305 if (W32)
2306 E = MCBinaryExpr::createOr(E, UCVersionW32Expr, Ctx);
2307 if (MDP)
2308 E = MCBinaryExpr::createOr(E, UCVersionMDPExpr, Ctx);
2309
2310 return MCOperand::createExpr(E);
2311}
2312
2314 return STI.hasFeature(AMDGPU::FeatureVolcanicIslands);
2315}
2316
2318
2320 return STI.hasFeature(AMDGPU::FeatureGFX90AInsts);
2321}
2322
2324
2326
2330
2332 return STI.hasFeature(AMDGPU::FeatureGFX11);
2333}
2334
2338
2340 return STI.hasFeature(AMDGPU::FeatureGFX11_7Insts);
2341}
2342
2344 return STI.hasFeature(AMDGPU::FeatureGFX12);
2345}
2346
2350
2352
2356
2358
2362
2364 return STI.hasFeature(AMDGPU::FeatureArchitectedFlatScratch);
2365}
2366
2370
2371//===----------------------------------------------------------------------===//
2372// AMDGPU specific symbol handling
2373//===----------------------------------------------------------------------===//
2374
2375/// Print a string describing the reserved bit range specified by Mask with
2376/// offset BaseBytes for use in error comments. Mask is a single continuous
2377/// range of 1s surrounded by zeros. The format here is meant to align with the
2378/// tables that describe these bits in llvm.org/docs/AMDGPUUsage.html.
2379static SmallString<32> getBitRangeFromMask(uint32_t Mask, unsigned BaseBytes) {
2380 SmallString<32> Result;
2381 raw_svector_ostream S(Result);
2382
2383 int TrailingZeros = llvm::countr_zero(Mask);
2384 int PopCount = llvm::popcount(Mask);
2385
2386 if (PopCount == 1) {
2387 S << "bit (" << (TrailingZeros + BaseBytes * CHAR_BIT) << ')';
2388 } else {
2389 S << "bits in range ("
2390 << (TrailingZeros + PopCount - 1 + BaseBytes * CHAR_BIT) << ':'
2391 << (TrailingZeros + BaseBytes * CHAR_BIT) << ')';
2392 }
2393
2394 return Result;
2395}
2396
// Helper macros for the COMPUTE_PGM_RSRC decoding code that follows. They are
// not self-contained: each expansion refers to names that must be in scope at
// the point of use (FourByteBuffer, KdStream, Indent and, for the pseudo
// directive comment, MAI).

// Extract the field selected by MASK from FourByteBuffer.
2397#define GET_FIELD(MASK) (AMDHSA_BITS_GET(FourByteBuffer, MASK))
// Write "<Indent><DIRECTIVE> <field value>\n" to KdStream. DIRECTIVE must be a
// string literal because it is concatenated with " ".
2398#define PRINT_DIRECTIVE(DIRECTIVE, MASK) \
2399 do { \
2400 KdStream << Indent << DIRECTIVE " " << GET_FIELD(MASK) << '\n'; \
2401 } while (0)
// Same as PRINT_DIRECTIVE, but the line is prefixed with the target's comment
// string and a space, so the field is emitted as a comment.
2402#define PRINT_PSEUDO_DIRECTIVE_COMMENT(DIRECTIVE, MASK) \
2403 do { \
2404 KdStream << Indent << MAI.getCommentString() << ' ' << DIRECTIVE " " \
2405 << GET_FIELD(MASK) << '\n'; \
2406 } while (0)
2407
// If any bit of MASK is set in FourByteBuffer, return from the *enclosing
// function* with an invalid_argument string error of the form
// "kernel descriptor <DESC> reserved <bit range> set<MSG>". DESC and MSG must
// be string literals; the bit range is computed with a byte offset of 0.
2408#define CHECK_RESERVED_BITS_IMPL(MASK, DESC, MSG) \
2409 do { \
2410 if (FourByteBuffer & (MASK)) { \
2411 return createStringError(std::errc::invalid_argument, \
2412 "kernel descriptor " DESC \
2413 " reserved %s set" MSG, \
2414 getBitRangeFromMask((MASK), 0).c_str()); \
2415 } \
2416 } while (0)
2417
// Convenience wrappers around CHECK_RESERVED_BITS_IMPL:
//  - CHECK_RESERVED_BITS:          description is the stringified mask name.
//  - CHECK_RESERVED_BITS_MSG:      as above, with ", <MSG>" appended.
//  - CHECK_RESERVED_BITS_DESC:     caller-supplied description, no message.
//  - CHECK_RESERVED_BITS_DESC_MSG: caller-supplied description and message.
2418#define CHECK_RESERVED_BITS(MASK) CHECK_RESERVED_BITS_IMPL(MASK, #MASK, "")
2419#define CHECK_RESERVED_BITS_MSG(MASK, MSG) \
2420 CHECK_RESERVED_BITS_IMPL(MASK, #MASK, ", " MSG)
2421#define CHECK_RESERVED_BITS_DESC(MASK, DESC) \
2422 CHECK_RESERVED_BITS_IMPL(MASK, DESC, "")
2423#define CHECK_RESERVED_BITS_DESC_MSG(MASK, DESC, MSG) \
2424 CHECK_RESERVED_BITS_IMPL(MASK, DESC, ", " MSG)
2425
2426// NOLINTNEXTLINE(readability-identifier-naming)
2428 uint32_t FourByteBuffer, raw_string_ostream &KdStream) const {
2429 using namespace amdhsa;
2430 StringRef Indent = "\t";
2431
2432 // We cannot accurately backward compute #VGPRs used from
2433 // GRANULATED_WORKITEM_VGPR_COUNT. But we are concerned with getting the same
2434 // value of GRANULATED_WORKITEM_VGPR_COUNT in the reassembled binary. So we
2435 // simply calculate the inverse of what the assembler does.
2436
2437 uint32_t GranulatedWorkitemVGPRCount =
2438 GET_FIELD(COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT);
2439
2440 uint32_t NextFreeVGPR =
2441 (GranulatedWorkitemVGPRCount + 1) *
2442 AMDGPU::IsaInfo::getVGPREncodingGranule(STI, EnableWavefrontSize32);
2443
2444 KdStream << Indent << ".amdhsa_next_free_vgpr " << NextFreeVGPR << '\n';
2445
2446 // We cannot backward compute values used to calculate
2447 // GRANULATED_WAVEFRONT_SGPR_COUNT. Hence the original values for following
2448 // directives can't be computed:
2449 // .amdhsa_reserve_vcc
2450 // .amdhsa_reserve_flat_scratch
2451 // .amdhsa_reserve_xnack_mask
2452 // They take their respective default values if not specified in the assembly.
2453 //
2454 // GRANULATED_WAVEFRONT_SGPR_COUNT
2455 // = f(NEXT_FREE_SGPR + VCC + FLAT_SCRATCH + XNACK_MASK)
2456 //
2457 // We compute the inverse as though all directives apart from NEXT_FREE_SGPR
2458 // are set to 0. So while disassembling we consider that:
2459 //
2460 // GRANULATED_WAVEFRONT_SGPR_COUNT
2461 // = f(NEXT_FREE_SGPR + 0 + 0 + 0)
2462 //
2463 // The disassembler cannot recover the original values of those 3 directives.
2464
2465 uint32_t GranulatedWavefrontSGPRCount =
2466 GET_FIELD(COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT);
2467
2468 if (isGFX10Plus())
2469 CHECK_RESERVED_BITS_MSG(COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT,
2470 "must be zero on gfx10+");
2471
2472 uint32_t NextFreeSGPR = (GranulatedWavefrontSGPRCount + 1) *
2474
2475 KdStream << Indent << ".amdhsa_reserve_vcc " << 0 << '\n';
2477 KdStream << Indent << ".amdhsa_reserve_flat_scratch " << 0 << '\n';
2478 bool ReservedXnackMask = STI.hasFeature(AMDGPU::FeatureXNACK);
2479 assert(!ReservedXnackMask || STI.hasFeature(AMDGPU::FeatureSupportsXNACK));
2480 KdStream << Indent << ".amdhsa_reserve_xnack_mask " << ReservedXnackMask
2481 << '\n';
2482 KdStream << Indent << ".amdhsa_next_free_sgpr " << NextFreeSGPR << "\n";
2483
2484 CHECK_RESERVED_BITS(COMPUTE_PGM_RSRC1_PRIORITY);
2485
2486 PRINT_DIRECTIVE(".amdhsa_float_round_mode_32",
2487 COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_32);
2488 PRINT_DIRECTIVE(".amdhsa_float_round_mode_16_64",
2489 COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_16_64);
2490 PRINT_DIRECTIVE(".amdhsa_float_denorm_mode_32",
2491 COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_32);
2492 PRINT_DIRECTIVE(".amdhsa_float_denorm_mode_16_64",
2493 COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_16_64);
2494
2495 CHECK_RESERVED_BITS(COMPUTE_PGM_RSRC1_PRIV);
2496
2497 if (STI.hasFeature(AMDGPU::FeatureDX10ClampAndIEEEMode))
2498 PRINT_DIRECTIVE(".amdhsa_dx10_clamp",
2499 COMPUTE_PGM_RSRC1_GFX6_GFX11_ENABLE_DX10_CLAMP);
2500
2501 CHECK_RESERVED_BITS(COMPUTE_PGM_RSRC1_DEBUG_MODE);
2502
2503 if (STI.hasFeature(AMDGPU::FeatureDX10ClampAndIEEEMode))
2504 PRINT_DIRECTIVE(".amdhsa_ieee_mode",
2505 COMPUTE_PGM_RSRC1_GFX6_GFX11_ENABLE_IEEE_MODE);
2506
2507 CHECK_RESERVED_BITS(COMPUTE_PGM_RSRC1_BULKY);
2508 CHECK_RESERVED_BITS(COMPUTE_PGM_RSRC1_CDBG_USER);
2509
2510 // Bits [26].
2511 if (isGFX9Plus()) {
2512 PRINT_DIRECTIVE(".amdhsa_fp16_overflow", COMPUTE_PGM_RSRC1_GFX9_PLUS_FP16_OVFL);
2513 } else {
2514 CHECK_RESERVED_BITS_DESC_MSG(COMPUTE_PGM_RSRC1_GFX6_GFX8_RESERVED0,
2515 "COMPUTE_PGM_RSRC1", "must be zero pre-gfx9");
2516 }
2517
2518 // Bits [27].
2519 if (isGFX1250Plus()) {
2520 PRINT_PSEUDO_DIRECTIVE_COMMENT("FLAT_SCRATCH_IS_NV",
2521 COMPUTE_PGM_RSRC1_GFX125_FLAT_SCRATCH_IS_NV);
2522 } else {
2523 CHECK_RESERVED_BITS_DESC(COMPUTE_PGM_RSRC1_GFX6_GFX120_RESERVED1,
2524 "COMPUTE_PGM_RSRC1");
2525 }
2526
2527 // Bits [28].
2528 CHECK_RESERVED_BITS_DESC(COMPUTE_PGM_RSRC1_RESERVED2, "COMPUTE_PGM_RSRC1");
2529
2530 // Bits [29-31].
2531 if (isGFX10Plus()) {
2532 // WGP_MODE is not available on GFX1250.
2533 if (!isGFX1250Plus()) {
2534 PRINT_DIRECTIVE(".amdhsa_workgroup_processor_mode",
2535 COMPUTE_PGM_RSRC1_GFX10_PLUS_WGP_MODE);
2536 }
2537 PRINT_DIRECTIVE(".amdhsa_memory_ordered", COMPUTE_PGM_RSRC1_GFX10_PLUS_MEM_ORDERED);
2538 PRINT_DIRECTIVE(".amdhsa_forward_progress", COMPUTE_PGM_RSRC1_GFX10_PLUS_FWD_PROGRESS);
2539 } else {
2540 CHECK_RESERVED_BITS_DESC(COMPUTE_PGM_RSRC1_GFX6_GFX9_RESERVED3,
2541 "COMPUTE_PGM_RSRC1");
2542 }
2543
2544 if (isGFX12Plus())
2545 PRINT_DIRECTIVE(".amdhsa_round_robin_scheduling",
2546 COMPUTE_PGM_RSRC1_GFX12_PLUS_ENABLE_WG_RR_EN);
2547
2548 return true;
2549}
2550
2551// NOLINTNEXTLINE(readability-identifier-naming)
2553 uint32_t FourByteBuffer, raw_string_ostream &KdStream) const {
2554 using namespace amdhsa;
2555 StringRef Indent = "\t";
2557 PRINT_DIRECTIVE(".amdhsa_enable_private_segment",
2558 COMPUTE_PGM_RSRC2_ENABLE_PRIVATE_SEGMENT);
2559 else
2560 PRINT_DIRECTIVE(".amdhsa_system_sgpr_private_segment_wavefront_offset",
2561 COMPUTE_PGM_RSRC2_ENABLE_PRIVATE_SEGMENT);
2562 PRINT_DIRECTIVE(".amdhsa_system_sgpr_workgroup_id_x",
2563 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_X);
2564 PRINT_DIRECTIVE(".amdhsa_system_sgpr_workgroup_id_y",
2565 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Y);
2566 PRINT_DIRECTIVE(".amdhsa_system_sgpr_workgroup_id_z",
2567 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Z);
2568 PRINT_DIRECTIVE(".amdhsa_system_sgpr_workgroup_info",
2569 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_INFO);
2570 PRINT_DIRECTIVE(".amdhsa_system_vgpr_workitem_id",
2571 COMPUTE_PGM_RSRC2_ENABLE_VGPR_WORKITEM_ID);
2572
2573 CHECK_RESERVED_BITS(COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_ADDRESS_WATCH);
2574 CHECK_RESERVED_BITS(COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_MEMORY);
2575 CHECK_RESERVED_BITS(COMPUTE_PGM_RSRC2_GRANULATED_LDS_SIZE);
2576
2578 ".amdhsa_exception_fp_ieee_invalid_op",
2579 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INVALID_OPERATION);
2580 PRINT_DIRECTIVE(".amdhsa_exception_fp_denorm_src",
2581 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_FP_DENORMAL_SOURCE);
2583 ".amdhsa_exception_fp_ieee_div_zero",
2584 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_DIVISION_BY_ZERO);
2585 PRINT_DIRECTIVE(".amdhsa_exception_fp_ieee_overflow",
2586 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_OVERFLOW);
2587 PRINT_DIRECTIVE(".amdhsa_exception_fp_ieee_underflow",
2588 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_UNDERFLOW);
2589 PRINT_DIRECTIVE(".amdhsa_exception_fp_ieee_inexact",
2590 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INEXACT);
2591 PRINT_DIRECTIVE(".amdhsa_exception_int_div_zero",
2592 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_INT_DIVIDE_BY_ZERO);
2593
2594 CHECK_RESERVED_BITS_DESC(COMPUTE_PGM_RSRC2_RESERVED0, "COMPUTE_PGM_RSRC2");
2595
2596 return true;
2597}
2598
2599// NOLINTNEXTLINE(readability-identifier-naming)
2601 uint32_t FourByteBuffer, raw_string_ostream &KdStream) const {
2602 using namespace amdhsa;
2603 StringRef Indent = "\t";
2604 if (isGFX90A()) {
2605 KdStream << Indent << ".amdhsa_accum_offset "
2606 << (GET_FIELD(COMPUTE_PGM_RSRC3_GFX90A_ACCUM_OFFSET) + 1) * 4
2607 << '\n';
2608
2609 PRINT_DIRECTIVE(".amdhsa_tg_split", COMPUTE_PGM_RSRC3_GFX90A_TG_SPLIT);
2610
2611 CHECK_RESERVED_BITS_DESC_MSG(COMPUTE_PGM_RSRC3_GFX90A_RESERVED0,
2612 "COMPUTE_PGM_RSRC3", "must be zero on gfx90a");
2613 CHECK_RESERVED_BITS_DESC_MSG(COMPUTE_PGM_RSRC3_GFX90A_RESERVED1,
2614 "COMPUTE_PGM_RSRC3", "must be zero on gfx90a");
2615 } else if (isGFX10Plus()) {
2616 // Bits [0-3].
2617 if (!isGFX12Plus()) {
2618 if (!EnableWavefrontSize32 || !*EnableWavefrontSize32) {
2619 PRINT_DIRECTIVE(".amdhsa_shared_vgpr_count",
2620 COMPUTE_PGM_RSRC3_GFX10_GFX11_SHARED_VGPR_COUNT);
2621 } else {
2623 "SHARED_VGPR_COUNT",
2624 COMPUTE_PGM_RSRC3_GFX10_GFX11_SHARED_VGPR_COUNT);
2625 }
2626 } else {
2627 CHECK_RESERVED_BITS_DESC_MSG(COMPUTE_PGM_RSRC3_GFX12_PLUS_RESERVED0,
2628 "COMPUTE_PGM_RSRC3",
2629 "must be zero on gfx12+");
2630 }
2631
2632 // Bits [4-11].
2633 if (isGFX11()) {
2634 PRINT_DIRECTIVE(".amdhsa_inst_pref_size",
2635 COMPUTE_PGM_RSRC3_GFX11_INST_PREF_SIZE);
2636 PRINT_PSEUDO_DIRECTIVE_COMMENT("TRAP_ON_START",
2637 COMPUTE_PGM_RSRC3_GFX11_TRAP_ON_START);
2638 PRINT_PSEUDO_DIRECTIVE_COMMENT("TRAP_ON_END",
2639 COMPUTE_PGM_RSRC3_GFX11_TRAP_ON_END);
2640 } else if (isGFX12Plus()) {
2641 PRINT_DIRECTIVE(".amdhsa_inst_pref_size",
2642 COMPUTE_PGM_RSRC3_GFX12_PLUS_INST_PREF_SIZE);
2643 } else {
2644 CHECK_RESERVED_BITS_DESC_MSG(COMPUTE_PGM_RSRC3_GFX10_RESERVED1,
2645 "COMPUTE_PGM_RSRC3",
2646 "must be zero on gfx10");
2647 }
2648
2649 // Bits [12].
2650 CHECK_RESERVED_BITS_DESC_MSG(COMPUTE_PGM_RSRC3_GFX10_PLUS_RESERVED2,
2651 "COMPUTE_PGM_RSRC3", "must be zero on gfx10+");
2652
2653 // Bits [13].
2654 if (isGFX12Plus()) {
2656 COMPUTE_PGM_RSRC3_GFX12_PLUS_GLG_EN);
2657 } else {
2658 CHECK_RESERVED_BITS_DESC_MSG(COMPUTE_PGM_RSRC3_GFX10_GFX11_RESERVED3,
2659 "COMPUTE_PGM_RSRC3",
2660 "must be zero on gfx10 or gfx11");
2661 }
2662
2663 // Bits [14-21].
2664 if (isGFX1250Plus()) {
2665 PRINT_DIRECTIVE(".amdhsa_named_barrier_count",
2666 COMPUTE_PGM_RSRC3_GFX125_NAMED_BAR_CNT);
2668 "ENABLE_DYNAMIC_VGPR", COMPUTE_PGM_RSRC3_GFX125_ENABLE_DYNAMIC_VGPR);
2670 COMPUTE_PGM_RSRC3_GFX125_TCP_SPLIT);
2672 "ENABLE_DIDT_THROTTLE",
2673 COMPUTE_PGM_RSRC3_GFX125_ENABLE_DIDT_THROTTLE);
2674 } else {
2675 CHECK_RESERVED_BITS_DESC_MSG(COMPUTE_PGM_RSRC3_GFX10_GFX120_RESERVED4,
2676 "COMPUTE_PGM_RSRC3",
2677 "must be zero on gfx10+");
2678 }
2679
2680 // Bits [22-30].
2681 CHECK_RESERVED_BITS_DESC_MSG(COMPUTE_PGM_RSRC3_GFX10_PLUS_RESERVED5,
2682 "COMPUTE_PGM_RSRC3", "must be zero on gfx10+");
2683
2684 // Bits [31].
2685 if (isGFX11Plus()) {
2687 COMPUTE_PGM_RSRC3_GFX11_PLUS_IMAGE_OP);
2688 } else {
2689 CHECK_RESERVED_BITS_DESC_MSG(COMPUTE_PGM_RSRC3_GFX10_RESERVED6,
2690 "COMPUTE_PGM_RSRC3",
2691 "must be zero on gfx10");
2692 }
2693 } else if (FourByteBuffer) {
2694 return createStringError(
2695 std::errc::invalid_argument,
2696 "kernel descriptor COMPUTE_PGM_RSRC3 must be all zero before gfx9");
2697 }
2698 return true;
2699}
// The helper macros defined above are no longer needed past this point;
// undefine them so they do not leak into the rest of the file
// (PRINT_DIRECTIVE is redefined with a different expansion further down).
2700#undef PRINT_PSEUDO_DIRECTIVE_COMMENT
2701#undef PRINT_DIRECTIVE
2702#undef GET_FIELD
2703#undef CHECK_RESERVED_BITS_IMPL
2704#undef CHECK_RESERVED_BITS
2705#undef CHECK_RESERVED_BITS_MSG
2706#undef CHECK_RESERVED_BITS_DESC
2707#undef CHECK_RESERVED_BITS_DESC_MSG
2708
2709/// Create an error object to return from onSymbolStart for reserved kernel
2710/// descriptor bits being set.
2711static Error createReservedKDBitsError(uint32_t Mask, unsigned BaseBytes,
2712 const char *Msg = "") {
2713 return createStringError(
2714 std::errc::invalid_argument, "kernel descriptor reserved %s set%s%s",
2715 getBitRangeFromMask(Mask, BaseBytes).c_str(), *Msg ? ", " : "", Msg);
2716}
2717
2718/// Create an error object to return from onSymbolStart for reserved kernel
2719/// descriptor bytes being set.
2720static Error createReservedKDBytesError(unsigned BaseInBytes,
2721 unsigned WidthInBytes) {
2722 // Create an error comment in the same format as the "Kernel Descriptor"
2723 // table here: https://llvm.org/docs/AMDGPUUsage.html#kernel-descriptor .
2724 return createStringError(
2725 std::errc::invalid_argument,
2726 "kernel descriptor reserved bits in range (%u:%u) set",
2727 (BaseInBytes + WidthInBytes) * CHAR_BIT - 1, BaseInBytes * CHAR_BIT);
2728}
2729
2732 raw_string_ostream &KdStream) const {
2733#define PRINT_DIRECTIVE(DIRECTIVE, MASK) \
2734 do { \
2735 KdStream << Indent << DIRECTIVE " " \
2736 << ((TwoByteBuffer & MASK) >> (MASK##_SHIFT)) << '\n'; \
2737 } while (0)
2738
2739 uint16_t TwoByteBuffer = 0;
2740 uint32_t FourByteBuffer = 0;
2741
2742 StringRef ReservedBytes;
2743 StringRef Indent = "\t";
2744
2745 assert(Bytes.size() == 64);
2746 DataExtractor DE(Bytes, /*IsLittleEndian=*/true);
2747
2748 switch (Cursor.tell()) {
2750 FourByteBuffer = DE.getU32(Cursor);
2751 KdStream << Indent << ".amdhsa_group_segment_fixed_size " << FourByteBuffer
2752 << '\n';
2753 return true;
2754
2756 FourByteBuffer = DE.getU32(Cursor);
2757 KdStream << Indent << ".amdhsa_private_segment_fixed_size "
2758 << FourByteBuffer << '\n';
2759 return true;
2760
2762 FourByteBuffer = DE.getU32(Cursor);
2763 KdStream << Indent << ".amdhsa_kernarg_size "
2764 << FourByteBuffer << '\n';
2765 return true;
2766
2768 // 4 reserved bytes, must be 0.
2769 ReservedBytes = DE.getBytes(Cursor, 4);
2770 for (char B : ReservedBytes) {
2771 if (B != 0)
2773 }
2774 return true;
2775
2777 // KERNEL_CODE_ENTRY_BYTE_OFFSET
2778 // So far no directive controls this for Code Object V3, so simply skip for
2779 // disassembly.
2780 DE.skip(Cursor, 8);
2781 return true;
2782
2784 // 20 reserved bytes, must be 0.
2785 ReservedBytes = DE.getBytes(Cursor, 20);
2786 for (char B : ReservedBytes) {
2787 if (B != 0)
2789 }
2790 return true;
2791
2793 FourByteBuffer = DE.getU32(Cursor);
2794 return decodeCOMPUTE_PGM_RSRC3(FourByteBuffer, KdStream);
2795
2797 FourByteBuffer = DE.getU32(Cursor);
2798 return decodeCOMPUTE_PGM_RSRC1(FourByteBuffer, KdStream);
2799
2801 FourByteBuffer = DE.getU32(Cursor);
2802 return decodeCOMPUTE_PGM_RSRC2(FourByteBuffer, KdStream);
2803
2805 using namespace amdhsa;
2806 TwoByteBuffer = DE.getU16(Cursor);
2807
2809 PRINT_DIRECTIVE(".amdhsa_user_sgpr_private_segment_buffer",
2810 KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER);
2811 PRINT_DIRECTIVE(".amdhsa_user_sgpr_dispatch_ptr",
2812 KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR);
2813 PRINT_DIRECTIVE(".amdhsa_user_sgpr_queue_ptr",
2814 KERNEL_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR);
2815 PRINT_DIRECTIVE(".amdhsa_user_sgpr_kernarg_segment_ptr",
2816 KERNEL_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR);
2817 PRINT_DIRECTIVE(".amdhsa_user_sgpr_dispatch_id",
2818 KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID);
2820 PRINT_DIRECTIVE(".amdhsa_user_sgpr_flat_scratch_init",
2821 KERNEL_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT);
2822 PRINT_DIRECTIVE(".amdhsa_user_sgpr_private_segment_size",
2823 KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE);
2824
2825 if (TwoByteBuffer & KERNEL_CODE_PROPERTY_RESERVED0)
2826 return createReservedKDBitsError(KERNEL_CODE_PROPERTY_RESERVED0,
2828
2829 // Reserved for GFX9
2830 if (isGFX9() &&
2831 (TwoByteBuffer & KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32)) {
2833 KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32,
2834 amdhsa::KERNEL_CODE_PROPERTIES_OFFSET, "must be zero on gfx9");
2835 }
2836 if (isGFX10Plus()) {
2837 PRINT_DIRECTIVE(".amdhsa_wavefront_size32",
2838 KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32);
2839 }
2840
2841 if (CodeObjectVersion >= AMDGPU::AMDHSA_COV5)
2842 PRINT_DIRECTIVE(".amdhsa_uses_dynamic_stack",
2843 KERNEL_CODE_PROPERTY_USES_DYNAMIC_STACK);
2844
2845 if (TwoByteBuffer & KERNEL_CODE_PROPERTY_RESERVED1) {
2846 return createReservedKDBitsError(KERNEL_CODE_PROPERTY_RESERVED1,
2848 }
2849
2850 return true;
2851
2853 using namespace amdhsa;
2854 TwoByteBuffer = DE.getU16(Cursor);
2855 if (TwoByteBuffer & KERNARG_PRELOAD_SPEC_LENGTH) {
2856 PRINT_DIRECTIVE(".amdhsa_user_sgpr_kernarg_preload_length",
2857 KERNARG_PRELOAD_SPEC_LENGTH);
2858 }
2859
2860 if (TwoByteBuffer & KERNARG_PRELOAD_SPEC_OFFSET) {
2861 PRINT_DIRECTIVE(".amdhsa_user_sgpr_kernarg_preload_offset",
2862 KERNARG_PRELOAD_SPEC_OFFSET);
2863 }
2864 return true;
2865
2867 // 4 bytes from here are reserved, must be 0.
2868 ReservedBytes = DE.getBytes(Cursor, 4);
2869 for (char B : ReservedBytes) {
2870 if (B != 0)
2872 }
2873 return true;
2874
2875 default:
2876 llvm_unreachable("Unhandled index. Case statements cover everything.");
2877 return true;
2878 }
2879#undef PRINT_DIRECTIVE
2880}
2881
2883 StringRef KdName, ArrayRef<uint8_t> Bytes, uint64_t KdAddress) const {
2884
2885 // CP microcode requires the kernel descriptor to be 64 aligned.
2886 if (Bytes.size() != 64 || KdAddress % 64 != 0)
2887 return createStringError(std::errc::invalid_argument,
2888 "kernel descriptor must be 64-byte aligned");
2889
2890 // FIXME: We can't actually decode "in order" as is done below, as e.g. GFX10
2891 // requires us to know the setting of .amdhsa_wavefront_size32 in order to
2892 // accurately produce .amdhsa_next_free_vgpr, and they appear in the wrong
2893 // order. Workaround this by first looking up .amdhsa_wavefront_size32 here
2894 // when required.
2895 if (isGFX10Plus()) {
2896 uint16_t KernelCodeProperties =
2899 EnableWavefrontSize32 =
2900 AMDHSA_BITS_GET(KernelCodeProperties,
2901 amdhsa::KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32);
2902 }
2903
2904 std::string Kd;
2905 raw_string_ostream KdStream(Kd);
2906 KdStream << ".amdhsa_kernel " << KdName << '\n';
2907
2909 while (C && C.tell() < Bytes.size()) {
2910 Expected<bool> Res = decodeKernelDescriptorDirective(C, Bytes, KdStream);
2911
2912 cantFail(C.takeError());
2913
2914 if (!Res)
2915 return Res;
2916 }
2917 KdStream << ".end_amdhsa_kernel\n";
2918 outs() << KdStream.str();
2919 return true;
2920}
2921
2923 uint64_t &Size,
2924 ArrayRef<uint8_t> Bytes,
2925 uint64_t Address) const {
2926 // Right now only kernel descriptor needs to be handled.
2927 // We ignore all other symbols for target specific handling.
2928 // TODO:
2929 // Fix the spurious symbol issue for AMDGPU kernels. Exists for both Code
2930 // Object V2 and V3 when symbols are marked protected.
2931
2932 // amd_kernel_code_t for Code Object V2.
2933 if (Symbol.Type == ELF::STT_AMDGPU_HSA_KERNEL) {
2934 Size = 256;
2935 return createStringError(std::errc::invalid_argument,
2936 "code object v2 is not supported");
2937 }
2938
2939 // Code Object V3 kernel descriptors.
2940 StringRef Name = Symbol.Name;
2941 if (Symbol.Type == ELF::STT_OBJECT && Name.ends_with(StringRef(".kd"))) {
2942 Size = 64; // Size = 64 regardless of success or failure.
2943 return decodeKernelDescriptor(Name.drop_back(3), Bytes, Address);
2944 }
2945
2946 return false;
2947}
2948
2949const MCExpr *AMDGPUDisassembler::createConstantSymbolExpr(StringRef Id,
2950 int64_t Val) {
2951 MCContext &Ctx = getContext();
2952 MCSymbol *Sym = Ctx.getOrCreateSymbol(Id);
2953 // Note: only set value to Val on a new symbol in case an dissassembler
2954 // has already been initialized in this context.
2955 if (!Sym->isVariable()) {
2957 } else {
2958 int64_t Res = ~Val;
2959 bool Valid = Sym->getVariableValue()->evaluateAsAbsolute(Res);
2960 if (!Valid || Res != Val)
2961 Ctx.reportWarning(SMLoc(), "unsupported redefinition of " + Id);
2962 }
2963 return MCSymbolRefExpr::create(Sym, Ctx);
2964}
2965
2967 const uint64_t TSFlags = MCII->get(MI.getOpcode()).TSFlags;
2968
2969 // Check for MUBUF and MTBUF instructions
2970 if (TSFlags & (SIInstrFlags::MTBUF | SIInstrFlags::MUBUF))
2971 return true;
2972
2973 // Check for SMEM buffer instructions (S_BUFFER_* instructions)
2974 if ((TSFlags & SIInstrFlags::SMRD) && AMDGPU::getSMEMIsBuffer(MI.getOpcode()))
2975 return true;
2976
2977 return false;
2978}
2979
2980//===----------------------------------------------------------------------===//
2981// AMDGPUSymbolizer
2982//===----------------------------------------------------------------------===//
2983
2984// Try to find symbol name for specified label
2986 MCInst &Inst, raw_ostream & /*cStream*/, int64_t Value,
2987 uint64_t /*Address*/, bool IsBranch, uint64_t /*Offset*/,
2988 uint64_t /*OpSize*/, uint64_t /*InstSize*/) {
2989
2990 if (!IsBranch) {
2991 return false;
2992 }
2993
2994 auto *Symbols = static_cast<SectionSymbolsTy *>(DisInfo);
2995 if (!Symbols)
2996 return false;
2997
2998 auto Result = llvm::find_if(*Symbols, [Value](const SymbolInfoTy &Val) {
2999 return Val.Addr == static_cast<uint64_t>(Value) &&
3000 Val.Type == ELF::STT_NOTYPE;
3001 });
3002 if (Result != Symbols->end()) {
3003 auto *Sym = Ctx.getOrCreateSymbol(Result->Name);
3004 const auto *Add = MCSymbolRefExpr::create(Sym, Ctx);
3006 return true;
3007 }
3008 // Add to list of referenced addresses, so caller can synthesize a label.
3009 ReferencedAddresses.push_back(static_cast<uint64_t>(Value));
3010 return false;
3011}
3012
3014 int64_t Value,
3015 uint64_t Address) {
3016 llvm_unreachable("unimplemented");
3017}
3018
3019//===----------------------------------------------------------------------===//
3020// Initialization
3021//===----------------------------------------------------------------------===//
3022
3024 LLVMOpInfoCallback /*GetOpInfo*/,
3025 LLVMSymbolLookupCallback /*SymbolLookUp*/,
3026 void *DisInfo,
3027 MCContext *Ctx,
3028 std::unique_ptr<MCRelocationInfo> &&RelInfo) {
3029 return new AMDGPUSymbolizer(*Ctx, std::move(RelInfo), DisInfo);
3030}
3031
3033 const MCSubtargetInfo &STI,
3034 MCContext &Ctx) {
3035 return new AMDGPUDisassembler(STI, Ctx, T.createMCInstrInfo());
3036}
3037
3038extern "C" LLVM_ABI LLVM_EXTERNAL_VISIBILITY void
MCDisassembler::DecodeStatus DecodeStatus
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
aarch64 promote const
#define CHECK_RESERVED_BITS_DESC(MASK, DESC)
static VOPModifiers collectVOPModifiers(const MCInst &MI, bool IsVOP3P=false)
static int insertNamedMCOperand(MCInst &MI, const MCOperand &Op, AMDGPU::OpName Name)
LLVM_ABI LLVM_EXTERNAL_VISIBILITY void LLVMInitializeAMDGPUDisassembler()
static DecodeStatus decodeOperand_VSrcT16_Lo128(MCInst &Inst, unsigned Imm, uint64_t, const MCDisassembler *Decoder)
static DecodeStatus decodeOperand_KImmFP64(MCInst &Inst, uint64_t Imm, uint64_t Addr, const MCDisassembler *Decoder)
static SmallString< 32 > getBitRangeFromMask(uint32_t Mask, unsigned BaseBytes)
Print a string describing the reserved bit range specified by Mask with offset BaseBytes for use in e...
#define DECODE_OPERAND_SREG_8(RegClass, OpWidth)
static DecodeStatus decodeSMEMOffset(MCInst &Inst, unsigned Imm, uint64_t Addr, const MCDisassembler *Decoder)
static std::bitset< 128 > eat16Bytes(ArrayRef< uint8_t > &Bytes)
static DecodeStatus decodeVersionImm(MCInst &Inst, unsigned Imm, uint64_t, const MCDisassembler *Decoder)
#define DECODE_OPERAND_SREG_7(RegClass, OpWidth)
static DecodeStatus decodeSrcA9(MCInst &Inst, unsigned Imm, uint64_t, const MCDisassembler *Decoder)
static DecodeStatus decodeOperand_VGPR_16(MCInst &Inst, unsigned Imm, uint64_t, const MCDisassembler *Decoder)
#define PRINT_PSEUDO_DIRECTIVE_COMMENT(DIRECTIVE, MASK)
static DecodeStatus decodeSrcOp(MCInst &Inst, unsigned EncSize, unsigned OpWidth, unsigned Imm, unsigned EncImm, const MCDisassembler *Decoder)
static DecodeStatus decodeDpp8FI(MCInst &Inst, unsigned Val, uint64_t Addr, const MCDisassembler *Decoder)
static DecodeStatus decodeOperand_VSrc_f64(MCInst &Inst, unsigned Imm, uint64_t Addr, const MCDisassembler *Decoder)
static MCRegister CheckVGPROverflow(MCRegister Reg, const MCRegisterClass &RC, const MCRegisterInfo &MRI)
static int64_t getInlineImmValBF16(unsigned Imm)
#define DECODE_SDWA(DecName)
static DecodeStatus decodeSOPPBrTarget(MCInst &Inst, unsigned Imm, uint64_t Addr, const MCDisassembler *Decoder)
#define DECODE_OPERAND_REG_8(RegClass)
#define PRINT_DIRECTIVE(DIRECTIVE, MASK)
static DecodeStatus decodeSrcRegOrImm9(MCInst &Inst, unsigned Imm, uint64_t, const MCDisassembler *Decoder)
static DecodeStatus DecodeVGPR_16RegisterClass(MCInst &Inst, unsigned Imm, uint64_t, const MCDisassembler *Decoder)
static DecodeStatus decodeSrcReg9(MCInst &Inst, unsigned Imm, uint64_t, const MCDisassembler *Decoder)
static int64_t getInlineImmVal32(unsigned Imm)
static MCDisassembler::DecodeStatus addOperand(MCInst &Inst, const MCOperand &Opnd)
#define CHECK_RESERVED_BITS(MASK)
static DecodeStatus decodeSrcAV10(MCInst &Inst, unsigned Imm, uint64_t, const MCDisassembler *Decoder)
#define SGPR_MAX
static int64_t getInlineImmVal64(unsigned Imm)
static T eatBytes(ArrayRef< uint8_t > &Bytes)
static DecodeStatus decodeOperand_KImmFP(MCInst &Inst, unsigned Imm, uint64_t Addr, const MCDisassembler *Decoder)
static DecodeStatus decodeAVLdSt(MCInst &Inst, unsigned Imm, unsigned Opw, const MCDisassembler *Decoder)
static MCDisassembler * createAMDGPUDisassembler(const Target &T, const MCSubtargetInfo &STI, MCContext &Ctx)
static DecodeStatus decodeSrcRegOrImmA9(MCInst &Inst, unsigned Imm, uint64_t, const MCDisassembler *Decoder)
static DecodeStatus DecodeVGPR_16_Lo128RegisterClass(MCInst &Inst, unsigned Imm, uint64_t, const MCDisassembler *Decoder)
#define CHECK_RESERVED_BITS_MSG(MASK, MSG)
static DecodeStatus decodeOperandVOPDDstY(MCInst &Inst, unsigned Val, uint64_t Addr, const void *Decoder)
static MCSymbolizer * createAMDGPUSymbolizer(const Triple &, LLVMOpInfoCallback, LLVMSymbolLookupCallback, void *DisInfo, MCContext *Ctx, std::unique_ptr< MCRelocationInfo > &&RelInfo)
static DecodeStatus decodeBoolReg(MCInst &Inst, unsigned Val, uint64_t Addr, const MCDisassembler *Decoder)
static int64_t getInlineImmValF16(unsigned Imm)
#define GET_FIELD(MASK)
static std::bitset< 96 > eat12Bytes(ArrayRef< uint8_t > &Bytes)
static DecodeStatus decodeOperand_VSrcT16(MCInst &Inst, unsigned Imm, uint64_t, const MCDisassembler *Decoder)
static Error createReservedKDBytesError(unsigned BaseInBytes, unsigned WidthInBytes)
Create an error object to return from onSymbolStart for reserved kernel descriptor bytes being set.
static DecodeStatus decodeSplitBarrier(MCInst &Inst, unsigned Val, uint64_t Addr, const MCDisassembler *Decoder)
static DecodeStatus decodeAV10(MCInst &Inst, unsigned Imm, uint64_t, const MCDisassembler *Decoder)
#define CHECK_RESERVED_BITS_DESC_MSG(MASK, DESC, MSG)
static Error createReservedKDBitsError(uint32_t Mask, unsigned BaseBytes, const char *Msg="")
Create an error object to return from onSymbolStart for reserved kernel descriptor bits being set.
static void adjustMFMA_F8F6F4OpRegClass(const MCRegisterInfo &MRI, MCOperand &MO, uint8_t NumRegs)
Adjust the register values used by V_MFMA_F8F6F4_f8_f8 instructions to the appropriate subregister fo...
This file contains declaration for AMDGPU ISA disassembler.
Provides AMDGPU specific target descriptions.
AMDHSA kernel descriptor definitions.
#define AMDHSA_BITS_GET(SRC, MSK)
#define X(NUM, ENUM, NAME)
Definition ELF.h:853
#define AMDGPU_MACH_LIST(X)
Definition ELF.h:768
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
#define LLVM_ABI
Definition Compiler.h:213
#define LLVM_EXTERNAL_VISIBILITY
Definition Compiler.h:132
IRTranslator LLVM IR MI
#define I(x, y, z)
Definition MD5.cpp:57
Register Reg
#define T
MachineInstr unsigned OpIdx
Interface definition for SIRegisterInfo.
MCOperand decodeNonVGPRSrcOp(const MCInst &Inst, unsigned Width, unsigned Val) const
MCOperand decodeLiteral64Constant() const
void convertVOPC64DPPInst(MCInst &MI) const
bool isBufferInstruction(const MCInst &MI) const
Check if the instruction is a buffer operation (MUBUF, MTBUF, or S_BUFFER)
void convertEXPInst(MCInst &MI) const
MCOperand decodeSpecialReg64(unsigned Val) const
const char * getRegClassName(unsigned RegClassID) const
Expected< bool > decodeCOMPUTE_PGM_RSRC1(uint32_t FourByteBuffer, raw_string_ostream &KdStream) const
Decode as directives that handle COMPUTE_PGM_RSRC1.
MCOperand decodeSplitBarrier(const MCInst &Inst, unsigned Val) const
Expected< bool > decodeKernelDescriptorDirective(DataExtractor::Cursor &Cursor, ArrayRef< uint8_t > Bytes, raw_string_ostream &KdStream) const
void convertVOPCDPPInst(MCInst &MI) const
MCOperand decodeSpecialReg96Plus(unsigned Val) const
MCOperand decodeSDWASrc32(unsigned Val) const
void setABIVersion(unsigned Version) override
ELF-specific, set the ABI version from the object header.
Expected< bool > decodeCOMPUTE_PGM_RSRC2(uint32_t FourByteBuffer, raw_string_ostream &KdStream) const
Decode as directives that handle COMPUTE_PGM_RSRC2.
unsigned getAgprClassId(unsigned Width) const
MCOperand decodeDpp8FI(unsigned Val) const
MCOperand decodeSDWASrc(unsigned Width, unsigned Val) const
void convertFMAanyK(MCInst &MI) const
DecodeStatus tryDecodeInst(const uint8_t *Table, MCInst &MI, InsnType Inst, uint64_t Address, raw_ostream &Comments) const
void convertMacDPPInst(MCInst &MI) const
MCOperand decodeVOPDDstYOp(MCInst &Inst, unsigned Val) const
void convertDPP8Inst(MCInst &MI) const
MCOperand createVGPR16Operand(unsigned RegIdx, bool IsHi) const
MCOperand errOperand(unsigned V, const Twine &ErrMsg) const
MCOperand decodeVersionImm(unsigned Imm) const
Expected< bool > decodeKernelDescriptor(StringRef KdName, ArrayRef< uint8_t > Bytes, uint64_t KdAddress) const
void convertVOP3DPPInst(MCInst &MI) const
void convertTrue16OpSel(MCInst &MI) const
MCOperand decodeSrcOp(const MCInst &Inst, unsigned Width, unsigned Val) const
MCOperand decodeMandatoryLiteralConstant(unsigned Imm) const
MCOperand decodeLiteralConstant(const MCInstrDesc &Desc, const MCOperandInfo &OpDesc) const
Expected< bool > decodeCOMPUTE_PGM_RSRC3(uint32_t FourByteBuffer, raw_string_ostream &KdStream) const
Decode as directives that handle COMPUTE_PGM_RSRC3.
AMDGPUDisassembler(const MCSubtargetInfo &STI, MCContext &Ctx, MCInstrInfo const *MCII)
MCOperand decodeSpecialReg32(unsigned Val) const
MCOperand createRegOperand(MCRegister Reg) const
MCOperand decodeSDWAVopcDst(unsigned Val) const
void convertVINTERPInst(MCInst &MI) const
void convertSDWAInst(MCInst &MI) const
unsigned getSgprClassId(unsigned Width) const
static MCOperand decodeIntImmed(unsigned Imm)
void convertWMMAInst(MCInst &MI) const
MCOperand decodeBoolReg(const MCInst &Inst, unsigned Val) const
void emitTargetIDIfSupported(raw_ostream &OS, unsigned EFlags) const override
Emit something based on ELF's e_flags if the target needs to.
unsigned getVgprClassId(unsigned Width) const
void convertMAIInst(MCInst &MI) const
f8f6f4 instructions have different pseudos depending on the used formats.
unsigned getTtmpClassId(unsigned Width) const
DecodeStatus getInstruction(MCInst &MI, uint64_t &Size, ArrayRef< uint8_t > Bytes, uint64_t Address, raw_ostream &CS) const override
Returns the disassembly of a single instruction.
MCOperand decodeMandatoryLiteral64Constant(uint64_t Imm) const
void convertMIMGInst(MCInst &MI) const
bool isMacDPP(MCInst &MI) const
int getTTmpIdx(unsigned Val) const
void convertVOP3PDPPInst(MCInst &MI) const
MCOperand createSRegOperand(unsigned SRegClassID, unsigned Val) const
MCOperand decodeSDWASrc16(unsigned Val) const
Expected< bool > onSymbolStart(SymbolInfoTy &Symbol, uint64_t &Size, ArrayRef< uint8_t > Bytes, uint64_t Address) const override
Used to perform separate target specific disassembly for a particular symbol.
static const AMDGPUMCExpr * createLit(LitModifier Lit, int64_t Value, MCContext &Ctx)
bool tryAddingSymbolicOperand(MCInst &Inst, raw_ostream &cStream, int64_t Value, uint64_t Address, bool IsBranch, uint64_t Offset, uint64_t OpSize, uint64_t InstSize) override
Try to add a symbolic operand instead of Value to the MCInst.
void tryAddingPcLoadReferenceComment(raw_ostream &cStream, int64_t Value, uint64_t Address) override
Try to add a comment on the PC-relative load.
Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition ArrayRef.h:40
size_t size() const
Get the array size.
Definition ArrayRef.h:141
const T * data() const
Definition ArrayRef.h:138
ArrayRef< T > slice(size_t N, size_t M) const
slice(n, m) - Chop off the first N elements of the array, and keep M elements in the array.
Definition ArrayRef.h:185
A class representing a position in a DataExtractor, as well as any error encountered during extractio...
LLVM_ABI uint32_t getU32(uint64_t *offset_ptr, Error *Err=nullptr) const
Extract a uint32_t value from *offset_ptr.
LLVM_ABI uint16_t getU16(uint64_t *offset_ptr, Error *Err=nullptr) const
Extract a uint16_t value from *offset_ptr.
LLVM_ABI void skip(Cursor &C, uint64_t Length) const
Advance the Cursor position by the given number of bytes.
LLVM_ABI StringRef getBytes(uint64_t *OffsetPtr, uint64_t Length, Error *Err=nullptr) const
Extract a fixed number of bytes from the specified offset.
Lightweight error class with error context and mandatory checking.
Definition Error.h:159
Tagged union holding either a T or an Error.
Definition Error.h:485
static const MCBinaryExpr * createOr(const MCExpr *LHS, const MCExpr *RHS, MCContext &Ctx)
Definition MCExpr.h:407
static LLVM_ABI const MCConstantExpr * create(int64_t Value, MCContext &Ctx, bool PrintInHex=false, unsigned SizeInBytes=0)
Definition MCExpr.cpp:212
Context object for machine code objects.
Definition MCContext.h:83
const MCRegisterInfo * getRegisterInfo() const
Definition MCContext.h:411
Superclass for all disassemblers.
MCDisassembler(const MCSubtargetInfo &STI, MCContext &Ctx)
MCContext & getContext() const
const MCSubtargetInfo & STI
raw_ostream * CommentStream
DecodeStatus
Ternary decode status.
Base class for the full range of assembler expressions which are needed for parsing.
Definition MCExpr.h:34
Instances of this class represent a single low-level machine instruction.
Definition MCInst.h:188
unsigned getOpcode() const
Definition MCInst.h:202
void addOperand(const MCOperand Op)
Definition MCInst.h:215
const MCOperand & getOperand(unsigned i) const
Definition MCInst.h:210
Describe properties that are true of each instruction in the target description file.
Interface to description of machine instruction set.
Definition MCInstrInfo.h:27
This holds information about one operand of a machine instruction, indicating the register class for ...
Definition MCInstrDesc.h:86
uint8_t OperandType
Information about the type of the operand.
Definition MCInstrDesc.h:98
Instances of this class represent operands of the MCInst class.
Definition MCInst.h:40
static MCOperand createExpr(const MCExpr *Val)
Definition MCInst.h:166
int64_t getImm() const
Definition MCInst.h:84
static MCOperand createReg(MCRegister Reg)
Definition MCInst.h:138
static MCOperand createImm(int64_t Val)
Definition MCInst.h:145
void setReg(MCRegister Reg)
Set the register number.
Definition MCInst.h:79
bool isReg() const
Definition MCInst.h:65
MCRegister getReg() const
Returns the register number.
Definition MCInst.h:73
bool isValid() const
Definition MCInst.h:64
MCRegisterClass - Base class of TargetRegisterClass.
MCRegister getRegister(unsigned i) const
getRegister - Return the specified register in the class.
unsigned getSizeInBits() const
Return the size of the physical register in bits if we are able to determine it.
bool contains(MCRegister Reg) const
contains - Return true if the specified register is included in this register class.
MCRegisterInfo base class - We assume that the target defines a static array of MCRegisterDesc object...
MCRegister getMatchingSuperReg(MCRegister Reg, unsigned SubIdx, const MCRegisterClass *RC) const
Return a super-register of the specified register Reg so its sub-register of index SubIdx is Reg.
const MCRegisterClass & getRegClass(unsigned i) const
Returns the register class associated with the enumeration value.
MCRegister getSubReg(MCRegister Reg, unsigned Idx) const
Returns the physical register number of sub-register "Index" for physical register RegNo.
Wrapper class representing physical registers. Should be passed by value.
Definition MCRegister.h:41
Generic base class for all target subtargets.
static const MCSymbolRefExpr * create(const MCSymbol *Symbol, MCContext &Ctx, SMLoc Loc=SMLoc())
Definition MCExpr.h:213
MCSymbol - Instances of this class represent a symbol name in the MC file, and MCSymbols are created ...
Definition MCSymbol.h:42
bool isVariable() const
isVariable - Check if this is a variable symbol.
Definition MCSymbol.h:267
LLVM_ABI void setVariableValue(const MCExpr *Value)
Definition MCSymbol.cpp:50
const MCExpr * getVariableValue() const
Get the expression of the variable symbol.
Definition MCSymbol.h:270
Symbolize and annotate disassembled instructions.
Represents a location in source code.
Definition SMLoc.h:22
SmallString - A SmallString is just a SmallVector with methods and accessors that make it work better...
Definition SmallString.h:26
Represent a constant reference to a string, i.e.
Definition StringRef.h:56
Target - Wrapper for Target specific information.
Triple - Helper class for working with autoconf configuration names.
Definition Triple.h:47
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition Twine.h:82
LLVM Value Representation.
Definition Value.h:75
This class implements an extremely fast bulk output stream that can only output to a stream.
Definition raw_ostream.h:53
A raw_ostream that writes to an std::string.
std::string & str()
Returns the string's reference.
A raw_ostream that writes to a SmallVector or SmallString.
const char *(* LLVMSymbolLookupCallback)(void *DisInfo, uint64_t ReferenceValue, uint64_t *ReferenceType, uint64_t ReferencePC, const char **ReferenceName)
The type for the symbol lookup function.
int(* LLVMOpInfoCallback)(void *DisInfo, uint64_t PC, uint64_t Offset, uint64_t OpSize, uint64_t InstSize, int TagType, void *TagBuf)
The type for the operand information call back function.
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
unsigned getVGPREncodingGranule(const MCSubtargetInfo &STI, std::optional< bool > EnableWavefrontSize32)
unsigned getSGPREncodingGranule(const MCSubtargetInfo &STI)
ArrayRef< GFXVersion > getGFXVersions()
bool isInlinableLiteralBF16(int16_t Literal, bool HasInv2Pi)
EncodingField< Bit, Bit, D > EncodingBit
bool isPKFMACF16InlineConstant(uint32_t Literal, bool IsGFX11Plus)
LLVM_READONLY const MIMGInfo * getMIMGInfo(unsigned Opc)
bool isInlinableLiteralFP16(int16_t Literal, bool HasInv2Pi)
MCRegister getMCReg(MCRegister Reg, const MCSubtargetInfo &STI)
If Reg is a pseudo reg, return the correct hardware register given STI otherwise return Reg.
int getMIMGOpcode(unsigned BaseOpcode, unsigned MIMGEncoding, unsigned VDataDwords, unsigned VAddrDwords)
bool isInlinableLiteralV2I16(uint32_t Literal)
bool isGFX10(const MCSubtargetInfo &STI)
bool isInlinableLiteralV2BF16(uint32_t Literal)
bool isGFX12Plus(const MCSubtargetInfo &STI)
bool hasPackedD16(const MCSubtargetInfo &STI)
bool isInlinableLiteralV2F16(uint32_t Literal)
bool getSMEMIsBuffer(unsigned Opc)
bool isGFX13(const MCSubtargetInfo &STI)
bool isVOPC64DPP(unsigned Opc)
unsigned getAMDHSACodeObjectVersion(const Module &M)
LLVM_READONLY bool hasNamedOperand(uint64_t Opcode, OpName NamedIdx)
bool isGFX9(const MCSubtargetInfo &STI)
LLVM_READONLY const MIMGDimInfo * getMIMGDimInfoByEncoding(uint8_t DimEnc)
bool isInlinableLiteral32(int32_t Literal, bool HasInv2Pi)
const MFMA_F8F6F4_Info * getWMMA_F8F6F4_WithFormatArgs(unsigned FmtA, unsigned FmtB, unsigned F8F8Opcode)
bool hasG16(const MCSubtargetInfo &STI)
unsigned getAddrSizeMIMGOp(const MIMGBaseOpcodeInfo *BaseOpcode, const MIMGDimInfo *Dim, bool IsA16, bool IsG16Supported)
bool isGFX13Plus(const MCSubtargetInfo &STI)
bool isGFX11Plus(const MCSubtargetInfo &STI)
bool isGFX10Plus(const MCSubtargetInfo &STI)
@ OPERAND_REG_IMM_V2FP64
Definition SIDefines.h:430
@ OPERAND_KIMM32
Operand with 32-bit immediate that uses the constant bus.
Definition SIDefines.h:448
@ OPERAND_REG_IMM_INT64
Definition SIDefines.h:416
@ OPERAND_REG_IMM_V2FP16
Definition SIDefines.h:423
@ OPERAND_REG_INLINE_C_FP64
Definition SIDefines.h:439
@ OPERAND_REG_INLINE_C_BF16
Definition SIDefines.h:436
@ OPERAND_REG_INLINE_C_V2BF16
Definition SIDefines.h:441
@ OPERAND_REG_IMM_V2INT64
Definition SIDefines.h:426
@ OPERAND_REG_IMM_V2INT16
Definition SIDefines.h:425
@ OPERAND_REG_IMM_BF16
Definition SIDefines.h:420
@ OPERAND_REG_IMM_INT32
Operands with register, 32-bit, or 64-bit immediate.
Definition SIDefines.h:415
@ OPERAND_REG_IMM_V2BF16
Definition SIDefines.h:422
@ OPERAND_REG_IMM_FP16
Definition SIDefines.h:421
@ OPERAND_REG_IMM_V2FP16_SPLAT
Definition SIDefines.h:424
@ OPERAND_REG_INLINE_C_INT64
Definition SIDefines.h:435
@ OPERAND_REG_INLINE_C_INT16
Operands with register or inline constant.
Definition SIDefines.h:433
@ OPERAND_REG_IMM_NOINLINE_V2FP16
Definition SIDefines.h:427
@ OPERAND_REG_IMM_FP64
Definition SIDefines.h:419
@ OPERAND_REG_INLINE_C_V2FP16
Definition SIDefines.h:442
@ OPERAND_REG_INLINE_AC_INT32
Operands with an AccVGPR register or inline constant.
Definition SIDefines.h:453
@ OPERAND_REG_INLINE_AC_FP32
Definition SIDefines.h:454
@ OPERAND_REG_IMM_V2INT32
Definition SIDefines.h:428
@ OPERAND_REG_IMM_FP32
Definition SIDefines.h:418
@ OPERAND_REG_INLINE_C_FP32
Definition SIDefines.h:438
@ OPERAND_REG_INLINE_C_INT32
Definition SIDefines.h:434
@ OPERAND_REG_INLINE_C_V2INT16
Definition SIDefines.h:440
@ OPERAND_REG_IMM_V2FP32
Definition SIDefines.h:429
@ OPERAND_REG_INLINE_AC_FP64
Definition SIDefines.h:455
@ OPERAND_REG_INLINE_C_FP16
Definition SIDefines.h:437
@ OPERAND_REG_IMM_INT16
Definition SIDefines.h:417
bool hasGDS(const MCSubtargetInfo &STI)
bool isGFX9Plus(const MCSubtargetInfo &STI)
bool isGFX1250(const MCSubtargetInfo &STI)
unsigned hasKernargPreload(const MCSubtargetInfo &STI)
bool isMAC(unsigned Opc)
LLVM_READONLY const MIMGBaseOpcodeInfo * getMIMGBaseOpcodeInfo(unsigned BaseOpcode)
bool isGFX1250Plus(const MCSubtargetInfo &STI)
bool isInlinableLiteralI16(int32_t Literal, bool HasInv2Pi)
bool hasVOPD(const MCSubtargetInfo &STI)
bool isInlinableLiteral64(int64_t Literal, bool HasInv2Pi)
Is this literal inlinable.
const MFMA_F8F6F4_Info * getMFMA_F8F6F4_WithFormatArgs(unsigned CBSZ, unsigned BLGP, unsigned F8F8Opcode)
@ C
The default llvm calling convention, compatible with C.
Definition CallingConv.h:34
@ STT_NOTYPE
Definition ELF.h:1419
@ STT_AMDGPU_HSA_KERNEL
Definition ELF.h:1433
@ STT_OBJECT
Definition ELF.h:1420
@ EF_AMDGPU_FEATURE_XNACK_ANY_V4
Definition ELF.h:904
@ EF_AMDGPU_FEATURE_SRAMECC_UNSUPPORTED_V4
Definition ELF.h:915
@ EF_AMDGPU_FEATURE_SRAMECC_OFF_V4
Definition ELF.h:919
@ EF_AMDGPU_FEATURE_XNACK_UNSUPPORTED_V4
Definition ELF.h:902
@ EF_AMDGPU_FEATURE_XNACK_OFF_V4
Definition ELF.h:906
@ EF_AMDGPU_FEATURE_XNACK_V4
Definition ELF.h:900
@ EF_AMDGPU_FEATURE_SRAMECC_V4
Definition ELF.h:913
@ EF_AMDGPU_FEATURE_XNACK_ON_V4
Definition ELF.h:908
@ EF_AMDGPU_MACH
Definition ELF.h:848
@ EF_AMDGPU_FEATURE_SRAMECC_ANY_V4
Definition ELF.h:917
@ EF_AMDGPU_FEATURE_SRAMECC_ON_V4
Definition ELF.h:921
value_type read(const void *memory, endianness endian)
Read a value of a particular endianness from memory.
Definition Endian.h:60
uint16_t read16(const void *P, endianness E)
Definition Endian.h:409
This is an optimization pass for GlobalISel generic memory operations.
@ Offset
Definition DWP.cpp:558
auto enumerate(FirstRange &&First, RestRanges &&...Rest)
Given two or more input ranges, returns a new range whose values are tuples (A, B,...
Definition STLExtras.h:2553
LLVM_ABI raw_fd_ostream & outs()
This returns a reference to a raw_fd_ostream for standard output.
SmallVectorImpl< T >::const_pointer c_str(SmallVectorImpl< T > &str)
Error createStringError(std::error_code EC, char const *Fmt, const Ts &... Vals)
Create formatted StringError object.
Definition Error.h:1321
Op::Description Desc
constexpr int popcount(T Value) noexcept
Count the number of set bits in a value.
Definition bit.h:156
int countr_zero(T Val)
Count number of 0's from the least significant bit to the most stopping at the first 1.
Definition bit.h:204
FunctionAddr VTableAddr uintptr_t uintptr_t Version
Definition InstrProf.h:334
MachineInstr * getImm(const MachineOperand &MO, const MachineRegisterInfo *MRI)
constexpr uint32_t Hi_32(uint64_t Value)
Return the high 32 bits of a 64 bit value.
Definition MathExtras.h:150
constexpr bool isUInt(uint64_t x)
Checks if an unsigned integer fits into the given bit width.
Definition MathExtras.h:189
void cantFail(Error Err, const char *Msg=nullptr)
Report a fatal error if Err is a failure value.
Definition Error.h:769
Target & getTheGCNTarget()
The target for GCN GPUs.
To bit_cast(const From &from) noexcept
Definition bit.h:90
@ Add
Sum of integers.
DWARFExpression::Operation Op
unsigned M0(unsigned Val)
Definition VE.h:376
auto find_if(R &&Range, UnaryPredicate P)
Provide wrappers to std::find_if which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1771
std::vector< SymbolInfoTy > SectionSymbolsTy
constexpr int64_t SignExtend64(uint64_t x)
Sign-extend the number in the bottom B bits of X to a 64-bit integer.
Definition MathExtras.h:572
LLVM_ABI void reportFatalUsageError(Error Err)
Report a fatal error that does not indicate a bug in LLVM.
Definition Error.cpp:177
static void RegisterMCSymbolizer(Target &T, Target::MCSymbolizerCtorTy Fn)
RegisterMCSymbolizer - Register an MCSymbolizer implementation for the given target.
static void RegisterMCDisassembler(Target &T, Target::MCDisassemblerCtorTy Fn)
RegisterMCDisassembler - Register a MCDisassembler implementation for the given target.