LLVM 23.0.0git
AMDGPUDisassembler.cpp
Go to the documentation of this file.
1//===- AMDGPUDisassembler.cpp - Disassembler for AMDGPU ISA ---------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9//===----------------------------------------------------------------------===//
10//
11/// \file
12///
13/// This file contains the definition of the AMDGPU ISA disassembler.
14//
15//===----------------------------------------------------------------------===//
16
17// ToDo: What to do with instruction suffixes (v_mov_b32 vs v_mov_b32_e32)?
18
22#include "SIDefines.h"
23#include "SIRegisterInfo.h"
29#include "llvm/MC/MCAsmInfo.h"
30#include "llvm/MC/MCContext.h"
31#include "llvm/MC/MCDecoder.h"
33#include "llvm/MC/MCExpr.h"
34#include "llvm/MC/MCInstrDesc.h"
40
41using namespace llvm;
42using namespace llvm::MCD;
43
44#define DEBUG_TYPE "amdgpu-disassembler"
45
46#define SGPR_MAX \
47 (isGFX10Plus() ? AMDGPU::EncValues::SGPR_MAX_GFX10 \
48 : AMDGPU::EncValues::SGPR_MAX_SI)
49
51
52static int64_t getInlineImmValF16(unsigned Imm);
53static int64_t getInlineImmValBF16(unsigned Imm);
54static int64_t getInlineImmVal32(unsigned Imm);
55static int64_t getInlineImmVal64(unsigned Imm);
56
58 MCContext &Ctx, MCInstrInfo const *MCII)
59 : MCDisassembler(STI, Ctx), MCII(MCII), MRI(*Ctx.getRegisterInfo()),
60 MAI(Ctx.getAsmInfo()),
61 HwModeRegClass(STI.getHwMode(MCSubtargetInfo::HwMode_RegInfo)),
62 TargetMaxInstBytes(MAI.getMaxInstLength(&STI)),
63 CodeObjectVersion(AMDGPU::getDefaultAMDHSACodeObjectVersion()) {
64 // ToDo: AMDGPUDisassembler supports only VI ISA.
65 if (!STI.hasFeature(AMDGPU::FeatureGCN3Encoding) && !isGFX10Plus())
66 reportFatalUsageError("disassembly not yet supported for subtarget");
67
68 for (auto [Symbol, Code] : AMDGPU::UCVersion::getGFXVersions())
69 createConstantSymbolExpr(Symbol, Code);
70
71 UCVersionW64Expr = createConstantSymbolExpr("UC_VERSION_W64_BIT", 0x2000);
72 UCVersionW32Expr = createConstantSymbolExpr("UC_VERSION_W32_BIT", 0x4000);
73 UCVersionMDPExpr = createConstantSymbolExpr("UC_VERSION_MDP_BIT", 0x8000);
74}
75
79
81addOperand(MCInst &Inst, const MCOperand& Opnd) {
82 Inst.addOperand(Opnd);
83 return Opnd.isValid() ?
86}
87
89 AMDGPU::OpName Name) {
90 int OpIdx = AMDGPU::getNamedOperandIdx(MI.getOpcode(), Name);
91 if (OpIdx != -1) {
92 auto *I = MI.begin();
93 std::advance(I, OpIdx);
94 MI.insert(I, Op);
95 }
96 return OpIdx;
97}
98
99static DecodeStatus decodeSOPPBrTarget(MCInst &Inst, unsigned Imm,
100 uint64_t Addr,
101 const MCDisassembler *Decoder) {
102 const auto *DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
103
104 // Our branches take a simm16.
105 int64_t Offset = SignExtend64<16>(Imm) * 4 + 4 + Addr;
106
107 if (DAsm->tryAddingSymbolicOperand(Inst, Offset, Addr, true, 2, 2, 0))
109 return addOperand(Inst, MCOperand::createImm(Imm));
110}
111
112static DecodeStatus decodeSMEMOffset(MCInst &Inst, unsigned Imm, uint64_t Addr,
113 const MCDisassembler *Decoder) {
114 const auto *DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
115 int64_t Offset;
116 if (DAsm->isGFX12Plus()) { // GFX12 supports 24-bit signed offsets.
118 } else if (DAsm->isVI()) { // VI supports 20-bit unsigned offsets.
119 Offset = Imm & 0xFFFFF;
120 } else { // GFX9+ supports 21-bit signed offsets.
122 }
124}
125
126static DecodeStatus decodeBoolReg(MCInst &Inst, unsigned Val, uint64_t Addr,
127 const MCDisassembler *Decoder) {
128 const auto *DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
129 return addOperand(Inst, DAsm->decodeBoolReg(Inst, Val));
130}
131
132static DecodeStatus decodeSplitBarrier(MCInst &Inst, unsigned Val,
133 uint64_t Addr,
134 const MCDisassembler *Decoder) {
135 const auto *DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
136 return addOperand(Inst, DAsm->decodeSplitBarrier(Inst, Val));
137}
138
139static DecodeStatus decodeDpp8FI(MCInst &Inst, unsigned Val, uint64_t Addr,
140 const MCDisassembler *Decoder) {
141 const auto *DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
142 return addOperand(Inst, DAsm->decodeDpp8FI(Val));
143}
144
// Defines a static decoder function named StaticDecoderName that calls the
// AMDGPUDisassembler member DecoderName on the raw Imm and appends the
// resulting operand to Inst.
#define DECODE_OPERAND(StaticDecoderName, DecoderName)                         \
  static DecodeStatus StaticDecoderName(MCInst &Inst, unsigned Imm,            \
                                        uint64_t /*Addr*/,                     \
                                        const MCDisassembler *Decoder) {       \
    const auto &Disasm = *static_cast<const AMDGPUDisassembler *>(Decoder);    \
    const MCOperand DecodedOpnd = Disasm.DecoderName(Imm);                     \
    return addOperand(Inst, DecodedOpnd);                                      \
  }
152
// Register decoder that maps the 8-bit Imm straight to a register of
// AMDGPU::<RegClass>RegClassID via createRegOperand. Used by VGPR-only and
// AGPR-only operands. The generated function is named
// Decode<RegClass>RegisterClass.
#define DECODE_OPERAND_REG_8(RegClass)                                         \
  static DecodeStatus Decode##RegClass##RegisterClass(                         \
      MCInst &Inst, unsigned Imm, uint64_t /*Addr*/,                           \
      const MCDisassembler *Decoder) {                                         \
    assert(Imm < (1 << 8) && "8-bit encoding");                                \
    const auto &Disasm = *static_cast<const AMDGPUDisassembler *>(Decoder);    \
    const MCOperand RegOpnd =                                                  \
        Disasm.createRegOperand(AMDGPU::RegClass##RegClassID, Imm);            \
    return addOperand(Inst, RegOpnd);                                          \
  }
164
// Defines a static decoder called Name that checks Imm fits in EncSize bits
// and then appends the operand produced by
// AMDGPUDisassembler::decodeSrcOp(Inst, OpWidth, EncImm). EncImm is an
// expression evaluated inside the generated function, so it may refer to Imm.
#define DECODE_SrcOp(Name, EncSize, OpWidth, EncImm)                           \
  static DecodeStatus Name(MCInst &Inst, unsigned Imm, uint64_t /*Addr*/,      \
                           const MCDisassembler *Decoder) {                    \
    assert(Imm < (1 << EncSize) && #EncSize "-bit encoding");                  \
    const auto &Disasm = *static_cast<const AMDGPUDisassembler *>(Decoder);    \
    const MCOperand SrcOpnd = Disasm.decodeSrcOp(Inst, OpWidth, EncImm);       \
    return addOperand(Inst, SrcOpnd);                                          \
  }
172
173static DecodeStatus decodeSrcOp(MCInst &Inst, unsigned EncSize,
174 unsigned OpWidth, unsigned Imm, unsigned EncImm,
175 const MCDisassembler *Decoder) {
176 assert(Imm < (1U << EncSize) && "Operand doesn't fit encoding!");
177 const auto *DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
178 return addOperand(Inst, DAsm->decodeSrcOp(Inst, OpWidth, EncImm));
179}
180
// Register decoders for SGPR-only operands. Imm is the register number and the
// register class is resolved through decodeSrcOp. Both forms generate a
// function named Decode<RegClass>RegisterClass.

// 7-bit register number.
#define DECODE_OPERAND_SREG_7(RegClass, OpWidth)                               \
  DECODE_SrcOp(Decode##RegClass##RegisterClass, 7, OpWidth, Imm)

// 8-bit register number.
#define DECODE_OPERAND_SREG_8(RegClass, OpWidth)                               \
  DECODE_SrcOp(Decode##RegClass##RegisterClass, 8, OpWidth, Imm)
188
189// Decoder for registers. Imm(10-bit): Imm{7-0} is number of register,
190// Imm{9} is acc(agpr or vgpr) Imm{8} should be 0 (see VOP3Pe_SMFMAC).
191// Set Imm{8} to 1 (IS_VGPR) to decode using 'enum10' from decodeSrcOp.
192// Used by AV_ register classes (AGPR or VGPR only register operands).
193template <unsigned OpWidth>
194static DecodeStatus decodeAV10(MCInst &Inst, unsigned Imm, uint64_t /* Addr */,
195 const MCDisassembler *Decoder) {
196 return decodeSrcOp(Inst, 10, OpWidth, Imm, Imm | AMDGPU::EncValues::IS_VGPR,
197 Decoder);
198}
199
200// Decoder for Src(9-bit encoding) registers only.
201template <unsigned OpWidth>
202static DecodeStatus decodeSrcReg9(MCInst &Inst, unsigned Imm,
203 uint64_t /* Addr */,
204 const MCDisassembler *Decoder) {
205 return decodeSrcOp(Inst, 9, OpWidth, Imm, Imm, Decoder);
206}
207
208// Decoder for Src(9-bit encoding) AGPR, register number encoded in 9bits, set
209// Imm{9} to 1 (set acc) and decode using 'enum10' from decodeSrcOp, registers
210// only.
211template <unsigned OpWidth>
212static DecodeStatus decodeSrcA9(MCInst &Inst, unsigned Imm, uint64_t /* Addr */,
213 const MCDisassembler *Decoder) {
214 return decodeSrcOp(Inst, 9, OpWidth, Imm, Imm | 512, Decoder);
215}
216
217// Decoder for 'enum10' from decodeSrcOp, Imm{0-8} is 9-bit Src encoding
218// Imm{9} is acc, registers only.
219template <unsigned OpWidth>
220static DecodeStatus decodeSrcAV10(MCInst &Inst, unsigned Imm,
221 uint64_t /* Addr */,
222 const MCDisassembler *Decoder) {
223 return decodeSrcOp(Inst, 10, OpWidth, Imm, Imm, Decoder);
224}
225
226// Decoder for RegisterOperands using 9-bit Src encoding. Operand can be
227// register from RegClass or immediate. Registers that don't belong to RegClass
228// will be decoded and InstPrinter will report warning. Immediate will be
229// decoded into constant matching the OperandType (important for floating point
230// types).
231template <unsigned OpWidth>
232static DecodeStatus decodeSrcRegOrImm9(MCInst &Inst, unsigned Imm,
233 uint64_t /* Addr */,
234 const MCDisassembler *Decoder) {
235 return decodeSrcOp(Inst, 9, OpWidth, Imm, Imm, Decoder);
236}
237
238// Decoder for Src(9-bit encoding) AGPR or immediate. Set Imm{9} to 1 (set acc)
239// and decode using 'enum10' from decodeSrcOp.
240template <unsigned OpWidth>
241static DecodeStatus decodeSrcRegOrImmA9(MCInst &Inst, unsigned Imm,
242 uint64_t /* Addr */,
243 const MCDisassembler *Decoder) {
244 return decodeSrcOp(Inst, 9, OpWidth, Imm, Imm | 512, Decoder);
245}
246
247// Default decoders generated by tablegen: 'Decode<RegClass>RegisterClass'
248// when RegisterClass is used as an operand. Most often used for destination
249// operands.
250
252DECODE_OPERAND_REG_8(VGPR_32_Lo128)
255DECODE_OPERAND_REG_8(VReg_128)
256DECODE_OPERAND_REG_8(VReg_192)
257DECODE_OPERAND_REG_8(VReg_256)
258DECODE_OPERAND_REG_8(VReg_288)
259DECODE_OPERAND_REG_8(VReg_320)
260DECODE_OPERAND_REG_8(VReg_352)
261DECODE_OPERAND_REG_8(VReg_384)
262DECODE_OPERAND_REG_8(VReg_512)
263DECODE_OPERAND_REG_8(VReg_1024)
264
265DECODE_OPERAND_SREG_7(SReg_32, 32)
266DECODE_OPERAND_SREG_7(SReg_32_XM0, 32)
267DECODE_OPERAND_SREG_7(SReg_32_XEXEC, 32)
268DECODE_OPERAND_SREG_7(SReg_32_XM0_XEXEC, 32)
269DECODE_OPERAND_SREG_7(SReg_32_XEXEC_HI, 32)
270DECODE_OPERAND_SREG_7(SReg_64_XEXEC, 64)
271DECODE_OPERAND_SREG_7(SReg_64_XEXEC_XNULL, 64)
272DECODE_OPERAND_SREG_7(SReg_96, 96)
273DECODE_OPERAND_SREG_7(SReg_128, 128)
274DECODE_OPERAND_SREG_7(SReg_128_XNULL, 128)
275DECODE_OPERAND_SREG_7(SReg_256, 256)
276DECODE_OPERAND_SREG_7(SReg_256_XNULL, 256)
277DECODE_OPERAND_SREG_7(SReg_512, 512)
278
279DECODE_OPERAND_SREG_8(SReg_64, 64)
280
283DECODE_OPERAND_REG_8(AReg_128)
284DECODE_OPERAND_REG_8(AReg_256)
285DECODE_OPERAND_REG_8(AReg_512)
286DECODE_OPERAND_REG_8(AReg_1024)
287
289 uint64_t /*Addr*/,
290 const MCDisassembler *Decoder) {
291 assert(isUInt<10>(Imm) && "10-bit encoding expected");
292 assert((Imm & (1 << 8)) == 0 && "Imm{8} should not be used");
293
294 bool IsHi = Imm & (1 << 9);
295 unsigned RegIdx = Imm & 0xff;
296 const auto *DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
297 return addOperand(Inst, DAsm->createVGPR16Operand(RegIdx, IsHi));
298}
299
300static DecodeStatus
302 const MCDisassembler *Decoder) {
303 assert(isUInt<8>(Imm) && "8-bit encoding expected");
304
305 bool IsHi = Imm & (1 << 7);
306 unsigned RegIdx = Imm & 0x7f;
307 const auto *DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
308 return addOperand(Inst, DAsm->createVGPR16Operand(RegIdx, IsHi));
309}
310
311template <unsigned OpWidth>
313 uint64_t /*Addr*/,
314 const MCDisassembler *Decoder) {
315 assert(isUInt<9>(Imm) && "9-bit encoding expected");
316
317 const auto *DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
318 if (Imm & AMDGPU::EncValues::IS_VGPR) {
319 bool IsHi = Imm & (1 << 7);
320 unsigned RegIdx = Imm & 0x7f;
321 return addOperand(Inst, DAsm->createVGPR16Operand(RegIdx, IsHi));
322 }
323 return addOperand(Inst, DAsm->decodeNonVGPRSrcOp(Inst, OpWidth, Imm & 0xFF));
324}
325
326template <unsigned OpWidth>
327static DecodeStatus decodeOperand_VSrcT16(MCInst &Inst, unsigned Imm,
328 uint64_t /*Addr*/,
329 const MCDisassembler *Decoder) {
330 assert(isUInt<10>(Imm) && "10-bit encoding expected");
331
332 const auto *DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
333 if (Imm & AMDGPU::EncValues::IS_VGPR) {
334 bool IsHi = Imm & (1 << 9);
335 unsigned RegIdx = Imm & 0xff;
336 return addOperand(Inst, DAsm->createVGPR16Operand(RegIdx, IsHi));
337 }
338 return addOperand(Inst, DAsm->decodeNonVGPRSrcOp(Inst, OpWidth, Imm & 0xFF));
339}
340
341static DecodeStatus decodeOperand_VGPR_16(MCInst &Inst, unsigned Imm,
342 uint64_t /*Addr*/,
343 const MCDisassembler *Decoder) {
344 assert(isUInt<10>(Imm) && "10-bit encoding expected");
345 assert(Imm & AMDGPU::EncValues::IS_VGPR && "VGPR expected");
346
347 const auto *DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
348
349 bool IsHi = Imm & (1 << 9);
350 unsigned RegIdx = Imm & 0xff;
351 return addOperand(Inst, DAsm->createVGPR16Operand(RegIdx, IsHi));
352}
353
354static DecodeStatus decodeOperand_KImmFP(MCInst &Inst, unsigned Imm,
355 uint64_t Addr,
356 const MCDisassembler *Decoder) {
357 const auto *DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
358 return addOperand(Inst, DAsm->decodeMandatoryLiteralConstant(Imm));
359}
360
362 uint64_t Addr,
363 const MCDisassembler *Decoder) {
364 const auto *DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
365 return addOperand(Inst, DAsm->decodeMandatoryLiteral64Constant(Imm));
366}
367
368static DecodeStatus decodeOperandVOPDDstY(MCInst &Inst, unsigned Val,
369 uint64_t Addr, const void *Decoder) {
370 const auto *DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
371 return addOperand(Inst, DAsm->decodeVOPDDstYOp(Inst, Val));
372}
373
374static DecodeStatus decodeAVLdSt(MCInst &Inst, unsigned Imm, unsigned Opw,
375 const MCDisassembler *Decoder) {
376 const auto *DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
377 return addOperand(Inst, DAsm->decodeSrcOp(Inst, Opw, Imm | 256));
378}
379
380template <unsigned Opw>
381static DecodeStatus decodeAVLdSt(MCInst &Inst, unsigned Imm,
382 uint64_t /* Addr */,
383 const MCDisassembler *Decoder) {
384 return decodeAVLdSt(Inst, Imm, Opw, Decoder);
385}
386
387static DecodeStatus decodeOperand_VSrc_f64(MCInst &Inst, unsigned Imm,
388 uint64_t Addr,
389 const MCDisassembler *Decoder) {
390 assert(Imm < (1 << 9) && "9-bit encoding");
391 const auto *DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
392 return addOperand(Inst, DAsm->decodeSrcOp(Inst, 64, Imm));
393}
394
395#define DECODE_SDWA(DecName) \
396DECODE_OPERAND(decodeSDWA##DecName, decodeSDWA##DecName)
397
398DECODE_SDWA(Src32)
399DECODE_SDWA(Src16)
400DECODE_SDWA(VopcDst)
401
402static DecodeStatus decodeVersionImm(MCInst &Inst, unsigned Imm,
403 uint64_t /* Addr */,
404 const MCDisassembler *Decoder) {
405 const auto *DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
406 return addOperand(Inst, DAsm->decodeVersionImm(Imm));
407}
408
409#include "AMDGPUGenDisassemblerTables.inc"
410
411namespace {
412// Define bitwidths for various types used to instantiate the decoder.
413template <> constexpr uint32_t InsnBitWidth<uint32_t> = 32;
414template <> constexpr uint32_t InsnBitWidth<uint64_t> = 64;
415template <> constexpr uint32_t InsnBitWidth<std::bitset<96>> = 96;
416template <> constexpr uint32_t InsnBitWidth<std::bitset<128>> = 128;
417} // namespace
418
419//===----------------------------------------------------------------------===//
420//
421//===----------------------------------------------------------------------===//
422
423template <typename InsnType>
425 InsnType Inst, uint64_t Address,
426 raw_ostream &Comments) const {
427 assert(MI.getOpcode() == 0);
428 assert(MI.getNumOperands() == 0);
429 MCInst TmpInst;
430 HasLiteral = false;
431 const auto SavedBytes = Bytes;
432
433 SmallString<64> LocalComments;
434 raw_svector_ostream LocalCommentStream(LocalComments);
435 CommentStream = &LocalCommentStream;
436
437 DecodeStatus Res =
438 decodeInstruction(Table, TmpInst, Inst, Address, this, STI);
439
440 CommentStream = nullptr;
441
442 if (Res != MCDisassembler::Fail) {
443 MI = TmpInst;
444 Comments << LocalComments;
446 }
447 Bytes = SavedBytes;
449}
450
451template <typename InsnType>
454 MCInst &MI, InsnType Inst, uint64_t Address,
455 raw_ostream &Comments) const {
456 for (const uint8_t *T : {Table1, Table2}) {
457 if (DecodeStatus Res = tryDecodeInst(T, MI, Inst, Address, Comments))
458 return Res;
459 }
461}
462
463template <typename T> static inline T eatBytes(ArrayRef<uint8_t>& Bytes) {
464 assert(Bytes.size() >= sizeof(T));
465 const auto Res =
467 Bytes = Bytes.slice(sizeof(T));
468 return Res;
469}
470
471static inline std::bitset<96> eat12Bytes(ArrayRef<uint8_t> &Bytes) {
472 using namespace llvm::support::endian;
473 assert(Bytes.size() >= 12);
474 std::bitset<96> Lo(read<uint64_t, endianness::little>(Bytes.data()));
475 Bytes = Bytes.slice(8);
476 std::bitset<96> Hi(read<uint32_t, endianness::little>(Bytes.data()));
477 Bytes = Bytes.slice(4);
478 return (Hi << 64) | Lo;
479}
480
481static inline std::bitset<128> eat16Bytes(ArrayRef<uint8_t> &Bytes) {
482 using namespace llvm::support::endian;
483 assert(Bytes.size() >= 16);
484 std::bitset<128> Lo(read<uint64_t, endianness::little>(Bytes.data()));
485 Bytes = Bytes.slice(8);
486 std::bitset<128> Hi(read<uint64_t, endianness::little>(Bytes.data()));
487 Bytes = Bytes.slice(8);
488 return (Hi << 64) | Lo;
489}
490
491void AMDGPUDisassembler::decodeImmOperands(MCInst &MI,
492 const MCInstrInfo &MCII) const {
493 const MCInstrDesc &Desc = MCII.get(MI.getOpcode());
494 for (auto [OpNo, OpDesc] : enumerate(Desc.operands())) {
495 if (OpNo >= MI.getNumOperands())
496 continue;
497
498 // TODO: Fix V_DUAL_FMAMK_F32_X_FMAAK_F32_gfx12 vsrc operands,
499 // defined to take VGPR_32, but in reality allowing inline constants.
500 bool IsSrc = AMDGPU::OPERAND_SRC_FIRST <= OpDesc.OperandType &&
501 OpDesc.OperandType <= AMDGPU::OPERAND_SRC_LAST;
502 if (!IsSrc && OpDesc.OperandType != MCOI::OPERAND_REGISTER)
503 continue;
504
505 MCOperand &Op = MI.getOperand(OpNo);
506 if (!Op.isImm())
507 continue;
508 int64_t Imm = Op.getImm();
511 Op = decodeIntImmed(Imm);
512 continue;
513 }
514
516 Op = decodeLiteralConstant(Desc, OpDesc);
517 continue;
518 }
519
522 switch (OpDesc.OperandType) {
528 break;
531 Imm = getInlineImmValF16(Imm);
532 break;
535 Imm = getInlineImmValF16(Imm);
536 break;
538 // V_PK_FMAC_F16 on GFX11+ duplicates the f16 inline constant to both
539 // halves, so we need to produce the duplicated value for correct
540 // round-trip.
541 if (isGFX11Plus()) {
542 int64_t F16Val = getInlineImmValF16(Imm);
543 Imm = (F16Val << 16) | (F16Val & 0xFFFF);
544 } else {
545 Imm = getInlineImmValF16(Imm);
546 }
547 break;
548 }
556 Imm = getInlineImmVal64(Imm);
557 break;
558 default:
559 Imm = getInlineImmVal32(Imm);
560 }
561 Op.setImm(Imm);
562 }
563 }
564}
565
567 ArrayRef<uint8_t> Bytes_,
569 raw_ostream &CS) const {
570 unsigned MaxInstBytesNum = std::min((size_t)TargetMaxInstBytes, Bytes_.size());
571 Bytes = Bytes_.slice(0, MaxInstBytesNum);
572
573 // In case the opcode is not recognized we'll assume a Size of 4 bytes (unless
574 // there are fewer bytes left). This will be overridden on success.
575 Size = std::min((size_t)4, Bytes_.size());
576
577 do {
578 // ToDo: better to switch encoding length using some bit predicate
579 // but it is unknown yet, so try all we can
580
581 // Try to decode DPP and SDWA first to solve conflict with VOP1 and VOP2
582 // encodings
583 if (isGFX1250Plus() && Bytes.size() >= 16) {
584 std::bitset<128> DecW = eat16Bytes(Bytes);
585 if (tryDecodeInst(DecoderTableGFX1250128, MI, DecW, Address, CS))
586 break;
587 Bytes = Bytes_.slice(0, MaxInstBytesNum);
588 }
589
590 if (isGFX11Plus() && Bytes.size() >= 12) {
591 std::bitset<96> DecW = eat12Bytes(Bytes);
592
593 if (isGFX1170() &&
594 tryDecodeInst(DecoderTableGFX117096, DecoderTableGFX1170_FAKE1696, MI,
595 DecW, Address, CS))
596 break;
597
598 if (isGFX11() &&
599 tryDecodeInst(DecoderTableGFX1196, DecoderTableGFX11_FAKE1696, MI,
600 DecW, Address, CS))
601 break;
602
603 if (isGFX1250() &&
604 tryDecodeInst(DecoderTableGFX125096, DecoderTableGFX1250_FAKE1696, MI,
605 DecW, Address, CS))
606 break;
607
608 if (isGFX12() &&
609 tryDecodeInst(DecoderTableGFX1296, DecoderTableGFX12_FAKE1696, MI,
610 DecW, Address, CS))
611 break;
612
613 if (isGFX12() &&
614 tryDecodeInst(DecoderTableGFX12W6496, MI, DecW, Address, CS))
615 break;
616
617 if (isGFX13() &&
618 tryDecodeInst(DecoderTableGFX1396, DecoderTableGFX13_FAKE1696, MI,
619 DecW, Address, CS))
620 break;
621
622 if (STI.hasFeature(AMDGPU::Feature64BitLiterals)) {
623 // Return 8 bytes for a potential literal.
624 Bytes = Bytes_.slice(4, MaxInstBytesNum - 4);
625
626 if (isGFX1250() &&
627 tryDecodeInst(DecoderTableGFX125096, MI, DecW, Address, CS))
628 break;
629 }
630
631 // Reinitialize Bytes
632 Bytes = Bytes_.slice(0, MaxInstBytesNum);
633
634 } else if (Bytes.size() >= 16 &&
635 STI.hasFeature(AMDGPU::FeatureGFX950Insts)) {
636 std::bitset<128> DecW = eat16Bytes(Bytes);
637 if (tryDecodeInst(DecoderTableGFX940128, MI, DecW, Address, CS))
638 break;
639
640 // Reinitialize Bytes
641 Bytes = Bytes_.slice(0, MaxInstBytesNum);
642 }
643
644 if (Bytes.size() >= 8) {
645 const uint64_t QW = eatBytes<uint64_t>(Bytes);
646
647 if (STI.hasFeature(AMDGPU::FeatureGFX10_BEncoding) &&
648 tryDecodeInst(DecoderTableGFX10_B64, MI, QW, Address, CS))
649 break;
650
651 if (STI.hasFeature(AMDGPU::FeatureUnpackedD16VMem) &&
652 tryDecodeInst(DecoderTableGFX80_UNPACKED64, MI, QW, Address, CS))
653 break;
654
655 if (STI.hasFeature(AMDGPU::FeatureGFX950Insts) &&
656 tryDecodeInst(DecoderTableGFX95064, MI, QW, Address, CS))
657 break;
658
659 // Some GFX9 subtargets repurposed the v_mad_mix_f32, v_mad_mixlo_f16 and
660 // v_mad_mixhi_f16 for FMA variants. Try to decode using this special
661 // table first so we print the correct name.
662 if (STI.hasFeature(AMDGPU::FeatureFmaMixInsts) &&
663 tryDecodeInst(DecoderTableGFX9_DL64, MI, QW, Address, CS))
664 break;
665
666 if (STI.hasFeature(AMDGPU::FeatureGFX940Insts) &&
667 tryDecodeInst(DecoderTableGFX94064, MI, QW, Address, CS))
668 break;
669
670 if (STI.hasFeature(AMDGPU::FeatureGFX90AInsts) &&
671 tryDecodeInst(DecoderTableGFX90A64, MI, QW, Address, CS))
672 break;
673
674 if ((isVI() || isGFX9()) &&
675 tryDecodeInst(DecoderTableGFX864, MI, QW, Address, CS))
676 break;
677
678 if (isGFX9() && tryDecodeInst(DecoderTableGFX964, MI, QW, Address, CS))
679 break;
680
681 if (isGFX10() && tryDecodeInst(DecoderTableGFX1064, MI, QW, Address, CS))
682 break;
683
684 if (isGFX1250() &&
685 tryDecodeInst(DecoderTableGFX125064, DecoderTableGFX1250_FAKE1664, MI,
686 QW, Address, CS))
687 break;
688
689 if (isGFX12() &&
690 tryDecodeInst(DecoderTableGFX1264, DecoderTableGFX12_FAKE1664, MI, QW,
691 Address, CS))
692 break;
693
694 if (isGFX1170() &&
695 tryDecodeInst(DecoderTableGFX117064, DecoderTableGFX1170_FAKE1664, MI,
696 QW, Address, CS))
697 break;
698
699 if (isGFX11() &&
700 tryDecodeInst(DecoderTableGFX1164, DecoderTableGFX11_FAKE1664, MI, QW,
701 Address, CS))
702 break;
703
704 if (isGFX1170() &&
705 tryDecodeInst(DecoderTableGFX1170W6464, MI, QW, Address, CS))
706 break;
707
708 if (isGFX11() &&
709 tryDecodeInst(DecoderTableGFX11W6464, MI, QW, Address, CS))
710 break;
711
712 if (isGFX12() &&
713 tryDecodeInst(DecoderTableGFX12W6464, MI, QW, Address, CS))
714 break;
715
716 if (isGFX13() &&
717 tryDecodeInst(DecoderTableGFX1364, DecoderTableGFX13_FAKE1664, MI, QW,
718 Address, CS))
719 break;
720
721 // Reinitialize Bytes
722 Bytes = Bytes_.slice(0, MaxInstBytesNum);
723 }
724
725 // Try decode 32-bit instruction
726 if (Bytes.size() >= 4) {
727 const uint32_t DW = eatBytes<uint32_t>(Bytes);
728
729 if ((isVI() || isGFX9()) &&
730 tryDecodeInst(DecoderTableGFX832, MI, DW, Address, CS))
731 break;
732
733 if (tryDecodeInst(DecoderTableAMDGPU32, MI, DW, Address, CS))
734 break;
735
736 if (isGFX9() && tryDecodeInst(DecoderTableGFX932, MI, DW, Address, CS))
737 break;
738
739 if (STI.hasFeature(AMDGPU::FeatureGFX950Insts) &&
740 tryDecodeInst(DecoderTableGFX95032, MI, DW, Address, CS))
741 break;
742
743 if (STI.hasFeature(AMDGPU::FeatureGFX90AInsts) &&
744 tryDecodeInst(DecoderTableGFX90A32, MI, DW, Address, CS))
745 break;
746
747 if (STI.hasFeature(AMDGPU::FeatureGFX10_BEncoding) &&
748 tryDecodeInst(DecoderTableGFX10_B32, MI, DW, Address, CS))
749 break;
750
751 if (isGFX10() && tryDecodeInst(DecoderTableGFX1032, MI, DW, Address, CS))
752 break;
753
754 if (isGFX1170() &&
755 tryDecodeInst(DecoderTableGFX117032, DecoderTableGFX1170_FAKE1632, MI,
756 DW, Address, CS))
757 break;
758
759 if (isGFX11() &&
760 tryDecodeInst(DecoderTableGFX1132, DecoderTableGFX11_FAKE1632, MI, DW,
761 Address, CS))
762 break;
763
764 if (isGFX1250() &&
765 tryDecodeInst(DecoderTableGFX125032, DecoderTableGFX1250_FAKE1632, MI,
766 DW, Address, CS))
767 break;
768
769 if (isGFX12() &&
770 tryDecodeInst(DecoderTableGFX1232, DecoderTableGFX12_FAKE1632, MI, DW,
771 Address, CS))
772 break;
773
774 if (isGFX13() &&
775 tryDecodeInst(DecoderTableGFX1332, DecoderTableGFX13_FAKE1632, MI, DW,
776 Address, CS))
777 break;
778 }
779
781 } while (false);
782
784
785 decodeImmOperands(MI, *MCII);
786
787 if (MCII->get(MI.getOpcode()).TSFlags & SIInstrFlags::DPP) {
788 if (isMacDPP(MI))
790
791 if (MCII->get(MI.getOpcode()).TSFlags & SIInstrFlags::VOP3P)
793 else if (MCII->get(MI.getOpcode()).TSFlags & SIInstrFlags::VOPC)
794 convertVOPCDPPInst(MI); // Special VOP3 case
795 else if (AMDGPU::isVOPC64DPP(MI.getOpcode()))
796 convertVOPC64DPPInst(MI); // Special VOP3 case
797 else if (AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::dpp8) !=
798 -1)
800 else if (MCII->get(MI.getOpcode()).TSFlags & SIInstrFlags::VOP3)
801 convertVOP3DPPInst(MI); // Regular VOP3 case
802 }
803
805
806 if (AMDGPU::isMAC(MI.getOpcode())) {
807 // Insert dummy unused src2_modifiers.
809 AMDGPU::OpName::src2_modifiers);
810 }
811
812 if (MI.getOpcode() == AMDGPU::V_CVT_SR_BF8_F32_e64_dpp ||
813 MI.getOpcode() == AMDGPU::V_CVT_SR_FP8_F32_e64_dpp) {
814 // Insert dummy unused src2_modifiers.
816 AMDGPU::OpName::src2_modifiers);
817 }
818
819 if ((MCII->get(MI.getOpcode()).TSFlags & SIInstrFlags::DS) &&
821 insertNamedMCOperand(MI, MCOperand::createImm(0), AMDGPU::OpName::gds);
822 }
823
824 if (MCII->get(MI.getOpcode()).TSFlags &
826 int CPolPos = AMDGPU::getNamedOperandIdx(MI.getOpcode(),
827 AMDGPU::OpName::cpol);
828 if (CPolPos != -1) {
829 unsigned CPol =
830 (MCII->get(MI.getOpcode()).TSFlags & SIInstrFlags::IsAtomicRet) ?
832 if (MI.getNumOperands() <= (unsigned)CPolPos) {
834 AMDGPU::OpName::cpol);
835 } else if (CPol) {
836 MI.getOperand(CPolPos).setImm(MI.getOperand(CPolPos).getImm() | CPol);
837 }
838 }
839 }
840
841 if ((MCII->get(MI.getOpcode()).TSFlags &
843 (STI.hasFeature(AMDGPU::FeatureGFX90AInsts))) {
844 // GFX90A lost TFE, its place is occupied by ACC.
845 int TFEOpIdx =
846 AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::tfe);
847 if (TFEOpIdx != -1) {
848 auto *TFEIter = MI.begin();
849 std::advance(TFEIter, TFEOpIdx);
850 MI.insert(TFEIter, MCOperand::createImm(0));
851 }
852 }
853
854 // Validate buffer instruction offsets for GFX12+ - must not be a negative.
856 int OffsetIdx =
857 AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::offset);
858 if (OffsetIdx != -1) {
859 uint32_t Imm = MI.getOperand(OffsetIdx).getImm();
860 int64_t SignedOffset = SignExtend64<24>(Imm);
861 if (SignedOffset < 0)
863 }
864 }
865
866 if (MCII->get(MI.getOpcode()).TSFlags &
868 int SWZOpIdx =
869 AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::swz);
870 if (SWZOpIdx != -1) {
871 auto *SWZIter = MI.begin();
872 std::advance(SWZIter, SWZOpIdx);
873 MI.insert(SWZIter, MCOperand::createImm(0));
874 }
875 }
876
877 const MCInstrDesc &Desc = MCII->get(MI.getOpcode());
878 if (Desc.TSFlags & SIInstrFlags::MIMG) {
879 int VAddr0Idx =
880 AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::vaddr0);
881 int RsrcIdx =
882 AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::srsrc);
883 unsigned NSAArgs = RsrcIdx - VAddr0Idx - 1;
884 if (VAddr0Idx >= 0 && NSAArgs > 0) {
885 unsigned NSAWords = (NSAArgs + 3) / 4;
886 if (Bytes.size() < 4 * NSAWords)
888 for (unsigned i = 0; i < NSAArgs; ++i) {
889 const unsigned VAddrIdx = VAddr0Idx + 1 + i;
890 auto VAddrRCID =
891 MCII->getOpRegClassID(Desc.operands()[VAddrIdx], HwModeRegClass);
892 MI.insert(MI.begin() + VAddrIdx, createRegOperand(VAddrRCID, Bytes[i]));
893 }
894 Bytes = Bytes.slice(4 * NSAWords);
895 }
896
898 }
899
900 if (MCII->get(MI.getOpcode()).TSFlags &
903
904 if (MCII->get(MI.getOpcode()).TSFlags & SIInstrFlags::EXP)
906
907 if (MCII->get(MI.getOpcode()).TSFlags & SIInstrFlags::VINTERP)
909
910 if (MCII->get(MI.getOpcode()).TSFlags & SIInstrFlags::SDWA)
912
913 if (MCII->get(MI.getOpcode()).TSFlags & SIInstrFlags::IsMAI)
915
916 if (MCII->get(MI.getOpcode()).TSFlags & SIInstrFlags::IsWMMA)
918
919 int VDstIn_Idx = AMDGPU::getNamedOperandIdx(MI.getOpcode(),
920 AMDGPU::OpName::vdst_in);
921 if (VDstIn_Idx != -1) {
922 int Tied = MCII->get(MI.getOpcode()).getOperandConstraint(VDstIn_Idx,
924 if (Tied != -1 && (MI.getNumOperands() <= (unsigned)VDstIn_Idx ||
925 !MI.getOperand(VDstIn_Idx).isReg() ||
926 MI.getOperand(VDstIn_Idx).getReg() != MI.getOperand(Tied).getReg())) {
927 if (MI.getNumOperands() > (unsigned)VDstIn_Idx)
928 MI.erase(&MI.getOperand(VDstIn_Idx));
930 MCOperand::createReg(MI.getOperand(Tied).getReg()),
931 AMDGPU::OpName::vdst_in);
932 }
933 }
934
935 bool IsSOPK = MCII->get(MI.getOpcode()).TSFlags & SIInstrFlags::SOPK;
936 if (AMDGPU::hasNamedOperand(MI.getOpcode(), AMDGPU::OpName::imm) && !IsSOPK)
938
939 // Some VOPC instructions, e.g., v_cmpx_f_f64, use VOP3 encoding and
940 // have EXEC as implicit destination. Issue a warning if encoding for
941 // vdst is not EXEC.
942 if ((MCII->get(MI.getOpcode()).TSFlags & SIInstrFlags::VOP3) &&
943 MCII->get(MI.getOpcode()).getNumDefs() == 0 &&
944 MCII->get(MI.getOpcode()).hasImplicitDefOfPhysReg(AMDGPU::EXEC)) {
945 auto ExecEncoding = MRI.getEncodingValue(AMDGPU::EXEC_LO);
946 if (Bytes_[0] != ExecEncoding)
948 }
949
950 Size = MaxInstBytesNum - Bytes.size();
951 return Status;
952}
953
955 if (STI.hasFeature(AMDGPU::FeatureGFX11Insts)) {
956 // The MCInst still has these fields even though they are no longer encoded
957 // in the GFX11 instruction.
958 insertNamedMCOperand(MI, MCOperand::createImm(0), AMDGPU::OpName::vm);
959 insertNamedMCOperand(MI, MCOperand::createImm(0), AMDGPU::OpName::compr);
960 }
961}
962
965 if (MI.getOpcode() == AMDGPU::V_INTERP_P10_F16_F32_inreg_t16_gfx11 ||
966 MI.getOpcode() == AMDGPU::V_INTERP_P10_F16_F32_inreg_fake16_gfx11 ||
967 MI.getOpcode() == AMDGPU::V_INTERP_P10_F16_F32_inreg_t16_gfx12 ||
968 MI.getOpcode() == AMDGPU::V_INTERP_P10_F16_F32_inreg_fake16_gfx12 ||
969 MI.getOpcode() == AMDGPU::V_INTERP_P10_F16_F32_inreg_t16_gfx13 ||
970 MI.getOpcode() == AMDGPU::V_INTERP_P10_F16_F32_inreg_fake16_gfx13 ||
971 MI.getOpcode() == AMDGPU::V_INTERP_P10_RTZ_F16_F32_inreg_t16_gfx11 ||
972 MI.getOpcode() == AMDGPU::V_INTERP_P10_RTZ_F16_F32_inreg_fake16_gfx11 ||
973 MI.getOpcode() == AMDGPU::V_INTERP_P10_RTZ_F16_F32_inreg_t16_gfx12 ||
974 MI.getOpcode() == AMDGPU::V_INTERP_P10_RTZ_F16_F32_inreg_fake16_gfx12 ||
975 MI.getOpcode() == AMDGPU::V_INTERP_P10_RTZ_F16_F32_inreg_t16_gfx13 ||
976 MI.getOpcode() == AMDGPU::V_INTERP_P10_RTZ_F16_F32_inreg_fake16_gfx13 ||
977 MI.getOpcode() == AMDGPU::V_INTERP_P2_F16_F32_inreg_t16_gfx11 ||
978 MI.getOpcode() == AMDGPU::V_INTERP_P2_F16_F32_inreg_fake16_gfx11 ||
979 MI.getOpcode() == AMDGPU::V_INTERP_P2_F16_F32_inreg_t16_gfx12 ||
980 MI.getOpcode() == AMDGPU::V_INTERP_P2_F16_F32_inreg_fake16_gfx12 ||
981 MI.getOpcode() == AMDGPU::V_INTERP_P2_F16_F32_inreg_t16_gfx13 ||
982 MI.getOpcode() == AMDGPU::V_INTERP_P2_F16_F32_inreg_fake16_gfx13 ||
983 MI.getOpcode() == AMDGPU::V_INTERP_P2_RTZ_F16_F32_inreg_t16_gfx11 ||
984 MI.getOpcode() == AMDGPU::V_INTERP_P2_RTZ_F16_F32_inreg_fake16_gfx11 ||
985 MI.getOpcode() == AMDGPU::V_INTERP_P2_RTZ_F16_F32_inreg_t16_gfx12 ||
986 MI.getOpcode() == AMDGPU::V_INTERP_P2_RTZ_F16_F32_inreg_fake16_gfx12 ||
987 MI.getOpcode() == AMDGPU::V_INTERP_P2_RTZ_F16_F32_inreg_t16_gfx13 ||
988 MI.getOpcode() == AMDGPU::V_INTERP_P2_RTZ_F16_F32_inreg_fake16_gfx13) {
989 // The MCInst has this field that is not directly encoded in the
990 // instruction.
991 insertNamedMCOperand(MI, MCOperand::createImm(0), AMDGPU::OpName::op_sel);
992 }
993}
994
996 if (STI.hasFeature(AMDGPU::FeatureGFX9) ||
997 STI.hasFeature(AMDGPU::FeatureGFX10)) {
998 if (AMDGPU::hasNamedOperand(MI.getOpcode(), AMDGPU::OpName::sdst))
999 // VOPC - insert clamp
1000 insertNamedMCOperand(MI, MCOperand::createImm(0), AMDGPU::OpName::clamp);
1001 } else if (STI.hasFeature(AMDGPU::FeatureVolcanicIslands)) {
1002 int SDst = AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::sdst);
1003 if (SDst != -1) {
1004 // VOPC - insert VCC register as sdst
1006 AMDGPU::OpName::sdst);
1007 } else {
1008 // VOP1/2 - insert omod if present in instruction
1009 insertNamedMCOperand(MI, MCOperand::createImm(0), AMDGPU::OpName::omod);
1010 }
1011 }
1012}
1013
1014/// Adjust the register values used by V_MFMA_F8F6F4_f8_f8 instructions to the
1015/// appropriate subregister for the used format width.
1017 MCOperand &MO, uint8_t NumRegs) {
1018 switch (NumRegs) {
1019 case 4:
1020 return MO.setReg(MRI.getSubReg(MO.getReg(), AMDGPU::sub0_sub1_sub2_sub3));
1021 case 6:
1022 return MO.setReg(
1023 MRI.getSubReg(MO.getReg(), AMDGPU::sub0_sub1_sub2_sub3_sub4_sub5));
1024 case 8:
1025 if (MCRegister NewReg = MRI.getSubReg(
1026 MO.getReg(), AMDGPU::sub0_sub1_sub2_sub3_sub4_sub5_sub6_sub7)) {
1027 MO.setReg(NewReg);
1028 }
1029 return;
1030 case 12: {
1031 // There is no 384-bit subreg index defined.
1032 MCRegister BaseReg = MRI.getSubReg(MO.getReg(), AMDGPU::sub0);
1033 MCRegister NewReg = MRI.getMatchingSuperReg(
1034 BaseReg, AMDGPU::sub0, &MRI.getRegClass(AMDGPU::VReg_384RegClassID));
1035 return MO.setReg(NewReg);
1036 }
1037 case 16:
1038 // No-op in cases where one operand is still f8/bf8.
1039 return;
1040 default:
1041 llvm_unreachable("Unexpected size for mfma/wmma f8f6f4 operand");
1042 }
1043}
1044
1045/// f8f6f4 instructions have different pseudos depending on the used formats. In
1046/// the disassembler table, we only have the variants with the largest register
1047/// classes which assume using an fp8/bf8 format for both operands. The actual
1048/// register class depends on the format in blgp and cbsz operands. Adjust the
1049/// register classes depending on the used format.
1051 int BlgpIdx =
1052 AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::blgp);
1053 if (BlgpIdx == -1)
1054 return;
1055
1056 int CbszIdx =
1057 AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::cbsz);
1058
1059 unsigned CBSZ = MI.getOperand(CbszIdx).getImm();
1060 unsigned BLGP = MI.getOperand(BlgpIdx).getImm();
1061
1062 const AMDGPU::MFMA_F8F6F4_Info *AdjustedRegClassOpcode =
1063 AMDGPU::getMFMA_F8F6F4_WithFormatArgs(CBSZ, BLGP, MI.getOpcode());
1064 if (!AdjustedRegClassOpcode ||
1065 AdjustedRegClassOpcode->Opcode == MI.getOpcode())
1066 return;
1067
1068 MI.setOpcode(AdjustedRegClassOpcode->Opcode);
1069 int Src0Idx =
1070 AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::src0);
1071 int Src1Idx =
1072 AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::src1);
1073 adjustMFMA_F8F6F4OpRegClass(MRI, MI.getOperand(Src0Idx),
1074 AdjustedRegClassOpcode->NumRegsSrcA);
1075 adjustMFMA_F8F6F4OpRegClass(MRI, MI.getOperand(Src1Idx),
1076 AdjustedRegClassOpcode->NumRegsSrcB);
1077}
1078
1080 int FmtAIdx =
1081 AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::matrix_a_fmt);
1082 if (FmtAIdx == -1)
1083 return;
1084
1085 int FmtBIdx =
1086 AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::matrix_b_fmt);
1087
1088 unsigned FmtA = MI.getOperand(FmtAIdx).getImm();
1089 unsigned FmtB = MI.getOperand(FmtBIdx).getImm();
1090
1091 const AMDGPU::MFMA_F8F6F4_Info *AdjustedRegClassOpcode =
1092 AMDGPU::getWMMA_F8F6F4_WithFormatArgs(FmtA, FmtB, MI.getOpcode());
1093 if (!AdjustedRegClassOpcode ||
1094 AdjustedRegClassOpcode->Opcode == MI.getOpcode())
1095 return;
1096
1097 MI.setOpcode(AdjustedRegClassOpcode->Opcode);
1098 int Src0Idx =
1099 AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::src0);
1100 int Src1Idx =
1101 AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::src1);
1102 adjustMFMA_F8F6F4OpRegClass(MRI, MI.getOperand(Src0Idx),
1103 AdjustedRegClassOpcode->NumRegsSrcA);
1104 adjustMFMA_F8F6F4OpRegClass(MRI, MI.getOperand(Src1Idx),
1105 AdjustedRegClassOpcode->NumRegsSrcB);
1106}
1107
1109 unsigned OpSel = 0;
1110 unsigned OpSelHi = 0;
1111 unsigned NegLo = 0;
1112 unsigned NegHi = 0;
1113};
1114
1115// Reconstruct values of VOP3/VOP3P operands such as op_sel.
1116// Note that these values do not affect disassembler output,
1117// so this is only necessary for consistency with src_modifiers.
1119 bool IsVOP3P = false) {
1120 VOPModifiers Modifiers;
1121 unsigned Opc = MI.getOpcode();
1122 const AMDGPU::OpName ModOps[] = {AMDGPU::OpName::src0_modifiers,
1123 AMDGPU::OpName::src1_modifiers,
1124 AMDGPU::OpName::src2_modifiers};
1125 for (int J = 0; J < 3; ++J) {
1126 int OpIdx = AMDGPU::getNamedOperandIdx(Opc, ModOps[J]);
1127 if (OpIdx == -1)
1128 continue;
1129
1130 unsigned Val = MI.getOperand(OpIdx).getImm();
1131
1132 Modifiers.OpSel |= !!(Val & SISrcMods::OP_SEL_0) << J;
1133 if (IsVOP3P) {
1134 Modifiers.OpSelHi |= !!(Val & SISrcMods::OP_SEL_1) << J;
1135 Modifiers.NegLo |= !!(Val & SISrcMods::NEG) << J;
1136 Modifiers.NegHi |= !!(Val & SISrcMods::NEG_HI) << J;
1137 } else if (J == 0) {
1138 Modifiers.OpSel |= !!(Val & SISrcMods::DST_OP_SEL) << 3;
1139 }
1140 }
1141
1142 return Modifiers;
1143}
1144
1145// Instructions decode the op_sel/suffix bits into the src_modifier
1146// operands. Copy those bits into the src operands for true16 VGPRs.
1148 const unsigned Opc = MI.getOpcode();
1149 const MCRegisterClass &ConversionRC =
1150 MRI.getRegClass(AMDGPU::VGPR_16RegClassID);
1151 constexpr std::array<std::tuple<AMDGPU::OpName, AMDGPU::OpName, unsigned>, 4>
1152 OpAndOpMods = {{{AMDGPU::OpName::src0, AMDGPU::OpName::src0_modifiers,
1154 {AMDGPU::OpName::src1, AMDGPU::OpName::src1_modifiers,
1156 {AMDGPU::OpName::src2, AMDGPU::OpName::src2_modifiers,
1158 {AMDGPU::OpName::vdst, AMDGPU::OpName::src0_modifiers,
1160 for (const auto &[OpName, OpModsName, OpSelMask] : OpAndOpMods) {
1161 int OpIdx = AMDGPU::getNamedOperandIdx(Opc, OpName);
1162 int OpModsIdx = AMDGPU::getNamedOperandIdx(Opc, OpModsName);
1163 if (OpIdx == -1 || OpModsIdx == -1)
1164 continue;
1165 MCOperand &Op = MI.getOperand(OpIdx);
1166 if (!Op.isReg())
1167 continue;
1168 if (!ConversionRC.contains(Op.getReg()))
1169 continue;
1170 unsigned OpEnc = MRI.getEncodingValue(Op.getReg());
1171 const MCOperand &OpMods = MI.getOperand(OpModsIdx);
1172 unsigned ModVal = OpMods.getImm();
1173 if (ModVal & OpSelMask) { // isHi
1174 unsigned RegIdx = OpEnc & AMDGPU::HWEncoding::REG_IDX_MASK;
1175 Op.setReg(ConversionRC.getRegister(RegIdx * 2 + 1));
1176 }
1177 }
1178}
1179
1180// MAC opcodes have special old and src2 operands.
1181// src2 is tied to dst, while old is not tied (but assumed to be).
1183 constexpr int DST_IDX = 0;
1184 auto Opcode = MI.getOpcode();
1185 const auto &Desc = MCII->get(Opcode);
1186 auto OldIdx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::old);
1187
1188 if (OldIdx != -1 && Desc.getOperandConstraint(
1189 OldIdx, MCOI::OperandConstraint::TIED_TO) == -1) {
1190 assert(AMDGPU::hasNamedOperand(Opcode, AMDGPU::OpName::src2));
1191 assert(Desc.getOperandConstraint(
1192 AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2),
1194 (void)DST_IDX;
1195 return true;
1196 }
1197
1198 return false;
1199}
1200
1201// Create dummy old operand and insert dummy unused src2_modifiers
1203 assert(MI.getNumOperands() + 1 < MCII->get(MI.getOpcode()).getNumOperands());
1204 insertNamedMCOperand(MI, MCOperand::createReg(0), AMDGPU::OpName::old);
1206 AMDGPU::OpName::src2_modifiers);
1207}
1208
1210 unsigned Opc = MI.getOpcode();
1211
1212 int VDstInIdx =
1213 AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::vdst_in);
1214 if (VDstInIdx != -1)
1215 insertNamedMCOperand(MI, MI.getOperand(0), AMDGPU::OpName::vdst_in);
1216
1217 unsigned DescNumOps = MCII->get(Opc).getNumOperands();
1218 if (MI.getNumOperands() < DescNumOps &&
1219 AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::op_sel)) {
1221 auto Mods = collectVOPModifiers(MI);
1223 AMDGPU::OpName::op_sel);
1224 } else {
1225 // Insert dummy unused src modifiers.
1226 if (MI.getNumOperands() < DescNumOps &&
1227 AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::src0_modifiers))
1229 AMDGPU::OpName::src0_modifiers);
1230
1231 if (MI.getNumOperands() < DescNumOps &&
1232 AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::src1_modifiers))
1234 AMDGPU::OpName::src1_modifiers);
1235 }
1236}
1237
1240
1241 int VDstInIdx =
1242 AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::vdst_in);
1243 if (VDstInIdx != -1)
1244 insertNamedMCOperand(MI, MI.getOperand(0), AMDGPU::OpName::vdst_in);
1245
1246 unsigned Opc = MI.getOpcode();
1247 unsigned DescNumOps = MCII->get(Opc).getNumOperands();
1248 if (MI.getNumOperands() < DescNumOps &&
1249 AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::op_sel)) {
1250 auto Mods = collectVOPModifiers(MI);
1252 AMDGPU::OpName::op_sel);
1253 }
1254}
1255
1256// Given a wide tuple \p Reg check if it will overflow 256 registers.
1257// \returns \p Reg on success or NoRegister otherwise.
1259 const MCRegisterInfo &MRI) {
1260 unsigned NumRegs = RC.getSizeInBits() / 32;
1261 MCRegister Sub0 = MRI.getSubReg(Reg, AMDGPU::sub0);
1262 if (!Sub0)
1263 return Reg;
1264
1265 MCRegister BaseReg;
1266 if (MRI.getRegClass(AMDGPU::VGPR_32RegClassID).contains(Sub0))
1267 BaseReg = AMDGPU::VGPR0;
1268 else if (MRI.getRegClass(AMDGPU::AGPR_32RegClassID).contains(Sub0))
1269 BaseReg = AMDGPU::AGPR0;
1270
1271 assert(BaseReg && "Only vector registers expected");
1272
1273 return (Sub0 - BaseReg + NumRegs <= 256) ? Reg : MCRegister();
1274}
1275
1276 // Note that before gfx10, the MIMG encoding provided no information about
1277 // the VADDR size. Consequently, decoded instructions always show the address
1278 // as if it had 1 dword, which may not actually be the case.
1280 auto TSFlags = MCII->get(MI.getOpcode()).TSFlags;
1281
1282 int VDstIdx = AMDGPU::getNamedOperandIdx(MI.getOpcode(),
1283 AMDGPU::OpName::vdst);
1284
1285 int VDataIdx = AMDGPU::getNamedOperandIdx(MI.getOpcode(),
1286 AMDGPU::OpName::vdata);
1287 int VAddr0Idx =
1288 AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::vaddr0);
1289 AMDGPU::OpName RsrcOpName = (TSFlags & SIInstrFlags::MIMG)
1290 ? AMDGPU::OpName::srsrc
1291 : AMDGPU::OpName::rsrc;
1292 int RsrcIdx = AMDGPU::getNamedOperandIdx(MI.getOpcode(), RsrcOpName);
1293 int DMaskIdx = AMDGPU::getNamedOperandIdx(MI.getOpcode(),
1294 AMDGPU::OpName::dmask);
1295
1296 int TFEIdx = AMDGPU::getNamedOperandIdx(MI.getOpcode(),
1297 AMDGPU::OpName::tfe);
1298 int D16Idx = AMDGPU::getNamedOperandIdx(MI.getOpcode(),
1299 AMDGPU::OpName::d16);
1300
1301 const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(MI.getOpcode());
1302 const AMDGPU::MIMGBaseOpcodeInfo *BaseOpcode =
1303 AMDGPU::getMIMGBaseOpcodeInfo(Info->BaseOpcode);
1304
1305 assert(VDataIdx != -1);
1306 if (BaseOpcode->BVH) {
1307 // Add A16 operand for intersect_ray instructions
1308 addOperand(MI, MCOperand::createImm(BaseOpcode->A16));
1309 return;
1310 }
1311
1312 bool IsAtomic = (VDstIdx != -1);
1313 bool IsGather4 = TSFlags & SIInstrFlags::Gather4;
1314 bool IsVSample = TSFlags & SIInstrFlags::VSAMPLE;
1315 bool IsNSA = false;
1316 bool IsPartialNSA = false;
1317 unsigned AddrSize = Info->VAddrDwords;
1318
1319 if (isGFX10Plus()) {
1320 unsigned DimIdx =
1321 AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::dim);
1322 int A16Idx =
1323 AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::a16);
1324 const AMDGPU::MIMGDimInfo *Dim =
1325 AMDGPU::getMIMGDimInfoByEncoding(MI.getOperand(DimIdx).getImm());
1326 const bool IsA16 = (A16Idx != -1 && MI.getOperand(A16Idx).getImm());
1327
1328 AddrSize =
1329 AMDGPU::getAddrSizeMIMGOp(BaseOpcode, Dim, IsA16, AMDGPU::hasG16(STI));
1330
1331 // VSAMPLE insts that do not use vaddr3 behave the same as NSA forms.
1332 // VIMAGE insts other than BVH never use vaddr4.
1333 IsNSA = Info->MIMGEncoding == AMDGPU::MIMGEncGfx10NSA ||
1334 Info->MIMGEncoding == AMDGPU::MIMGEncGfx11NSA ||
1335 Info->MIMGEncoding == AMDGPU::MIMGEncGfx12 ||
1336 Info->MIMGEncoding == AMDGPU::MIMGEncGfx13;
1337 if (!IsNSA) {
1338 if (!IsVSample && AddrSize > 12)
1339 AddrSize = 16;
1340 } else {
1341 if (AddrSize > Info->VAddrDwords) {
1342 if (!STI.hasFeature(AMDGPU::FeaturePartialNSAEncoding)) {
1343 // The NSA encoding does not contain enough operands for the
1344 // combination of base opcode / dimension. Should this be an error?
1345 return;
1346 }
1347 IsPartialNSA = true;
1348 }
1349 }
1350 }
1351
1352 unsigned DMask = MI.getOperand(DMaskIdx).getImm() & 0xf;
1353 unsigned DstSize = IsGather4 ? 4 : std::max(llvm::popcount(DMask), 1);
1354
1355 bool D16 = D16Idx >= 0 && MI.getOperand(D16Idx).getImm();
1356 if (D16 && AMDGPU::hasPackedD16(STI)) {
1357 DstSize = (DstSize + 1) / 2;
1358 }
1359
1360 if (TFEIdx != -1 && MI.getOperand(TFEIdx).getImm())
1361 DstSize += 1;
1362
1363 if (DstSize == Info->VDataDwords && AddrSize == Info->VAddrDwords)
1364 return;
1365
1366 int NewOpcode =
1367 AMDGPU::getMIMGOpcode(Info->BaseOpcode, Info->MIMGEncoding, DstSize, AddrSize);
1368 if (NewOpcode == -1)
1369 return;
1370
1371 // Widen the register to the correct number of enabled channels.
1372 MCRegister NewVdata;
1373 if (DstSize != Info->VDataDwords) {
1374 auto DataRCID = MCII->getOpRegClassID(
1375 MCII->get(NewOpcode).operands()[VDataIdx], HwModeRegClass);
1376
1377 // Get first subregister of VData
1378 MCRegister Vdata0 = MI.getOperand(VDataIdx).getReg();
1379 MCRegister VdataSub0 = MRI.getSubReg(Vdata0, AMDGPU::sub0);
1380 Vdata0 = (VdataSub0 != 0)? VdataSub0 : Vdata0;
1381
1382 const MCRegisterClass &NewRC = MRI.getRegClass(DataRCID);
1383 NewVdata = MRI.getMatchingSuperReg(Vdata0, AMDGPU::sub0, &NewRC);
1384 NewVdata = CheckVGPROverflow(NewVdata, NewRC, MRI);
1385 if (!NewVdata) {
1386 // It's possible to encode this such that the low register + enabled
1387 // components exceeds the register count.
1388 return;
1389 }
1390 }
1391
1392 // If not using NSA on GFX10+, widen vaddr0 address register to correct size.
1393 // If using partial NSA on GFX11+ widen last address register.
1394 int VAddrSAIdx = IsPartialNSA ? (RsrcIdx - 1) : VAddr0Idx;
1395 MCRegister NewVAddrSA;
1396 if (STI.hasFeature(AMDGPU::FeatureNSAEncoding) && (!IsNSA || IsPartialNSA) &&
1397 AddrSize != Info->VAddrDwords) {
1398 MCRegister VAddrSA = MI.getOperand(VAddrSAIdx).getReg();
1399 MCRegister VAddrSubSA = MRI.getSubReg(VAddrSA, AMDGPU::sub0);
1400 VAddrSA = VAddrSubSA ? VAddrSubSA : VAddrSA;
1401
1402 auto AddrRCID = MCII->getOpRegClassID(
1403 MCII->get(NewOpcode).operands()[VAddrSAIdx], HwModeRegClass);
1404
1405 const MCRegisterClass &NewRC = MRI.getRegClass(AddrRCID);
1406 NewVAddrSA = MRI.getMatchingSuperReg(VAddrSA, AMDGPU::sub0, &NewRC);
1407 NewVAddrSA = CheckVGPROverflow(NewVAddrSA, NewRC, MRI);
1408 if (!NewVAddrSA)
1409 return;
1410 }
1411
1412 MI.setOpcode(NewOpcode);
1413
1414 if (NewVdata != AMDGPU::NoRegister) {
1415 MI.getOperand(VDataIdx) = MCOperand::createReg(NewVdata);
1416
1417 if (IsAtomic) {
1418 // Atomic operations have an additional operand (a copy of data)
1419 MI.getOperand(VDstIdx) = MCOperand::createReg(NewVdata);
1420 }
1421 }
1422
1423 if (NewVAddrSA) {
1424 MI.getOperand(VAddrSAIdx) = MCOperand::createReg(NewVAddrSA);
1425 } else if (IsNSA) {
1426 assert(AddrSize <= Info->VAddrDwords);
1427 MI.erase(MI.begin() + VAddr0Idx + AddrSize,
1428 MI.begin() + VAddr0Idx + Info->VAddrDwords);
1429 }
1430}
1431
1432// Opsel and neg bits are used in src_modifiers and standalone operands. Autogen
1433// decoder only adds to src_modifiers, so manually add the bits to the other
1434// operands.
1436 unsigned Opc = MI.getOpcode();
1437 unsigned DescNumOps = MCII->get(Opc).getNumOperands();
1438 auto Mods = collectVOPModifiers(MI, true);
1439
1440 if (MI.getNumOperands() < DescNumOps &&
1441 AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::vdst_in))
1442 insertNamedMCOperand(MI, MCOperand::createImm(0), AMDGPU::OpName::vdst_in);
1443
1444 if (MI.getNumOperands() < DescNumOps &&
1445 AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::op_sel))
1447 AMDGPU::OpName::op_sel);
1448 if (MI.getNumOperands() < DescNumOps &&
1449 AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::op_sel_hi))
1451 AMDGPU::OpName::op_sel_hi);
1452 if (MI.getNumOperands() < DescNumOps &&
1453 AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::neg_lo))
1455 AMDGPU::OpName::neg_lo);
1456 if (MI.getNumOperands() < DescNumOps &&
1457 AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::neg_hi))
1459 AMDGPU::OpName::neg_hi);
1460}
1461
1462// Create dummy old operand and insert optional operands
1464 unsigned Opc = MI.getOpcode();
1465 unsigned DescNumOps = MCII->get(Opc).getNumOperands();
1466
1467 if (MI.getNumOperands() < DescNumOps &&
1468 AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::old))
1469 insertNamedMCOperand(MI, MCOperand::createReg(0), AMDGPU::OpName::old);
1470
1471 if (MI.getNumOperands() < DescNumOps &&
1472 AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::src0_modifiers))
1474 AMDGPU::OpName::src0_modifiers);
1475
1476 if (MI.getNumOperands() < DescNumOps &&
1477 AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::src1_modifiers))
1479 AMDGPU::OpName::src1_modifiers);
1480}
1481
1483 unsigned Opc = MI.getOpcode();
1484 unsigned DescNumOps = MCII->get(Opc).getNumOperands();
1485
1487
1488 if (MI.getNumOperands() < DescNumOps &&
1489 AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::op_sel)) {
1492 AMDGPU::OpName::op_sel);
1493 }
1494}
1495
1497 assert(HasLiteral && "Should have decoded a literal");
1498 insertNamedMCOperand(MI, MCOperand::createImm(Literal), AMDGPU::OpName::immX);
1499}
1500
1501const char* AMDGPUDisassembler::getRegClassName(unsigned RegClassID) const {
1502 return getContext().getRegisterInfo()->
1503 getRegClassName(&AMDGPUMCRegisterClasses[RegClassID]);
1504}
1505
1506inline
1508 const Twine& ErrMsg) const {
1509 *CommentStream << "Error: " + ErrMsg;
1510
1511 // ToDo: add support for error operands to MCInst.h
1512 // return MCOperand::createError(V);
1513 return MCOperand();
1514}
1515
1519
1520inline
1522 unsigned Val) const {
1523 const auto& RegCl = AMDGPUMCRegisterClasses[RegClassID];
1524 if (Val >= RegCl.getNumRegs())
1525 return errOperand(Val, Twine(getRegClassName(RegClassID)) +
1526 ": unknown register " + Twine(Val));
1527 return createRegOperand(RegCl.getRegister(Val));
1528}
1529
1530inline
1532 unsigned Val) const {
1533 // ToDo: SI/CI have 104 SGPRs, VI - 102
1534 // Valery: here we accepting as much as we can, let assembler sort it out
1535 int shift = 0;
1536 switch (SRegClassID) {
1537 case AMDGPU::SGPR_32RegClassID:
1538 case AMDGPU::TTMP_32RegClassID:
1539 break;
1540 case AMDGPU::SGPR_64RegClassID:
1541 case AMDGPU::TTMP_64RegClassID:
1542 shift = 1;
1543 break;
1544 case AMDGPU::SGPR_96RegClassID:
1545 case AMDGPU::TTMP_96RegClassID:
1546 case AMDGPU::SGPR_128RegClassID:
1547 case AMDGPU::TTMP_128RegClassID:
1548 // ToDo: unclear if s[100:104] is available on VI. Can we use VCC as SGPR in
1549 // this bundle?
1550 case AMDGPU::SGPR_256RegClassID:
1551 case AMDGPU::TTMP_256RegClassID:
1552 // ToDo: unclear if s[96:104] is available on VI. Can we use VCC as SGPR in
1553 // this bundle?
1554 case AMDGPU::SGPR_288RegClassID:
1555 case AMDGPU::TTMP_288RegClassID:
1556 case AMDGPU::SGPR_320RegClassID:
1557 case AMDGPU::TTMP_320RegClassID:
1558 case AMDGPU::SGPR_352RegClassID:
1559 case AMDGPU::TTMP_352RegClassID:
1560 case AMDGPU::SGPR_384RegClassID:
1561 case AMDGPU::TTMP_384RegClassID:
1562 case AMDGPU::SGPR_512RegClassID:
1563 case AMDGPU::TTMP_512RegClassID:
1564 shift = 2;
1565 break;
1566 // ToDo: unclear if s[88:104] is available on VI. Can we use VCC as SGPR in
1567 // this bundle?
1568 default:
1569 llvm_unreachable("unhandled register class");
1570 }
1571
1572 if (Val % (1 << shift)) {
1573 *CommentStream << "Warning: " << getRegClassName(SRegClassID)
1574 << ": scalar reg isn't aligned " << Val;
1575 }
1576
1577 return createRegOperand(SRegClassID, Val >> shift);
1578}
1579
1581 bool IsHi) const {
1582 unsigned RegIdxInVGPR16 = RegIdx * 2 + (IsHi ? 1 : 0);
1583 return createRegOperand(AMDGPU::VGPR_16RegClassID, RegIdxInVGPR16);
1584}
1585
1586// Decode Literals for insts which always have a literal in the encoding
1589 if (HasLiteral) {
1590 assert(
1592 "Should only decode multiple kimm with VOPD, check VSrc operand types");
1593 if (Literal != Val)
1594 return errOperand(Val, "More than one unique literal is illegal");
1595 }
1596 HasLiteral = true;
1597 Literal = Val;
1598 return MCOperand::createImm(Literal);
1599}
1600
1603 if (HasLiteral) {
1604 if (Literal != Val)
1605 return errOperand(Val, "More than one unique literal is illegal");
1606 }
1607 HasLiteral = true;
1608 Literal = Val;
1609
1610 bool UseLit64 = Hi_32(Literal) == 0;
1612 LitModifier::Lit64, Literal, getContext()))
1613 : MCOperand::createImm(Literal);
1614}
1615
1618 const MCOperandInfo &OpDesc) const {
1619 // For now all literal constants are supposed to be unsigned integer
1620 // ToDo: deal with signed/unsigned 64-bit integer constants
1621 // ToDo: deal with float/double constants
1622 if (!HasLiteral) {
1623 if (Bytes.size() < 4) {
1624 return errOperand(0, "cannot read literal, inst bytes left " +
1625 Twine(Bytes.size()));
1626 }
1627 HasLiteral = true;
1628 Literal = eatBytes<uint32_t>(Bytes);
1629 }
1630
1631 // For disassembling always assume all inline constants are available.
1632 bool HasInv2Pi = true;
1633
1634 // Invalid instruction codes may contain literals for inline-only
1635 // operands, so we support them here as well.
1636 int64_t Val = Literal;
1637 bool UseLit = false;
1638 switch (OpDesc.OperandType) {
1639 default:
1640 llvm_unreachable("Unexpected operand type!");
1644 UseLit = AMDGPU::isInlinableLiteralBF16(Val, HasInv2Pi);
1645 break;
1648 break;
1652 UseLit = AMDGPU::isInlinableLiteralFP16(Val, HasInv2Pi);
1653 break;
1655 UseLit = AMDGPU::isInlinableLiteralV2F16(Val);
1656 break;
1659 break;
1661 break;
1665 UseLit = AMDGPU::isInlinableLiteralI16(Val, HasInv2Pi);
1666 break;
1668 UseLit = AMDGPU::isInlinableLiteralV2I16(Val);
1669 break;
1679 UseLit = AMDGPU::isInlinableLiteral32(Val, HasInv2Pi);
1680 break;
1685 UseLit = AMDGPU::isInlinableLiteral64(Val << 32, HasInv2Pi);
1686 if (!UseLit)
1687 Val <<= 32;
1688 break;
1692 UseLit = AMDGPU::isInlinableLiteral64(Val, HasInv2Pi);
1693 break;
1695 // TODO: Disassembling V_DUAL_FMAMK_F32_X_FMAMK_F32_gfx11 hits
1696 // decoding a literal in a position of a register operand. Give
1697 // it special handling in the caller, decodeImmOperands(), instead
1698 // of quietly allowing it here.
1699 break;
1700 }
1701
1704 : MCOperand::createImm(Val);
1705}
1706
1708 assert(STI.hasFeature(AMDGPU::Feature64BitLiterals));
1709
1710 if (!HasLiteral) {
1711 if (Bytes.size() < 8) {
1712 return errOperand(0, "cannot read literal64, inst bytes left " +
1713 Twine(Bytes.size()));
1714 }
1715 HasLiteral = true;
1716 Literal = eatBytes<uint64_t>(Bytes);
1717 }
1718
1719 bool UseLit64 = Hi_32(Literal) == 0;
1720
1721 UseLit64 |= AMDGPU::isInlinableLiteral64(
1722 Literal, STI.hasFeature(AMDGPU::FeatureInv2PiInlineImm));
1723
1725 LitModifier::Lit64, Literal, getContext()))
1726 : MCOperand::createImm(Literal);
1727}
1728
1730 using namespace AMDGPU::EncValues;
1731
1732 assert(Imm >= INLINE_INTEGER_C_MIN && Imm <= INLINE_INTEGER_C_MAX);
1733 return MCOperand::createImm((Imm <= INLINE_INTEGER_C_POSITIVE_MAX) ?
1734 (static_cast<int64_t>(Imm) - INLINE_INTEGER_C_MIN) :
1735 (INLINE_INTEGER_C_POSITIVE_MAX - static_cast<int64_t>(Imm)));
1736 // Cast prevents negative overflow.
1737}
1738
1739static int64_t getInlineImmVal32(unsigned Imm) {
1740 switch (Imm) {
1741 case 240:
1742 return llvm::bit_cast<uint32_t>(0.5f);
1743 case 241:
1744 return llvm::bit_cast<uint32_t>(-0.5f);
1745 case 242:
1746 return llvm::bit_cast<uint32_t>(1.0f);
1747 case 243:
1748 return llvm::bit_cast<uint32_t>(-1.0f);
1749 case 244:
1750 return llvm::bit_cast<uint32_t>(2.0f);
1751 case 245:
1752 return llvm::bit_cast<uint32_t>(-2.0f);
1753 case 246:
1754 return llvm::bit_cast<uint32_t>(4.0f);
1755 case 247:
1756 return llvm::bit_cast<uint32_t>(-4.0f);
1757 case 248: // 1 / (2 * PI)
1758 return 0x3e22f983;
1759 default:
1760 llvm_unreachable("invalid fp inline imm");
1761 }
1762}
1763
1764static int64_t getInlineImmVal64(unsigned Imm) {
1765 switch (Imm) {
1766 case 240:
1767 return llvm::bit_cast<uint64_t>(0.5);
1768 case 241:
1769 return llvm::bit_cast<uint64_t>(-0.5);
1770 case 242:
1771 return llvm::bit_cast<uint64_t>(1.0);
1772 case 243:
1773 return llvm::bit_cast<uint64_t>(-1.0);
1774 case 244:
1775 return llvm::bit_cast<uint64_t>(2.0);
1776 case 245:
1777 return llvm::bit_cast<uint64_t>(-2.0);
1778 case 246:
1779 return llvm::bit_cast<uint64_t>(4.0);
1780 case 247:
1781 return llvm::bit_cast<uint64_t>(-4.0);
1782 case 248: // 1 / (2 * PI)
1783 return 0x3fc45f306dc9c882;
1784 default:
1785 llvm_unreachable("invalid fp inline imm");
1786 }
1787}
1788
1789static int64_t getInlineImmValF16(unsigned Imm) {
1790 switch (Imm) {
1791 case 240:
1792 return 0x3800;
1793 case 241:
1794 return 0xB800;
1795 case 242:
1796 return 0x3C00;
1797 case 243:
1798 return 0xBC00;
1799 case 244:
1800 return 0x4000;
1801 case 245:
1802 return 0xC000;
1803 case 246:
1804 return 0x4400;
1805 case 247:
1806 return 0xC400;
1807 case 248: // 1 / (2 * PI)
1808 return 0x3118;
1809 default:
1810 llvm_unreachable("invalid fp inline imm");
1811 }
1812}
1813
1814static int64_t getInlineImmValBF16(unsigned Imm) {
1815 switch (Imm) {
1816 case 240:
1817 return 0x3F00;
1818 case 241:
1819 return 0xBF00;
1820 case 242:
1821 return 0x3F80;
1822 case 243:
1823 return 0xBF80;
1824 case 244:
1825 return 0x4000;
1826 case 245:
1827 return 0xC000;
1828 case 246:
1829 return 0x4080;
1830 case 247:
1831 return 0xC080;
1832 case 248: // 1 / (2 * PI)
1833 return 0x3E22;
1834 default:
1835 llvm_unreachable("invalid fp inline imm");
1836 }
1837}
1838
1839unsigned AMDGPUDisassembler::getVgprClassId(unsigned Width) const {
1840 using namespace AMDGPU;
1841
1842 switch (Width) {
1843 case 16:
1844 case 32:
1845 return VGPR_32RegClassID;
1846 case 64:
1847 return VReg_64RegClassID;
1848 case 96:
1849 return VReg_96RegClassID;
1850 case 128:
1851 return VReg_128RegClassID;
1852 case 160:
1853 return VReg_160RegClassID;
1854 case 192:
1855 return VReg_192RegClassID;
1856 case 256:
1857 return VReg_256RegClassID;
1858 case 288:
1859 return VReg_288RegClassID;
1860 case 320:
1861 return VReg_320RegClassID;
1862 case 352:
1863 return VReg_352RegClassID;
1864 case 384:
1865 return VReg_384RegClassID;
1866 case 512:
1867 return VReg_512RegClassID;
1868 case 1024:
1869 return VReg_1024RegClassID;
1870 }
1871 llvm_unreachable("Invalid register width!");
1872}
1873
1874unsigned AMDGPUDisassembler::getAgprClassId(unsigned Width) const {
1875 using namespace AMDGPU;
1876
1877 switch (Width) {
1878 case 16:
1879 case 32:
1880 return AGPR_32RegClassID;
1881 case 64:
1882 return AReg_64RegClassID;
1883 case 96:
1884 return AReg_96RegClassID;
1885 case 128:
1886 return AReg_128RegClassID;
1887 case 160:
1888 return AReg_160RegClassID;
1889 case 256:
1890 return AReg_256RegClassID;
1891 case 288:
1892 return AReg_288RegClassID;
1893 case 320:
1894 return AReg_320RegClassID;
1895 case 352:
1896 return AReg_352RegClassID;
1897 case 384:
1898 return AReg_384RegClassID;
1899 case 512:
1900 return AReg_512RegClassID;
1901 case 1024:
1902 return AReg_1024RegClassID;
1903 }
1904 llvm_unreachable("Invalid register width!");
1905}
1906
1907unsigned AMDGPUDisassembler::getSgprClassId(unsigned Width) const {
1908 using namespace AMDGPU;
1909
1910 switch (Width) {
1911 case 16:
1912 case 32:
1913 return SGPR_32RegClassID;
1914 case 64:
1915 return SGPR_64RegClassID;
1916 case 96:
1917 return SGPR_96RegClassID;
1918 case 128:
1919 return SGPR_128RegClassID;
1920 case 160:
1921 return SGPR_160RegClassID;
1922 case 256:
1923 return SGPR_256RegClassID;
1924 case 288:
1925 return SGPR_288RegClassID;
1926 case 320:
1927 return SGPR_320RegClassID;
1928 case 352:
1929 return SGPR_352RegClassID;
1930 case 384:
1931 return SGPR_384RegClassID;
1932 case 512:
1933 return SGPR_512RegClassID;
1934 }
1935 llvm_unreachable("Invalid register width!");
1936}
1937
1938unsigned AMDGPUDisassembler::getTtmpClassId(unsigned Width) const {
1939 using namespace AMDGPU;
1940
1941 switch (Width) {
1942 case 16:
1943 case 32:
1944 return TTMP_32RegClassID;
1945 case 64:
1946 return TTMP_64RegClassID;
1947 case 128:
1948 return TTMP_128RegClassID;
1949 case 256:
1950 return TTMP_256RegClassID;
1951 case 288:
1952 return TTMP_288RegClassID;
1953 case 320:
1954 return TTMP_320RegClassID;
1955 case 352:
1956 return TTMP_352RegClassID;
1957 case 384:
1958 return TTMP_384RegClassID;
1959 case 512:
1960 return TTMP_512RegClassID;
1961 }
1962 llvm_unreachable("Invalid register width!");
1963}
1964
1965int AMDGPUDisassembler::getTTmpIdx(unsigned Val) const {
1966 using namespace AMDGPU::EncValues;
1967
1968 unsigned TTmpMin = isGFX9Plus() ? TTMP_GFX9PLUS_MIN : TTMP_VI_MIN;
1969 unsigned TTmpMax = isGFX9Plus() ? TTMP_GFX9PLUS_MAX : TTMP_VI_MAX;
1970
1971 return (TTmpMin <= Val && Val <= TTmpMax)? Val - TTmpMin : -1;
1972}
1973
1975 unsigned Val) const {
1976 using namespace AMDGPU::EncValues;
1977
1978 assert(Val < 1024); // enum10
1979
1980 bool IsAGPR = Val & 512;
1981 Val &= 511;
1982
1983 if (VGPR_MIN <= Val && Val <= VGPR_MAX) {
1984 return createRegOperand(IsAGPR ? getAgprClassId(Width)
1985 : getVgprClassId(Width), Val - VGPR_MIN);
1986 }
1987 return decodeNonVGPRSrcOp(Inst, Width, Val & 0xFF);
1988}
1989
1991 unsigned Width,
1992 unsigned Val) const {
1993 // Cases when Val{8} is 1 (vgpr, agpr or true 16 vgpr) should have been
1994 // decoded earlier.
1995 assert(Val < (1 << 8) && "9-bit Src encoding when Val{8} is 0");
1996 using namespace AMDGPU::EncValues;
1997
1998 if (Val <= SGPR_MAX) {
1999 // "SGPR_MIN <= Val" is always true and causes compilation warning.
2000 static_assert(SGPR_MIN == 0);
2001 return createSRegOperand(getSgprClassId(Width), Val - SGPR_MIN);
2002 }
2003
2004 int TTmpIdx = getTTmpIdx(Val);
2005 if (TTmpIdx >= 0) {
2006 return createSRegOperand(getTtmpClassId(Width), TTmpIdx);
2007 }
2008
2009 if ((INLINE_INTEGER_C_MIN <= Val && Val <= INLINE_INTEGER_C_MAX) ||
2010 (INLINE_FLOATING_C_MIN <= Val && Val <= INLINE_FLOATING_C_MAX) ||
2011 Val == LITERAL_CONST)
2012 return MCOperand::createImm(Val);
2013
2014 if (Val == LITERAL64_CONST && STI.hasFeature(AMDGPU::Feature64BitLiterals)) {
2015 return decodeLiteral64Constant();
2016 }
2017
2018 switch (Width) {
2019 case 32:
2020 case 16:
2021 return decodeSpecialReg32(Val);
2022 case 64:
2023 return decodeSpecialReg64(Val);
2024 case 96:
2025 case 128:
2026 case 256:
2027 case 512:
2028 return decodeSpecialReg96Plus(Val);
2029 default:
2030 llvm_unreachable("unexpected immediate type");
2031 }
2032}
2033
2034// Bit 0 of DstY isn't stored in the instruction, because it's always the
2035// opposite of bit 0 of DstX.
2037 unsigned Val) const {
2038 int VDstXInd =
2039 AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::vdstX);
2040 assert(VDstXInd != -1);
2041 assert(Inst.getOperand(VDstXInd).isReg());
2042 unsigned XDstReg = MRI.getEncodingValue(Inst.getOperand(VDstXInd).getReg());
2043 Val |= ~XDstReg & 1;
2044 return createRegOperand(getVgprClassId(32), Val);
2045}
2046
2048 using namespace AMDGPU;
2049
2050 switch (Val) {
2051 // clang-format off
2052 case 102: return createRegOperand(FLAT_SCR_LO);
2053 case 103: return createRegOperand(FLAT_SCR_HI);
2054 case 104: return createRegOperand(XNACK_MASK_LO);
2055 case 105: return createRegOperand(XNACK_MASK_HI);
2056 case 106: return createRegOperand(VCC_LO);
2057 case 107: return createRegOperand(VCC_HI);
2058 case 108: return createRegOperand(TBA_LO);
2059 case 109: return createRegOperand(TBA_HI);
2060 case 110: return createRegOperand(TMA_LO);
2061 case 111: return createRegOperand(TMA_HI);
2062 case 124:
2063 return isGFX11Plus() ? createRegOperand(SGPR_NULL) : createRegOperand(M0);
2064 case 125:
2065 return isGFX11Plus() ? createRegOperand(M0) : createRegOperand(SGPR_NULL);
2066 case 126: return createRegOperand(EXEC_LO);
2067 case 127: return createRegOperand(EXEC_HI);
2068 case 230: return createRegOperand(SRC_FLAT_SCRATCH_BASE_LO);
2069 case 231: return createRegOperand(SRC_FLAT_SCRATCH_BASE_HI);
2070 case 235: return createRegOperand(SRC_SHARED_BASE_LO);
2071 case 236: return createRegOperand(SRC_SHARED_LIMIT_LO);
2072 case 237: return createRegOperand(SRC_PRIVATE_BASE_LO);
2073 case 238: return createRegOperand(SRC_PRIVATE_LIMIT_LO);
2074 case 239: return createRegOperand(SRC_POPS_EXITING_WAVE_ID);
2075 case 251: return createRegOperand(SRC_VCCZ);
2076 case 252: return createRegOperand(SRC_EXECZ);
2077 case 253: return createRegOperand(SRC_SCC);
2078 case 254: return createRegOperand(LDS_DIRECT);
2079 default: break;
2080 // clang-format on
2081 }
2082 return errOperand(Val, "unknown operand encoding " + Twine(Val));
2083}
2084
2086 using namespace AMDGPU;
2087
2088 switch (Val) {
2089 case 102: return createRegOperand(FLAT_SCR);
2090 case 104: return createRegOperand(XNACK_MASK);
2091 case 106: return createRegOperand(VCC);
2092 case 108: return createRegOperand(TBA);
2093 case 110: return createRegOperand(TMA);
2094 case 124:
2095 if (isGFX11Plus())
2096 return createRegOperand(SGPR_NULL);
2097 break;
2098 case 125:
2099 if (!isGFX11Plus())
2100 return createRegOperand(SGPR_NULL);
2101 break;
2102 case 126: return createRegOperand(EXEC);
2103 case 230: return createRegOperand(SRC_FLAT_SCRATCH_BASE_LO);
2104 case 235: return createRegOperand(SRC_SHARED_BASE);
2105 case 236: return createRegOperand(SRC_SHARED_LIMIT);
2106 case 237: return createRegOperand(SRC_PRIVATE_BASE);
2107 case 238: return createRegOperand(SRC_PRIVATE_LIMIT);
2108 case 239: return createRegOperand(SRC_POPS_EXITING_WAVE_ID);
2109 case 251: return createRegOperand(SRC_VCCZ);
2110 case 252: return createRegOperand(SRC_EXECZ);
2111 case 253: return createRegOperand(SRC_SCC);
2112 default: break;
2113 }
2114 return errOperand(Val, "unknown operand encoding " + Twine(Val));
2115}
2116
2118 using namespace AMDGPU;
2119
2120 switch (Val) {
2121 case 124:
2122 if (isGFX11Plus())
2123 return createRegOperand(SGPR_NULL);
2124 break;
2125 case 125:
2126 if (!isGFX11Plus())
2127 return createRegOperand(SGPR_NULL);
2128 break;
2129 default:
2130 break;
2131 }
2132 return errOperand(Val, "unknown operand encoding " + Twine(Val));
2133}
2134
2136 const unsigned Val) const {
2137 using namespace AMDGPU::SDWA;
2138 using namespace AMDGPU::EncValues;
2139
2140 if (STI.hasFeature(AMDGPU::FeatureGFX9) ||
2141 STI.hasFeature(AMDGPU::FeatureGFX10)) {
2142 // XXX: cast to int is needed to avoid stupid warning:
2143 // compare with unsigned is always true
2144 if (int(SDWA9EncValues::SRC_VGPR_MIN) <= int(Val) &&
2145 Val <= SDWA9EncValues::SRC_VGPR_MAX) {
2146 return createRegOperand(getVgprClassId(Width),
2147 Val - SDWA9EncValues::SRC_VGPR_MIN);
2148 }
2149 if (SDWA9EncValues::SRC_SGPR_MIN <= Val &&
2150 Val <= (isGFX10Plus() ? SDWA9EncValues::SRC_SGPR_MAX_GFX10
2151 : SDWA9EncValues::SRC_SGPR_MAX_SI)) {
2152 return createSRegOperand(getSgprClassId(Width),
2153 Val - SDWA9EncValues::SRC_SGPR_MIN);
2154 }
2155 if (SDWA9EncValues::SRC_TTMP_MIN <= Val &&
2156 Val <= SDWA9EncValues::SRC_TTMP_MAX) {
2157 return createSRegOperand(getTtmpClassId(Width),
2158 Val - SDWA9EncValues::SRC_TTMP_MIN);
2159 }
2160
2161 const unsigned SVal = Val - SDWA9EncValues::SRC_SGPR_MIN;
2162
2163 if ((INLINE_INTEGER_C_MIN <= SVal && SVal <= INLINE_INTEGER_C_MAX) ||
2164 (INLINE_FLOATING_C_MIN <= SVal && SVal <= INLINE_FLOATING_C_MAX))
2165 return MCOperand::createImm(SVal);
2166
2167 return decodeSpecialReg32(SVal);
2168 }
2169 if (STI.hasFeature(AMDGPU::FeatureVolcanicIslands))
2170 return createRegOperand(getVgprClassId(Width), Val);
2171 llvm_unreachable("unsupported target");
2172}
2173
2175 return decodeSDWASrc(16, Val);
2176}
2177
2179 return decodeSDWASrc(32, Val);
2180}
2181
2183 using namespace AMDGPU::SDWA;
2184
2185 assert((STI.hasFeature(AMDGPU::FeatureGFX9) ||
2186 STI.hasFeature(AMDGPU::FeatureGFX10)) &&
2187 "SDWAVopcDst should be present only on GFX9+");
2188
2189 bool IsWave32 = STI.hasFeature(AMDGPU::FeatureWavefrontSize32);
2190
2191 if (Val & SDWA9EncValues::VOPC_DST_VCC_MASK) {
2192 Val &= SDWA9EncValues::VOPC_DST_SGPR_MASK;
2193
2194 int TTmpIdx = getTTmpIdx(Val);
2195 if (TTmpIdx >= 0) {
2196 auto TTmpClsId = getTtmpClassId(IsWave32 ? 32 : 64);
2197 return createSRegOperand(TTmpClsId, TTmpIdx);
2198 }
2199 if (Val > SGPR_MAX) {
2200 return IsWave32 ? decodeSpecialReg32(Val) : decodeSpecialReg64(Val);
2201 }
2202 return createSRegOperand(getSgprClassId(IsWave32 ? 32 : 64), Val);
2203 }
2204 return createRegOperand(IsWave32 ? AMDGPU::VCC_LO : AMDGPU::VCC);
2205}
2206
2208 unsigned Val) const {
2209 return STI.hasFeature(AMDGPU::FeatureWavefrontSize32)
2210 ? decodeSrcOp(Inst, 32, Val)
2211 : decodeSrcOp(Inst, 64, Val);
2212}
2213
2215 unsigned Val) const {
2216 return decodeSrcOp(Inst, 32, Val);
2217}
2218
2221 return MCOperand();
2222 return MCOperand::createImm(Val);
2223}
2224
2226 using VersionField = AMDGPU::EncodingField<7, 0>;
2227 using W64Bit = AMDGPU::EncodingBit<13>;
2228 using W32Bit = AMDGPU::EncodingBit<14>;
2229 using MDPBit = AMDGPU::EncodingBit<15>;
2231
2232 auto [Version, W64, W32, MDP] = Encoding::decode(Imm);
2233
2234 // Decode into a plain immediate if any unused bits are raised.
2235 if (Encoding::encode(Version, W64, W32, MDP) != Imm)
2236 return MCOperand::createImm(Imm);
2237
2238 const auto &Versions = AMDGPU::UCVersion::getGFXVersions();
2239 const auto *I = find_if(
2240 Versions, [Version = Version](const AMDGPU::UCVersion::GFXVersion &V) {
2241 return V.Code == Version;
2242 });
2243 MCContext &Ctx = getContext();
2244 const MCExpr *E;
2245 if (I == Versions.end())
2247 else
2248 E = MCSymbolRefExpr::create(Ctx.getOrCreateSymbol(I->Symbol), Ctx);
2249
2250 if (W64)
2251 E = MCBinaryExpr::createOr(E, UCVersionW64Expr, Ctx);
2252 if (W32)
2253 E = MCBinaryExpr::createOr(E, UCVersionW32Expr, Ctx);
2254 if (MDP)
2255 E = MCBinaryExpr::createOr(E, UCVersionMDPExpr, Ctx);
2256
2257 return MCOperand::createExpr(E);
2258}
2259
2261 return STI.hasFeature(AMDGPU::FeatureVolcanicIslands);
2262}
2263
2265
2267 return STI.hasFeature(AMDGPU::FeatureGFX90AInsts);
2268}
2269
2271
2273
2277
2279 return STI.hasFeature(AMDGPU::FeatureGFX11);
2280}
2281
2285
2287 return STI.hasFeature(AMDGPU::FeatureGFX11_7Insts);
2288}
2289
2291 return STI.hasFeature(AMDGPU::FeatureGFX12);
2292}
2293
2297
2299
2303
2305
2309
2311 return STI.hasFeature(AMDGPU::FeatureArchitectedFlatScratch);
2312}
2313
2317
2318//===----------------------------------------------------------------------===//
2319// AMDGPU specific symbol handling
2320//===----------------------------------------------------------------------===//
2321
2322/// Print a string describing the reserved bit range specified by Mask with
2323/// offset BaseBytes for use in error comments. Mask is a single continuous
2324/// range of 1s surrounded by zeros. The format here is meant to align with the
2325/// tables that describe these bits in llvm.org/docs/AMDGPUUsage.html.
2326static SmallString<32> getBitRangeFromMask(uint32_t Mask, unsigned BaseBytes) {
2327 SmallString<32> Result;
2328 raw_svector_ostream S(Result);
2329
2330 int TrailingZeros = llvm::countr_zero(Mask);
2331 int PopCount = llvm::popcount(Mask);
2332
2333 if (PopCount == 1) {
2334 S << "bit (" << (TrailingZeros + BaseBytes * CHAR_BIT) << ')';
2335 } else {
2336 S << "bits in range ("
2337 << (TrailingZeros + PopCount - 1 + BaseBytes * CHAR_BIT) << ':'
2338 << (TrailingZeros + BaseBytes * CHAR_BIT) << ')';
2339 }
2340
2341 return Result;
2342}
2343
// Helper macros for the COMPUTE_PGM_RSRC* decoders. They are not
// self-contained: each one expands to code that refers to identifiers which
// must already be in scope at the expansion site (FourByteBuffer, and where
// noted KdStream, Indent and MAI).

// Extract the field selected by MASK from FourByteBuffer.
2344#define GET_FIELD(MASK) (AMDHSA_BITS_GET(FourByteBuffer, MASK))
// Write "<Indent><DIRECTIVE> <field value>\n" to KdStream. DIRECTIVE must be
// a string literal because it is pasted next to " " by literal concatenation.
2345#define PRINT_DIRECTIVE(DIRECTIVE, MASK) \
2346 do { \
2347 KdStream << Indent << DIRECTIVE " " << GET_FIELD(MASK) << '\n'; \
2348 } while (0)
// Same as PRINT_DIRECTIVE, but the text is preceded by MAI.getCommentString()
// and a space, so the field is emitted as an assembler comment.
2349#define PRINT_PSEUDO_DIRECTIVE_COMMENT(DIRECTIVE, MASK) \
2350 do { \
2351 KdStream << Indent << MAI.getCommentString() << ' ' << DIRECTIVE " " \
2352 << GET_FIELD(MASK) << '\n'; \
2353 } while (0)
2354
// If any bit of MASK is set in FourByteBuffer, *return from the enclosing
// function* with a createStringError describing the offending bit range
// (computed by getBitRangeFromMask with a byte offset of 0). DESC and MSG must
// be string literals; they are concatenated into the format string.
2355#define CHECK_RESERVED_BITS_IMPL(MASK, DESC, MSG) \
2356 do { \
2357 if (FourByteBuffer & (MASK)) { \
2358 return createStringError(std::errc::invalid_argument, \
2359 "kernel descriptor " DESC \
2360 " reserved %s set" MSG, \
2361 getBitRangeFromMask((MASK), 0).c_str()); \
2362 } \
2363 } while (0)
2364
// Convenience wrappers around CHECK_RESERVED_BITS_IMPL:
//  - CHECK_RESERVED_BITS:          description is the stringified MASK name.
//  - CHECK_RESERVED_BITS_MSG:      as above, plus ", " MSG appended.
//  - CHECK_RESERVED_BITS_DESC:     caller-supplied description, no message.
//  - CHECK_RESERVED_BITS_DESC_MSG: caller-supplied description and message.
2365#define CHECK_RESERVED_BITS(MASK) CHECK_RESERVED_BITS_IMPL(MASK, #MASK, "")
2366#define CHECK_RESERVED_BITS_MSG(MASK, MSG) \
2367 CHECK_RESERVED_BITS_IMPL(MASK, #MASK, ", " MSG)
2368#define CHECK_RESERVED_BITS_DESC(MASK, DESC) \
2369 CHECK_RESERVED_BITS_IMPL(MASK, DESC, "")
2370#define CHECK_RESERVED_BITS_DESC_MSG(MASK, DESC, MSG) \
2371 CHECK_RESERVED_BITS_IMPL(MASK, DESC, ", " MSG)
2372
2373// NOLINTNEXTLINE(readability-identifier-naming)
2375 uint32_t FourByteBuffer, raw_string_ostream &KdStream) const {
2376 using namespace amdhsa;
2377 StringRef Indent = "\t";
2378
2379 // We cannot accurately backward compute #VGPRs used from
2380 // GRANULATED_WORKITEM_VGPR_COUNT. But we are concerned with getting the same
2381 // value of GRANULATED_WORKITEM_VGPR_COUNT in the reassembled binary. So we
2382 // simply calculate the inverse of what the assembler does.
2383
2384 uint32_t GranulatedWorkitemVGPRCount =
2385 GET_FIELD(COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT);
2386
2387 uint32_t NextFreeVGPR =
2388 (GranulatedWorkitemVGPRCount + 1) *
2389 AMDGPU::IsaInfo::getVGPREncodingGranule(STI, EnableWavefrontSize32);
2390
2391 KdStream << Indent << ".amdhsa_next_free_vgpr " << NextFreeVGPR << '\n';
2392
2393 // We cannot backward compute values used to calculate
2394 // GRANULATED_WAVEFRONT_SGPR_COUNT. Hence the original values for following
2395 // directives can't be computed:
2396 // .amdhsa_reserve_vcc
2397 // .amdhsa_reserve_flat_scratch
2398 // .amdhsa_reserve_xnack_mask
2399 // They take their respective default values if not specified in the assembly.
2400 //
2401 // GRANULATED_WAVEFRONT_SGPR_COUNT
2402 // = f(NEXT_FREE_SGPR + VCC + FLAT_SCRATCH + XNACK_MASK)
2403 //
2404 // We compute the inverse as though all directives apart from NEXT_FREE_SGPR
2405 // are set to 0. So while disassembling we consider that:
2406 //
2407 // GRANULATED_WAVEFRONT_SGPR_COUNT
2408 // = f(NEXT_FREE_SGPR + 0 + 0 + 0)
2409 //
2410 // The disassembler cannot recover the original values of those 3 directives.
2411
2412 uint32_t GranulatedWavefrontSGPRCount =
2413 GET_FIELD(COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT);
2414
2415 if (isGFX10Plus())
2416 CHECK_RESERVED_BITS_MSG(COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT,
2417 "must be zero on gfx10+");
2418
2419 uint32_t NextFreeSGPR = (GranulatedWavefrontSGPRCount + 1) *
2421
2422 KdStream << Indent << ".amdhsa_reserve_vcc " << 0 << '\n';
2424 KdStream << Indent << ".amdhsa_reserve_flat_scratch " << 0 << '\n';
2425 bool ReservedXnackMask = STI.hasFeature(AMDGPU::FeatureXNACK);
2426 assert(!ReservedXnackMask || STI.hasFeature(AMDGPU::FeatureSupportsXNACK));
2427 KdStream << Indent << ".amdhsa_reserve_xnack_mask " << ReservedXnackMask
2428 << '\n';
2429 KdStream << Indent << ".amdhsa_next_free_sgpr " << NextFreeSGPR << "\n";
2430
2431 CHECK_RESERVED_BITS(COMPUTE_PGM_RSRC1_PRIORITY);
2432
2433 PRINT_DIRECTIVE(".amdhsa_float_round_mode_32",
2434 COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_32);
2435 PRINT_DIRECTIVE(".amdhsa_float_round_mode_16_64",
2436 COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_16_64);
2437 PRINT_DIRECTIVE(".amdhsa_float_denorm_mode_32",
2438 COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_32);
2439 PRINT_DIRECTIVE(".amdhsa_float_denorm_mode_16_64",
2440 COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_16_64);
2441
2442 CHECK_RESERVED_BITS(COMPUTE_PGM_RSRC1_PRIV);
2443
2444 if (STI.hasFeature(AMDGPU::FeatureDX10ClampAndIEEEMode))
2445 PRINT_DIRECTIVE(".amdhsa_dx10_clamp",
2446 COMPUTE_PGM_RSRC1_GFX6_GFX11_ENABLE_DX10_CLAMP);
2447
2448 CHECK_RESERVED_BITS(COMPUTE_PGM_RSRC1_DEBUG_MODE);
2449
2450 if (STI.hasFeature(AMDGPU::FeatureDX10ClampAndIEEEMode))
2451 PRINT_DIRECTIVE(".amdhsa_ieee_mode",
2452 COMPUTE_PGM_RSRC1_GFX6_GFX11_ENABLE_IEEE_MODE);
2453
2454 CHECK_RESERVED_BITS(COMPUTE_PGM_RSRC1_BULKY);
2455 CHECK_RESERVED_BITS(COMPUTE_PGM_RSRC1_CDBG_USER);
2456
2457 // Bits [26].
2458 if (isGFX9Plus()) {
2459 PRINT_DIRECTIVE(".amdhsa_fp16_overflow", COMPUTE_PGM_RSRC1_GFX9_PLUS_FP16_OVFL);
2460 } else {
2461 CHECK_RESERVED_BITS_DESC_MSG(COMPUTE_PGM_RSRC1_GFX6_GFX8_RESERVED0,
2462 "COMPUTE_PGM_RSRC1", "must be zero pre-gfx9");
2463 }
2464
2465 // Bits [27].
2466 if (isGFX1250Plus()) {
2467 PRINT_PSEUDO_DIRECTIVE_COMMENT("FLAT_SCRATCH_IS_NV",
2468 COMPUTE_PGM_RSRC1_GFX125_FLAT_SCRATCH_IS_NV);
2469 } else {
2470 CHECK_RESERVED_BITS_DESC(COMPUTE_PGM_RSRC1_GFX6_GFX120_RESERVED1,
2471 "COMPUTE_PGM_RSRC1");
2472 }
2473
2474 // Bits [28].
2475 CHECK_RESERVED_BITS_DESC(COMPUTE_PGM_RSRC1_RESERVED2, "COMPUTE_PGM_RSRC1");
2476
2477 // Bits [29-31].
2478 if (isGFX10Plus()) {
2479 // WGP_MODE is not available on GFX1250.
2480 if (!isGFX1250Plus()) {
2481 PRINT_DIRECTIVE(".amdhsa_workgroup_processor_mode",
2482 COMPUTE_PGM_RSRC1_GFX10_PLUS_WGP_MODE);
2483 }
2484 PRINT_DIRECTIVE(".amdhsa_memory_ordered", COMPUTE_PGM_RSRC1_GFX10_PLUS_MEM_ORDERED);
2485 PRINT_DIRECTIVE(".amdhsa_forward_progress", COMPUTE_PGM_RSRC1_GFX10_PLUS_FWD_PROGRESS);
2486 } else {
2487 CHECK_RESERVED_BITS_DESC(COMPUTE_PGM_RSRC1_GFX6_GFX9_RESERVED3,
2488 "COMPUTE_PGM_RSRC1");
2489 }
2490
2491 if (isGFX12Plus())
2492 PRINT_DIRECTIVE(".amdhsa_round_robin_scheduling",
2493 COMPUTE_PGM_RSRC1_GFX12_PLUS_ENABLE_WG_RR_EN);
2494
2495 return true;
2496}
2497
2498// NOLINTNEXTLINE(readability-identifier-naming)
2500 uint32_t FourByteBuffer, raw_string_ostream &KdStream) const {
2501 using namespace amdhsa;
2502 StringRef Indent = "\t";
2504 PRINT_DIRECTIVE(".amdhsa_enable_private_segment",
2505 COMPUTE_PGM_RSRC2_ENABLE_PRIVATE_SEGMENT);
2506 else
2507 PRINT_DIRECTIVE(".amdhsa_system_sgpr_private_segment_wavefront_offset",
2508 COMPUTE_PGM_RSRC2_ENABLE_PRIVATE_SEGMENT);
2509 PRINT_DIRECTIVE(".amdhsa_system_sgpr_workgroup_id_x",
2510 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_X);
2511 PRINT_DIRECTIVE(".amdhsa_system_sgpr_workgroup_id_y",
2512 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Y);
2513 PRINT_DIRECTIVE(".amdhsa_system_sgpr_workgroup_id_z",
2514 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Z);
2515 PRINT_DIRECTIVE(".amdhsa_system_sgpr_workgroup_info",
2516 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_INFO);
2517 PRINT_DIRECTIVE(".amdhsa_system_vgpr_workitem_id",
2518 COMPUTE_PGM_RSRC2_ENABLE_VGPR_WORKITEM_ID);
2519
2520 CHECK_RESERVED_BITS(COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_ADDRESS_WATCH);
2521 CHECK_RESERVED_BITS(COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_MEMORY);
2522 CHECK_RESERVED_BITS(COMPUTE_PGM_RSRC2_GRANULATED_LDS_SIZE);
2523
2525 ".amdhsa_exception_fp_ieee_invalid_op",
2526 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INVALID_OPERATION);
2527 PRINT_DIRECTIVE(".amdhsa_exception_fp_denorm_src",
2528 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_FP_DENORMAL_SOURCE);
2530 ".amdhsa_exception_fp_ieee_div_zero",
2531 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_DIVISION_BY_ZERO);
2532 PRINT_DIRECTIVE(".amdhsa_exception_fp_ieee_overflow",
2533 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_OVERFLOW);
2534 PRINT_DIRECTIVE(".amdhsa_exception_fp_ieee_underflow",
2535 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_UNDERFLOW);
2536 PRINT_DIRECTIVE(".amdhsa_exception_fp_ieee_inexact",
2537 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INEXACT);
2538 PRINT_DIRECTIVE(".amdhsa_exception_int_div_zero",
2539 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_INT_DIVIDE_BY_ZERO);
2540
2541 CHECK_RESERVED_BITS_DESC(COMPUTE_PGM_RSRC2_RESERVED0, "COMPUTE_PGM_RSRC2");
2542
2543 return true;
2544}
2545
2546// NOLINTNEXTLINE(readability-identifier-naming)
2548 uint32_t FourByteBuffer, raw_string_ostream &KdStream) const {
2549 using namespace amdhsa;
2550 StringRef Indent = "\t";
2551 if (isGFX90A()) {
2552 KdStream << Indent << ".amdhsa_accum_offset "
2553 << (GET_FIELD(COMPUTE_PGM_RSRC3_GFX90A_ACCUM_OFFSET) + 1) * 4
2554 << '\n';
2555
2556 PRINT_DIRECTIVE(".amdhsa_tg_split", COMPUTE_PGM_RSRC3_GFX90A_TG_SPLIT);
2557
2558 CHECK_RESERVED_BITS_DESC_MSG(COMPUTE_PGM_RSRC3_GFX90A_RESERVED0,
2559 "COMPUTE_PGM_RSRC3", "must be zero on gfx90a");
2560 CHECK_RESERVED_BITS_DESC_MSG(COMPUTE_PGM_RSRC3_GFX90A_RESERVED1,
2561 "COMPUTE_PGM_RSRC3", "must be zero on gfx90a");
2562 } else if (isGFX10Plus()) {
2563 // Bits [0-3].
2564 if (!isGFX12Plus()) {
2565 if (!EnableWavefrontSize32 || !*EnableWavefrontSize32) {
2566 PRINT_DIRECTIVE(".amdhsa_shared_vgpr_count",
2567 COMPUTE_PGM_RSRC3_GFX10_GFX11_SHARED_VGPR_COUNT);
2568 } else {
2570 "SHARED_VGPR_COUNT",
2571 COMPUTE_PGM_RSRC3_GFX10_GFX11_SHARED_VGPR_COUNT);
2572 }
2573 } else {
2574 CHECK_RESERVED_BITS_DESC_MSG(COMPUTE_PGM_RSRC3_GFX12_PLUS_RESERVED0,
2575 "COMPUTE_PGM_RSRC3",
2576 "must be zero on gfx12+");
2577 }
2578
2579 // Bits [4-11].
2580 if (isGFX11()) {
2581 PRINT_DIRECTIVE(".amdhsa_inst_pref_size",
2582 COMPUTE_PGM_RSRC3_GFX11_INST_PREF_SIZE);
2583 PRINT_PSEUDO_DIRECTIVE_COMMENT("TRAP_ON_START",
2584 COMPUTE_PGM_RSRC3_GFX11_TRAP_ON_START);
2585 PRINT_PSEUDO_DIRECTIVE_COMMENT("TRAP_ON_END",
2586 COMPUTE_PGM_RSRC3_GFX11_TRAP_ON_END);
2587 } else if (isGFX12Plus()) {
2588 PRINT_DIRECTIVE(".amdhsa_inst_pref_size",
2589 COMPUTE_PGM_RSRC3_GFX12_PLUS_INST_PREF_SIZE);
2590 } else {
2591 CHECK_RESERVED_BITS_DESC_MSG(COMPUTE_PGM_RSRC3_GFX10_RESERVED1,
2592 "COMPUTE_PGM_RSRC3",
2593 "must be zero on gfx10");
2594 }
2595
2596 // Bits [12].
2597 CHECK_RESERVED_BITS_DESC_MSG(COMPUTE_PGM_RSRC3_GFX10_PLUS_RESERVED2,
2598 "COMPUTE_PGM_RSRC3", "must be zero on gfx10+");
2599
2600 // Bits [13].
2601 if (isGFX12Plus()) {
2603 COMPUTE_PGM_RSRC3_GFX12_PLUS_GLG_EN);
2604 } else {
2605 CHECK_RESERVED_BITS_DESC_MSG(COMPUTE_PGM_RSRC3_GFX10_GFX11_RESERVED3,
2606 "COMPUTE_PGM_RSRC3",
2607 "must be zero on gfx10 or gfx11");
2608 }
2609
2610 // Bits [14-21].
2611 if (isGFX1250Plus()) {
2612 PRINT_DIRECTIVE(".amdhsa_named_barrier_count",
2613 COMPUTE_PGM_RSRC3_GFX125_NAMED_BAR_CNT);
2615 "ENABLE_DYNAMIC_VGPR", COMPUTE_PGM_RSRC3_GFX125_ENABLE_DYNAMIC_VGPR);
2617 COMPUTE_PGM_RSRC3_GFX125_TCP_SPLIT);
2619 "ENABLE_DIDT_THROTTLE",
2620 COMPUTE_PGM_RSRC3_GFX125_ENABLE_DIDT_THROTTLE);
2621 } else {
2622 CHECK_RESERVED_BITS_DESC_MSG(COMPUTE_PGM_RSRC3_GFX10_GFX120_RESERVED4,
2623 "COMPUTE_PGM_RSRC3",
2624 "must be zero on gfx10+");
2625 }
2626
2627 // Bits [22-30].
2628 CHECK_RESERVED_BITS_DESC_MSG(COMPUTE_PGM_RSRC3_GFX10_PLUS_RESERVED5,
2629 "COMPUTE_PGM_RSRC3", "must be zero on gfx10+");
2630
2631 // Bits [31].
2632 if (isGFX11Plus()) {
2634 COMPUTE_PGM_RSRC3_GFX11_PLUS_IMAGE_OP);
2635 } else {
2636 CHECK_RESERVED_BITS_DESC_MSG(COMPUTE_PGM_RSRC3_GFX10_RESERVED6,
2637 "COMPUTE_PGM_RSRC3",
2638 "must be zero on gfx10");
2639 }
2640 } else if (FourByteBuffer) {
2641 return createStringError(
2642 std::errc::invalid_argument,
2643 "kernel descriptor COMPUTE_PGM_RSRC3 must be all zero before gfx9");
2644 }
2645 return true;
2646}
2647#undef PRINT_PSEUDO_DIRECTIVE_COMMENT
2648#undef PRINT_DIRECTIVE
2649#undef GET_FIELD
2650#undef CHECK_RESERVED_BITS_IMPL
2651#undef CHECK_RESERVED_BITS
2652#undef CHECK_RESERVED_BITS_MSG
2653#undef CHECK_RESERVED_BITS_DESC
2654#undef CHECK_RESERVED_BITS_DESC_MSG
2655
2656/// Create an error object to return from onSymbolStart for reserved kernel
2657/// descriptor bits being set.
2658static Error createReservedKDBitsError(uint32_t Mask, unsigned BaseBytes,
2659 const char *Msg = "") {
2660 return createStringError(
2661 std::errc::invalid_argument, "kernel descriptor reserved %s set%s%s",
2662 getBitRangeFromMask(Mask, BaseBytes).c_str(), *Msg ? ", " : "", Msg);
2663}
2664
2665/// Create an error object to return from onSymbolStart for reserved kernel
2666/// descriptor bytes being set.
2667static Error createReservedKDBytesError(unsigned BaseInBytes,
2668 unsigned WidthInBytes) {
2669 // Create an error comment in the same format as the "Kernel Descriptor"
2670 // table here: https://llvm.org/docs/AMDGPUUsage.html#kernel-descriptor .
2671 return createStringError(
2672 std::errc::invalid_argument,
2673 "kernel descriptor reserved bits in range (%u:%u) set",
2674 (BaseInBytes + WidthInBytes) * CHAR_BIT - 1, BaseInBytes * CHAR_BIT);
2675}
2676
2679 raw_string_ostream &KdStream) const {
2680#define PRINT_DIRECTIVE(DIRECTIVE, MASK) \
2681 do { \
2682 KdStream << Indent << DIRECTIVE " " \
2683 << ((TwoByteBuffer & MASK) >> (MASK##_SHIFT)) << '\n'; \
2684 } while (0)
2685
2686 uint16_t TwoByteBuffer = 0;
2687 uint32_t FourByteBuffer = 0;
2688
2689 StringRef ReservedBytes;
2690 StringRef Indent = "\t";
2691
2692 assert(Bytes.size() == 64);
2693 DataExtractor DE(Bytes, /*IsLittleEndian=*/true);
2694
2695 switch (Cursor.tell()) {
2697 FourByteBuffer = DE.getU32(Cursor);
2698 KdStream << Indent << ".amdhsa_group_segment_fixed_size " << FourByteBuffer
2699 << '\n';
2700 return true;
2701
2703 FourByteBuffer = DE.getU32(Cursor);
2704 KdStream << Indent << ".amdhsa_private_segment_fixed_size "
2705 << FourByteBuffer << '\n';
2706 return true;
2707
2709 FourByteBuffer = DE.getU32(Cursor);
2710 KdStream << Indent << ".amdhsa_kernarg_size "
2711 << FourByteBuffer << '\n';
2712 return true;
2713
2715 // 4 reserved bytes, must be 0.
2716 ReservedBytes = DE.getBytes(Cursor, 4);
2717 for (char B : ReservedBytes) {
2718 if (B != 0)
2720 }
2721 return true;
2722
2724 // KERNEL_CODE_ENTRY_BYTE_OFFSET
2725 // So far no directive controls this for Code Object V3, so simply skip for
2726 // disassembly.
2727 DE.skip(Cursor, 8);
2728 return true;
2729
2731 // 20 reserved bytes, must be 0.
2732 ReservedBytes = DE.getBytes(Cursor, 20);
2733 for (char B : ReservedBytes) {
2734 if (B != 0)
2736 }
2737 return true;
2738
2740 FourByteBuffer = DE.getU32(Cursor);
2741 return decodeCOMPUTE_PGM_RSRC3(FourByteBuffer, KdStream);
2742
2744 FourByteBuffer = DE.getU32(Cursor);
2745 return decodeCOMPUTE_PGM_RSRC1(FourByteBuffer, KdStream);
2746
2748 FourByteBuffer = DE.getU32(Cursor);
2749 return decodeCOMPUTE_PGM_RSRC2(FourByteBuffer, KdStream);
2750
2752 using namespace amdhsa;
2753 TwoByteBuffer = DE.getU16(Cursor);
2754
2756 PRINT_DIRECTIVE(".amdhsa_user_sgpr_private_segment_buffer",
2757 KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER);
2758 PRINT_DIRECTIVE(".amdhsa_user_sgpr_dispatch_ptr",
2759 KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR);
2760 PRINT_DIRECTIVE(".amdhsa_user_sgpr_queue_ptr",
2761 KERNEL_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR);
2762 PRINT_DIRECTIVE(".amdhsa_user_sgpr_kernarg_segment_ptr",
2763 KERNEL_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR);
2764 PRINT_DIRECTIVE(".amdhsa_user_sgpr_dispatch_id",
2765 KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID);
2767 PRINT_DIRECTIVE(".amdhsa_user_sgpr_flat_scratch_init",
2768 KERNEL_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT);
2769 PRINT_DIRECTIVE(".amdhsa_user_sgpr_private_segment_size",
2770 KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE);
2771
2772 if (TwoByteBuffer & KERNEL_CODE_PROPERTY_RESERVED0)
2773 return createReservedKDBitsError(KERNEL_CODE_PROPERTY_RESERVED0,
2775
2776 // Reserved for GFX9
2777 if (isGFX9() &&
2778 (TwoByteBuffer & KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32)) {
2780 KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32,
2781 amdhsa::KERNEL_CODE_PROPERTIES_OFFSET, "must be zero on gfx9");
2782 }
2783 if (isGFX10Plus()) {
2784 PRINT_DIRECTIVE(".amdhsa_wavefront_size32",
2785 KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32);
2786 }
2787
2788 if (CodeObjectVersion >= AMDGPU::AMDHSA_COV5)
2789 PRINT_DIRECTIVE(".amdhsa_uses_dynamic_stack",
2790 KERNEL_CODE_PROPERTY_USES_DYNAMIC_STACK);
2791
2792 if (TwoByteBuffer & KERNEL_CODE_PROPERTY_RESERVED1) {
2793 return createReservedKDBitsError(KERNEL_CODE_PROPERTY_RESERVED1,
2795 }
2796
2797 return true;
2798
2800 using namespace amdhsa;
2801 TwoByteBuffer = DE.getU16(Cursor);
2802 if (TwoByteBuffer & KERNARG_PRELOAD_SPEC_LENGTH) {
2803 PRINT_DIRECTIVE(".amdhsa_user_sgpr_kernarg_preload_length",
2804 KERNARG_PRELOAD_SPEC_LENGTH);
2805 }
2806
2807 if (TwoByteBuffer & KERNARG_PRELOAD_SPEC_OFFSET) {
2808 PRINT_DIRECTIVE(".amdhsa_user_sgpr_kernarg_preload_offset",
2809 KERNARG_PRELOAD_SPEC_OFFSET);
2810 }
2811 return true;
2812
2814 // 4 bytes from here are reserved, must be 0.
2815 ReservedBytes = DE.getBytes(Cursor, 4);
2816 for (char B : ReservedBytes) {
2817 if (B != 0)
2819 }
2820 return true;
2821
2822 default:
2823 llvm_unreachable("Unhandled index. Case statements cover everything.");
2824 return true;
2825 }
2826#undef PRINT_DIRECTIVE
2827}
2828
2830 StringRef KdName, ArrayRef<uint8_t> Bytes, uint64_t KdAddress) const {
2831
2832 // CP microcode requires the kernel descriptor to be 64 aligned.
2833 if (Bytes.size() != 64 || KdAddress % 64 != 0)
2834 return createStringError(std::errc::invalid_argument,
2835 "kernel descriptor must be 64-byte aligned");
2836
2837 // FIXME: We can't actually decode "in order" as is done below, as e.g. GFX10
2838 // requires us to know the setting of .amdhsa_wavefront_size32 in order to
2839 // accurately produce .amdhsa_next_free_vgpr, and they appear in the wrong
2840 // order. Workaround this by first looking up .amdhsa_wavefront_size32 here
2841 // when required.
2842 if (isGFX10Plus()) {
2843 uint16_t KernelCodeProperties =
2846 EnableWavefrontSize32 =
2847 AMDHSA_BITS_GET(KernelCodeProperties,
2848 amdhsa::KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32);
2849 }
2850
2851 std::string Kd;
2852 raw_string_ostream KdStream(Kd);
2853 KdStream << ".amdhsa_kernel " << KdName << '\n';
2854
2856 while (C && C.tell() < Bytes.size()) {
2857 Expected<bool> Res = decodeKernelDescriptorDirective(C, Bytes, KdStream);
2858
2859 cantFail(C.takeError());
2860
2861 if (!Res)
2862 return Res;
2863 }
2864 KdStream << ".end_amdhsa_kernel\n";
2865 outs() << KdStream.str();
2866 return true;
2867}
2868
2870 uint64_t &Size,
2871 ArrayRef<uint8_t> Bytes,
2872 uint64_t Address) const {
2873 // Right now only kernel descriptor needs to be handled.
2874 // We ignore all other symbols for target specific handling.
2875 // TODO:
2876 // Fix the spurious symbol issue for AMDGPU kernels. Exists for both Code
2877 // Object V2 and V3 when symbols are marked protected.
2878
2879 // amd_kernel_code_t for Code Object V2.
2880 if (Symbol.Type == ELF::STT_AMDGPU_HSA_KERNEL) {
2881 Size = 256;
2882 return createStringError(std::errc::invalid_argument,
2883 "code object v2 is not supported");
2884 }
2885
2886 // Code Object V3 kernel descriptors.
2887 StringRef Name = Symbol.Name;
2888 if (Symbol.Type == ELF::STT_OBJECT && Name.ends_with(StringRef(".kd"))) {
2889 Size = 64; // Size = 64 regardless of success or failure.
2890 return decodeKernelDescriptor(Name.drop_back(3), Bytes, Address);
2891 }
2892
2893 return false;
2894}
2895
2896const MCExpr *AMDGPUDisassembler::createConstantSymbolExpr(StringRef Id,
2897 int64_t Val) {
2898 MCContext &Ctx = getContext();
2899 MCSymbol *Sym = Ctx.getOrCreateSymbol(Id);
2900 // Note: only set value to Val on a new symbol in case an dissassembler
2901 // has already been initialized in this context.
2902 if (!Sym->isVariable()) {
2904 } else {
2905 int64_t Res = ~Val;
2906 bool Valid = Sym->getVariableValue()->evaluateAsAbsolute(Res);
2907 if (!Valid || Res != Val)
2908 Ctx.reportWarning(SMLoc(), "unsupported redefinition of " + Id);
2909 }
2910 return MCSymbolRefExpr::create(Sym, Ctx);
2911}
2912
2914 const uint64_t TSFlags = MCII->get(MI.getOpcode()).TSFlags;
2915
2916 // Check for MUBUF and MTBUF instructions
2917 if (TSFlags & (SIInstrFlags::MTBUF | SIInstrFlags::MUBUF))
2918 return true;
2919
2920 // Check for SMEM buffer instructions (S_BUFFER_* instructions)
2921 if ((TSFlags & SIInstrFlags::SMRD) && AMDGPU::getSMEMIsBuffer(MI.getOpcode()))
2922 return true;
2923
2924 return false;
2925}
2926
2927//===----------------------------------------------------------------------===//
2928// AMDGPUSymbolizer
2929//===----------------------------------------------------------------------===//
2930
2931// Try to find symbol name for specified label
2933 MCInst &Inst, raw_ostream & /*cStream*/, int64_t Value,
2934 uint64_t /*Address*/, bool IsBranch, uint64_t /*Offset*/,
2935 uint64_t /*OpSize*/, uint64_t /*InstSize*/) {
2936
2937 if (!IsBranch) {
2938 return false;
2939 }
2940
2941 auto *Symbols = static_cast<SectionSymbolsTy *>(DisInfo);
2942 if (!Symbols)
2943 return false;
2944
2945 auto Result = llvm::find_if(*Symbols, [Value](const SymbolInfoTy &Val) {
2946 return Val.Addr == static_cast<uint64_t>(Value) &&
2947 Val.Type == ELF::STT_NOTYPE;
2948 });
2949 if (Result != Symbols->end()) {
2950 auto *Sym = Ctx.getOrCreateSymbol(Result->Name);
2951 const auto *Add = MCSymbolRefExpr::create(Sym, Ctx);
2953 return true;
2954 }
2955 // Add to list of referenced addresses, so caller can synthesize a label.
2956 ReferencedAddresses.push_back(static_cast<uint64_t>(Value));
2957 return false;
2958}
2959
2961 int64_t Value,
2962 uint64_t Address) {
2963 llvm_unreachable("unimplemented");
2964}
2965
2966//===----------------------------------------------------------------------===//
2967// Initialization
2968//===----------------------------------------------------------------------===//
2969
2971 LLVMOpInfoCallback /*GetOpInfo*/,
2972 LLVMSymbolLookupCallback /*SymbolLookUp*/,
2973 void *DisInfo,
2974 MCContext *Ctx,
2975 std::unique_ptr<MCRelocationInfo> &&RelInfo) {
2976 return new AMDGPUSymbolizer(*Ctx, std::move(RelInfo), DisInfo);
2977}
2978
2980 const MCSubtargetInfo &STI,
2981 MCContext &Ctx) {
2982 return new AMDGPUDisassembler(STI, Ctx, T.createMCInstrInfo());
2983}
2984
2985extern "C" LLVM_ABI LLVM_EXTERNAL_VISIBILITY void
MCDisassembler::DecodeStatus DecodeStatus
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
aarch64 promote const
#define CHECK_RESERVED_BITS_DESC(MASK, DESC)
static VOPModifiers collectVOPModifiers(const MCInst &MI, bool IsVOP3P=false)
static int insertNamedMCOperand(MCInst &MI, const MCOperand &Op, AMDGPU::OpName Name)
LLVM_ABI LLVM_EXTERNAL_VISIBILITY void LLVMInitializeAMDGPUDisassembler()
static DecodeStatus decodeOperand_VSrcT16_Lo128(MCInst &Inst, unsigned Imm, uint64_t, const MCDisassembler *Decoder)
static DecodeStatus decodeOperand_KImmFP64(MCInst &Inst, uint64_t Imm, uint64_t Addr, const MCDisassembler *Decoder)
static SmallString< 32 > getBitRangeFromMask(uint32_t Mask, unsigned BaseBytes)
Print a string describing the reserved bit range specified by Mask with offset BaseBytes for use in e...
#define DECODE_OPERAND_SREG_8(RegClass, OpWidth)
static DecodeStatus decodeSMEMOffset(MCInst &Inst, unsigned Imm, uint64_t Addr, const MCDisassembler *Decoder)
static std::bitset< 128 > eat16Bytes(ArrayRef< uint8_t > &Bytes)
static DecodeStatus decodeVersionImm(MCInst &Inst, unsigned Imm, uint64_t, const MCDisassembler *Decoder)
#define DECODE_OPERAND_SREG_7(RegClass, OpWidth)
static DecodeStatus decodeSrcA9(MCInst &Inst, unsigned Imm, uint64_t, const MCDisassembler *Decoder)
static DecodeStatus decodeOperand_VGPR_16(MCInst &Inst, unsigned Imm, uint64_t, const MCDisassembler *Decoder)
#define PRINT_PSEUDO_DIRECTIVE_COMMENT(DIRECTIVE, MASK)
static DecodeStatus decodeSrcOp(MCInst &Inst, unsigned EncSize, unsigned OpWidth, unsigned Imm, unsigned EncImm, const MCDisassembler *Decoder)
static DecodeStatus decodeDpp8FI(MCInst &Inst, unsigned Val, uint64_t Addr, const MCDisassembler *Decoder)
static DecodeStatus decodeOperand_VSrc_f64(MCInst &Inst, unsigned Imm, uint64_t Addr, const MCDisassembler *Decoder)
static MCRegister CheckVGPROverflow(MCRegister Reg, const MCRegisterClass &RC, const MCRegisterInfo &MRI)
static int64_t getInlineImmValBF16(unsigned Imm)
#define DECODE_SDWA(DecName)
static DecodeStatus decodeSOPPBrTarget(MCInst &Inst, unsigned Imm, uint64_t Addr, const MCDisassembler *Decoder)
#define DECODE_OPERAND_REG_8(RegClass)
#define PRINT_DIRECTIVE(DIRECTIVE, MASK)
static DecodeStatus decodeSrcRegOrImm9(MCInst &Inst, unsigned Imm, uint64_t, const MCDisassembler *Decoder)
static DecodeStatus DecodeVGPR_16RegisterClass(MCInst &Inst, unsigned Imm, uint64_t, const MCDisassembler *Decoder)
static DecodeStatus decodeSrcReg9(MCInst &Inst, unsigned Imm, uint64_t, const MCDisassembler *Decoder)
static int64_t getInlineImmVal32(unsigned Imm)
static MCDisassembler::DecodeStatus addOperand(MCInst &Inst, const MCOperand &Opnd)
#define CHECK_RESERVED_BITS(MASK)
static DecodeStatus decodeSrcAV10(MCInst &Inst, unsigned Imm, uint64_t, const MCDisassembler *Decoder)
#define SGPR_MAX
static int64_t getInlineImmVal64(unsigned Imm)
static T eatBytes(ArrayRef< uint8_t > &Bytes)
static DecodeStatus decodeOperand_KImmFP(MCInst &Inst, unsigned Imm, uint64_t Addr, const MCDisassembler *Decoder)
static DecodeStatus decodeAVLdSt(MCInst &Inst, unsigned Imm, unsigned Opw, const MCDisassembler *Decoder)
static MCDisassembler * createAMDGPUDisassembler(const Target &T, const MCSubtargetInfo &STI, MCContext &Ctx)
static DecodeStatus decodeSrcRegOrImmA9(MCInst &Inst, unsigned Imm, uint64_t, const MCDisassembler *Decoder)
static DecodeStatus DecodeVGPR_16_Lo128RegisterClass(MCInst &Inst, unsigned Imm, uint64_t, const MCDisassembler *Decoder)
#define CHECK_RESERVED_BITS_MSG(MASK, MSG)
static DecodeStatus decodeOperandVOPDDstY(MCInst &Inst, unsigned Val, uint64_t Addr, const void *Decoder)
static MCSymbolizer * createAMDGPUSymbolizer(const Triple &, LLVMOpInfoCallback, LLVMSymbolLookupCallback, void *DisInfo, MCContext *Ctx, std::unique_ptr< MCRelocationInfo > &&RelInfo)
static DecodeStatus decodeBoolReg(MCInst &Inst, unsigned Val, uint64_t Addr, const MCDisassembler *Decoder)
static int64_t getInlineImmValF16(unsigned Imm)
#define GET_FIELD(MASK)
static std::bitset< 96 > eat12Bytes(ArrayRef< uint8_t > &Bytes)
static DecodeStatus decodeOperand_VSrcT16(MCInst &Inst, unsigned Imm, uint64_t, const MCDisassembler *Decoder)
static Error createReservedKDBytesError(unsigned BaseInBytes, unsigned WidthInBytes)
Create an error object to return from onSymbolStart for reserved kernel descriptor bytes being set.
static DecodeStatus decodeSplitBarrier(MCInst &Inst, unsigned Val, uint64_t Addr, const MCDisassembler *Decoder)
static DecodeStatus decodeAV10(MCInst &Inst, unsigned Imm, uint64_t, const MCDisassembler *Decoder)
#define CHECK_RESERVED_BITS_DESC_MSG(MASK, DESC, MSG)
static Error createReservedKDBitsError(uint32_t Mask, unsigned BaseBytes, const char *Msg="")
Create an error object to return from onSymbolStart for reserved kernel descriptor bits being set.
static void adjustMFMA_F8F6F4OpRegClass(const MCRegisterInfo &MRI, MCOperand &MO, uint8_t NumRegs)
Adjust the register values used by V_MFMA_F8F6F4_f8_f8 instructions to the appropriate subregister for the used format width.
This file contains declaration for AMDGPU ISA disassembler.
Provides AMDGPU specific target descriptions.
AMDHSA kernel descriptor definitions.
#define AMDHSA_BITS_GET(SRC, MSK)
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
#define LLVM_ABI
Definition Compiler.h:213
#define LLVM_EXTERNAL_VISIBILITY
Definition Compiler.h:132
IRTranslator LLVM IR MI
#define I(x, y, z)
Definition MD5.cpp:57
Register Reg
#define T
MachineInstr unsigned OpIdx
Interface definition for SIRegisterInfo.
MCOperand decodeNonVGPRSrcOp(const MCInst &Inst, unsigned Width, unsigned Val) const
MCOperand decodeLiteral64Constant() const
void convertVOPC64DPPInst(MCInst &MI) const
bool isBufferInstruction(const MCInst &MI) const
Check if the instruction is a buffer operation (MUBUF, MTBUF, or S_BUFFER)
void convertEXPInst(MCInst &MI) const
MCOperand decodeSpecialReg64(unsigned Val) const
const char * getRegClassName(unsigned RegClassID) const
Expected< bool > decodeCOMPUTE_PGM_RSRC1(uint32_t FourByteBuffer, raw_string_ostream &KdStream) const
Decode as directives that handle COMPUTE_PGM_RSRC1.
MCOperand decodeSplitBarrier(const MCInst &Inst, unsigned Val) const
Expected< bool > decodeKernelDescriptorDirective(DataExtractor::Cursor &Cursor, ArrayRef< uint8_t > Bytes, raw_string_ostream &KdStream) const
void convertVOPCDPPInst(MCInst &MI) const
MCOperand decodeSpecialReg96Plus(unsigned Val) const
MCOperand decodeSDWASrc32(unsigned Val) const
void setABIVersion(unsigned Version) override
ELF-specific, set the ABI version from the object header.
Expected< bool > decodeCOMPUTE_PGM_RSRC2(uint32_t FourByteBuffer, raw_string_ostream &KdStream) const
Decode as directives that handle COMPUTE_PGM_RSRC2.
unsigned getAgprClassId(unsigned Width) const
MCOperand decodeDpp8FI(unsigned Val) const
MCOperand decodeSDWASrc(unsigned Width, unsigned Val) const
void convertFMAanyK(MCInst &MI) const
DecodeStatus tryDecodeInst(const uint8_t *Table, MCInst &MI, InsnType Inst, uint64_t Address, raw_ostream &Comments) const
void convertMacDPPInst(MCInst &MI) const
MCOperand decodeVOPDDstYOp(MCInst &Inst, unsigned Val) const
void convertDPP8Inst(MCInst &MI) const
MCOperand createVGPR16Operand(unsigned RegIdx, bool IsHi) const
MCOperand errOperand(unsigned V, const Twine &ErrMsg) const
MCOperand decodeVersionImm(unsigned Imm) const
Expected< bool > decodeKernelDescriptor(StringRef KdName, ArrayRef< uint8_t > Bytes, uint64_t KdAddress) const
void convertVOP3DPPInst(MCInst &MI) const
void convertTrue16OpSel(MCInst &MI) const
MCOperand decodeSrcOp(const MCInst &Inst, unsigned Width, unsigned Val) const
MCOperand decodeMandatoryLiteralConstant(unsigned Imm) const
MCOperand decodeLiteralConstant(const MCInstrDesc &Desc, const MCOperandInfo &OpDesc) const
Expected< bool > decodeCOMPUTE_PGM_RSRC3(uint32_t FourByteBuffer, raw_string_ostream &KdStream) const
Decode as directives that handle COMPUTE_PGM_RSRC3.
AMDGPUDisassembler(const MCSubtargetInfo &STI, MCContext &Ctx, MCInstrInfo const *MCII)
MCOperand decodeSpecialReg32(unsigned Val) const
MCOperand createRegOperand(MCRegister Reg) const
MCOperand decodeSDWAVopcDst(unsigned Val) const
void convertVINTERPInst(MCInst &MI) const
void convertSDWAInst(MCInst &MI) const
unsigned getSgprClassId(unsigned Width) const
static MCOperand decodeIntImmed(unsigned Imm)
void convertWMMAInst(MCInst &MI) const
MCOperand decodeBoolReg(const MCInst &Inst, unsigned Val) const
unsigned getVgprClassId(unsigned Width) const
void convertMAIInst(MCInst &MI) const
f8f6f4 instructions have different pseudos depending on the used formats.
unsigned getTtmpClassId(unsigned Width) const
DecodeStatus getInstruction(MCInst &MI, uint64_t &Size, ArrayRef< uint8_t > Bytes, uint64_t Address, raw_ostream &CS) const override
Returns the disassembly of a single instruction.
MCOperand decodeMandatoryLiteral64Constant(uint64_t Imm) const
void convertMIMGInst(MCInst &MI) const
bool isMacDPP(MCInst &MI) const
int getTTmpIdx(unsigned Val) const
void convertVOP3PDPPInst(MCInst &MI) const
MCOperand createSRegOperand(unsigned SRegClassID, unsigned Val) const
MCOperand decodeSDWASrc16(unsigned Val) const
Expected< bool > onSymbolStart(SymbolInfoTy &Symbol, uint64_t &Size, ArrayRef< uint8_t > Bytes, uint64_t Address) const override
Used to perform separate target specific disassembly for a particular symbol.
static const AMDGPUMCExpr * createLit(LitModifier Lit, int64_t Value, MCContext &Ctx)
bool tryAddingSymbolicOperand(MCInst &Inst, raw_ostream &cStream, int64_t Value, uint64_t Address, bool IsBranch, uint64_t Offset, uint64_t OpSize, uint64_t InstSize) override
Try to add a symbolic operand instead of Value to the MCInst.
void tryAddingPcLoadReferenceComment(raw_ostream &cStream, int64_t Value, uint64_t Address) override
Try to add a comment on the PC-relative load.
Represent a constant reference to an array (0 or more elements consecutively in memory), i.e. a start pointer and a length.
Definition ArrayRef.h:40
size_t size() const
Get the array size.
Definition ArrayRef.h:141
const T * data() const
Definition ArrayRef.h:138
ArrayRef< T > slice(size_t N, size_t M) const
slice(n, m) - Chop off the first N elements of the array, and keep M elements in the array.
Definition ArrayRef.h:185
A class representing a position in a DataExtractor, as well as any error encountered during extraction.
LLVM_ABI uint32_t getU32(uint64_t *offset_ptr, Error *Err=nullptr) const
Extract a uint32_t value from *offset_ptr.
LLVM_ABI uint16_t getU16(uint64_t *offset_ptr, Error *Err=nullptr) const
Extract a uint16_t value from *offset_ptr.
LLVM_ABI void skip(Cursor &C, uint64_t Length) const
Advance the Cursor position by the given number of bytes.
LLVM_ABI StringRef getBytes(uint64_t *OffsetPtr, uint64_t Length, Error *Err=nullptr) const
Extract a fixed number of bytes from the specified offset.
Lightweight error class with error context and mandatory checking.
Definition Error.h:159
Tagged union holding either a T or a Error.
Definition Error.h:485
static const MCBinaryExpr * createOr(const MCExpr *LHS, const MCExpr *RHS, MCContext &Ctx)
Definition MCExpr.h:408
static LLVM_ABI const MCConstantExpr * create(int64_t Value, MCContext &Ctx, bool PrintInHex=false, unsigned SizeInBytes=0)
Definition MCExpr.cpp:212
Context object for machine code objects.
Definition MCContext.h:83
const MCRegisterInfo * getRegisterInfo() const
Definition MCContext.h:411
Superclass for all disassemblers.
MCDisassembler(const MCSubtargetInfo &STI, MCContext &Ctx)
MCContext & getContext() const
const MCSubtargetInfo & STI
raw_ostream * CommentStream
DecodeStatus
Ternary decode status.
Base class for the full range of assembler expressions which are needed for parsing.
Definition MCExpr.h:34
Instances of this class represent a single low-level machine instruction.
Definition MCInst.h:188
unsigned getOpcode() const
Definition MCInst.h:202
void addOperand(const MCOperand Op)
Definition MCInst.h:215
const MCOperand & getOperand(unsigned i) const
Definition MCInst.h:210
Describe properties that are true of each instruction in the target description file.
Interface to description of machine instruction set.
Definition MCInstrInfo.h:27
This holds information about one operand of a machine instruction, indicating the register class for register operands, etc.
Definition MCInstrDesc.h:86
uint8_t OperandType
Information about the type of the operand.
Definition MCInstrDesc.h:98
Instances of this class represent operands of the MCInst class.
Definition MCInst.h:40
static MCOperand createExpr(const MCExpr *Val)
Definition MCInst.h:166
int64_t getImm() const
Definition MCInst.h:84
static MCOperand createReg(MCRegister Reg)
Definition MCInst.h:138
static MCOperand createImm(int64_t Val)
Definition MCInst.h:145
void setReg(MCRegister Reg)
Set the register number.
Definition MCInst.h:79
bool isReg() const
Definition MCInst.h:65
MCRegister getReg() const
Returns the register number.
Definition MCInst.h:73
bool isValid() const
Definition MCInst.h:64
MCRegisterClass - Base class of TargetRegisterClass.
MCRegister getRegister(unsigned i) const
getRegister - Return the specified register in the class.
unsigned getSizeInBits() const
Return the size of the physical register in bits if we are able to determine it.
bool contains(MCRegister Reg) const
contains - Return true if the specified register is included in this register class.
MCRegisterInfo base class - We assume that the target defines a static array of MCRegisterDesc objects that represent all of the machine registers that the target has.
MCRegister getMatchingSuperReg(MCRegister Reg, unsigned SubIdx, const MCRegisterClass *RC) const
Return a super-register of the specified register Reg so its sub-register of index SubIdx is Reg.
const MCRegisterClass & getRegClass(unsigned i) const
Returns the register class associated with the enumeration value.
MCRegister getSubReg(MCRegister Reg, unsigned Idx) const
Returns the physical register number of sub-register "Index" for physical register RegNo.
Wrapper class representing physical registers. Should be passed by value.
Definition MCRegister.h:41
Generic base class for all target subtargets.
static const MCSymbolRefExpr * create(const MCSymbol *Symbol, MCContext &Ctx, SMLoc Loc=SMLoc())
Definition MCExpr.h:214
MCSymbol - Instances of this class represent a symbol name in the MC file, and MCSymbols are created and uniqued by the MCContext class.
Definition MCSymbol.h:42
bool isVariable() const
isVariable - Check if this is a variable symbol.
Definition MCSymbol.h:267
LLVM_ABI void setVariableValue(const MCExpr *Value)
Definition MCSymbol.cpp:50
const MCExpr * getVariableValue() const
Get the expression of the variable symbol.
Definition MCSymbol.h:270
Symbolize and annotate disassembled instructions.
Represents a location in source code.
Definition SMLoc.h:22
SmallString - A SmallString is just a SmallVector with methods and accessors that make it work better as a string (e.g. operator+ etc).
Definition SmallString.h:26
Represent a constant reference to a string, i.e. a character array and a length, which need not be null terminated.
Definition StringRef.h:56
Target - Wrapper for Target specific information.
Triple - Helper class for working with autoconf configuration names.
Definition Triple.h:47
Twine - A lightweight data structure for efficiently representing the concatenation of temporary values as strings.
Definition Twine.h:82
LLVM Value Representation.
Definition Value.h:75
This class implements an extremely fast bulk output stream that can only output to a stream.
Definition raw_ostream.h:53
A raw_ostream that writes to an std::string.
std::string & str()
Returns the string's reference.
A raw_ostream that writes to a SmallVector or SmallString.
const char *(* LLVMSymbolLookupCallback)(void *DisInfo, uint64_t ReferenceValue, uint64_t *ReferenceType, uint64_t ReferencePC, const char **ReferenceName)
The type for the symbol lookup function.
int(* LLVMOpInfoCallback)(void *DisInfo, uint64_t PC, uint64_t Offset, uint64_t OpSize, uint64_t InstSize, int TagType, void *TagBuf)
The type for the operand information call back function.
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
unsigned getVGPREncodingGranule(const MCSubtargetInfo &STI, std::optional< bool > EnableWavefrontSize32)
unsigned getSGPREncodingGranule(const MCSubtargetInfo &STI)
ArrayRef< GFXVersion > getGFXVersions()
bool isInlinableLiteralBF16(int16_t Literal, bool HasInv2Pi)
EncodingField< Bit, Bit, D > EncodingBit
bool isPKFMACF16InlineConstant(uint32_t Literal, bool IsGFX11Plus)
LLVM_READONLY const MIMGInfo * getMIMGInfo(unsigned Opc)
bool isInlinableLiteralFP16(int16_t Literal, bool HasInv2Pi)
MCRegister getMCReg(MCRegister Reg, const MCSubtargetInfo &STI)
If Reg is a pseudo reg, return the correct hardware register given STI otherwise return Reg.
int getMIMGOpcode(unsigned BaseOpcode, unsigned MIMGEncoding, unsigned VDataDwords, unsigned VAddrDwords)
bool isInlinableLiteralV2I16(uint32_t Literal)
bool isGFX10(const MCSubtargetInfo &STI)
bool isInlinableLiteralV2BF16(uint32_t Literal)
bool isGFX12Plus(const MCSubtargetInfo &STI)
bool hasPackedD16(const MCSubtargetInfo &STI)
bool isInlinableLiteralV2F16(uint32_t Literal)
bool getSMEMIsBuffer(unsigned Opc)
bool isGFX13(const MCSubtargetInfo &STI)
bool isVOPC64DPP(unsigned Opc)
unsigned getAMDHSACodeObjectVersion(const Module &M)
LLVM_READONLY bool hasNamedOperand(uint64_t Opcode, OpName NamedIdx)
bool isGFX9(const MCSubtargetInfo &STI)
LLVM_READONLY const MIMGDimInfo * getMIMGDimInfoByEncoding(uint8_t DimEnc)
bool isInlinableLiteral32(int32_t Literal, bool HasInv2Pi)
const MFMA_F8F6F4_Info * getWMMA_F8F6F4_WithFormatArgs(unsigned FmtA, unsigned FmtB, unsigned F8F8Opcode)
bool hasG16(const MCSubtargetInfo &STI)
unsigned getAddrSizeMIMGOp(const MIMGBaseOpcodeInfo *BaseOpcode, const MIMGDimInfo *Dim, bool IsA16, bool IsG16Supported)
bool isGFX13Plus(const MCSubtargetInfo &STI)
bool isGFX11Plus(const MCSubtargetInfo &STI)
bool isGFX10Plus(const MCSubtargetInfo &STI)
@ OPERAND_REG_IMM_V2FP64
Definition SIDefines.h:220
@ OPERAND_KIMM32
Operand with 32-bit immediate that uses the constant bus.
Definition SIDefines.h:238
@ OPERAND_REG_IMM_INT64
Definition SIDefines.h:206
@ OPERAND_REG_IMM_V2FP16
Definition SIDefines.h:213
@ OPERAND_REG_INLINE_C_FP64
Definition SIDefines.h:229
@ OPERAND_REG_INLINE_C_BF16
Definition SIDefines.h:226
@ OPERAND_REG_INLINE_C_V2BF16
Definition SIDefines.h:231
@ OPERAND_REG_IMM_V2INT64
Definition SIDefines.h:216
@ OPERAND_REG_IMM_V2INT16
Definition SIDefines.h:215
@ OPERAND_REG_IMM_BF16
Definition SIDefines.h:210
@ OPERAND_REG_IMM_INT32
Operands with register, 32-bit, or 64-bit immediate.
Definition SIDefines.h:205
@ OPERAND_REG_IMM_V2BF16
Definition SIDefines.h:212
@ OPERAND_REG_IMM_FP16
Definition SIDefines.h:211
@ OPERAND_REG_IMM_V2FP16_SPLAT
Definition SIDefines.h:214
@ OPERAND_REG_INLINE_C_INT64
Definition SIDefines.h:225
@ OPERAND_REG_INLINE_C_INT16
Operands with register or inline constant.
Definition SIDefines.h:223
@ OPERAND_REG_IMM_NOINLINE_V2FP16
Definition SIDefines.h:217
@ OPERAND_REG_IMM_FP64
Definition SIDefines.h:209
@ OPERAND_REG_INLINE_C_V2FP16
Definition SIDefines.h:232
@ OPERAND_REG_INLINE_AC_INT32
Operands with an AccVGPR register or inline constant.
Definition SIDefines.h:243
@ OPERAND_REG_INLINE_AC_FP32
Definition SIDefines.h:244
@ OPERAND_REG_IMM_V2INT32
Definition SIDefines.h:218
@ OPERAND_REG_IMM_FP32
Definition SIDefines.h:208
@ OPERAND_REG_INLINE_C_FP32
Definition SIDefines.h:228
@ OPERAND_REG_INLINE_C_INT32
Definition SIDefines.h:224
@ OPERAND_REG_INLINE_C_V2INT16
Definition SIDefines.h:230
@ OPERAND_REG_IMM_V2FP32
Definition SIDefines.h:219
@ OPERAND_REG_INLINE_AC_FP64
Definition SIDefines.h:245
@ OPERAND_REG_INLINE_C_FP16
Definition SIDefines.h:227
@ OPERAND_REG_IMM_INT16
Definition SIDefines.h:207
bool hasGDS(const MCSubtargetInfo &STI)
bool isGFX9Plus(const MCSubtargetInfo &STI)
bool isGFX1250(const MCSubtargetInfo &STI)
unsigned hasKernargPreload(const MCSubtargetInfo &STI)
bool isMAC(unsigned Opc)
LLVM_READONLY const MIMGBaseOpcodeInfo * getMIMGBaseOpcodeInfo(unsigned BaseOpcode)
bool isGFX1250Plus(const MCSubtargetInfo &STI)
bool isInlinableLiteralI16(int32_t Literal, bool HasInv2Pi)
bool hasVOPD(const MCSubtargetInfo &STI)
bool isInlinableLiteral64(int64_t Literal, bool HasInv2Pi)
Is this literal inlinable.
const MFMA_F8F6F4_Info * getMFMA_F8F6F4_WithFormatArgs(unsigned CBSZ, unsigned BLGP, unsigned F8F8Opcode)
@ C
The default llvm calling convention, compatible with C.
Definition CallingConv.h:34
@ STT_NOTYPE
Definition ELF.h:1419
@ STT_AMDGPU_HSA_KERNEL
Definition ELF.h:1433
@ STT_OBJECT
Definition ELF.h:1420
value_type read(const void *memory, endianness endian)
Read a value of a particular endianness from memory.
Definition Endian.h:60
uint16_t read16(const void *P, endianness E)
Definition Endian.h:409
This is an optimization pass for GlobalISel generic memory operations.
@ Offset
Definition DWP.cpp:558
auto enumerate(FirstRange &&First, RestRanges &&...Rest)
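Given two or more input ranges, returns a new range whose values are tuples (A, B, C, ...), such that A is the 0-based index of the item in the sequence, and B, C, ..., are the values from the original input ranges.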
Definition STLExtras.h:2553
LLVM_ABI raw_fd_ostream & outs()
This returns a reference to a raw_fd_ostream for standard output.
SmallVectorImpl< T >::const_pointer c_str(SmallVectorImpl< T > &str)
Error createStringError(std::error_code EC, char const *Fmt, const Ts &... Vals)
Create formatted StringError object.
Definition Error.h:1321
Op::Description Desc
constexpr int popcount(T Value) noexcept
Count the number of set bits in a value.
Definition bit.h:156
int countr_zero(T Val)
Count number of 0's from the least significant bit to the most, stopping at the first 1.
Definition bit.h:204
FunctionAddr VTableAddr uintptr_t uintptr_t Version
Definition InstrProf.h:334
MachineInstr * getImm(const MachineOperand &MO, const MachineRegisterInfo *MRI)
constexpr uint32_t Hi_32(uint64_t Value)
Return the high 32 bits of a 64 bit value.
Definition MathExtras.h:150
constexpr bool isUInt(uint64_t x)
Checks if an unsigned integer fits into the given bit width.
Definition MathExtras.h:189
void cantFail(Error Err, const char *Msg=nullptr)
Report a fatal error if Err is a failure value.
Definition Error.h:769
Target & getTheGCNTarget()
The target for GCN GPUs.
To bit_cast(const From &from) noexcept
Definition bit.h:90
@ Add
Sum of integers.
DWARFExpression::Operation Op
unsigned M0(unsigned Val)
Definition VE.h:376
auto find_if(R &&Range, UnaryPredicate P)
Provide wrappers to std::find_if which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1771
std::vector< SymbolInfoTy > SectionSymbolsTy
constexpr int64_t SignExtend64(uint64_t x)
Sign-extend the number in the bottom B bits of X to a 64-bit integer.
Definition MathExtras.h:572
LLVM_ABI void reportFatalUsageError(Error Err)
Report a fatal error that does not indicate a bug in LLVM.
Definition Error.cpp:177
static void RegisterMCSymbolizer(Target &T, Target::MCSymbolizerCtorTy Fn)
RegisterMCSymbolizer - Register an MCSymbolizer implementation for the given target.
static void RegisterMCDisassembler(Target &T, Target::MCDisassemblerCtorTy Fn)
RegisterMCDisassembler - Register a MCDisassembler implementation for the given target.