LLVM 23.0.0git
AMDGPUDisassembler.cpp
Go to the documentation of this file.
1//===- AMDGPUDisassembler.cpp - Disassembler for AMDGPU ISA ---------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9//===----------------------------------------------------------------------===//
10//
11/// \file
12///
13/// This file contains definition for AMDGPU ISA disassembler
14//
15//===----------------------------------------------------------------------===//
16
17// ToDo: What to do with instruction suffixes (v_mov_b32 vs v_mov_b32_e32)?
18
22#include "SIDefines.h"
23#include "SIRegisterInfo.h"
29#include "llvm/MC/MCAsmInfo.h"
30#include "llvm/MC/MCContext.h"
31#include "llvm/MC/MCDecoder.h"
33#include "llvm/MC/MCExpr.h"
34#include "llvm/MC/MCInstrDesc.h"
40
41using namespace llvm;
42using namespace llvm::MCD;
43
44#define DEBUG_TYPE "amdgpu-disassembler"
45
46#define SGPR_MAX \
47 (isGFX10Plus() ? AMDGPU::EncValues::SGPR_MAX_GFX10 \
48 : AMDGPU::EncValues::SGPR_MAX_SI)
49
51
52static int64_t getInlineImmValF16(unsigned Imm);
53static int64_t getInlineImmValBF16(unsigned Imm);
54static int64_t getInlineImmVal32(unsigned Imm);
55static int64_t getInlineImmVal64(unsigned Imm);
56
58 MCContext &Ctx, MCInstrInfo const *MCII)
59 : MCDisassembler(STI, Ctx), MCII(MCII), MRI(*Ctx.getRegisterInfo()),
60 MAI(*Ctx.getAsmInfo()),
61 HwModeRegClass(STI.getHwMode(MCSubtargetInfo::HwMode_RegInfo)),
62 TargetMaxInstBytes(MAI.getMaxInstLength(&STI)),
63 CodeObjectVersion(AMDGPU::getDefaultAMDHSACodeObjectVersion()) {
64 // ToDo: AMDGPUDisassembler supports only VI ISA.
65 if (!STI.hasFeature(AMDGPU::FeatureGCN3Encoding) && !isGFX10Plus())
66 reportFatalUsageError("disassembly not yet supported for subtarget");
67
68 for (auto [Symbol, Code] : AMDGPU::UCVersion::getGFXVersions())
69 createConstantSymbolExpr(Symbol, Code);
70
71 UCVersionW64Expr = createConstantSymbolExpr("UC_VERSION_W64_BIT", 0x2000);
72 UCVersionW32Expr = createConstantSymbolExpr("UC_VERSION_W32_BIT", 0x4000);
73 UCVersionMDPExpr = createConstantSymbolExpr("UC_VERSION_MDP_BIT", 0x8000);
74}
75
79
81addOperand(MCInst &Inst, const MCOperand& Opnd) {
82 Inst.addOperand(Opnd);
83 return Opnd.isValid() ?
86}
87
89 AMDGPU::OpName Name) {
90 int OpIdx = AMDGPU::getNamedOperandIdx(MI.getOpcode(), Name);
91 if (OpIdx != -1) {
92 auto *I = MI.begin();
93 std::advance(I, OpIdx);
94 MI.insert(I, Op);
95 }
96 return OpIdx;
97}
98
99static DecodeStatus decodeSOPPBrTarget(MCInst &Inst, unsigned Imm,
100 uint64_t Addr,
101 const MCDisassembler *Decoder) {
102 const auto *DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
103
104 // Our branches take a simm16.
105 int64_t Offset = SignExtend64<16>(Imm) * 4 + 4 + Addr;
106
107 if (DAsm->tryAddingSymbolicOperand(Inst, Offset, Addr, true, 2, 2, 0))
109 return addOperand(Inst, MCOperand::createImm(Imm));
110}
111
112static DecodeStatus decodeSMEMOffset(MCInst &Inst, unsigned Imm, uint64_t Addr,
113 const MCDisassembler *Decoder) {
114 const auto *DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
115 int64_t Offset;
116 if (DAsm->isGFX12Plus()) { // GFX12 supports 24-bit signed offsets.
118 } else if (DAsm->isVI()) { // VI supports 20-bit unsigned offsets.
119 Offset = Imm & 0xFFFFF;
120 } else { // GFX9+ supports 21-bit signed offsets.
122 }
124}
125
126static DecodeStatus decodeBoolReg(MCInst &Inst, unsigned Val, uint64_t Addr,
127 const MCDisassembler *Decoder) {
128 const auto *DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
129 return addOperand(Inst, DAsm->decodeBoolReg(Inst, Val));
130}
131
132static DecodeStatus decodeSplitBarrier(MCInst &Inst, unsigned Val,
133 uint64_t Addr,
134 const MCDisassembler *Decoder) {
135 const auto *DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
136 return addOperand(Inst, DAsm->decodeSplitBarrier(Inst, Val));
137}
138
139static DecodeStatus decodeDpp8FI(MCInst &Inst, unsigned Val, uint64_t Addr,
140 const MCDisassembler *Decoder) {
141 const auto *DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
142 return addOperand(Inst, DAsm->decodeDpp8FI(Val));
143}
144
// Defines a static decoder-table callback named \p StaticDecoderName that
// forwards the raw immediate encoding to the AMDGPUDisassembler member
// \p DecoderName and appends the decoded operand to the instruction.
#define DECODE_OPERAND(StaticDecoderName, DecoderName)                         \
  static DecodeStatus StaticDecoderName(MCInst &Inst, unsigned Imm,            \
                                        uint64_t /*Addr*/,                     \
                                        const MCDisassembler *Decoder) {       \
    const auto *DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);       \
    return addOperand(Inst, DAsm->DecoderName(Imm));                           \
  }
152
// Decoder for registers addressed directly by an 8-bit register number in the
// encoding (used by VGPR-only and AGPR-only operands): produces a register
// operand from the given RegClass.
#define DECODE_OPERAND_REG_8(RegClass)                                         \
  static DecodeStatus Decode##RegClass##RegisterClass(                         \
      MCInst &Inst, unsigned Imm, uint64_t /*Addr*/,                           \
      const MCDisassembler *Decoder) {                                         \
    assert(Imm < (1 << 8) && "8-bit encoding");                                \
    const auto *DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);       \
    return addOperand(                                                         \
        Inst, DAsm->createRegOperand(AMDGPU::RegClass##RegClassID, Imm));      \
  }
164
// Defines a decoder callback \p Name that validates the encoding fits in
// \p EncSize bits and then decodes \p EncImm (an expression over Imm) as a
// source operand of width \p OpWidth.
#define DECODE_SrcOp(Name, EncSize, OpWidth, EncImm)                           \
  static DecodeStatus Name(MCInst &Inst, unsigned Imm, uint64_t /*Addr*/,      \
                           const MCDisassembler *Decoder) {                    \
    assert(Imm < (1 << EncSize) && #EncSize "-bit encoding");                  \
    const auto *DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);       \
    return addOperand(Inst, DAsm->decodeSrcOp(Inst, OpWidth, EncImm));         \
  }
172
173static DecodeStatus decodeSrcOp(MCInst &Inst, unsigned EncSize,
174 unsigned OpWidth, unsigned Imm, unsigned EncImm,
175 const MCDisassembler *Decoder) {
176 assert(Imm < (1U << EncSize) && "Operand doesn't fit encoding!");
177 const auto *DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
178 return addOperand(Inst, DAsm->decodeSrcOp(Inst, OpWidth, EncImm));
179}
180
// Decoder for SGPR-only operands with a 7-bit register-number encoding;
// register class resolution is delegated to decodeSrcOp.
#define DECODE_OPERAND_SREG_7(RegClass, OpWidth)                               \
  DECODE_SrcOp(Decode##RegClass##RegisterClass, 7, OpWidth, Imm)

// As above, but for SGPR-only operands with an 8-bit register-number encoding.
#define DECODE_OPERAND_SREG_8(RegClass, OpWidth)                               \
  DECODE_SrcOp(Decode##RegClass##RegisterClass, 8, OpWidth, Imm)
188
189// Decoder for registers. Imm(10-bit): Imm{7-0} is number of register,
190// Imm{9} is acc(agpr or vgpr) Imm{8} should be 0 (see VOP3Pe_SMFMAC).
191// Set Imm{8} to 1 (IS_VGPR) to decode using 'enum10' from decodeSrcOp.
192// Used by AV_ register classes (AGPR or VGPR only register operands).
193template <unsigned OpWidth>
194static DecodeStatus decodeAV10(MCInst &Inst, unsigned Imm, uint64_t /* Addr */,
195 const MCDisassembler *Decoder) {
196 return decodeSrcOp(Inst, 10, OpWidth, Imm, Imm | AMDGPU::EncValues::IS_VGPR,
197 Decoder);
198}
199
200// Decoder for Src(9-bit encoding) registers only.
201template <unsigned OpWidth>
202static DecodeStatus decodeSrcReg9(MCInst &Inst, unsigned Imm,
203 uint64_t /* Addr */,
204 const MCDisassembler *Decoder) {
205 return decodeSrcOp(Inst, 9, OpWidth, Imm, Imm, Decoder);
206}
207
208// Decoder for Src(9-bit encoding) AGPR, register number encoded in 9bits, set
209// Imm{9} to 1 (set acc) and decode using 'enum10' from decodeSrcOp, registers
210// only.
211template <unsigned OpWidth>
212static DecodeStatus decodeSrcA9(MCInst &Inst, unsigned Imm, uint64_t /* Addr */,
213 const MCDisassembler *Decoder) {
214 return decodeSrcOp(Inst, 9, OpWidth, Imm, Imm | 512, Decoder);
215}
216
217// Decoder for 'enum10' from decodeSrcOp, Imm{0-8} is 9-bit Src encoding
218// Imm{9} is acc, registers only.
219template <unsigned OpWidth>
220static DecodeStatus decodeSrcAV10(MCInst &Inst, unsigned Imm,
221 uint64_t /* Addr */,
222 const MCDisassembler *Decoder) {
223 return decodeSrcOp(Inst, 10, OpWidth, Imm, Imm, Decoder);
224}
225
226// Decoder for RegisterOperands using 9-bit Src encoding. Operand can be
227// register from RegClass or immediate. Registers that don't belong to RegClass
228// will be decoded and InstPrinter will report warning. Immediate will be
229// decoded into constant matching the OperandType (important for floating point
230// types).
231template <unsigned OpWidth>
232static DecodeStatus decodeSrcRegOrImm9(MCInst &Inst, unsigned Imm,
233 uint64_t /* Addr */,
234 const MCDisassembler *Decoder) {
235 return decodeSrcOp(Inst, 9, OpWidth, Imm, Imm, Decoder);
236}
237
238// Decoder for Src(9-bit encoding) AGPR or immediate. Set Imm{9} to 1 (set acc)
239// and decode using 'enum10' from decodeSrcOp.
240template <unsigned OpWidth>
241static DecodeStatus decodeSrcRegOrImmA9(MCInst &Inst, unsigned Imm,
242 uint64_t /* Addr */,
243 const MCDisassembler *Decoder) {
244 return decodeSrcOp(Inst, 9, OpWidth, Imm, Imm | 512, Decoder);
245}
246
247// Default decoders generated by tablegen: 'Decode<RegClass>RegisterClass'
248// when RegisterClass is used as an operand. Most often used for destination
249// operands.
250
252DECODE_OPERAND_REG_8(VGPR_32_Lo128)
255DECODE_OPERAND_REG_8(VReg_128)
256DECODE_OPERAND_REG_8(VReg_192)
257DECODE_OPERAND_REG_8(VReg_256)
258DECODE_OPERAND_REG_8(VReg_288)
259DECODE_OPERAND_REG_8(VReg_320)
260DECODE_OPERAND_REG_8(VReg_352)
261DECODE_OPERAND_REG_8(VReg_384)
262DECODE_OPERAND_REG_8(VReg_512)
263DECODE_OPERAND_REG_8(VReg_1024)
264
265DECODE_OPERAND_SREG_7(SReg_32, 32)
266DECODE_OPERAND_SREG_7(SReg_32_XM0, 32)
267DECODE_OPERAND_SREG_7(SReg_32_XEXEC, 32)
268DECODE_OPERAND_SREG_7(SReg_32_XM0_XEXEC, 32)
269DECODE_OPERAND_SREG_7(SReg_32_XEXEC_HI, 32)
270DECODE_OPERAND_SREG_7(SReg_64_XEXEC, 64)
271DECODE_OPERAND_SREG_7(SReg_64_XEXEC_XNULL, 64)
272DECODE_OPERAND_SREG_7(SReg_96, 96)
273DECODE_OPERAND_SREG_7(SReg_128, 128)
274DECODE_OPERAND_SREG_7(SReg_128_XNULL, 128)
275DECODE_OPERAND_SREG_7(SReg_256, 256)
276DECODE_OPERAND_SREG_7(SReg_256_XNULL, 256)
277DECODE_OPERAND_SREG_7(SReg_512, 512)
278
279DECODE_OPERAND_SREG_8(SReg_64, 64)
280
283DECODE_OPERAND_REG_8(AReg_128)
284DECODE_OPERAND_REG_8(AReg_256)
285DECODE_OPERAND_REG_8(AReg_512)
286DECODE_OPERAND_REG_8(AReg_1024)
287
289 uint64_t /*Addr*/,
290 const MCDisassembler *Decoder) {
291 assert(isUInt<10>(Imm) && "10-bit encoding expected");
292 assert((Imm & (1 << 8)) == 0 && "Imm{8} should not be used");
293
294 bool IsHi = Imm & (1 << 9);
295 unsigned RegIdx = Imm & 0xff;
296 const auto *DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
297 return addOperand(Inst, DAsm->createVGPR16Operand(RegIdx, IsHi));
298}
299
300static DecodeStatus
302 const MCDisassembler *Decoder) {
303 assert(isUInt<8>(Imm) && "8-bit encoding expected");
304
305 bool IsHi = Imm & (1 << 7);
306 unsigned RegIdx = Imm & 0x7f;
307 const auto *DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
308 return addOperand(Inst, DAsm->createVGPR16Operand(RegIdx, IsHi));
309}
310
311template <unsigned OpWidth>
313 uint64_t /*Addr*/,
314 const MCDisassembler *Decoder) {
315 assert(isUInt<9>(Imm) && "9-bit encoding expected");
316
317 const auto *DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
318 if (Imm & AMDGPU::EncValues::IS_VGPR) {
319 bool IsHi = Imm & (1 << 7);
320 unsigned RegIdx = Imm & 0x7f;
321 return addOperand(Inst, DAsm->createVGPR16Operand(RegIdx, IsHi));
322 }
323 return addOperand(Inst, DAsm->decodeNonVGPRSrcOp(Inst, OpWidth, Imm & 0xFF));
324}
325
326template <unsigned OpWidth>
327static DecodeStatus decodeOperand_VSrcT16(MCInst &Inst, unsigned Imm,
328 uint64_t /*Addr*/,
329 const MCDisassembler *Decoder) {
330 assert(isUInt<10>(Imm) && "10-bit encoding expected");
331
332 const auto *DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
333 if (Imm & AMDGPU::EncValues::IS_VGPR) {
334 bool IsHi = Imm & (1 << 9);
335 unsigned RegIdx = Imm & 0xff;
336 return addOperand(Inst, DAsm->createVGPR16Operand(RegIdx, IsHi));
337 }
338 return addOperand(Inst, DAsm->decodeNonVGPRSrcOp(Inst, OpWidth, Imm & 0xFF));
339}
340
341static DecodeStatus decodeOperand_VGPR_16(MCInst &Inst, unsigned Imm,
342 uint64_t /*Addr*/,
343 const MCDisassembler *Decoder) {
344 assert(isUInt<10>(Imm) && "10-bit encoding expected");
345 assert(Imm & AMDGPU::EncValues::IS_VGPR && "VGPR expected");
346
347 const auto *DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
348
349 bool IsHi = Imm & (1 << 9);
350 unsigned RegIdx = Imm & 0xff;
351 return addOperand(Inst, DAsm->createVGPR16Operand(RegIdx, IsHi));
352}
353
354static DecodeStatus decodeOperand_KImmFP(MCInst &Inst, unsigned Imm,
355 uint64_t Addr,
356 const MCDisassembler *Decoder) {
357 const auto *DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
358 return addOperand(Inst, DAsm->decodeMandatoryLiteralConstant(Imm));
359}
360
362 uint64_t Addr,
363 const MCDisassembler *Decoder) {
364 const auto *DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
365 return addOperand(Inst, DAsm->decodeMandatoryLiteral64Constant(Imm));
366}
367
368static DecodeStatus decodeOperandVOPDDstY(MCInst &Inst, unsigned Val,
369 uint64_t Addr, const void *Decoder) {
370 const auto *DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
371 return addOperand(Inst, DAsm->decodeVOPDDstYOp(Inst, Val));
372}
373
374static DecodeStatus decodeAVLdSt(MCInst &Inst, unsigned Imm, unsigned Opw,
375 const MCDisassembler *Decoder) {
376 const auto *DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
377 return addOperand(Inst, DAsm->decodeSrcOp(Inst, Opw, Imm | 256));
378}
379
380template <unsigned Opw>
381static DecodeStatus decodeAVLdSt(MCInst &Inst, unsigned Imm,
382 uint64_t /* Addr */,
383 const MCDisassembler *Decoder) {
384 return decodeAVLdSt(Inst, Imm, Opw, Decoder);
385}
386
387static DecodeStatus decodeOperand_VSrc_f64(MCInst &Inst, unsigned Imm,
388 uint64_t Addr,
389 const MCDisassembler *Decoder) {
390 assert(Imm < (1 << 9) && "9-bit encoding");
391 const auto *DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
392 return addOperand(Inst, DAsm->decodeSrcOp(Inst, 64, Imm));
393}
394
395#define DECODE_SDWA(DecName) \
396DECODE_OPERAND(decodeSDWA##DecName, decodeSDWA##DecName)
397
398DECODE_SDWA(Src32)
399DECODE_SDWA(Src16)
400DECODE_SDWA(VopcDst)
401
402static DecodeStatus decodeVersionImm(MCInst &Inst, unsigned Imm,
403 uint64_t /* Addr */,
404 const MCDisassembler *Decoder) {
405 const auto *DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
406 return addOperand(Inst, DAsm->decodeVersionImm(Imm));
407}
408
409#include "AMDGPUGenDisassemblerTables.inc"
410
411namespace {
412// Define bitwidths for various types used to instantiate the decoder.
413template <> constexpr uint32_t InsnBitWidth<uint32_t> = 32;
414template <> constexpr uint32_t InsnBitWidth<uint64_t> = 64;
415template <> constexpr uint32_t InsnBitWidth<std::bitset<96>> = 96;
416template <> constexpr uint32_t InsnBitWidth<std::bitset<128>> = 128;
417} // namespace
418
419//===----------------------------------------------------------------------===//
420//
421//===----------------------------------------------------------------------===//
422
423template <typename InsnType>
425 InsnType Inst, uint64_t Address,
426 raw_ostream &Comments) const {
427 assert(MI.getOpcode() == 0);
428 assert(MI.getNumOperands() == 0);
429 MCInst TmpInst;
430 HasLiteral = false;
431 const auto SavedBytes = Bytes;
432
433 SmallString<64> LocalComments;
434 raw_svector_ostream LocalCommentStream(LocalComments);
435 CommentStream = &LocalCommentStream;
436
437 DecodeStatus Res =
438 decodeInstruction(Table, TmpInst, Inst, Address, this, STI);
439
440 CommentStream = nullptr;
441
442 if (Res != MCDisassembler::Fail) {
443 MI = TmpInst;
444 Comments << LocalComments;
446 }
447 Bytes = SavedBytes;
449}
450
451template <typename InsnType>
454 MCInst &MI, InsnType Inst, uint64_t Address,
455 raw_ostream &Comments) const {
456 for (const uint8_t *T : {Table1, Table2}) {
457 if (DecodeStatus Res = tryDecodeInst(T, MI, Inst, Address, Comments))
458 return Res;
459 }
461}
462
463template <typename T> static inline T eatBytes(ArrayRef<uint8_t>& Bytes) {
464 assert(Bytes.size() >= sizeof(T));
465 const auto Res =
467 Bytes = Bytes.slice(sizeof(T));
468 return Res;
469}
470
471static inline std::bitset<96> eat12Bytes(ArrayRef<uint8_t> &Bytes) {
472 using namespace llvm::support::endian;
473 assert(Bytes.size() >= 12);
474 std::bitset<96> Lo(read<uint64_t, endianness::little>(Bytes.data()));
475 Bytes = Bytes.slice(8);
476 std::bitset<96> Hi(read<uint32_t, endianness::little>(Bytes.data()));
477 Bytes = Bytes.slice(4);
478 return (Hi << 64) | Lo;
479}
480
481static inline std::bitset<128> eat16Bytes(ArrayRef<uint8_t> &Bytes) {
482 using namespace llvm::support::endian;
483 assert(Bytes.size() >= 16);
484 std::bitset<128> Lo(read<uint64_t, endianness::little>(Bytes.data()));
485 Bytes = Bytes.slice(8);
486 std::bitset<128> Hi(read<uint64_t, endianness::little>(Bytes.data()));
487 Bytes = Bytes.slice(8);
488 return (Hi << 64) | Lo;
489}
490
491void AMDGPUDisassembler::decodeImmOperands(MCInst &MI,
492 const MCInstrInfo &MCII) const {
493 const MCInstrDesc &Desc = MCII.get(MI.getOpcode());
494 for (auto [OpNo, OpDesc] : enumerate(Desc.operands())) {
495 if (OpNo >= MI.getNumOperands())
496 continue;
497
498 // TODO: Fix V_DUAL_FMAMK_F32_X_FMAAK_F32_gfx12 vsrc operands,
499 // defined to take VGPR_32, but in reality allowing inline constants.
500 bool IsSrc = AMDGPU::OPERAND_SRC_FIRST <= OpDesc.OperandType &&
501 OpDesc.OperandType <= AMDGPU::OPERAND_SRC_LAST;
502 if (!IsSrc && OpDesc.OperandType != MCOI::OPERAND_REGISTER)
503 continue;
504
505 MCOperand &Op = MI.getOperand(OpNo);
506 if (!Op.isImm())
507 continue;
508 int64_t Imm = Op.getImm();
511 Op = decodeIntImmed(Imm);
512 continue;
513 }
514
516 Op = decodeLiteralConstant(Desc, OpDesc);
517 continue;
518 }
519
522 switch (OpDesc.OperandType) {
528 break;
533 Imm = getInlineImmValF16(Imm);
534 break;
537 Imm = getInlineImmValF16(Imm);
538 break;
540 // V_PK_FMAC_F16 on GFX11+ duplicates the f16 inline constant to both
541 // halves, so we need to produce the duplicated value for correct
542 // round-trip.
543 if (isGFX11Plus()) {
544 int64_t F16Val = getInlineImmValF16(Imm);
545 Imm = (F16Val << 16) | (F16Val & 0xFFFF);
546 } else {
547 Imm = getInlineImmValF16(Imm);
548 }
549 break;
550 }
556 Imm = getInlineImmVal64(Imm);
557 break;
558 default:
559 Imm = getInlineImmVal32(Imm);
560 }
561 Op.setImm(Imm);
562 }
563 }
564}
565
567 ArrayRef<uint8_t> Bytes_,
569 raw_ostream &CS) const {
570 unsigned MaxInstBytesNum = std::min((size_t)TargetMaxInstBytes, Bytes_.size());
571 Bytes = Bytes_.slice(0, MaxInstBytesNum);
572
573 // In case the opcode is not recognized we'll assume a Size of 4 bytes (unless
574 // there are fewer bytes left). This will be overridden on success.
575 Size = std::min((size_t)4, Bytes_.size());
576
577 do {
578 // ToDo: better to switch encoding length using some bit predicate
579 // but it is unknown yet, so try all we can
580
581 // Try to decode DPP and SDWA first to solve conflict with VOP1 and VOP2
582 // encodings
583 if (isGFX1250Plus() && Bytes.size() >= 16) {
584 std::bitset<128> DecW = eat16Bytes(Bytes);
585 if (tryDecodeInst(DecoderTableGFX1250128, MI, DecW, Address, CS))
586 break;
587 Bytes = Bytes_.slice(0, MaxInstBytesNum);
588 }
589
590 if (isGFX11Plus() && Bytes.size() >= 12) {
591 std::bitset<96> DecW = eat12Bytes(Bytes);
592
593 if (isGFX11() &&
594 tryDecodeInst(DecoderTableGFX1196, DecoderTableGFX11_FAKE1696, MI,
595 DecW, Address, CS))
596 break;
597
598 if (isGFX1250() &&
599 tryDecodeInst(DecoderTableGFX125096, DecoderTableGFX1250_FAKE1696, MI,
600 DecW, Address, CS))
601 break;
602
603 if (isGFX12() &&
604 tryDecodeInst(DecoderTableGFX1296, DecoderTableGFX12_FAKE1696, MI,
605 DecW, Address, CS))
606 break;
607
608 if (isGFX12() &&
609 tryDecodeInst(DecoderTableGFX12W6496, MI, DecW, Address, CS))
610 break;
611
612 if (isGFX13() &&
613 tryDecodeInst(DecoderTableGFX1396, DecoderTableGFX13_FAKE1696, MI,
614 DecW, Address, CS))
615 break;
616
617 if (STI.hasFeature(AMDGPU::Feature64BitLiterals)) {
618 // Return 8 bytes for a potential literal.
619 Bytes = Bytes_.slice(4, MaxInstBytesNum - 4);
620
621 if (isGFX1250() &&
622 tryDecodeInst(DecoderTableGFX125096, MI, DecW, Address, CS))
623 break;
624 }
625
626 // Reinitialize Bytes
627 Bytes = Bytes_.slice(0, MaxInstBytesNum);
628
629 } else if (Bytes.size() >= 16 &&
630 STI.hasFeature(AMDGPU::FeatureGFX950Insts)) {
631 std::bitset<128> DecW = eat16Bytes(Bytes);
632 if (tryDecodeInst(DecoderTableGFX940128, MI, DecW, Address, CS))
633 break;
634
635 // Reinitialize Bytes
636 Bytes = Bytes_.slice(0, MaxInstBytesNum);
637 }
638
639 if (Bytes.size() >= 8) {
640 const uint64_t QW = eatBytes<uint64_t>(Bytes);
641
642 if (STI.hasFeature(AMDGPU::FeatureGFX10_BEncoding) &&
643 tryDecodeInst(DecoderTableGFX10_B64, MI, QW, Address, CS))
644 break;
645
646 if (STI.hasFeature(AMDGPU::FeatureUnpackedD16VMem) &&
647 tryDecodeInst(DecoderTableGFX80_UNPACKED64, MI, QW, Address, CS))
648 break;
649
650 if (STI.hasFeature(AMDGPU::FeatureGFX950Insts) &&
651 tryDecodeInst(DecoderTableGFX95064, MI, QW, Address, CS))
652 break;
653
654 // Some GFX9 subtargets repurposed the v_mad_mix_f32, v_mad_mixlo_f16 and
655 // v_mad_mixhi_f16 for FMA variants. Try to decode using this special
656 // table first so we print the correct name.
657 if (STI.hasFeature(AMDGPU::FeatureFmaMixInsts) &&
658 tryDecodeInst(DecoderTableGFX9_DL64, MI, QW, Address, CS))
659 break;
660
661 if (STI.hasFeature(AMDGPU::FeatureGFX940Insts) &&
662 tryDecodeInst(DecoderTableGFX94064, MI, QW, Address, CS))
663 break;
664
665 if (STI.hasFeature(AMDGPU::FeatureGFX90AInsts) &&
666 tryDecodeInst(DecoderTableGFX90A64, MI, QW, Address, CS))
667 break;
668
669 if ((isVI() || isGFX9()) &&
670 tryDecodeInst(DecoderTableGFX864, MI, QW, Address, CS))
671 break;
672
673 if (isGFX9() && tryDecodeInst(DecoderTableGFX964, MI, QW, Address, CS))
674 break;
675
676 if (isGFX10() && tryDecodeInst(DecoderTableGFX1064, MI, QW, Address, CS))
677 break;
678
679 if (isGFX1250() &&
680 tryDecodeInst(DecoderTableGFX125064, DecoderTableGFX1250_FAKE1664, MI,
681 QW, Address, CS))
682 break;
683
684 if (isGFX12() &&
685 tryDecodeInst(DecoderTableGFX1264, DecoderTableGFX12_FAKE1664, MI, QW,
686 Address, CS))
687 break;
688
689 if (isGFX1170() &&
690 tryDecodeInst(DecoderTableGFX117064, MI, QW, Address, CS))
691 break;
692
693 if (isGFX11() &&
694 tryDecodeInst(DecoderTableGFX1164, DecoderTableGFX11_FAKE1664, MI, QW,
695 Address, CS))
696 break;
697
698 if (isGFX1170() &&
699 tryDecodeInst(DecoderTableGFX1170W6464, MI, QW, Address, CS))
700 break;
701
702 if (isGFX11() &&
703 tryDecodeInst(DecoderTableGFX11W6464, MI, QW, Address, CS))
704 break;
705
706 if (isGFX12() &&
707 tryDecodeInst(DecoderTableGFX12W6464, MI, QW, Address, CS))
708 break;
709
710 if (isGFX13() &&
711 tryDecodeInst(DecoderTableGFX1364, DecoderTableGFX13_FAKE1664, MI, QW,
712 Address, CS))
713 break;
714
715 // Reinitialize Bytes
716 Bytes = Bytes_.slice(0, MaxInstBytesNum);
717 }
718
719 // Try decode 32-bit instruction
720 if (Bytes.size() >= 4) {
721 const uint32_t DW = eatBytes<uint32_t>(Bytes);
722
723 if ((isVI() || isGFX9()) &&
724 tryDecodeInst(DecoderTableGFX832, MI, DW, Address, CS))
725 break;
726
727 if (tryDecodeInst(DecoderTableAMDGPU32, MI, DW, Address, CS))
728 break;
729
730 if (isGFX9() && tryDecodeInst(DecoderTableGFX932, MI, DW, Address, CS))
731 break;
732
733 if (STI.hasFeature(AMDGPU::FeatureGFX950Insts) &&
734 tryDecodeInst(DecoderTableGFX95032, MI, DW, Address, CS))
735 break;
736
737 if (STI.hasFeature(AMDGPU::FeatureGFX90AInsts) &&
738 tryDecodeInst(DecoderTableGFX90A32, MI, DW, Address, CS))
739 break;
740
741 if (STI.hasFeature(AMDGPU::FeatureGFX10_BEncoding) &&
742 tryDecodeInst(DecoderTableGFX10_B32, MI, DW, Address, CS))
743 break;
744
745 if (isGFX10() && tryDecodeInst(DecoderTableGFX1032, MI, DW, Address, CS))
746 break;
747
748 if (isGFX11() &&
749 tryDecodeInst(DecoderTableGFX1132, DecoderTableGFX11_FAKE1632, MI, DW,
750 Address, CS))
751 break;
752
753 if (isGFX1250() &&
754 tryDecodeInst(DecoderTableGFX125032, DecoderTableGFX1250_FAKE1632, MI,
755 DW, Address, CS))
756 break;
757
758 if (isGFX12() &&
759 tryDecodeInst(DecoderTableGFX1232, DecoderTableGFX12_FAKE1632, MI, DW,
760 Address, CS))
761 break;
762
763 if (isGFX13() &&
764 tryDecodeInst(DecoderTableGFX1332, DecoderTableGFX13_FAKE1632, MI, DW,
765 Address, CS))
766 break;
767 }
768
770 } while (false);
771
773
774 decodeImmOperands(MI, *MCII);
775
776 if (MCII->get(MI.getOpcode()).TSFlags & SIInstrFlags::DPP) {
777 if (isMacDPP(MI))
779
780 if (MCII->get(MI.getOpcode()).TSFlags & SIInstrFlags::VOP3P)
782 else if (MCII->get(MI.getOpcode()).TSFlags & SIInstrFlags::VOPC)
783 convertVOPCDPPInst(MI); // Special VOP3 case
784 else if (AMDGPU::isVOPC64DPP(MI.getOpcode()))
785 convertVOPC64DPPInst(MI); // Special VOP3 case
786 else if (AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::dpp8) !=
787 -1)
789 else if (MCII->get(MI.getOpcode()).TSFlags & SIInstrFlags::VOP3)
790 convertVOP3DPPInst(MI); // Regular VOP3 case
791 }
792
794
795 if (AMDGPU::isMAC(MI.getOpcode())) {
796 // Insert dummy unused src2_modifiers.
798 AMDGPU::OpName::src2_modifiers);
799 }
800
801 if (MI.getOpcode() == AMDGPU::V_CVT_SR_BF8_F32_e64_dpp ||
802 MI.getOpcode() == AMDGPU::V_CVT_SR_FP8_F32_e64_dpp) {
803 // Insert dummy unused src2_modifiers.
805 AMDGPU::OpName::src2_modifiers);
806 }
807
808 if ((MCII->get(MI.getOpcode()).TSFlags & SIInstrFlags::DS) &&
810 insertNamedMCOperand(MI, MCOperand::createImm(0), AMDGPU::OpName::gds);
811 }
812
813 if (MCII->get(MI.getOpcode()).TSFlags &
815 int CPolPos = AMDGPU::getNamedOperandIdx(MI.getOpcode(),
816 AMDGPU::OpName::cpol);
817 if (CPolPos != -1) {
818 unsigned CPol =
819 (MCII->get(MI.getOpcode()).TSFlags & SIInstrFlags::IsAtomicRet) ?
821 if (MI.getNumOperands() <= (unsigned)CPolPos) {
823 AMDGPU::OpName::cpol);
824 } else if (CPol) {
825 MI.getOperand(CPolPos).setImm(MI.getOperand(CPolPos).getImm() | CPol);
826 }
827 }
828 }
829
830 if ((MCII->get(MI.getOpcode()).TSFlags &
832 (STI.hasFeature(AMDGPU::FeatureGFX90AInsts))) {
833 // GFX90A lost TFE, its place is occupied by ACC.
834 int TFEOpIdx =
835 AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::tfe);
836 if (TFEOpIdx != -1) {
837 auto *TFEIter = MI.begin();
838 std::advance(TFEIter, TFEOpIdx);
839 MI.insert(TFEIter, MCOperand::createImm(0));
840 }
841 }
842
843 // Validate buffer instruction offsets for GFX12+ - must not be a negative.
845 int OffsetIdx =
846 AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::offset);
847 if (OffsetIdx != -1) {
848 uint32_t Imm = MI.getOperand(OffsetIdx).getImm();
849 int64_t SignedOffset = SignExtend64<24>(Imm);
850 if (SignedOffset < 0)
852 }
853 }
854
855 if (MCII->get(MI.getOpcode()).TSFlags &
857 int SWZOpIdx =
858 AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::swz);
859 if (SWZOpIdx != -1) {
860 auto *SWZIter = MI.begin();
861 std::advance(SWZIter, SWZOpIdx);
862 MI.insert(SWZIter, MCOperand::createImm(0));
863 }
864 }
865
866 const MCInstrDesc &Desc = MCII->get(MI.getOpcode());
867 if (Desc.TSFlags & SIInstrFlags::MIMG) {
868 int VAddr0Idx =
869 AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::vaddr0);
870 int RsrcIdx =
871 AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::srsrc);
872 unsigned NSAArgs = RsrcIdx - VAddr0Idx - 1;
873 if (VAddr0Idx >= 0 && NSAArgs > 0) {
874 unsigned NSAWords = (NSAArgs + 3) / 4;
875 if (Bytes.size() < 4 * NSAWords)
877 for (unsigned i = 0; i < NSAArgs; ++i) {
878 const unsigned VAddrIdx = VAddr0Idx + 1 + i;
879 auto VAddrRCID =
880 MCII->getOpRegClassID(Desc.operands()[VAddrIdx], HwModeRegClass);
881 MI.insert(MI.begin() + VAddrIdx, createRegOperand(VAddrRCID, Bytes[i]));
882 }
883 Bytes = Bytes.slice(4 * NSAWords);
884 }
885
887 }
888
889 if (MCII->get(MI.getOpcode()).TSFlags &
892
893 if (MCII->get(MI.getOpcode()).TSFlags & SIInstrFlags::EXP)
895
896 if (MCII->get(MI.getOpcode()).TSFlags & SIInstrFlags::VINTERP)
898
899 if (MCII->get(MI.getOpcode()).TSFlags & SIInstrFlags::SDWA)
901
902 if (MCII->get(MI.getOpcode()).TSFlags & SIInstrFlags::IsMAI)
904
905 if (MCII->get(MI.getOpcode()).TSFlags & SIInstrFlags::IsWMMA)
907
908 int VDstIn_Idx = AMDGPU::getNamedOperandIdx(MI.getOpcode(),
909 AMDGPU::OpName::vdst_in);
910 if (VDstIn_Idx != -1) {
911 int Tied = MCII->get(MI.getOpcode()).getOperandConstraint(VDstIn_Idx,
913 if (Tied != -1 && (MI.getNumOperands() <= (unsigned)VDstIn_Idx ||
914 !MI.getOperand(VDstIn_Idx).isReg() ||
915 MI.getOperand(VDstIn_Idx).getReg() != MI.getOperand(Tied).getReg())) {
916 if (MI.getNumOperands() > (unsigned)VDstIn_Idx)
917 MI.erase(&MI.getOperand(VDstIn_Idx));
919 MCOperand::createReg(MI.getOperand(Tied).getReg()),
920 AMDGPU::OpName::vdst_in);
921 }
922 }
923
924 bool IsSOPK = MCII->get(MI.getOpcode()).TSFlags & SIInstrFlags::SOPK;
925 if (AMDGPU::hasNamedOperand(MI.getOpcode(), AMDGPU::OpName::imm) && !IsSOPK)
927
928 // Some VOPC instructions, e.g., v_cmpx_f_f64, use VOP3 encoding and
929 // have EXEC as implicit destination. Issue a warning if encoding for
930 // vdst is not EXEC.
931 if ((MCII->get(MI.getOpcode()).TSFlags & SIInstrFlags::VOP3) &&
932 MCII->get(MI.getOpcode()).getNumDefs() == 0 &&
933 MCII->get(MI.getOpcode()).hasImplicitDefOfPhysReg(AMDGPU::EXEC)) {
934 auto ExecEncoding = MRI.getEncodingValue(AMDGPU::EXEC_LO);
935 if (Bytes_[0] != ExecEncoding)
937 }
938
939 Size = MaxInstBytesNum - Bytes.size();
940 return Status;
941}
942
944 if (STI.hasFeature(AMDGPU::FeatureGFX11Insts)) {
945 // The MCInst still has these fields even though they are no longer encoded
946 // in the GFX11 instruction.
947 insertNamedMCOperand(MI, MCOperand::createImm(0), AMDGPU::OpName::vm);
948 insertNamedMCOperand(MI, MCOperand::createImm(0), AMDGPU::OpName::compr);
949 }
950}
951
954 if (MI.getOpcode() == AMDGPU::V_INTERP_P10_F16_F32_inreg_t16_gfx11 ||
955 MI.getOpcode() == AMDGPU::V_INTERP_P10_F16_F32_inreg_fake16_gfx11 ||
956 MI.getOpcode() == AMDGPU::V_INTERP_P10_F16_F32_inreg_t16_gfx12 ||
957 MI.getOpcode() == AMDGPU::V_INTERP_P10_F16_F32_inreg_fake16_gfx12 ||
958 MI.getOpcode() == AMDGPU::V_INTERP_P10_F16_F32_inreg_t16_gfx13 ||
959 MI.getOpcode() == AMDGPU::V_INTERP_P10_F16_F32_inreg_fake16_gfx13 ||
960 MI.getOpcode() == AMDGPU::V_INTERP_P10_RTZ_F16_F32_inreg_t16_gfx11 ||
961 MI.getOpcode() == AMDGPU::V_INTERP_P10_RTZ_F16_F32_inreg_fake16_gfx11 ||
962 MI.getOpcode() == AMDGPU::V_INTERP_P10_RTZ_F16_F32_inreg_t16_gfx12 ||
963 MI.getOpcode() == AMDGPU::V_INTERP_P10_RTZ_F16_F32_inreg_fake16_gfx12 ||
964 MI.getOpcode() == AMDGPU::V_INTERP_P10_RTZ_F16_F32_inreg_t16_gfx13 ||
965 MI.getOpcode() == AMDGPU::V_INTERP_P10_RTZ_F16_F32_inreg_fake16_gfx13 ||
966 MI.getOpcode() == AMDGPU::V_INTERP_P2_F16_F32_inreg_t16_gfx11 ||
967 MI.getOpcode() == AMDGPU::V_INTERP_P2_F16_F32_inreg_fake16_gfx11 ||
968 MI.getOpcode() == AMDGPU::V_INTERP_P2_F16_F32_inreg_t16_gfx12 ||
969 MI.getOpcode() == AMDGPU::V_INTERP_P2_F16_F32_inreg_fake16_gfx12 ||
970 MI.getOpcode() == AMDGPU::V_INTERP_P2_F16_F32_inreg_t16_gfx13 ||
971 MI.getOpcode() == AMDGPU::V_INTERP_P2_F16_F32_inreg_fake16_gfx13 ||
972 MI.getOpcode() == AMDGPU::V_INTERP_P2_RTZ_F16_F32_inreg_t16_gfx11 ||
973 MI.getOpcode() == AMDGPU::V_INTERP_P2_RTZ_F16_F32_inreg_fake16_gfx11 ||
974 MI.getOpcode() == AMDGPU::V_INTERP_P2_RTZ_F16_F32_inreg_t16_gfx12 ||
975 MI.getOpcode() == AMDGPU::V_INTERP_P2_RTZ_F16_F32_inreg_fake16_gfx12 ||
976 MI.getOpcode() == AMDGPU::V_INTERP_P2_RTZ_F16_F32_inreg_t16_gfx13 ||
977 MI.getOpcode() == AMDGPU::V_INTERP_P2_RTZ_F16_F32_inreg_fake16_gfx13) {
978 // The MCInst has this field that is not directly encoded in the
979 // instruction.
980 insertNamedMCOperand(MI, MCOperand::createImm(0), AMDGPU::OpName::op_sel);
981 }
982}
983
// NOTE(review): docs-site excerpt — the function signature (doc line 984) was
// dropped by extraction. Presumably this is the SDWA post-decode fixup that
// inserts operands the encoding does not carry; confirm against upstream.
985 if (STI.hasFeature(AMDGPU::FeatureGFX9) ||
986 STI.hasFeature(AMDGPU::FeatureGFX10)) {
987 if (AMDGPU::hasNamedOperand(MI.getOpcode(), AMDGPU::OpName::sdst))
988 // VOPC - insert clamp
989 insertNamedMCOperand(MI, MCOperand::createImm(0), AMDGPU::OpName::clamp);
990 } else if (STI.hasFeature(AMDGPU::FeatureVolcanicIslands)) {
991 int SDst = AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::sdst);
992 if (SDst != -1) {
993 // VOPC - insert VCC register as sdst
// NOTE(review): the first half of this call (doc line 994) is missing from
// this excerpt; only the trailing argument survived.
995 AMDGPU::OpName::sdst);
996 } else {
997 // VOP1/2 - insert omod if present in instruction
998 insertNamedMCOperand(MI, MCOperand::createImm(0), AMDGPU::OpName::omod);
999 }
1000 }
1001}
1002
1003/// Adjust the register values used by V_MFMA_F8F6F4_f8_f8 instructions to the
1004/// appropriate subregister for the used format width.
// NOTE(review): the opening signature line (doc line 1005, which names the
// function and its MCRegisterInfo parameter) is missing from this excerpt.
1006 MCOperand &MO, uint8_t NumRegs) {
// NumRegs counts 32-bit registers actually used by the format.
1007 switch (NumRegs) {
1008 case 4:
1009 return MO.setReg(MRI.getSubReg(MO.getReg(), AMDGPU::sub0_sub1_sub2_sub3));
1010 case 6:
1011 return MO.setReg(
1012 MRI.getSubReg(MO.getReg(), AMDGPU::sub0_sub1_sub2_sub3_sub4_sub5));
1013 case 8:
// Only narrow when the 8-dword subreg actually exists for this register.
1014 if (MCRegister NewReg = MRI.getSubReg(
1015 MO.getReg(), AMDGPU::sub0_sub1_sub2_sub3_sub4_sub5_sub6_sub7)) {
1016 MO.setReg(NewReg);
1017 }
1018 return;
1019 case 12: {
1020 // There is no 384-bit subreg index defined.
// Instead, rebase on sub0 and find the matching 384-bit super-register.
1021 MCRegister BaseReg = MRI.getSubReg(MO.getReg(), AMDGPU::sub0);
1022 MCRegister NewReg = MRI.getMatchingSuperReg(
1023 BaseReg, AMDGPU::sub0, &MRI.getRegClass(AMDGPU::VReg_384RegClassID));
1024 return MO.setReg(NewReg);
1025 }
1026 case 16:
1027 // No-op in cases where one operand is still f8/bf8.
1028 return;
1029 default:
1030 llvm_unreachable("Unexpected size for mfma/wmma f8f6f4 operand");
1031 }
1032}
1033
1034/// f8f6f4 instructions have different pseudos depending on the used formats. In
1035/// the disassembler table, we only have the variants with the largest register
1036/// classes which assume using an fp8/bf8 format for both operands. The actual
1037/// register class depends on the format in blgp and cbsz operands. Adjust the
1038/// register classes depending on the used format.
// NOTE(review): the signature line (doc line 1039) is missing from this
// excerpt — presumably the MFMA variant of this fixup; confirm upstream.
1040 int BlgpIdx =
1041 AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::blgp);
// No blgp operand means this is not an f8f6f4 MFMA; nothing to adjust.
1042 if (BlgpIdx == -1)
1043 return;
1044
1045 int CbszIdx =
1046 AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::cbsz);
1047
1048 unsigned CBSZ = MI.getOperand(CbszIdx).getImm();
1049 unsigned BLGP = MI.getOperand(BlgpIdx).getImm();
1050
// Look up the opcode variant matching the encoded formats.
1051 const AMDGPU::MFMA_F8F6F4_Info *AdjustedRegClassOpcode =
1052 AMDGPU::getMFMA_F8F6F4_WithFormatArgs(CBSZ, BLGP, MI.getOpcode());
1053 if (!AdjustedRegClassOpcode ||
1054 AdjustedRegClassOpcode->Opcode == MI.getOpcode())
1055 return;
1056
1057 MI.setOpcode(AdjustedRegClassOpcode->Opcode);
1058 int Src0Idx =
1059 AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::src0);
1060 int Src1Idx =
1061 AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::src1);
// Narrow both source operands to the register width of the chosen formats.
1062 adjustMFMA_F8F6F4OpRegClass(MRI, MI.getOperand(Src0Idx),
1063 AdjustedRegClassOpcode->NumRegsSrcA);
1064 adjustMFMA_F8F6F4OpRegClass(MRI, MI.getOperand(Src1Idx),
1065 AdjustedRegClassOpcode->NumRegsSrcB);
1066}
1067
// NOTE(review): signature line (doc line 1068) missing from this excerpt.
// Mirrors the MFMA f8f6f4 adjustment above, keyed on matrix_a_fmt/matrix_b_fmt
// instead of cbsz/blgp — presumably the WMMA variant; confirm upstream.
1069 int FmtAIdx =
1070 AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::matrix_a_fmt);
1071 if (FmtAIdx == -1)
1072 return;
1073
1074 int FmtBIdx =
1075 AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::matrix_b_fmt);
1076
1077 unsigned FmtA = MI.getOperand(FmtAIdx).getImm();
1078 unsigned FmtB = MI.getOperand(FmtBIdx).getImm();
1079
1080 const AMDGPU::MFMA_F8F6F4_Info *AdjustedRegClassOpcode =
1081 AMDGPU::getWMMA_F8F6F4_WithFormatArgs(FmtA, FmtB, MI.getOpcode());
1082 if (!AdjustedRegClassOpcode ||
1083 AdjustedRegClassOpcode->Opcode == MI.getOpcode())
1084 return;
1085
1086 MI.setOpcode(AdjustedRegClassOpcode->Opcode);
1087 int Src0Idx =
1088 AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::src0);
1089 int Src1Idx =
1090 AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::src1);
1091 adjustMFMA_F8F6F4OpRegClass(MRI, MI.getOperand(Src0Idx),
1092 AdjustedRegClassOpcode->NumRegsSrcA);
1093 adjustMFMA_F8F6F4OpRegClass(MRI, MI.getOperand(Src1Idx),
1094 AdjustedRegClassOpcode->NumRegsSrcB);
1095}
1096
// NOTE(review): the struct header (doc line 1097) is missing from this
// excerpt. Fields hold per-instruction modifier bitmasks reconstructed by
// collectVOPModifiers() below; one bit per source operand.
1098 unsigned OpSel = 0;
1099 unsigned OpSelHi = 0;
1100 unsigned NegLo = 0;
1101 unsigned NegHi = 0;
1102};
1103
1104// Reconstruct values of VOP3/VOP3P operands such as op_sel.
1105// Note that these values do not affect disassembler output,
1106// so this is only necessary for consistency with src_modifiers.
// NOTE(review): the signature line (doc line 1107) is missing; the trailing
// parameter below shows it takes the MCInst plus an IsVOP3P flag.
1108 bool IsVOP3P = false) {
1109 VOPModifiers Modifiers;
1110 unsigned Opc = MI.getOpcode();
1111 const AMDGPU::OpName ModOps[] = {AMDGPU::OpName::src0_modifiers,
1112 AMDGPU::OpName::src1_modifiers,
1113 AMDGPU::OpName::src2_modifiers};
// Gather one bit per source operand (J = bit position) from src*_modifiers.
1114 for (int J = 0; J < 3; ++J) {
1115 int OpIdx = AMDGPU::getNamedOperandIdx(Opc, ModOps[J]);
1116 if (OpIdx == -1)
1117 continue;
1118
1119 unsigned Val = MI.getOperand(OpIdx).getImm();
1120
1121 Modifiers.OpSel |= !!(Val & SISrcMods::OP_SEL_0) << J;
1122 if (IsVOP3P) {
1123 Modifiers.OpSelHi |= !!(Val & SISrcMods::OP_SEL_1) << J;
1124 Modifiers.NegLo |= !!(Val & SISrcMods::NEG) << J;
1125 Modifiers.NegHi |= !!(Val & SISrcMods::NEG_HI) << J;
1126 } else if (J == 0) {
// For VOP3, the destination op_sel bit rides in src0_modifiers (bit 3).
1127 Modifiers.OpSel |= !!(Val & SISrcMods::DST_OP_SEL) << 3;
1128 }
1129 }
1130
1131 return Modifiers;
1132}
1133
1134// Instructions decode the op_sel/suffix bits into the src_modifier
1135// operands. Copy those bits into the src operands for true16 VGPRs.
// NOTE(review): the signature line (doc line 1136) and the mask constants on
// doc lines 1142/1144/1146/1148 (the third tuple elements) are missing from
// this excerpt.
1137 const unsigned Opc = MI.getOpcode();
1138 const MCRegisterClass &ConversionRC =
1139 MRI.getRegClass(AMDGPU::VGPR_16RegClassID);
1140 constexpr std::array<std::tuple<AMDGPU::OpName, AMDGPU::OpName, unsigned>, 4>
1141 OpAndOpMods = {{{AMDGPU::OpName::src0, AMDGPU::OpName::src0_modifiers,
1143 {AMDGPU::OpName::src1, AMDGPU::OpName::src1_modifiers,
1145 {AMDGPU::OpName::src2, AMDGPU::OpName::src2_modifiers,
1147 {AMDGPU::OpName::vdst, AMDGPU::OpName::src0_modifiers,
1149 for (const auto &[OpName, OpModsName, OpSelMask] : OpAndOpMods) {
1150 int OpIdx = AMDGPU::getNamedOperandIdx(Opc, OpName);
1151 int OpModsIdx = AMDGPU::getNamedOperandIdx(Opc, OpModsName);
1152 if (OpIdx == -1 || OpModsIdx == -1)
1153 continue;
1154 MCOperand &Op = MI.getOperand(OpIdx);
1155 if (!Op.isReg())
1156 continue;
// Only 16-bit VGPR operands participate in the hi/lo-half selection.
1157 if (!ConversionRC.contains(Op.getReg()))
1158 continue;
1159 unsigned OpEnc = MRI.getEncodingValue(Op.getReg());
1160 const MCOperand &OpMods = MI.getOperand(OpModsIdx);
1161 unsigned ModVal = OpMods.getImm();
1162 if (ModVal & OpSelMask) { // isHi
// VGPR_16 registers alternate lo/hi halves; +1 selects the hi half.
1163 unsigned RegIdx = OpEnc & AMDGPU::HWEncoding::REG_IDX_MASK;
1164 Op.setReg(ConversionRC.getRegister(RegIdx * 2 + 1));
1165 }
1166 }
1167}
1168
1169// MAC opcodes have special old and src2 operands.
1170// src2 is tied to dst, while old is not tied (but assumed to be).
// NOTE(review): the signature (doc line 1171) and part of the second assert
// (doc line 1182, presumably "== DST_IDX)") are missing from this excerpt.
1172 constexpr int DST_IDX = 0;
1173 auto Opcode = MI.getOpcode();
1174 const auto &Desc = MCII->get(Opcode);
1175 auto OldIdx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::old);
1176
// A MAC is identified by an 'old' operand that carries no TIED_TO constraint.
1177 if (OldIdx != -1 && Desc.getOperandConstraint(
1178 OldIdx, MCOI::OperandConstraint::TIED_TO) == -1) {
1179 assert(AMDGPU::hasNamedOperand(Opcode, AMDGPU::OpName::src2));
1180 assert(Desc.getOperandConstraint(
1181 AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2),
1183 (void)DST_IDX;
1184 return true;
1185 }
1186
1187 return false;
1188}
1189
1190// Create dummy old operand and insert dummy unused src2_modifiers
// NOTE(review): the signature (doc line 1191) and the first half of the
// second insert call (doc line 1194) are missing from this excerpt.
1192 assert(MI.getNumOperands() + 1 < MCII->get(MI.getOpcode()).getNumOperands());
1193 insertNamedMCOperand(MI, MCOperand::createReg(0), AMDGPU::OpName::old);
1195 AMDGPU::OpName::src2_modifiers);
1196}
1197
// NOTE(review): the signature (doc line 1198) and several insert-call lines
// (doc lines 1209/1211, 1217, 1222) are missing from this excerpt; only
// their trailing argument lines survived.
1199 unsigned Opc = MI.getOpcode();
1200
// Duplicate vdst as vdst_in: the encoding carries only one copy.
1201 int VDstInIdx =
1202 AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::vdst_in);
1203 if (VDstInIdx != -1)
1204 insertNamedMCOperand(MI, MI.getOperand(0), AMDGPU::OpName::vdst_in);
1205
1206 unsigned DescNumOps = MCII->get(Opc).getNumOperands();
1207 if (MI.getNumOperands() < DescNumOps &&
1208 AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::op_sel)) {
1210 auto Mods = collectVOPModifiers(MI);
1212 AMDGPU::OpName::op_sel);
1213 } else {
1214 // Insert dummy unused src modifiers.
1215 if (MI.getNumOperands() < DescNumOps &&
1216 AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::src0_modifiers))
1218 AMDGPU::OpName::src0_modifiers);
1219
1220 if (MI.getNumOperands() < DescNumOps &&
1221 AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::src1_modifiers))
1223 AMDGPU::OpName::src1_modifiers);
1224 }
1225}
1226
// NOTE(review): the signature (doc line 1228) and the first half of the
// op_sel insert call (doc line 1240) are missing from this excerpt.
1229
1230 int VDstInIdx =
1231 AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::vdst_in);
// As in the DPP8 path: mirror vdst into the vdst_in operand when present.
1232 if (VDstInIdx != -1)
1233 insertNamedMCOperand(MI, MI.getOperand(0), AMDGPU::OpName::vdst_in);
1234
1235 unsigned Opc = MI.getOpcode();
1236 unsigned DescNumOps = MCII->get(Opc).getNumOperands();
1237 if (MI.getNumOperands() < DescNumOps &&
1238 AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::op_sel)) {
1239 auto Mods = collectVOPModifiers(MI);
1241 AMDGPU::OpName::op_sel);
1242 }
1243}
1244
1245// Given a wide tuple \p Reg check if it will overflow 256 registers.
1246// \returns \p Reg on success or NoRegister otherwise.
// NOTE(review): the opening signature line (doc line 1247) is missing from
// this excerpt; the surviving parameter line shows it also takes the MRI.
1248 const MCRegisterInfo &MRI) {
1249 unsigned NumRegs = RC.getSizeInBits() / 32;
1250 MCRegister Sub0 = MRI.getSubReg(Reg, AMDGPU::sub0);
// A register without a sub0 is not a tuple — nothing can overflow.
1251 if (!Sub0)
1252 return Reg;
1253
1254 MCRegister BaseReg;
1255 if (MRI.getRegClass(AMDGPU::VGPR_32RegClassID).contains(Sub0))
1256 BaseReg = AMDGPU::VGPR0;
1257 else if (MRI.getRegClass(AMDGPU::AGPR_32RegClassID).contains(Sub0))
1258 BaseReg = AMDGPU::AGPR0;
1259
1260 assert(BaseReg && "Only vector registers expected");
1261
// First index + tuple width must stay within the 256-register file.
1262 return (Sub0 - BaseReg + NumRegs <= 256) ? Reg : MCRegister();
1263}
1264
1265// Note that before gfx10, the MIMG encoding provided no information about
1266// VADDR size. Consequently, decoded instructions always show address as if it
1267// has 1 dword, which could be not really so.
// NOTE(review): the signature line (doc line 1268) is missing from this
// excerpt. This routine re-selects the MIMG opcode so that the vdata/vaddr
// operand widths match dmask/tfe/d16 and the address-size computation.
1269 auto TSFlags = MCII->get(MI.getOpcode()).TSFlags;
1270
1271 int VDstIdx = AMDGPU::getNamedOperandIdx(MI.getOpcode(),
1272 AMDGPU::OpName::vdst);
1273
1274 int VDataIdx = AMDGPU::getNamedOperandIdx(MI.getOpcode(),
1275 AMDGPU::OpName::vdata);
1276 int VAddr0Idx =
1277 AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::vaddr0);
1278 AMDGPU::OpName RsrcOpName = (TSFlags & SIInstrFlags::MIMG)
1279 ? AMDGPU::OpName::srsrc
1280 : AMDGPU::OpName::rsrc;
1281 int RsrcIdx = AMDGPU::getNamedOperandIdx(MI.getOpcode(), RsrcOpName);
1282 int DMaskIdx = AMDGPU::getNamedOperandIdx(MI.getOpcode(),
1283 AMDGPU::OpName::dmask);
1284
1285 int TFEIdx = AMDGPU::getNamedOperandIdx(MI.getOpcode(),
1286 AMDGPU::OpName::tfe);
1287 int D16Idx = AMDGPU::getNamedOperandIdx(MI.getOpcode(),
1288 AMDGPU::OpName::d16);
1289
1290 const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(MI.getOpcode());
1291 const AMDGPU::MIMGBaseOpcodeInfo *BaseOpcode =
1292 AMDGPU::getMIMGBaseOpcodeInfo(Info->BaseOpcode);
1293
1294 assert(VDataIdx != -1);
1295 if (BaseOpcode->BVH) {
1296 // Add A16 operand for intersect_ray instructions
1297 addOperand(MI, MCOperand::createImm(BaseOpcode->A16));
1298 return;
1299 }
1300
1301 bool IsAtomic = (VDstIdx != -1);
1302 bool IsGather4 = TSFlags & SIInstrFlags::Gather4;
1303 bool IsVSample = TSFlags & SIInstrFlags::VSAMPLE;
1304 bool IsNSA = false;
1305 bool IsPartialNSA = false;
1306 unsigned AddrSize = Info->VAddrDwords;
1307
// GFX10+ encodes dim/a16, so the true address size can be recomputed.
1308 if (isGFX10Plus()) {
1309 unsigned DimIdx =
1310 AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::dim);
1311 int A16Idx =
1312 AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::a16);
1313 const AMDGPU::MIMGDimInfo *Dim =
1314 AMDGPU::getMIMGDimInfoByEncoding(MI.getOperand(DimIdx).getImm());
1315 const bool IsA16 = (A16Idx != -1 && MI.getOperand(A16Idx).getImm());
1316
1317 AddrSize =
1318 AMDGPU::getAddrSizeMIMGOp(BaseOpcode, Dim, IsA16, AMDGPU::hasG16(STI));
1319
1320 // VSAMPLE insts that do not use vaddr3 behave the same as NSA forms.
1321 // VIMAGE insts other than BVH never use vaddr4.
1322 IsNSA = Info->MIMGEncoding == AMDGPU::MIMGEncGfx10NSA ||
1323 Info->MIMGEncoding == AMDGPU::MIMGEncGfx11NSA ||
1324 Info->MIMGEncoding == AMDGPU::MIMGEncGfx12;
1325 if (!IsNSA) {
1326 if (!IsVSample && AddrSize > 12)
1327 AddrSize = 16;
1328 } else {
1329 if (AddrSize > Info->VAddrDwords) {
1330 if (!STI.hasFeature(AMDGPU::FeaturePartialNSAEncoding)) {
1331 // The NSA encoding does not contain enough operands for the
1332 // combination of base opcode / dimension. Should this be an error?
1333 return;
1334 }
1335 IsPartialNSA = true;
1336 }
1337 }
1338 }
1339
// Data size: one dword per enabled dmask channel (gather4 is always 4),
// halved for packed D16, plus one for TFE status.
1340 unsigned DMask = MI.getOperand(DMaskIdx).getImm() & 0xf;
1341 unsigned DstSize = IsGather4 ? 4 : std::max(llvm::popcount(DMask), 1);
1342
1343 bool D16 = D16Idx >= 0 && MI.getOperand(D16Idx).getImm();
1344 if (D16 && AMDGPU::hasPackedD16(STI)) {
1345 DstSize = (DstSize + 1) / 2;
1346 }
1347
1348 if (TFEIdx != -1 && MI.getOperand(TFEIdx).getImm())
1349 DstSize += 1;
1350
1351 if (DstSize == Info->VDataDwords && AddrSize == Info->VAddrDwords)
1352 return;
1353
1354 int NewOpcode =
1355 AMDGPU::getMIMGOpcode(Info->BaseOpcode, Info->MIMGEncoding, DstSize, AddrSize);
1356 if (NewOpcode == -1)
1357 return;
1358
1359 // Widen the register to the correct number of enabled channels.
1360 MCRegister NewVdata;
1361 if (DstSize != Info->VDataDwords) {
1362 auto DataRCID = MCII->getOpRegClassID(
1363 MCII->get(NewOpcode).operands()[VDataIdx], HwModeRegClass);
1364
1365 // Get first subregister of VData
1366 MCRegister Vdata0 = MI.getOperand(VDataIdx).getReg();
1367 MCRegister VdataSub0 = MRI.getSubReg(Vdata0, AMDGPU::sub0);
1368 Vdata0 = (VdataSub0 != 0)? VdataSub0 : Vdata0;
1369
1370 const MCRegisterClass &NewRC = MRI.getRegClass(DataRCID);
1371 NewVdata = MRI.getMatchingSuperReg(Vdata0, AMDGPU::sub0, &NewRC);
1372 NewVdata = CheckVGPROverflow(NewVdata, NewRC, MRI);
1373 if (!NewVdata) {
1374 // It's possible to encode this such that the low register + enabled
1375 // components exceeds the register count.
1376 return;
1377 }
1378 }
1379
1380 // If not using NSA on GFX10+, widen vaddr0 address register to correct size.
1381 // If using partial NSA on GFX11+ widen last address register.
1382 int VAddrSAIdx = IsPartialNSA ? (RsrcIdx - 1) : VAddr0Idx;
1383 MCRegister NewVAddrSA;
1384 if (STI.hasFeature(AMDGPU::FeatureNSAEncoding) && (!IsNSA || IsPartialNSA) &&
1385 AddrSize != Info->VAddrDwords) {
1386 MCRegister VAddrSA = MI.getOperand(VAddrSAIdx).getReg();
1387 MCRegister VAddrSubSA = MRI.getSubReg(VAddrSA, AMDGPU::sub0);
1388 VAddrSA = VAddrSubSA ? VAddrSubSA : VAddrSA;
1389
1390 auto AddrRCID = MCII->getOpRegClassID(
1391 MCII->get(NewOpcode).operands()[VAddrSAIdx], HwModeRegClass);
1392
1393 const MCRegisterClass &NewRC = MRI.getRegClass(AddrRCID);
1394 NewVAddrSA = MRI.getMatchingSuperReg(VAddrSA, AMDGPU::sub0, &NewRC);
1395 NewVAddrSA = CheckVGPROverflow(NewVAddrSA, NewRC, MRI);
1396 if (!NewVAddrSA)
1397 return;
1398 }
1399
1400 MI.setOpcode(NewOpcode);
1401
1402 if (NewVdata != AMDGPU::NoRegister) {
1403 MI.getOperand(VDataIdx) = MCOperand::createReg(NewVdata);
1404
1405 if (IsAtomic) {
1406 // Atomic operations have an additional operand (a copy of data)
1407 MI.getOperand(VDstIdx) = MCOperand::createReg(NewVdata);
1408 }
1409 }
1410
1411 if (NewVAddrSA) {
1412 MI.getOperand(VAddrSAIdx) = MCOperand::createReg(NewVAddrSA);
1413 } else if (IsNSA) {
// Full NSA lists one register per dword; drop the now-unused trailing ones.
1414 assert(AddrSize <= Info->VAddrDwords);
1415 MI.erase(MI.begin() + VAddr0Idx + AddrSize,
1416 MI.begin() + VAddr0Idx + Info->VAddrDwords);
1417 }
1418}
1419
1420// Opsel and neg bits are used in src_modifiers and standalone operands. Autogen
1421// decoder only adds to src_modifiers, so manually add the bits to the other
1422// operands.
// NOTE(review): the signature (doc line 1423) and the first halves of the
// four insert calls (doc lines 1434/1438/1442/1446) are missing from this
// excerpt; only their trailing OpName arguments survived.
1424 unsigned Opc = MI.getOpcode();
1425 unsigned DescNumOps = MCII->get(Opc).getNumOperands();
1426 auto Mods = collectVOPModifiers(MI, true);
1427
1428 if (MI.getNumOperands() < DescNumOps &&
1429 AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::vdst_in))
1430 insertNamedMCOperand(MI, MCOperand::createImm(0), AMDGPU::OpName::vdst_in);
1431
1432 if (MI.getNumOperands() < DescNumOps &&
1433 AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::op_sel))
1435 AMDGPU::OpName::op_sel);
1436 if (MI.getNumOperands() < DescNumOps &&
1437 AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::op_sel_hi))
1439 AMDGPU::OpName::op_sel_hi);
1440 if (MI.getNumOperands() < DescNumOps &&
1441 AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::neg_lo))
1443 AMDGPU::OpName::neg_lo);
1444 if (MI.getNumOperands() < DescNumOps &&
1445 AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::neg_hi))
1447 AMDGPU::OpName::neg_hi);
1448}
1449
1450// Create dummy old operand and insert optional operands
// NOTE(review): the signature (doc line 1451) and the first halves of two
// insert calls (doc lines 1461 and 1466) are missing from this excerpt.
1452 unsigned Opc = MI.getOpcode();
1453 unsigned DescNumOps = MCII->get(Opc).getNumOperands();
1454
1455 if (MI.getNumOperands() < DescNumOps &&
1456 AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::old))
1457 insertNamedMCOperand(MI, MCOperand::createReg(0), AMDGPU::OpName::old);
1458
1459 if (MI.getNumOperands() < DescNumOps &&
1460 AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::src0_modifiers))
1462 AMDGPU::OpName::src0_modifiers);
1463
1464 if (MI.getNumOperands() < DescNumOps &&
1465 AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::src1_modifiers))
1467 AMDGPU::OpName::src1_modifiers);
1468}
1469
// NOTE(review): the signature (doc line 1470), a statement on doc line 1474,
// and the first half of the op_sel insert (doc lines 1478/1479) are missing
// from this excerpt.
1471 unsigned Opc = MI.getOpcode();
1472 unsigned DescNumOps = MCII->get(Opc).getNumOperands();
1473
1475
// Append the reconstructed op_sel operand when the encoding omitted it.
1476 if (MI.getNumOperands() < DescNumOps &&
1477 AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::op_sel)) {
1480 AMDGPU::OpName::op_sel);
1481 }
1482}
1483
// NOTE(review): the signature (doc line 1484) is missing from this excerpt.
// Inserts the already-decoded literal as the instruction's immX operand.
1485 assert(HasLiteral && "Should have decoded a literal");
1486 insertNamedMCOperand(MI, MCOperand::createImm(Literal), AMDGPU::OpName::immX);
1487}
1488
1489const char* AMDGPUDisassembler::getRegClassName(unsigned RegClassID) const {
1490 return getContext().getRegisterInfo()->
1491 getRegClassName(&AMDGPUMCRegisterClasses[RegClassID]);
1492}
1493
inline
// NOTE(review): the first signature line (doc line 1495, naming the function
// and its value parameter) is missing from this excerpt. Emits the message on
// the disassembler comment stream and returns an empty (invalid) MCOperand.
1496 const Twine& ErrMsg) const {
1497 *CommentStream << "Error: " + ErrMsg;
1498
1499 // ToDo: add support for error operands to MCInst.h
1500 // return MCOperand::createError(V);
1501 return MCOperand();
1502}
1503
1507
inline
// NOTE(review): the first signature line (doc line 1509) is missing from this
// excerpt. Builds a register operand from (register class ID, index), with a
// bounds check that turns an out-of-range index into an error operand.
1510 unsigned Val) const {
1511 const auto& RegCl = AMDGPUMCRegisterClasses[RegClassID];
1512 if (Val >= RegCl.getNumRegs())
1513 return errOperand(Val, Twine(getRegClassName(RegClassID)) +
1514 ": unknown register " + Twine(Val));
1515 return createRegOperand(RegCl.getRegister(Val));
1516}
1517
inline
// NOTE(review): the first signature line (doc line 1519) is missing from this
// excerpt. Converts a raw scalar-register encoding to an operand of the given
// SGPR/TTMP class, accounting for the class's alignment requirement.
1520 unsigned Val) const {
1521 // ToDo: SI/CI have 104 SGPRs, VI - 102
1522 // Valery: here we accepting as much as we can, let assembler sort it out
// 'shift' is log2 of the required register alignment for the class.
1523 int shift = 0;
1524 switch (SRegClassID) {
1525 case AMDGPU::SGPR_32RegClassID:
1526 case AMDGPU::TTMP_32RegClassID:
1527 break;
1528 case AMDGPU::SGPR_64RegClassID:
1529 case AMDGPU::TTMP_64RegClassID:
1530 shift = 1;
1531 break;
1532 case AMDGPU::SGPR_96RegClassID:
1533 case AMDGPU::TTMP_96RegClassID:
1534 case AMDGPU::SGPR_128RegClassID:
1535 case AMDGPU::TTMP_128RegClassID:
1536 // ToDo: unclear if s[100:104] is available on VI. Can we use VCC as SGPR in
1537 // this bundle?
1538 case AMDGPU::SGPR_256RegClassID:
1539 case AMDGPU::TTMP_256RegClassID:
1540 // ToDo: unclear if s[96:104] is available on VI. Can we use VCC as SGPR in
1541 // this bundle?
1542 case AMDGPU::SGPR_288RegClassID:
1543 case AMDGPU::TTMP_288RegClassID:
1544 case AMDGPU::SGPR_320RegClassID:
1545 case AMDGPU::TTMP_320RegClassID:
1546 case AMDGPU::SGPR_352RegClassID:
1547 case AMDGPU::TTMP_352RegClassID:
1548 case AMDGPU::SGPR_384RegClassID:
1549 case AMDGPU::TTMP_384RegClassID:
1550 case AMDGPU::SGPR_512RegClassID:
1551 case AMDGPU::TTMP_512RegClassID:
1552 shift = 2;
1553 break;
1554 // ToDo: unclear if s[88:104] is available on VI. Can we use VCC as SGPR in
1555 // this bundle?
1556 default:
1557 llvm_unreachable("unhandled register class");
1558 }
1559
// Misaligned encodings are accepted but flagged in the disassembly comment.
1560 if (Val % (1 << shift)) {
1561 *CommentStream << "Warning: " << getRegClassName(SRegClassID)
1562 << ": scalar reg isn't aligned " << Val;
1563 }
1564
1565 return createRegOperand(SRegClassID, Val >> shift);
1566}
1567
// NOTE(review): the first signature line (doc line 1568, with the RegIdx
// parameter) is missing from this excerpt. VGPR_16 registers alternate
// lo/hi halves, so half selection is the low bit of the class index.
1569 bool IsHi) const {
1570 unsigned RegIdxInVGPR16 = RegIdx * 2 + (IsHi ? 1 : 0);
1571 return createRegOperand(AMDGPU::VGPR_16RegClassID, RegIdxInVGPR16);
1572}
1573
1574// Decode Literals for insts which always have a literal in the encoding
// NOTE(review): the signature (doc lines 1575-1576) and the assert condition
// (doc line 1579) are missing from this excerpt. Records the literal; a
// second, different literal in the same instruction is an error.
1577 if (HasLiteral) {
1578 assert(
1580 "Should only decode multiple kimm with VOPD, check VSrc operand types");
1581 if (Literal != Val)
1582 return errOperand(Val, "More than one unique literal is illegal");
1583 }
1584 HasLiteral = true;
1585 Literal = Val;
1586 return MCOperand::createImm(Literal);
1588
// NOTE(review): the signature (doc lines 1589-1590) and the first half of the
// lit64 expression (doc line 1599) are missing from this excerpt. 64-bit
// variant of the mandatory-literal decode.
1591 if (HasLiteral) {
1592 if (Literal != Val)
1593 return errOperand(Val, "More than one unique literal is illegal");
1594 }
1595 HasLiteral = true;
1596 Literal = Val;
1597
// When the high half is zero the value is representable without lit64().
1598 bool UseLit64 = Hi_32(Literal) == 0;
1600 LitModifier::Lit64, Literal, getContext()))
1601 : MCOperand::createImm(Literal);
1602}
1603
// NOTE(review): the signature (doc lines 1604-1605) and many case-label lines
// of the switch below (e.g. doc lines 1629-1631, 1634-1635, 1637-1639, 1642,
// 1645-1646, 1648, 1650-1652, 1655, 1657-1666, 1669-1671, 1674-1675, 1678,
// 1686-1687) are missing from this excerpt; the surviving lines are verbatim.
1606 const MCOperandInfo &OpDesc) const {
1607 // For now all literal constants are supposed to be unsigned integer
1608 // ToDo: deal with signed/unsigned 64-bit integer constants
1609 // ToDo: deal with float/double constants
// Lazily consume the trailing 32-bit literal dword from the byte stream.
1610 if (!HasLiteral) {
1611 if (Bytes.size() < 4) {
1612 return errOperand(0, "cannot read literal, inst bytes left " +
1613 Twine(Bytes.size()));
1614 }
1615 HasLiteral = true;
1616 Literal = eatBytes<uint32_t>(Bytes);
1617 }
1618
1619 // For disassembling always assume all inline constants are available.
1620 bool HasInv2Pi = true;
1621
1622 // Invalid instruction codes may contain literals for inline-only
1623 // operands, so we support them here as well.
1624 int64_t Val = Literal;
// UseLit: wrap the value in lit() when it would otherwise be ambiguous with
// an inline constant for this operand type.
1625 bool UseLit = false;
1626 switch (OpDesc.OperandType) {
1627 default:
1628 llvm_unreachable("Unexpected operand type!");
1632 UseLit = AMDGPU::isInlinableLiteralBF16(Val, HasInv2Pi);
1633 break;
1636 break;
1640 UseLit = AMDGPU::isInlinableLiteralFP16(Val, HasInv2Pi);
1641 break;
1643 UseLit = AMDGPU::isInlinableLiteralV2F16(Val);
1644 break;
1647 break;
1649 break;
1653 UseLit = AMDGPU::isInlinableLiteralI16(Val, HasInv2Pi);
1654 break;
1656 UseLit = AMDGPU::isInlinableLiteralV2I16(Val);
1657 break;
1667 UseLit = AMDGPU::isInlinableLiteral32(Val, HasInv2Pi);
1668 break;
1672 Val <<= 32;
1673 break;
1676 UseLit = AMDGPU::isInlinableLiteral64(Val, HasInv2Pi);
1677 break;
1679 // TODO: Disassembling V_DUAL_FMAMK_F32_X_FMAMK_F32_gfx11 hits
1680 // decoding a literal in a position of a register operand. Give
1681 // it special handling in the caller, decodeImmOperands(), instead
1682 // of quietly allowing it here.
1683 break;
1684 }
1685
1688 : MCOperand::createImm(Val);
1689}
1690
// NOTE(review): the signature (doc line 1691) and the first half of the lit64
// expression (doc line 1704) are missing from this excerpt. Consumes a full
// 64-bit literal from the byte stream (gfx with Feature64BitLiterals only).
1692 assert(STI.hasFeature(AMDGPU::Feature64BitLiterals));
1693
1694 if (!HasLiteral) {
1695 if (Bytes.size() < 8) {
1696 return errOperand(0, "cannot read literal64, inst bytes left " +
1697 Twine(Bytes.size()));
1698 }
1699 HasLiteral = true;
1700 Literal = eatBytes<uint64_t>(Bytes);
1701 }
1702
1703 bool UseLit64 = Hi_32(Literal) == 0;
1705 LitModifier::Lit64, Literal, getContext()))
1706 : MCOperand::createImm(Literal);
1707}
1708
// NOTE(review): the signature (doc line 1709) is missing from this excerpt.
// Maps the inline-integer encoding range onto its signed value: low encodings
// are non-negative, the rest count downward into negatives.
1710 using namespace AMDGPU::EncValues;
1711
1712 assert(Imm >= INLINE_INTEGER_C_MIN && Imm <= INLINE_INTEGER_C_MAX);
1713 return MCOperand::createImm((Imm <= INLINE_INTEGER_C_POSITIVE_MAX) ?
1714 (static_cast<int64_t>(Imm) - INLINE_INTEGER_C_MIN) :
1715 (INLINE_INTEGER_C_POSITIVE_MAX - static_cast<int64_t>(Imm)));
1716 // Cast prevents negative overflow.
1717}
1718
1719static int64_t getInlineImmVal32(unsigned Imm) {
1720 switch (Imm) {
1721 case 240:
1722 return llvm::bit_cast<uint32_t>(0.5f);
1723 case 241:
1724 return llvm::bit_cast<uint32_t>(-0.5f);
1725 case 242:
1726 return llvm::bit_cast<uint32_t>(1.0f);
1727 case 243:
1728 return llvm::bit_cast<uint32_t>(-1.0f);
1729 case 244:
1730 return llvm::bit_cast<uint32_t>(2.0f);
1731 case 245:
1732 return llvm::bit_cast<uint32_t>(-2.0f);
1733 case 246:
1734 return llvm::bit_cast<uint32_t>(4.0f);
1735 case 247:
1736 return llvm::bit_cast<uint32_t>(-4.0f);
1737 case 248: // 1 / (2 * PI)
1738 return 0x3e22f983;
1739 default:
1740 llvm_unreachable("invalid fp inline imm");
1741 }
1742}
1743
1744static int64_t getInlineImmVal64(unsigned Imm) {
1745 switch (Imm) {
1746 case 240:
1747 return llvm::bit_cast<uint64_t>(0.5);
1748 case 241:
1749 return llvm::bit_cast<uint64_t>(-0.5);
1750 case 242:
1751 return llvm::bit_cast<uint64_t>(1.0);
1752 case 243:
1753 return llvm::bit_cast<uint64_t>(-1.0);
1754 case 244:
1755 return llvm::bit_cast<uint64_t>(2.0);
1756 case 245:
1757 return llvm::bit_cast<uint64_t>(-2.0);
1758 case 246:
1759 return llvm::bit_cast<uint64_t>(4.0);
1760 case 247:
1761 return llvm::bit_cast<uint64_t>(-4.0);
1762 case 248: // 1 / (2 * PI)
1763 return 0x3fc45f306dc9c882;
1764 default:
1765 llvm_unreachable("invalid fp inline imm");
1766 }
1767}
1768
1769static int64_t getInlineImmValF16(unsigned Imm) {
1770 switch (Imm) {
1771 case 240:
1772 return 0x3800;
1773 case 241:
1774 return 0xB800;
1775 case 242:
1776 return 0x3C00;
1777 case 243:
1778 return 0xBC00;
1779 case 244:
1780 return 0x4000;
1781 case 245:
1782 return 0xC000;
1783 case 246:
1784 return 0x4400;
1785 case 247:
1786 return 0xC400;
1787 case 248: // 1 / (2 * PI)
1788 return 0x3118;
1789 default:
1790 llvm_unreachable("invalid fp inline imm");
1791 }
1792}
1793
1794static int64_t getInlineImmValBF16(unsigned Imm) {
1795 switch (Imm) {
1796 case 240:
1797 return 0x3F00;
1798 case 241:
1799 return 0xBF00;
1800 case 242:
1801 return 0x3F80;
1802 case 243:
1803 return 0xBF80;
1804 case 244:
1805 return 0x4000;
1806 case 245:
1807 return 0xC000;
1808 case 246:
1809 return 0x4080;
1810 case 247:
1811 return 0xC080;
1812 case 248: // 1 / (2 * PI)
1813 return 0x3E22;
1814 default:
1815 llvm_unreachable("invalid fp inline imm");
1816 }
1817}
1818
1819unsigned AMDGPUDisassembler::getVgprClassId(unsigned Width) const {
1820 using namespace AMDGPU;
1821
1822 switch (Width) {
1823 case 16:
1824 case 32:
1825 return VGPR_32RegClassID;
1826 case 64:
1827 return VReg_64RegClassID;
1828 case 96:
1829 return VReg_96RegClassID;
1830 case 128:
1831 return VReg_128RegClassID;
1832 case 160:
1833 return VReg_160RegClassID;
1834 case 192:
1835 return VReg_192RegClassID;
1836 case 256:
1837 return VReg_256RegClassID;
1838 case 288:
1839 return VReg_288RegClassID;
1840 case 320:
1841 return VReg_320RegClassID;
1842 case 352:
1843 return VReg_352RegClassID;
1844 case 384:
1845 return VReg_384RegClassID;
1846 case 512:
1847 return VReg_512RegClassID;
1848 case 1024:
1849 return VReg_1024RegClassID;
1850 }
1851 llvm_unreachable("Invalid register width!");
1852}
1853
1854unsigned AMDGPUDisassembler::getAgprClassId(unsigned Width) const {
1855 using namespace AMDGPU;
1856
1857 switch (Width) {
1858 case 16:
1859 case 32:
1860 return AGPR_32RegClassID;
1861 case 64:
1862 return AReg_64RegClassID;
1863 case 96:
1864 return AReg_96RegClassID;
1865 case 128:
1866 return AReg_128RegClassID;
1867 case 160:
1868 return AReg_160RegClassID;
1869 case 256:
1870 return AReg_256RegClassID;
1871 case 288:
1872 return AReg_288RegClassID;
1873 case 320:
1874 return AReg_320RegClassID;
1875 case 352:
1876 return AReg_352RegClassID;
1877 case 384:
1878 return AReg_384RegClassID;
1879 case 512:
1880 return AReg_512RegClassID;
1881 case 1024:
1882 return AReg_1024RegClassID;
1883 }
1884 llvm_unreachable("Invalid register width!");
1885}
1886
1887unsigned AMDGPUDisassembler::getSgprClassId(unsigned Width) const {
1888 using namespace AMDGPU;
1889
1890 switch (Width) {
1891 case 16:
1892 case 32:
1893 return SGPR_32RegClassID;
1894 case 64:
1895 return SGPR_64RegClassID;
1896 case 96:
1897 return SGPR_96RegClassID;
1898 case 128:
1899 return SGPR_128RegClassID;
1900 case 160:
1901 return SGPR_160RegClassID;
1902 case 256:
1903 return SGPR_256RegClassID;
1904 case 288:
1905 return SGPR_288RegClassID;
1906 case 320:
1907 return SGPR_320RegClassID;
1908 case 352:
1909 return SGPR_352RegClassID;
1910 case 384:
1911 return SGPR_384RegClassID;
1912 case 512:
1913 return SGPR_512RegClassID;
1914 }
1915 llvm_unreachable("Invalid register width!");
1916}
1917
1918unsigned AMDGPUDisassembler::getTtmpClassId(unsigned Width) const {
1919 using namespace AMDGPU;
1920
1921 switch (Width) {
1922 case 16:
1923 case 32:
1924 return TTMP_32RegClassID;
1925 case 64:
1926 return TTMP_64RegClassID;
1927 case 128:
1928 return TTMP_128RegClassID;
1929 case 256:
1930 return TTMP_256RegClassID;
1931 case 288:
1932 return TTMP_288RegClassID;
1933 case 320:
1934 return TTMP_320RegClassID;
1935 case 352:
1936 return TTMP_352RegClassID;
1937 case 384:
1938 return TTMP_384RegClassID;
1939 case 512:
1940 return TTMP_512RegClassID;
1941 }
1942 llvm_unreachable("Invalid register width!");
1943}
1944
1945int AMDGPUDisassembler::getTTmpIdx(unsigned Val) const {
1946 using namespace AMDGPU::EncValues;
1947
1948 unsigned TTmpMin = isGFX9Plus() ? TTMP_GFX9PLUS_MIN : TTMP_VI_MIN;
1949 unsigned TTmpMax = isGFX9Plus() ? TTMP_GFX9PLUS_MAX : TTMP_VI_MAX;
1950
1951 return (TTmpMin <= Val && Val <= TTmpMax)? Val - TTmpMin : -1;
1952}
1953
// NOTE(review): the first signature line (doc line 1954) is missing from this
// excerpt. Decodes a 10-bit source-operand encoding: bit 9 selects AGPR vs
// VGPR; values outside the vector range fall through to the scalar/special
// decoder.
1955 unsigned Val) const {
1956 using namespace AMDGPU::EncValues;
1957
1958 assert(Val < 1024); // enum10
1959
1960 bool IsAGPR = Val & 512;
1961 Val &= 511;
1962
1963 if (VGPR_MIN <= Val && Val <= VGPR_MAX) {
1964 return createRegOperand(IsAGPR ? getAgprClassId(Width)
1965 : getVgprClassId(Width), Val - VGPR_MIN);
1966 }
1967 return decodeNonVGPRSrcOp(Inst, Width, Val & 0xFF);
1968}
1969
// NOTE(review): the first signature line (doc line 1970) is missing from this
// excerpt. Decodes the non-vector portion of a source encoding: SGPR, TTMP,
// inline constant, literal marker, or a width-dependent special register.
1971 unsigned Width,
1972 unsigned Val) const {
1973 // Cases when Val{8} is 1 (vgpr, agpr or true 16 vgpr) should have been
1974 // decoded earlier.
1975 assert(Val < (1 << 8) && "9-bit Src encoding when Val{8} is 0");
1976 using namespace AMDGPU::EncValues;
1977
1978 if (Val <= SGPR_MAX) {
1979 // "SGPR_MIN <= Val" is always true and causes compilation warning.
1980 static_assert(SGPR_MIN == 0);
1981 return createSRegOperand(getSgprClassId(Width), Val - SGPR_MIN);
1982 }
1983
1984 int TTmpIdx = getTTmpIdx(Val);
1985 if (TTmpIdx >= 0) {
1986 return createSRegOperand(getTtmpClassId(Width), TTmpIdx);
1987 }
1988
// Inline constants and the literal marker pass through as raw immediates;
// they are resolved later by the immediate-decoding path.
1989 if ((INLINE_INTEGER_C_MIN <= Val && Val <= INLINE_INTEGER_C_MAX) ||
1990 (INLINE_FLOATING_C_MIN <= Val && Val <= INLINE_FLOATING_C_MAX) ||
1991 Val == LITERAL_CONST)
1992 return MCOperand::createImm(Val);
1993
1994 if (Val == LITERAL64_CONST && STI.hasFeature(AMDGPU::Feature64BitLiterals)) {
1995 return decodeLiteral64Constant();
1996 }
1997
1998 switch (Width) {
1999 case 32:
2000 case 16:
2001 return decodeSpecialReg32(Val);
2002 case 64:
2003 return decodeSpecialReg64(Val);
2004 case 96:
2005 case 128:
2006 case 256:
2007 case 512:
2008 return decodeSpecialReg96Plus(Val);
2009 default:
2010 llvm_unreachable("unexpected immediate type");
2011 }
2012}
2013
2014// Bit 0 of DstY isn't stored in the instruction, because it's always the
2015// opposite of bit 0 of DstX.
// NOTE(review): the signature line (doc line 2016) is missing from this
// excerpt.
2017 unsigned Val) const {
2018 int VDstXInd =
2019 AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::vdstX);
2020 assert(VDstXInd != -1);
2021 assert(Inst.getOperand(VDstXInd).isReg());
// Recover DstY bit 0 as the complement of the already-decoded DstX bit 0.
2022 unsigned XDstReg = MRI.getEncodingValue(Inst.getOperand(VDstXInd).getReg());
2023 Val |= ~XDstReg & 1;
2024 return createRegOperand(getVgprClassId(32), Val);
2025}
2026
// NOTE(review): the signature line (doc line 2027) is missing from this
// excerpt. Maps a 32-bit special-register encoding onto the corresponding
// named register; unknown encodings become error operands.
2028 using namespace AMDGPU;
2029
2030 switch (Val) {
2031 // clang-format off
2032 case 102: return createRegOperand(FLAT_SCR_LO);
2033 case 103: return createRegOperand(FLAT_SCR_HI);
2034 case 104: return createRegOperand(XNACK_MASK_LO);
2035 case 105: return createRegOperand(XNACK_MASK_HI);
2036 case 106: return createRegOperand(VCC_LO);
2037 case 107: return createRegOperand(VCC_HI);
2038 case 108: return createRegOperand(TBA_LO);
2039 case 109: return createRegOperand(TBA_HI);
2040 case 110: return createRegOperand(TMA_LO);
2041 case 111: return createRegOperand(TMA_HI);
// Encodings 124/125 swapped meaning (M0 vs null) at GFX11.
2042 case 124:
2043 return isGFX11Plus() ? createRegOperand(SGPR_NULL) : createRegOperand(M0);
2044 case 125:
2045 return isGFX11Plus() ? createRegOperand(M0) : createRegOperand(SGPR_NULL);
2046 case 126: return createRegOperand(EXEC_LO);
2047 case 127: return createRegOperand(EXEC_HI);
2048 case 230: return createRegOperand(SRC_FLAT_SCRATCH_BASE_LO);
2049 case 231: return createRegOperand(SRC_FLAT_SCRATCH_BASE_HI);
2050 case 235: return createRegOperand(SRC_SHARED_BASE_LO);
2051 case 236: return createRegOperand(SRC_SHARED_LIMIT_LO);
2052 case 237: return createRegOperand(SRC_PRIVATE_BASE_LO);
2053 case 238: return createRegOperand(SRC_PRIVATE_LIMIT_LO);
2054 case 239: return createRegOperand(SRC_POPS_EXITING_WAVE_ID);
2055 case 251: return createRegOperand(SRC_VCCZ);
2056 case 252: return createRegOperand(SRC_EXECZ);
2057 case 253: return createRegOperand(SRC_SCC);
2058 case 254: return createRegOperand(LDS_DIRECT);
2059 default: break;
2060 // clang-format on
2061 }
2062 return errOperand(Val, "unknown operand encoding " + Twine(Val));
2063}
2064
2066 using namespace AMDGPU;
2067
  // Map a 64-bit special-register operand encoding to the corresponding
  // full (paired) register; unlisted encodings fall through to errOperand.
2068 switch (Val) {
2069 case 102: return createRegOperand(FLAT_SCR);
2070 case 104: return createRegOperand(XNACK_MASK);
2071 case 106: return createRegOperand(VCC);
2072 case 108: return createRegOperand(TBA);
2073 case 110: return createRegOperand(TMA);
  // SGPR_NULL moved from encoding 125 to 124 on GFX11+.
2074 case 124:
2075 if (isGFX11Plus())
2076 return createRegOperand(SGPR_NULL);
2077 break;
2078 case 125:
2079 if (!isGFX11Plus())
2080 return createRegOperand(SGPR_NULL);
2081 break;
2082 case 126: return createRegOperand(EXEC);
2083 case 230: return createRegOperand(SRC_FLAT_SCRATCH_BASE_LO);
2084 case 235: return createRegOperand(SRC_SHARED_BASE);
2085 case 236: return createRegOperand(SRC_SHARED_LIMIT);
2086 case 237: return createRegOperand(SRC_PRIVATE_BASE);
2087 case 238: return createRegOperand(SRC_PRIVATE_LIMIT);
2088 case 239: return createRegOperand(SRC_POPS_EXITING_WAVE_ID);
2089 case 251: return createRegOperand(SRC_VCCZ);
2090 case 252: return createRegOperand(SRC_EXECZ);
2091 case 253: return createRegOperand(SRC_SCC);
2092 default: break;
2093 }
2094 return errOperand(Val, "unknown operand encoding " + Twine(Val));
2095}
2096
2098 using namespace AMDGPU;
2099
  // For 96-bit and wider operands only SGPR_NULL is a valid special-register
  // encoding (124 on GFX11+, 125 before).
2100 switch (Val) {
2101 case 124:
2102 if (isGFX11Plus())
2103 return createRegOperand(SGPR_NULL);
2104 break;
2105 case 125:
2106 if (!isGFX11Plus())
2107 return createRegOperand(SGPR_NULL);
2108 break;
2109 default:
2110 break;
2111 }
2112 return errOperand(Val, "unknown operand encoding " + Twine(Val));
2113}
2114
2116 const unsigned Val) const {
2117 using namespace AMDGPU::SDWA;
2118 using namespace AMDGPU::EncValues;
2119
2120 if (STI.hasFeature(AMDGPU::FeatureGFX9) ||
2121 STI.hasFeature(AMDGPU::FeatureGFX10)) {
2122 // XXX: cast to int is needed to avoid stupid warning:
2123 // compare with unsigned is always true
2124 if (int(SDWA9EncValues::SRC_VGPR_MIN) <= int(Val) &&
2125 Val <= SDWA9EncValues::SRC_VGPR_MAX) {
2126 return createRegOperand(getVgprClassId(Width),
2127 Val - SDWA9EncValues::SRC_VGPR_MIN);
2128 }
2129 if (SDWA9EncValues::SRC_SGPR_MIN <= Val &&
2130 Val <= (isGFX10Plus() ? SDWA9EncValues::SRC_SGPR_MAX_GFX10
2131 : SDWA9EncValues::SRC_SGPR_MAX_SI)) {
2132 return createSRegOperand(getSgprClassId(Width),
2133 Val - SDWA9EncValues::SRC_SGPR_MIN);
2134 }
2135 if (SDWA9EncValues::SRC_TTMP_MIN <= Val &&
2136 Val <= SDWA9EncValues::SRC_TTMP_MAX) {
2137 return createSRegOperand(getTtmpClassId(Width),
2138 Val - SDWA9EncValues::SRC_TTMP_MIN);
2139 }
2140
2141 const unsigned SVal = Val - SDWA9EncValues::SRC_SGPR_MIN;
2142
2143 if ((INLINE_INTEGER_C_MIN <= SVal && SVal <= INLINE_INTEGER_C_MAX) ||
2144 (INLINE_FLOATING_C_MIN <= SVal && SVal <= INLINE_FLOATING_C_MAX))
2145 return MCOperand::createImm(SVal);
2146
2147 return decodeSpecialReg32(SVal);
2148 }
2149 if (STI.hasFeature(AMDGPU::FeatureVolcanicIslands))
2150 return createRegOperand(getVgprClassId(Width), Val);
2151 llvm_unreachable("unsupported target");
2152}
2153
2155 return decodeSDWASrc(16, Val);
2156}
2157
2159 return decodeSDWASrc(32, Val);
2160}
2161
2163 using namespace AMDGPU::SDWA;
2164
2165 assert((STI.hasFeature(AMDGPU::FeatureGFX9) ||
2166 STI.hasFeature(AMDGPU::FeatureGFX10)) &&
2167 "SDWAVopcDst should be present only on GFX9+");
2168
2169 bool IsWave32 = STI.hasFeature(AMDGPU::FeatureWavefrontSize32);
2170
  // If the VCC bit is set, the destination is an explicit SGPR / TTMP /
  // special register; otherwise it is the implicit VCC.
2171 if (Val & SDWA9EncValues::VOPC_DST_VCC_MASK) {
2172 Val &= SDWA9EncValues::VOPC_DST_SGPR_MASK;
2173
2174 int TTmpIdx = getTTmpIdx(Val);
2175 if (TTmpIdx >= 0) {
2176 auto TTmpClsId = getTtmpClassId(IsWave32 ? 32 : 64);
2177 return createSRegOperand(TTmpClsId, TTmpIdx);
2178 }
  // Values past the SGPR range are special registers, width-dependent.
2179 if (Val > SGPR_MAX) {
2180 return IsWave32 ? decodeSpecialReg32(Val) : decodeSpecialReg64(Val);
2181 }
2182 return createSRegOperand(getSgprClassId(IsWave32 ? 32 : 64), Val);
2183 }
2184 return createRegOperand(IsWave32 ? AMDGPU::VCC_LO : AMDGPU::VCC);
2185}
2186
2188 unsigned Val) const {
  // Boolean (lane-mask) sources are 32-bit in wave32 and 64-bit in wave64.
2189 return STI.hasFeature(AMDGPU::FeatureWavefrontSize32)
2190 ? decodeSrcOp(Inst, 32, Val)
2191 : decodeSrcOp(Inst, 64, Val);
2192}
2193
2195 unsigned Val) const {
  // Split-barrier operands decode as ordinary 32-bit source operands.
2196 return decodeSrcOp(Inst, 32, Val);
2197}
2198
2201 return MCOperand();
  // Otherwise the FI field decodes to a plain immediate. (The guard condition
  // above this return is elided in this view -- TODO confirm against full
  // source.)
2202 return MCOperand::createImm(Val);
2203}
2204
2206 using VersionField = AMDGPU::EncodingField<7, 0>;
2207 using W64Bit = AMDGPU::EncodingBit<13>;
2208 using W32Bit = AMDGPU::EncodingBit<14>;
2209 using MDPBit = AMDGPU::EncodingBit<15>;
2211
2212 auto [Version, W64, W32, MDP] = Encoding::decode(Imm);
2213
2214 // Decode into a plain immediate if any unused bits are raised.
2215 if (Encoding::encode(Version, W64, W32, MDP) != Imm)
2216 return MCOperand::createImm(Imm);
2217
  // Prefer a symbolic expression when the version code matches one of the
  // known microcode version symbols.
2218 const auto &Versions = AMDGPU::UCVersion::getGFXVersions();
2219 const auto *I = find_if(
2220 Versions, [Version = Version](const AMDGPU::UCVersion::GFXVersion &V) {
2221 return V.Code == Version;
2222 });
2223 MCContext &Ctx = getContext();
2224 const MCExpr *E;
2225 if (I == Versions.end())
2227 else
2228 E = MCSymbolRefExpr::create(Ctx.getOrCreateSymbol(I->Symbol), Ctx);
2229
  // OR in the W64 / W32 / MDP flag expressions for any set bits.
2230 if (W64)
2231 E = MCBinaryExpr::createOr(E, UCVersionW64Expr, Ctx);
2232 if (W32)
2233 E = MCBinaryExpr::createOr(E, UCVersionW32Expr, Ctx);
2234 if (MDP)
2235 E = MCBinaryExpr::createOr(E, UCVersionMDPExpr, Ctx);
2236
2237 return MCOperand::createExpr(E);
2238}
2239
  // Subtarget predicate helpers: thin wrappers over STI feature queries used
  // throughout the decoder. Several one-line siblings are elided in this view.
2241 return STI.hasFeature(AMDGPU::FeatureVolcanicIslands);
2242}
2243
2245
2247 return STI.hasFeature(AMDGPU::FeatureGFX90AInsts);
2248}
2249
2251
2253
2257
2259 return STI.hasFeature(AMDGPU::FeatureGFX11);
2260}
2261
2265
2267
2269 return STI.hasFeature(AMDGPU::FeatureGFX12);
2270}
2271
2275
2277
2281
2283
2287
2289 return STI.hasFeature(AMDGPU::FeatureArchitectedFlatScratch);
2290}
2291
2295
2296//===----------------------------------------------------------------------===//
2297// AMDGPU specific symbol handling
2298//===----------------------------------------------------------------------===//
2299
2300/// Print a string describing the reserved bit range specified by Mask with
2301/// offset BaseBytes for use in error comments. Mask is a single continuous
2302/// range of 1s surrounded by zeros. The format here is meant to align with the
2303/// tables that describe these bits in llvm.org/docs/AMDGPUUsage.html.
2304static SmallString<32> getBitRangeFromMask(uint32_t Mask, unsigned BaseBytes) {
2305 SmallString<32> Result;
2306 raw_svector_ostream S(Result);
2307
2308 int TrailingZeros = llvm::countr_zero(Mask);
2309 int PopCount = llvm::popcount(Mask);
2310
2311 if (PopCount == 1) {
2312 S << "bit (" << (TrailingZeros + BaseBytes * CHAR_BIT) << ')';
2313 } else {
2314 S << "bits in range ("
2315 << (TrailingZeros + PopCount - 1 + BaseBytes * CHAR_BIT) << ':'
2316 << (TrailingZeros + BaseBytes * CHAR_BIT) << ')';
2317 }
2318
2319 return Result;
2320}
2321
// GET_FIELD extracts a field from the local FourByteBuffer; the PRINT_*
// macros emit one .amdhsa_* directive (or a commented pseudo-directive) into
// the local KdStream at the current Indent.
2322#define GET_FIELD(MASK) (AMDHSA_BITS_GET(FourByteBuffer, MASK))
2323#define PRINT_DIRECTIVE(DIRECTIVE, MASK) \
2324 do { \
2325 KdStream << Indent << DIRECTIVE " " << GET_FIELD(MASK) << '\n'; \
2326 } while (0)
2327#define PRINT_PSEUDO_DIRECTIVE_COMMENT(DIRECTIVE, MASK) \
2328 do { \
2329 KdStream << Indent << MAI.getCommentString() << ' ' << DIRECTIVE " " \
2330 << GET_FIELD(MASK) << '\n'; \
2331 } while (0)
2332
// The CHECK_RESERVED_BITS* macros `return` an error from the enclosing
// decode function if any bit of MASK is set in FourByteBuffer.
2333#define CHECK_RESERVED_BITS_IMPL(MASK, DESC, MSG) \
2334 do { \
2335 if (FourByteBuffer & (MASK)) { \
2336 return createStringError(std::errc::invalid_argument, \
2337 "kernel descriptor " DESC \
2338 " reserved %s set" MSG, \
2339 getBitRangeFromMask((MASK), 0).c_str()); \
2340 } \
2341 } while (0)
2342
2343#define CHECK_RESERVED_BITS(MASK) CHECK_RESERVED_BITS_IMPL(MASK, #MASK, "")
2344#define CHECK_RESERVED_BITS_MSG(MASK, MSG) \
2345 CHECK_RESERVED_BITS_IMPL(MASK, #MASK, ", " MSG)
2346#define CHECK_RESERVED_BITS_DESC(MASK, DESC) \
2347 CHECK_RESERVED_BITS_IMPL(MASK, DESC, "")
2348#define CHECK_RESERVED_BITS_DESC_MSG(MASK, DESC, MSG) \
2349 CHECK_RESERVED_BITS_IMPL(MASK, DESC, ", " MSG)
2350
2351// NOLINTNEXTLINE(readability-identifier-naming)
2353 uint32_t FourByteBuffer, raw_string_ostream &KdStream) const {
2354 using namespace amdhsa;
2355 StringRef Indent = "\t";
  // Emit the .amdhsa_* directives encoded in COMPUTE_PGM_RSRC1 into KdStream,
  // failing if any reserved bit is set.
2356
2357 // We cannot accurately backward compute #VGPRs used from
2358 // GRANULATED_WORKITEM_VGPR_COUNT. But we are concerned with getting the same
2359 // value of GRANULATED_WORKITEM_VGPR_COUNT in the reassembled binary. So we
2360 // simply calculate the inverse of what the assembler does.
2361
2362 uint32_t GranulatedWorkitemVGPRCount =
2363 GET_FIELD(COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT);
2364
2365 uint32_t NextFreeVGPR =
2366 (GranulatedWorkitemVGPRCount + 1) *
2367 AMDGPU::IsaInfo::getVGPREncodingGranule(&STI, EnableWavefrontSize32);
2368
2369 KdStream << Indent << ".amdhsa_next_free_vgpr " << NextFreeVGPR << '\n';
2370
2371 // We cannot backward compute values used to calculate
2372 // GRANULATED_WAVEFRONT_SGPR_COUNT. Hence the original values for following
2373 // directives can't be computed:
2374 // .amdhsa_reserve_vcc
2375 // .amdhsa_reserve_flat_scratch
2376 // .amdhsa_reserve_xnack_mask
2377 // They take their respective default values if not specified in the assembly.
2378 //
2379 // GRANULATED_WAVEFRONT_SGPR_COUNT
2380 // = f(NEXT_FREE_SGPR + VCC + FLAT_SCRATCH + XNACK_MASK)
2381 //
2382 // We compute the inverse as though all directives apart from NEXT_FREE_SGPR
2383 // are set to 0. So while disassembling we consider that:
2384 //
2385 // GRANULATED_WAVEFRONT_SGPR_COUNT
2386 // = f(NEXT_FREE_SGPR + 0 + 0 + 0)
2387 //
2388 // The disassembler cannot recover the original values of those 3 directives.
2389
2390 uint32_t GranulatedWavefrontSGPRCount =
2391 GET_FIELD(COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT);
2392
2393 if (isGFX10Plus())
2394 CHECK_RESERVED_BITS_MSG(COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT,
2395 "must be zero on gfx10+");
2396
2397 uint32_t NextFreeSGPR = (GranulatedWavefrontSGPRCount + 1) *
2399
2400 KdStream << Indent << ".amdhsa_reserve_vcc " << 0 << '\n';
2402 KdStream << Indent << ".amdhsa_reserve_flat_scratch " << 0 << '\n';
2403 bool ReservedXnackMask = STI.hasFeature(AMDGPU::FeatureXNACK);
2404 assert(!ReservedXnackMask || STI.hasFeature(AMDGPU::FeatureSupportsXNACK));
2405 KdStream << Indent << ".amdhsa_reserve_xnack_mask " << ReservedXnackMask
2406 << '\n';
2407 KdStream << Indent << ".amdhsa_next_free_sgpr " << NextFreeSGPR << "\n";
2408
2409 CHECK_RESERVED_BITS(COMPUTE_PGM_RSRC1_PRIORITY);
2410
2411 PRINT_DIRECTIVE(".amdhsa_float_round_mode_32",
2412 COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_32);
2413 PRINT_DIRECTIVE(".amdhsa_float_round_mode_16_64",
2414 COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_16_64);
2415 PRINT_DIRECTIVE(".amdhsa_float_denorm_mode_32",
2416 COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_32);
2417 PRINT_DIRECTIVE(".amdhsa_float_denorm_mode_16_64",
2418 COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_16_64);
2419
2420 CHECK_RESERVED_BITS(COMPUTE_PGM_RSRC1_PRIV);
2421
  // DX10_CLAMP / IEEE_MODE bits were removed on gfx12.
2422 if (!isGFX12Plus())
2423 PRINT_DIRECTIVE(".amdhsa_dx10_clamp",
2424 COMPUTE_PGM_RSRC1_GFX6_GFX11_ENABLE_DX10_CLAMP);
2425
2426 CHECK_RESERVED_BITS(COMPUTE_PGM_RSRC1_DEBUG_MODE);
2427
2428 if (!isGFX12Plus())
2429 PRINT_DIRECTIVE(".amdhsa_ieee_mode",
2430 COMPUTE_PGM_RSRC1_GFX6_GFX11_ENABLE_IEEE_MODE);
2431
2432 CHECK_RESERVED_BITS(COMPUTE_PGM_RSRC1_BULKY);
2433 CHECK_RESERVED_BITS(COMPUTE_PGM_RSRC1_CDBG_USER);
2434
2435 // Bits [26].
2436 if (isGFX9Plus()) {
2437 PRINT_DIRECTIVE(".amdhsa_fp16_overflow", COMPUTE_PGM_RSRC1_GFX9_PLUS_FP16_OVFL);
2438 } else {
2439 CHECK_RESERVED_BITS_DESC_MSG(COMPUTE_PGM_RSRC1_GFX6_GFX8_RESERVED0,
2440 "COMPUTE_PGM_RSRC1", "must be zero pre-gfx9");
2441 }
2442
2443 // Bits [27].
2444 if (isGFX1250Plus()) {
2445 PRINT_PSEUDO_DIRECTIVE_COMMENT("FLAT_SCRATCH_IS_NV",
2446 COMPUTE_PGM_RSRC1_GFX125_FLAT_SCRATCH_IS_NV);
2447 } else {
2448 CHECK_RESERVED_BITS_DESC(COMPUTE_PGM_RSRC1_GFX6_GFX120_RESERVED1,
2449 "COMPUTE_PGM_RSRC1");
2450 }
2451
2452 // Bits [28].
2453 CHECK_RESERVED_BITS_DESC(COMPUTE_PGM_RSRC1_RESERVED2, "COMPUTE_PGM_RSRC1");
2454
2455 // Bits [29-31].
2456 if (isGFX10Plus()) {
2457 // WGP_MODE is not available on GFX1250.
2458 if (!isGFX1250Plus()) {
2459 PRINT_DIRECTIVE(".amdhsa_workgroup_processor_mode",
2460 COMPUTE_PGM_RSRC1_GFX10_PLUS_WGP_MODE);
2461 }
2462 PRINT_DIRECTIVE(".amdhsa_memory_ordered", COMPUTE_PGM_RSRC1_GFX10_PLUS_MEM_ORDERED);
2463 PRINT_DIRECTIVE(".amdhsa_forward_progress", COMPUTE_PGM_RSRC1_GFX10_PLUS_FWD_PROGRESS);
2464 } else {
2465 CHECK_RESERVED_BITS_DESC(COMPUTE_PGM_RSRC1_GFX6_GFX9_RESERVED3,
2466 "COMPUTE_PGM_RSRC1");
2467 }
2468
2469 if (isGFX12Plus())
2470 PRINT_DIRECTIVE(".amdhsa_round_robin_scheduling",
2471 COMPUTE_PGM_RSRC1_GFX12_PLUS_ENABLE_WG_RR_EN);
2472
2473 return true;
2474}
2475
2476// NOLINTNEXTLINE(readability-identifier-naming)
2478 uint32_t FourByteBuffer, raw_string_ostream &KdStream) const {
2479 using namespace amdhsa;
2480 StringRef Indent = "\t";
2482 PRINT_DIRECTIVE(".amdhsa_enable_private_segment",
2483 COMPUTE_PGM_RSRC2_ENABLE_PRIVATE_SEGMENT);
2484 else
2485 PRINT_DIRECTIVE(".amdhsa_system_sgpr_private_segment_wavefront_offset",
2486 COMPUTE_PGM_RSRC2_ENABLE_PRIVATE_SEGMENT);
  // System SGPR / VGPR setup bits map 1:1 onto .amdhsa_system_* directives.
2487 PRINT_DIRECTIVE(".amdhsa_system_sgpr_workgroup_id_x",
2488 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_X);
2489 PRINT_DIRECTIVE(".amdhsa_system_sgpr_workgroup_id_y",
2490 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Y);
2491 PRINT_DIRECTIVE(".amdhsa_system_sgpr_workgroup_id_z",
2492 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Z);
2493 PRINT_DIRECTIVE(".amdhsa_system_sgpr_workgroup_info",
2494 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_INFO);
2495 PRINT_DIRECTIVE(".amdhsa_system_vgpr_workitem_id",
2496 COMPUTE_PGM_RSRC2_ENABLE_VGPR_WORKITEM_ID);
2497
2498 CHECK_RESERVED_BITS(COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_ADDRESS_WATCH);
2499 CHECK_RESERVED_BITS(COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_MEMORY);
2500 CHECK_RESERVED_BITS(COMPUTE_PGM_RSRC2_GRANULATED_LDS_SIZE);
2502 ".amdhsa_exception_fp_ieee_invalid_op",
2503 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INVALID_OPERATION);
2504 PRINT_DIRECTIVE(".amdhsa_exception_fp_denorm_src",
2505 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_FP_DENORMAL_SOURCE);
2507 ".amdhsa_exception_fp_ieee_div_zero",
2508 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_DIVISION_BY_ZERO);
2509 PRINT_DIRECTIVE(".amdhsa_exception_fp_ieee_overflow",
2510 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_OVERFLOW);
2511 PRINT_DIRECTIVE(".amdhsa_exception_fp_ieee_underflow",
2512 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_UNDERFLOW);
2513 PRINT_DIRECTIVE(".amdhsa_exception_fp_ieee_inexact",
2514 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INEXACT);
2515 PRINT_DIRECTIVE(".amdhsa_exception_int_div_zero",
2516 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_INT_DIVIDE_BY_ZERO);
2517
2518 CHECK_RESERVED_BITS_DESC(COMPUTE_PGM_RSRC2_RESERVED0, "COMPUTE_PGM_RSRC2");
2519
2520 return true;
2521}
2523
2524// NOLINTNEXTLINE(readability-identifier-naming)
2526 uint32_t FourByteBuffer, raw_string_ostream &KdStream) const {
2527 using namespace amdhsa;
2528 StringRef Indent = "\t";
  // COMPUTE_PGM_RSRC3's layout differs radically per generation, so the
  // decode branches on the subtarget first.
2529 if (isGFX90A()) {
2530 KdStream << Indent << ".amdhsa_accum_offset "
2531 << (GET_FIELD(COMPUTE_PGM_RSRC3_GFX90A_ACCUM_OFFSET) + 1) * 4
2532 << '\n';
2533
2534 PRINT_DIRECTIVE(".amdhsa_tg_split", COMPUTE_PGM_RSRC3_GFX90A_TG_SPLIT);
2535
2536 CHECK_RESERVED_BITS_DESC_MSG(COMPUTE_PGM_RSRC3_GFX90A_RESERVED0,
2537 "COMPUTE_PGM_RSRC3", "must be zero on gfx90a");
2538 CHECK_RESERVED_BITS_DESC_MSG(COMPUTE_PGM_RSRC3_GFX90A_RESERVED1,
2539 "COMPUTE_PGM_RSRC3", "must be zero on gfx90a");
2540 } else if (isGFX10Plus()) {
2541 // Bits [0-3].
2542 if (!isGFX12Plus()) {
2543 if (!EnableWavefrontSize32 || !*EnableWavefrontSize32) {
2544 PRINT_DIRECTIVE(".amdhsa_shared_vgpr_count",
2545 COMPUTE_PGM_RSRC3_GFX10_GFX11_SHARED_VGPR_COUNT);
2546 } else {
2548 "SHARED_VGPR_COUNT",
2549 COMPUTE_PGM_RSRC3_GFX10_GFX11_SHARED_VGPR_COUNT);
2550 }
2551 } else {
2552 CHECK_RESERVED_BITS_DESC_MSG(COMPUTE_PGM_RSRC3_GFX12_PLUS_RESERVED0,
2553 "COMPUTE_PGM_RSRC3",
2554 "must be zero on gfx12+");
2555 }
2556
2557 // Bits [4-11].
2558 if (isGFX11()) {
2559 PRINT_DIRECTIVE(".amdhsa_inst_pref_size",
2560 COMPUTE_PGM_RSRC3_GFX11_INST_PREF_SIZE);
2561 PRINT_PSEUDO_DIRECTIVE_COMMENT("TRAP_ON_START",
2562 COMPUTE_PGM_RSRC3_GFX11_TRAP_ON_START);
2563 PRINT_PSEUDO_DIRECTIVE_COMMENT("TRAP_ON_END",
2564 COMPUTE_PGM_RSRC3_GFX11_TRAP_ON_END);
2565 } else if (isGFX12Plus()) {
2566 PRINT_DIRECTIVE(".amdhsa_inst_pref_size",
2567 COMPUTE_PGM_RSRC3_GFX12_PLUS_INST_PREF_SIZE);
2568 } else {
2569 CHECK_RESERVED_BITS_DESC_MSG(COMPUTE_PGM_RSRC3_GFX10_RESERVED1,
2570 "COMPUTE_PGM_RSRC3",
2571 "must be zero on gfx10");
2572 }
2573
2574 // Bits [12].
2575 CHECK_RESERVED_BITS_DESC_MSG(COMPUTE_PGM_RSRC3_GFX10_PLUS_RESERVED2,
2576 "COMPUTE_PGM_RSRC3", "must be zero on gfx10+");
2577
2578 // Bits [13].
2579 if (isGFX12Plus()) {
2581 COMPUTE_PGM_RSRC3_GFX12_PLUS_GLG_EN);
2582 } else {
2583 CHECK_RESERVED_BITS_DESC_MSG(COMPUTE_PGM_RSRC3_GFX10_GFX11_RESERVED3,
2584 "COMPUTE_PGM_RSRC3",
2585 "must be zero on gfx10 or gfx11");
2586 }
2587
2588 // Bits [14-21].
2589 if (isGFX1250Plus()) {
2590 PRINT_DIRECTIVE(".amdhsa_named_barrier_count",
2591 COMPUTE_PGM_RSRC3_GFX125_NAMED_BAR_CNT);
2593 "ENABLE_DYNAMIC_VGPR", COMPUTE_PGM_RSRC3_GFX125_ENABLE_DYNAMIC_VGPR);
2595 COMPUTE_PGM_RSRC3_GFX125_TCP_SPLIT);
2597 "ENABLE_DIDT_THROTTLE",
2598 COMPUTE_PGM_RSRC3_GFX125_ENABLE_DIDT_THROTTLE);
2599 } else {
2600 CHECK_RESERVED_BITS_DESC_MSG(COMPUTE_PGM_RSRC3_GFX10_GFX120_RESERVED4,
2601 "COMPUTE_PGM_RSRC3",
2602 "must be zero on gfx10+");
2603 }
2604
2605 // Bits [22-30].
2606 CHECK_RESERVED_BITS_DESC_MSG(COMPUTE_PGM_RSRC3_GFX10_PLUS_RESERVED5,
2607 "COMPUTE_PGM_RSRC3", "must be zero on gfx10+");
2608
2609 // Bits [31].
2610 if (isGFX11Plus()) {
2612 COMPUTE_PGM_RSRC3_GFX11_PLUS_IMAGE_OP);
2613 } else {
2614 CHECK_RESERVED_BITS_DESC_MSG(COMPUTE_PGM_RSRC3_GFX10_RESERVED6,
2615 "COMPUTE_PGM_RSRC3",
2616 "must be zero on gfx10");
2617 }
2618 } else if (FourByteBuffer) {
2619 return createStringError(
2620 std::errc::invalid_argument,
2621 "kernel descriptor COMPUTE_PGM_RSRC3 must be all zero before gfx9");
2622 }
2623 return true;
2624}
// Scope the kernel-descriptor decoding helper macros to this section.
2625#undef PRINT_PSEUDO_DIRECTIVE_COMMENT
2626#undef PRINT_DIRECTIVE
2627#undef GET_FIELD
2628#undef CHECK_RESERVED_BITS_IMPL
2629#undef CHECK_RESERVED_BITS
2630#undef CHECK_RESERVED_BITS_MSG
2631#undef CHECK_RESERVED_BITS_DESC
2632#undef CHECK_RESERVED_BITS_DESC_MSG
2633
2634/// Create an error object to return from onSymbolStart for reserved kernel
2635/// descriptor bits being set.
2636static Error createReservedKDBitsError(uint32_t Mask, unsigned BaseBytes,
2637 const char *Msg = "") {
2638 return createStringError(
2639 std::errc::invalid_argument, "kernel descriptor reserved %s set%s%s",
2640 getBitRangeFromMask(Mask, BaseBytes).c_str(), *Msg ? ", " : "", Msg);
2641}
2642
2643/// Create an error object to return from onSymbolStart for reserved kernel
2644/// descriptor bytes being set.
2645static Error createReservedKDBytesError(unsigned BaseInBytes,
2646 unsigned WidthInBytes) {
2647 // Create an error comment in the same format as the "Kernel Descriptor"
2648 // table here: https://llvm.org/docs/AMDGPUUsage.html#kernel-descriptor .
2649 return createStringError(
2650 std::errc::invalid_argument,
2651 "kernel descriptor reserved bits in range (%u:%u) set",
2652 (BaseInBytes + WidthInBytes) * CHAR_BIT - 1, BaseInBytes * CHAR_BIT);
2653}
2654
2657 raw_string_ostream &KdStream) const {
2658#define PRINT_DIRECTIVE(DIRECTIVE, MASK) \
2659 do { \
2660 KdStream << Indent << DIRECTIVE " " \
2661 << ((TwoByteBuffer & MASK) >> (MASK##_SHIFT)) << '\n'; \
2662 } while (0)
2663
2664 uint16_t TwoByteBuffer = 0;
2665 uint32_t FourByteBuffer = 0;
2666
2667 StringRef ReservedBytes;
2668 StringRef Indent = "\t";
2669
2670 assert(Bytes.size() == 64);
2671 DataExtractor DE(Bytes, /*IsLittleEndian=*/true, /*AddressSize=*/8);
2672
  // Dispatch on the cursor's offset within the 64-byte descriptor; each case
  // decodes one field. (The case labels for the offsets are elided in this
  // view.)
2673 switch (Cursor.tell()) {
2675 FourByteBuffer = DE.getU32(Cursor);
2676 KdStream << Indent << ".amdhsa_group_segment_fixed_size " << FourByteBuffer
2677 << '\n';
2678 return true;
2679
2681 FourByteBuffer = DE.getU32(Cursor);
2682 KdStream << Indent << ".amdhsa_private_segment_fixed_size "
2683 << FourByteBuffer << '\n';
2684 return true;
2685
2687 FourByteBuffer = DE.getU32(Cursor);
2688 KdStream << Indent << ".amdhsa_kernarg_size "
2689 << FourByteBuffer << '\n';
2690 return true;
2691
2693 // 4 reserved bytes, must be 0.
2694 ReservedBytes = DE.getBytes(Cursor, 4);
2695 for (char B : ReservedBytes) {
2696 if (B != 0)
2698 }
2699 return true;
2700
2702 // KERNEL_CODE_ENTRY_BYTE_OFFSET
2703 // So far no directive controls this for Code Object V3, so simply skip for
2704 // disassembly.
2705 DE.skip(Cursor, 8);
2706 return true;
2707
2709 // 20 reserved bytes, must be 0.
2710 ReservedBytes = DE.getBytes(Cursor, 20);
2711 for (char B : ReservedBytes) {
2712 if (B != 0)
2714 }
2715 return true;
2716
2718 FourByteBuffer = DE.getU32(Cursor);
2719 return decodeCOMPUTE_PGM_RSRC3(FourByteBuffer, KdStream);
2720
2722 FourByteBuffer = DE.getU32(Cursor);
2723 return decodeCOMPUTE_PGM_RSRC1(FourByteBuffer, KdStream);
2724
2726 FourByteBuffer = DE.getU32(Cursor);
2727 return decodeCOMPUTE_PGM_RSRC2(FourByteBuffer, KdStream);
2728
2730 using namespace amdhsa;
2731 TwoByteBuffer = DE.getU16(Cursor);
2732
2734 PRINT_DIRECTIVE(".amdhsa_user_sgpr_private_segment_buffer",
2735 KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER);
2736 PRINT_DIRECTIVE(".amdhsa_user_sgpr_dispatch_ptr",
2737 KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR);
2738 PRINT_DIRECTIVE(".amdhsa_user_sgpr_queue_ptr",
2739 KERNEL_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR);
2740 PRINT_DIRECTIVE(".amdhsa_user_sgpr_kernarg_segment_ptr",
2741 KERNEL_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR);
2742 PRINT_DIRECTIVE(".amdhsa_user_sgpr_dispatch_id",
2743 KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID);
2745 PRINT_DIRECTIVE(".amdhsa_user_sgpr_flat_scratch_init",
2746 KERNEL_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT);
2747 PRINT_DIRECTIVE(".amdhsa_user_sgpr_private_segment_size",
2748 KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE);
2749
2750 if (TwoByteBuffer & KERNEL_CODE_PROPERTY_RESERVED0)
2751 return createReservedKDBitsError(KERNEL_CODE_PROPERTY_RESERVED0,
2753
2754 // Reserved for GFX9
2755 if (isGFX9() &&
2756 (TwoByteBuffer & KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32)) {
2758 KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32,
2759 amdhsa::KERNEL_CODE_PROPERTIES_OFFSET, "must be zero on gfx9");
2760 }
2761 if (isGFX10Plus()) {
2762 PRINT_DIRECTIVE(".amdhsa_wavefront_size32",
2763 KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32);
2764 }
2765
2766 if (CodeObjectVersion >= AMDGPU::AMDHSA_COV5)
2767 PRINT_DIRECTIVE(".amdhsa_uses_dynamic_stack",
2768 KERNEL_CODE_PROPERTY_USES_DYNAMIC_STACK);
2769
2770 if (TwoByteBuffer & KERNEL_CODE_PROPERTY_RESERVED1) {
2771 return createReservedKDBitsError(KERNEL_CODE_PROPERTY_RESERVED1,
2773 }
2774
2775 return true;
2776
2778 using namespace amdhsa;
2779 TwoByteBuffer = DE.getU16(Cursor);
2780 if (TwoByteBuffer & KERNARG_PRELOAD_SPEC_LENGTH) {
2781 PRINT_DIRECTIVE(".amdhsa_user_sgpr_kernarg_preload_length",
2782 KERNARG_PRELOAD_SPEC_LENGTH);
2783 }
2784
2785 if (TwoByteBuffer & KERNARG_PRELOAD_SPEC_OFFSET) {
2786 PRINT_DIRECTIVE(".amdhsa_user_sgpr_kernarg_preload_offset",
2787 KERNARG_PRELOAD_SPEC_OFFSET);
2788 }
2789 return true;
2790
2792 // 4 bytes from here are reserved, must be 0.
2793 ReservedBytes = DE.getBytes(Cursor, 4);
2794 for (char B : ReservedBytes) {
2795 if (B != 0)
2797 }
2798 return true;
2799
2800 default:
2801 llvm_unreachable("Unhandled index. Case statements cover everything.");
2802 return true;
2803 }
2804#undef PRINT_DIRECTIVE
2805}
2806
2808 StringRef KdName, ArrayRef<uint8_t> Bytes, uint64_t KdAddress) const {
2809
2810 // CP microcode requires the kernel descriptor to be 64 aligned.
2811 if (Bytes.size() != 64 || KdAddress % 64 != 0)
2812 return createStringError(std::errc::invalid_argument,
2813 "kernel descriptor must be 64-byte aligned");
2814
2815 // FIXME: We can't actually decode "in order" as is done below, as e.g. GFX10
2816 // requires us to know the setting of .amdhsa_wavefront_size32 in order to
2817 // accurately produce .amdhsa_next_free_vgpr, and they appear in the wrong
2818 // order. Workaround this by first looking up .amdhsa_wavefront_size32 here
2819 // when required.
2820 if (isGFX10Plus()) {
2821 uint16_t KernelCodeProperties =
2824 EnableWavefrontSize32 =
2825 AMDHSA_BITS_GET(KernelCodeProperties,
2826 amdhsa::KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32);
2827 }
2828
  // Accumulate the full .amdhsa_kernel block in a string, then print it.
2829 std::string Kd;
2830 raw_string_ostream KdStream(Kd);
2831 KdStream << ".amdhsa_kernel " << KdName << '\n';
2832
2834 while (C && C.tell() < Bytes.size()) {
2835 Expected<bool> Res = decodeKernelDescriptorDirective(C, Bytes, KdStream);
2836
2837 cantFail(C.takeError());
2838
2839 if (!Res)
2840 return Res;
2841 }
2842 KdStream << ".end_amdhsa_kernel\n";
2843 outs() << KdStream.str();
2844 return true;
2845}
2846
2848 uint64_t &Size,
2849 ArrayRef<uint8_t> Bytes,
2850 uint64_t Address) const {
2851 // Right now only kernel descriptor needs to be handled.
2852 // We ignore all other symbols for target specific handling.
2853 // TODO:
2854 // Fix the spurious symbol issue for AMDGPU kernels. Exists for both Code
2855 // Object V2 and V3 when symbols are marked protected.
2856
2857 // amd_kernel_code_t for Code Object V2.
2858 if (Symbol.Type == ELF::STT_AMDGPU_HSA_KERNEL) {
2859 Size = 256;
2860 return createStringError(std::errc::invalid_argument,
2861 "code object v2 is not supported");
2862 }
2863
2864 // Code Object V3 kernel descriptors.
2865 StringRef Name = Symbol.Name;
2866 if (Symbol.Type == ELF::STT_OBJECT && Name.ends_with(StringRef(".kd"))) {
2867 Size = 64; // Size = 64 regardless of success or failure.
2868 return decodeKernelDescriptor(Name.drop_back(3), Bytes, Address);
2869 }
2870
  // Not a symbol this disassembler handles specially.
2871 return false;
2872}
2873
2874const MCExpr *AMDGPUDisassembler::createConstantSymbolExpr(StringRef Id,
2875 int64_t Val) {
2876 MCContext &Ctx = getContext();
2877 MCSymbol *Sym = Ctx.getOrCreateSymbol(Id);
2878 // Note: only set the value on a newly-created symbol, in case a
2879 // disassembler has already been initialized in this context.
2880 if (!Sym->isVariable()) {
2882 } else {
  // Existing symbol: verify that it already evaluates to Val.
2883 int64_t Res = ~Val;
2884 bool Valid = Sym->getVariableValue()->evaluateAsAbsolute(Res);
2885 if (!Valid || Res != Val)
2886 Ctx.reportWarning(SMLoc(), "unsupported redefinition of " + Id);
2887 }
2888 return MCSymbolRefExpr::create(Sym, Ctx);
2889}
2890
  // Returns true if MI is a buffer memory access: MUBUF/MTBUF, or an SMEM
  // S_BUFFER_* instruction.
2892 const uint64_t TSFlags = MCII->get(MI.getOpcode()).TSFlags;
2893
2894 // Check for MUBUF and MTBUF instructions
2895 if (TSFlags & (SIInstrFlags::MTBUF | SIInstrFlags::MUBUF))
2896 return true;
2897
2898 // Check for SMEM buffer instructions (S_BUFFER_* instructions)
2899 if ((TSFlags & SIInstrFlags::SMRD) && AMDGPU::getSMEMIsBuffer(MI.getOpcode()))
2900 return true;
2901
2902 return false;
2903}
2904
2905//===----------------------------------------------------------------------===//
2906// AMDGPUSymbolizer
2907//===----------------------------------------------------------------------===//
2908
2909// Try to find symbol name for specified label
2911 MCInst &Inst, raw_ostream & /*cStream*/, int64_t Value,
2912 uint64_t /*Address*/, bool IsBranch, uint64_t /*Offset*/,
2913 uint64_t /*OpSize*/, uint64_t /*InstSize*/) {
2914
  // Only branch targets are symbolized.
2915 if (!IsBranch) {
2916 return false;
2917 }
2918
  // DisInfo is expected to carry the section's symbol table.
2919 auto *Symbols = static_cast<SectionSymbolsTy *>(DisInfo);
2920 if (!Symbols)
2921 return false;
2922
  // Look for an untyped (STT_NOTYPE) symbol at exactly the target address.
2923 auto Result = llvm::find_if(*Symbols, [Value](const SymbolInfoTy &Val) {
2924 return Val.Addr == static_cast<uint64_t>(Value) &&
2925 Val.Type == ELF::STT_NOTYPE;
2926 });
2927 if (Result != Symbols->end()) {
2928 auto *Sym = Ctx.getOrCreateSymbol(Result->Name);
2929 const auto *Add = MCSymbolRefExpr::create(Sym, Ctx);
2931 return true;
2932 }
2933 // Add to list of referenced addresses, so caller can synthesize a label.
2934 ReferencedAddresses.push_back(static_cast<uint64_t>(Value));
2935 return false;
2936}
2937
2939 int64_t Value,
2940 uint64_t Address) {
  // PC-relative load annotation is not implemented for AMDGPU.
2941 llvm_unreachable("unimplemented");
2942}
2943
2944//===----------------------------------------------------------------------===//
2945// Initialization
2946//===----------------------------------------------------------------------===//
2947
2949 LLVMOpInfoCallback /*GetOpInfo*/,
2950 LLVMSymbolLookupCallback /*SymbolLookUp*/,
2951 void *DisInfo,
2952 MCContext *Ctx,
2953 std::unique_ptr<MCRelocationInfo> &&RelInfo) {
  // DisInfo is forwarded untouched; tryAddingSymbolicOperand interprets it
  // as a SectionSymbolsTy* when non-null.
2954 return new AMDGPUSymbolizer(*Ctx, std::move(RelInfo), DisInfo);
2955}
2956
2958 const MCSubtargetInfo &STI,
2959 MCContext &Ctx) {
  // Factory registered with TargetRegistry to construct the disassembler.
2960 return new AMDGPUDisassembler(STI, Ctx, T.createMCInstrInfo());
2961}
2962
2963extern "C" LLVM_ABI LLVM_EXTERNAL_VISIBILITY void
unsigned const MachineRegisterInfo * MRI
MCDisassembler::DecodeStatus DecodeStatus
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
aarch64 promote const
#define CHECK_RESERVED_BITS_DESC(MASK, DESC)
static VOPModifiers collectVOPModifiers(const MCInst &MI, bool IsVOP3P=false)
static int insertNamedMCOperand(MCInst &MI, const MCOperand &Op, AMDGPU::OpName Name)
LLVM_ABI LLVM_EXTERNAL_VISIBILITY void LLVMInitializeAMDGPUDisassembler()
static DecodeStatus decodeOperand_VSrcT16_Lo128(MCInst &Inst, unsigned Imm, uint64_t, const MCDisassembler *Decoder)
static DecodeStatus decodeOperand_KImmFP64(MCInst &Inst, uint64_t Imm, uint64_t Addr, const MCDisassembler *Decoder)
static SmallString< 32 > getBitRangeFromMask(uint32_t Mask, unsigned BaseBytes)
Print a string describing the reserved bit range specified by Mask with offset BaseBytes for use in e...
#define DECODE_OPERAND_SREG_8(RegClass, OpWidth)
static DecodeStatus decodeSMEMOffset(MCInst &Inst, unsigned Imm, uint64_t Addr, const MCDisassembler *Decoder)
static std::bitset< 128 > eat16Bytes(ArrayRef< uint8_t > &Bytes)
static DecodeStatus decodeVersionImm(MCInst &Inst, unsigned Imm, uint64_t, const MCDisassembler *Decoder)
#define DECODE_OPERAND_SREG_7(RegClass, OpWidth)
static DecodeStatus decodeSrcA9(MCInst &Inst, unsigned Imm, uint64_t, const MCDisassembler *Decoder)
static DecodeStatus decodeOperand_VGPR_16(MCInst &Inst, unsigned Imm, uint64_t, const MCDisassembler *Decoder)
#define PRINT_PSEUDO_DIRECTIVE_COMMENT(DIRECTIVE, MASK)
static DecodeStatus decodeSrcOp(MCInst &Inst, unsigned EncSize, unsigned OpWidth, unsigned Imm, unsigned EncImm, const MCDisassembler *Decoder)
static DecodeStatus decodeDpp8FI(MCInst &Inst, unsigned Val, uint64_t Addr, const MCDisassembler *Decoder)
static DecodeStatus decodeOperand_VSrc_f64(MCInst &Inst, unsigned Imm, uint64_t Addr, const MCDisassembler *Decoder)
static MCRegister CheckVGPROverflow(MCRegister Reg, const MCRegisterClass &RC, const MCRegisterInfo &MRI)
static int64_t getInlineImmValBF16(unsigned Imm)
#define DECODE_SDWA(DecName)
static DecodeStatus decodeSOPPBrTarget(MCInst &Inst, unsigned Imm, uint64_t Addr, const MCDisassembler *Decoder)
#define DECODE_OPERAND_REG_8(RegClass)
#define PRINT_DIRECTIVE(DIRECTIVE, MASK)
static DecodeStatus decodeSrcRegOrImm9(MCInst &Inst, unsigned Imm, uint64_t, const MCDisassembler *Decoder)
static DecodeStatus DecodeVGPR_16RegisterClass(MCInst &Inst, unsigned Imm, uint64_t, const MCDisassembler *Decoder)
static DecodeStatus decodeSrcReg9(MCInst &Inst, unsigned Imm, uint64_t, const MCDisassembler *Decoder)
static int64_t getInlineImmVal32(unsigned Imm)
static MCDisassembler::DecodeStatus addOperand(MCInst &Inst, const MCOperand &Opnd)
#define CHECK_RESERVED_BITS(MASK)
static DecodeStatus decodeSrcAV10(MCInst &Inst, unsigned Imm, uint64_t, const MCDisassembler *Decoder)
#define SGPR_MAX
static int64_t getInlineImmVal64(unsigned Imm)
static T eatBytes(ArrayRef< uint8_t > &Bytes)
static DecodeStatus decodeOperand_KImmFP(MCInst &Inst, unsigned Imm, uint64_t Addr, const MCDisassembler *Decoder)
static DecodeStatus decodeAVLdSt(MCInst &Inst, unsigned Imm, unsigned Opw, const MCDisassembler *Decoder)
static MCDisassembler * createAMDGPUDisassembler(const Target &T, const MCSubtargetInfo &STI, MCContext &Ctx)
static DecodeStatus decodeSrcRegOrImmA9(MCInst &Inst, unsigned Imm, uint64_t, const MCDisassembler *Decoder)
static DecodeStatus DecodeVGPR_16_Lo128RegisterClass(MCInst &Inst, unsigned Imm, uint64_t, const MCDisassembler *Decoder)
#define CHECK_RESERVED_BITS_MSG(MASK, MSG)
static DecodeStatus decodeOperandVOPDDstY(MCInst &Inst, unsigned Val, uint64_t Addr, const void *Decoder)
static MCSymbolizer * createAMDGPUSymbolizer(const Triple &, LLVMOpInfoCallback, LLVMSymbolLookupCallback, void *DisInfo, MCContext *Ctx, std::unique_ptr< MCRelocationInfo > &&RelInfo)
static DecodeStatus decodeBoolReg(MCInst &Inst, unsigned Val, uint64_t Addr, const MCDisassembler *Decoder)
static int64_t getInlineImmValF16(unsigned Imm)
#define GET_FIELD(MASK)
static std::bitset< 96 > eat12Bytes(ArrayRef< uint8_t > &Bytes)
static DecodeStatus decodeOperand_VSrcT16(MCInst &Inst, unsigned Imm, uint64_t, const MCDisassembler *Decoder)
static Error createReservedKDBytesError(unsigned BaseInBytes, unsigned WidthInBytes)
Create an error object to return from onSymbolStart for reserved kernel descriptor bytes being set.
static DecodeStatus decodeSplitBarrier(MCInst &Inst, unsigned Val, uint64_t Addr, const MCDisassembler *Decoder)
static DecodeStatus decodeAV10(MCInst &Inst, unsigned Imm, uint64_t, const MCDisassembler *Decoder)
#define CHECK_RESERVED_BITS_DESC_MSG(MASK, DESC, MSG)
static Error createReservedKDBitsError(uint32_t Mask, unsigned BaseBytes, const char *Msg="")
Create an error object to return from onSymbolStart for reserved kernel descriptor bits being set.
static void adjustMFMA_F8F6F4OpRegClass(const MCRegisterInfo &MRI, MCOperand &MO, uint8_t NumRegs)
Adjust the register values used by V_MFMA_F8F6F4_f8_f8 instructions to the appropriate subregister fo...
This file contains declaration for AMDGPU ISA disassembler.
Provides AMDGPU specific target descriptions.
AMDHSA kernel descriptor definitions.
#define AMDHSA_BITS_GET(SRC, MSK)
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
#define LLVM_ABI
Definition Compiler.h:213
#define LLVM_EXTERNAL_VISIBILITY
Definition Compiler.h:132
IRTranslator LLVM IR MI
#define I(x, y, z)
Definition MD5.cpp:57
Register Reg
#define T
MachineInstr unsigned OpIdx
Interface definition for SIRegisterInfo.
MCOperand decodeNonVGPRSrcOp(const MCInst &Inst, unsigned Width, unsigned Val) const
MCOperand decodeLiteral64Constant() const
void convertVOPC64DPPInst(MCInst &MI) const
bool isBufferInstruction(const MCInst &MI) const
Check if the instruction is a buffer operation (MUBUF, MTBUF, or S_BUFFER)
void convertEXPInst(MCInst &MI) const
MCOperand decodeSpecialReg64(unsigned Val) const
const char * getRegClassName(unsigned RegClassID) const
Expected< bool > decodeCOMPUTE_PGM_RSRC1(uint32_t FourByteBuffer, raw_string_ostream &KdStream) const
Decode as directives that handle COMPUTE_PGM_RSRC1.
MCOperand decodeSplitBarrier(const MCInst &Inst, unsigned Val) const
Expected< bool > decodeKernelDescriptorDirective(DataExtractor::Cursor &Cursor, ArrayRef< uint8_t > Bytes, raw_string_ostream &KdStream) const
void convertVOPCDPPInst(MCInst &MI) const
MCOperand decodeSpecialReg96Plus(unsigned Val) const
MCOperand decodeSDWASrc32(unsigned Val) const
void setABIVersion(unsigned Version) override
ELF-specific, set the ABI version from the object header.
Expected< bool > decodeCOMPUTE_PGM_RSRC2(uint32_t FourByteBuffer, raw_string_ostream &KdStream) const
Decode as directives that handle COMPUTE_PGM_RSRC2.
unsigned getAgprClassId(unsigned Width) const
MCOperand decodeDpp8FI(unsigned Val) const
MCOperand decodeSDWASrc(unsigned Width, unsigned Val) const
void convertFMAanyK(MCInst &MI) const
DecodeStatus tryDecodeInst(const uint8_t *Table, MCInst &MI, InsnType Inst, uint64_t Address, raw_ostream &Comments) const
void convertMacDPPInst(MCInst &MI) const
MCOperand decodeVOPDDstYOp(MCInst &Inst, unsigned Val) const
void convertDPP8Inst(MCInst &MI) const
MCOperand createVGPR16Operand(unsigned RegIdx, bool IsHi) const
MCOperand errOperand(unsigned V, const Twine &ErrMsg) const
MCOperand decodeVersionImm(unsigned Imm) const
Expected< bool > decodeKernelDescriptor(StringRef KdName, ArrayRef< uint8_t > Bytes, uint64_t KdAddress) const
void convertVOP3DPPInst(MCInst &MI) const
void convertTrue16OpSel(MCInst &MI) const
MCOperand decodeSrcOp(const MCInst &Inst, unsigned Width, unsigned Val) const
MCOperand decodeMandatoryLiteralConstant(unsigned Imm) const
MCOperand decodeLiteralConstant(const MCInstrDesc &Desc, const MCOperandInfo &OpDesc) const
Expected< bool > decodeCOMPUTE_PGM_RSRC3(uint32_t FourByteBuffer, raw_string_ostream &KdStream) const
Decode as directives that handle COMPUTE_PGM_RSRC3.
AMDGPUDisassembler(const MCSubtargetInfo &STI, MCContext &Ctx, MCInstrInfo const *MCII)
MCOperand decodeSpecialReg32(unsigned Val) const
MCOperand createRegOperand(MCRegister Reg) const
MCOperand decodeSDWAVopcDst(unsigned Val) const
void convertVINTERPInst(MCInst &MI) const
void convertSDWAInst(MCInst &MI) const
unsigned getSgprClassId(unsigned Width) const
static MCOperand decodeIntImmed(unsigned Imm)
void convertWMMAInst(MCInst &MI) const
MCOperand decodeBoolReg(const MCInst &Inst, unsigned Val) const
unsigned getVgprClassId(unsigned Width) const
void convertMAIInst(MCInst &MI) const
f8f6f4 instructions have different pseudos depending on the used formats.
unsigned getTtmpClassId(unsigned Width) const
DecodeStatus getInstruction(MCInst &MI, uint64_t &Size, ArrayRef< uint8_t > Bytes, uint64_t Address, raw_ostream &CS) const override
Returns the disassembly of a single instruction.
MCOperand decodeMandatoryLiteral64Constant(uint64_t Imm) const
void convertMIMGInst(MCInst &MI) const
bool isMacDPP(MCInst &MI) const
int getTTmpIdx(unsigned Val) const
void convertVOP3PDPPInst(MCInst &MI) const
MCOperand createSRegOperand(unsigned SRegClassID, unsigned Val) const
MCOperand decodeSDWASrc16(unsigned Val) const
Expected< bool > onSymbolStart(SymbolInfoTy &Symbol, uint64_t &Size, ArrayRef< uint8_t > Bytes, uint64_t Address) const override
Used to perform separate target specific disassembly for a particular symbol.
static const AMDGPUMCExpr * createLit(LitModifier Lit, int64_t Value, MCContext &Ctx)
bool tryAddingSymbolicOperand(MCInst &Inst, raw_ostream &cStream, int64_t Value, uint64_t Address, bool IsBranch, uint64_t Offset, uint64_t OpSize, uint64_t InstSize) override
Try to add a symbolic operand instead of Value to the MCInst.
void tryAddingPcLoadReferenceComment(raw_ostream &cStream, int64_t Value, uint64_t Address) override
Try to add a comment on the PC-relative load.
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition ArrayRef.h:40
size_t size() const
size - Get the array size.
Definition ArrayRef.h:142
const T * data() const
Definition ArrayRef.h:139
ArrayRef< T > slice(size_t N, size_t M) const
slice(n, m) - Chop off the first N elements of the array, and keep M elements in the array.
Definition ArrayRef.h:186
A class representing a position in a DataExtractor, as well as any error encountered during extractio...
LLVM_ABI uint32_t getU32(uint64_t *offset_ptr, Error *Err=nullptr) const
Extract a uint32_t value from *offset_ptr.
LLVM_ABI uint16_t getU16(uint64_t *offset_ptr, Error *Err=nullptr) const
Extract a uint16_t value from *offset_ptr.
LLVM_ABI void skip(Cursor &C, uint64_t Length) const
Advance the Cursor position by the given number of bytes.
LLVM_ABI StringRef getBytes(uint64_t *OffsetPtr, uint64_t Length, Error *Err=nullptr) const
Extract a fixed number of bytes from the specified offset.
Lightweight error class with error context and mandatory checking.
Definition Error.h:159
Tagged union holding either a T or a Error.
Definition Error.h:485
static const MCBinaryExpr * createOr(const MCExpr *LHS, const MCExpr *RHS, MCContext &Ctx)
Definition MCExpr.h:408
static LLVM_ABI const MCConstantExpr * create(int64_t Value, MCContext &Ctx, bool PrintInHex=false, unsigned SizeInBytes=0)
Definition MCExpr.cpp:212
Context object for machine code objects.
Definition MCContext.h:83
const MCRegisterInfo * getRegisterInfo() const
Definition MCContext.h:414
Superclass for all disassemblers.
MCDisassembler(const MCSubtargetInfo &STI, MCContext &Ctx)
MCContext & getContext() const
const MCSubtargetInfo & STI
raw_ostream * CommentStream
DecodeStatus
Ternary decode status.
Base class for the full range of assembler expressions which are needed for parsing.
Definition MCExpr.h:34
Instances of this class represent a single low-level machine instruction.
Definition MCInst.h:188
unsigned getOpcode() const
Definition MCInst.h:202
void addOperand(const MCOperand Op)
Definition MCInst.h:215
const MCOperand & getOperand(unsigned i) const
Definition MCInst.h:210
Describe properties that are true of each instruction in the target description file.
Interface to description of machine instruction set.
Definition MCInstrInfo.h:27
This holds information about one operand of a machine instruction, indicating the register class for ...
Definition MCInstrDesc.h:86
uint8_t OperandType
Information about the type of the operand.
Definition MCInstrDesc.h:98
Instances of this class represent operands of the MCInst class.
Definition MCInst.h:40
static MCOperand createExpr(const MCExpr *Val)
Definition MCInst.h:166
int64_t getImm() const
Definition MCInst.h:84
static MCOperand createReg(MCRegister Reg)
Definition MCInst.h:138
static MCOperand createImm(int64_t Val)
Definition MCInst.h:145
void setReg(MCRegister Reg)
Set the register number.
Definition MCInst.h:79
bool isReg() const
Definition MCInst.h:65
MCRegister getReg() const
Returns the register number.
Definition MCInst.h:73
bool isValid() const
Definition MCInst.h:64
MCRegisterClass - Base class of TargetRegisterClass.
MCRegister getRegister(unsigned i) const
getRegister - Return the specified register in the class.
unsigned getSizeInBits() const
Return the size of the physical register in bits if we are able to determine it.
bool contains(MCRegister Reg) const
contains - Return true if the specified register is included in this register class.
MCRegisterInfo base class - We assume that the target defines a static array of MCRegisterDesc object...
Wrapper class representing physical registers. Should be passed by value.
Definition MCRegister.h:41
Generic base class for all target subtargets.
static const MCSymbolRefExpr * create(const MCSymbol *Symbol, MCContext &Ctx, SMLoc Loc=SMLoc())
Definition MCExpr.h:214
MCSymbol - Instances of this class represent a symbol name in the MC file, and MCSymbols are created ...
Definition MCSymbol.h:42
bool isVariable() const
isVariable - Check if this is a variable symbol.
Definition MCSymbol.h:267
LLVM_ABI void setVariableValue(const MCExpr *Value)
Definition MCSymbol.cpp:50
const MCExpr * getVariableValue() const
Get the expression of the variable symbol.
Definition MCSymbol.h:270
Symbolize and annotate disassembled instructions.
Represents a location in source code.
Definition SMLoc.h:22
SmallString - A SmallString is just a SmallVector with methods and accessors that make it work better...
Definition SmallString.h:26
StringRef - Represent a constant reference to a string, i.e.
Definition StringRef.h:55
Target - Wrapper for Target specific information.
Triple - Helper class for working with autoconf configuration names.
Definition Triple.h:47
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition Twine.h:82
LLVM Value Representation.
Definition Value.h:75
This class implements an extremely fast bulk output stream that can only output to a stream.
Definition raw_ostream.h:53
A raw_ostream that writes to an std::string.
std::string & str()
Returns the string's reference.
A raw_ostream that writes to an SmallVector or SmallString.
const char *(* LLVMSymbolLookupCallback)(void *DisInfo, uint64_t ReferenceValue, uint64_t *ReferenceType, uint64_t ReferencePC, const char **ReferenceName)
The type for the symbol lookup function.
int(* LLVMOpInfoCallback)(void *DisInfo, uint64_t PC, uint64_t Offset, uint64_t OpSize, uint64_t InstSize, int TagType, void *TagBuf)
The type for the operand information call back function.
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
unsigned getVGPREncodingGranule(const MCSubtargetInfo *STI, std::optional< bool > EnableWavefrontSize32)
unsigned getSGPREncodingGranule(const MCSubtargetInfo *STI)
ArrayRef< GFXVersion > getGFXVersions()
bool isInlinableLiteralBF16(int16_t Literal, bool HasInv2Pi)
EncodingField< Bit, Bit, D > EncodingBit
bool isPKFMACF16InlineConstant(uint32_t Literal, bool IsGFX11Plus)
LLVM_READONLY const MIMGInfo * getMIMGInfo(unsigned Opc)
bool isInlinableLiteralFP16(int16_t Literal, bool HasInv2Pi)
MCRegister getMCReg(MCRegister Reg, const MCSubtargetInfo &STI)
If Reg is a pseudo reg, return the correct hardware register given STI otherwise return Reg.
int getMIMGOpcode(unsigned BaseOpcode, unsigned MIMGEncoding, unsigned VDataDwords, unsigned VAddrDwords)
bool isInlinableLiteralV2I16(uint32_t Literal)
bool isGFX10(const MCSubtargetInfo &STI)
bool isInlinableLiteralV2BF16(uint32_t Literal)
bool isGFX12Plus(const MCSubtargetInfo &STI)
bool hasPackedD16(const MCSubtargetInfo &STI)
bool isInlinableLiteralV2F16(uint32_t Literal)
bool getSMEMIsBuffer(unsigned Opc)
bool isGFX13(const MCSubtargetInfo &STI)
bool isVOPC64DPP(unsigned Opc)
unsigned getAMDHSACodeObjectVersion(const Module &M)
LLVM_READONLY bool hasNamedOperand(uint64_t Opcode, OpName NamedIdx)
bool isGFX9(const MCSubtargetInfo &STI)
LLVM_READONLY const MIMGDimInfo * getMIMGDimInfoByEncoding(uint8_t DimEnc)
bool isInlinableLiteral32(int32_t Literal, bool HasInv2Pi)
const MFMA_F8F6F4_Info * getWMMA_F8F6F4_WithFormatArgs(unsigned FmtA, unsigned FmtB, unsigned F8F8Opcode)
bool hasG16(const MCSubtargetInfo &STI)
unsigned getAddrSizeMIMGOp(const MIMGBaseOpcodeInfo *BaseOpcode, const MIMGDimInfo *Dim, bool IsA16, bool IsG16Supported)
bool isGFX13Plus(const MCSubtargetInfo &STI)
bool isGFX11Plus(const MCSubtargetInfo &STI)
bool isGFX10Plus(const MCSubtargetInfo &STI)
@ OPERAND_KIMM32
Operand with 32-bit immediate that uses the constant bus.
Definition SIDefines.h:233
@ OPERAND_REG_IMM_INT64
Definition SIDefines.h:203
@ OPERAND_REG_IMM_V2FP16
Definition SIDefines.h:210
@ OPERAND_REG_INLINE_C_FP64
Definition SIDefines.h:224
@ OPERAND_REG_INLINE_C_BF16
Definition SIDefines.h:221
@ OPERAND_REG_INLINE_C_V2BF16
Definition SIDefines.h:226
@ OPERAND_REG_IMM_V2INT16
Definition SIDefines.h:212
@ OPERAND_REG_IMM_BF16
Definition SIDefines.h:207
@ OPERAND_REG_IMM_INT32
Operands with register, 32-bit, or 64-bit immediate.
Definition SIDefines.h:202
@ OPERAND_REG_IMM_V2BF16
Definition SIDefines.h:209
@ OPERAND_REG_IMM_FP16
Definition SIDefines.h:208
@ OPERAND_REG_IMM_V2FP16_SPLAT
Definition SIDefines.h:211
@ OPERAND_REG_INLINE_C_INT64
Definition SIDefines.h:220
@ OPERAND_REG_INLINE_C_INT16
Operands with register or inline constant.
Definition SIDefines.h:218
@ OPERAND_REG_IMM_NOINLINE_V2FP16
Definition SIDefines.h:213
@ OPERAND_REG_IMM_FP64
Definition SIDefines.h:206
@ OPERAND_REG_INLINE_C_V2FP16
Definition SIDefines.h:227
@ OPERAND_REG_INLINE_AC_INT32
Operands with an AccVGPR register or inline constant.
Definition SIDefines.h:238
@ OPERAND_REG_INLINE_AC_FP32
Definition SIDefines.h:239
@ OPERAND_REG_IMM_V2INT32
Definition SIDefines.h:214
@ OPERAND_REG_IMM_FP32
Definition SIDefines.h:205
@ OPERAND_REG_INLINE_C_FP32
Definition SIDefines.h:223
@ OPERAND_REG_INLINE_C_INT32
Definition SIDefines.h:219
@ OPERAND_REG_INLINE_C_V2INT16
Definition SIDefines.h:225
@ OPERAND_REG_IMM_V2FP32
Definition SIDefines.h:215
@ OPERAND_REG_INLINE_AC_FP64
Definition SIDefines.h:240
@ OPERAND_REG_INLINE_C_FP16
Definition SIDefines.h:222
@ OPERAND_REG_IMM_INT16
Definition SIDefines.h:204
bool hasGDS(const MCSubtargetInfo &STI)
bool isGFX9Plus(const MCSubtargetInfo &STI)
bool isGFX1170(const MCSubtargetInfo &STI)
bool isGFX1250(const MCSubtargetInfo &STI)
unsigned hasKernargPreload(const MCSubtargetInfo &STI)
bool isMAC(unsigned Opc)
LLVM_READONLY const MIMGBaseOpcodeInfo * getMIMGBaseOpcodeInfo(unsigned BaseOpcode)
bool isGFX1250Plus(const MCSubtargetInfo &STI)
bool isInlinableLiteralI16(int32_t Literal, bool HasInv2Pi)
bool hasVOPD(const MCSubtargetInfo &STI)
bool isInlinableLiteral64(int64_t Literal, bool HasInv2Pi)
Is this literal inlinable.
const MFMA_F8F6F4_Info * getMFMA_F8F6F4_WithFormatArgs(unsigned CBSZ, unsigned BLGP, unsigned F8F8Opcode)
@ C
The default llvm calling convention, compatible with C.
Definition CallingConv.h:34
@ STT_NOTYPE
Definition ELF.h:1418
@ STT_AMDGPU_HSA_KERNEL
Definition ELF.h:1432
@ STT_OBJECT
Definition ELF.h:1419
value_type read(const void *memory, endianness endian)
Read a value of a particular endianness from memory.
Definition Endian.h:60
uint16_t read16(const void *P, endianness E)
Definition Endian.h:409
This is an optimization pass for GlobalISel generic memory operations.
Definition Types.h:26
@ Offset
Definition DWP.cpp:532
auto enumerate(FirstRange &&First, RestRanges &&...Rest)
Given two or more input ranges, returns a new range whose values are tuples (A, B,...
Definition STLExtras.h:2554
LLVM_ABI raw_fd_ostream & outs()
This returns a reference to a raw_fd_ostream for standard output.
SmallVectorImpl< T >::const_pointer c_str(SmallVectorImpl< T > &str)
Error createStringError(std::error_code EC, char const *Fmt, const Ts &... Vals)
Create formatted StringError object.
Definition Error.h:1305
Op::Description Desc
constexpr int popcount(T Value) noexcept
Count the number of set bits in a value.
Definition bit.h:154
int countr_zero(T Val)
Count number of 0's from the least significant bit to the most stopping at the first 1.
Definition bit.h:202
FunctionAddr VTableAddr uintptr_t uintptr_t Version
Definition InstrProf.h:302
MachineInstr * getImm(const MachineOperand &MO, const MachineRegisterInfo *MRI)
constexpr uint32_t Hi_32(uint64_t Value)
Return the high 32 bits of a 64 bit value.
Definition MathExtras.h:150
constexpr bool isUInt(uint64_t x)
Checks if an unsigned integer fits into the given bit width.
Definition MathExtras.h:189
void cantFail(Error Err, const char *Msg=nullptr)
Report a fatal error if Err is a failure value.
Definition Error.h:769
Target & getTheGCNTarget()
The target for GCN GPUs.
To bit_cast(const From &from) noexcept
Definition bit.h:90
@ Add
Sum of integers.
DWARFExpression::Operation Op
unsigned M0(unsigned Val)
Definition VE.h:376
auto find_if(R &&Range, UnaryPredicate P)
Provide wrappers to std::find_if which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1772
std::vector< SymbolInfoTy > SectionSymbolsTy
constexpr int64_t SignExtend64(uint64_t x)
Sign-extend the number in the bottom B bits of X to a 64-bit integer.
Definition MathExtras.h:572
LLVM_ABI void reportFatalUsageError(Error Err)
Report a fatal error that does not indicate a bug in LLVM.
Definition Error.cpp:177
static void RegisterMCSymbolizer(Target &T, Target::MCSymbolizerCtorTy Fn)
RegisterMCSymbolizer - Register an MCSymbolizer implementation for the given target.
static void RegisterMCDisassembler(Target &T, Target::MCDisassemblerCtorTy Fn)
RegisterMCDisassembler - Register a MCDisassembler implementation for the given target.