//===- AMDGPUDisassembler.cpp - Disassembler for AMDGPU ISA ---------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
//===----------------------------------------------------------------------===//
//
/// \file
///
/// This file contains the definition of the AMDGPU ISA disassembler.
//
//===----------------------------------------------------------------------===//

// ToDo: What to do with instruction suffixes (v_mov_b32 vs v_mov_b32_e32)?
22#include "SIDefines.h"
23#include "SIRegisterInfo.h"
29#include "llvm/MC/MCAsmInfo.h"
30#include "llvm/MC/MCContext.h"
31#include "llvm/MC/MCDecoder.h"
33#include "llvm/MC/MCExpr.h"
34#include "llvm/MC/MCInstrDesc.h"
40
using namespace llvm;
using namespace llvm::MCD;

#define DEBUG_TYPE "amdgpu-disassembler"

#define SGPR_MAX                                                               \
  (isGFX10Plus() ? AMDGPU::EncValues::SGPR_MAX_GFX10                           \
                 : AMDGPU::EncValues::SGPR_MAX_SI)

using DecodeStatus = llvm::MCDisassembler::DecodeStatus;

static int64_t getInlineImmValF16(unsigned Imm);
static int64_t getInlineImmValBF16(unsigned Imm);
static int64_t getInlineImmVal32(unsigned Imm);
static int64_t getInlineImmVal64(unsigned Imm);

AMDGPUDisassembler::AMDGPUDisassembler(const MCSubtargetInfo &STI,
                                       MCContext &Ctx, MCInstrInfo const *MCII)
    : MCDisassembler(STI, Ctx), MCII(MCII), MRI(*Ctx.getRegisterInfo()),
      MAI(*Ctx.getAsmInfo()),
      HwModeRegClass(STI.getHwMode(MCSubtargetInfo::HwMode_RegInfo)),
      TargetMaxInstBytes(MAI.getMaxInstLength(&STI)),
      CodeObjectVersion(AMDGPU::getDefaultAMDHSACodeObjectVersion()) {
  // ToDo: AMDGPUDisassembler supports only VI ISA.
  if (!STI.hasFeature(AMDGPU::FeatureGCN3Encoding) && !isGFX10Plus())
    reportFatalUsageError("disassembly not yet supported for subtarget");

  for (auto [Symbol, Code] : AMDGPU::UCVersion::getGFXVersions())
    createConstantSymbolExpr(Symbol, Code);

  UCVersionW64Expr = createConstantSymbolExpr("UC_VERSION_W64_BIT", 0x2000);
  UCVersionW32Expr = createConstantSymbolExpr("UC_VERSION_W32_BIT", 0x4000);
  UCVersionMDPExpr = createConstantSymbolExpr("UC_VERSION_MDP_BIT", 0x8000);
}

void AMDGPUDisassembler::setABIVersion(unsigned Version) {
  CodeObjectVersion = AMDGPU::getAMDHSACodeObjectVersion(Version);
}

static DecodeStatus addOperand(MCInst &Inst, const MCOperand& Opnd) {
  Inst.addOperand(Opnd);
  return Opnd.isValid() ?
    MCDisassembler::Success :
    MCDisassembler::Fail;
}
87
89 AMDGPU::OpName Name) {
90 int OpIdx = AMDGPU::getNamedOperandIdx(MI.getOpcode(), Name);
91 if (OpIdx != -1) {
92 auto *I = MI.begin();
93 std::advance(I, OpIdx);
94 MI.insert(I, Op);
95 }
96 return OpIdx;
97}

static DecodeStatus decodeSOPPBrTarget(MCInst &Inst, unsigned Imm,
                                       uint64_t Addr,
                                       const MCDisassembler *Decoder) {
  const auto *DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);

  // Our branches take a simm16.
  int64_t Offset = SignExtend64<16>(Imm) * 4 + 4 + Addr;

  if (DAsm->tryAddingSymbolicOperand(Inst, Offset, Addr, true, 2, 2, 0))
    return MCDisassembler::Success;
  return addOperand(Inst, MCOperand::createImm(Imm));
}
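
// For example, with Addr = 0x100 and Imm = 0xFFFC (simm16 -4), the branch
// target is -4 * 4 + 4 + 0x100 = 0xF4: the 16-bit word offset is scaled to
// bytes and taken relative to the instruction following the 4-byte branch.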

static DecodeStatus decodeSMEMOffset(MCInst &Inst, unsigned Imm, uint64_t Addr,
                                     const MCDisassembler *Decoder) {
  const auto *DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
  int64_t Offset;
  if (DAsm->isGFX12Plus()) { // GFX12 supports 24-bit signed offsets.
    Offset = SignExtend64<24>(Imm);
  } else if (DAsm->isVI()) { // VI supports 20-bit unsigned offsets.
    Offset = Imm & 0xFFFFF;
  } else { // GFX9+ supports 21-bit signed offsets.
    Offset = SignExtend64<21>(Imm);
  }
  return addOperand(Inst, MCOperand::createImm(Offset));
}

static DecodeStatus decodeBoolReg(MCInst &Inst, unsigned Val, uint64_t Addr,
                                  const MCDisassembler *Decoder) {
  const auto *DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
  return addOperand(Inst, DAsm->decodeBoolReg(Inst, Val));
}

static DecodeStatus decodeSplitBarrier(MCInst &Inst, unsigned Val,
                                       uint64_t Addr,
                                       const MCDisassembler *Decoder) {
  const auto *DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
  return addOperand(Inst, DAsm->decodeSplitBarrier(Inst, Val));
}

static DecodeStatus decodeDpp8FI(MCInst &Inst, unsigned Val, uint64_t Addr,
                                 const MCDisassembler *Decoder) {
  const auto *DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
  return addOperand(Inst, DAsm->decodeDpp8FI(Val));
}

#define DECODE_OPERAND(StaticDecoderName, DecoderName)                         \
  static DecodeStatus StaticDecoderName(MCInst &Inst, unsigned Imm,            \
                                        uint64_t /*Addr*/,                     \
                                        const MCDisassembler *Decoder) {       \
    auto DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);              \
    return addOperand(Inst, DAsm->DecoderName(Imm));                           \
  }

// Decoder for registers, decode directly using RegClassID. Imm(8-bit) is
// number of register. Used by VGPR only and AGPR only operands.
#define DECODE_OPERAND_REG_8(RegClass)                                         \
  static DecodeStatus Decode##RegClass##RegisterClass(                         \
      MCInst &Inst, unsigned Imm, uint64_t /*Addr*/,                           \
      const MCDisassembler *Decoder) {                                         \
    assert(Imm < (1 << 8) && "8-bit encoding");                                \
    auto DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);              \
    return addOperand(                                                         \
        Inst, DAsm->createRegOperand(AMDGPU::RegClass##RegClassID, Imm));      \
  }

#define DECODE_SrcOp(Name, EncSize, OpWidth, EncImm)                           \
  static DecodeStatus Name(MCInst &Inst, unsigned Imm, uint64_t /*Addr*/,      \
                           const MCDisassembler *Decoder) {                    \
    assert(Imm < (1 << EncSize) && #EncSize "-bit encoding");                  \
    auto DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);              \
    return addOperand(Inst, DAsm->decodeSrcOp(Inst, OpWidth, EncImm));         \
  }

static DecodeStatus decodeSrcOp(MCInst &Inst, unsigned EncSize,
                                unsigned OpWidth, unsigned Imm, unsigned EncImm,
                                const MCDisassembler *Decoder) {
  assert(Imm < (1U << EncSize) && "Operand doesn't fit encoding!");
  const auto *DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
  return addOperand(Inst, DAsm->decodeSrcOp(Inst, OpWidth, EncImm));
}

// Decoder for registers. Imm(7-bit) is number of register, uses decodeSrcOp to
// get register class. Used by SGPR only operands.
#define DECODE_OPERAND_SREG_7(RegClass, OpWidth)                               \
  DECODE_SrcOp(Decode##RegClass##RegisterClass, 7, OpWidth, Imm)

#define DECODE_OPERAND_SREG_8(RegClass, OpWidth)                               \
  DECODE_SrcOp(Decode##RegClass##RegisterClass, 8, OpWidth, Imm)
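
// For example, DECODE_OPERAND_SREG_7(SReg_32, 32) expands (via DECODE_SrcOp)
// to a static DecodeSReg_32RegisterClass() that asserts the immediate fits in
// 7 bits and forwards it to AMDGPUDisassembler::decodeSrcOp() with an operand
// width of 32 bits.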

// Decoder for registers. Imm(10-bit): Imm{7-0} is number of register,
// Imm{9} is acc (agpr or vgpr), Imm{8} should be 0 (see VOP3Pe_SMFMAC).
// Set Imm{8} to 1 (IS_VGPR) to decode using 'enum10' from decodeSrcOp.
// Used by AV_ register classes (AGPR or VGPR only register operands).
template <unsigned OpWidth>
static DecodeStatus decodeAV10(MCInst &Inst, unsigned Imm, uint64_t /* Addr */,
                               const MCDisassembler *Decoder) {
  return decodeSrcOp(Inst, 10, OpWidth, Imm, Imm | AMDGPU::EncValues::IS_VGPR,
                     Decoder);
}

// Decoder for Src(9-bit encoding) registers only.
template <unsigned OpWidth>
static DecodeStatus decodeSrcReg9(MCInst &Inst, unsigned Imm,
                                  uint64_t /* Addr */,
                                  const MCDisassembler *Decoder) {
  return decodeSrcOp(Inst, 9, OpWidth, Imm, Imm, Decoder);
}

// Decoder for Src(9-bit encoding) AGPR registers only: the register number is
// encoded in 9 bits; set Imm{9} to 1 (set acc) and decode using 'enum10' from
// decodeSrcOp.
template <unsigned OpWidth>
static DecodeStatus decodeSrcA9(MCInst &Inst, unsigned Imm, uint64_t /* Addr */,
                                const MCDisassembler *Decoder) {
  return decodeSrcOp(Inst, 9, OpWidth, Imm, Imm | 512, Decoder);
}

// Decoder for 'enum10' from decodeSrcOp, Imm{0-8} is 9-bit Src encoding,
// Imm{9} is acc, registers only.
template <unsigned OpWidth>
static DecodeStatus decodeSrcAV10(MCInst &Inst, unsigned Imm,
                                  uint64_t /* Addr */,
                                  const MCDisassembler *Decoder) {
  return decodeSrcOp(Inst, 10, OpWidth, Imm, Imm, Decoder);
}

// Decoder for RegisterOperands using 9-bit Src encoding. The operand can be a
// register from RegClass or an immediate. Registers that don't belong to
// RegClass will still be decoded, and InstPrinter will report a warning. An
// immediate will be decoded into a constant matching the OperandType
// (important for floating point types).
template <unsigned OpWidth>
static DecodeStatus decodeSrcRegOrImm9(MCInst &Inst, unsigned Imm,
                                       uint64_t /* Addr */,
                                       const MCDisassembler *Decoder) {
  return decodeSrcOp(Inst, 9, OpWidth, Imm, Imm, Decoder);
}

// Decoder for Src(9-bit encoding) AGPR or immediate. Set Imm{9} to 1 (set acc)
// and decode using 'enum10' from decodeSrcOp.
template <unsigned OpWidth>
static DecodeStatus decodeSrcRegOrImmA9(MCInst &Inst, unsigned Imm,
                                        uint64_t /* Addr */,
                                        const MCDisassembler *Decoder) {
  return decodeSrcOp(Inst, 9, OpWidth, Imm, Imm | 512, Decoder);
}
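
// Summary of the source-operand encodings used above: the low 8 or 9 bits
// select a register or inline constant, Imm{8} (IS_VGPR, 0x100) marks a VGPR,
// and in the 10-bit 'enum10' forms Imm{9} (0x200) selects AGPR over VGPR,
// which is why decodeSrcA9/decodeSrcRegOrImmA9 force it with 'Imm | 512'.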

// Default decoders generated by tablegen: 'Decode<RegClass>RegisterClass'
// when RegisterClass is used as an operand. Most often used for destination
// operands.

DECODE_OPERAND_REG_8(VGPR_32)
DECODE_OPERAND_REG_8(VGPR_32_Lo128)
DECODE_OPERAND_REG_8(VReg_64)
DECODE_OPERAND_REG_8(VReg_96)
DECODE_OPERAND_REG_8(VReg_128)
DECODE_OPERAND_REG_8(VReg_192)
DECODE_OPERAND_REG_8(VReg_256)
DECODE_OPERAND_REG_8(VReg_288)
DECODE_OPERAND_REG_8(VReg_320)
DECODE_OPERAND_REG_8(VReg_352)
DECODE_OPERAND_REG_8(VReg_384)
DECODE_OPERAND_REG_8(VReg_512)
DECODE_OPERAND_REG_8(VReg_1024)

DECODE_OPERAND_SREG_7(SReg_32, 32)
DECODE_OPERAND_SREG_7(SReg_32_XM0, 32)
DECODE_OPERAND_SREG_7(SReg_32_XEXEC, 32)
DECODE_OPERAND_SREG_7(SReg_32_XM0_XEXEC, 32)
DECODE_OPERAND_SREG_7(SReg_32_XEXEC_HI, 32)
DECODE_OPERAND_SREG_7(SReg_64_XEXEC, 64)
DECODE_OPERAND_SREG_7(SReg_64_XEXEC_XNULL, 64)
DECODE_OPERAND_SREG_7(SReg_96, 96)
DECODE_OPERAND_SREG_7(SReg_128, 128)
DECODE_OPERAND_SREG_7(SReg_128_XNULL, 128)
DECODE_OPERAND_SREG_7(SReg_256, 256)
DECODE_OPERAND_SREG_7(SReg_256_XNULL, 256)
DECODE_OPERAND_SREG_7(SReg_512, 512)

DECODE_OPERAND_SREG_8(SReg_64, 64)

DECODE_OPERAND_REG_8(AReg_64)
DECODE_OPERAND_REG_8(AReg_96)
DECODE_OPERAND_REG_8(AReg_128)
DECODE_OPERAND_REG_8(AReg_256)
DECODE_OPERAND_REG_8(AReg_512)
DECODE_OPERAND_REG_8(AReg_1024)

static DecodeStatus DecodeVGPR_16RegisterClass(MCInst &Inst, unsigned Imm,
                                               uint64_t /*Addr*/,
                                               const MCDisassembler *Decoder) {
  assert(isUInt<10>(Imm) && "10-bit encoding expected");
  assert((Imm & (1 << 8)) == 0 && "Imm{8} should not be used");

  bool IsHi = Imm & (1 << 9);
  unsigned RegIdx = Imm & 0xff;
  const auto *DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
  return addOperand(Inst, DAsm->createVGPR16Operand(RegIdx, IsHi));
}
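
// True16 operands use a 16-bit register file laid over the 32-bit VGPRs:
// VGPR_16 register RegIdx * 2 is the low half of 32-bit VGPR RegIdx and
// RegIdx * 2 + 1 is its high half (see createVGPR16Operand later in this
// file).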

static DecodeStatus
DecodeVGPR_16_Lo128RegisterClass(MCInst &Inst, unsigned Imm, uint64_t /*Addr*/,
                                 const MCDisassembler *Decoder) {
  assert(isUInt<8>(Imm) && "8-bit encoding expected");

  bool IsHi = Imm & (1 << 7);
  unsigned RegIdx = Imm & 0x7f;
  const auto *DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
  return addOperand(Inst, DAsm->createVGPR16Operand(RegIdx, IsHi));
}

template <unsigned OpWidth>
static DecodeStatus decodeOperand_VSrcT16_Lo128(MCInst &Inst, unsigned Imm,
                                                uint64_t /*Addr*/,
                                                const MCDisassembler *Decoder) {
  assert(isUInt<9>(Imm) && "9-bit encoding expected");

  const auto *DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
  if (Imm & AMDGPU::EncValues::IS_VGPR) {
    bool IsHi = Imm & (1 << 7);
    unsigned RegIdx = Imm & 0x7f;
    return addOperand(Inst, DAsm->createVGPR16Operand(RegIdx, IsHi));
  }
  return addOperand(Inst, DAsm->decodeNonVGPRSrcOp(Inst, OpWidth, Imm & 0xFF));
}

template <unsigned OpWidth>
static DecodeStatus decodeOperand_VSrcT16(MCInst &Inst, unsigned Imm,
                                          uint64_t /*Addr*/,
                                          const MCDisassembler *Decoder) {
  assert(isUInt<10>(Imm) && "10-bit encoding expected");

  const auto *DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
  if (Imm & AMDGPU::EncValues::IS_VGPR) {
    bool IsHi = Imm & (1 << 9);
    unsigned RegIdx = Imm & 0xff;
    return addOperand(Inst, DAsm->createVGPR16Operand(RegIdx, IsHi));
  }
  return addOperand(Inst, DAsm->decodeNonVGPRSrcOp(Inst, OpWidth, Imm & 0xFF));
}

static DecodeStatus decodeOperand_VGPR_16(MCInst &Inst, unsigned Imm,
                                          uint64_t /*Addr*/,
                                          const MCDisassembler *Decoder) {
  assert(isUInt<10>(Imm) && "10-bit encoding expected");
  assert(Imm & AMDGPU::EncValues::IS_VGPR && "VGPR expected");

  const auto *DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);

  bool IsHi = Imm & (1 << 9);
  unsigned RegIdx = Imm & 0xff;
  return addOperand(Inst, DAsm->createVGPR16Operand(RegIdx, IsHi));
}

static DecodeStatus decodeOperand_KImmFP(MCInst &Inst, unsigned Imm,
                                         uint64_t Addr,
                                         const MCDisassembler *Decoder) {
  const auto *DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
  return addOperand(Inst, DAsm->decodeMandatoryLiteralConstant(Imm));
}

static DecodeStatus decodeOperand_KImmFP64(MCInst &Inst, uint64_t Imm,
                                           uint64_t Addr,
                                           const MCDisassembler *Decoder) {
  const auto *DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
  return addOperand(Inst, DAsm->decodeMandatoryLiteral64Constant(Imm));
}

static DecodeStatus decodeOperandVOPDDstY(MCInst &Inst, unsigned Val,
                                          uint64_t Addr, const void *Decoder) {
  const auto *DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
  return addOperand(Inst, DAsm->decodeVOPDDstYOp(Inst, Val));
}

static DecodeStatus decodeAVLdSt(MCInst &Inst, unsigned Imm, unsigned Opw,
                                 const MCDisassembler *Decoder) {
  const auto *DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
  return addOperand(Inst, DAsm->decodeSrcOp(Inst, Opw, Imm | 256));
}

template <unsigned Opw>
static DecodeStatus decodeAVLdSt(MCInst &Inst, unsigned Imm,
                                 uint64_t /* Addr */,
                                 const MCDisassembler *Decoder) {
  return decodeAVLdSt(Inst, Imm, Opw, Decoder);
}

static DecodeStatus decodeOperand_VSrc_f64(MCInst &Inst, unsigned Imm,
                                           uint64_t Addr,
                                           const MCDisassembler *Decoder) {
  assert(Imm < (1 << 9) && "9-bit encoding");
  const auto *DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
  return addOperand(Inst, DAsm->decodeSrcOp(Inst, 64, Imm));
}

#define DECODE_SDWA(DecName) \
DECODE_OPERAND(decodeSDWA##DecName, decodeSDWA##DecName)

DECODE_SDWA(Src32)
DECODE_SDWA(Src16)
DECODE_SDWA(VopcDst)

static DecodeStatus decodeVersionImm(MCInst &Inst, unsigned Imm,
                                     uint64_t /* Addr */,
                                     const MCDisassembler *Decoder) {
  const auto *DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
  return addOperand(Inst, DAsm->decodeVersionImm(Imm));
}

#include "AMDGPUGenDisassemblerTables.inc"

namespace {
// Define bitwidths for various types used to instantiate the decoder.
template <> constexpr uint32_t InsnBitWidth<uint32_t> = 32;
template <> constexpr uint32_t InsnBitWidth<uint64_t> = 64;
template <> constexpr uint32_t InsnBitWidth<std::bitset<96>> = 96;
template <> constexpr uint32_t InsnBitWidth<std::bitset<128>> = 128;
} // namespace
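
// These specializations tell the generated decodeInstruction() driver (from
// the .inc included above) how many bits each raw instruction container
// carries, so the same decoder machinery can service the 32-, 64-, 96- and
// 128-bit encodings tried below.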

//===----------------------------------------------------------------------===//
//
//===----------------------------------------------------------------------===//

template <typename InsnType>
DecodeStatus AMDGPUDisassembler::tryDecodeInst(const uint8_t *Table, MCInst &MI,
                                               InsnType Inst, uint64_t Address,
                                               raw_ostream &Comments) const {
  assert(MI.getOpcode() == 0);
  assert(MI.getNumOperands() == 0);
  MCInst TmpInst;
  HasLiteral = false;
  const auto SavedBytes = Bytes;

  SmallString<64> LocalComments;
  raw_svector_ostream LocalCommentStream(LocalComments);
  CommentStream = &LocalCommentStream;

  DecodeStatus Res =
      decodeInstruction(Table, TmpInst, Inst, Address, this, STI);

  CommentStream = nullptr;

  if (Res != MCDisassembler::Fail) {
    MI = TmpInst;
    Comments << LocalComments;
    return MCDisassembler::Success;
  }
  Bytes = SavedBytes;
  return MCDisassembler::Fail;
}

template <typename InsnType>
DecodeStatus
AMDGPUDisassembler::tryDecodeInst(const uint8_t *Table1, const uint8_t *Table2,
                                  MCInst &MI, InsnType Inst, uint64_t Address,
                                  raw_ostream &Comments) const {
  for (const uint8_t *T : {Table1, Table2}) {
    if (DecodeStatus Res = tryDecodeInst(T, MI, Inst, Address, Comments))
      return Res;
  }
  return MCDisassembler::Fail;
}

template <typename T> static inline T eatBytes(ArrayRef<uint8_t>& Bytes) {
  assert(Bytes.size() >= sizeof(T));
  const auto Res =
      support::endian::read<T, llvm::endianness::little>(Bytes.data());
  Bytes = Bytes.slice(sizeof(T));
  return Res;
}

static inline std::bitset<96> eat12Bytes(ArrayRef<uint8_t> &Bytes) {
  using namespace llvm::support::endian;
  assert(Bytes.size() >= 12);
  std::bitset<96> Lo(read<uint64_t, endianness::little>(Bytes.data()));
  Bytes = Bytes.slice(8);
  std::bitset<96> Hi(read<uint32_t, endianness::little>(Bytes.data()));
  Bytes = Bytes.slice(4);
  return (Hi << 64) | Lo;
}

static inline std::bitset<128> eat16Bytes(ArrayRef<uint8_t> &Bytes) {
  using namespace llvm::support::endian;
  assert(Bytes.size() >= 16);
  std::bitset<128> Lo(read<uint64_t, endianness::little>(Bytes.data()));
  Bytes = Bytes.slice(8);
  std::bitset<128> Hi(read<uint64_t, endianness::little>(Bytes.data()));
  Bytes = Bytes.slice(8);
  return (Hi << 64) | Lo;
}
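
// Example: for input bytes {0x01, 0x02, ..., 0x10}, eat16Bytes() returns the
// 128-bit value whose low 64 bits are 0x0807060504030201 (little-endian) and
// advances Bytes past all 16 bytes.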

void AMDGPUDisassembler::decodeImmOperands(MCInst &MI,
                                           const MCInstrInfo &MCII) const {
  const MCInstrDesc &Desc = MCII.get(MI.getOpcode());
  for (auto [OpNo, OpDesc] : enumerate(Desc.operands())) {
    if (OpNo >= MI.getNumOperands())
      continue;

    // TODO: Fix V_DUAL_FMAMK_F32_X_FMAAK_F32_gfx12 vsrc operands,
    // defined to take VGPR_32, but in reality allowing inline constants.
    bool IsSrc = AMDGPU::OPERAND_SRC_FIRST <= OpDesc.OperandType &&
                 OpDesc.OperandType <= AMDGPU::OPERAND_SRC_LAST;
    if (!IsSrc && OpDesc.OperandType != MCOI::OPERAND_REGISTER)
      continue;

    MCOperand &Op = MI.getOperand(OpNo);
    if (!Op.isImm())
      continue;
    int64_t Imm = Op.getImm();
    if (AMDGPU::EncValues::INLINE_INTEGER_C_MIN <= Imm &&
        Imm <= AMDGPU::EncValues::INLINE_INTEGER_C_MAX) {
      Op = decodeIntImmed(Imm);
      continue;
    }

    if (Imm == AMDGPU::EncValues::LITERAL_CONST) {
      Op = decodeLiteralConstant(Desc, OpDesc);
      continue;
    }

    if (AMDGPU::EncValues::INLINE_FLOATING_C_MIN <= Imm &&
        Imm <= AMDGPU::EncValues::INLINE_FLOATING_C_MAX) {
      switch (OpDesc.OperandType) {
      case AMDGPU::OPERAND_REG_IMM_BF16:
      case AMDGPU::OPERAND_REG_IMM_V2BF16:
      case AMDGPU::OPERAND_REG_INLINE_C_BF16:
      case AMDGPU::OPERAND_REG_INLINE_C_V2BF16:
        Imm = getInlineImmValBF16(Imm);
        break;
      case AMDGPU::OPERAND_REG_IMM_FP16:
      case AMDGPU::OPERAND_REG_IMM_V2FP16:
      case AMDGPU::OPERAND_REG_INLINE_C_FP16:
      case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
        Imm = getInlineImmValF16(Imm);
        break;
      case AMDGPU::OPERAND_REG_IMM_FP64:
      case AMDGPU::OPERAND_REG_IMM_INT64:
      case AMDGPU::OPERAND_REG_INLINE_C_FP64:
      case AMDGPU::OPERAND_REG_INLINE_C_INT64:
        Imm = getInlineImmVal64(Imm);
        break;
      default:
        Imm = getInlineImmVal32(Imm);
      }
      Op.setImm(Imm);
    }
  }
}
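
// For example, a source operand encoded as 242 in the inline floating-point
// range is rewritten here to the bit pattern of 1.0 in the operand's own
// format: 0x3C00 for f16, 0x3F80 for bf16, 0x3F800000 for f32 (see the
// getInlineImmVal* helpers further below).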

DecodeStatus AMDGPUDisassembler::getInstruction(MCInst &MI, uint64_t &Size,
                                                ArrayRef<uint8_t> Bytes_,
                                                uint64_t Address,
                                                raw_ostream &CS) const {
  unsigned MaxInstBytesNum = std::min((size_t)TargetMaxInstBytes, Bytes_.size());
  Bytes = Bytes_.slice(0, MaxInstBytesNum);

  // In case the opcode is not recognized we'll assume a Size of 4 bytes (unless
  // there are fewer bytes left). This will be overridden on success.
  Size = std::min((size_t)4, Bytes_.size());

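  // Each attempt below consumes candidate bytes from the front of Bytes; after
  // a failed attempt, Bytes is re-sliced from Bytes_ so the next (shorter)
  // encoding is tried from the same instruction address.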
  do {
    // ToDo: better to switch encoding length using some bit predicate
    // but it is unknown yet, so try all we can

    // Try to decode DPP and SDWA first to solve conflict with VOP1 and VOP2
    // encodings
    if (isGFX1250() && Bytes.size() >= 16) {
      std::bitset<128> DecW = eat16Bytes(Bytes);
      if (tryDecodeInst(DecoderTableGFX1250128, MI, DecW, Address, CS))
        break;
      Bytes = Bytes_.slice(0, MaxInstBytesNum);
    }

    if (isGFX11Plus() && Bytes.size() >= 12) {
      std::bitset<96> DecW = eat12Bytes(Bytes);

      if (isGFX11() &&
          tryDecodeInst(DecoderTableGFX1196, DecoderTableGFX11_FAKE1696, MI,
                        DecW, Address, CS))
        break;

      if (isGFX1250() &&
          tryDecodeInst(DecoderTableGFX125096, DecoderTableGFX1250_FAKE1696, MI,
                        DecW, Address, CS))
        break;

      if (isGFX12() &&
          tryDecodeInst(DecoderTableGFX1296, DecoderTableGFX12_FAKE1696, MI,
                        DecW, Address, CS))
        break;

      if (isGFX12() &&
          tryDecodeInst(DecoderTableGFX12W6496, MI, DecW, Address, CS))
        break;

      if (STI.hasFeature(AMDGPU::Feature64BitLiterals)) {
        // Return 8 bytes for a potential literal.
        Bytes = Bytes_.slice(4, MaxInstBytesNum - 4);

        if (isGFX1250() &&
            tryDecodeInst(DecoderTableGFX125096, MI, DecW, Address, CS))
          break;
      }

      // Reinitialize Bytes
      Bytes = Bytes_.slice(0, MaxInstBytesNum);

    } else if (Bytes.size() >= 16 &&
               STI.hasFeature(AMDGPU::FeatureGFX950Insts)) {
      std::bitset<128> DecW = eat16Bytes(Bytes);
      if (tryDecodeInst(DecoderTableGFX940128, MI, DecW, Address, CS))
        break;

      // Reinitialize Bytes
      Bytes = Bytes_.slice(0, MaxInstBytesNum);
    }

    if (Bytes.size() >= 8) {
      const uint64_t QW = eatBytes<uint64_t>(Bytes);

      if (STI.hasFeature(AMDGPU::FeatureGFX10_BEncoding) &&
          tryDecodeInst(DecoderTableGFX10_B64, MI, QW, Address, CS))
        break;

      if (STI.hasFeature(AMDGPU::FeatureUnpackedD16VMem) &&
          tryDecodeInst(DecoderTableGFX80_UNPACKED64, MI, QW, Address, CS))
        break;

      if (STI.hasFeature(AMDGPU::FeatureGFX950Insts) &&
          tryDecodeInst(DecoderTableGFX95064, MI, QW, Address, CS))
        break;

      // Some GFX9 subtargets repurposed the v_mad_mix_f32, v_mad_mixlo_f16 and
      // v_mad_mixhi_f16 for FMA variants. Try to decode using this special
      // table first so we print the correct name.
      if (STI.hasFeature(AMDGPU::FeatureFmaMixInsts) &&
          tryDecodeInst(DecoderTableGFX9_DL64, MI, QW, Address, CS))
        break;

      if (STI.hasFeature(AMDGPU::FeatureGFX940Insts) &&
          tryDecodeInst(DecoderTableGFX94064, MI, QW, Address, CS))
        break;

      if (STI.hasFeature(AMDGPU::FeatureGFX90AInsts) &&
          tryDecodeInst(DecoderTableGFX90A64, MI, QW, Address, CS))
        break;

      if ((isVI() || isGFX9()) &&
          tryDecodeInst(DecoderTableGFX864, MI, QW, Address, CS))
        break;

      if (isGFX9() && tryDecodeInst(DecoderTableGFX964, MI, QW, Address, CS))
        break;

      if (isGFX10() && tryDecodeInst(DecoderTableGFX1064, MI, QW, Address, CS))
        break;

      if (isGFX1250() &&
          tryDecodeInst(DecoderTableGFX125064, DecoderTableGFX1250_FAKE1664, MI,
                        QW, Address, CS))
        break;

      if (isGFX12() &&
          tryDecodeInst(DecoderTableGFX1264, DecoderTableGFX12_FAKE1664, MI, QW,
                        Address, CS))
        break;

      if (isGFX11() &&
          tryDecodeInst(DecoderTableGFX1164, DecoderTableGFX11_FAKE1664, MI, QW,
                        Address, CS))
        break;

      if (isGFX11() &&
          tryDecodeInst(DecoderTableGFX11W6464, MI, QW, Address, CS))
        break;

      if (isGFX12() &&
          tryDecodeInst(DecoderTableGFX12W6464, MI, QW, Address, CS))
        break;

      // Reinitialize Bytes
      Bytes = Bytes_.slice(0, MaxInstBytesNum);
    }

    // Try decode 32-bit instruction
    if (Bytes.size() >= 4) {
      const uint32_t DW = eatBytes<uint32_t>(Bytes);

      if ((isVI() || isGFX9()) &&
          tryDecodeInst(DecoderTableGFX832, MI, DW, Address, CS))
        break;

      if (tryDecodeInst(DecoderTableAMDGPU32, MI, DW, Address, CS))
        break;

      if (isGFX9() && tryDecodeInst(DecoderTableGFX932, MI, DW, Address, CS))
        break;

      if (STI.hasFeature(AMDGPU::FeatureGFX950Insts) &&
          tryDecodeInst(DecoderTableGFX95032, MI, DW, Address, CS))
        break;

      if (STI.hasFeature(AMDGPU::FeatureGFX90AInsts) &&
          tryDecodeInst(DecoderTableGFX90A32, MI, DW, Address, CS))
        break;

      if (STI.hasFeature(AMDGPU::FeatureGFX10_BEncoding) &&
          tryDecodeInst(DecoderTableGFX10_B32, MI, DW, Address, CS))
        break;

      if (isGFX10() && tryDecodeInst(DecoderTableGFX1032, MI, DW, Address, CS))
        break;

      if (isGFX11() &&
          tryDecodeInst(DecoderTableGFX1132, DecoderTableGFX11_FAKE1632, MI, DW,
                        Address, CS))
        break;

      if (isGFX1250() &&
          tryDecodeInst(DecoderTableGFX125032, DecoderTableGFX1250_FAKE1632, MI,
                        DW, Address, CS))
        break;

      if (isGFX12() &&
          tryDecodeInst(DecoderTableGFX1232, DecoderTableGFX12_FAKE1632, MI, DW,
                        Address, CS))
        break;
    }

    return MCDisassembler::Fail;
  } while (false);

  DecodeStatus Status = MCDisassembler::Success;

  decodeImmOperands(MI, *MCII);

  if (MCII->get(MI.getOpcode()).TSFlags & SIInstrFlags::DPP) {
    if (isMacDPP(MI))
      convertMacDPPInst(MI);

    if (MCII->get(MI.getOpcode()).TSFlags & SIInstrFlags::VOP3P)
      convertVOP3PDPPInst(MI);
    else if (MCII->get(MI.getOpcode()).TSFlags & SIInstrFlags::VOPC)
      convertVOPCDPPInst(MI); // Special VOP3 case
    else if (AMDGPU::isVOPC64DPP(MI.getOpcode()))
      convertVOPC64DPPInst(MI); // Special VOP3 case
    else if (AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::dpp8) !=
             -1)
      convertDPP8Inst(MI);
    else if (MCII->get(MI.getOpcode()).TSFlags & SIInstrFlags::VOP3)
      convertVOP3DPPInst(MI); // Regular VOP3 case
  }

  convertTrue16OpSel(MI);

  if (AMDGPU::isMAC(MI.getOpcode())) {
    // Insert dummy unused src2_modifiers.
    insertNamedMCOperand(MI, MCOperand::createImm(0),
                         AMDGPU::OpName::src2_modifiers);
  }

  if (MI.getOpcode() == AMDGPU::V_CVT_SR_BF8_F32_e64_dpp ||
      MI.getOpcode() == AMDGPU::V_CVT_SR_FP8_F32_e64_dpp) {
    // Insert dummy unused src2_modifiers.
    insertNamedMCOperand(MI, MCOperand::createImm(0),
                         AMDGPU::OpName::src2_modifiers);
  }

  if ((MCII->get(MI.getOpcode()).TSFlags & SIInstrFlags::DS) &&
      !AMDGPU::hasGDS(STI)) {
    insertNamedMCOperand(MI, MCOperand::createImm(0), AMDGPU::OpName::gds);
  }

  if (MCII->get(MI.getOpcode()).TSFlags &
      (SIInstrFlags::MUBUF | SIInstrFlags::FLAT | SIInstrFlags::SMRD)) {
    int CPolPos = AMDGPU::getNamedOperandIdx(MI.getOpcode(),
                                             AMDGPU::OpName::cpol);
    if (CPolPos != -1) {
      unsigned CPol =
          (MCII->get(MI.getOpcode()).TSFlags & SIInstrFlags::IsAtomicRet) ?
              AMDGPU::CPol::GLC : 0;
      if (MI.getNumOperands() <= (unsigned)CPolPos) {
        insertNamedMCOperand(MI, MCOperand::createImm(CPol),
                             AMDGPU::OpName::cpol);
      } else if (CPol) {
        MI.getOperand(CPolPos).setImm(MI.getOperand(CPolPos).getImm() | CPol);
      }
    }
  }

  if ((MCII->get(MI.getOpcode()).TSFlags &
       (SIInstrFlags::MTBUF | SIInstrFlags::MUBUF)) &&
      (STI.hasFeature(AMDGPU::FeatureGFX90AInsts))) {
    // GFX90A lost TFE, its place is occupied by ACC.
    int TFEOpIdx =
        AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::tfe);
    if (TFEOpIdx != -1) {
      auto *TFEIter = MI.begin();
      std::advance(TFEIter, TFEOpIdx);
      MI.insert(TFEIter, MCOperand::createImm(0));
    }
  }

  // Validate buffer instruction offsets for GFX12+ - must not be negative.
  if (isGFX12Plus() && (MCII->get(MI.getOpcode()).TSFlags &
                        (SIInstrFlags::MUBUF | SIInstrFlags::MTBUF))) {
    int OffsetIdx =
        AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::offset);
    if (OffsetIdx != -1) {
      uint32_t Imm = MI.getOperand(OffsetIdx).getImm();
      int64_t SignedOffset = SignExtend64<24>(Imm);
      if (SignedOffset < 0)
        return MCDisassembler::Fail;
    }
  }

  if (MCII->get(MI.getOpcode()).TSFlags &
      (SIInstrFlags::MTBUF | SIInstrFlags::MUBUF)) {
    int SWZOpIdx =
        AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::swz);
    if (SWZOpIdx != -1) {
      auto *SWZIter = MI.begin();
      std::advance(SWZIter, SWZOpIdx);
      MI.insert(SWZIter, MCOperand::createImm(0));
    }
  }

  const MCInstrDesc &Desc = MCII->get(MI.getOpcode());
  if (Desc.TSFlags & SIInstrFlags::MIMG) {
    int VAddr0Idx =
        AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::vaddr0);
    int RsrcIdx =
        AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::srsrc);
    unsigned NSAArgs = RsrcIdx - VAddr0Idx - 1;
    if (VAddr0Idx >= 0 && NSAArgs > 0) {
      unsigned NSAWords = (NSAArgs + 3) / 4;
      if (Bytes.size() < 4 * NSAWords)
        return MCDisassembler::Fail;
      for (unsigned i = 0; i < NSAArgs; ++i) {
        const unsigned VAddrIdx = VAddr0Idx + 1 + i;
        auto VAddrRCID =
            MCII->getOpRegClassID(Desc.operands()[VAddrIdx], HwModeRegClass);
        MI.insert(MI.begin() + VAddrIdx, createRegOperand(VAddrRCID, Bytes[i]));
      }
      Bytes = Bytes.slice(4 * NSAWords);
    }

    convertMIMGInst(MI);
  }

  if (MCII->get(MI.getOpcode()).TSFlags &
      (SIInstrFlags::VIMAGE | SIInstrFlags::VSAMPLE))
    convertMIMGInst(MI);

  if (MCII->get(MI.getOpcode()).TSFlags & SIInstrFlags::EXP)
    convertEXPInst(MI);

  if (MCII->get(MI.getOpcode()).TSFlags & SIInstrFlags::VINTERP)
    convertVINTERPInst(MI);

  if (MCII->get(MI.getOpcode()).TSFlags & SIInstrFlags::SDWA)
    convertSDWAInst(MI);

  if (MCII->get(MI.getOpcode()).TSFlags & SIInstrFlags::IsMAI)
    convertMAIInst(MI);

  if (MCII->get(MI.getOpcode()).TSFlags & SIInstrFlags::IsWMMA)
    convertWMMAInst(MI);

  int VDstIn_Idx = AMDGPU::getNamedOperandIdx(MI.getOpcode(),
                                              AMDGPU::OpName::vdst_in);
  if (VDstIn_Idx != -1) {
    int Tied = MCII->get(MI.getOpcode()).getOperandConstraint(VDstIn_Idx,
                             MCOI::OperandConstraint::TIED_TO);
    if (Tied != -1 && (MI.getNumOperands() <= (unsigned)VDstIn_Idx ||
         !MI.getOperand(VDstIn_Idx).isReg() ||
         MI.getOperand(VDstIn_Idx).getReg() != MI.getOperand(Tied).getReg())) {
      if (MI.getNumOperands() > (unsigned)VDstIn_Idx)
        MI.erase(&MI.getOperand(VDstIn_Idx));
      insertNamedMCOperand(MI,
        MCOperand::createReg(MI.getOperand(Tied).getReg()),
        AMDGPU::OpName::vdst_in);
    }
  }

  bool IsSOPK = MCII->get(MI.getOpcode()).TSFlags & SIInstrFlags::SOPK;
  if (AMDGPU::hasNamedOperand(MI.getOpcode(), AMDGPU::OpName::imm) && !IsSOPK)
    convertFMAanyK(MI);

  // Some VOPC instructions, e.g., v_cmpx_f_f64, use VOP3 encoding and
  // have EXEC as implicit destination. Issue a warning if encoding for
  // vdst is not EXEC.
  if ((MCII->get(MI.getOpcode()).TSFlags & SIInstrFlags::VOP3) &&
      MCII->get(MI.getOpcode()).getNumDefs() == 0 &&
      MCII->get(MI.getOpcode()).hasImplicitDefOfPhysReg(AMDGPU::EXEC)) {
    auto ExecEncoding = MRI.getEncodingValue(AMDGPU::EXEC_LO);
    if (Bytes_[0] != ExecEncoding)
      Status = MCDisassembler::SoftFail;
  }

  Size = MaxInstBytesNum - Bytes.size();
  return Status;
}

void AMDGPUDisassembler::convertEXPInst(MCInst &MI) const {
  if (STI.hasFeature(AMDGPU::FeatureGFX11Insts)) {
    // The MCInst still has these fields even though they are no longer encoded
    // in the GFX11 instruction.
    insertNamedMCOperand(MI, MCOperand::createImm(0), AMDGPU::OpName::vm);
    insertNamedMCOperand(MI, MCOperand::createImm(0), AMDGPU::OpName::compr);
  }
}

void AMDGPUDisassembler::convertVINTERPInst(MCInst &MI) const {
  if (MI.getOpcode() == AMDGPU::V_INTERP_P10_F16_F32_inreg_t16_gfx11 ||
      MI.getOpcode() == AMDGPU::V_INTERP_P10_F16_F32_inreg_fake16_gfx11 ||
      MI.getOpcode() == AMDGPU::V_INTERP_P10_F16_F32_inreg_t16_gfx12 ||
      MI.getOpcode() == AMDGPU::V_INTERP_P10_F16_F32_inreg_fake16_gfx12 ||
      MI.getOpcode() == AMDGPU::V_INTERP_P10_RTZ_F16_F32_inreg_t16_gfx11 ||
      MI.getOpcode() == AMDGPU::V_INTERP_P10_RTZ_F16_F32_inreg_fake16_gfx11 ||
      MI.getOpcode() == AMDGPU::V_INTERP_P10_RTZ_F16_F32_inreg_t16_gfx12 ||
      MI.getOpcode() == AMDGPU::V_INTERP_P10_RTZ_F16_F32_inreg_fake16_gfx12 ||
      MI.getOpcode() == AMDGPU::V_INTERP_P2_F16_F32_inreg_t16_gfx11 ||
      MI.getOpcode() == AMDGPU::V_INTERP_P2_F16_F32_inreg_fake16_gfx11 ||
      MI.getOpcode() == AMDGPU::V_INTERP_P2_F16_F32_inreg_t16_gfx12 ||
      MI.getOpcode() == AMDGPU::V_INTERP_P2_F16_F32_inreg_fake16_gfx12 ||
      MI.getOpcode() == AMDGPU::V_INTERP_P2_RTZ_F16_F32_inreg_t16_gfx11 ||
      MI.getOpcode() == AMDGPU::V_INTERP_P2_RTZ_F16_F32_inreg_fake16_gfx11 ||
      MI.getOpcode() == AMDGPU::V_INTERP_P2_RTZ_F16_F32_inreg_t16_gfx12 ||
      MI.getOpcode() == AMDGPU::V_INTERP_P2_RTZ_F16_F32_inreg_fake16_gfx12) {
    // The MCInst has this field that is not directly encoded in the
    // instruction.
    insertNamedMCOperand(MI, MCOperand::createImm(0), AMDGPU::OpName::op_sel);
  }
}

void AMDGPUDisassembler::convertSDWAInst(MCInst &MI) const {
  if (STI.hasFeature(AMDGPU::FeatureGFX9) ||
      STI.hasFeature(AMDGPU::FeatureGFX10)) {
    if (AMDGPU::hasNamedOperand(MI.getOpcode(), AMDGPU::OpName::sdst))
      // VOPC - insert clamp
      insertNamedMCOperand(MI, MCOperand::createImm(0), AMDGPU::OpName::clamp);
  } else if (STI.hasFeature(AMDGPU::FeatureVolcanicIslands)) {
    int SDst = AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::sdst);
    if (SDst != -1) {
      // VOPC - insert VCC register as sdst
      insertNamedMCOperand(MI, createRegOperand(AMDGPU::VCC),
                           AMDGPU::OpName::sdst);
    } else {
      // VOP1/2 - insert omod if present in instruction
      insertNamedMCOperand(MI, MCOperand::createImm(0), AMDGPU::OpName::omod);
    }
  }
}

/// Adjust the register values used by V_MFMA_F8F6F4_f8_f8 instructions to the
/// appropriate subregister for the used format width.
static void adjustMFMA_F8F6F4OpRegClass(const MCRegisterInfo &MRI,
                                        MCOperand &MO, uint8_t NumRegs) {
  switch (NumRegs) {
  case 4:
    return MO.setReg(MRI.getSubReg(MO.getReg(), AMDGPU::sub0_sub1_sub2_sub3));
  case 6:
    return MO.setReg(
        MRI.getSubReg(MO.getReg(), AMDGPU::sub0_sub1_sub2_sub3_sub4_sub5));
  case 8:
    if (MCRegister NewReg = MRI.getSubReg(
            MO.getReg(), AMDGPU::sub0_sub1_sub2_sub3_sub4_sub5_sub6_sub7)) {
      MO.setReg(NewReg);
    }
    return;
  case 12: {
    // There is no 384-bit subreg index defined.
    MCRegister BaseReg = MRI.getSubReg(MO.getReg(), AMDGPU::sub0);
    MCRegister NewReg = MRI.getMatchingSuperReg(
        BaseReg, AMDGPU::sub0, &MRI.getRegClass(AMDGPU::VReg_384RegClassID));
    return MO.setReg(NewReg);
  }
  case 16:
    // No-op in cases where one operand is still f8/bf8.
    return;
  default:
    llvm_unreachable("Unexpected size for mfma/wmma f8f6f4 operand");
  }
}

/// f8f6f4 instructions have different pseudos depending on the used formats. In
/// the disassembler table, we only have the variants with the largest register
/// classes which assume using an fp8/bf8 format for both operands. The actual
/// register class depends on the format in blgp and cbsz operands. Adjust the
/// register classes depending on the used format.
void AMDGPUDisassembler::convertMAIInst(MCInst &MI) const {
  int BlgpIdx =
      AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::blgp);
  if (BlgpIdx == -1)
    return;

  int CbszIdx =
      AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::cbsz);

  unsigned CBSZ = MI.getOperand(CbszIdx).getImm();
  unsigned BLGP = MI.getOperand(BlgpIdx).getImm();

  const AMDGPU::MFMA_F8F6F4_Info *AdjustedRegClassOpcode =
      AMDGPU::getMFMA_F8F6F4_WithFormatArgs(CBSZ, BLGP, MI.getOpcode());
  if (!AdjustedRegClassOpcode ||
      AdjustedRegClassOpcode->Opcode == MI.getOpcode())
    return;

  MI.setOpcode(AdjustedRegClassOpcode->Opcode);
  int Src0Idx =
      AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::src0);
  int Src1Idx =
      AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::src1);
  adjustMFMA_F8F6F4OpRegClass(MRI, MI.getOperand(Src0Idx),
                              AdjustedRegClassOpcode->NumRegsSrcA);
  adjustMFMA_F8F6F4OpRegClass(MRI, MI.getOperand(Src1Idx),
                              AdjustedRegClassOpcode->NumRegsSrcB);
}

void AMDGPUDisassembler::convertWMMAInst(MCInst &MI) const {
  int FmtAIdx =
      AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::matrix_a_fmt);
  if (FmtAIdx == -1)
    return;

  int FmtBIdx =
      AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::matrix_b_fmt);

  unsigned FmtA = MI.getOperand(FmtAIdx).getImm();
  unsigned FmtB = MI.getOperand(FmtBIdx).getImm();

  const AMDGPU::MFMA_F8F6F4_Info *AdjustedRegClassOpcode =
      AMDGPU::getWMMA_F8F6F4_WithFormatArgs(FmtA, FmtB, MI.getOpcode());
  if (!AdjustedRegClassOpcode ||
      AdjustedRegClassOpcode->Opcode == MI.getOpcode())
    return;

  MI.setOpcode(AdjustedRegClassOpcode->Opcode);
  int Src0Idx =
      AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::src0);
  int Src1Idx =
      AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::src1);
  adjustMFMA_F8F6F4OpRegClass(MRI, MI.getOperand(Src0Idx),
                              AdjustedRegClassOpcode->NumRegsSrcA);
  adjustMFMA_F8F6F4OpRegClass(MRI, MI.getOperand(Src1Idx),
                              AdjustedRegClassOpcode->NumRegsSrcB);
}

struct VOPModifiers {
  unsigned OpSel = 0;
  unsigned OpSelHi = 0;
  unsigned NegLo = 0;
  unsigned NegHi = 0;
};

// Reconstruct values of VOP3/VOP3P operands such as op_sel.
// Note that these values do not affect disassembler output,
// so this is only necessary for consistency with src_modifiers.
static VOPModifiers collectVOPModifiers(const MCInst &MI,
                                        bool IsVOP3P = false) {
  VOPModifiers Modifiers;
  unsigned Opc = MI.getOpcode();
  const AMDGPU::OpName ModOps[] = {AMDGPU::OpName::src0_modifiers,
                                   AMDGPU::OpName::src1_modifiers,
                                   AMDGPU::OpName::src2_modifiers};
  for (int J = 0; J < 3; ++J) {
    int OpIdx = AMDGPU::getNamedOperandIdx(Opc, ModOps[J]);
    if (OpIdx == -1)
      continue;

    unsigned Val = MI.getOperand(OpIdx).getImm();

    Modifiers.OpSel |= !!(Val & SISrcMods::OP_SEL_0) << J;
    if (IsVOP3P) {
      Modifiers.OpSelHi |= !!(Val & SISrcMods::OP_SEL_1) << J;
      Modifiers.NegLo |= !!(Val & SISrcMods::NEG) << J;
      Modifiers.NegHi |= !!(Val & SISrcMods::NEG_HI) << J;
    } else if (J == 0) {
      Modifiers.OpSel |= !!(Val & SISrcMods::DST_OP_SEL) << 3;
    }
  }

  return Modifiers;
}
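
// For instance, if only src1_modifiers has OP_SEL_0 set, collectVOPModifiers()
// yields OpSel = 0b010: bit J of OpSel mirrors the OP_SEL_0 bit of source
// operand J, with the VOP3 dst op_sel bit folded into bit 3.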

// Instructions decode the op_sel/suffix bits into the src_modifier
// operands. Copy those bits into the src operands for true16 VGPRs.
void AMDGPUDisassembler::convertTrue16OpSel(MCInst &MI) const {
  const unsigned Opc = MI.getOpcode();
  const MCRegisterClass &ConversionRC =
      MRI.getRegClass(AMDGPU::VGPR_16RegClassID);
  constexpr std::array<std::tuple<AMDGPU::OpName, AMDGPU::OpName, unsigned>, 4>
      OpAndOpMods = {{{AMDGPU::OpName::src0, AMDGPU::OpName::src0_modifiers,
                       SISrcMods::OP_SEL_0},
                      {AMDGPU::OpName::src1, AMDGPU::OpName::src1_modifiers,
                       SISrcMods::OP_SEL_0},
                      {AMDGPU::OpName::src2, AMDGPU::OpName::src2_modifiers,
                       SISrcMods::OP_SEL_0},
                      {AMDGPU::OpName::vdst, AMDGPU::OpName::src0_modifiers,
                       SISrcMods::DST_OP_SEL}}};
  for (const auto &[OpName, OpModsName, OpSelMask] : OpAndOpMods) {
    int OpIdx = AMDGPU::getNamedOperandIdx(Opc, OpName);
    int OpModsIdx = AMDGPU::getNamedOperandIdx(Opc, OpModsName);
    if (OpIdx == -1 || OpModsIdx == -1)
      continue;
    MCOperand &Op = MI.getOperand(OpIdx);
    if (!Op.isReg())
      continue;
    if (!ConversionRC.contains(Op.getReg()))
      continue;
    unsigned OpEnc = MRI.getEncodingValue(Op.getReg());
    const MCOperand &OpMods = MI.getOperand(OpModsIdx);
    unsigned ModVal = OpMods.getImm();
    if (ModVal & OpSelMask) { // isHi
      unsigned RegIdx = OpEnc & AMDGPU::HWEncoding::REG_IDX_MASK;
      Op.setReg(ConversionRC.getRegister(RegIdx * 2 + 1));
    }
  }
}

// MAC opcodes have special old and src2 operands.
// src2 is tied to dst, while old is not tied (but assumed to be).
bool AMDGPUDisassembler::isMacDPP(MCInst &MI) const {
  constexpr int DST_IDX = 0;
  auto Opcode = MI.getOpcode();
  const auto &Desc = MCII->get(Opcode);
  auto OldIdx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::old);

  if (OldIdx != -1 && Desc.getOperandConstraint(
                          OldIdx, MCOI::OperandConstraint::TIED_TO) == -1) {
    assert(AMDGPU::hasNamedOperand(Opcode, AMDGPU::OpName::src2));
    assert(Desc.getOperandConstraint(
               AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2),
               MCOI::OperandConstraint::TIED_TO) == DST_IDX);
    (void)DST_IDX;
    return true;
  }

  return false;
}

// Create dummy old operand and insert dummy unused src2_modifiers
void AMDGPUDisassembler::convertMacDPPInst(MCInst &MI) const {
  assert(MI.getNumOperands() + 1 < MCII->get(MI.getOpcode()).getNumOperands());
  insertNamedMCOperand(MI, MCOperand::createReg(0), AMDGPU::OpName::old);
  insertNamedMCOperand(MI, MCOperand::createImm(0),
                       AMDGPU::OpName::src2_modifiers);
}

void AMDGPUDisassembler::convertDPP8Inst(MCInst &MI) const {
  unsigned Opc = MI.getOpcode();

  int VDstInIdx =
      AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::vdst_in);
  if (VDstInIdx != -1)
    insertNamedMCOperand(MI, MI.getOperand(0), AMDGPU::OpName::vdst_in);

  unsigned DescNumOps = MCII->get(Opc).getNumOperands();
  if (MI.getNumOperands() < DescNumOps &&
      AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::op_sel)) {
    convertTrue16OpSel(MI);
    auto Mods = collectVOPModifiers(MI);
    insertNamedMCOperand(MI, MCOperand::createImm(Mods.OpSel),
                         AMDGPU::OpName::op_sel);
  } else {
    // Insert dummy unused src modifiers.
    if (MI.getNumOperands() < DescNumOps &&
        AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::src0_modifiers))
      insertNamedMCOperand(MI, MCOperand::createImm(0),
                           AMDGPU::OpName::src0_modifiers);

    if (MI.getNumOperands() < DescNumOps &&
        AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::src1_modifiers))
      insertNamedMCOperand(MI, MCOperand::createImm(0),
                           AMDGPU::OpName::src1_modifiers);
  }
}

void AMDGPUDisassembler::convertVOP3DPPInst(MCInst &MI) const {
  convertTrue16OpSel(MI);

  int VDstInIdx =
      AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::vdst_in);
  if (VDstInIdx != -1)
    insertNamedMCOperand(MI, MI.getOperand(0), AMDGPU::OpName::vdst_in);

  unsigned Opc = MI.getOpcode();
  unsigned DescNumOps = MCII->get(Opc).getNumOperands();
  if (MI.getNumOperands() < DescNumOps &&
      AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::op_sel)) {
    auto Mods = collectVOPModifiers(MI);
    insertNamedMCOperand(MI, MCOperand::createImm(Mods.OpSel),
                         AMDGPU::OpName::op_sel);
  }
}

// Given a wide tuple \p Reg check if it will overflow 256 registers.
// \returns \p Reg on success or NoRegister otherwise.
static unsigned CheckVGPROverflow(unsigned Reg, const MCRegisterClass &RC,
                                  const MCRegisterInfo &MRI) {
  unsigned NumRegs = RC.getSizeInBits() / 32;
  MCRegister Sub0 = MRI.getSubReg(Reg, AMDGPU::sub0);
  if (!Sub0)
    return Reg;

  MCRegister BaseReg;
  if (MRI.getRegClass(AMDGPU::VGPR_32RegClassID).contains(Sub0))
    BaseReg = AMDGPU::VGPR0;
  else if (MRI.getRegClass(AMDGPU::AGPR_32RegClassID).contains(Sub0))
    BaseReg = AMDGPU::AGPR0;

  assert(BaseReg && "Only vector registers expected");

  return (Sub0 - BaseReg + NumRegs <= 256) ? Reg : AMDGPU::NoRegister;
}
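
// For example, a 4-register tuple whose sub0 is VGPR254 would span
// v[254:257]; since 254 + 4 > 256, CheckVGPROverflow() returns NoRegister and
// the caller rejects the encoding.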

// Note that before gfx10, the MIMG encoding provided no information about
// VADDR size. Consequently, decoded instructions always show address as if it
// has 1 dword, which may not actually be the case.
void AMDGPUDisassembler::convertMIMGInst(MCInst &MI) const {
  auto TSFlags = MCII->get(MI.getOpcode()).TSFlags;

  int VDstIdx = AMDGPU::getNamedOperandIdx(MI.getOpcode(),
                                           AMDGPU::OpName::vdst);

  int VDataIdx = AMDGPU::getNamedOperandIdx(MI.getOpcode(),
                                            AMDGPU::OpName::vdata);
  int VAddr0Idx =
      AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::vaddr0);
  AMDGPU::OpName RsrcOpName = (TSFlags & SIInstrFlags::MIMG)
                                  ? AMDGPU::OpName::srsrc
                                  : AMDGPU::OpName::rsrc;
  int RsrcIdx = AMDGPU::getNamedOperandIdx(MI.getOpcode(), RsrcOpName);
  int DMaskIdx = AMDGPU::getNamedOperandIdx(MI.getOpcode(),
                                            AMDGPU::OpName::dmask);

  int TFEIdx = AMDGPU::getNamedOperandIdx(MI.getOpcode(),
                                          AMDGPU::OpName::tfe);
  int D16Idx = AMDGPU::getNamedOperandIdx(MI.getOpcode(),
                                          AMDGPU::OpName::d16);

  const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(MI.getOpcode());
  const AMDGPU::MIMGBaseOpcodeInfo *BaseOpcode =
      AMDGPU::getMIMGBaseOpcodeInfo(Info->BaseOpcode);

  assert(VDataIdx != -1);
  if (BaseOpcode->BVH) {
    // Add A16 operand for intersect_ray instructions
    addOperand(MI, MCOperand::createImm(BaseOpcode->A16));
    return;
  }

  bool IsAtomic = (VDstIdx != -1);
  bool IsGather4 = TSFlags & SIInstrFlags::Gather4;
  bool IsVSample = TSFlags & SIInstrFlags::VSAMPLE;
  bool IsNSA = false;
  bool IsPartialNSA = false;
  unsigned AddrSize = Info->VAddrDwords;

  if (isGFX10Plus()) {
    unsigned DimIdx =
        AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::dim);
    int A16Idx =
        AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::a16);
    const AMDGPU::MIMGDimInfo *Dim =
        AMDGPU::getMIMGDimInfoByEncoding(MI.getOperand(DimIdx).getImm());
    const bool IsA16 = (A16Idx != -1 && MI.getOperand(A16Idx).getImm());

    AddrSize =
        AMDGPU::getAddrSizeMIMGOp(BaseOpcode, Dim, IsA16, AMDGPU::hasG16(STI));

    // VSAMPLE insts that do not use vaddr3 behave the same as NSA forms.
    // VIMAGE insts other than BVH never use vaddr4.
    IsNSA = Info->MIMGEncoding == AMDGPU::MIMGEncGfx10NSA ||
            Info->MIMGEncoding == AMDGPU::MIMGEncGfx11NSA ||
            Info->MIMGEncoding == AMDGPU::MIMGEncGfx12;
    if (!IsNSA) {
      if (!IsVSample && AddrSize > 12)
        AddrSize = 16;
    } else {
      if (AddrSize > Info->VAddrDwords) {
        if (!STI.hasFeature(AMDGPU::FeaturePartialNSAEncoding)) {
          // The NSA encoding does not contain enough operands for the
          // combination of base opcode / dimension. Should this be an error?
          return;
        }
        IsPartialNSA = true;
      }
    }
  }

  unsigned DMask = MI.getOperand(DMaskIdx).getImm() & 0xf;
  unsigned DstSize = IsGather4 ? 4 : std::max(llvm::popcount(DMask), 1);

  bool D16 = D16Idx >= 0 && MI.getOperand(D16Idx).getImm();
  if (D16 && AMDGPU::hasPackedD16(STI)) {
    DstSize = (DstSize + 1) / 2;
  }

  if (TFEIdx != -1 && MI.getOperand(TFEIdx).getImm())
    DstSize += 1;

  if (DstSize == Info->VDataDwords && AddrSize == Info->VAddrDwords)
    return;

  int NewOpcode =
      AMDGPU::getMIMGOpcode(Info->BaseOpcode, Info->MIMGEncoding, DstSize, AddrSize);
  if (NewOpcode == -1)
    return;

  // Widen the register to the correct number of enabled channels.
  MCRegister NewVdata;
  if (DstSize != Info->VDataDwords) {
    auto DataRCID = MCII->getOpRegClassID(
        MCII->get(NewOpcode).operands()[VDataIdx], HwModeRegClass);

    // Get first subregister of VData
    MCRegister Vdata0 = MI.getOperand(VDataIdx).getReg();
    MCRegister VdataSub0 = MRI.getSubReg(Vdata0, AMDGPU::sub0);
    Vdata0 = (VdataSub0 != 0)? VdataSub0 : Vdata0;

    const MCRegisterClass &NewRC = MRI.getRegClass(DataRCID);
    NewVdata = MRI.getMatchingSuperReg(Vdata0, AMDGPU::sub0, &NewRC);
    NewVdata = CheckVGPROverflow(NewVdata, NewRC, MRI);
    if (!NewVdata) {
      // It's possible to encode this such that the low register + enabled
      // components exceeds the register count.
      return;
    }
  }

  // If not using NSA on GFX10+, widen vaddr0 address register to correct size.
  // If using partial NSA on GFX11+ widen last address register.
  int VAddrSAIdx = IsPartialNSA ? (RsrcIdx - 1) : VAddr0Idx;
  MCRegister NewVAddrSA;
  if (STI.hasFeature(AMDGPU::FeatureNSAEncoding) && (!IsNSA || IsPartialNSA) &&
      AddrSize != Info->VAddrDwords) {
    MCRegister VAddrSA = MI.getOperand(VAddrSAIdx).getReg();
    MCRegister VAddrSubSA = MRI.getSubReg(VAddrSA, AMDGPU::sub0);
    VAddrSA = VAddrSubSA ? VAddrSubSA : VAddrSA;

    auto AddrRCID = MCII->getOpRegClassID(
        MCII->get(NewOpcode).operands()[VAddrSAIdx], HwModeRegClass);

    const MCRegisterClass &NewRC = MRI.getRegClass(AddrRCID);
    NewVAddrSA = MRI.getMatchingSuperReg(VAddrSA, AMDGPU::sub0, &NewRC);
    NewVAddrSA = CheckVGPROverflow(NewVAddrSA, NewRC, MRI);
    if (!NewVAddrSA)
      return;
  }

  MI.setOpcode(NewOpcode);

  if (NewVdata != AMDGPU::NoRegister) {
    MI.getOperand(VDataIdx) = MCOperand::createReg(NewVdata);

    if (IsAtomic) {
      // Atomic operations have an additional operand (a copy of data)
      MI.getOperand(VDstIdx) = MCOperand::createReg(NewVdata);
    }
  }

  if (NewVAddrSA) {
    MI.getOperand(VAddrSAIdx) = MCOperand::createReg(NewVAddrSA);
  } else if (IsNSA) {
    assert(AddrSize <= Info->VAddrDwords);
    MI.erase(MI.begin() + VAddr0Idx + AddrSize,
             MI.begin() + VAddr0Idx + Info->VAddrDwords);
  }
}
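
// Example: an image_load with dmask:0x7 enables three channels, so DstSize is
// popcount(0x7) = 3 dwords; with D16 on a packed-d16 subtarget the three
// halves fit in (3 + 1) / 2 = 2 dwords, and the opcode is rewritten to the
// variant with the matching data register class.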

// Opsel and neg bits are used in src_modifiers and standalone operands. Autogen
// decoder only adds to src_modifiers, so manually add the bits to the other
// operands.
void AMDGPUDisassembler::convertVOP3PDPPInst(MCInst &MI) const {
  unsigned Opc = MI.getOpcode();
  unsigned DescNumOps = MCII->get(Opc).getNumOperands();
  auto Mods = collectVOPModifiers(MI, true);

  if (MI.getNumOperands() < DescNumOps &&
      AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::vdst_in))
    insertNamedMCOperand(MI, MCOperand::createImm(0), AMDGPU::OpName::vdst_in);

  if (MI.getNumOperands() < DescNumOps &&
      AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::op_sel))
    insertNamedMCOperand(MI, MCOperand::createImm(Mods.OpSel),
                         AMDGPU::OpName::op_sel);
  if (MI.getNumOperands() < DescNumOps &&
      AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::op_sel_hi))
    insertNamedMCOperand(MI, MCOperand::createImm(Mods.OpSelHi),
                         AMDGPU::OpName::op_sel_hi);
  if (MI.getNumOperands() < DescNumOps &&
      AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::neg_lo))
    insertNamedMCOperand(MI, MCOperand::createImm(Mods.NegLo),
                         AMDGPU::OpName::neg_lo);
  if (MI.getNumOperands() < DescNumOps &&
      AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::neg_hi))
    insertNamedMCOperand(MI, MCOperand::createImm(Mods.NegHi),
                         AMDGPU::OpName::neg_hi);
}

// Create dummy old operand and insert optional operands
void AMDGPUDisassembler::convertVOPCDPPInst(MCInst &MI) const {
  unsigned Opc = MI.getOpcode();
  unsigned DescNumOps = MCII->get(Opc).getNumOperands();

  if (MI.getNumOperands() < DescNumOps &&
      AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::old))
    insertNamedMCOperand(MI, MCOperand::createReg(0), AMDGPU::OpName::old);

  if (MI.getNumOperands() < DescNumOps &&
      AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::src0_modifiers))
    insertNamedMCOperand(MI, MCOperand::createImm(0),
                         AMDGPU::OpName::src0_modifiers);

  if (MI.getNumOperands() < DescNumOps &&
      AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::src1_modifiers))
    insertNamedMCOperand(MI, MCOperand::createImm(0),
                         AMDGPU::OpName::src1_modifiers);
}

void AMDGPUDisassembler::convertVOPC64DPPInst(MCInst &MI) const {
  unsigned Opc = MI.getOpcode();
  unsigned DescNumOps = MCII->get(Opc).getNumOperands();

  if (MI.getNumOperands() < DescNumOps &&
      AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::op_sel)) {
    auto Mods = collectVOPModifiers(MI);
    insertNamedMCOperand(MI, MCOperand::createImm(Mods.OpSel),
                         AMDGPU::OpName::op_sel);
  }
}

void AMDGPUDisassembler::convertFMAanyK(MCInst &MI) const {
  assert(HasLiteral && "Should have decoded a literal");
  insertNamedMCOperand(MI, MCOperand::createImm(Literal), AMDGPU::OpName::immX);
}

const char* AMDGPUDisassembler::getRegClassName(unsigned RegClassID) const {
  return getContext().getRegisterInfo()->
      getRegClassName(&AMDGPUMCRegisterClasses[RegClassID]);
}

inline
MCOperand AMDGPUDisassembler::errOperand(unsigned V,
                                         const Twine& ErrMsg) const {
  *CommentStream << "Error: " + ErrMsg;

  // ToDo: add support for error operands to MCInst.h
  // return MCOperand::createError(V);
  return MCOperand();
}

inline
MCOperand AMDGPUDisassembler::createRegOperand(unsigned int RegId) const {
  return MCOperand::createReg(AMDGPU::getMCReg(RegId, STI));
}

inline
MCOperand AMDGPUDisassembler::createRegOperand(unsigned RegClassID,
                                               unsigned Val) const {
  const auto& RegCl = AMDGPUMCRegisterClasses[RegClassID];
  if (Val >= RegCl.getNumRegs())
    return errOperand(Val, Twine(getRegClassName(RegClassID)) +
                           ": unknown register " + Twine(Val));
  return createRegOperand(RegCl.getRegister(Val));
}

inline
MCOperand AMDGPUDisassembler::createSRegOperand(unsigned SRegClassID,
                                                unsigned Val) const {
  // ToDo: SI/CI have 104 SGPRs, VI - 102
  // Valery: here we accept as much as we can; let the assembler sort it out
  int shift = 0;
  switch (SRegClassID) {
  case AMDGPU::SGPR_32RegClassID:
  case AMDGPU::TTMP_32RegClassID:
    break;
  case AMDGPU::SGPR_64RegClassID:
  case AMDGPU::TTMP_64RegClassID:
    shift = 1;
    break;
  case AMDGPU::SGPR_96RegClassID:
  case AMDGPU::TTMP_96RegClassID:
  case AMDGPU::SGPR_128RegClassID:
  case AMDGPU::TTMP_128RegClassID:
  // ToDo: unclear if s[100:104] is available on VI. Can we use VCC as SGPR in
  // this bundle?
  case AMDGPU::SGPR_256RegClassID:
  case AMDGPU::TTMP_256RegClassID:
  // ToDo: unclear if s[96:104] is available on VI. Can we use VCC as SGPR in
  // this bundle?
  case AMDGPU::SGPR_288RegClassID:
  case AMDGPU::TTMP_288RegClassID:
  case AMDGPU::SGPR_320RegClassID:
  case AMDGPU::TTMP_320RegClassID:
  case AMDGPU::SGPR_352RegClassID:
  case AMDGPU::TTMP_352RegClassID:
  case AMDGPU::SGPR_384RegClassID:
  case AMDGPU::TTMP_384RegClassID:
  case AMDGPU::SGPR_512RegClassID:
  case AMDGPU::TTMP_512RegClassID:
    shift = 2;
    break;
  // ToDo: unclear if s[88:104] is available on VI. Can we use VCC as SGPR in
  // this bundle?
  default:
    llvm_unreachable("unhandled register class");
  }

  if (Val % (1 << shift)) {
    *CommentStream << "Warning: " << getRegClassName(SRegClassID)
                   << ": scalar reg isn't aligned " << Val;
  }

  return createRegOperand(SRegClassID, Val >> shift);
}

MCOperand AMDGPUDisassembler::createVGPR16Operand(unsigned RegIdx,
                                                  bool IsHi) const {
  unsigned RegIdxInVGPR16 = RegIdx * 2 + (IsHi ? 1 : 0);
  return createRegOperand(AMDGPU::VGPR_16RegClassID, RegIdxInVGPR16);
}

// Decode Literals for insts which always have a literal in the encoding
MCOperand
AMDGPUDisassembler::decodeMandatoryLiteralConstant(unsigned Val) const {
  if (HasLiteral) {
    assert(
        AMDGPU::hasVOPD(STI) &&
        "Should only decode multiple kimm with VOPD, check VSrc operand types");
    if (Literal != Val)
      return errOperand(Val, "More than one unique literal is illegal");
  }
  HasLiteral = true;
  Literal = Val;
  return MCOperand::createImm(Literal);
}

MCOperand
AMDGPUDisassembler::decodeMandatoryLiteral64Constant(uint64_t Val) const {
  if (HasLiteral) {
    if (Literal != Val)
      return errOperand(Val, "More than one unique literal is illegal");
  }
  HasLiteral = true;
  Literal = Val;

  bool UseLit64 = Hi_32(Literal) == 0;
  return UseLit64 ? MCOperand::createExpr(AMDGPUMCExpr::createLit(
                        LitModifier::Lit64, Literal, getContext()))
                  : MCOperand::createImm(Literal);
}

MCOperand
AMDGPUDisassembler::decodeLiteralConstant(const MCInstrDesc &Desc,
                                          const MCOperandInfo &OpDesc) const {
  // For now all literal constants are supposed to be unsigned integer
  // ToDo: deal with signed/unsigned 64-bit integer constants
  // ToDo: deal with float/double constants
  if (!HasLiteral) {
    if (Bytes.size() < 4) {
      return errOperand(0, "cannot read literal, inst bytes left " +
                           Twine(Bytes.size()));
    }
    HasLiteral = true;
    Literal = eatBytes<uint32_t>(Bytes);
  }

  // For disassembling always assume all inline constants are available.
  bool HasInv2Pi = true;

  // Invalid instruction codes may contain literals for inline-only
  // operands, so we support them here as well.
  int64_t Val = Literal;
  bool UseLit = false;
  switch (OpDesc.OperandType) {
  default:
    llvm_unreachable("Unexpected operand type!");
  case AMDGPU::OPERAND_REG_IMM_BF16:
  case AMDGPU::OPERAND_REG_INLINE_C_BF16:
  case AMDGPU::OPERAND_REG_INLINE_C_V2BF16:
    UseLit = AMDGPU::isInlinableLiteralBF16(Val, HasInv2Pi);
    break;
  case AMDGPU::OPERAND_REG_IMM_V2BF16:
    break;
  case AMDGPU::OPERAND_REG_IMM_FP16:
  case AMDGPU::OPERAND_REG_INLINE_C_FP16:
  case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
    UseLit = AMDGPU::isInlinableLiteralFP16(Val, HasInv2Pi);
    break;
  case AMDGPU::OPERAND_REG_IMM_V2FP16:
    UseLit = AMDGPU::isInlinableLiteralV2F16(Val);
    break;
  case AMDGPU::OPERAND_REG_IMM_INT16:
    break;
  case AMDGPU::OPERAND_REG_INLINE_C_INT16:
  case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
    UseLit = AMDGPU::isInlinableLiteralI16(Val, HasInv2Pi);
    break;
  case AMDGPU::OPERAND_REG_IMM_V2INT16:
    UseLit = AMDGPU::isInlinableLiteralV2I16(Val);
    break;
  case AMDGPU::OPERAND_REG_IMM_FP32:
  case AMDGPU::OPERAND_REG_INLINE_C_FP32:
  case AMDGPU::OPERAND_REG_INLINE_AC_FP32:
  case AMDGPU::OPERAND_REG_IMM_INT32:
  case AMDGPU::OPERAND_REG_INLINE_C_INT32:
  case AMDGPU::OPERAND_REG_INLINE_AC_INT32:
  case AMDGPU::OPERAND_REG_IMM_V2FP32:
  case AMDGPU::OPERAND_REG_IMM_V2INT32:
  case AMDGPU::OPERAND_KIMM32:
    UseLit = AMDGPU::isInlinableLiteral32(Val, HasInv2Pi);
    break;
  case AMDGPU::OPERAND_REG_IMM_FP64:
  case AMDGPU::OPERAND_REG_INLINE_C_FP64:
  case AMDGPU::OPERAND_REG_INLINE_AC_FP64:
    Val <<= 32;
    break;
  case AMDGPU::OPERAND_REG_IMM_INT64:
  case AMDGPU::OPERAND_REG_INLINE_C_INT64:
    UseLit = AMDGPU::isInlinableLiteral64(Val, HasInv2Pi);
    break;
  case MCOI::OPERAND_REGISTER:
    // TODO: Disassembling V_DUAL_FMAMK_F32_X_FMAMK_F32_gfx11 hits
    // decoding a literal in a position of a register operand. Give
    // it special handling in the caller, decodeImmOperands(), instead
    // of quietly allowing it here.
    break;
  }

  return UseLit ? MCOperand::createExpr(AMDGPUMCExpr::createLit(
                      LitModifier::Lit, Val, getContext()))
                : MCOperand::createImm(Val);
}

MCOperand AMDGPUDisassembler::decodeLiteral64Constant() const {
  assert(STI.hasFeature(AMDGPU::Feature64BitLiterals));

  if (!HasLiteral) {
    if (Bytes.size() < 8) {
      return errOperand(0, "cannot read literal64, inst bytes left " +
                           Twine(Bytes.size()));
    }
    HasLiteral = true;
    Literal = eatBytes<uint64_t>(Bytes);
  }

  bool UseLit64 = Hi_32(Literal) == 0;
  return UseLit64 ? MCOperand::createExpr(AMDGPUMCExpr::createLit(
                        LitModifier::Lit64, Literal, getContext()))
                  : MCOperand::createImm(Literal);
}

MCOperand AMDGPUDisassembler::decodeIntImmed(unsigned Imm) {
  using namespace AMDGPU::EncValues;

  assert(Imm >= INLINE_INTEGER_C_MIN && Imm <= INLINE_INTEGER_C_MAX);
  return MCOperand::createImm((Imm <= INLINE_INTEGER_C_POSITIVE_MAX) ?
    (static_cast<int64_t>(Imm) - INLINE_INTEGER_C_MIN) :
    (INLINE_INTEGER_C_POSITIVE_MAX - static_cast<int64_t>(Imm)));
  // Cast prevents negative overflow.
}
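
// With the usual AMDGPU encoding values (INLINE_INTEGER_C_MIN = 128,
// INLINE_INTEGER_C_POSITIVE_MAX = 192), 128 decodes to 0, 129..192 decode to
// 1..64, and 193..208 decode to -1..-16.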

static int64_t getInlineImmVal32(unsigned Imm) {
  switch (Imm) {
  case 240:
    return llvm::bit_cast<uint32_t>(0.5f);
  case 241:
    return llvm::bit_cast<uint32_t>(-0.5f);
  case 242:
    return llvm::bit_cast<uint32_t>(1.0f);
  case 243:
    return llvm::bit_cast<uint32_t>(-1.0f);
  case 244:
    return llvm::bit_cast<uint32_t>(2.0f);
  case 245:
    return llvm::bit_cast<uint32_t>(-2.0f);
  case 246:
    return llvm::bit_cast<uint32_t>(4.0f);
  case 247:
    return llvm::bit_cast<uint32_t>(-4.0f);
  case 248: // 1 / (2 * PI)
    return 0x3e22f983;
  default:
    llvm_unreachable("invalid fp inline imm");
  }
}

static int64_t getInlineImmVal64(unsigned Imm) {
  switch (Imm) {
  case 240:
    return llvm::bit_cast<uint64_t>(0.5);
  case 241:
    return llvm::bit_cast<uint64_t>(-0.5);
  case 242:
    return llvm::bit_cast<uint64_t>(1.0);
  case 243:
    return llvm::bit_cast<uint64_t>(-1.0);
  case 244:
    return llvm::bit_cast<uint64_t>(2.0);
  case 245:
    return llvm::bit_cast<uint64_t>(-2.0);
  case 246:
    return llvm::bit_cast<uint64_t>(4.0);
  case 247:
    return llvm::bit_cast<uint64_t>(-4.0);
  case 248: // 1 / (2 * PI)
    return 0x3fc45f306dc9c882;
  default:
    llvm_unreachable("invalid fp inline imm");
  }
}

static int64_t getInlineImmValF16(unsigned Imm) {
  switch (Imm) {
  case 240:
    return 0x3800;
  case 241:
    return 0xB800;
  case 242:
    return 0x3C00;
  case 243:
    return 0xBC00;
  case 244:
    return 0x4000;
  case 245:
    return 0xC000;
  case 246:
    return 0x4400;
  case 247:
    return 0xC400;
  case 248: // 1 / (2 * PI)
    return 0x3118;
  default:
    llvm_unreachable("invalid fp inline imm");
  }
}

static int64_t getInlineImmValBF16(unsigned Imm) {
  switch (Imm) {
  case 240:
    return 0x3F00;
  case 241:
    return 0xBF00;
  case 242:
    return 0x3F80;
  case 243:
    return 0xBF80;
  case 244:
    return 0x4000;
  case 245:
    return 0xC000;
  case 246:
    return 0x4080;
  case 247:
    return 0xC080;
  case 248: // 1 / (2 * PI)
    return 0x3E22;
  default:
    llvm_unreachable("invalid fp inline imm");
  }
}
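
// The four getInlineImmVal* tables above return the same eight constants
// (+-0.5, +-1.0, +-2.0, +-4.0) and 1/(2*pi), expressed as raw bit patterns in
// f32, f64, f16, and bf16 respectively.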

unsigned AMDGPUDisassembler::getVgprClassId(unsigned Width) const {
  using namespace AMDGPU;

  switch (Width) {
  case 16:
  case 32:
    return VGPR_32RegClassID;
  case 64:
    return VReg_64RegClassID;
  case 96:
    return VReg_96RegClassID;
  case 128:
    return VReg_128RegClassID;
  case 160:
    return VReg_160RegClassID;
  case 192:
    return VReg_192RegClassID;
  case 256:
    return VReg_256RegClassID;
  case 288:
    return VReg_288RegClassID;
  case 320:
    return VReg_320RegClassID;
  case 352:
    return VReg_352RegClassID;
  case 384:
    return VReg_384RegClassID;
  case 512:
    return VReg_512RegClassID;
  case 1024:
    return VReg_1024RegClassID;
  }
  llvm_unreachable("Invalid register width!");
}

unsigned AMDGPUDisassembler::getAgprClassId(unsigned Width) const {
  using namespace AMDGPU;

  switch (Width) {
  case 16:
  case 32:
    return AGPR_32RegClassID;
  case 64:
    return AReg_64RegClassID;
  case 96:
    return AReg_96RegClassID;
  case 128:
    return AReg_128RegClassID;
  case 160:
    return AReg_160RegClassID;
  case 256:
    return AReg_256RegClassID;
  case 288:
    return AReg_288RegClassID;
  case 320:
    return AReg_320RegClassID;
  case 352:
    return AReg_352RegClassID;
  case 384:
    return AReg_384RegClassID;
  case 512:
    return AReg_512RegClassID;
  case 1024:
    return AReg_1024RegClassID;
  }
  llvm_unreachable("Invalid register width!");
}

unsigned AMDGPUDisassembler::getSgprClassId(unsigned Width) const {
  using namespace AMDGPU;

  switch (Width) {
  case 16:
  case 32:
    return SGPR_32RegClassID;
  case 64:
    return SGPR_64RegClassID;
  case 96:
    return SGPR_96RegClassID;
  case 128:
    return SGPR_128RegClassID;
  case 160:
    return SGPR_160RegClassID;
  case 256:
    return SGPR_256RegClassID;
  case 288:
    return SGPR_288RegClassID;
  case 320:
    return SGPR_320RegClassID;
  case 352:
    return SGPR_352RegClassID;
  case 384:
    return SGPR_384RegClassID;
  case 512:
    return SGPR_512RegClassID;
  }
  llvm_unreachable("Invalid register width!");
}

unsigned AMDGPUDisassembler::getTtmpClassId(unsigned Width) const {
  using namespace AMDGPU;

  switch (Width) {
  case 16:
  case 32:
    return TTMP_32RegClassID;
  case 64:
    return TTMP_64RegClassID;
  case 128:
    return TTMP_128RegClassID;
  case 256:
    return TTMP_256RegClassID;
  case 288:
    return TTMP_288RegClassID;
  case 320:
    return TTMP_320RegClassID;
  case 352:
    return TTMP_352RegClassID;
  case 384:
    return TTMP_384RegClassID;
  case 512:
    return TTMP_512RegClassID;
  }
  llvm_unreachable("Invalid register width!");
}
1897
1898int AMDGPUDisassembler::getTTmpIdx(unsigned Val) const {
1899 using namespace AMDGPU::EncValues;
1900
1901 unsigned TTmpMin = isGFX9Plus() ? TTMP_GFX9PLUS_MIN : TTMP_VI_MIN;
1902 unsigned TTmpMax = isGFX9Plus() ? TTMP_GFX9PLUS_MAX : TTMP_VI_MAX;
1903
1904 return (TTmpMin <= Val && Val <= TTmpMax)? Val - TTmpMin : -1;
1905}
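// Example: Val == TTmpMin decodes to index 0 (ttmp0) and Val == TTmpMax to
// the last trap temporary; anything outside the block returns -1 so callers
// can fall through to other operand interpretations.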
1906
1907MCOperand AMDGPUDisassembler::decodeSrcOp(const MCInst &Inst, unsigned Width,
1908 unsigned Val) const {
1909 using namespace AMDGPU::EncValues;
1910
1911 assert(Val < 1024); // enum10
1912
1913 bool IsAGPR = Val & 512;
1914 Val &= 511;
1915
1916 if (VGPR_MIN <= Val && Val <= VGPR_MAX) {
1917 return createRegOperand(IsAGPR ? getAgprClassId(Width)
1918 : getVgprClassId(Width), Val - VGPR_MIN);
1919 }
1920 return decodeNonVGPRSrcOp(Inst, Width, Val & 0xFF);
1921}
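// Sketch of the 10-bit ("enum10") source encoding handled above, assuming the
// standard EncValues layout with VGPR_MIN = 256: bit 9 selects the AGPR file
// and the low 9 bits select the register, so Val = 512 + 258 decodes to a2
// while Val = 258 decodes to v2. Values below VGPR_MIN fall through to
// decodeNonVGPRSrcOp with only the low 8 bits kept.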
1922
1923MCOperand AMDGPUDisassembler::decodeNonVGPRSrcOp(const MCInst &Inst,
1924 unsigned Width,
1925 unsigned Val) const {
1926 // Cases when Val{8} is 1 (vgpr, agpr or true 16 vgpr) should have been
1927 // decoded earlier.
1928 assert(Val < (1 << 8) && "9-bit Src encoding when Val{8} is 0");
1929 using namespace AMDGPU::EncValues;
1930
1931 if (Val <= SGPR_MAX) {
1932 // "SGPR_MIN <= Val" is always true and causes compilation warning.
1933 static_assert(SGPR_MIN == 0);
1934 return createSRegOperand(getSgprClassId(Width), Val - SGPR_MIN);
1935 }
1936
1937 int TTmpIdx = getTTmpIdx(Val);
1938 if (TTmpIdx >= 0) {
1939 return createSRegOperand(getTtmpClassId(Width), TTmpIdx);
1940 }
1941
1942 if ((INLINE_INTEGER_C_MIN <= Val && Val <= INLINE_INTEGER_C_MAX) ||
1943 (INLINE_FLOATING_C_MIN <= Val && Val <= INLINE_FLOATING_C_MAX) ||
1944 Val == LITERAL_CONST)
1945 return MCOperand::createImm(Val);
1946
1947 if (Val == LITERAL64_CONST && STI.hasFeature(AMDGPU::Feature64BitLiterals)) {
1948 return decodeLiteral64Constant();
1949 }
1950
1951 switch (Width) {
1952 case 32:
1953 case 16:
1954 return decodeSpecialReg32(Val);
1955 case 64:
1956 return decodeSpecialReg64(Val);
1957 case 96:
1958 case 128:
1959 case 256:
1960 case 512:
1961 return decodeSpecialReg96Plus(Val);
1962 default:
1963 llvm_unreachable("unexpected immediate type");
1964 }
1965}
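// The scalar (8-bit) half of the space is thus consumed in order: SGPRs from
// encoding 0, then the ttmp block, then the inline integer/float constants
// and the literal marker (returned as plain immediates here and materialized
// later), and finally the width-dependent special registers.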
1966
1967// Bit 0 of DstY isn't stored in the instruction, because it's always the
1968// opposite of bit 0 of DstX.
1969MCOperand AMDGPUDisassembler::decodeVOPDDstYOp(MCInst &Inst,
1970 unsigned Val) const {
1971 int VDstXInd =
1972 AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::vdstX);
1973 assert(VDstXInd != -1);
1974 assert(Inst.getOperand(VDstXInd).isReg());
1975 unsigned XDstReg = MRI.getEncodingValue(Inst.getOperand(VDstXInd).getReg());
1976 Val |= ~XDstReg & 1;
1977 return createRegOperand(getVgprClassId(32), Val);
1978}
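// Example: if DstX encoded an even register such as v2, "Val |= ~XDstReg & 1"
// sets bit 0 of DstY, turning a stored value of 4 into v5; if DstX encoded an
// odd register, the stored value is used unchanged.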
1979
1980MCOperand AMDGPUDisassembler::decodeSpecialReg32(unsigned Val) const {
1981 using namespace AMDGPU;
1982
1983 switch (Val) {
1984 // clang-format off
1985 case 102: return createRegOperand(FLAT_SCR_LO);
1986 case 103: return createRegOperand(FLAT_SCR_HI);
1987 case 104: return createRegOperand(XNACK_MASK_LO);
1988 case 105: return createRegOperand(XNACK_MASK_HI);
1989 case 106: return createRegOperand(VCC_LO);
1990 case 107: return createRegOperand(VCC_HI);
1991 case 108: return createRegOperand(TBA_LO);
1992 case 109: return createRegOperand(TBA_HI);
1993 case 110: return createRegOperand(TMA_LO);
1994 case 111: return createRegOperand(TMA_HI);
1995 case 124:
1996 return isGFX11Plus() ? createRegOperand(SGPR_NULL) : createRegOperand(M0);
1997 case 125:
1998 return isGFX11Plus() ? createRegOperand(M0) : createRegOperand(SGPR_NULL);
1999 case 126: return createRegOperand(EXEC_LO);
2000 case 127: return createRegOperand(EXEC_HI);
2001 case 230: return createRegOperand(SRC_FLAT_SCRATCH_BASE_LO);
2002 case 231: return createRegOperand(SRC_FLAT_SCRATCH_BASE_HI);
2003 case 235: return createRegOperand(SRC_SHARED_BASE_LO);
2004 case 236: return createRegOperand(SRC_SHARED_LIMIT_LO);
2005 case 237: return createRegOperand(SRC_PRIVATE_BASE_LO);
2006 case 238: return createRegOperand(SRC_PRIVATE_LIMIT_LO);
2007 case 239: return createRegOperand(SRC_POPS_EXITING_WAVE_ID);
2008 case 251: return createRegOperand(SRC_VCCZ);
2009 case 252: return createRegOperand(SRC_EXECZ);
2010 case 253: return createRegOperand(SRC_SCC);
2011 case 254: return createRegOperand(LDS_DIRECT);
2012 default: break;
2013 // clang-format on
2014 }
2015 return errOperand(Val, "unknown operand encoding " + Twine(Val));
2016}
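// Note the 124/125 pair above: GFX11 swapped the encodings of M0 and
// SGPR_NULL relative to earlier targets, which the two isGFX11Plus() selects
// implement.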
2017
2018MCOperand AMDGPUDisassembler::decodeSpecialReg64(unsigned Val) const {
2019 using namespace AMDGPU;
2020
2021 switch (Val) {
2022 case 102: return createRegOperand(FLAT_SCR);
2023 case 104: return createRegOperand(XNACK_MASK);
2024 case 106: return createRegOperand(VCC);
2025 case 108: return createRegOperand(TBA);
2026 case 110: return createRegOperand(TMA);
2027 case 124:
2028 if (isGFX11Plus())
2029 return createRegOperand(SGPR_NULL);
2030 break;
2031 case 125:
2032 if (!isGFX11Plus())
2033 return createRegOperand(SGPR_NULL);
2034 break;
2035 case 126: return createRegOperand(EXEC);
2036 case 230: return createRegOperand(SRC_FLAT_SCRATCH_BASE_LO);
2037 case 235: return createRegOperand(SRC_SHARED_BASE);
2038 case 236: return createRegOperand(SRC_SHARED_LIMIT);
2039 case 237: return createRegOperand(SRC_PRIVATE_BASE);
2040 case 238: return createRegOperand(SRC_PRIVATE_LIMIT);
2041 case 239: return createRegOperand(SRC_POPS_EXITING_WAVE_ID);
2042 case 251: return createRegOperand(SRC_VCCZ);
2043 case 252: return createRegOperand(SRC_EXECZ);
2044 case 253: return createRegOperand(SRC_SCC);
2045 default: break;
2046 }
2047 return errOperand(Val, "unknown operand encoding " + Twine(Val));
2048}
2049
2050MCOperand AMDGPUDisassembler::decodeSpecialReg96Plus(unsigned Val) const {
2051 using namespace AMDGPU;
2052
2053 switch (Val) {
2054 case 124:
2055 if (isGFX11Plus())
2056 return createRegOperand(SGPR_NULL);
2057 break;
2058 case 125:
2059 if (!isGFX11Plus())
2060 return createRegOperand(SGPR_NULL);
2061 break;
2062 default:
2063 break;
2064 }
2065 return errOperand(Val, "unknown operand encoding " + Twine(Val));
2066}
2067
2068MCOperand AMDGPUDisassembler::decodeSDWASrc(unsigned Width,
2069 const unsigned Val) const {
2070 using namespace AMDGPU::SDWA;
2071 using namespace AMDGPU::EncValues;
2072
2073 if (STI.hasFeature(AMDGPU::FeatureGFX9) ||
2074 STI.hasFeature(AMDGPU::FeatureGFX10)) {
2075 // XXX: the cast to int is needed to avoid a spurious warning:
2076 // comparison with unsigned is always true
2077 if (int(SDWA9EncValues::SRC_VGPR_MIN) <= int(Val) &&
2078 Val <= SDWA9EncValues::SRC_VGPR_MAX) {
2079 return createRegOperand(getVgprClassId(Width),
2080 Val - SDWA9EncValues::SRC_VGPR_MIN);
2081 }
2082 if (SDWA9EncValues::SRC_SGPR_MIN <= Val &&
2083 Val <= (isGFX10Plus() ? SDWA9EncValues::SRC_SGPR_MAX_GFX10
2084 : SDWA9EncValues::SRC_SGPR_MAX_SI)) {
2085 return createSRegOperand(getSgprClassId(Width),
2086 Val - SDWA9EncValues::SRC_SGPR_MIN);
2087 }
2088 if (SDWA9EncValues::SRC_TTMP_MIN <= Val &&
2089 Val <= SDWA9EncValues::SRC_TTMP_MAX) {
2090 return createSRegOperand(getTtmpClassId(Width),
2091 Val - SDWA9EncValues::SRC_TTMP_MIN);
2092 }
2093
2094 const unsigned SVal = Val - SDWA9EncValues::SRC_SGPR_MIN;
2095
2096 if ((INLINE_INTEGER_C_MIN <= SVal && SVal <= INLINE_INTEGER_C_MAX) ||
2097 (INLINE_FLOATING_C_MIN <= SVal && SVal <= INLINE_FLOATING_C_MAX))
2098 return MCOperand::createImm(SVal);
2099
2100 return decodeSpecialReg32(SVal);
2101 }
2102 if (STI.hasFeature(AMDGPU::FeatureVolcanicIslands))
2103 return createRegOperand(getVgprClassId(Width), Val);
2104 llvm_unreachable("unsupported target");
2105}
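// On GFX9/GFX10 the SDWA src field packs VGPRs into the low range and SGPRs
// and ttmps above SRC_VGPR_MAX; anything else is re-based by SRC_SGPR_MIN so
// inline constants and special registers can reuse the ordinary scalar
// decoding above.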
2106
2107MCOperand AMDGPUDisassembler::decodeSDWASrc16(unsigned Val) const {
2108 return decodeSDWASrc(16, Val);
2109}
2110
2111MCOperand AMDGPUDisassembler::decodeSDWASrc32(unsigned Val) const {
2112 return decodeSDWASrc(32, Val);
2113}
2114
2115MCOperand AMDGPUDisassembler::decodeSDWAVopcDst(unsigned Val) const {
2116 using namespace AMDGPU::SDWA;
2117
2118 assert((STI.hasFeature(AMDGPU::FeatureGFX9) ||
2119 STI.hasFeature(AMDGPU::FeatureGFX10)) &&
2120 "SDWAVopcDst should be present only on GFX9+");
2121
2122 bool IsWave32 = STI.hasFeature(AMDGPU::FeatureWavefrontSize32);
2123
2124 if (Val & SDWA9EncValues::VOPC_DST_VCC_MASK) {
2125 Val &= SDWA9EncValues::VOPC_DST_SGPR_MASK;
2126
2127 int TTmpIdx = getTTmpIdx(Val);
2128 if (TTmpIdx >= 0) {
2129 auto TTmpClsId = getTtmpClassId(IsWave32 ? 32 : 64);
2130 return createSRegOperand(TTmpClsId, TTmpIdx);
2131 }
2132 if (Val > SGPR_MAX) {
2133 return IsWave32 ? decodeSpecialReg32(Val) : decodeSpecialReg64(Val);
2134 }
2135 return createSRegOperand(getSgprClassId(IsWave32 ? 32 : 64), Val);
2136 }
2137 return createRegOperand(IsWave32 ? AMDGPU::VCC_LO : AMDGPU::VCC);
2138}
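// The VOPC_DST_VCC_MASK bit selects between an explicit SGPR destination (bit
// set: the masked value decodes as an SGPR, ttmp, or special register of
// wavefront width) and the implicit default (bit clear: VCC_LO in wave32,
// VCC in wave64).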
2139
2140MCOperand AMDGPUDisassembler::decodeBoolReg(const MCInst &Inst,
2141 unsigned Val) const {
2142 return STI.hasFeature(AMDGPU::FeatureWavefrontSize32)
2143 ? decodeSrcOp(Inst, 32, Val)
2144 : decodeSrcOp(Inst, 64, Val);
2145}
2146
2147MCOperand AMDGPUDisassembler::decodeSplitBarrier(const MCInst &Inst,
2148 unsigned Val) const {
2149 return decodeSrcOp(Inst, 32, Val);
2150}
2151
2152MCOperand AMDGPUDisassembler::decodeDpp8FI(unsigned Val) const {
2153 if (Val != AMDGPU::DPP::DPP8_FI_0 && Val != AMDGPU::DPP::DPP8_FI_1)
2154 return MCOperand();
2155 return MCOperand::createImm(Val);
2156}
2157
2158MCOperand AMDGPUDisassembler::decodeVersionImm(unsigned Imm) const {
2159 using VersionField = AMDGPU::EncodingField<7, 0>;
2160 using W64Bit = AMDGPU::EncodingBit<13>;
2161 using W32Bit = AMDGPU::EncodingBit<14>;
2162 using MDPBit = AMDGPU::EncodingBit<15>;
2163 using Encoding = AMDGPU::EncodingFields<VersionField, W64Bit, W32Bit, MDPBit>;
2164
2165 auto [Version, W64, W32, MDP] = Encoding::decode(Imm);
2166
2167 // Decode into a plain immediate if any unused bits are set.
2168 if (Encoding::encode(Version, W64, W32, MDP) != Imm)
2169 return MCOperand::createImm(Imm);
2170
2171 const auto &Versions = AMDGPU::UCVersion::getGFXVersions();
2172 const auto *I = find_if(
2173 Versions, [Version = Version](const AMDGPU::UCVersion::GFXVersion &V) {
2174 return V.Code == Version;
2175 });
2176 MCContext &Ctx = getContext();
2177 const MCExpr *E;
2178 if (I == Versions.end())
2179 E = MCConstantExpr::create(Version, Ctx);
2180 else
2181 E = MCSymbolRefExpr::create(Ctx.getOrCreateSymbol(I->Symbol), Ctx);
2182
2183 if (W64)
2184 E = MCBinaryExpr::createOr(E, UCVersionW64Expr, Ctx);
2185 if (W32)
2186 E = MCBinaryExpr::createOr(E, UCVersionW32Expr, Ctx);
2187 if (MDP)
2188 E = MCBinaryExpr::createOr(E, UCVersionMDPExpr, Ctx);
2189
2190 return MCOperand::createExpr(E);
2191}
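// The encode()/decode() round trip above acts as a validity check: the fields
// cover bits [7:0] and [15:13], so an immediate with any of bits [12:8] set
// cannot be re-encoded and is printed as a plain number rather than a
// symbolic UC_VERSION_* expression.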
2192
2193bool AMDGPUDisassembler::isVI() const {
2194 return STI.hasFeature(AMDGPU::FeatureVolcanicIslands);
2195}
2196
2197bool AMDGPUDisassembler::isGFX9() const { return AMDGPU::isGFX9(STI); }
2198
2199bool AMDGPUDisassembler::isGFX90A() const {
2200 return STI.hasFeature(AMDGPU::FeatureGFX90AInsts);
2201}
2202
2203bool AMDGPUDisassembler::isGFX9Plus() const { return AMDGPU::isGFX9Plus(STI); }
2204
2205bool AMDGPUDisassembler::isGFX10() const { return AMDGPU::isGFX10(STI); }
2206
2207bool AMDGPUDisassembler::isGFX10Plus() const {
2208 return AMDGPU::isGFX10Plus(STI);
2209}
2210
2211bool AMDGPUDisassembler::isGFX11() const {
2212 return STI.hasFeature(AMDGPU::FeatureGFX11);
2213}
2214
2215bool AMDGPUDisassembler::isGFX11Plus() const {
2216 return AMDGPU::isGFX11Plus(STI);
2217}
2218
2219bool AMDGPUDisassembler::isGFX12() const {
2220 return STI.hasFeature(AMDGPU::FeatureGFX12);
2221}
2222
2223bool AMDGPUDisassembler::isGFX12Plus() const {
2224 return AMDGPU::isGFX12Plus(STI);
2225}
2226
2227bool AMDGPUDisassembler::isGFX1250() const { return AMDGPU::isGFX1250(STI); }
2228
2229bool AMDGPUDisassembler::hasArchitectedFlatScratch() const {
2230 return STI.hasFeature(AMDGPU::FeatureArchitectedFlatScratch);
2231}
2232
2233bool AMDGPUDisassembler::hasKernargPreload() const {
2234 return AMDGPU::hasKernargPreload(STI);
2235}
2236
2237//===----------------------------------------------------------------------===//
2238// AMDGPU specific symbol handling
2239//===----------------------------------------------------------------------===//
2240
2241/// Print a string describing the reserved bit range specified by Mask with
2242/// offset BaseBytes for use in error comments. Mask is a single continuous
2243/// range of 1s surrounded by zeros. The format here is meant to align with the
2244/// tables that describe these bits in llvm.org/docs/AMDGPUUsage.html.
2245static SmallString<32> getBitRangeFromMask(uint32_t Mask, unsigned BaseBytes) {
2246 SmallString<32> Result;
2247 raw_svector_ostream S(Result);
2248
2249 int TrailingZeros = llvm::countr_zero(Mask);
2250 int PopCount = llvm::popcount(Mask);
2251
2252 if (PopCount == 1) {
2253 S << "bit (" << (TrailingZeros + BaseBytes * CHAR_BIT) << ')';
2254 } else {
2255 S << "bits in range ("
2256 << (TrailingZeros + PopCount - 1 + BaseBytes * CHAR_BIT) << ':'
2257 << (TrailingZeros + BaseBytes * CHAR_BIT) << ')';
2258 }
2259
2260 return Result;
2261}
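// Worked examples: getBitRangeFromMask(0x1, 0) prints "bit (0)", and
// getBitRangeFromMask(0x6, 4) prints "bits in range (34:33)" -- two set bits
// at positions 2:1, shifted by 4 bytes (32 bits).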
2262
2263#define GET_FIELD(MASK) (AMDHSA_BITS_GET(FourByteBuffer, MASK))
2264#define PRINT_DIRECTIVE(DIRECTIVE, MASK) \
2265 do { \
2266 KdStream << Indent << DIRECTIVE " " << GET_FIELD(MASK) << '\n'; \
2267 } while (0)
2268#define PRINT_PSEUDO_DIRECTIVE_COMMENT(DIRECTIVE, MASK) \
2269 do { \
2270 KdStream << Indent << MAI.getCommentString() << ' ' << DIRECTIVE " " \
2271 << GET_FIELD(MASK) << '\n'; \
2272 } while (0)
2273
2274#define CHECK_RESERVED_BITS_IMPL(MASK, DESC, MSG) \
2275 do { \
2276 if (FourByteBuffer & (MASK)) { \
2277 return createStringError(std::errc::invalid_argument, \
2278 "kernel descriptor " DESC \
2279 " reserved %s set" MSG, \
2280 getBitRangeFromMask((MASK), 0).c_str()); \
2281 } \
2282 } while (0)
2283
2284#define CHECK_RESERVED_BITS(MASK) CHECK_RESERVED_BITS_IMPL(MASK, #MASK, "")
2285#define CHECK_RESERVED_BITS_MSG(MASK, MSG) \
2286 CHECK_RESERVED_BITS_IMPL(MASK, #MASK, ", " MSG)
2287#define CHECK_RESERVED_BITS_DESC(MASK, DESC) \
2288 CHECK_RESERVED_BITS_IMPL(MASK, DESC, "")
2289#define CHECK_RESERVED_BITS_DESC_MSG(MASK, DESC, MSG) \
2290 CHECK_RESERVED_BITS_IMPL(MASK, DESC, ", " MSG)
2291
2292// NOLINTNEXTLINE(readability-identifier-naming)
2293Expected<bool> AMDGPUDisassembler::decodeCOMPUTE_PGM_RSRC1(
2294 uint32_t FourByteBuffer, raw_string_ostream &KdStream) const {
2295 using namespace amdhsa;
2296 StringRef Indent = "\t";
2297
2298 // We cannot accurately backward compute #VGPRs used from
2299 // GRANULATED_WORKITEM_VGPR_COUNT. But we are concerned with getting the same
2300 // value of GRANULATED_WORKITEM_VGPR_COUNT in the reassembled binary. So we
2301 // simply calculate the inverse of what the assembler does.
2302
2303 uint32_t GranulatedWorkitemVGPRCount =
2304 GET_FIELD(COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT);
2305
2306 uint32_t NextFreeVGPR =
2307 (GranulatedWorkitemVGPRCount + 1) *
2308 AMDGPU::IsaInfo::getVGPREncodingGranule(&STI, EnableWavefrontSize32);
2309
2310 KdStream << Indent << ".amdhsa_next_free_vgpr " << NextFreeVGPR << '\n';
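// Round-trip example (the granule is target-dependent; 4 is just an
// illustration): a stored GRANULATED_WORKITEM_VGPR_COUNT of 3 prints
// ".amdhsa_next_free_vgpr 16", and assembling 16 with granule 4 encodes back
// to 3.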
2311
2312 // We cannot backward compute values used to calculate
2313 // GRANULATED_WAVEFRONT_SGPR_COUNT. Hence the original values for the
2314 // following directives can't be computed:
2315 // .amdhsa_reserve_vcc
2316 // .amdhsa_reserve_flat_scratch
2317 // .amdhsa_reserve_xnack_mask
2318 // They take their respective default values if not specified in the assembly.
2319 //
2320 // GRANULATED_WAVEFRONT_SGPR_COUNT
2321 // = f(NEXT_FREE_SGPR + VCC + FLAT_SCRATCH + XNACK_MASK)
2322 //
2323 // We compute the inverse as though all directives apart from NEXT_FREE_SGPR
2324 // are set to 0. So while disassembling we consider that:
2325 //
2326 // GRANULATED_WAVEFRONT_SGPR_COUNT
2327 // = f(NEXT_FREE_SGPR + 0 + 0 + 0)
2328 //
2329 // The disassembler cannot recover the original values of those 3 directives.
2330
2331 uint32_t GranulatedWavefrontSGPRCount =
2332 GET_FIELD(COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT);
2333
2334 if (isGFX10Plus())
2335 CHECK_RESERVED_BITS_MSG(COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT,
2336 "must be zero on gfx10+");
2337
2338 uint32_t NextFreeSGPR = (GranulatedWavefrontSGPRCount + 1) *
2339 AMDGPU::IsaInfo::getSGPREncodingGranule(&STI);
2340
2341 KdStream << Indent << ".amdhsa_reserve_vcc " << 0 << '\n';
2342 if (!hasArchitectedFlatScratch())
2343 KdStream << Indent << ".amdhsa_reserve_flat_scratch " << 0 << '\n';
2344 bool ReservedXnackMask = STI.hasFeature(AMDGPU::FeatureXNACK);
2345 assert(!ReservedXnackMask || STI.hasFeature(AMDGPU::FeatureSupportsXNACK));
2346 KdStream << Indent << ".amdhsa_reserve_xnack_mask " << ReservedXnackMask
2347 << '\n';
2348 KdStream << Indent << ".amdhsa_next_free_sgpr " << NextFreeSGPR << "\n";
2349
2350 CHECK_RESERVED_BITS(COMPUTE_PGM_RSRC1_PRIORITY);
2351
2352 PRINT_DIRECTIVE(".amdhsa_float_round_mode_32",
2353 COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_32);
2354 PRINT_DIRECTIVE(".amdhsa_float_round_mode_16_64",
2355 COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_16_64);
2356 PRINT_DIRECTIVE(".amdhsa_float_denorm_mode_32",
2357 COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_32);
2358 PRINT_DIRECTIVE(".amdhsa_float_denorm_mode_16_64",
2359 COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_16_64);
2360
2361 CHECK_RESERVED_BITS(COMPUTE_PGM_RSRC1_PRIV);
2362
2363 if (!isGFX12Plus())
2364 PRINT_DIRECTIVE(".amdhsa_dx10_clamp",
2365 COMPUTE_PGM_RSRC1_GFX6_GFX11_ENABLE_DX10_CLAMP);
2366
2367 CHECK_RESERVED_BITS(COMPUTE_PGM_RSRC1_DEBUG_MODE);
2368
2369 if (!isGFX12Plus())
2370 PRINT_DIRECTIVE(".amdhsa_ieee_mode",
2371 COMPUTE_PGM_RSRC1_GFX6_GFX11_ENABLE_IEEE_MODE);
2372
2373 CHECK_RESERVED_BITS(COMPUTE_PGM_RSRC1_BULKY);
2374 CHECK_RESERVED_BITS(COMPUTE_PGM_RSRC1_CDBG_USER);
2375
2376 // Bits [26].
2377 if (isGFX9Plus()) {
2378 PRINT_DIRECTIVE(".amdhsa_fp16_overflow", COMPUTE_PGM_RSRC1_GFX9_PLUS_FP16_OVFL);
2379 } else {
2380 CHECK_RESERVED_BITS_DESC_MSG(COMPUTE_PGM_RSRC1_GFX6_GFX8_RESERVED0,
2381 "COMPUTE_PGM_RSRC1", "must be zero pre-gfx9");
2382 }
2383
2384 // Bits [27].
2385 if (isGFX1250()) {
2386 PRINT_PSEUDO_DIRECTIVE_COMMENT("FLAT_SCRATCH_IS_NV",
2387 COMPUTE_PGM_RSRC1_GFX125_FLAT_SCRATCH_IS_NV);
2388 } else {
2389 CHECK_RESERVED_BITS_DESC(COMPUTE_PGM_RSRC1_GFX6_GFX120_RESERVED1,
2390 "COMPUTE_PGM_RSRC1");
2391 }
2392
2393 // Bits [28].
2394 CHECK_RESERVED_BITS_DESC(COMPUTE_PGM_RSRC1_RESERVED2, "COMPUTE_PGM_RSRC1");
2395
2396 // Bits [29-31].
2397 if (isGFX10Plus()) {
2398 // WGP_MODE is not available on GFX1250.
2399 if (!isGFX1250()) {
2400 PRINT_DIRECTIVE(".amdhsa_workgroup_processor_mode",
2401 COMPUTE_PGM_RSRC1_GFX10_PLUS_WGP_MODE);
2402 }
2403 PRINT_DIRECTIVE(".amdhsa_memory_ordered", COMPUTE_PGM_RSRC1_GFX10_PLUS_MEM_ORDERED);
2404 PRINT_DIRECTIVE(".amdhsa_forward_progress", COMPUTE_PGM_RSRC1_GFX10_PLUS_FWD_PROGRESS);
2405 } else {
2406 CHECK_RESERVED_BITS_DESC(COMPUTE_PGM_RSRC1_GFX6_GFX9_RESERVED3,
2407 "COMPUTE_PGM_RSRC1");
2408 }
2409
2410 if (isGFX12Plus())
2411 PRINT_DIRECTIVE(".amdhsa_round_robin_scheduling",
2412 COMPUTE_PGM_RSRC1_GFX12_PLUS_ENABLE_WG_RR_EN);
2413
2414 return true;
2415}
2416
2417// NOLINTNEXTLINE(readability-identifier-naming)
2418Expected<bool> AMDGPUDisassembler::decodeCOMPUTE_PGM_RSRC2(
2419 uint32_t FourByteBuffer, raw_string_ostream &KdStream) const {
2420 using namespace amdhsa;
2421 StringRef Indent = "\t";
2422 if (hasArchitectedFlatScratch())
2423 PRINT_DIRECTIVE(".amdhsa_enable_private_segment",
2424 COMPUTE_PGM_RSRC2_ENABLE_PRIVATE_SEGMENT);
2425 else
2426 PRINT_DIRECTIVE(".amdhsa_system_sgpr_private_segment_wavefront_offset",
2427 COMPUTE_PGM_RSRC2_ENABLE_PRIVATE_SEGMENT);
2428 PRINT_DIRECTIVE(".amdhsa_system_sgpr_workgroup_id_x",
2429 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_X);
2430 PRINT_DIRECTIVE(".amdhsa_system_sgpr_workgroup_id_y",
2431 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Y);
2432 PRINT_DIRECTIVE(".amdhsa_system_sgpr_workgroup_id_z",
2433 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Z);
2434 PRINT_DIRECTIVE(".amdhsa_system_sgpr_workgroup_info",
2435 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_INFO);
2436 PRINT_DIRECTIVE(".amdhsa_system_vgpr_workitem_id",
2437 COMPUTE_PGM_RSRC2_ENABLE_VGPR_WORKITEM_ID);
2438
2439 CHECK_RESERVED_BITS(COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_ADDRESS_WATCH);
2440 CHECK_RESERVED_BITS(COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_MEMORY);
2441 CHECK_RESERVED_BITS(COMPUTE_PGM_RSRC2_GRANULATED_LDS_SIZE);
2442
2444 ".amdhsa_exception_fp_ieee_invalid_op",
2445 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INVALID_OPERATION);
2446 PRINT_DIRECTIVE(".amdhsa_exception_fp_denorm_src",
2447 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_FP_DENORMAL_SOURCE);
2449 ".amdhsa_exception_fp_ieee_div_zero",
2450 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_DIVISION_BY_ZERO);
2451 PRINT_DIRECTIVE(".amdhsa_exception_fp_ieee_overflow",
2452 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_OVERFLOW);
2453 PRINT_DIRECTIVE(".amdhsa_exception_fp_ieee_underflow",
2454 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_UNDERFLOW);
2455 PRINT_DIRECTIVE(".amdhsa_exception_fp_ieee_inexact",
2456 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INEXACT);
2457 PRINT_DIRECTIVE(".amdhsa_exception_int_div_zero",
2458 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_INT_DIVIDE_BY_ZERO);
2459
2460 CHECK_RESERVED_BITS_DESC(COMPUTE_PGM_RSRC2_RESERVED0, "COMPUTE_PGM_RSRC2");
2461
2462 return true;
2463}
2464
2465// NOLINTNEXTLINE(readability-identifier-naming)
2466Expected<bool> AMDGPUDisassembler::decodeCOMPUTE_PGM_RSRC3(
2467 uint32_t FourByteBuffer, raw_string_ostream &KdStream) const {
2468 using namespace amdhsa;
2469 StringRef Indent = "\t";
2470 if (isGFX90A()) {
2471 KdStream << Indent << ".amdhsa_accum_offset "
2472 << (GET_FIELD(COMPUTE_PGM_RSRC3_GFX90A_ACCUM_OFFSET) + 1) * 4
2473 << '\n';
2474
2475 PRINT_DIRECTIVE(".amdhsa_tg_split", COMPUTE_PGM_RSRC3_GFX90A_TG_SPLIT);
2476
2477 CHECK_RESERVED_BITS_DESC_MSG(COMPUTE_PGM_RSRC3_GFX90A_RESERVED0,
2478 "COMPUTE_PGM_RSRC3", "must be zero on gfx90a");
2479 CHECK_RESERVED_BITS_DESC_MSG(COMPUTE_PGM_RSRC3_GFX90A_RESERVED1,
2480 "COMPUTE_PGM_RSRC3", "must be zero on gfx90a");
2481 } else if (isGFX10Plus()) {
2482 // Bits [0-3].
2483 if (!isGFX12Plus()) {
2484 if (!EnableWavefrontSize32 || !*EnableWavefrontSize32) {
2485 PRINT_DIRECTIVE(".amdhsa_shared_vgpr_count",
2486 COMPUTE_PGM_RSRC3_GFX10_GFX11_SHARED_VGPR_COUNT);
2487 } else {
2489 "SHARED_VGPR_COUNT",
2490 COMPUTE_PGM_RSRC3_GFX10_GFX11_SHARED_VGPR_COUNT);
2491 }
2492 } else {
2493 CHECK_RESERVED_BITS_DESC_MSG(COMPUTE_PGM_RSRC3_GFX12_PLUS_RESERVED0,
2494 "COMPUTE_PGM_RSRC3",
2495 "must be zero on gfx12+");
2496 }
2497
2498 // Bits [4-11].
2499 if (isGFX11()) {
2500 PRINT_DIRECTIVE(".amdhsa_inst_pref_size",
2501 COMPUTE_PGM_RSRC3_GFX11_INST_PREF_SIZE);
2502 PRINT_PSEUDO_DIRECTIVE_COMMENT("TRAP_ON_START",
2503 COMPUTE_PGM_RSRC3_GFX11_TRAP_ON_START);
2504 PRINT_PSEUDO_DIRECTIVE_COMMENT("TRAP_ON_END",
2505 COMPUTE_PGM_RSRC3_GFX11_TRAP_ON_END);
2506 } else if (isGFX12Plus()) {
2507 PRINT_DIRECTIVE(".amdhsa_inst_pref_size",
2508 COMPUTE_PGM_RSRC3_GFX12_PLUS_INST_PREF_SIZE);
2509 } else {
2510 CHECK_RESERVED_BITS_DESC_MSG(COMPUTE_PGM_RSRC3_GFX10_RESERVED1,
2511 "COMPUTE_PGM_RSRC3",
2512 "must be zero on gfx10");
2513 }
2514
2515 // Bits [12].
2516 CHECK_RESERVED_BITS_DESC_MSG(COMPUTE_PGM_RSRC3_GFX10_PLUS_RESERVED2,
2517 "COMPUTE_PGM_RSRC3", "must be zero on gfx10+");
2518
2519 // Bits [13].
2520 if (isGFX12Plus()) {
2521 PRINT_PSEUDO_DIRECTIVE_COMMENT("GLG_EN",
2522 COMPUTE_PGM_RSRC3_GFX12_PLUS_GLG_EN);
2523 } else {
2524 CHECK_RESERVED_BITS_DESC_MSG(COMPUTE_PGM_RSRC3_GFX10_GFX11_RESERVED3,
2525 "COMPUTE_PGM_RSRC3",
2526 "must be zero on gfx10 or gfx11");
2527 }
2528
2529 // Bits [14-21].
2530 if (isGFX1250()) {
2531 PRINT_DIRECTIVE(".amdhsa_named_barrier_count",
2532 COMPUTE_PGM_RSRC3_GFX125_NAMED_BAR_CNT);
2534 "ENABLE_DYNAMIC_VGPR", COMPUTE_PGM_RSRC3_GFX125_ENABLE_DYNAMIC_VGPR);
2536 COMPUTE_PGM_RSRC3_GFX125_TCP_SPLIT);
2538 "ENABLE_DIDT_THROTTLE",
2539 COMPUTE_PGM_RSRC3_GFX125_ENABLE_DIDT_THROTTLE);
2540 } else {
2541 CHECK_RESERVED_BITS_DESC_MSG(COMPUTE_PGM_RSRC3_GFX10_GFX120_RESERVED4,
2542 "COMPUTE_PGM_RSRC3",
2543 "must be zero on gfx10+");
2544 }
2545
2546 // Bits [22-30].
2547 CHECK_RESERVED_BITS_DESC_MSG(COMPUTE_PGM_RSRC3_GFX10_PLUS_RESERVED5,
2548 "COMPUTE_PGM_RSRC3", "must be zero on gfx10+");
2549
2550 // Bits [31].
2551 if (isGFX11Plus()) {
2552 PRINT_PSEUDO_DIRECTIVE_COMMENT("IMAGE_OP",
2553 COMPUTE_PGM_RSRC3_GFX11_PLUS_IMAGE_OP);
2554 } else {
2555 CHECK_RESERVED_BITS_DESC_MSG(COMPUTE_PGM_RSRC3_GFX10_RESERVED6,
2556 "COMPUTE_PGM_RSRC3",
2557 "must be zero on gfx10");
2558 }
2559 } else if (FourByteBuffer) {
2560 return createStringError(
2561 std::errc::invalid_argument,
2562 "kernel descriptor COMPUTE_PGM_RSRC3 must be all zero before gfx9");
2563 }
2564 return true;
2565}
2566#undef PRINT_PSEUDO_DIRECTIVE_COMMENT
2567#undef PRINT_DIRECTIVE
2568#undef GET_FIELD
2569#undef CHECK_RESERVED_BITS_IMPL
2570#undef CHECK_RESERVED_BITS
2571#undef CHECK_RESERVED_BITS_MSG
2572#undef CHECK_RESERVED_BITS_DESC
2573#undef CHECK_RESERVED_BITS_DESC_MSG
2574
2575/// Create an error object to return from onSymbolStart for reserved kernel
2576/// descriptor bits being set.
2577static Error createReservedKDBitsError(uint32_t Mask, unsigned BaseBytes,
2578 const char *Msg = "") {
2579 return createStringError(
2580 std::errc::invalid_argument, "kernel descriptor reserved %s set%s%s",
2581 getBitRangeFromMask(Mask, BaseBytes).c_str(), *Msg ? ", " : "", Msg);
2582}
2583
2584/// Create an error object to return from onSymbolStart for reserved kernel
2585/// descriptor bytes being set.
2586static Error createReservedKDBytesError(unsigned BaseInBytes,
2587 unsigned WidthInBytes) {
2588 // Create an error comment in the same format as the "Kernel Descriptor"
2589 // table here: https://llvm.org/docs/AMDGPUUsage.html#kernel-descriptor .
2590 return createStringError(
2591 std::errc::invalid_argument,
2592 "kernel descriptor reserved bits in range (%u:%u) set",
2593 (BaseInBytes + WidthInBytes) * CHAR_BIT - 1, BaseInBytes * CHAR_BIT);
2594}
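// Example: for the 4 reserved bytes at descriptor offset 12,
// createReservedKDBytesError(12, 4) reports "reserved bits in range (127:96)
// set", matching the kernel-descriptor table in AMDGPUUsage.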
2595
2596Expected<bool> AMDGPUDisassembler::decodeKernelDescriptorDirective(
2597 DataExtractor::Cursor &Cursor, ArrayRef<uint8_t> Bytes,
2598 raw_string_ostream &KdStream) const {
2599#define PRINT_DIRECTIVE(DIRECTIVE, MASK) \
2600 do { \
2601 KdStream << Indent << DIRECTIVE " " \
2602 << ((TwoByteBuffer & MASK) >> (MASK##_SHIFT)) << '\n'; \
2603 } while (0)
2604
2605 uint16_t TwoByteBuffer = 0;
2606 uint32_t FourByteBuffer = 0;
2607
2608 StringRef ReservedBytes;
2609 StringRef Indent = "\t";
2610
2611 assert(Bytes.size() == 64);
2612 DataExtractor DE(Bytes, /*IsLittleEndian=*/true, /*AddressSize=*/8);
2613
2614 switch (Cursor.tell()) {
2615 case amdhsa::GROUP_SEGMENT_FIXED_SIZE_OFFSET:
2616 FourByteBuffer = DE.getU32(Cursor);
2617 KdStream << Indent << ".amdhsa_group_segment_fixed_size " << FourByteBuffer
2618 << '\n';
2619 return true;
2620
2621 case amdhsa::PRIVATE_SEGMENT_FIXED_SIZE_OFFSET:
2622 FourByteBuffer = DE.getU32(Cursor);
2623 KdStream << Indent << ".amdhsa_private_segment_fixed_size "
2624 << FourByteBuffer << '\n';
2625 return true;
2626
2627 case amdhsa::KERNARG_SIZE_OFFSET:
2628 FourByteBuffer = DE.getU32(Cursor);
2629 KdStream << Indent << ".amdhsa_kernarg_size "
2630 << FourByteBuffer << '\n';
2631 return true;
2632
2633 case amdhsa::RESERVED0_OFFSET:
2634 // 4 reserved bytes, must be 0.
2635 ReservedBytes = DE.getBytes(Cursor, 4);
2636 for (int I = 0; I < 4; ++I) {
2637 if (ReservedBytes[I] != 0)
2638 return createReservedKDBytesError(amdhsa::RESERVED0_OFFSET, 4);
2639 }
2640 return true;
2641
2642 case amdhsa::KERNEL_CODE_ENTRY_BYTE_OFFSET_OFFSET:
2643 // KERNEL_CODE_ENTRY_BYTE_OFFSET
2644 // So far no directive controls this for Code Object V3, so simply skip for
2645 // disassembly.
2646 DE.skip(Cursor, 8);
2647 return true;
2648
2649 case amdhsa::RESERVED1_OFFSET:
2650 // 20 reserved bytes, must be 0.
2651 ReservedBytes = DE.getBytes(Cursor, 20);
2652 for (int I = 0; I < 20; ++I) {
2653 if (ReservedBytes[I] != 0)
2654 return createReservedKDBytesError(amdhsa::RESERVED1_OFFSET, 20);
2655 }
2656 return true;
2657
2658 case amdhsa::COMPUTE_PGM_RSRC3_OFFSET:
2659 FourByteBuffer = DE.getU32(Cursor);
2660 return decodeCOMPUTE_PGM_RSRC3(FourByteBuffer, KdStream);
2661
2662 case amdhsa::COMPUTE_PGM_RSRC1_OFFSET:
2663 FourByteBuffer = DE.getU32(Cursor);
2664 return decodeCOMPUTE_PGM_RSRC1(FourByteBuffer, KdStream);
2665
2666 case amdhsa::COMPUTE_PGM_RSRC2_OFFSET:
2667 FourByteBuffer = DE.getU32(Cursor);
2668 return decodeCOMPUTE_PGM_RSRC2(FourByteBuffer, KdStream);
2669
2670 case amdhsa::KERNEL_CODE_PROPERTIES_OFFSET:
2671 using namespace amdhsa;
2672 TwoByteBuffer = DE.getU16(Cursor);
2673
2674 if (!hasArchitectedFlatScratch())
2675 PRINT_DIRECTIVE(".amdhsa_user_sgpr_private_segment_buffer",
2676 KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER);
2677 PRINT_DIRECTIVE(".amdhsa_user_sgpr_dispatch_ptr",
2678 KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR);
2679 PRINT_DIRECTIVE(".amdhsa_user_sgpr_queue_ptr",
2680 KERNEL_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR);
2681 PRINT_DIRECTIVE(".amdhsa_user_sgpr_kernarg_segment_ptr",
2682 KERNEL_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR);
2683 PRINT_DIRECTIVE(".amdhsa_user_sgpr_dispatch_id",
2684 KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID);
2685 if (!hasArchitectedFlatScratch())
2686 PRINT_DIRECTIVE(".amdhsa_user_sgpr_flat_scratch_init",
2687 KERNEL_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT);
2688 PRINT_DIRECTIVE(".amdhsa_user_sgpr_private_segment_size",
2689 KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE);
2690
2691 if (TwoByteBuffer & KERNEL_CODE_PROPERTY_RESERVED0)
2692 return createReservedKDBitsError(KERNEL_CODE_PROPERTY_RESERVED0,
2693 amdhsa::KERNEL_CODE_PROPERTIES_OFFSET);
2694
2695 // Reserved for GFX9
2696 if (isGFX9() &&
2697 (TwoByteBuffer & KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32)) {
2698 return createReservedKDBitsError(
2699 KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32,
2700 amdhsa::KERNEL_CODE_PROPERTIES_OFFSET, "must be zero on gfx9");
2701 }
2702 if (isGFX10Plus()) {
2703 PRINT_DIRECTIVE(".amdhsa_wavefront_size32",
2704 KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32);
2705 }
2706
2707 if (CodeObjectVersion >= AMDGPU::AMDHSA_COV5)
2708 PRINT_DIRECTIVE(".amdhsa_uses_dynamic_stack",
2709 KERNEL_CODE_PROPERTY_USES_DYNAMIC_STACK);
2710
2711 if (TwoByteBuffer & KERNEL_CODE_PROPERTY_RESERVED1) {
2712 return createReservedKDBitsError(KERNEL_CODE_PROPERTY_RESERVED1,
2713 amdhsa::KERNEL_CODE_PROPERTIES_OFFSET);
2714 }
2715
2716 return true;
2717
2718 case amdhsa::KERNARG_PRELOAD_OFFSET:
2719 using namespace amdhsa;
2720 TwoByteBuffer = DE.getU16(Cursor);
2721 if (TwoByteBuffer & KERNARG_PRELOAD_SPEC_LENGTH) {
2722 PRINT_DIRECTIVE(".amdhsa_user_sgpr_kernarg_preload_length",
2723 KERNARG_PRELOAD_SPEC_LENGTH);
2724 }
2725
2726 if (TwoByteBuffer & KERNARG_PRELOAD_SPEC_OFFSET) {
2727 PRINT_DIRECTIVE(".amdhsa_user_sgpr_kernarg_preload_offset",
2728 KERNARG_PRELOAD_SPEC_OFFSET);
2729 }
2730 return true;
2731
2732 case amdhsa::RESERVED3_OFFSET:
2733 // 4 bytes from here are reserved, must be 0.
2734 ReservedBytes = DE.getBytes(Cursor, 4);
2735 for (int I = 0; I < 4; ++I) {
2736 if (ReservedBytes[I] != 0)
2737 return createReservedKDBytesError(amdhsa::RESERVED3_OFFSET, 4);
2738 }
2739 return true;
2740
2741 default:
2742 llvm_unreachable("Unhandled index. Case statements cover everything.");
2743 return true;
2744 }
2745#undef PRINT_DIRECTIVE
2746}
2747
2748Expected<bool> AMDGPUDisassembler::decodeKernelDescriptor(
2749 StringRef KdName, ArrayRef<uint8_t> Bytes, uint64_t KdAddress) const {
2750
2751 // CP microcode requires the kernel descriptor to be 64-byte aligned.
2752 if (Bytes.size() != 64 || KdAddress % 64 != 0)
2753 return createStringError(std::errc::invalid_argument,
2754 "kernel descriptor must be 64-byte aligned");
2755
2756 // FIXME: We can't actually decode "in order" as is done below, as e.g. GFX10
2757 // requires us to know the setting of .amdhsa_wavefront_size32 in order to
2758 // accurately produce .amdhsa_next_free_vgpr, and they appear in the wrong
2759 // order. Work around this by first looking up .amdhsa_wavefront_size32 here
2760 // when required.
2761 if (isGFX10Plus()) {
2762 uint16_t KernelCodeProperties =
2763 support::endian::read16(&Bytes[amdhsa::KERNEL_CODE_PROPERTIES_OFFSET],
2764 llvm::endianness::little);
2765 EnableWavefrontSize32 =
2766 AMDHSA_BITS_GET(KernelCodeProperties,
2767 amdhsa::KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32);
2768 }
2769
2770 std::string Kd;
2771 raw_string_ostream KdStream(Kd);
2772 KdStream << ".amdhsa_kernel " << KdName << '\n';
2773
2774 DataExtractor::Cursor C(0);
2775 while (C && C.tell() < Bytes.size()) {
2776 Expected<bool> Res = decodeKernelDescriptorDirective(C, Bytes, KdStream);
2777
2778 cantFail(C.takeError());
2779
2780 if (!Res)
2781 return Res;
2782 }
2783 KdStream << ".end_amdhsa_kernel\n";
2784 outs() << KdStream.str();
2785 return true;
2786}
2787
2788Expected<bool> AMDGPUDisassembler::onSymbolStart(SymbolInfoTy &Symbol,
2789 uint64_t &Size,
2790 ArrayRef<uint8_t> Bytes,
2791 uint64_t Address) const {
2792 // Right now only the kernel descriptor needs to be handled.
2793 // We ignore all other symbols for target-specific handling.
2794 // TODO:
2795 // Fix the spurious symbol issue for AMDGPU kernels. Exists for both Code
2796 // Object V2 and V3 when symbols are marked protected.
2797
2798 // amd_kernel_code_t for Code Object V2.
2799 if (Symbol.Type == ELF::STT_AMDGPU_HSA_KERNEL) {
2800 Size = 256;
2801 return createStringError(std::errc::invalid_argument,
2802 "code object v2 is not supported");
2803 }
2804
2805 // Code Object V3 kernel descriptors.
2806 StringRef Name = Symbol.Name;
2807 if (Symbol.Type == ELF::STT_OBJECT && Name.ends_with(StringRef(".kd"))) {
2808 Size = 64; // Size = 64 regardless of success or failure.
2809 return decodeKernelDescriptor(Name.drop_back(3), Bytes, Address);
2810 }
2811
2812 return false;
2813}
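// Example: an STT_OBJECT symbol named "my_kernel.kd" is routed to
// decodeKernelDescriptor("my_kernel", ...), and Size is pinned to 64 whether
// or not the descriptor decodes cleanly, so disassembly can resume after it.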
2814
2815const MCExpr *AMDGPUDisassembler::createConstantSymbolExpr(StringRef Id,
2816 int64_t Val) {
2817 MCContext &Ctx = getContext();
2818 MCSymbol *Sym = Ctx.getOrCreateSymbol(Id);
2819 // Note: only set the value to Val on a new symbol, in case a disassembler
2820 // has already been initialized in this context.
2821 if (!Sym->isVariable()) {
2822 Sym->setVariableValue(MCConstantExpr::create(Val, Ctx));
2823 } else {
2824 int64_t Res = ~Val;
2825 bool Valid = Sym->getVariableValue()->evaluateAsAbsolute(Res);
2826 if (!Valid || Res != Val)
2827 Ctx.reportWarning(SMLoc(), "unsupported redefinition of " + Id);
2828 }
2829 return MCSymbolRefExpr::create(Sym, Ctx);
2830}
2831
2832bool AMDGPUDisassembler::isBufferInstruction(const MCInst &MI) const {
2833 const uint64_t TSFlags = MCII->get(MI.getOpcode()).TSFlags;
2834
2835 // Check for MUBUF and MTBUF instructions
2836 if (TSFlags & (SIInstrFlags::MTBUF | SIInstrFlags::MUBUF))
2837 return true;
2838
2839 // Check for SMEM buffer instructions (S_BUFFER_* instructions)
2840 if ((TSFlags & SIInstrFlags::SMRD) && AMDGPU::getSMEMIsBuffer(MI.getOpcode()))
2841 return true;
2842
2843 return false;
2844}
2845
2846//===----------------------------------------------------------------------===//
2847// AMDGPUSymbolizer
2848//===----------------------------------------------------------------------===//
2849
2850 // Try to find a symbol name for the specified label.
2851bool AMDGPUSymbolizer::tryAddingSymbolicOperand(
2852 MCInst &Inst, raw_ostream & /*cStream*/, int64_t Value,
2853 uint64_t /*Address*/, bool IsBranch, uint64_t /*Offset*/,
2854 uint64_t /*OpSize*/, uint64_t /*InstSize*/) {
2855
2856 if (!IsBranch) {
2857 return false;
2858 }
2859
2860 auto *Symbols = static_cast<SectionSymbolsTy *>(DisInfo);
2861 if (!Symbols)
2862 return false;
2863
2864 auto Result = llvm::find_if(*Symbols, [Value](const SymbolInfoTy &Val) {
2865 return Val.Addr == static_cast<uint64_t>(Value) &&
2866 Val.Type == ELF::STT_NOTYPE;
2867 });
2868 if (Result != Symbols->end()) {
2869 auto *Sym = Ctx.getOrCreateSymbol(Result->Name);
2870 const auto *Add = MCSymbolRefExpr::create(Sym, Ctx);
2871 Inst.addOperand(MCOperand::createExpr(Add));
2872 return true;
2873 }
2874 // Add to list of referenced addresses, so caller can synthesize a label.
2875 ReferencedAddresses.push_back(static_cast<uint64_t>(Value));
2876 return false;
2877}
2878
2879void AMDGPUSymbolizer::tryAddingPcLoadReferenceComment(raw_ostream &cStream,
2880 int64_t Value,
2881 uint64_t Address) {
2882 llvm_unreachable("unimplemented");
2883}
2884
2885//===----------------------------------------------------------------------===//
2886// Initialization
2887//===----------------------------------------------------------------------===//
2888
2889static MCSymbolizer *createAMDGPUSymbolizer(const Triple & /*TT*/,
2890 LLVMOpInfoCallback /*GetOpInfo*/,
2891 LLVMSymbolLookupCallback /*SymbolLookUp*/,
2892 void *DisInfo,
2893 MCContext *Ctx,
2894 std::unique_ptr<MCRelocationInfo> &&RelInfo) {
2895 return new AMDGPUSymbolizer(*Ctx, std::move(RelInfo), DisInfo);
2896}
2897
2898static MCDisassembler *createAMDGPUDisassembler(const Target &T,
2899 const MCSubtargetInfo &STI,
2900 MCContext &Ctx) {
2901 return new AMDGPUDisassembler(STI, Ctx, T.createMCInstrInfo());
2902}
2903
2904extern "C" LLVM_ABI LLVM_EXTERNAL_VISIBILITY void
2905LLVMInitializeAMDGPUDisassembler() {
2906 TargetRegistry::RegisterMCDisassembler(getTheGCNTarget(),
2907 createAMDGPUDisassembler);
2908 TargetRegistry::RegisterMCSymbolizer(getTheGCNTarget(),
2909 createAMDGPUSymbolizer);
2910}