//===- AMDGPUDisassembler.cpp - Disassembler for AMDGPU ISA ---------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
//===----------------------------------------------------------------------===//
//
/// \file
///
/// This file contains the definition of the AMDGPU ISA disassembler.
//
//===----------------------------------------------------------------------===//

// ToDo: What to do with instruction suffixes (v_mov_b32 vs v_mov_b32_e32)?

#include "Disassembler/AMDGPUDisassembler.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "SIDefines.h"
#include "SIRegisterInfo.h"
#include "Utils/AMDGPUBaseInfo.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCDecoder.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCInstrDesc.h"
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/MC/MCSubtargetInfo.h"

using namespace llvm;
using namespace llvm::MCD;

#define DEBUG_TYPE "amdgpu-disassembler"

#define SGPR_MAX                                                               \
  (isGFX10Plus() ? AMDGPU::EncValues::SGPR_MAX_GFX10                           \
                 : AMDGPU::EncValues::SGPR_MAX_SI)

static int64_t getInlineImmValF16(unsigned Imm);
static int64_t getInlineImmValBF16(unsigned Imm);
static int64_t getInlineImmVal32(unsigned Imm);
static int64_t getInlineImmVal64(unsigned Imm);

AMDGPUDisassembler::AMDGPUDisassembler(const MCSubtargetInfo &STI,
                                       MCContext &Ctx, MCInstrInfo const *MCII)
    : MCDisassembler(STI, Ctx), MCII(MCII), MRI(*Ctx.getRegisterInfo()),
      MAI(*Ctx.getAsmInfo()),
      HwModeRegClass(STI.getHwMode(MCSubtargetInfo::HwMode_RegInfo)),
      TargetMaxInstBytes(MAI.getMaxInstLength(&STI)),
      CodeObjectVersion(AMDGPU::getDefaultAMDHSACodeObjectVersion()) {
  // ToDo: AMDGPUDisassembler supports only VI ISA.
  if (!STI.hasFeature(AMDGPU::FeatureGCN3Encoding) && !isGFX10Plus())
    reportFatalUsageError("disassembly not yet supported for subtarget");

  for (auto [Symbol, Code] : AMDGPU::UCVersion::getGFXVersions())
    createConstantSymbolExpr(Symbol, Code);

  UCVersionW64Expr = createConstantSymbolExpr("UC_VERSION_W64_BIT", 0x2000);
  UCVersionW32Expr = createConstantSymbolExpr("UC_VERSION_W32_BIT", 0x4000);
  UCVersionMDPExpr = createConstantSymbolExpr("UC_VERSION_MDP_BIT", 0x8000);
}

void AMDGPUDisassembler::setABIVersion(unsigned Version) {
  CodeObjectVersion = AMDGPU::getAMDHSACodeObjectVersion(Version);
}

inline static MCDisassembler::DecodeStatus
addOperand(MCInst &Inst, const MCOperand &Opnd) {
  Inst.addOperand(Opnd);
  return Opnd.isValid() ? MCDisassembler::Success : MCDisassembler::Fail;
}

static int insertNamedMCOperand(MCInst &MI, const MCOperand &Op,
                                AMDGPU::OpName Name) {
  int OpIdx = AMDGPU::getNamedOperandIdx(MI.getOpcode(), Name);
  if (OpIdx != -1) {
    auto *I = MI.begin();
    std::advance(I, OpIdx);
    MI.insert(I, Op);
  }
  return OpIdx;
}

static DecodeStatus decodeSOPPBrTarget(MCInst &Inst, unsigned Imm,
                                       uint64_t Addr,
                                       const MCDisassembler *Decoder) {
  const auto *DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);

  // Our branches take a simm16.
  int64_t Offset = SignExtend64<16>(Imm) * 4 + 4 + Addr;

  if (DAsm->tryAddingSymbolicOperand(Inst, Offset, Addr, true, 2, 2, 0))
    return MCDisassembler::Success;
  return addOperand(Inst, MCOperand::createImm(Imm));
}
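
// For illustration (worked example, not from the original source): with
// Imm = 0x0003 and Addr = 0x100 the branch target is
// SignExtend64<16>(3) * 4 + 4 + 0x100 = 0x110, i.e. the 16-bit immediate
// counts dwords from the end of this 4-byte instruction.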

static DecodeStatus decodeSMEMOffset(MCInst &Inst, unsigned Imm, uint64_t Addr,
                                     const MCDisassembler *Decoder) {
  const auto *DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
  int64_t Offset;
  if (DAsm->isGFX12Plus()) { // GFX12 supports 24-bit signed offsets.
    Offset = SignExtend64<24>(Imm);
  } else if (DAsm->isVI()) { // VI supports 20-bit unsigned offsets.
    Offset = Imm & 0xFFFFF;
  } else { // GFX9+ supports 21-bit signed offsets.
    Offset = SignExtend64<21>(Imm);
  }
  return addOperand(Inst, MCOperand::createImm(Offset));
}
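
// Sketch of the resulting offset ranges (derived from the branches above):
// GFX12+ yields [-0x800000, 0x7FFFFF], VI yields [0, 0xFFFFF], and
// GFX9/GFX10 yield [-0x100000, 0xFFFFF].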

static DecodeStatus decodeBoolReg(MCInst &Inst, unsigned Val, uint64_t Addr,
                                  const MCDisassembler *Decoder) {
  const auto *DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
  return addOperand(Inst, DAsm->decodeBoolReg(Inst, Val));
}

static DecodeStatus decodeSplitBarrier(MCInst &Inst, unsigned Val,
                                       uint64_t Addr,
                                       const MCDisassembler *Decoder) {
  const auto *DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
  return addOperand(Inst, DAsm->decodeSplitBarrier(Inst, Val));
}

static DecodeStatus decodeDpp8FI(MCInst &Inst, unsigned Val, uint64_t Addr,
                                 const MCDisassembler *Decoder) {
  const auto *DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
  return addOperand(Inst, DAsm->decodeDpp8FI(Val));
}

#define DECODE_OPERAND(StaticDecoderName, DecoderName)                         \
  static DecodeStatus StaticDecoderName(MCInst &Inst, unsigned Imm,            \
                                        uint64_t /*Addr*/,                     \
                                        const MCDisassembler *Decoder) {       \
    auto DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);              \
    return addOperand(Inst, DAsm->DecoderName(Imm));                           \
  }

// Decoder for registers, decoding directly via RegClassID. The 8-bit Imm is
// the register number. Used for VGPR-only and AGPR-only operands.
#define DECODE_OPERAND_REG_8(RegClass)                                         \
  static DecodeStatus Decode##RegClass##RegisterClass(                         \
      MCInst &Inst, unsigned Imm, uint64_t /*Addr*/,                           \
      const MCDisassembler *Decoder) {                                         \
    assert(Imm < (1 << 8) && "8-bit encoding");                                \
    auto DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);              \
    return addOperand(                                                         \
        Inst, DAsm->createRegOperand(AMDGPU::RegClass##RegClassID, Imm));      \
  }
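
// For example, DECODE_OPERAND_REG_8(VReg_128) expands to a
// DecodeVReg_128RegisterClass() callback that maps the raw 8-bit register
// number straight into AMDGPU::VReg_128RegClassID via createRegOperand().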

#define DECODE_SrcOp(Name, EncSize, OpWidth, EncImm)                           \
  static DecodeStatus Name(MCInst &Inst, unsigned Imm, uint64_t /*Addr*/,      \
                           const MCDisassembler *Decoder) {                    \
    assert(Imm < (1 << EncSize) && #EncSize "-bit encoding");                  \
    auto DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);              \
    return addOperand(Inst, DAsm->decodeSrcOp(Inst, OpWidth, EncImm));         \
  }

static DecodeStatus decodeSrcOp(MCInst &Inst, unsigned EncSize,
                                unsigned OpWidth, unsigned Imm, unsigned EncImm,
                                const MCDisassembler *Decoder) {
  assert(Imm < (1U << EncSize) && "Operand doesn't fit encoding!");
  const auto *DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
  return addOperand(Inst, DAsm->decodeSrcOp(Inst, OpWidth, EncImm));
}

// Decoder for registers. The 7-bit Imm is the register number; decodeSrcOp
// determines the register class. Used for SGPR-only operands.
#define DECODE_OPERAND_SREG_7(RegClass, OpWidth)                               \
  DECODE_SrcOp(Decode##RegClass##RegisterClass, 7, OpWidth, Imm)

#define DECODE_OPERAND_SREG_8(RegClass, OpWidth)                               \
  DECODE_SrcOp(Decode##RegClass##RegisterClass, 8, OpWidth, Imm)

// Decoder for registers. Imm (10-bit): Imm{7-0} is the register number,
// Imm{9} is acc (AGPR vs. VGPR), and Imm{8} should be 0 (see VOP3Pe_SMFMAC).
// Set Imm{8} to 1 (IS_VGPR) to decode using 'enum10' from decodeSrcOp.
// Used for AV_ register classes (AGPR-only or VGPR-only register operands).
template <unsigned OpWidth>
static DecodeStatus decodeAV10(MCInst &Inst, unsigned Imm, uint64_t /* Addr */,
                               const MCDisassembler *Decoder) {
  return decodeSrcOp(Inst, 10, OpWidth, Imm, Imm | AMDGPU::EncValues::IS_VGPR,
                     Decoder);
}

// Decoder for Src (9-bit encoding) registers only.
template <unsigned OpWidth>
static DecodeStatus decodeSrcReg9(MCInst &Inst, unsigned Imm,
                                  uint64_t /* Addr */,
                                  const MCDisassembler *Decoder) {
  return decodeSrcOp(Inst, 9, OpWidth, Imm, Imm, Decoder);
}

// Decoder for Src (9-bit encoding) AGPR registers only. The register number
// is encoded in 9 bits; set Imm{9} (the acc bit) and decode using 'enum10'
// from decodeSrcOp.
template <unsigned OpWidth>
static DecodeStatus decodeSrcA9(MCInst &Inst, unsigned Imm, uint64_t /* Addr */,
                                const MCDisassembler *Decoder) {
  return decodeSrcOp(Inst, 9, OpWidth, Imm, Imm | 512, Decoder);
}
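
// For illustration (values derived from AMDGPU::EncValues): a 9-bit source
// encoding of 258 (0x102, i.e. VGPR_MIN + 2) passed through decodeSrcA9
// becomes the enum10 value 770; bit 9 marks it as an AGPR, so it decodes to
// a2 rather than v2.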

// Decoder for 'enum10' from decodeSrcOp: Imm{8-0} is the 9-bit Src encoding
// and Imm{9} is acc. Registers only.
template <unsigned OpWidth>
static DecodeStatus decodeSrcAV10(MCInst &Inst, unsigned Imm,
                                  uint64_t /* Addr */,
                                  const MCDisassembler *Decoder) {
  return decodeSrcOp(Inst, 10, OpWidth, Imm, Imm, Decoder);
}

// Decoder for RegisterOperands using the 9-bit Src encoding. The operand can
// be a register from RegClass or an immediate. Registers that don't belong to
// RegClass will still be decoded, and the InstPrinter will report a warning.
// An immediate will be decoded into a constant matching the OperandType
// (important for floating-point types).
template <unsigned OpWidth>
static DecodeStatus decodeSrcRegOrImm9(MCInst &Inst, unsigned Imm,
                                       uint64_t /* Addr */,
                                       const MCDisassembler *Decoder) {
  return decodeSrcOp(Inst, 9, OpWidth, Imm, Imm, Decoder);
}

// Decoder for Src (9-bit encoding) AGPR or immediate. Set Imm{9} (the acc
// bit) and decode using 'enum10' from decodeSrcOp.
template <unsigned OpWidth>
static DecodeStatus decodeSrcRegOrImmA9(MCInst &Inst, unsigned Imm,
                                        uint64_t /* Addr */,
                                        const MCDisassembler *Decoder) {
  return decodeSrcOp(Inst, 9, OpWidth, Imm, Imm | 512, Decoder);
}

// Default decoders generated by tablegen: 'Decode<RegClass>RegisterClass'
// when RegisterClass is used as an operand. Most often used for destination
// operands.

DECODE_OPERAND_REG_8(VGPR_32)
DECODE_OPERAND_REG_8(VGPR_32_Lo128)
DECODE_OPERAND_REG_8(VReg_64)
DECODE_OPERAND_REG_8(VReg_96)
DECODE_OPERAND_REG_8(VReg_128)
DECODE_OPERAND_REG_8(VReg_192)
DECODE_OPERAND_REG_8(VReg_256)
DECODE_OPERAND_REG_8(VReg_288)
DECODE_OPERAND_REG_8(VReg_320)
DECODE_OPERAND_REG_8(VReg_352)
DECODE_OPERAND_REG_8(VReg_384)
DECODE_OPERAND_REG_8(VReg_512)
DECODE_OPERAND_REG_8(VReg_1024)

DECODE_OPERAND_SREG_7(SReg_32, 32)
DECODE_OPERAND_SREG_7(SReg_32_XM0, 32)
DECODE_OPERAND_SREG_7(SReg_32_XEXEC, 32)
DECODE_OPERAND_SREG_7(SReg_32_XM0_XEXEC, 32)
DECODE_OPERAND_SREG_7(SReg_32_XEXEC_HI, 32)
DECODE_OPERAND_SREG_7(SReg_64_XEXEC, 64)
DECODE_OPERAND_SREG_7(SReg_64_XEXEC_XNULL, 64)
DECODE_OPERAND_SREG_7(SReg_96, 96)
DECODE_OPERAND_SREG_7(SReg_128, 128)
DECODE_OPERAND_SREG_7(SReg_128_XNULL, 128)
DECODE_OPERAND_SREG_7(SReg_256, 256)
DECODE_OPERAND_SREG_7(SReg_256_XNULL, 256)
DECODE_OPERAND_SREG_7(SReg_512, 512)

DECODE_OPERAND_SREG_8(SReg_64, 64)

DECODE_OPERAND_REG_8(AGPR_32)
DECODE_OPERAND_REG_8(AReg_128)
DECODE_OPERAND_REG_8(AReg_256)
DECODE_OPERAND_REG_8(AReg_512)
DECODE_OPERAND_REG_8(AReg_1024)

static DecodeStatus DecodeVGPR_16RegisterClass(MCInst &Inst, unsigned Imm,
                                               uint64_t /*Addr*/,
                                               const MCDisassembler *Decoder) {
  assert(isUInt<10>(Imm) && "10-bit encoding expected");
  assert((Imm & (1 << 8)) == 0 && "Imm{8} should not be used");

  bool IsHi = Imm & (1 << 9);
  unsigned RegIdx = Imm & 0xff;
  const auto *DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
  return addOperand(Inst, DAsm->createVGPR16Operand(RegIdx, IsHi));
}

static DecodeStatus
DecodeVGPR_16_Lo128RegisterClass(MCInst &Inst, unsigned Imm, uint64_t /*Addr*/,
                                 const MCDisassembler *Decoder) {
  assert(isUInt<8>(Imm) && "8-bit encoding expected");

  bool IsHi = Imm & (1 << 7);
  unsigned RegIdx = Imm & 0x7f;
  const auto *DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
  return addOperand(Inst, DAsm->createVGPR16Operand(RegIdx, IsHi));
}

template <unsigned OpWidth>
static DecodeStatus decodeOperand_VSrcT16_Lo128(MCInst &Inst, unsigned Imm,
                                                uint64_t /*Addr*/,
                                                const MCDisassembler *Decoder) {
  assert(isUInt<9>(Imm) && "9-bit encoding expected");

  const auto *DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
  if (Imm & AMDGPU::EncValues::IS_VGPR) {
    bool IsHi = Imm & (1 << 7);
    unsigned RegIdx = Imm & 0x7f;
    return addOperand(Inst, DAsm->createVGPR16Operand(RegIdx, IsHi));
  }
  return addOperand(Inst, DAsm->decodeNonVGPRSrcOp(Inst, OpWidth, Imm & 0xFF));
}

template <unsigned OpWidth>
static DecodeStatus decodeOperand_VSrcT16(MCInst &Inst, unsigned Imm,
                                          uint64_t /*Addr*/,
                                          const MCDisassembler *Decoder) {
  assert(isUInt<10>(Imm) && "10-bit encoding expected");

  const auto *DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
  if (Imm & AMDGPU::EncValues::IS_VGPR) {
    bool IsHi = Imm & (1 << 9);
    unsigned RegIdx = Imm & 0xff;
    return addOperand(Inst, DAsm->createVGPR16Operand(RegIdx, IsHi));
  }
  return addOperand(Inst, DAsm->decodeNonVGPRSrcOp(Inst, OpWidth, Imm & 0xFF));
}

static DecodeStatus decodeOperand_VGPR_16(MCInst &Inst, unsigned Imm,
                                          uint64_t /*Addr*/,
                                          const MCDisassembler *Decoder) {
  assert(isUInt<10>(Imm) && "10-bit encoding expected");
  assert(Imm & AMDGPU::EncValues::IS_VGPR && "VGPR expected");

  const auto *DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);

  bool IsHi = Imm & (1 << 9);
  unsigned RegIdx = Imm & 0xff;
  return addOperand(Inst, DAsm->createVGPR16Operand(RegIdx, IsHi));
}
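
// For illustration: in this 10-bit true16 encoding, bit 8 (IS_VGPR = 0x100)
// flags a VGPR and bit 9 selects the high half, so Imm = 0x305 decodes as
// RegIdx = 5 with IsHi set, i.e. v5.h.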

static DecodeStatus decodeOperand_KImmFP(MCInst &Inst, unsigned Imm,
                                         uint64_t Addr,
                                         const MCDisassembler *Decoder) {
  const auto *DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
  return addOperand(Inst, DAsm->decodeMandatoryLiteralConstant(Imm));
}

static DecodeStatus decodeOperand_KImmFP64(MCInst &Inst, uint64_t Imm,
                                           uint64_t Addr,
                                           const MCDisassembler *Decoder) {
  const auto *DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
  return addOperand(Inst, DAsm->decodeMandatoryLiteral64Constant(Imm));
}

static DecodeStatus decodeOperandVOPDDstY(MCInst &Inst, unsigned Val,
                                          uint64_t Addr, const void *Decoder) {
  const auto *DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
  return addOperand(Inst, DAsm->decodeVOPDDstYOp(Inst, Val));
}

static DecodeStatus decodeAVLdSt(MCInst &Inst, unsigned Imm, unsigned Opw,
                                 const MCDisassembler *Decoder) {
  const auto *DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
  return addOperand(Inst, DAsm->decodeSrcOp(Inst, Opw, Imm | 256));
}

template <unsigned Opw>
static DecodeStatus decodeAVLdSt(MCInst &Inst, unsigned Imm,
                                 uint64_t /* Addr */,
                                 const MCDisassembler *Decoder) {
  return decodeAVLdSt(Inst, Imm, Opw, Decoder);
}

static DecodeStatus decodeOperand_VSrc_f64(MCInst &Inst, unsigned Imm,
                                           uint64_t Addr,
                                           const MCDisassembler *Decoder) {
  assert(Imm < (1 << 9) && "9-bit encoding");
  const auto *DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
  return addOperand(Inst, DAsm->decodeSrcOp(Inst, 64, Imm));
}

#define DECODE_SDWA(DecName) \
DECODE_OPERAND(decodeSDWA##DecName, decodeSDWA##DecName)

DECODE_SDWA(Src32)
DECODE_SDWA(Src16)
DECODE_SDWA(VopcDst)

static DecodeStatus decodeVersionImm(MCInst &Inst, unsigned Imm,
                                     uint64_t /* Addr */,
                                     const MCDisassembler *Decoder) {
  const auto *DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
  return addOperand(Inst, DAsm->decodeVersionImm(Imm));
}

#include "AMDGPUGenDisassemblerTables.inc"

namespace {
// Define bitwidths for various types used to instantiate the decoder.
template <> constexpr uint32_t InsnBitWidth<uint32_t> = 32;
template <> constexpr uint32_t InsnBitWidth<uint64_t> = 64;
template <> constexpr uint32_t InsnBitWidth<std::bitset<96>> = 96;
template <> constexpr uint32_t InsnBitWidth<std::bitset<128>> = 128;
} // namespace

//===----------------------------------------------------------------------===//
//
//===----------------------------------------------------------------------===//

template <typename InsnType>
DecodeStatus AMDGPUDisassembler::tryDecodeInst(const uint8_t *Table, MCInst &MI,
                                               InsnType Inst, uint64_t Address,
                                               raw_ostream &Comments) const {
  assert(MI.getOpcode() == 0);
  assert(MI.getNumOperands() == 0);
  MCInst TmpInst;
  HasLiteral = false;
  const auto SavedBytes = Bytes;

  SmallString<64> LocalComments;
  raw_svector_ostream LocalCommentStream(LocalComments);
  CommentStream = &LocalCommentStream;

  DecodeStatus Res =
      decodeInstruction(Table, TmpInst, Inst, Address, this, STI);

  CommentStream = nullptr;

  if (Res != MCDisassembler::Fail) {
    MI = TmpInst;
    Comments << LocalComments;
    return Res;
  }
  Bytes = SavedBytes;
  return MCDisassembler::Fail;
}

template <typename InsnType>
DecodeStatus
AMDGPUDisassembler::tryDecodeInst(const uint8_t *Table1, const uint8_t *Table2,
                                  MCInst &MI, InsnType Inst, uint64_t Address,
                                  raw_ostream &Comments) const {
  for (const uint8_t *T : {Table1, Table2}) {
    if (DecodeStatus Res = tryDecodeInst(T, MI, Inst, Address, Comments))
      return Res;
  }
  return MCDisassembler::Fail;
}
462
463template <typename T> static inline T eatBytes(ArrayRef<uint8_t>& Bytes) {
464 assert(Bytes.size() >= sizeof(T));
465 const auto Res =
467 Bytes = Bytes.slice(sizeof(T));
468 return Res;
469}

static inline std::bitset<96> eat12Bytes(ArrayRef<uint8_t> &Bytes) {
  using namespace llvm::support::endian;
  assert(Bytes.size() >= 12);
  std::bitset<96> Lo(read<uint64_t, endianness::little>(Bytes.data()));
  Bytes = Bytes.slice(8);
  std::bitset<96> Hi(read<uint32_t, endianness::little>(Bytes.data()));
  Bytes = Bytes.slice(4);
  return (Hi << 64) | Lo;
}

static inline std::bitset<128> eat16Bytes(ArrayRef<uint8_t> &Bytes) {
  using namespace llvm::support::endian;
  assert(Bytes.size() >= 16);
  std::bitset<128> Lo(read<uint64_t, endianness::little>(Bytes.data()));
  Bytes = Bytes.slice(8);
  std::bitset<128> Hi(read<uint64_t, endianness::little>(Bytes.data()));
  Bytes = Bytes.slice(8);
  return (Hi << 64) | Lo;
}

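// For illustration: eat12Bytes() composes the 96-bit word little-endian, so
// the low qword (bytes 0-7) lands in bits 63:0 and the trailing dword
// (bytes 8-11) in bits 95:64 of the returned bitset.
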
void AMDGPUDisassembler::decodeImmOperands(MCInst &MI,
                                           const MCInstrInfo &MCII) const {
  const MCInstrDesc &Desc = MCII.get(MI.getOpcode());
  for (auto [OpNo, OpDesc] : enumerate(Desc.operands())) {
    if (OpNo >= MI.getNumOperands())
      continue;

    // TODO: Fix V_DUAL_FMAMK_F32_X_FMAAK_F32_gfx12 vsrc operands,
    // defined to take VGPR_32, but in reality allowing inline constants.
    bool IsSrc = AMDGPU::OPERAND_SRC_FIRST <= OpDesc.OperandType &&
                 OpDesc.OperandType <= AMDGPU::OPERAND_SRC_LAST;
    if (!IsSrc && OpDesc.OperandType != MCOI::OPERAND_REGISTER)
      continue;

    MCOperand &Op = MI.getOperand(OpNo);
    if (!Op.isImm())
      continue;
    int64_t Imm = Op.getImm();
    if (AMDGPU::EncValues::INLINE_INTEGER_C_MIN <= Imm &&
        Imm <= AMDGPU::EncValues::INLINE_INTEGER_C_MAX) {
      Op = decodeIntImmed(Imm);
      continue;
    }

    if (Imm == AMDGPU::EncValues::LITERAL_CONST) {
      Op = decodeLiteralConstant(Desc, OpDesc);
      continue;
    }

    if (AMDGPU::EncValues::INLINE_FLOATING_C_MIN <= Imm &&
        Imm <= AMDGPU::EncValues::INLINE_FLOATING_C_MAX) {
      switch (OpDesc.OperandType) {
      case AMDGPU::OPERAND_REG_IMM_BF16:
      case AMDGPU::OPERAND_REG_INLINE_C_BF16:
      case AMDGPU::OPERAND_REG_IMM_V2BF16:
      case AMDGPU::OPERAND_REG_INLINE_C_V2BF16:
        Imm = getInlineImmValBF16(Imm);
        break;
      case AMDGPU::OPERAND_REG_IMM_INT16:
      case AMDGPU::OPERAND_REG_INLINE_C_INT16:
      case AMDGPU::OPERAND_REG_IMM_FP16:
      case AMDGPU::OPERAND_REG_INLINE_C_FP16:
        Imm = getInlineImmValF16(Imm);
        break;
      case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
        Imm = getInlineImmValF16(Imm);
        break;
      case AMDGPU::OPERAND_REG_IMM_V2FP16: {
        // V_PK_FMAC_F16 on GFX11+ duplicates the f16 inline constant to both
        // halves, so we need to produce the duplicated value for correct
        // round-trip.
        if (isGFX11Plus()) {
          int64_t F16Val = getInlineImmValF16(Imm);
          Imm = (F16Val << 16) | (F16Val & 0xFFFF);
        } else {
          Imm = getInlineImmValF16(Imm);
        }
        break;
      }
      case AMDGPU::OPERAND_REG_IMM_INT64:
      case AMDGPU::OPERAND_REG_INLINE_C_INT64:
      case AMDGPU::OPERAND_REG_IMM_FP64:
      case AMDGPU::OPERAND_REG_INLINE_C_FP64:
      case AMDGPU::OPERAND_REG_INLINE_AC_FP64:
        Imm = getInlineImmVal64(Imm);
        break;
      default:
        Imm = getInlineImmVal32(Imm);
      }
      Op.setImm(Imm);
    }
  }
}
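
// For illustration: a src encoding of 242 on an f32 operand is the inline
// constant +1.0 and becomes bit_cast<uint32_t>(1.0f) = 0x3F800000 via
// getInlineImmVal32(); integer inline constants (encodings 128..208) go
// through decodeIntImmed() instead.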

DecodeStatus AMDGPUDisassembler::getInstruction(MCInst &MI, uint64_t &Size,
                                                ArrayRef<uint8_t> Bytes_,
                                                uint64_t Address,
                                                raw_ostream &CS) const {
  unsigned MaxInstBytesNum =
      std::min((size_t)TargetMaxInstBytes, Bytes_.size());
  Bytes = Bytes_.slice(0, MaxInstBytesNum);

  // In case the opcode is not recognized we'll assume a Size of 4 bytes
  // (unless there are fewer bytes left). This will be overridden on success.
  Size = std::min((size_t)4, Bytes_.size());

  do {
    // ToDo: better to switch encoding length using some bit predicate
    // but it is unknown yet, so try all we can

    // Try to decode DPP and SDWA first to solve conflict with VOP1 and VOP2
    // encodings
    if (isGFX1250() && Bytes.size() >= 16) {
      std::bitset<128> DecW = eat16Bytes(Bytes);
      if (tryDecodeInst(DecoderTableGFX1250128, MI, DecW, Address, CS))
        break;
      Bytes = Bytes_.slice(0, MaxInstBytesNum);
    }

    if (isGFX11Plus() && Bytes.size() >= 12) {
      std::bitset<96> DecW = eat12Bytes(Bytes);

      if (isGFX11() &&
          tryDecodeInst(DecoderTableGFX1196, DecoderTableGFX11_FAKE1696, MI,
                        DecW, Address, CS))
        break;

      if (isGFX1250() &&
          tryDecodeInst(DecoderTableGFX125096, DecoderTableGFX1250_FAKE1696, MI,
                        DecW, Address, CS))
        break;

      if (isGFX12() &&
          tryDecodeInst(DecoderTableGFX1296, DecoderTableGFX12_FAKE1696, MI,
                        DecW, Address, CS))
        break;

      if (isGFX12() &&
          tryDecodeInst(DecoderTableGFX12W6496, MI, DecW, Address, CS))
        break;

      if (STI.hasFeature(AMDGPU::Feature64BitLiterals)) {
        // Return 8 bytes for a potential literal.
        Bytes = Bytes_.slice(4, MaxInstBytesNum - 4);

        if (isGFX1250() &&
            tryDecodeInst(DecoderTableGFX125096, MI, DecW, Address, CS))
          break;
      }

      // Reinitialize Bytes
      Bytes = Bytes_.slice(0, MaxInstBytesNum);

    } else if (Bytes.size() >= 16 &&
               STI.hasFeature(AMDGPU::FeatureGFX950Insts)) {
      std::bitset<128> DecW = eat16Bytes(Bytes);
      if (tryDecodeInst(DecoderTableGFX940128, MI, DecW, Address, CS))
        break;

      // Reinitialize Bytes
      Bytes = Bytes_.slice(0, MaxInstBytesNum);
    }

    if (Bytes.size() >= 8) {
      const uint64_t QW = eatBytes<uint64_t>(Bytes);

      if (STI.hasFeature(AMDGPU::FeatureGFX10_BEncoding) &&
          tryDecodeInst(DecoderTableGFX10_B64, MI, QW, Address, CS))
        break;

      if (STI.hasFeature(AMDGPU::FeatureUnpackedD16VMem) &&
          tryDecodeInst(DecoderTableGFX80_UNPACKED64, MI, QW, Address, CS))
        break;

      if (STI.hasFeature(AMDGPU::FeatureGFX950Insts) &&
          tryDecodeInst(DecoderTableGFX95064, MI, QW, Address, CS))
        break;

      // Some GFX9 subtargets repurposed the v_mad_mix_f32, v_mad_mixlo_f16 and
      // v_mad_mixhi_f16 for FMA variants. Try to decode using this special
      // table first so we print the correct name.
      if (STI.hasFeature(AMDGPU::FeatureFmaMixInsts) &&
          tryDecodeInst(DecoderTableGFX9_DL64, MI, QW, Address, CS))
        break;

      if (STI.hasFeature(AMDGPU::FeatureGFX940Insts) &&
          tryDecodeInst(DecoderTableGFX94064, MI, QW, Address, CS))
        break;

      if (STI.hasFeature(AMDGPU::FeatureGFX90AInsts) &&
          tryDecodeInst(DecoderTableGFX90A64, MI, QW, Address, CS))
        break;

      if ((isVI() || isGFX9()) &&
          tryDecodeInst(DecoderTableGFX864, MI, QW, Address, CS))
        break;

      if (isGFX9() && tryDecodeInst(DecoderTableGFX964, MI, QW, Address, CS))
        break;

      if (isGFX10() && tryDecodeInst(DecoderTableGFX1064, MI, QW, Address, CS))
        break;

      if (isGFX1250() &&
          tryDecodeInst(DecoderTableGFX125064, DecoderTableGFX1250_FAKE1664, MI,
                        QW, Address, CS))
        break;

      if (isGFX12() &&
          tryDecodeInst(DecoderTableGFX1264, DecoderTableGFX12_FAKE1664, MI, QW,
                        Address, CS))
        break;

      if (isGFX11() &&
          tryDecodeInst(DecoderTableGFX1164, DecoderTableGFX11_FAKE1664, MI, QW,
                        Address, CS))
        break;

      if (isGFX11() &&
          tryDecodeInst(DecoderTableGFX11W6464, MI, QW, Address, CS))
        break;

      if (isGFX12() &&
          tryDecodeInst(DecoderTableGFX12W6464, MI, QW, Address, CS))
        break;

      // Reinitialize Bytes
      Bytes = Bytes_.slice(0, MaxInstBytesNum);
    }

    // Try to decode a 32-bit instruction.
    if (Bytes.size() >= 4) {
      const uint32_t DW = eatBytes<uint32_t>(Bytes);

      if ((isVI() || isGFX9()) &&
          tryDecodeInst(DecoderTableGFX832, MI, DW, Address, CS))
        break;

      if (tryDecodeInst(DecoderTableAMDGPU32, MI, DW, Address, CS))
        break;

      if (isGFX9() && tryDecodeInst(DecoderTableGFX932, MI, DW, Address, CS))
        break;

      if (STI.hasFeature(AMDGPU::FeatureGFX950Insts) &&
          tryDecodeInst(DecoderTableGFX95032, MI, DW, Address, CS))
        break;

      if (STI.hasFeature(AMDGPU::FeatureGFX90AInsts) &&
          tryDecodeInst(DecoderTableGFX90A32, MI, DW, Address, CS))
        break;

      if (STI.hasFeature(AMDGPU::FeatureGFX10_BEncoding) &&
          tryDecodeInst(DecoderTableGFX10_B32, MI, DW, Address, CS))
        break;

      if (isGFX10() && tryDecodeInst(DecoderTableGFX1032, MI, DW, Address, CS))
        break;

      if (isGFX11() &&
          tryDecodeInst(DecoderTableGFX1132, DecoderTableGFX11_FAKE1632, MI, DW,
                        Address, CS))
        break;

      if (isGFX1250() &&
          tryDecodeInst(DecoderTableGFX125032, DecoderTableGFX1250_FAKE1632, MI,
                        DW, Address, CS))
        break;

      if (isGFX12() &&
          tryDecodeInst(DecoderTableGFX1232, DecoderTableGFX12_FAKE1632, MI, DW,
                        Address, CS))
        break;
    }

    return MCDisassembler::Fail;
  } while (false);

  DecodeStatus Status = MCDisassembler::Success;

  decodeImmOperands(MI, *MCII);

  if (MCII->get(MI.getOpcode()).TSFlags & SIInstrFlags::DPP) {
    if (isMacDPP(MI))
      convertMacDPPInst(MI);

    if (MCII->get(MI.getOpcode()).TSFlags & SIInstrFlags::VOP3P)
      convertVOP3PDPPInst(MI);
    else if (MCII->get(MI.getOpcode()).TSFlags & SIInstrFlags::VOPC)
      convertVOPCDPPInst(MI); // Special VOP3 case
    else if (AMDGPU::isVOPC64DPP(MI.getOpcode()))
      convertVOPC64DPPInst(MI); // Special VOP3 case
    else if (AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::dpp8) !=
             -1)
      convertDPP8Inst(MI);
    else if (MCII->get(MI.getOpcode()).TSFlags & SIInstrFlags::VOP3)
      convertVOP3DPPInst(MI); // Regular VOP3 case
  }

  if (AMDGPU::isMAC(MI.getOpcode())) {
    // Insert dummy unused src2_modifiers.
    insertNamedMCOperand(MI, MCOperand::createImm(0),
                         AMDGPU::OpName::src2_modifiers);
  }

  if (MI.getOpcode() == AMDGPU::V_CVT_SR_BF8_F32_e64_dpp ||
      MI.getOpcode() == AMDGPU::V_CVT_SR_FP8_F32_e64_dpp) {
    // Insert dummy unused src2_modifiers.
    insertNamedMCOperand(MI, MCOperand::createImm(0),
                         AMDGPU::OpName::src2_modifiers);
  }

  if ((MCII->get(MI.getOpcode()).TSFlags & SIInstrFlags::DS) &&
      !AMDGPU::hasGDS(STI)) {
    insertNamedMCOperand(MI, MCOperand::createImm(0), AMDGPU::OpName::gds);
  }

  if (MCII->get(MI.getOpcode()).TSFlags &
      (SIInstrFlags::MUBUF | SIInstrFlags::FLAT | SIInstrFlags::SMRD)) {
    int CPolPos = AMDGPU::getNamedOperandIdx(MI.getOpcode(),
                                             AMDGPU::OpName::cpol);
    if (CPolPos != -1) {
      unsigned CPol =
          (MCII->get(MI.getOpcode()).TSFlags & SIInstrFlags::IsAtomicRet)
              ? AMDGPU::CPol::GLC
              : 0;
      if (MI.getNumOperands() <= (unsigned)CPolPos) {
        insertNamedMCOperand(MI, MCOperand::createImm(CPol),
                             AMDGPU::OpName::cpol);
      } else if (CPol) {
        MI.getOperand(CPolPos).setImm(MI.getOperand(CPolPos).getImm() | CPol);
      }
    }
  }

  if ((MCII->get(MI.getOpcode()).TSFlags &
       (SIInstrFlags::MTBUF | SIInstrFlags::MUBUF)) &&
      (STI.hasFeature(AMDGPU::FeatureGFX90AInsts))) {
    // GFX90A lost TFE, its place is occupied by ACC.
    int TFEOpIdx =
        AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::tfe);
    if (TFEOpIdx != -1) {
      auto *TFEIter = MI.begin();
      std::advance(TFEIter, TFEOpIdx);
      MI.insert(TFEIter, MCOperand::createImm(0));
    }
  }

  // Validate buffer instruction offsets for GFX12+ - they must not be
  // negative.
  if (isGFX12Plus() && (MCII->get(MI.getOpcode()).TSFlags &
                        (SIInstrFlags::MUBUF | SIInstrFlags::MTBUF))) {
    int OffsetIdx =
        AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::offset);
    if (OffsetIdx != -1) {
      uint32_t Imm = MI.getOperand(OffsetIdx).getImm();
      int64_t SignedOffset = SignExtend64<24>(Imm);
      if (SignedOffset < 0)
        return MCDisassembler::Fail;
    }
  }

  if (MCII->get(MI.getOpcode()).TSFlags &
      (SIInstrFlags::MTBUF | SIInstrFlags::MUBUF)) {
    int SWZOpIdx =
        AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::swz);
    if (SWZOpIdx != -1) {
      auto *SWZIter = MI.begin();
      std::advance(SWZIter, SWZOpIdx);
      MI.insert(SWZIter, MCOperand::createImm(0));
    }
  }

  const MCInstrDesc &Desc = MCII->get(MI.getOpcode());
  if (Desc.TSFlags & SIInstrFlags::MIMG) {
    int VAddr0Idx =
        AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::vaddr0);
    int RsrcIdx =
        AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::srsrc);
    unsigned NSAArgs = RsrcIdx - VAddr0Idx - 1;
    if (VAddr0Idx >= 0 && NSAArgs > 0) {
      unsigned NSAWords = (NSAArgs + 3) / 4;
      if (Bytes.size() < 4 * NSAWords)
        return MCDisassembler::Fail;
      for (unsigned i = 0; i < NSAArgs; ++i) {
        const unsigned VAddrIdx = VAddr0Idx + 1 + i;
        auto VAddrRCID =
            MCII->getOpRegClassID(Desc.operands()[VAddrIdx], HwModeRegClass);
        MI.insert(MI.begin() + VAddrIdx, createRegOperand(VAddrRCID, Bytes[i]));
      }
      Bytes = Bytes.slice(4 * NSAWords);
    }

    convertMIMGInst(MI);
  }

  if (MCII->get(MI.getOpcode()).TSFlags &
      (SIInstrFlags::VIMAGE | SIInstrFlags::VSAMPLE))
    convertMIMGInst(MI);

  if (MCII->get(MI.getOpcode()).TSFlags & SIInstrFlags::EXP)
    convertEXPInst(MI);

  if (MCII->get(MI.getOpcode()).TSFlags & SIInstrFlags::VINTERP)
    convertVINTERPInst(MI);

  if (MCII->get(MI.getOpcode()).TSFlags & SIInstrFlags::SDWA)
    convertSDWAInst(MI);

  if (MCII->get(MI.getOpcode()).TSFlags & SIInstrFlags::IsMAI)
    convertMAIInst(MI);

  if (MCII->get(MI.getOpcode()).TSFlags & SIInstrFlags::IsWMMA)
    convertWMMAInst(MI);

  int VDstIn_Idx = AMDGPU::getNamedOperandIdx(MI.getOpcode(),
                                              AMDGPU::OpName::vdst_in);
  if (VDstIn_Idx != -1) {
    int Tied = MCII->get(MI.getOpcode()).getOperandConstraint(VDstIn_Idx,
                             MCOI::OperandConstraint::TIED_TO);
    if (Tied != -1 && (MI.getNumOperands() <= (unsigned)VDstIn_Idx ||
         !MI.getOperand(VDstIn_Idx).isReg() ||
         MI.getOperand(VDstIn_Idx).getReg() != MI.getOperand(Tied).getReg())) {
      if (MI.getNumOperands() > (unsigned)VDstIn_Idx)
        MI.erase(&MI.getOperand(VDstIn_Idx));
      insertNamedMCOperand(MI,
                           MCOperand::createReg(MI.getOperand(Tied).getReg()),
                           AMDGPU::OpName::vdst_in);
    }
  }

  bool IsSOPK = MCII->get(MI.getOpcode()).TSFlags & SIInstrFlags::SOPK;
  if (AMDGPU::hasNamedOperand(MI.getOpcode(), AMDGPU::OpName::imm) && !IsSOPK)
    convertFMAanyK(MI);

  // Some VOPC instructions, e.g., v_cmpx_f_f64, use VOP3 encoding and
  // have EXEC as implicit destination. Issue a warning if encoding for
  // vdst is not EXEC.
  if ((MCII->get(MI.getOpcode()).TSFlags & SIInstrFlags::VOP3) &&
      MCII->get(MI.getOpcode()).getNumDefs() == 0 &&
      MCII->get(MI.getOpcode()).hasImplicitDefOfPhysReg(AMDGPU::EXEC)) {
    auto ExecEncoding = MRI.getEncodingValue(AMDGPU::EXEC_LO);
    if (Bytes_[0] != ExecEncoding)
      Status = MCDisassembler::SoftFail;
  }

  Size = MaxInstBytesNum - Bytes.size();
  return Status;
}

void AMDGPUDisassembler::convertEXPInst(MCInst &MI) const {
  if (STI.hasFeature(AMDGPU::FeatureGFX11Insts)) {
    // The MCInst still has these fields even though they are no longer encoded
    // in the GFX11 instruction.
    insertNamedMCOperand(MI, MCOperand::createImm(0), AMDGPU::OpName::vm);
    insertNamedMCOperand(MI, MCOperand::createImm(0), AMDGPU::OpName::compr);
  }
}

void AMDGPUDisassembler::convertVINTERPInst(MCInst &MI) const {
  if (MI.getOpcode() == AMDGPU::V_INTERP_P10_F16_F32_inreg_t16_gfx11 ||
      MI.getOpcode() == AMDGPU::V_INTERP_P10_F16_F32_inreg_fake16_gfx11 ||
      MI.getOpcode() == AMDGPU::V_INTERP_P10_F16_F32_inreg_t16_gfx12 ||
      MI.getOpcode() == AMDGPU::V_INTERP_P10_F16_F32_inreg_fake16_gfx12 ||
      MI.getOpcode() == AMDGPU::V_INTERP_P10_RTZ_F16_F32_inreg_t16_gfx11 ||
      MI.getOpcode() == AMDGPU::V_INTERP_P10_RTZ_F16_F32_inreg_fake16_gfx11 ||
      MI.getOpcode() == AMDGPU::V_INTERP_P10_RTZ_F16_F32_inreg_t16_gfx12 ||
      MI.getOpcode() == AMDGPU::V_INTERP_P10_RTZ_F16_F32_inreg_fake16_gfx12 ||
      MI.getOpcode() == AMDGPU::V_INTERP_P2_F16_F32_inreg_t16_gfx11 ||
      MI.getOpcode() == AMDGPU::V_INTERP_P2_F16_F32_inreg_fake16_gfx11 ||
      MI.getOpcode() == AMDGPU::V_INTERP_P2_F16_F32_inreg_t16_gfx12 ||
      MI.getOpcode() == AMDGPU::V_INTERP_P2_F16_F32_inreg_fake16_gfx12 ||
      MI.getOpcode() == AMDGPU::V_INTERP_P2_RTZ_F16_F32_inreg_t16_gfx11 ||
      MI.getOpcode() == AMDGPU::V_INTERP_P2_RTZ_F16_F32_inreg_fake16_gfx11 ||
      MI.getOpcode() == AMDGPU::V_INTERP_P2_RTZ_F16_F32_inreg_t16_gfx12 ||
      MI.getOpcode() == AMDGPU::V_INTERP_P2_RTZ_F16_F32_inreg_fake16_gfx12) {
    // The MCInst has this field that is not directly encoded in the
    // instruction.
    insertNamedMCOperand(MI, MCOperand::createImm(0), AMDGPU::OpName::op_sel);
  }
}

void AMDGPUDisassembler::convertSDWAInst(MCInst &MI) const {
  if (STI.hasFeature(AMDGPU::FeatureGFX9) ||
      STI.hasFeature(AMDGPU::FeatureGFX10)) {
    if (AMDGPU::hasNamedOperand(MI.getOpcode(), AMDGPU::OpName::sdst))
      // VOPC - insert clamp
      insertNamedMCOperand(MI, MCOperand::createImm(0), AMDGPU::OpName::clamp);
  } else if (STI.hasFeature(AMDGPU::FeatureVolcanicIslands)) {
    int SDst = AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::sdst);
    if (SDst != -1) {
      // VOPC - insert VCC register as sdst
      insertNamedMCOperand(MI, createRegOperand(AMDGPU::VCC),
                           AMDGPU::OpName::sdst);
    } else {
      // VOP1/2 - insert omod if present in instruction
      insertNamedMCOperand(MI, MCOperand::createImm(0), AMDGPU::OpName::omod);
    }
  }
}

/// Adjust the register values used by V_MFMA_F8F6F4_f8_f8 instructions to the
/// appropriate subregister for the used format width.
static void adjustMFMA_F8F6F4OpRegClass(const MCRegisterInfo &MRI,
                                        MCOperand &MO, uint8_t NumRegs) {
  switch (NumRegs) {
  case 4:
    return MO.setReg(MRI.getSubReg(MO.getReg(), AMDGPU::sub0_sub1_sub2_sub3));
  case 6:
    return MO.setReg(
        MRI.getSubReg(MO.getReg(), AMDGPU::sub0_sub1_sub2_sub3_sub4_sub5));
  case 8:
    if (MCRegister NewReg = MRI.getSubReg(
            MO.getReg(), AMDGPU::sub0_sub1_sub2_sub3_sub4_sub5_sub6_sub7)) {
      MO.setReg(NewReg);
    }
    return;
  case 12: {
    // There is no 384-bit subreg index defined.
    MCRegister BaseReg = MRI.getSubReg(MO.getReg(), AMDGPU::sub0);
    MCRegister NewReg = MRI.getMatchingSuperReg(
        BaseReg, AMDGPU::sub0, &MRI.getRegClass(AMDGPU::VReg_384RegClassID));
    return MO.setReg(NewReg);
  }
  case 16:
    // No-op in cases where one operand is still f8/bf8.
    return;
  default:
    llvm_unreachable("Unexpected size for mfma/wmma f8f6f4 operand");
  }
}

/// f8f6f4 instructions have different pseudos depending on the used formats.
/// In the disassembler table, we only have the variants with the largest
/// register classes, which assume an fp8/bf8 format for both operands. The
/// actual register class depends on the format in the blgp and cbsz operands.
/// Adjust the register classes depending on the used format.
void AMDGPUDisassembler::convertMAIInst(MCInst &MI) const {
  int BlgpIdx =
      AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::blgp);
  if (BlgpIdx == -1)
    return;

  int CbszIdx =
      AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::cbsz);

  unsigned CBSZ = MI.getOperand(CbszIdx).getImm();
  unsigned BLGP = MI.getOperand(BlgpIdx).getImm();

  const AMDGPU::MFMA_F8F6F4_Info *AdjustedRegClassOpcode =
      AMDGPU::getMFMA_F8F6F4_WithFormatArgs(CBSZ, BLGP, MI.getOpcode());
  if (!AdjustedRegClassOpcode ||
      AdjustedRegClassOpcode->Opcode == MI.getOpcode())
    return;

  MI.setOpcode(AdjustedRegClassOpcode->Opcode);
  int Src0Idx =
      AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::src0);
  int Src1Idx =
      AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::src1);
  adjustMFMA_F8F6F4OpRegClass(MRI, MI.getOperand(Src0Idx),
                              AdjustedRegClassOpcode->NumRegsSrcA);
  adjustMFMA_F8F6F4OpRegClass(MRI, MI.getOperand(Src1Idx),
                              AdjustedRegClassOpcode->NumRegsSrcB);
}

void AMDGPUDisassembler::convertWMMAInst(MCInst &MI) const {
  int FmtAIdx =
      AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::matrix_a_fmt);
  if (FmtAIdx == -1)
    return;

  int FmtBIdx =
      AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::matrix_b_fmt);

  unsigned FmtA = MI.getOperand(FmtAIdx).getImm();
  unsigned FmtB = MI.getOperand(FmtBIdx).getImm();

  const AMDGPU::MFMA_F8F6F4_Info *AdjustedRegClassOpcode =
      AMDGPU::getWMMA_F8F6F4_WithFormatArgs(FmtA, FmtB, MI.getOpcode());
  if (!AdjustedRegClassOpcode ||
      AdjustedRegClassOpcode->Opcode == MI.getOpcode())
    return;

  MI.setOpcode(AdjustedRegClassOpcode->Opcode);
  int Src0Idx =
      AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::src0);
  int Src1Idx =
      AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::src1);
  adjustMFMA_F8F6F4OpRegClass(MRI, MI.getOperand(Src0Idx),
                              AdjustedRegClassOpcode->NumRegsSrcA);
  adjustMFMA_F8F6F4OpRegClass(MRI, MI.getOperand(Src1Idx),
                              AdjustedRegClassOpcode->NumRegsSrcB);
}

struct VOPModifiers {
  unsigned OpSel = 0;
  unsigned OpSelHi = 0;
  unsigned NegLo = 0;
  unsigned NegHi = 0;
};

// Reconstruct values of VOP3/VOP3P operands such as op_sel.
// Note that these values do not affect disassembler output,
// so this is only necessary for consistency with src_modifiers.
static VOPModifiers collectVOPModifiers(const MCInst &MI,
                                        bool IsVOP3P = false) {
  VOPModifiers Modifiers;
  unsigned Opc = MI.getOpcode();
  const AMDGPU::OpName ModOps[] = {AMDGPU::OpName::src0_modifiers,
                                   AMDGPU::OpName::src1_modifiers,
                                   AMDGPU::OpName::src2_modifiers};
  for (int J = 0; J < 3; ++J) {
    int OpIdx = AMDGPU::getNamedOperandIdx(Opc, ModOps[J]);
    if (OpIdx == -1)
      continue;

    unsigned Val = MI.getOperand(OpIdx).getImm();

    Modifiers.OpSel |= !!(Val & SISrcMods::OP_SEL_0) << J;
    if (IsVOP3P) {
      Modifiers.OpSelHi |= !!(Val & SISrcMods::OP_SEL_1) << J;
      Modifiers.NegLo |= !!(Val & SISrcMods::NEG) << J;
      Modifiers.NegHi |= !!(Val & SISrcMods::NEG_HI) << J;
    } else if (J == 0) {
      Modifiers.OpSel |= !!(Val & SISrcMods::DST_OP_SEL) << 3;
    }
  }

  return Modifiers;
}
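
// For illustration: if only src1_modifiers has OP_SEL_0 set, the loop above
// yields Modifiers.OpSel == 0b0010; for plain VOP3 the dst op_sel bit comes
// from DST_OP_SEL in src0_modifiers and lands in bit 3.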

// Instructions decode the op_sel/suffix bits into the src_modifier
// operands. Copy those bits into the src operands for true16 VGPRs.
void AMDGPUDisassembler::convertTrue16OpSel(MCInst &MI) const {
  const unsigned Opc = MI.getOpcode();
  const MCRegisterClass &ConversionRC =
      MRI.getRegClass(AMDGPU::VGPR_16RegClassID);
  constexpr std::array<std::tuple<AMDGPU::OpName, AMDGPU::OpName, unsigned>, 4>
      OpAndOpMods = {{{AMDGPU::OpName::src0, AMDGPU::OpName::src0_modifiers,
                       SISrcMods::OP_SEL_0},
                      {AMDGPU::OpName::src1, AMDGPU::OpName::src1_modifiers,
                       SISrcMods::OP_SEL_0},
                      {AMDGPU::OpName::src2, AMDGPU::OpName::src2_modifiers,
                       SISrcMods::OP_SEL_0},
                      {AMDGPU::OpName::vdst, AMDGPU::OpName::src0_modifiers,
                       SISrcMods::DST_OP_SEL}}};
  for (const auto &[OpName, OpModsName, OpSelMask] : OpAndOpMods) {
    int OpIdx = AMDGPU::getNamedOperandIdx(Opc, OpName);
    int OpModsIdx = AMDGPU::getNamedOperandIdx(Opc, OpModsName);
    if (OpIdx == -1 || OpModsIdx == -1)
      continue;
    MCOperand &Op = MI.getOperand(OpIdx);
    if (!Op.isReg())
      continue;
    if (!ConversionRC.contains(Op.getReg()))
      continue;
    unsigned OpEnc = MRI.getEncodingValue(Op.getReg());
    const MCOperand &OpMods = MI.getOperand(OpModsIdx);
    unsigned ModVal = OpMods.getImm();
    if (ModVal & OpSelMask) { // isHi
      unsigned RegIdx = OpEnc & AMDGPU::HWEncoding::REG_IDX_MASK;
      Op.setReg(ConversionRC.getRegister(RegIdx * 2 + 1));
    }
  }
}

// MAC opcodes have special old and src2 operands.
// src2 is tied to dst, while old is not tied (but assumed to be).
bool AMDGPUDisassembler::isMacDPP(MCInst &MI) const {
  constexpr int DST_IDX = 0;
  auto Opcode = MI.getOpcode();
  const auto &Desc = MCII->get(Opcode);
  auto OldIdx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::old);

  if (OldIdx != -1 && Desc.getOperandConstraint(
                          OldIdx, MCOI::OperandConstraint::TIED_TO) == -1) {
    assert(AMDGPU::hasNamedOperand(Opcode, AMDGPU::OpName::src2));
    assert(Desc.getOperandConstraint(
               AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2),
               MCOI::OperandConstraint::TIED_TO) == DST_IDX);
    (void)DST_IDX;
    return true;
  }

  return false;
}

// Create dummy old operand and insert dummy unused src2_modifiers
void AMDGPUDisassembler::convertMacDPPInst(MCInst &MI) const {
  assert(MI.getNumOperands() + 1 < MCII->get(MI.getOpcode()).getNumOperands());
  insertNamedMCOperand(MI, MCOperand::createReg(0), AMDGPU::OpName::old);
  insertNamedMCOperand(MI, MCOperand::createImm(0),
                       AMDGPU::OpName::src2_modifiers);
}

void AMDGPUDisassembler::convertDPP8Inst(MCInst &MI) const {
  unsigned Opc = MI.getOpcode();

  int VDstInIdx =
      AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::vdst_in);
  if (VDstInIdx != -1)
    insertNamedMCOperand(MI, MI.getOperand(0), AMDGPU::OpName::vdst_in);

  unsigned DescNumOps = MCII->get(Opc).getNumOperands();
  if (MI.getNumOperands() < DescNumOps &&
      AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::op_sel)) {
    convertTrue16OpSel(MI);
    auto Mods = collectVOPModifiers(MI);
    insertNamedMCOperand(MI, MCOperand::createImm(Mods.OpSel),
                         AMDGPU::OpName::op_sel);
  } else {
    // Insert dummy unused src modifiers.
    if (MI.getNumOperands() < DescNumOps &&
        AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::src0_modifiers))
      insertNamedMCOperand(MI, MCOperand::createImm(0),
                           AMDGPU::OpName::src0_modifiers);

    if (MI.getNumOperands() < DescNumOps &&
        AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::src1_modifiers))
      insertNamedMCOperand(MI, MCOperand::createImm(0),
                           AMDGPU::OpName::src1_modifiers);
  }
}

void AMDGPUDisassembler::convertVOP3DPPInst(MCInst &MI) const {
  convertTrue16OpSel(MI);

  int VDstInIdx =
      AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::vdst_in);
  if (VDstInIdx != -1)
    insertNamedMCOperand(MI, MI.getOperand(0), AMDGPU::OpName::vdst_in);

  unsigned Opc = MI.getOpcode();
  unsigned DescNumOps = MCII->get(Opc).getNumOperands();
  if (MI.getNumOperands() < DescNumOps &&
      AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::op_sel)) {
    auto Mods = collectVOPModifiers(MI);
    insertNamedMCOperand(MI, MCOperand::createImm(Mods.OpSel),
                         AMDGPU::OpName::op_sel);
  }
}

// Given a wide tuple \p Reg, check whether it would overflow the 256-register
// file. \returns \p Reg on success or NoRegister otherwise.
static MCRegister CheckVGPROverflow(MCRegister Reg, const MCRegisterClass &RC,
                                    const MCRegisterInfo &MRI) {
  unsigned NumRegs = RC.getSizeInBits() / 32;
  MCRegister Sub0 = MRI.getSubReg(Reg, AMDGPU::sub0);
  if (!Sub0)
    return Reg;

  MCRegister BaseReg;
  if (MRI.getRegClass(AMDGPU::VGPR_32RegClassID).contains(Sub0))
    BaseReg = AMDGPU::VGPR0;
  else if (MRI.getRegClass(AMDGPU::AGPR_32RegClassID).contains(Sub0))
    BaseReg = AMDGPU::AGPR0;

  assert(BaseReg && "Only vector registers expected");

  return (Sub0 - BaseReg + NumRegs <= 256) ? Reg : MCRegister();
}
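
// For illustration: a VReg_1024 tuple (NumRegs = 32) starting at v250 gives
// 250 + 32 > 256, so the check above returns NoRegister, while the same
// tuple starting at v224 still fits and is returned unchanged.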

// Note that before gfx10 the MIMG encoding provided no information about
// VADDR size. Consequently, decoded instructions always show the address as
// if it had 1 dword, which may not actually be the case.
void AMDGPUDisassembler::convertMIMGInst(MCInst &MI) const {
  auto TSFlags = MCII->get(MI.getOpcode()).TSFlags;

  int VDstIdx = AMDGPU::getNamedOperandIdx(MI.getOpcode(),
                                           AMDGPU::OpName::vdst);

  int VDataIdx = AMDGPU::getNamedOperandIdx(MI.getOpcode(),
                                            AMDGPU::OpName::vdata);
  int VAddr0Idx =
      AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::vaddr0);
  AMDGPU::OpName RsrcOpName = (TSFlags & SIInstrFlags::MIMG)
                                  ? AMDGPU::OpName::srsrc
                                  : AMDGPU::OpName::rsrc;
  int RsrcIdx = AMDGPU::getNamedOperandIdx(MI.getOpcode(), RsrcOpName);
  int DMaskIdx = AMDGPU::getNamedOperandIdx(MI.getOpcode(),
                                            AMDGPU::OpName::dmask);

  int TFEIdx = AMDGPU::getNamedOperandIdx(MI.getOpcode(),
                                          AMDGPU::OpName::tfe);
  int D16Idx = AMDGPU::getNamedOperandIdx(MI.getOpcode(),
                                          AMDGPU::OpName::d16);

  const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(MI.getOpcode());
  const AMDGPU::MIMGBaseOpcodeInfo *BaseOpcode =
      AMDGPU::getMIMGBaseOpcodeInfo(Info->BaseOpcode);

  assert(VDataIdx != -1);
  if (BaseOpcode->BVH) {
    // Add A16 operand for intersect_ray instructions
    addOperand(MI, MCOperand::createImm(BaseOpcode->A16));
    return;
  }

  bool IsAtomic = (VDstIdx != -1);
  bool IsGather4 = TSFlags & SIInstrFlags::Gather4;
  bool IsVSample = TSFlags & SIInstrFlags::VSAMPLE;
  bool IsNSA = false;
  bool IsPartialNSA = false;
  unsigned AddrSize = Info->VAddrDwords;

  if (isGFX10Plus()) {
    unsigned DimIdx =
        AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::dim);
    int A16Idx =
        AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::a16);
    const AMDGPU::MIMGDimInfo *Dim =
        AMDGPU::getMIMGDimInfoByEncoding(MI.getOperand(DimIdx).getImm());
    const bool IsA16 = (A16Idx != -1 && MI.getOperand(A16Idx).getImm());

    AddrSize =
        AMDGPU::getAddrSizeMIMGOp(BaseOpcode, Dim, IsA16, AMDGPU::hasG16(STI));

    // VSAMPLE insts that do not use vaddr3 behave the same as NSA forms.
    // VIMAGE insts other than BVH never use vaddr4.
    IsNSA = Info->MIMGEncoding == AMDGPU::MIMGEncGfx10NSA ||
            Info->MIMGEncoding == AMDGPU::MIMGEncGfx11NSA ||
            Info->MIMGEncoding == AMDGPU::MIMGEncGfx12;
    if (!IsNSA) {
      if (!IsVSample && AddrSize > 12)
        AddrSize = 16;
    } else {
      if (AddrSize > Info->VAddrDwords) {
        if (!STI.hasFeature(AMDGPU::FeaturePartialNSAEncoding)) {
          // The NSA encoding does not contain enough operands for the
          // combination of base opcode / dimension. Should this be an error?
          return;
        }
        IsPartialNSA = true;
      }
    }
  }

  unsigned DMask = MI.getOperand(DMaskIdx).getImm() & 0xf;
  unsigned DstSize = IsGather4 ? 4 : std::max(llvm::popcount(DMask), 1);

  bool D16 = D16Idx >= 0 && MI.getOperand(D16Idx).getImm();
  if (D16 && AMDGPU::hasPackedD16(STI)) {
    DstSize = (DstSize + 1) / 2;
  }

  if (TFEIdx != -1 && MI.getOperand(TFEIdx).getImm())
    DstSize += 1;

  if (DstSize == Info->VDataDwords && AddrSize == Info->VAddrDwords)
    return;

  int NewOpcode = AMDGPU::getMIMGOpcode(Info->BaseOpcode, Info->MIMGEncoding,
                                        DstSize, AddrSize);
  if (NewOpcode == -1)
    return;

  // Widen the register to the correct number of enabled channels.
  MCRegister NewVdata;
  if (DstSize != Info->VDataDwords) {
    auto DataRCID = MCII->getOpRegClassID(
        MCII->get(NewOpcode).operands()[VDataIdx], HwModeRegClass);

    // Get first subregister of VData
    MCRegister Vdata0 = MI.getOperand(VDataIdx).getReg();
    MCRegister VdataSub0 = MRI.getSubReg(Vdata0, AMDGPU::sub0);
    Vdata0 = (VdataSub0 != 0) ? VdataSub0 : Vdata0;

    const MCRegisterClass &NewRC = MRI.getRegClass(DataRCID);
    NewVdata = MRI.getMatchingSuperReg(Vdata0, AMDGPU::sub0, &NewRC);
    NewVdata = CheckVGPROverflow(NewVdata, NewRC, MRI);
    if (!NewVdata) {
      // It's possible to encode this such that the low register + enabled
      // components exceeds the register count.
      return;
    }
  }

  // If not using NSA on GFX10+, widen vaddr0 address register to correct size.
  // If using partial NSA on GFX11+, widen the last address register.
  int VAddrSAIdx = IsPartialNSA ? (RsrcIdx - 1) : VAddr0Idx;
  MCRegister NewVAddrSA;
  if (STI.hasFeature(AMDGPU::FeatureNSAEncoding) && (!IsNSA || IsPartialNSA) &&
      AddrSize != Info->VAddrDwords) {
    MCRegister VAddrSA = MI.getOperand(VAddrSAIdx).getReg();
    MCRegister VAddrSubSA = MRI.getSubReg(VAddrSA, AMDGPU::sub0);
    VAddrSA = VAddrSubSA ? VAddrSubSA : VAddrSA;

    auto AddrRCID = MCII->getOpRegClassID(
        MCII->get(NewOpcode).operands()[VAddrSAIdx], HwModeRegClass);

    const MCRegisterClass &NewRC = MRI.getRegClass(AddrRCID);
    NewVAddrSA = MRI.getMatchingSuperReg(VAddrSA, AMDGPU::sub0, &NewRC);
    NewVAddrSA = CheckVGPROverflow(NewVAddrSA, NewRC, MRI);
    if (!NewVAddrSA)
      return;
  }

  MI.setOpcode(NewOpcode);

  if (NewVdata != AMDGPU::NoRegister) {
    MI.getOperand(VDataIdx) = MCOperand::createReg(NewVdata);

    if (IsAtomic) {
      // Atomic operations have an additional operand (a copy of data)
      MI.getOperand(VDstIdx) = MCOperand::createReg(NewVdata);
    }
  }

  if (NewVAddrSA) {
    MI.getOperand(VAddrSAIdx) = MCOperand::createReg(NewVAddrSA);
  } else if (IsNSA) {
    assert(AddrSize <= Info->VAddrDwords);
    MI.erase(MI.begin() + VAddr0Idx + AddrSize,
             MI.begin() + VAddr0Idx + Info->VAddrDwords);
  }
}

// Opsel and neg bits are used in src_modifiers and standalone operands. The
// autogenerated decoder only adds them to src_modifiers, so manually add the
// bits to the other operands.
void AMDGPUDisassembler::convertVOP3PDPPInst(MCInst &MI) const {
  unsigned Opc = MI.getOpcode();
  unsigned DescNumOps = MCII->get(Opc).getNumOperands();
  auto Mods = collectVOPModifiers(MI, true);

  if (MI.getNumOperands() < DescNumOps &&
      AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::vdst_in))
    insertNamedMCOperand(MI, MCOperand::createImm(0), AMDGPU::OpName::vdst_in);

  if (MI.getNumOperands() < DescNumOps &&
      AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::op_sel))
    insertNamedMCOperand(MI, MCOperand::createImm(Mods.OpSel),
                         AMDGPU::OpName::op_sel);
  if (MI.getNumOperands() < DescNumOps &&
      AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::op_sel_hi))
    insertNamedMCOperand(MI, MCOperand::createImm(Mods.OpSelHi),
                         AMDGPU::OpName::op_sel_hi);
  if (MI.getNumOperands() < DescNumOps &&
      AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::neg_lo))
    insertNamedMCOperand(MI, MCOperand::createImm(Mods.NegLo),
                         AMDGPU::OpName::neg_lo);
  if (MI.getNumOperands() < DescNumOps &&
      AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::neg_hi))
    insertNamedMCOperand(MI, MCOperand::createImm(Mods.NegHi),
                         AMDGPU::OpName::neg_hi);
}

// Create dummy old operand and insert optional operands
void AMDGPUDisassembler::convertVOPCDPPInst(MCInst &MI) const {
  unsigned Opc = MI.getOpcode();
  unsigned DescNumOps = MCII->get(Opc).getNumOperands();

  if (MI.getNumOperands() < DescNumOps &&
      AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::old))
    insertNamedMCOperand(MI, MCOperand::createReg(0), AMDGPU::OpName::old);

  if (MI.getNumOperands() < DescNumOps &&
      AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::src0_modifiers))
    insertNamedMCOperand(MI, MCOperand::createImm(0),
                         AMDGPU::OpName::src0_modifiers);

  if (MI.getNumOperands() < DescNumOps &&
      AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::src1_modifiers))
    insertNamedMCOperand(MI, MCOperand::createImm(0),
                         AMDGPU::OpName::src1_modifiers);
}

void AMDGPUDisassembler::convertVOPC64DPPInst(MCInst &MI) const {
  unsigned Opc = MI.getOpcode();
  unsigned DescNumOps = MCII->get(Opc).getNumOperands();

  if (MI.getNumOperands() < DescNumOps &&
      AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::op_sel)) {
    auto Mods = collectVOPModifiers(MI);
    insertNamedMCOperand(MI, MCOperand::createImm(Mods.OpSel),
                         AMDGPU::OpName::op_sel);
  }
}

void AMDGPUDisassembler::convertFMAanyK(MCInst &MI) const {
  assert(HasLiteral && "Should have decoded a literal");
  insertNamedMCOperand(MI, MCOperand::createImm(Literal), AMDGPU::OpName::immX);
}

const char* AMDGPUDisassembler::getRegClassName(unsigned RegClassID) const {
  return getContext().getRegisterInfo()->
    getRegClassName(&AMDGPUMCRegisterClasses[RegClassID]);
}

inline
MCOperand AMDGPUDisassembler::errOperand(unsigned V,
                                         const Twine& ErrMsg) const {
  *CommentStream << "Error: " + ErrMsg;

  // ToDo: add support for error operands to MCInst.h
  // return MCOperand::createError(V);
  return MCOperand();
}

inline
MCOperand AMDGPUDisassembler::createRegOperand(unsigned int RegId) const {
  return MCOperand::createReg(AMDGPU::getMCReg(RegId, STI));
}

inline
MCOperand AMDGPUDisassembler::createRegOperand(unsigned RegClassID,
                                               unsigned Val) const {
  const auto& RegCl = AMDGPUMCRegisterClasses[RegClassID];
  if (Val >= RegCl.getNumRegs())
    return errOperand(Val, Twine(getRegClassName(RegClassID)) +
                               ": unknown register " + Twine(Val));
  return createRegOperand(RegCl.getRegister(Val));
}

inline
MCOperand AMDGPUDisassembler::createSRegOperand(unsigned SRegClassID,
                                                unsigned Val) const {
  // ToDo: SI/CI have 104 SGPRs, VI - 102
  // Valery: here we accept as much as we can and let the assembler sort it out
  int shift = 0;
  switch (SRegClassID) {
  case AMDGPU::SGPR_32RegClassID:
  case AMDGPU::TTMP_32RegClassID:
    break;
  case AMDGPU::SGPR_64RegClassID:
  case AMDGPU::TTMP_64RegClassID:
    shift = 1;
    break;
  case AMDGPU::SGPR_96RegClassID:
  case AMDGPU::TTMP_96RegClassID:
  case AMDGPU::SGPR_128RegClassID:
  case AMDGPU::TTMP_128RegClassID:
  // ToDo: unclear if s[100:104] is available on VI. Can we use VCC as SGPR in
  // this bundle?
  case AMDGPU::SGPR_256RegClassID:
  case AMDGPU::TTMP_256RegClassID:
  // ToDo: unclear if s[96:104] is available on VI. Can we use VCC as SGPR in
  // this bundle?
  case AMDGPU::SGPR_288RegClassID:
  case AMDGPU::TTMP_288RegClassID:
  case AMDGPU::SGPR_320RegClassID:
  case AMDGPU::TTMP_320RegClassID:
  case AMDGPU::SGPR_352RegClassID:
  case AMDGPU::TTMP_352RegClassID:
  case AMDGPU::SGPR_384RegClassID:
  case AMDGPU::TTMP_384RegClassID:
  case AMDGPU::SGPR_512RegClassID:
  case AMDGPU::TTMP_512RegClassID:
    shift = 2;
    break;
  // ToDo: unclear if s[88:104] is available on VI. Can we use VCC as SGPR in
  // this bundle?
  default:
    llvm_unreachable("unhandled register class");
  }

  if (Val % (1 << shift)) {
    *CommentStream << "Warning: " << getRegClassName(SRegClassID)
                   << ": scalar reg isn't aligned " << Val;
  }

  return createRegOperand(SRegClassID, Val >> shift);
}

MCOperand AMDGPUDisassembler::createVGPR16Operand(unsigned RegIdx,
                                                  bool IsHi) const {
  unsigned RegIdxInVGPR16 = RegIdx * 2 + (IsHi ? 1 : 0);
  return createRegOperand(AMDGPU::VGPR_16RegClassID, RegIdxInVGPR16);
}
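
// For illustration: RegIdx = 5 with IsHi = true maps to index 11 in the
// VGPR_16 class, i.e. v5.h; with IsHi = false it maps to index 10, v5.l.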

// Decode Literals for insts which always have a literal in the encoding
MCOperand
AMDGPUDisassembler::decodeMandatoryLiteralConstant(unsigned Val) const {
  if (HasLiteral) {
    assert(
        AMDGPU::hasVOPD(STI) &&
        "Should only decode multiple kimm with VOPD, check VSrc operand types");
    if (Literal != Val)
      return errOperand(Val, "More than one unique literal is illegal");
  }
  HasLiteral = true;
  Literal = Val;
  return MCOperand::createImm(Literal);
}

MCOperand
AMDGPUDisassembler::decodeMandatoryLiteral64Constant(uint64_t Val) const {
  if (HasLiteral) {
    if (Literal != Val)
      return errOperand(Val, "More than one unique literal is illegal");
  }
  HasLiteral = true;
  Literal = Val;

  bool UseLit64 = Hi_32(Literal) == 0;
  return UseLit64 ? MCOperand::createExpr(AMDGPUMCExpr::createLit(
                        LitModifier::Lit64, Literal, getContext()))
                  : MCOperand::createImm(Literal);
}

MCOperand
AMDGPUDisassembler::decodeLiteralConstant(const MCInstrDesc &Desc,
                                          const MCOperandInfo &OpDesc) const {
  // For now all literal constants are supposed to be unsigned integers.
  // ToDo: deal with signed/unsigned 64-bit integer constants
  // ToDo: deal with float/double constants
  if (!HasLiteral) {
    if (Bytes.size() < 4) {
      return errOperand(0, "cannot read literal, inst bytes left " +
                               Twine(Bytes.size()));
    }
    HasLiteral = true;
    Literal = eatBytes<uint32_t>(Bytes);
  }

  // For disassembling always assume all inline constants are available.
  bool HasInv2Pi = true;

  // Invalid instruction codes may contain literals for inline-only
  // operands, so we support them here as well.
  int64_t Val = Literal;
  bool UseLit = false;
  switch (OpDesc.OperandType) {
  default:
    llvm_unreachable("Unexpected operand type!");
  case AMDGPU::OPERAND_REG_IMM_BF16:
  case AMDGPU::OPERAND_REG_INLINE_C_BF16:
  case AMDGPU::OPERAND_REG_INLINE_C_V2BF16:
    UseLit = AMDGPU::isInlinableLiteralBF16(Val, HasInv2Pi);
    break;
  case AMDGPU::OPERAND_REG_IMM_V2BF16:
    UseLit = AMDGPU::isInlinableLiteralV2BF16(Val);
    break;
  case AMDGPU::OPERAND_REG_IMM_FP16:
  case AMDGPU::OPERAND_REG_INLINE_C_FP16:
  case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
    UseLit = AMDGPU::isInlinableLiteralFP16(Val, HasInv2Pi);
    break;
  case AMDGPU::OPERAND_REG_IMM_V2FP16:
    UseLit = AMDGPU::isInlinableLiteralV2F16(Val);
    break;
  case AMDGPU::OPERAND_REG_IMM_NOINLINE_V2FP16:
    break;
  case AMDGPU::OPERAND_REG_IMM_INT16:
  case AMDGPU::OPERAND_REG_INLINE_C_INT16:
    UseLit = AMDGPU::isInlinableLiteralI16(Val, HasInv2Pi);
    break;
  case AMDGPU::OPERAND_REG_IMM_V2INT16:
    UseLit = AMDGPU::isInlinableLiteralV2I16(Val);
    break;
  case AMDGPU::OPERAND_REG_IMM_FP32:
  case AMDGPU::OPERAND_REG_INLINE_C_FP32:
  case AMDGPU::OPERAND_REG_INLINE_AC_FP32:
  case AMDGPU::OPERAND_REG_IMM_V2FP32:
  case AMDGPU::OPERAND_REG_IMM_INT32:
  case AMDGPU::OPERAND_REG_INLINE_C_INT32:
  case AMDGPU::OPERAND_REG_INLINE_AC_INT32:
  case AMDGPU::OPERAND_REG_IMM_V2INT32:
  case AMDGPU::OPERAND_KIMM32:
    UseLit = AMDGPU::isInlinableLiteral32(Val, HasInv2Pi);
    break;
  case AMDGPU::OPERAND_REG_IMM_FP64:
  case AMDGPU::OPERAND_REG_INLINE_C_FP64:
  case AMDGPU::OPERAND_REG_INLINE_AC_FP64:
    Val <<= 32;
    break;
  case AMDGPU::OPERAND_REG_IMM_INT64:
  case AMDGPU::OPERAND_REG_INLINE_C_INT64:
    UseLit = AMDGPU::isInlinableLiteral64(Val, HasInv2Pi);
    break;
  case MCOI::OPERAND_REGISTER:
    // TODO: Disassembling V_DUAL_FMAMK_F32_X_FMAMK_F32_gfx11 hits
    // decoding a literal in a position of a register operand. Give
    // it special handling in the caller, decodeImmOperands(), instead
    // of quietly allowing it here.
    break;
  }

  return UseLit ? MCOperand::createExpr(AMDGPUMCExpr::createLit(
                      LitModifier::Lit, Val, getContext()))
                : MCOperand::createImm(Val);
}

MCOperand AMDGPUDisassembler::decodeLiteral64Constant() const {
  assert(STI.hasFeature(AMDGPU::Feature64BitLiterals));

  if (!HasLiteral) {
    if (Bytes.size() < 8) {
      return errOperand(0, "cannot read literal64, inst bytes left " +
                               Twine(Bytes.size()));
    }
    HasLiteral = true;
    Literal = eatBytes<uint64_t>(Bytes);
  }

  bool UseLit64 = Hi_32(Literal) == 0;
  return UseLit64 ? MCOperand::createExpr(AMDGPUMCExpr::createLit(
                        LitModifier::Lit64, Literal, getContext()))
                  : MCOperand::createImm(Literal);
}

MCOperand AMDGPUDisassembler::decodeIntImmed(unsigned Imm) {
  using namespace AMDGPU::EncValues;

  assert(Imm >= INLINE_INTEGER_C_MIN && Imm <= INLINE_INTEGER_C_MAX);
  // The cast prevents negative overflow.
  return MCOperand::createImm((Imm <= INLINE_INTEGER_C_POSITIVE_MAX) ?
    (static_cast<int64_t>(Imm) - INLINE_INTEGER_C_MIN) :
    (INLINE_INTEGER_C_POSITIVE_MAX - static_cast<int64_t>(Imm)));
}
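
// For illustration: encodings 128..192 map to the integers 0..64 and
// 193..208 map to -1..-16, so decodeIntImmed(193) yields
// INLINE_INTEGER_C_POSITIVE_MAX - 193 = 192 - 193 = -1.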

static int64_t getInlineImmVal32(unsigned Imm) {
  switch (Imm) {
  case 240:
    return llvm::bit_cast<uint32_t>(0.5f);
  case 241:
    return llvm::bit_cast<uint32_t>(-0.5f);
  case 242:
    return llvm::bit_cast<uint32_t>(1.0f);
  case 243:
    return llvm::bit_cast<uint32_t>(-1.0f);
  case 244:
    return llvm::bit_cast<uint32_t>(2.0f);
  case 245:
    return llvm::bit_cast<uint32_t>(-2.0f);
  case 246:
    return llvm::bit_cast<uint32_t>(4.0f);
  case 247:
    return llvm::bit_cast<uint32_t>(-4.0f);
  case 248: // 1 / (2 * PI)
    return 0x3e22f983;
  default:
    llvm_unreachable("invalid fp inline imm");
  }
}

static int64_t getInlineImmVal64(unsigned Imm) {
  switch (Imm) {
  case 240:
    return llvm::bit_cast<uint64_t>(0.5);
  case 241:
    return llvm::bit_cast<uint64_t>(-0.5);
  case 242:
    return llvm::bit_cast<uint64_t>(1.0);
  case 243:
    return llvm::bit_cast<uint64_t>(-1.0);
  case 244:
    return llvm::bit_cast<uint64_t>(2.0);
  case 245:
    return llvm::bit_cast<uint64_t>(-2.0);
  case 246:
    return llvm::bit_cast<uint64_t>(4.0);
  case 247:
    return llvm::bit_cast<uint64_t>(-4.0);
  case 248: // 1 / (2 * PI)
    return 0x3fc45f306dc9c882;
  default:
    llvm_unreachable("invalid fp inline imm");
  }
}

static int64_t getInlineImmValF16(unsigned Imm) {
  switch (Imm) {
  case 240:
    return 0x3800;
  case 241:
    return 0xB800;
  case 242:
    return 0x3C00;
  case 243:
    return 0xBC00;
  case 244:
    return 0x4000;
  case 245:
    return 0xC000;
  case 246:
    return 0x4400;
  case 247:
    return 0xC400;
  case 248: // 1 / (2 * PI)
    return 0x3118;
  default:
    llvm_unreachable("invalid fp inline imm");
  }
}

static int64_t getInlineImmValBF16(unsigned Imm) {
  switch (Imm) {
  case 240:
    return 0x3F00;
  case 241:
    return 0xBF00;
  case 242:
    return 0x3F80;
  case 243:
    return 0xBF80;
  case 244:
    return 0x4000;
  case 245:
    return 0xC000;
  case 246:
    return 0x4080;
  case 247:
    return 0xC080;
  case 248: // 1 / (2 * PI)
    return 0x3E22;
  default:
    llvm_unreachable("invalid fp inline imm");
  }
}
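
// For illustration: for the encoding 242 (+1.0) these tables return the raw
// bit patterns 0x3F800000 (f32), 0x3FF0000000000000 (f64), 0x3C00 (f16) and
// 0x3F80 (bf16) respectively.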

unsigned AMDGPUDisassembler::getVgprClassId(unsigned Width) const {
  using namespace AMDGPU;

  switch (Width) {
  case 16:
  case 32:
    return VGPR_32RegClassID;
  case 64:
    return VReg_64RegClassID;
  case 96:
    return VReg_96RegClassID;
  case 128:
    return VReg_128RegClassID;
  case 160:
    return VReg_160RegClassID;
  case 192:
    return VReg_192RegClassID;
  case 256:
    return VReg_256RegClassID;
  case 288:
    return VReg_288RegClassID;
  case 320:
    return VReg_320RegClassID;
  case 352:
    return VReg_352RegClassID;
  case 384:
    return VReg_384RegClassID;
  case 512:
    return VReg_512RegClassID;
  case 1024:
    return VReg_1024RegClassID;
  }
  llvm_unreachable("Invalid register width!");
}

unsigned AMDGPUDisassembler::getAgprClassId(unsigned Width) const {
  using namespace AMDGPU;

  switch (Width) {
  case 16:
  case 32:
    return AGPR_32RegClassID;
  case 64:
    return AReg_64RegClassID;
  case 96:
    return AReg_96RegClassID;
  case 128:
    return AReg_128RegClassID;
  case 160:
    return AReg_160RegClassID;
  case 256:
    return AReg_256RegClassID;
  case 288:
    return AReg_288RegClassID;
  case 320:
    return AReg_320RegClassID;
  case 352:
    return AReg_352RegClassID;
  case 384:
    return AReg_384RegClassID;
  case 512:
    return AReg_512RegClassID;
  case 1024:
    return AReg_1024RegClassID;
  }
  llvm_unreachable("Invalid register width!");
}

unsigned AMDGPUDisassembler::getSgprClassId(unsigned Width) const {
  using namespace AMDGPU;

  switch (Width) {
  case 16:
  case 32:
    return SGPR_32RegClassID;
  case 64:
    return SGPR_64RegClassID;
  case 96:
    return SGPR_96RegClassID;
  case 128:
    return SGPR_128RegClassID;
  case 160:
    return SGPR_160RegClassID;
  case 256:
    return SGPR_256RegClassID;
  case 288:
    return SGPR_288RegClassID;
  case 320:
    return SGPR_320RegClassID;
  case 352:
    return SGPR_352RegClassID;
  case 384:
    return SGPR_384RegClassID;
  case 512:
    return SGPR_512RegClassID;
  }
  llvm_unreachable("Invalid register width!");
}

unsigned AMDGPUDisassembler::getTtmpClassId(unsigned Width) const {
  using namespace AMDGPU;

  switch (Width) {
  case 16:
  case 32:
    return TTMP_32RegClassID;
  case 64:
    return TTMP_64RegClassID;
  case 128:
    return TTMP_128RegClassID;
  case 256:
    return TTMP_256RegClassID;
  case 288:
    return TTMP_288RegClassID;
  case 320:
    return TTMP_320RegClassID;
  case 352:
    return TTMP_352RegClassID;
  case 384:
    return TTMP_384RegClassID;
  case 512:
    return TTMP_512RegClassID;
  }
  llvm_unreachable("Invalid register width!");
}
1913
1914int AMDGPUDisassembler::getTTmpIdx(unsigned Val) const {
1915 using namespace AMDGPU::EncValues;
1916
1917 unsigned TTmpMin = isGFX9Plus() ? TTMP_GFX9PLUS_MIN : TTMP_VI_MIN;
1918 unsigned TTmpMax = isGFX9Plus() ? TTMP_GFX9PLUS_MAX : TTMP_VI_MAX;
1919
1920 return (TTmpMin <= Val && Val <= TTmpMax)? Val - TTmpMin : -1;
1921}
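// Editorial example (assuming the usual EncValues, TTMP_GFX9PLUS_MIN = 108
// and TTMP_GFX9PLUS_MAX = 123): on GFX9+, getTTmpIdx(110) returns 2 (ttmp2),
// while getTTmpIdx(64) returns -1 because 64 encodes an ordinary SGPR.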
1922
1923MCOperand AMDGPUDisassembler::decodeSrcOp(const MCInst &Inst, unsigned Width,
1924 unsigned Val) const {
1925 using namespace AMDGPU::EncValues;
1926
1927 assert(Val < 1024); // enum10
1928
1929 bool IsAGPR = Val & 512;
1930 Val &= 511;
1931
1932 if (VGPR_MIN <= Val && Val <= VGPR_MAX) {
1933 return createRegOperand(IsAGPR ? getAgprClassId(Width)
1934 : getVgprClassId(Width), Val - VGPR_MIN);
1935 }
1936 return decodeNonVGPRSrcOp(Inst, Width, Val & 0xFF);
1937}
1938
1939MCOperand AMDGPUDisassembler::decodeNonVGPRSrcOp(const MCInst &Inst,
1940 unsigned Width,
1941 unsigned Val) const {
1942 // Cases when Val{8} is 1 (vgpr, agpr or true 16 vgpr) should have been
1943 // decoded earlier.
1944 assert(Val < (1 << 8) && "9-bit Src encoding when Val{8} is 0");
1945 using namespace AMDGPU::EncValues;
1946
1947 if (Val <= SGPR_MAX) {
1948 // "SGPR_MIN <= Val" is always true and causes compilation warning.
1949 static_assert(SGPR_MIN == 0);
1950 return createSRegOperand(getSgprClassId(Width), Val - SGPR_MIN);
1951 }
1952
1953 int TTmpIdx = getTTmpIdx(Val);
1954 if (TTmpIdx >= 0) {
1955 return createSRegOperand(getTtmpClassId(Width), TTmpIdx);
1956 }
1957
1958 if ((INLINE_INTEGER_C_MIN <= Val && Val <= INLINE_INTEGER_C_MAX) ||
1959 (INLINE_FLOATING_C_MIN <= Val && Val <= INLINE_FLOATING_C_MAX) ||
1960 Val == LITERAL_CONST)
1961 return MCOperand::createImm(Val);
1962
1963 if (Val == LITERAL64_CONST && STI.hasFeature(AMDGPU::Feature64BitLiterals)) {
1964 return decodeLiteral64Constant();
1965 }
1966
1967 switch (Width) {
1968 case 32:
1969 case 16:
1970 return decodeSpecialReg32(Val);
1971 case 64:
1972 return decodeSpecialReg64(Val);
1973 case 96:
1974 case 128:
1975 case 256:
1976 case 512:
1977 return decodeSpecialReg96Plus(Val);
1978 default:
1979 llvm_unreachable("unexpected immediate type");
1980 }
1981}
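// Editorial note: Val == LITERAL_CONST (255) is intentionally returned as a
// plain immediate above; the literal dword that trails the instruction is
// attached later by the literal-decoding path (decodeLiteralConstant).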
1982
1983// Bit 0 of DstY isn't stored in the instruction, because it's always the
1984// opposite of bit 0 of DstX.
1985MCOperand AMDGPUDisassembler::decodeVOPDDstYOp(MCInst &Inst,
1986 unsigned Val) const {
1987 int VDstXInd =
1988 AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::vdstX);
1989 assert(VDstXInd != -1);
1990 assert(Inst.getOperand(VDstXInd).isReg());
1991 unsigned XDstReg = MRI.getEncodingValue(Inst.getOperand(VDstXInd).getReg());
1992 Val |= ~XDstReg & 1;
1993 return createRegOperand(getVgprClassId(32), Val);
1994}
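// Editorial worked example: VOPD requires DstX and DstY to sit in VGPR banks
// of opposite parity, so DstY's bit 0 is derived rather than stored. If DstX
// decoded to v4 (encoding 4, bit 0 clear), `Val |= ~XDstReg & 1` sets bit 0,
// so an encoded Val of 6 is reconstructed as v7; with DstX = v5 it stays v6.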
1995
1996MCOperand AMDGPUDisassembler::decodeSpecialReg32(unsigned Val) const {
1997 using namespace AMDGPU;
1998
1999 switch (Val) {
2000 // clang-format off
2001 case 102: return createRegOperand(FLAT_SCR_LO);
2002 case 103: return createRegOperand(FLAT_SCR_HI);
2003 case 104: return createRegOperand(XNACK_MASK_LO);
2004 case 105: return createRegOperand(XNACK_MASK_HI);
2005 case 106: return createRegOperand(VCC_LO);
2006 case 107: return createRegOperand(VCC_HI);
2007 case 108: return createRegOperand(TBA_LO);
2008 case 109: return createRegOperand(TBA_HI);
2009 case 110: return createRegOperand(TMA_LO);
2010 case 111: return createRegOperand(TMA_HI);
2011 case 124:
2012 return isGFX11Plus() ? createRegOperand(SGPR_NULL) : createRegOperand(M0);
2013 case 125:
2014 return isGFX11Plus() ? createRegOperand(M0) : createRegOperand(SGPR_NULL);
2015 case 126: return createRegOperand(EXEC_LO);
2016 case 127: return createRegOperand(EXEC_HI);
2017 case 230: return createRegOperand(SRC_FLAT_SCRATCH_BASE_LO);
2018 case 231: return createRegOperand(SRC_FLAT_SCRATCH_BASE_HI);
2019 case 235: return createRegOperand(SRC_SHARED_BASE_LO);
2020 case 236: return createRegOperand(SRC_SHARED_LIMIT_LO);
2021 case 237: return createRegOperand(SRC_PRIVATE_BASE_LO);
2022 case 238: return createRegOperand(SRC_PRIVATE_LIMIT_LO);
2023 case 239: return createRegOperand(SRC_POPS_EXITING_WAVE_ID);
2024 case 251: return createRegOperand(SRC_VCCZ);
2025 case 252: return createRegOperand(SRC_EXECZ);
2026 case 253: return createRegOperand(SRC_SCC);
2027 case 254: return createRegOperand(LDS_DIRECT);
2028 default: break;
2029 // clang-format on
2030 }
2031 return errOperand(Val, "unknown operand encoding " + Twine(Val));
2032}
2033
2034MCOperand AMDGPUDisassembler::decodeSpecialReg64(unsigned Val) const {
2035 using namespace AMDGPU;
2036
2037 switch (Val) {
2038 case 102: return createRegOperand(FLAT_SCR);
2039 case 104: return createRegOperand(XNACK_MASK);
2040 case 106: return createRegOperand(VCC);
2041 case 108: return createRegOperand(TBA);
2042 case 110: return createRegOperand(TMA);
2043 case 124:
2044 if (isGFX11Plus())
2045 return createRegOperand(SGPR_NULL);
2046 break;
2047 case 125:
2048 if (!isGFX11Plus())
2049 return createRegOperand(SGPR_NULL);
2050 break;
2051 case 126: return createRegOperand(EXEC);
2052 case 230: return createRegOperand(SRC_FLAT_SCRATCH_BASE_LO);
2053 case 235: return createRegOperand(SRC_SHARED_BASE);
2054 case 236: return createRegOperand(SRC_SHARED_LIMIT);
2055 case 237: return createRegOperand(SRC_PRIVATE_BASE);
2056 case 238: return createRegOperand(SRC_PRIVATE_LIMIT);
2057 case 239: return createRegOperand(SRC_POPS_EXITING_WAVE_ID);
2058 case 251: return createRegOperand(SRC_VCCZ);
2059 case 252: return createRegOperand(SRC_EXECZ);
2060 case 253: return createRegOperand(SRC_SCC);
2061 default: break;
2062 }
2063 return errOperand(Val, "unknown operand encoding " + Twine(Val));
2064}
2065
2066MCOperand AMDGPUDisassembler::decodeSpecialReg96Plus(unsigned Val) const {
2067 using namespace AMDGPU;
2068
2069 switch (Val) {
2070 case 124:
2071 if (isGFX11Plus())
2072 return createRegOperand(SGPR_NULL);
2073 break;
2074 case 125:
2075 if (!isGFX11Plus())
2076 return createRegOperand(SGPR_NULL);
2077 break;
2078 default:
2079 break;
2080 }
2081 return errOperand(Val, "unknown operand encoding " + Twine(Val));
2082}
2083
2084MCOperand AMDGPUDisassembler::decodeSDWASrc(unsigned Width,
2085 const unsigned Val) const {
2086 using namespace AMDGPU::SDWA;
2087 using namespace AMDGPU::EncValues;
2088
2089 if (STI.hasFeature(AMDGPU::FeatureGFX9) ||
2090 STI.hasFeature(AMDGPU::FeatureGFX10)) {
2091 // XXX: cast to int is needed to avoid stupid warning:
2092 // compare with unsigned is always true
2093 if (int(SDWA9EncValues::SRC_VGPR_MIN) <= int(Val) &&
2094 Val <= SDWA9EncValues::SRC_VGPR_MAX) {
2095 return createRegOperand(getVgprClassId(Width),
2096 Val - SDWA9EncValues::SRC_VGPR_MIN);
2097 }
2098 if (SDWA9EncValues::SRC_SGPR_MIN <= Val &&
2099 Val <= (isGFX10Plus() ? SDWA9EncValues::SRC_SGPR_MAX_GFX10
2100 : SDWA9EncValues::SRC_SGPR_MAX_SI)) {
2101 return createSRegOperand(getSgprClassId(Width),
2102 Val - SDWA9EncValues::SRC_SGPR_MIN);
2103 }
2104 if (SDWA9EncValues::SRC_TTMP_MIN <= Val &&
2105 Val <= SDWA9EncValues::SRC_TTMP_MAX) {
2106 return createSRegOperand(getTtmpClassId(Width),
2107 Val - SDWA9EncValues::SRC_TTMP_MIN);
2108 }
2109
2110 const unsigned SVal = Val - SDWA9EncValues::SRC_SGPR_MIN;
2111
2112 if ((INLINE_INTEGER_C_MIN <= SVal && SVal <= INLINE_INTEGER_C_MAX) ||
2113 (INLINE_FLOATING_C_MIN <= SVal && SVal <= INLINE_FLOATING_C_MAX))
2114 return MCOperand::createImm(SVal);
2115
2116 return decodeSpecialReg32(SVal);
2117 }
2118 if (STI.hasFeature(AMDGPU::FeatureVolcanicIslands))
2119 return createRegOperand(getVgprClassId(Width), Val);
2120 llvm_unreachable("unsupported target");
2121}
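// Editorial sketch (assuming the usual SDWA9EncValues layout: VGPRs at
// encodings 0..255 and bit 8 selecting the scalar space at SRC_SGPR_MIN =
// 256): on GFX9/GFX10 an SDWA src of 5 decodes as v5, 256 decodes as s0, and
// anything past the SGPR/ttmp ranges falls through to the inline-constant
// and special-register paths with SVal = Val - 256.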
2122
2123MCOperand AMDGPUDisassembler::decodeSDWASrc16(unsigned Val) const {
2124 return decodeSDWASrc(16, Val);
2125}
2126
2127MCOperand AMDGPUDisassembler::decodeSDWASrc32(unsigned Val) const {
2128 return decodeSDWASrc(32, Val);
2129}
2130
2131MCOperand AMDGPUDisassembler::decodeSDWAVopcDst(unsigned Val) const {
2132 using namespace AMDGPU::SDWA;
2133
2134 assert((STI.hasFeature(AMDGPU::FeatureGFX9) ||
2135 STI.hasFeature(AMDGPU::FeatureGFX10)) &&
2136 "SDWAVopcDst should be present only on GFX9+");
2137
2138 bool IsWave32 = STI.hasFeature(AMDGPU::FeatureWavefrontSize32);
2139
2140 if (Val & SDWA9EncValues::VOPC_DST_VCC_MASK) {
2141 Val &= SDWA9EncValues::VOPC_DST_SGPR_MASK;
2142
2143 int TTmpIdx = getTTmpIdx(Val);
2144 if (TTmpIdx >= 0) {
2145 auto TTmpClsId = getTtmpClassId(IsWave32 ? 32 : 64);
2146 return createSRegOperand(TTmpClsId, TTmpIdx);
2147 }
2148 if (Val > SGPR_MAX) {
2149 return IsWave32 ? decodeSpecialReg32(Val) : decodeSpecialReg64(Val);
2150 }
2151 return createSRegOperand(getSgprClassId(IsWave32 ? 32 : 64), Val);
2152 }
2153 return createRegOperand(IsWave32 ? AMDGPU::VCC_LO : AMDGPU::VCC);
2154}
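// Editorial example (assuming VOPC_DST_VCC_MASK is the top bit of the SDWA
// VOPC dst field): with that bit clear the destination is the implicit
// vcc/vcc_lo; with Val = 0x80 | 4 the SGPR path above yields s4 on wave32
// targets or the pair s[4:5] on wave64.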
2155
2156MCOperand AMDGPUDisassembler::decodeBoolReg(const MCInst &Inst,
2157 unsigned Val) const {
2158 return STI.hasFeature(AMDGPU::FeatureWavefrontSize32)
2159 ? decodeSrcOp(Inst, 32, Val)
2160 : decodeSrcOp(Inst, 64, Val);
2161}
2162
2163MCOperand AMDGPUDisassembler::decodeSplitBarrier(const MCInst &Inst,
2164 unsigned Val) const {
2165 return decodeSrcOp(Inst, 32, Val);
2166}
2167
2168MCOperand AMDGPUDisassembler::decodeDpp8FI(unsigned Val) const {
2169 if (Val != AMDGPU::DPP::DPP8_FI_0 && Val != AMDGPU::DPP::DPP8_FI_1)
2170 return MCOperand();
2171 return MCOperand::createImm(Val);
2172}
2173
2174MCOperand AMDGPUDisassembler::decodeVersionImm(unsigned Imm) const {
2175 using VersionField = AMDGPU::EncodingField<7, 0>;
2176 using W64Bit = AMDGPU::EncodingBit<13>;
2177 using W32Bit = AMDGPU::EncodingBit<14>;
2178 using MDPBit = AMDGPU::EncodingBit<15>;
2179 using Encoding = AMDGPU::EncodingFields<VersionField, W64Bit, W32Bit, MDPBit>;
2180
2181 auto [Version, W64, W32, MDP] = Encoding::decode(Imm);
2182
2183 // Decode into a plain immediate if any unused bits are set.
2184 if (Encoding::encode(Version, W64, W32, MDP) != Imm)
2185 return MCOperand::createImm(Imm);
2186
2187 const auto &Versions = AMDGPU::UCVersion::getGFXVersions();
2188 const auto *I = find_if(
2189 Versions, [Version = Version](const AMDGPU::UCVersion::GFXVersion &V) {
2190 return V.Code == Version;
2191 });
2192 MCContext &Ctx = getContext();
2193 const MCExpr *E;
2194 if (I == Versions.end())
2195 E = MCConstantExpr::create(Version, Ctx);
2196 else
2197 E = MCSymbolRefExpr::create(Ctx.getOrCreateSymbol(I->Symbol), Ctx);
2198
2199 if (W64)
2200 E = MCBinaryExpr::createOr(E, UCVersionW64Expr, Ctx);
2201 if (W32)
2202 E = MCBinaryExpr::createOr(E, UCVersionW32Expr, Ctx);
2203 if (MDP)
2204 E = MCBinaryExpr::createOr(E, UCVersionMDPExpr, Ctx);
2205
2206 return MCOperand::createExpr(E);
2207}
2208
2209bool AMDGPUDisassembler::isVI() const {
2210 return STI.hasFeature(AMDGPU::FeatureVolcanicIslands);
2211}
2212
2213bool AMDGPUDisassembler::isGFX9() const { return AMDGPU::isGFX9(STI); }
2214
2215bool AMDGPUDisassembler::isGFX90A() const {
2216 return STI.hasFeature(AMDGPU::FeatureGFX90AInsts);
2217}
2218
2219bool AMDGPUDisassembler::isGFX9Plus() const { return AMDGPU::isGFX9Plus(STI); }
2220
2221bool AMDGPUDisassembler::isGFX10() const { return AMDGPU::isGFX10(STI); }
2222
2223bool AMDGPUDisassembler::isGFX10Plus() const {
2224 return AMDGPU::isGFX10Plus(STI);
2225}
2226
2227bool AMDGPUDisassembler::isGFX11() const {
2228 return STI.hasFeature(AMDGPU::FeatureGFX11);
2229}
2230
2231bool AMDGPUDisassembler::isGFX11Plus() const {
2232 return AMDGPU::isGFX11Plus(STI);
2233}
2234
2235bool AMDGPUDisassembler::isGFX12() const {
2236 return STI.hasFeature(AMDGPU::FeatureGFX12);
2237}
2238
2239bool AMDGPUDisassembler::isGFX12Plus() const {
2240 return AMDGPU::isGFX12Plus(STI);
2241}
2242
2243bool AMDGPUDisassembler::isGFX1250() const { return AMDGPU::isGFX1250(STI); }
2244
2245bool AMDGPUDisassembler::hasArchitectedFlatScratch() const {
2246 return STI.hasFeature(AMDGPU::FeatureArchitectedFlatScratch);
2247}
2248
2249bool AMDGPUDisassembler::hasKernargPreload() const {
2250 return AMDGPU::hasKernargPreload(STI);
2251}
2252
2253//===----------------------------------------------------------------------===//
2254// AMDGPU specific symbol handling
2255//===----------------------------------------------------------------------===//
2256
2257/// Print a string describing the reserved bit range specified by Mask with
2258/// offset BaseBytes for use in error comments. Mask is a single continuous
2259/// range of 1s surrounded by zeros. The format here is meant to align with the
2260/// tables that describe these bits in llvm.org/docs/AMDGPUUsage.html.
2261static SmallString<32> getBitRangeFromMask(uint32_t Mask, unsigned BaseBytes) {
2262 SmallString<32> Result;
2263 raw_svector_ostream S(Result);
2264
2265 int TrailingZeros = llvm::countr_zero(Mask);
2266 int PopCount = llvm::popcount(Mask);
2267
2268 if (PopCount == 1) {
2269 S << "bit (" << (TrailingZeros + BaseBytes * CHAR_BIT) << ')';
2270 } else {
2271 S << "bits in range ("
2272 << (TrailingZeros + PopCount - 1 + BaseBytes * CHAR_BIT) << ':'
2273 << (TrailingZeros + BaseBytes * CHAR_BIT) << ')';
2274 }
2275
2276 return Result;
2277}
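// Editorial examples: getBitRangeFromMask(0x4, 0) prints "bit (2)", and
// getBitRangeFromMask(0x3F0, 4) prints "bits in range (41:36)" -- the mask
// covers bits 4..9 and BaseBytes = 4 shifts the range by 32 bits, matching
// the byte-offset tables in AMDGPUUsage.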
2278
2279#define GET_FIELD(MASK) (AMDHSA_BITS_GET(FourByteBuffer, MASK))
2280#define PRINT_DIRECTIVE(DIRECTIVE, MASK) \
2281 do { \
2282 KdStream << Indent << DIRECTIVE " " << GET_FIELD(MASK) << '\n'; \
2283 } while (0)
2284#define PRINT_PSEUDO_DIRECTIVE_COMMENT(DIRECTIVE, MASK) \
2285 do { \
2286 KdStream << Indent << MAI.getCommentString() << ' ' << DIRECTIVE " " \
2287 << GET_FIELD(MASK) << '\n'; \
2288 } while (0)
2289
2290#define CHECK_RESERVED_BITS_IMPL(MASK, DESC, MSG) \
2291 do { \
2292 if (FourByteBuffer & (MASK)) { \
2293 return createStringError(std::errc::invalid_argument, \
2294 "kernel descriptor " DESC \
2295 " reserved %s set" MSG, \
2296 getBitRangeFromMask((MASK), 0).c_str()); \
2297 } \
2298 } while (0)
2299
2300#define CHECK_RESERVED_BITS(MASK) CHECK_RESERVED_BITS_IMPL(MASK, #MASK, "")
2301#define CHECK_RESERVED_BITS_MSG(MASK, MSG) \
2302 CHECK_RESERVED_BITS_IMPL(MASK, #MASK, ", " MSG)
2303#define CHECK_RESERVED_BITS_DESC(MASK, DESC) \
2304 CHECK_RESERVED_BITS_IMPL(MASK, DESC, "")
2305#define CHECK_RESERVED_BITS_DESC_MSG(MASK, DESC, MSG) \
2306 CHECK_RESERVED_BITS_IMPL(MASK, DESC, ", " MSG)
2307
2308// NOLINTNEXTLINE(readability-identifier-naming)
2309Expected<bool> AMDGPUDisassembler::decodeCOMPUTE_PGM_RSRC1(
2310 uint32_t FourByteBuffer, raw_string_ostream &KdStream) const {
2311 using namespace amdhsa;
2312 StringRef Indent = "\t";
2313
2314 // We cannot accurately backward compute #VGPRs used from
2315 // GRANULATED_WORKITEM_VGPR_COUNT. But we are concerned with getting the same
2316 // value of GRANULATED_WORKITEM_VGPR_COUNT in the reassembled binary. So we
2317 // simply calculate the inverse of what the assembler does.
2318
2319 uint32_t GranulatedWorkitemVGPRCount =
2320 GET_FIELD(COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT);
2321
2322 uint32_t NextFreeVGPR =
2323 (GranulatedWorkitemVGPRCount + 1) *
2324 AMDGPU::IsaInfo::getVGPREncodingGranule(&STI, EnableWavefrontSize32);
2325
2326 KdStream << Indent << ".amdhsa_next_free_vgpr " << NextFreeVGPR << '\n';
2327
2328 // We cannot backward compute values used to calculate
2329 // GRANULATED_WAVEFRONT_SGPR_COUNT. Hence the original values for the
2330 // following directives cannot be computed:
2331 // .amdhsa_reserve_vcc
2332 // .amdhsa_reserve_flat_scratch
2333 // .amdhsa_reserve_xnack_mask
2334 // They take their respective default values if not specified in the assembly.
2335 //
2336 // GRANULATED_WAVEFRONT_SGPR_COUNT
2337 // = f(NEXT_FREE_SGPR + VCC + FLAT_SCRATCH + XNACK_MASK)
2338 //
2339 // We compute the inverse as though all directives apart from NEXT_FREE_SGPR
2340 // are set to 0. So while disassembling we consider that:
2341 //
2342 // GRANULATED_WAVEFRONT_SGPR_COUNT
2343 // = f(NEXT_FREE_SGPR + 0 + 0 + 0)
2344 //
2345 // The disassembler cannot recover the original values of those 3 directives.
2346
2347 uint32_t GranulatedWavefrontSGPRCount =
2348 GET_FIELD(COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT);
2349
2350 if (isGFX10Plus())
2351 CHECK_RESERVED_BITS_MSG(COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT,
2352 "must be zero on gfx10+");
2353
2354 uint32_t NextFreeSGPR = (GranulatedWavefrontSGPRCount + 1) *
2355 AMDGPU::IsaInfo::getSGPREncodingGranule(&STI);
2356
2357 KdStream << Indent << ".amdhsa_reserve_vcc " << 0 << '\n';
2358 if (!hasArchitectedFlatScratch())
2359 KdStream << Indent << ".amdhsa_reserve_flat_scratch " << 0 << '\n';
2360 bool ReservedXnackMask = STI.hasFeature(AMDGPU::FeatureXNACK);
2361 assert(!ReservedXnackMask || STI.hasFeature(AMDGPU::FeatureSupportsXNACK));
2362 KdStream << Indent << ".amdhsa_reserve_xnack_mask " << ReservedXnackMask
2363 << '\n';
2364 KdStream << Indent << ".amdhsa_next_free_sgpr " << NextFreeSGPR << "\n";
2365
2366 CHECK_RESERVED_BITS(COMPUTE_PGM_RSRC1_PRIORITY);
2367
2368 PRINT_DIRECTIVE(".amdhsa_float_round_mode_32",
2369 COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_32);
2370 PRINT_DIRECTIVE(".amdhsa_float_round_mode_16_64",
2371 COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_16_64);
2372 PRINT_DIRECTIVE(".amdhsa_float_denorm_mode_32",
2373 COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_32);
2374 PRINT_DIRECTIVE(".amdhsa_float_denorm_mode_16_64",
2375 COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_16_64);
2376
2377 CHECK_RESERVED_BITS(COMPUTE_PGM_RSRC1_PRIV);
2378
2379 if (!isGFX12Plus())
2380 PRINT_DIRECTIVE(".amdhsa_dx10_clamp",
2381 COMPUTE_PGM_RSRC1_GFX6_GFX11_ENABLE_DX10_CLAMP);
2382
2383 CHECK_RESERVED_BITS(COMPUTE_PGM_RSRC1_DEBUG_MODE);
2384
2385 if (!isGFX12Plus())
2386 PRINT_DIRECTIVE(".amdhsa_ieee_mode",
2387 COMPUTE_PGM_RSRC1_GFX6_GFX11_ENABLE_IEEE_MODE);
2388
2389 CHECK_RESERVED_BITS(COMPUTE_PGM_RSRC1_BULKY);
2390 CHECK_RESERVED_BITS(COMPUTE_PGM_RSRC1_CDBG_USER);
2391
2392 // Bits [26].
2393 if (isGFX9Plus()) {
2394 PRINT_DIRECTIVE(".amdhsa_fp16_overflow", COMPUTE_PGM_RSRC1_GFX9_PLUS_FP16_OVFL);
2395 } else {
2396 CHECK_RESERVED_BITS_DESC_MSG(COMPUTE_PGM_RSRC1_GFX6_GFX8_RESERVED0,
2397 "COMPUTE_PGM_RSRC1", "must be zero pre-gfx9");
2398 }
2399
2400 // Bits [27].
2401 if (isGFX1250()) {
2402 PRINT_PSEUDO_DIRECTIVE_COMMENT("FLAT_SCRATCH_IS_NV",
2403 COMPUTE_PGM_RSRC1_GFX125_FLAT_SCRATCH_IS_NV);
2404 } else {
2405 CHECK_RESERVED_BITS_DESC(COMPUTE_PGM_RSRC1_GFX6_GFX120_RESERVED1,
2406 "COMPUTE_PGM_RSRC1");
2407 }
2408
2409 // Bits [28].
2410 CHECK_RESERVED_BITS_DESC(COMPUTE_PGM_RSRC1_RESERVED2, "COMPUTE_PGM_RSRC1");
2411
2412 // Bits [29-31].
2413 if (isGFX10Plus()) {
2414 // WGP_MODE is not available on GFX1250.
2415 if (!isGFX1250()) {
2416 PRINT_DIRECTIVE(".amdhsa_workgroup_processor_mode",
2417 COMPUTE_PGM_RSRC1_GFX10_PLUS_WGP_MODE);
2418 }
2419 PRINT_DIRECTIVE(".amdhsa_memory_ordered", COMPUTE_PGM_RSRC1_GFX10_PLUS_MEM_ORDERED);
2420 PRINT_DIRECTIVE(".amdhsa_forward_progress", COMPUTE_PGM_RSRC1_GFX10_PLUS_FWD_PROGRESS);
2421 } else {
2422 CHECK_RESERVED_BITS_DESC(COMPUTE_PGM_RSRC1_GFX6_GFX9_RESERVED3,
2423 "COMPUTE_PGM_RSRC1");
2424 }
2425
2426 if (isGFX12Plus())
2427 PRINT_DIRECTIVE(".amdhsa_round_robin_scheduling",
2428 COMPUTE_PGM_RSRC1_GFX12_PLUS_ENABLE_WG_RR_EN);
2429
2430 return true;
2431}
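// Editorial worked example: on a wave64 subtarget whose VGPR encoding granule
// is 4, GRANULATED_WORKITEM_VGPR_COUNT == 3 prints
// ".amdhsa_next_free_vgpr 16" ((3 + 1) * 4). Reassembling that directive
// regenerates the same granulated field, which is the only round-trip
// property this decoder guarantees.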
2432
2433// NOLINTNEXTLINE(readability-identifier-naming)
2434Expected<bool> AMDGPUDisassembler::decodeCOMPUTE_PGM_RSRC2(
2435 uint32_t FourByteBuffer, raw_string_ostream &KdStream) const {
2436 using namespace amdhsa;
2437 StringRef Indent = "\t";
2438 if (hasArchitectedFlatScratch())
2439 PRINT_DIRECTIVE(".amdhsa_enable_private_segment",
2440 COMPUTE_PGM_RSRC2_ENABLE_PRIVATE_SEGMENT);
2441 else
2442 PRINT_DIRECTIVE(".amdhsa_system_sgpr_private_segment_wavefront_offset",
2443 COMPUTE_PGM_RSRC2_ENABLE_PRIVATE_SEGMENT);
2444 PRINT_DIRECTIVE(".amdhsa_system_sgpr_workgroup_id_x",
2445 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_X);
2446 PRINT_DIRECTIVE(".amdhsa_system_sgpr_workgroup_id_y",
2447 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Y);
2448 PRINT_DIRECTIVE(".amdhsa_system_sgpr_workgroup_id_z",
2449 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Z);
2450 PRINT_DIRECTIVE(".amdhsa_system_sgpr_workgroup_info",
2451 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_INFO);
2452 PRINT_DIRECTIVE(".amdhsa_system_vgpr_workitem_id",
2453 COMPUTE_PGM_RSRC2_ENABLE_VGPR_WORKITEM_ID);
2454
2455 CHECK_RESERVED_BITS(COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_ADDRESS_WATCH);
2456 CHECK_RESERVED_BITS(COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_MEMORY);
2457 CHECK_RESERVED_BITS(COMPUTE_PGM_RSRC2_GRANULATED_LDS_SIZE);
2458
2460 ".amdhsa_exception_fp_ieee_invalid_op",
2461 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INVALID_OPERATION);
2462 PRINT_DIRECTIVE(".amdhsa_exception_fp_denorm_src",
2463 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_FP_DENORMAL_SOURCE);
2465 ".amdhsa_exception_fp_ieee_div_zero",
2466 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_DIVISION_BY_ZERO);
2467 PRINT_DIRECTIVE(".amdhsa_exception_fp_ieee_overflow",
2468 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_OVERFLOW);
2469 PRINT_DIRECTIVE(".amdhsa_exception_fp_ieee_underflow",
2470 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_UNDERFLOW);
2471 PRINT_DIRECTIVE(".amdhsa_exception_fp_ieee_inexact",
2472 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INEXACT);
2473 PRINT_DIRECTIVE(".amdhsa_exception_int_div_zero",
2474 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_INT_DIVIDE_BY_ZERO);
2475
2476 CHECK_RESERVED_BITS_DESC(COMPUTE_PGM_RSRC2_RESERVED0, "COMPUTE_PGM_RSRC2");
2477
2478 return true;
2479}
2480
2481// NOLINTNEXTLINE(readability-identifier-naming)
2482Expected<bool> AMDGPUDisassembler::decodeCOMPUTE_PGM_RSRC3(
2483 uint32_t FourByteBuffer, raw_string_ostream &KdStream) const {
2484 using namespace amdhsa;
2485 StringRef Indent = "\t";
2486 if (isGFX90A()) {
2487 KdStream << Indent << ".amdhsa_accum_offset "
2488 << (GET_FIELD(COMPUTE_PGM_RSRC3_GFX90A_ACCUM_OFFSET) + 1) * 4
2489 << '\n';
2490
2491 PRINT_DIRECTIVE(".amdhsa_tg_split", COMPUTE_PGM_RSRC3_GFX90A_TG_SPLIT);
2492
2493 CHECK_RESERVED_BITS_DESC_MSG(COMPUTE_PGM_RSRC3_GFX90A_RESERVED0,
2494 "COMPUTE_PGM_RSRC3", "must be zero on gfx90a");
2495 CHECK_RESERVED_BITS_DESC_MSG(COMPUTE_PGM_RSRC3_GFX90A_RESERVED1,
2496 "COMPUTE_PGM_RSRC3", "must be zero on gfx90a");
2497 } else if (isGFX10Plus()) {
2498 // Bits [0-3].
2499 if (!isGFX12Plus()) {
2500 if (!EnableWavefrontSize32 || !*EnableWavefrontSize32) {
2501 PRINT_DIRECTIVE(".amdhsa_shared_vgpr_count",
2502 COMPUTE_PGM_RSRC3_GFX10_GFX11_SHARED_VGPR_COUNT);
2503 } else {
2505 "SHARED_VGPR_COUNT",
2506 COMPUTE_PGM_RSRC3_GFX10_GFX11_SHARED_VGPR_COUNT);
2507 }
2508 } else {
2509 CHECK_RESERVED_BITS_DESC_MSG(COMPUTE_PGM_RSRC3_GFX12_PLUS_RESERVED0,
2510 "COMPUTE_PGM_RSRC3",
2511 "must be zero on gfx12+");
2512 }
2513
2514 // Bits [4-11].
2515 if (isGFX11()) {
2516 PRINT_DIRECTIVE(".amdhsa_inst_pref_size",
2517 COMPUTE_PGM_RSRC3_GFX11_INST_PREF_SIZE);
2518 PRINT_PSEUDO_DIRECTIVE_COMMENT("TRAP_ON_START",
2519 COMPUTE_PGM_RSRC3_GFX11_TRAP_ON_START);
2520 PRINT_PSEUDO_DIRECTIVE_COMMENT("TRAP_ON_END",
2521 COMPUTE_PGM_RSRC3_GFX11_TRAP_ON_END);
2522 } else if (isGFX12Plus()) {
2523 PRINT_DIRECTIVE(".amdhsa_inst_pref_size",
2524 COMPUTE_PGM_RSRC3_GFX12_PLUS_INST_PREF_SIZE);
2525 } else {
2526 CHECK_RESERVED_BITS_DESC_MSG(COMPUTE_PGM_RSRC3_GFX10_RESERVED1,
2527 "COMPUTE_PGM_RSRC3",
2528 "must be zero on gfx10");
2529 }
2530
2531 // Bits [12].
2532 CHECK_RESERVED_BITS_DESC_MSG(COMPUTE_PGM_RSRC3_GFX10_PLUS_RESERVED2,
2533 "COMPUTE_PGM_RSRC3", "must be zero on gfx10+");
2534
2535 // Bits [13].
2536 if (isGFX12Plus()) {
2537 PRINT_PSEUDO_DIRECTIVE_COMMENT("GLG_EN",
2538 COMPUTE_PGM_RSRC3_GFX12_PLUS_GLG_EN);
2539 } else {
2540 CHECK_RESERVED_BITS_DESC_MSG(COMPUTE_PGM_RSRC3_GFX10_GFX11_RESERVED3,
2541 "COMPUTE_PGM_RSRC3",
2542 "must be zero on gfx10 or gfx11");
2543 }
2544
2545 // Bits [14-21].
2546 if (isGFX1250()) {
2547 PRINT_DIRECTIVE(".amdhsa_named_barrier_count",
2548 COMPUTE_PGM_RSRC3_GFX125_NAMED_BAR_CNT);
2550 "ENABLE_DYNAMIC_VGPR", COMPUTE_PGM_RSRC3_GFX125_ENABLE_DYNAMIC_VGPR);
2551 PRINT_PSEUDO_DIRECTIVE_COMMENT("TCP_SPLIT",
2552 COMPUTE_PGM_RSRC3_GFX125_TCP_SPLIT);
2554 "ENABLE_DIDT_THROTTLE",
2555 COMPUTE_PGM_RSRC3_GFX125_ENABLE_DIDT_THROTTLE);
2556 } else {
2557 CHECK_RESERVED_BITS_DESC_MSG(COMPUTE_PGM_RSRC3_GFX10_GFX120_RESERVED4,
2558 "COMPUTE_PGM_RSRC3",
2559 "must be zero on gfx10+");
2560 }
2561
2562 // Bits [22-30].
2563 CHECK_RESERVED_BITS_DESC_MSG(COMPUTE_PGM_RSRC3_GFX10_PLUS_RESERVED5,
2564 "COMPUTE_PGM_RSRC3", "must be zero on gfx10+");
2565
2566 // Bits [31].
2567 if (isGFX11Plus()) {
2568 PRINT_PSEUDO_DIRECTIVE_COMMENT("IMAGE_OP",
2569 COMPUTE_PGM_RSRC3_GFX11_PLUS_IMAGE_OP);
2570 } else {
2571 CHECK_RESERVED_BITS_DESC_MSG(COMPUTE_PGM_RSRC3_GFX10_RESERVED6,
2572 "COMPUTE_PGM_RSRC3",
2573 "must be zero on gfx10");
2574 }
2575 } else if (FourByteBuffer) {
2576 return createStringError(
2577 std::errc::invalid_argument,
2578 "kernel descriptor COMPUTE_PGM_RSRC3 must be all zero before gfx9");
2579 }
2580 return true;
2581}
2582#undef PRINT_PSEUDO_DIRECTIVE_COMMENT
2583#undef PRINT_DIRECTIVE
2584#undef GET_FIELD
2585#undef CHECK_RESERVED_BITS_IMPL
2586#undef CHECK_RESERVED_BITS
2587#undef CHECK_RESERVED_BITS_MSG
2588#undef CHECK_RESERVED_BITS_DESC
2589#undef CHECK_RESERVED_BITS_DESC_MSG
2590
2591/// Create an error object to return from onSymbolStart for reserved kernel
2592/// descriptor bits being set.
2593static Error createReservedKDBitsError(uint32_t Mask, unsigned BaseBytes,
2594 const char *Msg = "") {
2595 return createStringError(
2596 std::errc::invalid_argument, "kernel descriptor reserved %s set%s%s",
2597 getBitRangeFromMask(Mask, BaseBytes).c_str(), *Msg ? ", " : "", Msg);
2598}
2599
2600/// Create an error object to return from onSymbolStart for reserved kernel
2601/// descriptor bytes being set.
2602static Error createReservedKDBytesError(unsigned BaseInBytes,
2603 unsigned WidthInBytes) {
2604 // Create an error comment in the same format as the "Kernel Descriptor"
2605 // table here: https://llvm.org/docs/AMDGPUUsage.html#kernel-descriptor .
2606 return createStringError(
2607 std::errc::invalid_argument,
2608 "kernel descriptor reserved bits in range (%u:%u) set",
2609 (BaseInBytes + WidthInBytes) * CHAR_BIT - 1, BaseInBytes * CHAR_BIT);
2610}
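// Editorial example: createReservedKDBytesError(40, 4) produces
// "kernel descriptor reserved bits in range (351:320) set" -- bytes 40..43
// expressed as a bit range, in the same format as the AMDGPUUsage
// kernel-descriptor table.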
2611
2612Expected<bool> AMDGPUDisassembler::decodeKernelDescriptorDirective(
2613 DataExtractor::Cursor &Cursor, ArrayRef<uint8_t> Bytes,
2614 raw_string_ostream &KdStream) const {
2615#define PRINT_DIRECTIVE(DIRECTIVE, MASK) \
2616 do { \
2617 KdStream << Indent << DIRECTIVE " " \
2618 << ((TwoByteBuffer & MASK) >> (MASK##_SHIFT)) << '\n'; \
2619 } while (0)
2620
2621 uint16_t TwoByteBuffer = 0;
2622 uint32_t FourByteBuffer = 0;
2623
2624 StringRef ReservedBytes;
2625 StringRef Indent = "\t";
2626
2627 assert(Bytes.size() == 64);
2628 DataExtractor DE(Bytes, /*IsLittleEndian=*/true, /*AddressSize=*/8);
2629
2630 switch (Cursor.tell()) {
2631 case amdhsa::GROUP_SEGMENT_FIXED_SIZE_OFFSET:
2632 FourByteBuffer = DE.getU32(Cursor);
2633 KdStream << Indent << ".amdhsa_group_segment_fixed_size " << FourByteBuffer
2634 << '\n';
2635 return true;
2636
2637 case amdhsa::PRIVATE_SEGMENT_FIXED_SIZE_OFFSET:
2638 FourByteBuffer = DE.getU32(Cursor);
2639 KdStream << Indent << ".amdhsa_private_segment_fixed_size "
2640 << FourByteBuffer << '\n';
2641 return true;
2642
2643 case amdhsa::KERNARG_SIZE_OFFSET:
2644 FourByteBuffer = DE.getU32(Cursor);
2645 KdStream << Indent << ".amdhsa_kernarg_size "
2646 << FourByteBuffer << '\n';
2647 return true;
2648
2649 case amdhsa::RESERVED0_OFFSET:
2650 // 4 reserved bytes, must be 0.
2651 ReservedBytes = DE.getBytes(Cursor, 4);
2652 for (int I = 0; I < 4; ++I) {
2653 if (ReservedBytes[I] != 0)
2654 return createReservedKDBytesError(amdhsa::RESERVED0_OFFSET, 4);
2655 }
2656 return true;
2657
2658 case amdhsa::KERNEL_CODE_ENTRY_BYTE_OFFSET_OFFSET:
2659 // KERNEL_CODE_ENTRY_BYTE_OFFSET
2660 // So far no directive controls this for Code Object V3, so simply skip for
2661 // disassembly.
2662 DE.skip(Cursor, 8);
2663 return true;
2664
2665 case amdhsa::RESERVED1_OFFSET:
2666 // 20 reserved bytes, must be 0.
2667 ReservedBytes = DE.getBytes(Cursor, 20);
2668 for (int I = 0; I < 20; ++I) {
2669 if (ReservedBytes[I] != 0)
2670 return createReservedKDBytesError(amdhsa::RESERVED1_OFFSET, 20);
2671 }
2672 return true;
2673
2674 case amdhsa::COMPUTE_PGM_RSRC3_OFFSET:
2675 FourByteBuffer = DE.getU32(Cursor);
2676 return decodeCOMPUTE_PGM_RSRC3(FourByteBuffer, KdStream);
2677
2678 case amdhsa::COMPUTE_PGM_RSRC1_OFFSET:
2679 FourByteBuffer = DE.getU32(Cursor);
2680 return decodeCOMPUTE_PGM_RSRC1(FourByteBuffer, KdStream);
2681
2682 case amdhsa::COMPUTE_PGM_RSRC2_OFFSET:
2683 FourByteBuffer = DE.getU32(Cursor);
2684 return decodeCOMPUTE_PGM_RSRC2(FourByteBuffer, KdStream);
2685
2686 case amdhsa::KERNEL_CODE_PROPERTIES_OFFSET:
2687 using namespace amdhsa;
2688 TwoByteBuffer = DE.getU16(Cursor);
2689
2690 if (!hasArchitectedFlatScratch())
2691 PRINT_DIRECTIVE(".amdhsa_user_sgpr_private_segment_buffer",
2692 KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER);
2693 PRINT_DIRECTIVE(".amdhsa_user_sgpr_dispatch_ptr",
2694 KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR);
2695 PRINT_DIRECTIVE(".amdhsa_user_sgpr_queue_ptr",
2696 KERNEL_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR);
2697 PRINT_DIRECTIVE(".amdhsa_user_sgpr_kernarg_segment_ptr",
2698 KERNEL_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR);
2699 PRINT_DIRECTIVE(".amdhsa_user_sgpr_dispatch_id",
2700 KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID);
2701 if (!hasArchitectedFlatScratch())
2702 PRINT_DIRECTIVE(".amdhsa_user_sgpr_flat_scratch_init",
2703 KERNEL_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT);
2704 PRINT_DIRECTIVE(".amdhsa_user_sgpr_private_segment_size",
2705 KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE);
2706
2707 if (TwoByteBuffer & KERNEL_CODE_PROPERTY_RESERVED0)
2708 return createReservedKDBitsError(KERNEL_CODE_PROPERTY_RESERVED0,
2709 amdhsa::KERNEL_CODE_PROPERTIES_OFFSET);
2710
2711 // Reserved for GFX9
2712 if (isGFX9() &&
2713 (TwoByteBuffer & KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32)) {
2714 return createReservedKDBitsError(
2715 KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32,
2716 amdhsa::KERNEL_CODE_PROPERTIES_OFFSET, "must be zero on gfx9");
2717 }
2718 if (isGFX10Plus()) {
2719 PRINT_DIRECTIVE(".amdhsa_wavefront_size32",
2720 KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32);
2721 }
2722
2723 if (CodeObjectVersion >= AMDGPU::AMDHSA_COV5)
2724 PRINT_DIRECTIVE(".amdhsa_uses_dynamic_stack",
2725 KERNEL_CODE_PROPERTY_USES_DYNAMIC_STACK);
2726
2727 if (TwoByteBuffer & KERNEL_CODE_PROPERTY_RESERVED1) {
2728 return createReservedKDBitsError(KERNEL_CODE_PROPERTY_RESERVED1,
2729 amdhsa::KERNEL_CODE_PROPERTIES_OFFSET);
2730 }
2731
2732 return true;
2733
2734 case amdhsa::KERNARG_PRELOAD_OFFSET:
2735 using namespace amdhsa;
2736 TwoByteBuffer = DE.getU16(Cursor);
2737 if (TwoByteBuffer & KERNARG_PRELOAD_SPEC_LENGTH) {
2738 PRINT_DIRECTIVE(".amdhsa_user_sgpr_kernarg_preload_length",
2739 KERNARG_PRELOAD_SPEC_LENGTH);
2740 }
2741
2742 if (TwoByteBuffer & KERNARG_PRELOAD_SPEC_OFFSET) {
2743 PRINT_DIRECTIVE(".amdhsa_user_sgpr_kernarg_preload_offset",
2744 KERNARG_PRELOAD_SPEC_OFFSET);
2745 }
2746 return true;
2747
2748 case amdhsa::RESERVED3_OFFSET:
2749 // 4 bytes from here are reserved, must be 0.
2750 ReservedBytes = DE.getBytes(Cursor, 4);
2751 for (int I = 0; I < 4; ++I) {
2752 if (ReservedBytes[I] != 0)
2753 return createReservedKDBytesError(amdhsa::RESERVED3_OFFSET, 4);
2754 }
2755 return true;
2756
2757 default:
2758 llvm_unreachable("Unhandled index. Case statements cover everything.");
2759 return true;
2760 }
2761#undef PRINT_DIRECTIVE
2762}
2763
2764Expected<bool> AMDGPUDisassembler::decodeKernelDescriptor(
2765 StringRef KdName, ArrayRef<uint8_t> Bytes, uint64_t KdAddress) const {
2766
2767 // CP microcode requires the kernel descriptor to be 64-byte aligned.
2768 if (Bytes.size() != 64 || KdAddress % 64 != 0)
2769 return createStringError(std::errc::invalid_argument,
2770 "kernel descriptor must be 64-byte aligned");
2771
2772 // FIXME: We can't actually decode "in order" as is done below, as e.g. GFX10
2773 // requires us to know the setting of .amdhsa_wavefront_size32 in order to
2774 // accurately produce .amdhsa_next_free_vgpr, and they appear in the wrong
2775 // order. Workaround this by first looking up .amdhsa_wavefront_size32 here
2776 // when required.
2777 if (isGFX10Plus()) {
2778 uint16_t KernelCodeProperties =
2779 support::endian::read16(&Bytes[amdhsa::KERNEL_CODE_PROPERTIES_OFFSET],
2780 llvm::endianness::little);
2781 EnableWavefrontSize32 =
2782 AMDHSA_BITS_GET(KernelCodeProperties,
2783 amdhsa::KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32);
2784 }
2785
2786 std::string Kd;
2787 raw_string_ostream KdStream(Kd);
2788 KdStream << ".amdhsa_kernel " << KdName << '\n';
2789
2790 DataExtractor::Cursor C(0);
2791 while (C && C.tell() < Bytes.size()) {
2792 Expected<bool> Res = decodeKernelDescriptorDirective(C, Bytes, KdStream);
2793
2794 cantFail(C.takeError());
2795
2796 if (!Res)
2797 return Res;
2798 }
2799 KdStream << ".end_amdhsa_kernel\n";
2800 outs() << KdStream.str();
2801 return true;
2802}
2803
2804Expected<bool> AMDGPUDisassembler::onSymbolStart(SymbolInfoTy &Symbol,
2805 uint64_t &Size,
2806 ArrayRef<uint8_t> Bytes,
2807 uint64_t Address) const {
2808 // Right now only kernel descriptor needs to be handled.
2809 // We ignore all other symbols for target specific handling.
2810 // TODO:
2811 // Fix the spurious symbol issue for AMDGPU kernels. Exists for both Code
2812 // Object V2 and V3 when symbols are marked protected.
2813
2814 // amd_kernel_code_t for Code Object V2.
2815 if (Symbol.Type == ELF::STT_AMDGPU_HSA_KERNEL) {
2816 Size = 256;
2817 return createStringError(std::errc::invalid_argument,
2818 "code object v2 is not supported");
2819 }
2820
2821 // Code Object V3 kernel descriptors.
2822 StringRef Name = Symbol.Name;
2823 if (Symbol.Type == ELF::STT_OBJECT && Name.ends_with(StringRef(".kd"))) {
2824 Size = 64; // Size = 64 regardless of success or failure.
2825 return decodeKernelDescriptor(Name.drop_back(3), Bytes, Address);
2826 }
2827
2828 return false;
2829}
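// Editorial sketch: an STT_OBJECT symbol named "my_kernel.kd" is handled as a
// 64-byte code object v3 kernel descriptor; the ".kd" suffix is dropped, the
// printed disassembly begins ".amdhsa_kernel my_kernel", and Size is set to
// 64 whether or not decoding succeeds.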
2830
2831const MCExpr *AMDGPUDisassembler::createConstantSymbolExpr(StringRef Id,
2832 int64_t Val) {
2833 MCContext &Ctx = getContext();
2834 MCSymbol *Sym = Ctx.getOrCreateSymbol(Id);
2835 // Note: only set the value to Val on a new symbol, in case a disassembler
2836 // has already been initialized in this context.
2837 if (!Sym->isVariable()) {
2838 Sym->setVariableValue(MCConstantExpr::create(Val, Ctx));
2839 } else {
2840 int64_t Res = ~Val;
2841 bool Valid = Sym->getVariableValue()->evaluateAsAbsolute(Res);
2842 if (!Valid || Res != Val)
2843 Ctx.reportWarning(SMLoc(), "unsupported redefinition of " + Id);
2844 }
2845 return MCSymbolRefExpr::create(Sym, Ctx);
2846}
2847
2848bool AMDGPUDisassembler::isBufferInstruction(const MCInst &MI) const {
2849 const uint64_t TSFlags = MCII->get(MI.getOpcode()).TSFlags;
2850
2851 // Check for MUBUF and MTBUF instructions
2852 if (TSFlags & (SIInstrFlags::MTBUF | SIInstrFlags::MUBUF))
2853 return true;
2854
2855 // Check for SMEM buffer instructions (S_BUFFER_* instructions)
2856 if ((TSFlags & SIInstrFlags::SMRD) && AMDGPU::getSMEMIsBuffer(MI.getOpcode()))
2857 return true;
2858
2859 return false;
2860}
2861
2862//===----------------------------------------------------------------------===//
2863// AMDGPUSymbolizer
2864//===----------------------------------------------------------------------===//
2865
2866// Try to find symbol name for specified label
2867bool AMDGPUSymbolizer::tryAddingSymbolicOperand(
2868 MCInst &Inst, raw_ostream & /*cStream*/, int64_t Value,
2869 uint64_t /*Address*/, bool IsBranch, uint64_t /*Offset*/,
2870 uint64_t /*OpSize*/, uint64_t /*InstSize*/) {
2871
2872 if (!IsBranch) {
2873 return false;
2874 }
2875
2876 auto *Symbols = static_cast<SectionSymbolsTy *>(DisInfo);
2877 if (!Symbols)
2878 return false;
2879
2880 auto Result = llvm::find_if(*Symbols, [Value](const SymbolInfoTy &Val) {
2881 return Val.Addr == static_cast<uint64_t>(Value) &&
2882 Val.Type == ELF::STT_NOTYPE;
2883 });
2884 if (Result != Symbols->end()) {
2885 auto *Sym = Ctx.getOrCreateSymbol(Result->Name);
2886 const auto *Add = MCSymbolRefExpr::create(Sym, Ctx);
2887 Inst.addOperand(MCOperand::createExpr(Add));
2888 return true;
2889 }
2890 // Add to list of referenced addresses, so caller can synthesize a label.
2891 ReferencedAddresses.push_back(static_cast<uint64_t>(Value));
2892 return false;
2893}
2894
2895void AMDGPUSymbolizer::tryAddingPcLoadReferenceComment(raw_ostream &cStream,
2896 int64_t Value,
2897 uint64_t Address) {
2898 llvm_unreachable("unimplemented");
2899}
2900
2901//===----------------------------------------------------------------------===//
2902// Initialization
2903//===----------------------------------------------------------------------===//
2904
2905static MCSymbolizer *createAMDGPUSymbolizer(const Triple & /*TT*/,
2906 LLVMOpInfoCallback /*GetOpInfo*/,
2907 LLVMSymbolLookupCallback /*SymbolLookUp*/,
2908 void *DisInfo,
2909 MCContext *Ctx,
2910 std::unique_ptr<MCRelocationInfo> &&RelInfo) {
2911 return new AMDGPUSymbolizer(*Ctx, std::move(RelInfo), DisInfo);
2912}
2913
2914static MCDisassembler *createAMDGPUDisassembler(const Target &T,
2915 const MCSubtargetInfo &STI,
2916 MCContext &Ctx) {
2917 return new AMDGPUDisassembler(STI, Ctx, T.createMCInstrInfo());
2918}
2919
2920extern "C" LLVM_ABI LLVM_EXTERNAL_VISIBILITY void
2921LLVMInitializeAMDGPUDisassembler() {
2922 TargetRegistry::RegisterMCDisassembler(getTheGCNTarget(),
2923 createAMDGPUDisassembler);
2924 TargetRegistry::RegisterMCSymbolizer(getTheGCNTarget(),
2925 createAMDGPUSymbolizer);
2926}