LLVM 22.0.0git
AMDGPUDisassembler.cpp
Go to the documentation of this file.
1//===- AMDGPUDisassembler.cpp - Disassembler for AMDGPU ISA ---------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9//===----------------------------------------------------------------------===//
10//
11/// \file
12///
13/// This file contains definition for AMDGPU ISA disassembler
14//
15//===----------------------------------------------------------------------===//
16
17// ToDo: What to do with instruction suffixes (v_mov_b32 vs v_mov_b32_e32)?
18
22#include "SIDefines.h"
23#include "SIRegisterInfo.h"
29#include "llvm/MC/MCAsmInfo.h"
30#include "llvm/MC/MCContext.h"
31#include "llvm/MC/MCDecoder.h"
33#include "llvm/MC/MCExpr.h"
34#include "llvm/MC/MCInstrDesc.h"
40
41using namespace llvm;
42using namespace llvm::MCD;
43
44#define DEBUG_TYPE "amdgpu-disassembler"
45
46#define SGPR_MAX \
47 (isGFX10Plus() ? AMDGPU::EncValues::SGPR_MAX_GFX10 \
48 : AMDGPU::EncValues::SGPR_MAX_SI)
49
51
52static int64_t getInlineImmValF16(unsigned Imm);
53static int64_t getInlineImmValBF16(unsigned Imm);
54static int64_t getInlineImmVal32(unsigned Imm);
55static int64_t getInlineImmVal64(unsigned Imm);
56
58 MCContext &Ctx, MCInstrInfo const *MCII)
59 : MCDisassembler(STI, Ctx), MCII(MCII), MRI(*Ctx.getRegisterInfo()),
60 MAI(*Ctx.getAsmInfo()), TargetMaxInstBytes(MAI.getMaxInstLength(&STI)),
61 CodeObjectVersion(AMDGPU::getDefaultAMDHSACodeObjectVersion()) {
62 // ToDo: AMDGPUDisassembler supports only VI ISA.
63 if (!STI.hasFeature(AMDGPU::FeatureGCN3Encoding) && !isGFX10Plus())
64 reportFatalUsageError("disassembly not yet supported for subtarget");
65
66 for (auto [Symbol, Code] : AMDGPU::UCVersion::getGFXVersions())
67 createConstantSymbolExpr(Symbol, Code);
68
69 UCVersionW64Expr = createConstantSymbolExpr("UC_VERSION_W64_BIT", 0x2000);
70 UCVersionW32Expr = createConstantSymbolExpr("UC_VERSION_W32_BIT", 0x4000);
71 UCVersionMDPExpr = createConstantSymbolExpr("UC_VERSION_MDP_BIT", 0x8000);
72}
73
77
79addOperand(MCInst &Inst, const MCOperand& Opnd) {
80 Inst.addOperand(Opnd);
81 return Opnd.isValid() ?
84}
85
87 AMDGPU::OpName Name) {
88 int OpIdx = AMDGPU::getNamedOperandIdx(MI.getOpcode(), Name);
89 if (OpIdx != -1) {
90 auto *I = MI.begin();
91 std::advance(I, OpIdx);
92 MI.insert(I, Op);
93 }
94 return OpIdx;
95}
96
97static DecodeStatus decodeSOPPBrTarget(MCInst &Inst, unsigned Imm,
98 uint64_t Addr,
99 const MCDisassembler *Decoder) {
100 const auto *DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
101
102 // Our branches take a simm16.
103 int64_t Offset = SignExtend64<16>(Imm) * 4 + 4 + Addr;
104
105 if (DAsm->tryAddingSymbolicOperand(Inst, Offset, Addr, true, 2, 2, 0))
107 return addOperand(Inst, MCOperand::createImm(Imm));
108}
109
110static DecodeStatus decodeSMEMOffset(MCInst &Inst, unsigned Imm, uint64_t Addr,
111 const MCDisassembler *Decoder) {
112 const auto *DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
113 int64_t Offset;
114 if (DAsm->isGFX12Plus()) { // GFX12 supports 24-bit signed offsets.
116 } else if (DAsm->isVI()) { // VI supports 20-bit unsigned offsets.
117 Offset = Imm & 0xFFFFF;
118 } else { // GFX9+ supports 21-bit signed offsets.
120 }
122}
123
124static DecodeStatus decodeBoolReg(MCInst &Inst, unsigned Val, uint64_t Addr,
125 const MCDisassembler *Decoder) {
126 const auto *DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
127 return addOperand(Inst, DAsm->decodeBoolReg(Inst, Val));
128}
129
130static DecodeStatus decodeSplitBarrier(MCInst &Inst, unsigned Val,
131 uint64_t Addr,
132 const MCDisassembler *Decoder) {
133 const auto *DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
134 return addOperand(Inst, DAsm->decodeSplitBarrier(Inst, Val));
135}
136
137static DecodeStatus decodeDpp8FI(MCInst &Inst, unsigned Val, uint64_t Addr,
138 const MCDisassembler *Decoder) {
139 const auto *DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
140 return addOperand(Inst, DAsm->decodeDpp8FI(Val));
141}
142
// Emits a static decoder callback 'StaticDecoderName' that forwards the raw
// immediate to the AMDGPUDisassembler member function 'DecoderName' and
// appends the resulting operand to the instruction.
#define DECODE_OPERAND(StaticDecoderName, DecoderName)                        \
  static DecodeStatus StaticDecoderName(MCInst &Inst, unsigned Imm,           \
                                        uint64_t /*Addr*/,                    \
                                        const MCDisassembler *Decoder) {      \
    const auto *DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);      \
    return addOperand(Inst, DAsm->DecoderName(Imm));                          \
  }
150
151// Decoder for registers, decode directly using RegClassID. Imm(8-bit) is
152// number of register. Used by VGPR only and AGPR only operands.
// Emits 'Decode<RegClass>RegisterClass': interprets the 8-bit immediate as a
// register number within RegClass and appends the register operand. Used for
// VGPR-only and AGPR-only operands (see comment above).
#define DECODE_OPERAND_REG_8(RegClass)                                        \
  static DecodeStatus Decode##RegClass##RegisterClass(                        \
      MCInst &Inst, unsigned Imm, uint64_t /*Addr*/,                          \
      const MCDisassembler *Decoder) {                                        \
    assert(Imm < (1 << 8) && "8-bit encoding");                               \
    const auto *DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);      \
    return addOperand(                                                        \
        Inst, DAsm->createRegOperand(AMDGPU::RegClass##RegClassID, Imm));     \
  }
162
// Emits a decoder 'Name' for an EncSize-bit source encoding: asserts the
// immediate fits, then decodes EncImm (an expression over Imm) through
// AMDGPUDisassembler::decodeSrcOp with the given operand width.
#define DECODE_SrcOp(Name, EncSize, OpWidth, EncImm)                          \
  static DecodeStatus Name(MCInst &Inst, unsigned Imm, uint64_t /*Addr*/,     \
                           const MCDisassembler *Decoder) {                   \
    assert(Imm < (1 << EncSize) && #EncSize "-bit encoding");                 \
    const auto *DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);      \
    return addOperand(Inst, DAsm->decodeSrcOp(Inst, OpWidth, EncImm));        \
  }
170
171static DecodeStatus decodeSrcOp(MCInst &Inst, unsigned EncSize,
172 unsigned OpWidth, unsigned Imm, unsigned EncImm,
173 const MCDisassembler *Decoder) {
174 assert(Imm < (1U << EncSize) && "Operand doesn't fit encoding!");
175 const auto *DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
176 return addOperand(Inst, DAsm->decodeSrcOp(Inst, OpWidth, EncImm));
177}
178
179// Decoder for registers. Imm(7-bit) is number of register, uses decodeSrcOp to
180// get register class. Used by SGPR only operands.
#define DECODE_OPERAND_SREG_7(RegClass, OpWidth) \
  DECODE_SrcOp(Decode##RegClass##RegisterClass, 7, OpWidth, Imm)

// Same as DECODE_OPERAND_SREG_7 but for register operands with an 8-bit
// encoding; the immediate is passed through to decodeSrcOp unchanged.
#define DECODE_OPERAND_SREG_8(RegClass, OpWidth) \
  DECODE_SrcOp(Decode##RegClass##RegisterClass, 8, OpWidth, Imm)
186
187// Decoder for registers. Imm(10-bit): Imm{7-0} is number of register,
188// Imm{9} is acc(agpr or vgpr) Imm{8} should be 0 (see VOP3Pe_SMFMAC).
189// Set Imm{8} to 1 (IS_VGPR) to decode using 'enum10' from decodeSrcOp.
190// Used by AV_ register classes (AGPR or VGPR only register operands).
191template <unsigned OpWidth>
192static DecodeStatus decodeAV10(MCInst &Inst, unsigned Imm, uint64_t /* Addr */,
193 const MCDisassembler *Decoder) {
194 return decodeSrcOp(Inst, 10, OpWidth, Imm, Imm | AMDGPU::EncValues::IS_VGPR,
195 Decoder);
196}
197
198// Decoder for Src(9-bit encoding) registers only.
199template <unsigned OpWidth>
200static DecodeStatus decodeSrcReg9(MCInst &Inst, unsigned Imm,
201 uint64_t /* Addr */,
202 const MCDisassembler *Decoder) {
203 return decodeSrcOp(Inst, 9, OpWidth, Imm, Imm, Decoder);
204}
205
206// Decoder for Src(9-bit encoding) AGPR, register number encoded in 9bits, set
207// Imm{9} to 1 (set acc) and decode using 'enum10' from decodeSrcOp, registers
208// only.
209template <unsigned OpWidth>
210static DecodeStatus decodeSrcA9(MCInst &Inst, unsigned Imm, uint64_t /* Addr */,
211 const MCDisassembler *Decoder) {
212 return decodeSrcOp(Inst, 9, OpWidth, Imm, Imm | 512, Decoder);
213}
214
215// Decoder for 'enum10' from decodeSrcOp, Imm{0-8} is 9-bit Src encoding
216// Imm{9} is acc, registers only.
217template <unsigned OpWidth>
218static DecodeStatus decodeSrcAV10(MCInst &Inst, unsigned Imm,
219 uint64_t /* Addr */,
220 const MCDisassembler *Decoder) {
221 return decodeSrcOp(Inst, 10, OpWidth, Imm, Imm, Decoder);
222}
223
224// Decoder for RegisterOperands using 9-bit Src encoding. Operand can be
225// register from RegClass or immediate. Registers that don't belong to RegClass
226// will be decoded and InstPrinter will report warning. Immediate will be
227// decoded into constant matching the OperandType (important for floating point
228// types).
229template <unsigned OpWidth>
230static DecodeStatus decodeSrcRegOrImm9(MCInst &Inst, unsigned Imm,
231 uint64_t /* Addr */,
232 const MCDisassembler *Decoder) {
233 return decodeSrcOp(Inst, 9, OpWidth, Imm, Imm, Decoder);
234}
235
236// Decoder for Src(9-bit encoding) AGPR or immediate. Set Imm{9} to 1 (set acc)
237// and decode using 'enum10' from decodeSrcOp.
238template <unsigned OpWidth>
239static DecodeStatus decodeSrcRegOrImmA9(MCInst &Inst, unsigned Imm,
240 uint64_t /* Addr */,
241 const MCDisassembler *Decoder) {
242 return decodeSrcOp(Inst, 9, OpWidth, Imm, Imm | 512, Decoder);
243}
244
245// Default decoders generated by tablegen: 'Decode<RegClass>RegisterClass'
246// when RegisterClass is used as an operand. Most often used for destination
247// operands.
248
250DECODE_OPERAND_REG_8(VGPR_32_Lo128)
253DECODE_OPERAND_REG_8(VReg_128)
254DECODE_OPERAND_REG_8(VReg_192)
255DECODE_OPERAND_REG_8(VReg_256)
256DECODE_OPERAND_REG_8(VReg_288)
257DECODE_OPERAND_REG_8(VReg_320)
258DECODE_OPERAND_REG_8(VReg_352)
259DECODE_OPERAND_REG_8(VReg_384)
260DECODE_OPERAND_REG_8(VReg_512)
261DECODE_OPERAND_REG_8(VReg_1024)
262
// Scalar register classes with 7-bit register-number encodings; the second
// argument is the operand width in bits passed through to decodeSrcOp.
DECODE_OPERAND_SREG_7(SReg_32, 32)
DECODE_OPERAND_SREG_7(SReg_32_XM0, 32)
DECODE_OPERAND_SREG_7(SReg_32_XEXEC, 32)
DECODE_OPERAND_SREG_7(SReg_32_XM0_XEXEC, 32)
DECODE_OPERAND_SREG_7(SReg_32_XEXEC_HI, 32)
DECODE_OPERAND_SREG_7(SReg_64_XEXEC, 64)
DECODE_OPERAND_SREG_7(SReg_64_XEXEC_XNULL, 64)
DECODE_OPERAND_SREG_7(SReg_96, 96)
DECODE_OPERAND_SREG_7(SReg_128, 128)
DECODE_OPERAND_SREG_7(SReg_128_XNULL, 128)
DECODE_OPERAND_SREG_7(SReg_256, 256)
DECODE_OPERAND_SREG_7(SReg_256_XNULL, 256)
DECODE_OPERAND_SREG_7(SReg_512, 512)

// SReg_64 uses an 8-bit encoding.
DECODE_OPERAND_SREG_8(SReg_64, 64)
278
281DECODE_OPERAND_REG_8(AReg_128)
282DECODE_OPERAND_REG_8(AReg_256)
283DECODE_OPERAND_REG_8(AReg_512)
284DECODE_OPERAND_REG_8(AReg_1024)
285
287 uint64_t /*Addr*/,
288 const MCDisassembler *Decoder) {
289 assert(isUInt<10>(Imm) && "10-bit encoding expected");
290 assert((Imm & (1 << 8)) == 0 && "Imm{8} should not be used");
291
292 bool IsHi = Imm & (1 << 9);
293 unsigned RegIdx = Imm & 0xff;
294 const auto *DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
295 return addOperand(Inst, DAsm->createVGPR16Operand(RegIdx, IsHi));
296}
297
298static DecodeStatus
300 const MCDisassembler *Decoder) {
301 assert(isUInt<8>(Imm) && "8-bit encoding expected");
302
303 bool IsHi = Imm & (1 << 7);
304 unsigned RegIdx = Imm & 0x7f;
305 const auto *DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
306 return addOperand(Inst, DAsm->createVGPR16Operand(RegIdx, IsHi));
307}
308
309template <unsigned OpWidth>
311 uint64_t /*Addr*/,
312 const MCDisassembler *Decoder) {
313 assert(isUInt<9>(Imm) && "9-bit encoding expected");
314
315 const auto *DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
316 if (Imm & AMDGPU::EncValues::IS_VGPR) {
317 bool IsHi = Imm & (1 << 7);
318 unsigned RegIdx = Imm & 0x7f;
319 return addOperand(Inst, DAsm->createVGPR16Operand(RegIdx, IsHi));
320 }
321 return addOperand(Inst, DAsm->decodeNonVGPRSrcOp(Inst, OpWidth, Imm & 0xFF));
322}
323
324template <unsigned OpWidth>
325static DecodeStatus decodeOperand_VSrcT16(MCInst &Inst, unsigned Imm,
326 uint64_t /*Addr*/,
327 const MCDisassembler *Decoder) {
328 assert(isUInt<10>(Imm) && "10-bit encoding expected");
329
330 const auto *DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
331 if (Imm & AMDGPU::EncValues::IS_VGPR) {
332 bool IsHi = Imm & (1 << 9);
333 unsigned RegIdx = Imm & 0xff;
334 return addOperand(Inst, DAsm->createVGPR16Operand(RegIdx, IsHi));
335 }
336 return addOperand(Inst, DAsm->decodeNonVGPRSrcOp(Inst, OpWidth, Imm & 0xFF));
337}
338
339static DecodeStatus decodeOperand_VGPR_16(MCInst &Inst, unsigned Imm,
340 uint64_t /*Addr*/,
341 const MCDisassembler *Decoder) {
342 assert(isUInt<10>(Imm) && "10-bit encoding expected");
343 assert(Imm & AMDGPU::EncValues::IS_VGPR && "VGPR expected");
344
345 const auto *DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
346
347 bool IsHi = Imm & (1 << 9);
348 unsigned RegIdx = Imm & 0xff;
349 return addOperand(Inst, DAsm->createVGPR16Operand(RegIdx, IsHi));
350}
351
352static DecodeStatus decodeOperand_KImmFP(MCInst &Inst, unsigned Imm,
353 uint64_t Addr,
354 const MCDisassembler *Decoder) {
355 const auto *DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
356 return addOperand(Inst, DAsm->decodeMandatoryLiteralConstant(Imm));
357}
358
360 uint64_t Addr,
361 const MCDisassembler *Decoder) {
362 const auto *DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
363 return addOperand(Inst, DAsm->decodeMandatoryLiteral64Constant(Imm));
364}
365
366static DecodeStatus decodeOperandVOPDDstY(MCInst &Inst, unsigned Val,
367 uint64_t Addr, const void *Decoder) {
368 const auto *DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
369 return addOperand(Inst, DAsm->decodeVOPDDstYOp(Inst, Val));
370}
371
372static DecodeStatus decodeAVLdSt(MCInst &Inst, unsigned Imm, unsigned Opw,
373 const MCDisassembler *Decoder) {
374 const auto *DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
375 return addOperand(Inst, DAsm->decodeSrcOp(Inst, Opw, Imm | 256));
376}
377
378template <unsigned Opw>
379static DecodeStatus decodeAVLdSt(MCInst &Inst, unsigned Imm,
380 uint64_t /* Addr */,
381 const MCDisassembler *Decoder) {
382 return decodeAVLdSt(Inst, Imm, Opw, Decoder);
383}
384
385static DecodeStatus decodeOperand_VSrc_f64(MCInst &Inst, unsigned Imm,
386 uint64_t Addr,
387 const MCDisassembler *Decoder) {
388 assert(Imm < (1 << 9) && "9-bit encoding");
389 const auto *DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
390 return addOperand(Inst, DAsm->decodeSrcOp(Inst, 64, Imm));
391}
392
// Emit SDWA operand decoders: 'decodeSDWA<DecName>' forwards to the
// AMDGPUDisassembler member of the same name via DECODE_OPERAND.
#define DECODE_SDWA(DecName) \
DECODE_OPERAND(decodeSDWA##DecName, decodeSDWA##DecName)

DECODE_SDWA(Src32)
DECODE_SDWA(Src16)
DECODE_SDWA(VopcDst)
399
400static DecodeStatus decodeVersionImm(MCInst &Inst, unsigned Imm,
401 uint64_t /* Addr */,
402 const MCDisassembler *Decoder) {
403 const auto *DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
404 return addOperand(Inst, DAsm->decodeVersionImm(Imm));
405}
406
407#include "AMDGPUGenDisassemblerTables.inc"
408
namespace {
// Define bitwidths for various types used to instantiate the decoder.
// These specialize the InsnBitWidth variable template — presumably declared
// by the generated tables included above (AMDGPUGenDisassemblerTables.inc);
// confirm if the declaration moves — so the decoder knows how wide each
// instruction-word type is.
template <> constexpr uint32_t InsnBitWidth<uint32_t> = 32;
template <> constexpr uint32_t InsnBitWidth<uint64_t> = 64;
template <> constexpr uint32_t InsnBitWidth<std::bitset<96>> = 96;
template <> constexpr uint32_t InsnBitWidth<std::bitset<128>> = 128;
} // namespace
416
417//===----------------------------------------------------------------------===//
418//
419//===----------------------------------------------------------------------===//
420
421template <typename InsnType>
423 InsnType Inst, uint64_t Address,
424 raw_ostream &Comments) const {
425 assert(MI.getOpcode() == 0);
426 assert(MI.getNumOperands() == 0);
427 MCInst TmpInst;
428 HasLiteral = false;
429 const auto SavedBytes = Bytes;
430
431 SmallString<64> LocalComments;
432 raw_svector_ostream LocalCommentStream(LocalComments);
433 CommentStream = &LocalCommentStream;
434
435 DecodeStatus Res =
436 decodeInstruction(Table, TmpInst, Inst, Address, this, STI);
437
438 CommentStream = nullptr;
439
440 if (Res != MCDisassembler::Fail) {
441 MI = TmpInst;
442 Comments << LocalComments;
444 }
445 Bytes = SavedBytes;
447}
448
449template <typename InsnType>
452 MCInst &MI, InsnType Inst, uint64_t Address,
453 raw_ostream &Comments) const {
454 for (const uint8_t *T : {Table1, Table2}) {
455 if (DecodeStatus Res = tryDecodeInst(T, MI, Inst, Address, Comments))
456 return Res;
457 }
459}
460
461template <typename T> static inline T eatBytes(ArrayRef<uint8_t>& Bytes) {
462 assert(Bytes.size() >= sizeof(T));
463 const auto Res =
465 Bytes = Bytes.slice(sizeof(T));
466 return Res;
467}
468
469static inline std::bitset<96> eat12Bytes(ArrayRef<uint8_t> &Bytes) {
470 using namespace llvm::support::endian;
471 assert(Bytes.size() >= 12);
472 std::bitset<96> Lo(read<uint64_t, endianness::little>(Bytes.data()));
473 Bytes = Bytes.slice(8);
474 std::bitset<96> Hi(read<uint32_t, endianness::little>(Bytes.data()));
475 Bytes = Bytes.slice(4);
476 return (Hi << 64) | Lo;
477}
478
479static inline std::bitset<128> eat16Bytes(ArrayRef<uint8_t> &Bytes) {
480 using namespace llvm::support::endian;
481 assert(Bytes.size() >= 16);
482 std::bitset<128> Lo(read<uint64_t, endianness::little>(Bytes.data()));
483 Bytes = Bytes.slice(8);
484 std::bitset<128> Hi(read<uint64_t, endianness::little>(Bytes.data()));
485 Bytes = Bytes.slice(8);
486 return (Hi << 64) | Lo;
487}
488
489void AMDGPUDisassembler::decodeImmOperands(MCInst &MI,
490 const MCInstrInfo &MCII) const {
491 const MCInstrDesc &Desc = MCII.get(MI.getOpcode());
492 for (auto [OpNo, OpDesc] : enumerate(Desc.operands())) {
493 if (OpNo >= MI.getNumOperands())
494 continue;
495
496 // TODO: Fix V_DUAL_FMAMK_F32_X_FMAAK_F32_gfx12 vsrc operands,
497 // defined to take VGPR_32, but in reality allowing inline constants.
498 bool IsSrc = AMDGPU::OPERAND_SRC_FIRST <= OpDesc.OperandType &&
499 OpDesc.OperandType <= AMDGPU::OPERAND_SRC_LAST;
500 if (!IsSrc && OpDesc.OperandType != MCOI::OPERAND_REGISTER)
501 continue;
502
503 MCOperand &Op = MI.getOperand(OpNo);
504 if (!Op.isImm())
505 continue;
506 int64_t Imm = Op.getImm();
509 Op = decodeIntImmed(Imm);
510 continue;
511 }
512
515 Desc, OpDesc, OpDesc.OperandType == AMDGPU::OPERAND_REG_IMM_FP64);
516 continue;
517 }
518
521 switch (OpDesc.OperandType) {
527 break;
534 Imm = getInlineImmValF16(Imm);
535 break;
541 Imm = getInlineImmVal64(Imm);
542 break;
543 default:
544 Imm = getInlineImmVal32(Imm);
545 }
546 Op.setImm(Imm);
547 }
548 }
549}
550
552 ArrayRef<uint8_t> Bytes_,
554 raw_ostream &CS) const {
555 unsigned MaxInstBytesNum = std::min((size_t)TargetMaxInstBytes, Bytes_.size());
556 Bytes = Bytes_.slice(0, MaxInstBytesNum);
557
558 // In case the opcode is not recognized we'll assume a Size of 4 bytes (unless
559 // there are fewer bytes left). This will be overridden on success.
560 Size = std::min((size_t)4, Bytes_.size());
561
562 do {
563 // ToDo: better to switch encoding length using some bit predicate
564 // but it is unknown yet, so try all we can
565
566 // Try to decode DPP and SDWA first to solve conflict with VOP1 and VOP2
567 // encodings
568 if (isGFX1250() && Bytes.size() >= 16) {
569 std::bitset<128> DecW = eat16Bytes(Bytes);
570 if (tryDecodeInst(DecoderTableGFX1250128, MI, DecW, Address, CS))
571 break;
572 Bytes = Bytes_.slice(0, MaxInstBytesNum);
573 }
574
575 if (isGFX11Plus() && Bytes.size() >= 12) {
576 std::bitset<96> DecW = eat12Bytes(Bytes);
577
578 if (isGFX11() &&
579 tryDecodeInst(DecoderTableGFX1196, DecoderTableGFX11_FAKE1696, MI,
580 DecW, Address, CS))
581 break;
582
583 if (isGFX1250() &&
584 tryDecodeInst(DecoderTableGFX125096, DecoderTableGFX1250_FAKE1696, MI,
585 DecW, Address, CS))
586 break;
587
588 if (isGFX12() &&
589 tryDecodeInst(DecoderTableGFX1296, DecoderTableGFX12_FAKE1696, MI,
590 DecW, Address, CS))
591 break;
592
593 if (isGFX12() &&
594 tryDecodeInst(DecoderTableGFX12W6496, MI, DecW, Address, CS))
595 break;
596
597 if (STI.hasFeature(AMDGPU::Feature64BitLiterals)) {
598 // Return 8 bytes for a potential literal.
599 Bytes = Bytes_.slice(4, MaxInstBytesNum - 4);
600
601 if (isGFX1250() &&
602 tryDecodeInst(DecoderTableGFX125096, MI, DecW, Address, CS))
603 break;
604 }
605
606 // Reinitialize Bytes
607 Bytes = Bytes_.slice(0, MaxInstBytesNum);
608
609 } else if (Bytes.size() >= 16 &&
610 STI.hasFeature(AMDGPU::FeatureGFX950Insts)) {
611 std::bitset<128> DecW = eat16Bytes(Bytes);
612 if (tryDecodeInst(DecoderTableGFX940128, MI, DecW, Address, CS))
613 break;
614
615 // Reinitialize Bytes
616 Bytes = Bytes_.slice(0, MaxInstBytesNum);
617 }
618
619 if (Bytes.size() >= 8) {
620 const uint64_t QW = eatBytes<uint64_t>(Bytes);
621
622 if (STI.hasFeature(AMDGPU::FeatureGFX10_BEncoding) &&
623 tryDecodeInst(DecoderTableGFX10_B64, MI, QW, Address, CS))
624 break;
625
626 if (STI.hasFeature(AMDGPU::FeatureUnpackedD16VMem) &&
627 tryDecodeInst(DecoderTableGFX80_UNPACKED64, MI, QW, Address, CS))
628 break;
629
630 if (STI.hasFeature(AMDGPU::FeatureGFX950Insts) &&
631 tryDecodeInst(DecoderTableGFX95064, MI, QW, Address, CS))
632 break;
633
634 // Some GFX9 subtargets repurposed the v_mad_mix_f32, v_mad_mixlo_f16 and
635 // v_mad_mixhi_f16 for FMA variants. Try to decode using this special
636 // table first so we print the correct name.
637 if (STI.hasFeature(AMDGPU::FeatureFmaMixInsts) &&
638 tryDecodeInst(DecoderTableGFX9_DL64, MI, QW, Address, CS))
639 break;
640
641 if (STI.hasFeature(AMDGPU::FeatureGFX940Insts) &&
642 tryDecodeInst(DecoderTableGFX94064, MI, QW, Address, CS))
643 break;
644
645 if (STI.hasFeature(AMDGPU::FeatureGFX90AInsts) &&
646 tryDecodeInst(DecoderTableGFX90A64, MI, QW, Address, CS))
647 break;
648
649 if ((isVI() || isGFX9()) &&
650 tryDecodeInst(DecoderTableGFX864, MI, QW, Address, CS))
651 break;
652
653 if (isGFX9() && tryDecodeInst(DecoderTableGFX964, MI, QW, Address, CS))
654 break;
655
656 if (isGFX10() && tryDecodeInst(DecoderTableGFX1064, MI, QW, Address, CS))
657 break;
658
659 if (isGFX1250() &&
660 tryDecodeInst(DecoderTableGFX125064, DecoderTableGFX1250_FAKE1664, MI,
661 QW, Address, CS))
662 break;
663
664 if (isGFX12() &&
665 tryDecodeInst(DecoderTableGFX1264, DecoderTableGFX12_FAKE1664, MI, QW,
666 Address, CS))
667 break;
668
669 if (isGFX11() &&
670 tryDecodeInst(DecoderTableGFX1164, DecoderTableGFX11_FAKE1664, MI, QW,
671 Address, CS))
672 break;
673
674 if (isGFX11() &&
675 tryDecodeInst(DecoderTableGFX11W6464, MI, QW, Address, CS))
676 break;
677
678 if (isGFX12() &&
679 tryDecodeInst(DecoderTableGFX12W6464, MI, QW, Address, CS))
680 break;
681
682 // Reinitialize Bytes
683 Bytes = Bytes_.slice(0, MaxInstBytesNum);
684 }
685
686 // Try decode 32-bit instruction
687 if (Bytes.size() >= 4) {
688 const uint32_t DW = eatBytes<uint32_t>(Bytes);
689
690 if ((isVI() || isGFX9()) &&
691 tryDecodeInst(DecoderTableGFX832, MI, DW, Address, CS))
692 break;
693
694 if (tryDecodeInst(DecoderTableAMDGPU32, MI, DW, Address, CS))
695 break;
696
697 if (isGFX9() && tryDecodeInst(DecoderTableGFX932, MI, DW, Address, CS))
698 break;
699
700 if (STI.hasFeature(AMDGPU::FeatureGFX950Insts) &&
701 tryDecodeInst(DecoderTableGFX95032, MI, DW, Address, CS))
702 break;
703
704 if (STI.hasFeature(AMDGPU::FeatureGFX90AInsts) &&
705 tryDecodeInst(DecoderTableGFX90A32, MI, DW, Address, CS))
706 break;
707
708 if (STI.hasFeature(AMDGPU::FeatureGFX10_BEncoding) &&
709 tryDecodeInst(DecoderTableGFX10_B32, MI, DW, Address, CS))
710 break;
711
712 if (isGFX10() && tryDecodeInst(DecoderTableGFX1032, MI, DW, Address, CS))
713 break;
714
715 if (isGFX11() &&
716 tryDecodeInst(DecoderTableGFX1132, DecoderTableGFX11_FAKE1632, MI, DW,
717 Address, CS))
718 break;
719
720 if (isGFX1250() &&
721 tryDecodeInst(DecoderTableGFX125032, DecoderTableGFX1250_FAKE1632, MI,
722 DW, Address, CS))
723 break;
724
725 if (isGFX12() &&
726 tryDecodeInst(DecoderTableGFX1232, DecoderTableGFX12_FAKE1632, MI, DW,
727 Address, CS))
728 break;
729 }
730
732 } while (false);
733
735
736 decodeImmOperands(MI, *MCII);
737
738 if (MCII->get(MI.getOpcode()).TSFlags & SIInstrFlags::DPP) {
739 if (isMacDPP(MI))
741
742 if (MCII->get(MI.getOpcode()).TSFlags & SIInstrFlags::VOP3P)
744 else if (MCII->get(MI.getOpcode()).TSFlags & SIInstrFlags::VOPC)
745 convertVOPCDPPInst(MI); // Special VOP3 case
746 else if (AMDGPU::isVOPC64DPP(MI.getOpcode()))
747 convertVOPC64DPPInst(MI); // Special VOP3 case
748 else if (AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::dpp8) !=
749 -1)
751 else if (MCII->get(MI.getOpcode()).TSFlags & SIInstrFlags::VOP3)
752 convertVOP3DPPInst(MI); // Regular VOP3 case
753 }
754
756
757 if (AMDGPU::isMAC(MI.getOpcode())) {
758 // Insert dummy unused src2_modifiers.
760 AMDGPU::OpName::src2_modifiers);
761 }
762
763 if (MI.getOpcode() == AMDGPU::V_CVT_SR_BF8_F32_e64_dpp ||
764 MI.getOpcode() == AMDGPU::V_CVT_SR_FP8_F32_e64_dpp) {
765 // Insert dummy unused src2_modifiers.
767 AMDGPU::OpName::src2_modifiers);
768 }
769
770 if ((MCII->get(MI.getOpcode()).TSFlags & SIInstrFlags::DS) &&
772 insertNamedMCOperand(MI, MCOperand::createImm(0), AMDGPU::OpName::gds);
773 }
774
775 if (MCII->get(MI.getOpcode()).TSFlags &
777 int CPolPos = AMDGPU::getNamedOperandIdx(MI.getOpcode(),
778 AMDGPU::OpName::cpol);
779 if (CPolPos != -1) {
780 unsigned CPol =
781 (MCII->get(MI.getOpcode()).TSFlags & SIInstrFlags::IsAtomicRet) ?
783 if (MI.getNumOperands() <= (unsigned)CPolPos) {
785 AMDGPU::OpName::cpol);
786 } else if (CPol) {
787 MI.getOperand(CPolPos).setImm(MI.getOperand(CPolPos).getImm() | CPol);
788 }
789 }
790 }
791
792 if ((MCII->get(MI.getOpcode()).TSFlags &
794 (STI.hasFeature(AMDGPU::FeatureGFX90AInsts))) {
795 // GFX90A lost TFE, its place is occupied by ACC.
796 int TFEOpIdx =
797 AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::tfe);
798 if (TFEOpIdx != -1) {
799 auto *TFEIter = MI.begin();
800 std::advance(TFEIter, TFEOpIdx);
801 MI.insert(TFEIter, MCOperand::createImm(0));
802 }
803 }
804
805 // Validate buffer instruction offsets for GFX12+ - must not be a negative.
807 int OffsetIdx =
808 AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::offset);
809 if (OffsetIdx != -1) {
810 uint32_t Imm = MI.getOperand(OffsetIdx).getImm();
811 int64_t SignedOffset = SignExtend64<24>(Imm);
812 if (SignedOffset < 0)
814 }
815 }
816
817 if (MCII->get(MI.getOpcode()).TSFlags &
819 int SWZOpIdx =
820 AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::swz);
821 if (SWZOpIdx != -1) {
822 auto *SWZIter = MI.begin();
823 std::advance(SWZIter, SWZOpIdx);
824 MI.insert(SWZIter, MCOperand::createImm(0));
825 }
826 }
827
828 if (MCII->get(MI.getOpcode()).TSFlags & SIInstrFlags::MIMG) {
829 int VAddr0Idx =
830 AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::vaddr0);
831 int RsrcIdx =
832 AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::srsrc);
833 unsigned NSAArgs = RsrcIdx - VAddr0Idx - 1;
834 if (VAddr0Idx >= 0 && NSAArgs > 0) {
835 unsigned NSAWords = (NSAArgs + 3) / 4;
836 if (Bytes.size() < 4 * NSAWords)
838 for (unsigned i = 0; i < NSAArgs; ++i) {
839 const unsigned VAddrIdx = VAddr0Idx + 1 + i;
840 auto VAddrRCID =
841 MCII->get(MI.getOpcode()).operands()[VAddrIdx].RegClass;
842 MI.insert(MI.begin() + VAddrIdx, createRegOperand(VAddrRCID, Bytes[i]));
843 }
844 Bytes = Bytes.slice(4 * NSAWords);
845 }
846
848 }
849
850 if (MCII->get(MI.getOpcode()).TSFlags &
853
854 if (MCII->get(MI.getOpcode()).TSFlags & SIInstrFlags::EXP)
856
857 if (MCII->get(MI.getOpcode()).TSFlags & SIInstrFlags::VINTERP)
859
860 if (MCII->get(MI.getOpcode()).TSFlags & SIInstrFlags::SDWA)
862
863 if (MCII->get(MI.getOpcode()).TSFlags & SIInstrFlags::IsMAI)
865
866 if (MCII->get(MI.getOpcode()).TSFlags & SIInstrFlags::IsWMMA)
868
869 int VDstIn_Idx = AMDGPU::getNamedOperandIdx(MI.getOpcode(),
870 AMDGPU::OpName::vdst_in);
871 if (VDstIn_Idx != -1) {
872 int Tied = MCII->get(MI.getOpcode()).getOperandConstraint(VDstIn_Idx,
874 if (Tied != -1 && (MI.getNumOperands() <= (unsigned)VDstIn_Idx ||
875 !MI.getOperand(VDstIn_Idx).isReg() ||
876 MI.getOperand(VDstIn_Idx).getReg() != MI.getOperand(Tied).getReg())) {
877 if (MI.getNumOperands() > (unsigned)VDstIn_Idx)
878 MI.erase(&MI.getOperand(VDstIn_Idx));
880 MCOperand::createReg(MI.getOperand(Tied).getReg()),
881 AMDGPU::OpName::vdst_in);
882 }
883 }
884
885 bool IsSOPK = MCII->get(MI.getOpcode()).TSFlags & SIInstrFlags::SOPK;
886 if (AMDGPU::hasNamedOperand(MI.getOpcode(), AMDGPU::OpName::imm) && !IsSOPK)
888
889 // Some VOPC instructions, e.g., v_cmpx_f_f64, use VOP3 encoding and
890 // have EXEC as implicit destination. Issue a warning if encoding for
891 // vdst is not EXEC.
892 if ((MCII->get(MI.getOpcode()).TSFlags & SIInstrFlags::VOP3) &&
893 MCII->get(MI.getOpcode()).hasImplicitDefOfPhysReg(AMDGPU::EXEC)) {
894 auto ExecEncoding = MRI.getEncodingValue(AMDGPU::EXEC_LO);
895 if (Bytes_[0] != ExecEncoding)
897 }
898
899 Size = MaxInstBytesNum - Bytes.size();
900 return Status;
901}
902
904 if (STI.hasFeature(AMDGPU::FeatureGFX11Insts)) {
905 // The MCInst still has these fields even though they are no longer encoded
906 // in the GFX11 instruction.
907 insertNamedMCOperand(MI, MCOperand::createImm(0), AMDGPU::OpName::vm);
908 insertNamedMCOperand(MI, MCOperand::createImm(0), AMDGPU::OpName::compr);
909 }
910}
911
914 if (MI.getOpcode() == AMDGPU::V_INTERP_P10_F16_F32_inreg_t16_gfx11 ||
915 MI.getOpcode() == AMDGPU::V_INTERP_P10_F16_F32_inreg_fake16_gfx11 ||
916 MI.getOpcode() == AMDGPU::V_INTERP_P10_F16_F32_inreg_t16_gfx12 ||
917 MI.getOpcode() == AMDGPU::V_INTERP_P10_F16_F32_inreg_fake16_gfx12 ||
918 MI.getOpcode() == AMDGPU::V_INTERP_P10_RTZ_F16_F32_inreg_t16_gfx11 ||
919 MI.getOpcode() == AMDGPU::V_INTERP_P10_RTZ_F16_F32_inreg_fake16_gfx11 ||
920 MI.getOpcode() == AMDGPU::V_INTERP_P10_RTZ_F16_F32_inreg_t16_gfx12 ||
921 MI.getOpcode() == AMDGPU::V_INTERP_P10_RTZ_F16_F32_inreg_fake16_gfx12 ||
922 MI.getOpcode() == AMDGPU::V_INTERP_P2_F16_F32_inreg_t16_gfx11 ||
923 MI.getOpcode() == AMDGPU::V_INTERP_P2_F16_F32_inreg_fake16_gfx11 ||
924 MI.getOpcode() == AMDGPU::V_INTERP_P2_F16_F32_inreg_t16_gfx12 ||
925 MI.getOpcode() == AMDGPU::V_INTERP_P2_F16_F32_inreg_fake16_gfx12 ||
926 MI.getOpcode() == AMDGPU::V_INTERP_P2_RTZ_F16_F32_inreg_t16_gfx11 ||
927 MI.getOpcode() == AMDGPU::V_INTERP_P2_RTZ_F16_F32_inreg_fake16_gfx11 ||
928 MI.getOpcode() == AMDGPU::V_INTERP_P2_RTZ_F16_F32_inreg_t16_gfx12 ||
929 MI.getOpcode() == AMDGPU::V_INTERP_P2_RTZ_F16_F32_inreg_fake16_gfx12) {
930 // The MCInst has this field that is not directly encoded in the
931 // instruction.
932 insertNamedMCOperand(MI, MCOperand::createImm(0), AMDGPU::OpName::op_sel);
933 }
934}
935
937 if (STI.hasFeature(AMDGPU::FeatureGFX9) ||
938 STI.hasFeature(AMDGPU::FeatureGFX10)) {
939 if (AMDGPU::hasNamedOperand(MI.getOpcode(), AMDGPU::OpName::sdst))
940 // VOPC - insert clamp
941 insertNamedMCOperand(MI, MCOperand::createImm(0), AMDGPU::OpName::clamp);
942 } else if (STI.hasFeature(AMDGPU::FeatureVolcanicIslands)) {
943 int SDst = AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::sdst);
944 if (SDst != -1) {
945 // VOPC - insert VCC register as sdst
947 AMDGPU::OpName::sdst);
948 } else {
949 // VOP1/2 - insert omod if present in instruction
950 insertNamedMCOperand(MI, MCOperand::createImm(0), AMDGPU::OpName::omod);
951 }
952 }
953}
954
955/// Adjust the register values used by V_MFMA_F8F6F4_f8_f8 instructions to the
956/// appropriate subregister for the used format width.
958 MCOperand &MO, uint8_t NumRegs) {
959 switch (NumRegs) {
960 case 4:
961 return MO.setReg(MRI.getSubReg(MO.getReg(), AMDGPU::sub0_sub1_sub2_sub3));
962 case 6:
963 return MO.setReg(
964 MRI.getSubReg(MO.getReg(), AMDGPU::sub0_sub1_sub2_sub3_sub4_sub5));
965 case 8:
966 if (MCRegister NewReg = MRI.getSubReg(
967 MO.getReg(), AMDGPU::sub0_sub1_sub2_sub3_sub4_sub5_sub6_sub7)) {
968 MO.setReg(NewReg);
969 }
970 return;
971 case 12: {
972 // There is no 384-bit subreg index defined.
973 MCRegister BaseReg = MRI.getSubReg(MO.getReg(), AMDGPU::sub0);
974 MCRegister NewReg = MRI.getMatchingSuperReg(
975 BaseReg, AMDGPU::sub0, &MRI.getRegClass(AMDGPU::VReg_384RegClassID));
976 return MO.setReg(NewReg);
977 }
978 case 16:
979 // No-op in cases where one operand is still f8/bf8.
980 return;
981 default:
982 llvm_unreachable("Unexpected size for mfma/wmma f8f6f4 operand");
983 }
984}
985
986/// f8f6f4 instructions have different pseudos depending on the used formats. In
987/// the disassembler table, we only have the variants with the largest register
988/// classes which assume using an fp8/bf8 format for both operands. The actual
989/// register class depends on the format in blgp and cbsz operands. Adjust the
990/// register classes depending on the used format.
 992 int BlgpIdx =
 993 AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::blgp);
 994 if (BlgpIdx == -1)
 995 return;
 996
// cbsz is looked up unchecked: instructions carrying blgp are expected to
// carry cbsz as well.
 997 int CbszIdx =
 998 AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::cbsz);
 999
1000 unsigned CBSZ = MI.getOperand(CbszIdx).getImm();
1001 unsigned BLGP = MI.getOperand(BlgpIdx).getImm();
1002
// Look up the pseudo matching the decoded formats; nothing to do when the
// formats already match the table entry.
1003 const AMDGPU::MFMA_F8F6F4_Info *AdjustedRegClassOpcode =
1004 AMDGPU::getMFMA_F8F6F4_WithFormatArgs(CBSZ, BLGP, MI.getOpcode());
1005 if (!AdjustedRegClassOpcode ||
1006 AdjustedRegClassOpcode->Opcode == MI.getOpcode())
1007 return;
1008
1009 MI.setOpcode(AdjustedRegClassOpcode->Opcode);
1010 int Src0Idx =
1011 AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::src0);
1012 int Src1Idx =
1013 AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::src1);
1014 adjustMFMA_F8F6F4OpRegClass(MRI, MI.getOperand(Src0Idx),
1015 AdjustedRegClassOpcode->NumRegsSrcA);
1016 adjustMFMA_F8F6F4OpRegClass(MRI, MI.getOperand(Src1Idx),
1017 AdjustedRegClassOpcode->NumRegsSrcB);
1018}
1019
// WMMA analogue of the MAI conversion above: pick the opcode variant that
// matches the decoded matrix_a_fmt/matrix_b_fmt and retarget the src0/src1
// register tuples to the widths those formats imply.
1021 int FmtAIdx =
1022 AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::matrix_a_fmt);
1023 if (FmtAIdx == -1)
1024 return;
1025
1026 int FmtBIdx =
1027 AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::matrix_b_fmt);
1028
1029 unsigned FmtA = MI.getOperand(FmtAIdx).getImm();
1030 unsigned FmtB = MI.getOperand(FmtBIdx).getImm();
1031
1032 const AMDGPU::MFMA_F8F6F4_Info *AdjustedRegClassOpcode =
1033 AMDGPU::getWMMA_F8F6F4_WithFormatArgs(FmtA, FmtB, MI.getOpcode());
1034 if (!AdjustedRegClassOpcode ||
1035 AdjustedRegClassOpcode->Opcode == MI.getOpcode())
1036 return;
1037
1038 MI.setOpcode(AdjustedRegClassOpcode->Opcode);
1039 int Src0Idx =
1040 AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::src0);
1041 int Src1Idx =
1042 AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::src1);
1043 adjustMFMA_F8F6F4OpRegClass(MRI, MI.getOperand(Src0Idx),
1044 AdjustedRegClassOpcode->NumRegsSrcA);
1045 adjustMFMA_F8F6F4OpRegClass(MRI, MI.getOperand(Src1Idx),
1046 AdjustedRegClassOpcode->NumRegsSrcB);
1047}
1048
// Per-source bitmasks reconstructed from the src_modifiers operands;
// bit J corresponds to source operand J (see collectVOPModifiers below).
1050 unsigned OpSel = 0;
1051 unsigned OpSelHi = 0;
1052 unsigned NegLo = 0;
1053 unsigned NegHi = 0;
1054};
1055
1056// Reconstruct values of VOP3/VOP3P operands such as op_sel.
1057// Note that these values do not affect disassembler output,
1058// so this is only necessary for consistency with src_modifiers.
1060 bool IsVOP3P = false) {
1061 VOPModifiers Modifiers;
1062 unsigned Opc = MI.getOpcode();
1063 const AMDGPU::OpName ModOps[] = {AMDGPU::OpName::src0_modifiers,
1064 AMDGPU::OpName::src1_modifiers,
1065 AMDGPU::OpName::src2_modifiers};
1066 for (int J = 0; J < 3; ++J) {
1067 int OpIdx = AMDGPU::getNamedOperandIdx(Opc, ModOps[J]);
1068 if (OpIdx == -1)
1069 continue;
1070
1071 unsigned Val = MI.getOperand(OpIdx).getImm();
1072
1073 Modifiers.OpSel |= !!(Val & SISrcMods::OP_SEL_0) << J;
1074 if (IsVOP3P) {
1075 Modifiers.OpSelHi |= !!(Val & SISrcMods::OP_SEL_1) << J;
1076 Modifiers.NegLo |= !!(Val & SISrcMods::NEG) << J;
1077 Modifiers.NegHi |= !!(Val & SISrcMods::NEG_HI) << J;
// For plain VOP3, the dst op_sel bit is stored in src0_modifiers and
// becomes bit 3 of the combined op_sel value.
1078 } else if (J == 0) {
1079 Modifiers.OpSel |= !!(Val & SISrcMods::DST_OP_SEL) << 3;
1080 }
1081 }
1082
1083 return Modifiers;
1084}
1085
1086// Instructions decode the op_sel/suffix bits into the src_modifier
1087// operands. Copy those bits into the src operands for true16 VGPRs.
1089 const unsigned Opc = MI.getOpcode();
1090 const MCRegisterClass &ConversionRC =
1091 MRI.getRegClass(AMDGPU::VGPR_16RegClassID);
// Each entry pairs a register operand with the modifier operand that
// carries its op_sel (hi-half) bit.
1092 constexpr std::array<std::tuple<AMDGPU::OpName, AMDGPU::OpName, unsigned>, 4>
1093 OpAndOpMods = {{{AMDGPU::OpName::src0, AMDGPU::OpName::src0_modifiers,
1095 {AMDGPU::OpName::src1, AMDGPU::OpName::src1_modifiers,
1097 {AMDGPU::OpName::src2, AMDGPU::OpName::src2_modifiers,
1099 {AMDGPU::OpName::vdst, AMDGPU::OpName::src0_modifiers,
1101 for (const auto &[OpName, OpModsName, OpSelMask] : OpAndOpMods) {
1102 int OpIdx = AMDGPU::getNamedOperandIdx(Opc, OpName);
1103 int OpModsIdx = AMDGPU::getNamedOperandIdx(Opc, OpModsName);
1104 if (OpIdx == -1 || OpModsIdx == -1)
1105 continue;
1106 MCOperand &Op = MI.getOperand(OpIdx);
1107 if (!Op.isReg())
1108 continue;
1109 if (!ConversionRC.contains(Op.getReg()))
1110 continue;
1111 unsigned OpEnc = MRI.getEncodingValue(Op.getReg());
1112 const MCOperand &OpMods = MI.getOperand(OpModsIdx);
1113 unsigned ModVal = OpMods.getImm();
1114 if (ModVal & OpSelMask) { // isHi
// VGPR_16 interleaves lo/hi halves; the odd register at RegIdx*2+1 is
// the high 16-bit half of VGPR RegIdx.
1115 unsigned RegIdx = OpEnc & AMDGPU::HWEncoding::REG_IDX_MASK;
1116 Op.setReg(ConversionRC.getRegister(RegIdx * 2 + 1));
1117 }
1118 }
1119}
1120
1121// MAC opcodes have special old and src2 operands.
1122// src2 is tied to dst, while old is not tied (but assumed to be).
// Returns true when the opcode has an 'old' operand that is NOT tied to the
// destination — i.e. the MAC DPP shape that needs a dummy 'old' inserted.
1124 constexpr int DST_IDX = 0;
1125 auto Opcode = MI.getOpcode();
1126 const auto &Desc = MCII->get(Opcode);
1127 auto OldIdx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::old);
1128
1129 if (OldIdx != -1 && Desc.getOperandConstraint(
1130 OldIdx, MCOI::OperandConstraint::TIED_TO) == -1) {
// Sanity: such opcodes must still have a src2 that IS tied (to DST_IDX).
1131 assert(AMDGPU::hasNamedOperand(Opcode, AMDGPU::OpName::src2));
1132 assert(Desc.getOperandConstraint(
1133 AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2),
1135 (void)DST_IDX;
1136 return true;
1137 }
1138
1139 return false;
1140}
1141
1142// Create dummy old operand and insert dummy unused src2_modifiers
// The assert guarantees the descriptor has room for both extra operands.
1144 assert(MI.getNumOperands() + 1 < MCII->get(MI.getOpcode()).getNumOperands());
1145 insertNamedMCOperand(MI, MCOperand::createReg(0), AMDGPU::OpName::old);
1147 AMDGPU::OpName::src2_modifiers);
1148}
1149
// DPP8: mirror vdst into vdst_in when present, then materialize either the
// reconstructed op_sel or dummy src modifiers so the MCInst operand count
// matches its descriptor.
1151 unsigned Opc = MI.getOpcode();
1152
1153 int VDstInIdx =
1154 AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::vdst_in);
1155 if (VDstInIdx != -1)
1156 insertNamedMCOperand(MI, MI.getOperand(0), AMDGPU::OpName::vdst_in);
1157
1158 unsigned DescNumOps = MCII->get(Opc).getNumOperands();
1159 if (MI.getNumOperands() < DescNumOps &&
1160 AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::op_sel)) {
1162 auto Mods = collectVOPModifiers(MI);
1164 AMDGPU::OpName::op_sel);
1165 } else {
1166 // Insert dummy unused src modifiers.
1167 if (MI.getNumOperands() < DescNumOps &&
1168 AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::src0_modifiers))
1170 AMDGPU::OpName::src0_modifiers);
1171
1172 if (MI.getNumOperands() < DescNumOps &&
1173 AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::src1_modifiers))
1175 AMDGPU::OpName::src1_modifiers);
1176 }
1177}
1178
1181
// VOP3 DPP: mirror vdst into vdst_in when present, and reconstruct op_sel
// from the decoded src_modifiers if the descriptor expects it.
1182 int VDstInIdx =
1183 AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::vdst_in);
1184 if (VDstInIdx != -1)
1185 insertNamedMCOperand(MI, MI.getOperand(0), AMDGPU::OpName::vdst_in);
1186
1187 unsigned Opc = MI.getOpcode();
1188 unsigned DescNumOps = MCII->get(Opc).getNumOperands();
1189 if (MI.getNumOperands() < DescNumOps &&
1190 AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::op_sel)) {
1191 auto Mods = collectVOPModifiers(MI);
1193 AMDGPU::OpName::op_sel);
1194 }
1195}
1196
1197// Given a wide tuple \p Reg check if it will overflow 256 registers.
1198// \returns \p Reg on success or NoRegister otherwise.
1199static unsigned CheckVGPROverflow(unsigned Reg, const MCRegisterClass &RC,
1200 const MCRegisterInfo &MRI) {
1201 unsigned NumRegs = RC.getSizeInBits() / 32;
1202 MCRegister Sub0 = MRI.getSubReg(Reg, AMDGPU::sub0);
1203 if (!Sub0)
1204 return Reg;
1205
1206 MCRegister BaseReg;
1207 if (MRI.getRegClass(AMDGPU::VGPR_32RegClassID).contains(Sub0))
1208 BaseReg = AMDGPU::VGPR0;
1209 else if (MRI.getRegClass(AMDGPU::AGPR_32RegClassID).contains(Sub0))
1210 BaseReg = AMDGPU::AGPR0;
1211
1212 assert(BaseReg && "Only vector registers expected");
1213
1214 return (Sub0 - BaseReg + NumRegs <= 256) ? Reg : AMDGPU::NoRegister;
1215}
1216
1217// Note that before gfx10, the MIMG encoding provided no information about
1218// VADDR size. Consequently, decoded instructions always show address as if it
1219// has 1 dword, which could be not really so.
// Fix up MIMG/VSAMPLE/VIMAGE instructions: recompute the real dst/address
// dword counts from dmask/tfe/d16 and dim/a16, then retarget the opcode and
// widen (or trim) the vdata / vaddr register operands accordingly.
1221 auto TSFlags = MCII->get(MI.getOpcode()).TSFlags;
1222
1223 int VDstIdx = AMDGPU::getNamedOperandIdx(MI.getOpcode(),
1224 AMDGPU::OpName::vdst);
1225
1226 int VDataIdx = AMDGPU::getNamedOperandIdx(MI.getOpcode(),
1227 AMDGPU::OpName::vdata);
1228 int VAddr0Idx =
1229 AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::vaddr0);
1230 AMDGPU::OpName RsrcOpName = (TSFlags & SIInstrFlags::MIMG)
1231 ? AMDGPU::OpName::srsrc
1232 : AMDGPU::OpName::rsrc;
1233 int RsrcIdx = AMDGPU::getNamedOperandIdx(MI.getOpcode(), RsrcOpName);
1234 int DMaskIdx = AMDGPU::getNamedOperandIdx(MI.getOpcode(),
1235 AMDGPU::OpName::dmask);
1236
1237 int TFEIdx = AMDGPU::getNamedOperandIdx(MI.getOpcode(),
1238 AMDGPU::OpName::tfe);
1239 int D16Idx = AMDGPU::getNamedOperandIdx(MI.getOpcode(),
1240 AMDGPU::OpName::d16);
1241
1242 const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(MI.getOpcode());
1243 const AMDGPU::MIMGBaseOpcodeInfo *BaseOpcode =
1244 AMDGPU::getMIMGBaseOpcodeInfo(Info->BaseOpcode);
1245
1246 assert(VDataIdx != -1);
1247 if (BaseOpcode->BVH) {
1248 // Add A16 operand for intersect_ray instructions
1249 addOperand(MI, MCOperand::createImm(BaseOpcode->A16));
1250 return;
1251 }
1252
1253 bool IsAtomic = (VDstIdx != -1);
1254 bool IsGather4 = TSFlags & SIInstrFlags::Gather4;
1255 bool IsVSample = TSFlags & SIInstrFlags::VSAMPLE;
1256 bool IsNSA = false;
1257 bool IsPartialNSA = false;
1258 unsigned AddrSize = Info->VAddrDwords;
1259
// On GFX10+ the dim/a16 operands allow computing the true address size.
1260 if (isGFX10Plus()) {
1261 unsigned DimIdx =
1262 AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::dim);
1263 int A16Idx =
1264 AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::a16);
1265 const AMDGPU::MIMGDimInfo *Dim =
1266 AMDGPU::getMIMGDimInfoByEncoding(MI.getOperand(DimIdx).getImm());
1267 const bool IsA16 = (A16Idx != -1 && MI.getOperand(A16Idx).getImm());
1268
1269 AddrSize =
1270 AMDGPU::getAddrSizeMIMGOp(BaseOpcode, Dim, IsA16, AMDGPU::hasG16(STI));
1271
1272 // VSAMPLE insts that do not use vaddr3 behave the same as NSA forms.
1273 // VIMAGE insts other than BVH never use vaddr4.
1274 IsNSA = Info->MIMGEncoding == AMDGPU::MIMGEncGfx10NSA ||
1275 Info->MIMGEncoding == AMDGPU::MIMGEncGfx11NSA ||
1276 Info->MIMGEncoding == AMDGPU::MIMGEncGfx12;
1277 if (!IsNSA) {
1278 if (!IsVSample && AddrSize > 12)
1279 AddrSize = 16;
1280 } else {
1281 if (AddrSize > Info->VAddrDwords) {
1282 if (!STI.hasFeature(AMDGPU::FeaturePartialNSAEncoding)) {
1283 // The NSA encoding does not contain enough operands for the
1284 // combination of base opcode / dimension. Should this be an error?
1285 return;
1286 }
1287 IsPartialNSA = true;
1288 }
1289 }
1290 }
1291
// Actual dst dword count: gather4 always writes 4; otherwise popcount of
// dmask, halved (rounded up) for packed D16 and +1 for the tfe status dword.
1292 unsigned DMask = MI.getOperand(DMaskIdx).getImm() & 0xf;
1293 unsigned DstSize = IsGather4 ? 4 : std::max(llvm::popcount(DMask), 1);
1294
1295 bool D16 = D16Idx >= 0 && MI.getOperand(D16Idx).getImm();
1296 if (D16 && AMDGPU::hasPackedD16(STI)) {
1297 DstSize = (DstSize + 1) / 2;
1298 }
1299
1300 if (TFEIdx != -1 && MI.getOperand(TFEIdx).getImm())
1301 DstSize += 1;
1302
1303 if (DstSize == Info->VDataDwords && AddrSize == Info->VAddrDwords)
1304 return;
1305
1306 int NewOpcode =
1307 AMDGPU::getMIMGOpcode(Info->BaseOpcode, Info->MIMGEncoding, DstSize, AddrSize);
1308 if (NewOpcode == -1)
1309 return;
1310
1311 // Widen the register to the correct number of enabled channels.
1312 MCRegister NewVdata;
1313 if (DstSize != Info->VDataDwords) {
1314 auto DataRCID = MCII->get(NewOpcode).operands()[VDataIdx].RegClass;
1315
1316 // Get first subregister of VData
1317 MCRegister Vdata0 = MI.getOperand(VDataIdx).getReg();
1318 MCRegister VdataSub0 = MRI.getSubReg(Vdata0, AMDGPU::sub0);
1319 Vdata0 = (VdataSub0 != 0)? VdataSub0 : Vdata0;
1320
1321 const MCRegisterClass &NewRC = MRI.getRegClass(DataRCID);
1322 NewVdata = MRI.getMatchingSuperReg(Vdata0, AMDGPU::sub0, &NewRC);
1323 NewVdata = CheckVGPROverflow(NewVdata, NewRC, MRI);
1324 if (!NewVdata) {
1325 // It's possible to encode this such that the low register + enabled
1326 // components exceeds the register count.
1327 return;
1328 }
1329 }
1330
1331 // If not using NSA on GFX10+, widen vaddr0 address register to correct size.
1332 // If using partial NSA on GFX11+ widen last address register.
1333 int VAddrSAIdx = IsPartialNSA ? (RsrcIdx - 1) : VAddr0Idx;
1334 MCRegister NewVAddrSA;
1335 if (STI.hasFeature(AMDGPU::FeatureNSAEncoding) && (!IsNSA || IsPartialNSA) &&
1336 AddrSize != Info->VAddrDwords) {
1337 MCRegister VAddrSA = MI.getOperand(VAddrSAIdx).getReg();
1338 MCRegister VAddrSubSA = MRI.getSubReg(VAddrSA, AMDGPU::sub0);
1339 VAddrSA = VAddrSubSA ? VAddrSubSA : VAddrSA;
1340
1341 auto AddrRCID = MCII->get(NewOpcode).operands()[VAddrSAIdx].RegClass;
1342 const MCRegisterClass &NewRC = MRI.getRegClass(AddrRCID);
1343 NewVAddrSA = MRI.getMatchingSuperReg(VAddrSA, AMDGPU::sub0, &NewRC);
1344 NewVAddrSA = CheckVGPROverflow(NewVAddrSA, NewRC, MRI);
1345 if (!NewVAddrSA)
1346 return;
1347 }
1348
1349 MI.setOpcode(NewOpcode);
1350
1351 if (NewVdata != AMDGPU::NoRegister) {
1352 MI.getOperand(VDataIdx) = MCOperand::createReg(NewVdata);
1353
1354 if (IsAtomic) {
1355 // Atomic operations have an additional operand (a copy of data)
1356 MI.getOperand(VDstIdx) = MCOperand::createReg(NewVdata);
1357 }
1358 }
1359
// With full NSA the extra per-dword address operands are simply dropped.
1360 if (NewVAddrSA) {
1361 MI.getOperand(VAddrSAIdx) = MCOperand::createReg(NewVAddrSA);
1362 } else if (IsNSA) {
1363 assert(AddrSize <= Info->VAddrDwords);
1364 MI.erase(MI.begin() + VAddr0Idx + AddrSize,
1365 MI.begin() + VAddr0Idx + Info->VAddrDwords);
1366 }
1367}
1368
1369// Opsel and neg bits are used in src_modifiers and standalone operands. Autogen
1370// decoder only adds to src_modifiers, so manually add the bits to the other
1371// operands.
1373 unsigned Opc = MI.getOpcode();
1374 unsigned DescNumOps = MCII->get(Opc).getNumOperands();
1375 auto Mods = collectVOPModifiers(MI, true);
1376
1377 if (MI.getNumOperands() < DescNumOps &&
1378 AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::vdst_in))
1379 insertNamedMCOperand(MI, MCOperand::createImm(0), AMDGPU::OpName::vdst_in);
1380
// Insert each reconstructed standalone operand the descriptor expects.
1381 if (MI.getNumOperands() < DescNumOps &&
1382 AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::op_sel))
1384 AMDGPU::OpName::op_sel);
1385 if (MI.getNumOperands() < DescNumOps &&
1386 AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::op_sel_hi))
1388 AMDGPU::OpName::op_sel_hi);
1389 if (MI.getNumOperands() < DescNumOps &&
1390 AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::neg_lo))
1392 AMDGPU::OpName::neg_lo);
1393 if (MI.getNumOperands() < DescNumOps &&
1394 AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::neg_hi))
1396 AMDGPU::OpName::neg_hi);
1397}
1398
1399// Create dummy old operand and insert optional operands
// VOPC DPP: insert a dummy 'old' register and unused src modifiers wherever
// the encoding omitted them, so the operand list matches the descriptor.
1401 unsigned Opc = MI.getOpcode();
1402 unsigned DescNumOps = MCII->get(Opc).getNumOperands();
1403
1404 if (MI.getNumOperands() < DescNumOps &&
1405 AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::old))
1406 insertNamedMCOperand(MI, MCOperand::createReg(0), AMDGPU::OpName::old);
1407
1408 if (MI.getNumOperands() < DescNumOps &&
1409 AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::src0_modifiers))
1411 AMDGPU::OpName::src0_modifiers);
1412
1413 if (MI.getNumOperands() < DescNumOps &&
1414 AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::src1_modifiers))
1416 AMDGPU::OpName::src1_modifiers);
1417}
1418
// Reconstruct and insert the op_sel operand when the opcode expects one.
1420 unsigned Opc = MI.getOpcode();
1421 unsigned DescNumOps = MCII->get(Opc).getNumOperands();
1422
1424
1425 if (MI.getNumOperands() < DescNumOps &&
1426 AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::op_sel)) {
1429 AMDGPU::OpName::op_sel);
1430 }
1431}
1432
// Insert the literal that was decoded for this instruction as its immX
// operand; requires that a literal has already been consumed.
1434 assert(HasLiteral && "Should have decoded a literal");
1435 insertNamedMCOperand(MI, MCOperand::createImm(Literal), AMDGPU::OpName::immX);
1436}
1437
1438const char* AMDGPUDisassembler::getRegClassName(unsigned RegClassID) const {
1439 return getContext().getRegisterInfo()->
1440 getRegClassName(&AMDGPUMCRegisterClasses[RegClassID]);
1441}
1442
inline
// Report a decoding error on the comment stream and return an empty
// (invalid) MCOperand as a placeholder.
1445 const Twine& ErrMsg) const {
1446 *CommentStream << "Error: " + ErrMsg;
1447
1448 // ToDo: add support for error operands to MCInst.h
1449 // return MCOperand::createError(V);
1450 return MCOperand();
1451}
1452
1453inline
1456}
1457
inline
// Create a register operand for register #Val of the given class,
// reporting an error for indices beyond the class size.
1460 unsigned Val) const {
1461 const auto& RegCl = AMDGPUMCRegisterClasses[RegClassID];
1462 if (Val >= RegCl.getNumRegs())
1463 return errOperand(Val, Twine(getRegClassName(RegClassID)) +
1464 ": unknown register " + Twine(Val));
1465 return createRegOperand(RegCl.getRegister(Val));
1466}
1467
inline
// Create a scalar register operand. Wide SGPR/TTMP tuples are encoded as the
// base register number divided by the tuple alignment, hence the shift.
1470 unsigned Val) const {
1471 // ToDo: SI/CI have 104 SGPRs, VI - 102
1472 // Valery: here we accepting as much as we can, let assembler sort it out
1473 int shift = 0;
1474 switch (SRegClassID) {
1475 case AMDGPU::SGPR_32RegClassID:
1476 case AMDGPU::TTMP_32RegClassID:
1477 break;
1478 case AMDGPU::SGPR_64RegClassID:
1479 case AMDGPU::TTMP_64RegClassID:
1480 shift = 1;
1481 break;
1482 case AMDGPU::SGPR_96RegClassID:
1483 case AMDGPU::TTMP_96RegClassID:
1484 case AMDGPU::SGPR_128RegClassID:
1485 case AMDGPU::TTMP_128RegClassID:
1486 // ToDo: unclear if s[100:104] is available on VI. Can we use VCC as SGPR in
1487 // this bundle?
1488 case AMDGPU::SGPR_256RegClassID:
1489 case AMDGPU::TTMP_256RegClassID:
1490 // ToDo: unclear if s[96:104] is available on VI. Can we use VCC as SGPR in
1491 // this bundle?
1492 case AMDGPU::SGPR_288RegClassID:
1493 case AMDGPU::TTMP_288RegClassID:
1494 case AMDGPU::SGPR_320RegClassID:
1495 case AMDGPU::TTMP_320RegClassID:
1496 case AMDGPU::SGPR_352RegClassID:
1497 case AMDGPU::TTMP_352RegClassID:
1498 case AMDGPU::SGPR_384RegClassID:
1499 case AMDGPU::TTMP_384RegClassID:
1500 case AMDGPU::SGPR_512RegClassID:
1501 case AMDGPU::TTMP_512RegClassID:
1502 shift = 2;
1503 break;
1504 // ToDo: unclear if s[88:104] is available on VI. Can we use VCC as SGPR in
1505 // this bundle?
1506 default:
1507 llvm_unreachable("unhandled register class");
1508 }
1509
// Misaligned encodings are accepted but flagged for the reader.
1510 if (Val % (1 << shift)) {
1511 *CommentStream << "Warning: " << getRegClassName(SRegClassID)
1512 << ": scalar reg isn't aligned " << Val;
1513 }
1514
1515 return createRegOperand(SRegClassID, Val >> shift);
1516}
1517
// VGPR_16 registers interleave low/high halves: the 16-bit register for
// (RegIdx, IsHi) lives at index RegIdx * 2 + IsHi.
1519 bool IsHi) const {
1520 unsigned RegIdxInVGPR16 = RegIdx * 2 + (IsHi ? 1 : 0);
1521 return createRegOperand(AMDGPU::VGPR_16RegClassID, RegIdxInVGPR16);
1522}
1523
1524// Decode Literals for insts which always have a literal in the encoding
// Records the literal value; a second, different literal within the same
// instruction is rejected as an error.
1527 if (HasLiteral) {
1528 assert(
1530 "Should only decode multiple kimm with VOPD, check VSrc operand types");
1531 if (Literal != Val)
1532 return errOperand(Val, "More than one unique literal is illegal");
1533 }
1534 HasLiteral = true;
1535 Literal = Val;
1536 return MCOperand::createImm(Literal);
1538
// 64-bit variant of the mandatory-literal decode. The lit64() expression
// form is used when the low 32 bits are non-zero, otherwise a plain
// immediate suffices.
1541 if (HasLiteral) {
1542 if (Literal64 != Val)
1543 return errOperand(Val, "More than one unique literal is illegal");
1544 }
1545 HasLiteral = true;
1546 Literal = Literal64 = Val;
1547
1548 bool UseLit64 = Lo_32(Literal64) != 0;
1550 LitModifier::Lit64, Literal64, getContext()))
1551 : MCOperand::createImm(Literal64);
1552}
1553
// Lazily read a 32-bit trailing literal from the instruction stream (only
// once per instruction); ExtendFP64 shifts it into the high half for f64
// operands. On targets with 64-bit literals, a lit64() wrapper is emitted
// where a plain immediate would be ambiguous.
1555 const MCOperandInfo &OpDesc,
1556 bool ExtendFP64) const {
1557 // For now all literal constants are supposed to be unsigned integer
1558 // ToDo: deal with signed/unsigned 64-bit integer constants
1559 // ToDo: deal with float/double constants
1560 if (!HasLiteral) {
1561 if (Bytes.size() < 4) {
1562 return errOperand(0, "cannot read literal, inst bytes left " +
1563 Twine(Bytes.size()));
1564 }
1565 HasLiteral = true;
1566 Literal = Literal64 = eatBytes<uint32_t>(Bytes);
1567 if (ExtendFP64)
1568 Literal64 <<= 32;
1569 }
1570
1571 int64_t Val = ExtendFP64 ? Literal64 : Literal;
1572
1573 bool CanUse64BitLiterals =
1574 STI.hasFeature(AMDGPU::Feature64BitLiterals) &&
1576
1577 bool UseLit64 = false;
1578 if (CanUse64BitLiterals) {
1581 UseLit64 = !isInt<32>(Val) || !isUInt<32>(Val);
1582 else if (OpDesc.OperandType == AMDGPU::OPERAND_REG_IMM_FP64 ||
1585 UseLit64 = Lo_32(Val) != 0;
1586 }
1587
1590 : MCOperand::createImm(Val);
1591}
1592
// Lazily read a full 64-bit trailing literal (targets with
// Feature64BitLiterals only); lit64() is used when a plain immediate would
// not round-trip through the assembler unambiguously.
1595 assert(STI.hasFeature(AMDGPU::Feature64BitLiterals));
1596
1597 if (!HasLiteral) {
1598 if (Bytes.size() < 8) {
1599 return errOperand(0, "cannot read literal64, inst bytes left " +
1600 Twine(Bytes.size()));
1601 }
1602 HasLiteral = true;
1603 Literal64 = eatBytes<uint64_t>(Bytes);
1604 }
1605
1606 bool UseLit64 = false;
1607 const MCInstrDesc &Desc = MCII->get(Inst.getOpcode());
1608 const MCOperandInfo &OpDesc = Desc.operands()[Inst.getNumOperands()];
1611 UseLit64 = !isInt<32>(Literal64) || !isUInt<32>(Literal64);
1612 } else {
1616 UseLit64 = Lo_32(Literal64) != 0;
1617 }
1618
1620 LitModifier::Lit64, Literal64, getContext()))
1621 : MCOperand::createImm(Literal64);
1622}
1623
// Map an inline integer-constant encoding to its signed value: encodings up
// to INLINE_INTEGER_C_POSITIVE_MAX map to Val - MIN, the remainder mirror
// into the negative range.
1625 using namespace AMDGPU::EncValues;
1626
1627 assert(Imm >= INLINE_INTEGER_C_MIN && Imm <= INLINE_INTEGER_C_MAX);
1628 return MCOperand::createImm((Imm <= INLINE_INTEGER_C_POSITIVE_MAX) ?
1629 (static_cast<int64_t>(Imm) - INLINE_INTEGER_C_MIN) :
1630 (INLINE_INTEGER_C_POSITIVE_MAX - static_cast<int64_t>(Imm)));
1631 // Cast prevents negative overflow.
1632}
1633
1634static int64_t getInlineImmVal32(unsigned Imm) {
1635 switch (Imm) {
1636 case 240:
1637 return llvm::bit_cast<uint32_t>(0.5f);
1638 case 241:
1639 return llvm::bit_cast<uint32_t>(-0.5f);
1640 case 242:
1641 return llvm::bit_cast<uint32_t>(1.0f);
1642 case 243:
1643 return llvm::bit_cast<uint32_t>(-1.0f);
1644 case 244:
1645 return llvm::bit_cast<uint32_t>(2.0f);
1646 case 245:
1647 return llvm::bit_cast<uint32_t>(-2.0f);
1648 case 246:
1649 return llvm::bit_cast<uint32_t>(4.0f);
1650 case 247:
1651 return llvm::bit_cast<uint32_t>(-4.0f);
1652 case 248: // 1 / (2 * PI)
1653 return 0x3e22f983;
1654 default:
1655 llvm_unreachable("invalid fp inline imm");
1656 }
1657}
1658
1659static int64_t getInlineImmVal64(unsigned Imm) {
1660 switch (Imm) {
1661 case 240:
1662 return llvm::bit_cast<uint64_t>(0.5);
1663 case 241:
1664 return llvm::bit_cast<uint64_t>(-0.5);
1665 case 242:
1666 return llvm::bit_cast<uint64_t>(1.0);
1667 case 243:
1668 return llvm::bit_cast<uint64_t>(-1.0);
1669 case 244:
1670 return llvm::bit_cast<uint64_t>(2.0);
1671 case 245:
1672 return llvm::bit_cast<uint64_t>(-2.0);
1673 case 246:
1674 return llvm::bit_cast<uint64_t>(4.0);
1675 case 247:
1676 return llvm::bit_cast<uint64_t>(-4.0);
1677 case 248: // 1 / (2 * PI)
1678 return 0x3fc45f306dc9c882;
1679 default:
1680 llvm_unreachable("invalid fp inline imm");
1681 }
1682}
1683
1684static int64_t getInlineImmValF16(unsigned Imm) {
1685 switch (Imm) {
1686 case 240:
1687 return 0x3800;
1688 case 241:
1689 return 0xB800;
1690 case 242:
1691 return 0x3C00;
1692 case 243:
1693 return 0xBC00;
1694 case 244:
1695 return 0x4000;
1696 case 245:
1697 return 0xC000;
1698 case 246:
1699 return 0x4400;
1700 case 247:
1701 return 0xC400;
1702 case 248: // 1 / (2 * PI)
1703 return 0x3118;
1704 default:
1705 llvm_unreachable("invalid fp inline imm");
1706 }
1707}
1708
1709static int64_t getInlineImmValBF16(unsigned Imm) {
1710 switch (Imm) {
1711 case 240:
1712 return 0x3F00;
1713 case 241:
1714 return 0xBF00;
1715 case 242:
1716 return 0x3F80;
1717 case 243:
1718 return 0xBF80;
1719 case 244:
1720 return 0x4000;
1721 case 245:
1722 return 0xC000;
1723 case 246:
1724 return 0x4080;
1725 case 247:
1726 return 0xC080;
1727 case 248: // 1 / (2 * PI)
1728 return 0x3E22;
1729 default:
1730 llvm_unreachable("invalid fp inline imm");
1731 }
1732}
1733
1734unsigned AMDGPUDisassembler::getVgprClassId(unsigned Width) const {
1735 using namespace AMDGPU;
1736
1737 switch (Width) {
1738 case 16:
1739 case 32:
1740 return VGPR_32RegClassID;
1741 case 64:
1742 return VReg_64RegClassID;
1743 case 96:
1744 return VReg_96RegClassID;
1745 case 128:
1746 return VReg_128RegClassID;
1747 case 160:
1748 return VReg_160RegClassID;
1749 case 192:
1750 return VReg_192RegClassID;
1751 case 256:
1752 return VReg_256RegClassID;
1753 case 288:
1754 return VReg_288RegClassID;
1755 case 320:
1756 return VReg_320RegClassID;
1757 case 352:
1758 return VReg_352RegClassID;
1759 case 384:
1760 return VReg_384RegClassID;
1761 case 512:
1762 return VReg_512RegClassID;
1763 case 1024:
1764 return VReg_1024RegClassID;
1765 }
1766 llvm_unreachable("Invalid register width!");
1767}
1768
1769unsigned AMDGPUDisassembler::getAgprClassId(unsigned Width) const {
1770 using namespace AMDGPU;
1771
1772 switch (Width) {
1773 case 16:
1774 case 32:
1775 return AGPR_32RegClassID;
1776 case 64:
1777 return AReg_64RegClassID;
1778 case 96:
1779 return AReg_96RegClassID;
1780 case 128:
1781 return AReg_128RegClassID;
1782 case 160:
1783 return AReg_160RegClassID;
1784 case 256:
1785 return AReg_256RegClassID;
1786 case 288:
1787 return AReg_288RegClassID;
1788 case 320:
1789 return AReg_320RegClassID;
1790 case 352:
1791 return AReg_352RegClassID;
1792 case 384:
1793 return AReg_384RegClassID;
1794 case 512:
1795 return AReg_512RegClassID;
1796 case 1024:
1797 return AReg_1024RegClassID;
1798 }
1799 llvm_unreachable("Invalid register width!");
1800}
1801
1802unsigned AMDGPUDisassembler::getSgprClassId(unsigned Width) const {
1803 using namespace AMDGPU;
1804
1805 switch (Width) {
1806 case 16:
1807 case 32:
1808 return SGPR_32RegClassID;
1809 case 64:
1810 return SGPR_64RegClassID;
1811 case 96:
1812 return SGPR_96RegClassID;
1813 case 128:
1814 return SGPR_128RegClassID;
1815 case 160:
1816 return SGPR_160RegClassID;
1817 case 256:
1818 return SGPR_256RegClassID;
1819 case 288:
1820 return SGPR_288RegClassID;
1821 case 320:
1822 return SGPR_320RegClassID;
1823 case 352:
1824 return SGPR_352RegClassID;
1825 case 384:
1826 return SGPR_384RegClassID;
1827 case 512:
1828 return SGPR_512RegClassID;
1829 }
1830 llvm_unreachable("Invalid register width!");
1831}
1832
1833unsigned AMDGPUDisassembler::getTtmpClassId(unsigned Width) const {
1834 using namespace AMDGPU;
1835
1836 switch (Width) {
1837 case 16:
1838 case 32:
1839 return TTMP_32RegClassID;
1840 case 64:
1841 return TTMP_64RegClassID;
1842 case 128:
1843 return TTMP_128RegClassID;
1844 case 256:
1845 return TTMP_256RegClassID;
1846 case 288:
1847 return TTMP_288RegClassID;
1848 case 320:
1849 return TTMP_320RegClassID;
1850 case 352:
1851 return TTMP_352RegClassID;
1852 case 384:
1853 return TTMP_384RegClassID;
1854 case 512:
1855 return TTMP_512RegClassID;
1856 }
1857 llvm_unreachable("Invalid register width!");
1858}
1859
1860int AMDGPUDisassembler::getTTmpIdx(unsigned Val) const {
1861 using namespace AMDGPU::EncValues;
1862
1863 unsigned TTmpMin = isGFX9Plus() ? TTMP_GFX9PLUS_MIN : TTMP_VI_MIN;
1864 unsigned TTmpMax = isGFX9Plus() ? TTMP_GFX9PLUS_MAX : TTMP_VI_MAX;
1865
1866 return (TTmpMin <= Val && Val <= TTmpMax)? Val - TTmpMin : -1;
1867}
1868
// Decode a 10-bit (enum10) source operand: bit 9 selects AGPR vs VGPR;
// values inside the VGPR range become vector registers, everything else is
// delegated to decodeNonVGPRSrcOp.
1870 unsigned Val) const {
1871 using namespace AMDGPU::EncValues;
1872
1873 assert(Val < 1024); // enum10
1874
1875 bool IsAGPR = Val & 512;
1876 Val &= 511;
1877
1878 if (VGPR_MIN <= Val && Val <= VGPR_MAX) {
1879 return createRegOperand(IsAGPR ? getAgprClassId(Width)
1880 : getVgprClassId(Width), Val - VGPR_MIN);
1881 }
1882 return decodeNonVGPRSrcOp(Inst, Width, Val & 0xFF);
1883}
1884
// Decode the non-VGPR half of the 9-bit src encoding: SGPRs, TTMPs, inline
// constants, literal markers, and finally width-dependent special registers.
1886 unsigned Width,
1887 unsigned Val) const {
1888 // Cases when Val{8} is 1 (vgpr, agpr or true 16 vgpr) should have been
1889 // decoded earlier.
1890 assert(Val < (1 << 8) && "9-bit Src encoding when Val{8} is 0");
1891 using namespace AMDGPU::EncValues;
1892
1893 if (Val <= SGPR_MAX) {
1894 // "SGPR_MIN <= Val" is always true and causes compilation warning.
1895 static_assert(SGPR_MIN == 0);
1896 return createSRegOperand(getSgprClassId(Width), Val - SGPR_MIN);
1897 }
1898
1899 int TTmpIdx = getTTmpIdx(Val);
1900 if (TTmpIdx >= 0) {
1901 return createSRegOperand(getTtmpClassId(Width), TTmpIdx);
1902 }
1903
// Inline constants and the literal marker are kept as raw immediates here.
1904 if ((INLINE_INTEGER_C_MIN <= Val && Val <= INLINE_INTEGER_C_MAX) ||
1905 (INLINE_FLOATING_C_MIN <= Val && Val <= INLINE_FLOATING_C_MAX) ||
1906 Val == LITERAL_CONST)
1907 return MCOperand::createImm(Val);
1908
1909 if (Val == LITERAL64_CONST && STI.hasFeature(AMDGPU::Feature64BitLiterals)) {
1910 return decodeLiteral64Constant(Inst);
1911 }
1912
// Remaining encodings are special registers, dispatched by operand width.
1913 switch (Width) {
1914 case 32:
1915 case 16:
1916 return decodeSpecialReg32(Val);
1917 case 64:
1918 return decodeSpecialReg64(Val);
1919 case 96:
1920 case 128:
1921 case 256:
1922 case 512:
1923 return decodeSpecialReg96Plus(Val);
1924 default:
1925 llvm_unreachable("unexpected immediate type");
1926 }
1927}
1928
1929// Bit 0 of DstY isn't stored in the instruction, because it's always the
1930// opposite of bit 0 of DstX.
// Reconstruct that bit from the already-decoded DstX register and OR it into
// the encoded value before forming the 32-bit VGPR operand.
1932 unsigned Val) const {
1933 int VDstXInd =
1934 AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::vdstX);
1935 assert(VDstXInd != -1);
1936 assert(Inst.getOperand(VDstXInd).isReg());
1937 unsigned XDstReg = MRI.getEncodingValue(Inst.getOperand(VDstXInd).getReg());
1938 Val |= ~XDstReg & 1;
1939 return createRegOperand(getVgprClassId(32), Val);
1940}
1941
// Decode the 32-bit special-register encodings (flat_scratch halves, VCC,
// trap registers, EXEC halves, apertures, ...). Note encodings 124/125 swap
// meaning (M0 vs NULL) on GFX11+.
1943 using namespace AMDGPU;
1944
1945 switch (Val) {
1946 // clang-format off
1947 case 102: return createRegOperand(FLAT_SCR_LO);
1948 case 103: return createRegOperand(FLAT_SCR_HI);
1949 case 104: return createRegOperand(XNACK_MASK_LO);
1950 case 105: return createRegOperand(XNACK_MASK_HI);
1951 case 106: return createRegOperand(VCC_LO);
1952 case 107: return createRegOperand(VCC_HI);
1953 case 108: return createRegOperand(TBA_LO);
1954 case 109: return createRegOperand(TBA_HI);
1955 case 110: return createRegOperand(TMA_LO);
1956 case 111: return createRegOperand(TMA_HI);
1957 case 124:
1958 return isGFX11Plus() ? createRegOperand(SGPR_NULL) : createRegOperand(M0);
1959 case 125:
1960 return isGFX11Plus() ? createRegOperand(M0) : createRegOperand(SGPR_NULL);
1961 case 126: return createRegOperand(EXEC_LO);
1962 case 127: return createRegOperand(EXEC_HI);
1963 case 230: return createRegOperand(SRC_FLAT_SCRATCH_BASE_LO);
1964 case 231: return createRegOperand(SRC_FLAT_SCRATCH_BASE_HI);
1965 case 235: return createRegOperand(SRC_SHARED_BASE_LO);
1966 case 236: return createRegOperand(SRC_SHARED_LIMIT_LO);
1967 case 237: return createRegOperand(SRC_PRIVATE_BASE_LO);
1968 case 238: return createRegOperand(SRC_PRIVATE_LIMIT_LO);
1969 case 239: return createRegOperand(SRC_POPS_EXITING_WAVE_ID);
1970 case 251: return createRegOperand(SRC_VCCZ);
1971 case 252: return createRegOperand(SRC_EXECZ);
1972 case 253: return createRegOperand(SRC_SCC);
1973 case 254: return createRegOperand(LDS_DIRECT);
1974 default: break;
1975 // clang-format on
1976 }
1977 return errOperand(Val, "unknown operand encoding " + Twine(Val));
1978}
1979
// Decode 64-bit special-register pairs for the same encoding space as the
// 32-bit variant above; NULL remains legal on both sides of the GFX11
// 124/125 swap.
1981 using namespace AMDGPU;
1982
1983 switch (Val) {
1984 case 102: return createRegOperand(FLAT_SCR);
1985 case 104: return createRegOperand(XNACK_MASK);
1986 case 106: return createRegOperand(VCC);
1987 case 108: return createRegOperand(TBA);
1988 case 110: return createRegOperand(TMA);
1989 case 124:
1990 if (isGFX11Plus())
1991 return createRegOperand(SGPR_NULL);
1992 break;
1993 case 125:
1994 if (!isGFX11Plus())
1995 return createRegOperand(SGPR_NULL);
1996 break;
1997 case 126: return createRegOperand(EXEC);
1998 case 230: return createRegOperand(SRC_FLAT_SCRATCH_BASE_LO);
1999 case 235: return createRegOperand(SRC_SHARED_BASE);
2000 case 236: return createRegOperand(SRC_SHARED_LIMIT);
2001 case 237: return createRegOperand(SRC_PRIVATE_BASE);
2002 case 238: return createRegOperand(SRC_PRIVATE_LIMIT);
2003 case 239: return createRegOperand(SRC_POPS_EXITING_WAVE_ID);
2004 case 251: return createRegOperand(SRC_VCCZ);
2005 case 252: return createRegOperand(SRC_EXECZ);
2006 case 253: return createRegOperand(SRC_SCC);
2007 default: break;
2008 }
2009 return errOperand(Val, "unknown operand encoding " + Twine(Val));
2010}
2011
// For operands of 96 bits and wider only SGPR_NULL is a legal special
// encoding (124 on GFX11+, 125 before).
2013 using namespace AMDGPU;
2014
2015 switch (Val) {
2016 case 124:
2017 if (isGFX11Plus())
2018 return createRegOperand(SGPR_NULL);
2019 break;
2020 case 125:
2021 if (!isGFX11Plus())
2022 return createRegOperand(SGPR_NULL);
2023 break;
2024 default:
2025 break;
2026 }
2027 return errOperand(Val, "unknown operand encoding " + Twine(Val));
2028}
2029
2031 const unsigned Val) const {
2032 using namespace AMDGPU::SDWA;
2033 using namespace AMDGPU::EncValues;
2034
2035 if (STI.hasFeature(AMDGPU::FeatureGFX9) ||
2036 STI.hasFeature(AMDGPU::FeatureGFX10)) {
2037 // XXX: cast to int is needed to avoid stupid warning:
2038 // compare with unsigned is always true
2039 if (int(SDWA9EncValues::SRC_VGPR_MIN) <= int(Val) &&
2040 Val <= SDWA9EncValues::SRC_VGPR_MAX) {
2041 return createRegOperand(getVgprClassId(Width),
2042 Val - SDWA9EncValues::SRC_VGPR_MIN);
2043 }
2044 if (SDWA9EncValues::SRC_SGPR_MIN <= Val &&
2045 Val <= (isGFX10Plus() ? SDWA9EncValues::SRC_SGPR_MAX_GFX10
2046 : SDWA9EncValues::SRC_SGPR_MAX_SI)) {
2047 return createSRegOperand(getSgprClassId(Width),
2048 Val - SDWA9EncValues::SRC_SGPR_MIN);
2049 }
2050 if (SDWA9EncValues::SRC_TTMP_MIN <= Val &&
2051 Val <= SDWA9EncValues::SRC_TTMP_MAX) {
2052 return createSRegOperand(getTtmpClassId(Width),
2053 Val - SDWA9EncValues::SRC_TTMP_MIN);
2054 }
2055
2056 const unsigned SVal = Val - SDWA9EncValues::SRC_SGPR_MIN;
2057
2058 if ((INLINE_INTEGER_C_MIN <= SVal && SVal <= INLINE_INTEGER_C_MAX) ||
2059 (INLINE_FLOATING_C_MIN <= SVal && SVal <= INLINE_FLOATING_C_MAX))
2060 return MCOperand::createImm(SVal);
2061
2062 return decodeSpecialReg32(SVal);
2063 }
2064 if (STI.hasFeature(AMDGPU::FeatureVolcanicIslands))
2065 return createRegOperand(getVgprClassId(Width), Val);
2066 llvm_unreachable("unsupported target");
2067}
2068
2070 return decodeSDWASrc(16, Val);
2071}
2072
2074 return decodeSDWASrc(32, Val);
2075}
2076
2078 using namespace AMDGPU::SDWA;
2079
2080 assert((STI.hasFeature(AMDGPU::FeatureGFX9) ||
2081 STI.hasFeature(AMDGPU::FeatureGFX10)) &&
2082 "SDWAVopcDst should be present only on GFX9+");
2083
2084 bool IsWave32 = STI.hasFeature(AMDGPU::FeatureWavefrontSize32);
2085
2086 if (Val & SDWA9EncValues::VOPC_DST_VCC_MASK) {
2087 Val &= SDWA9EncValues::VOPC_DST_SGPR_MASK;
2088
2089 int TTmpIdx = getTTmpIdx(Val);
2090 if (TTmpIdx >= 0) {
2091 auto TTmpClsId = getTtmpClassId(IsWave32 ? 32 : 64);
2092 return createSRegOperand(TTmpClsId, TTmpIdx);
2093 }
2094 if (Val > SGPR_MAX) {
2095 return IsWave32 ? decodeSpecialReg32(Val) : decodeSpecialReg64(Val);
2096 }
2097 return createSRegOperand(getSgprClassId(IsWave32 ? 32 : 64), Val);
2098 }
2099 return createRegOperand(IsWave32 ? AMDGPU::VCC_LO : AMDGPU::VCC);
2100}
2101
2103 unsigned Val) const {
2104 return STI.hasFeature(AMDGPU::FeatureWavefrontSize32)
2105 ? decodeSrcOp(Inst, 32, Val)
2106 : decodeSrcOp(Inst, 64, Val);
2107}
2108
2110 unsigned Val) const {
2111 return decodeSrcOp(Inst, 32, Val);
2112}
2113
2116 return MCOperand();
2117 return MCOperand::createImm(Val);
2118}
2119
2121 using VersionField = AMDGPU::EncodingField<7, 0>;
2122 using W64Bit = AMDGPU::EncodingBit<13>;
2123 using W32Bit = AMDGPU::EncodingBit<14>;
2124 using MDPBit = AMDGPU::EncodingBit<15>;
2126
2127 auto [Version, W64, W32, MDP] = Encoding::decode(Imm);
2128
2129 // Decode into a plain immediate if any unused bits are raised.
2130 if (Encoding::encode(Version, W64, W32, MDP) != Imm)
2131 return MCOperand::createImm(Imm);
2132
2133 const auto &Versions = AMDGPU::UCVersion::getGFXVersions();
2134 const auto *I = find_if(
2135 Versions, [Version = Version](const AMDGPU::UCVersion::GFXVersion &V) {
2136 return V.Code == Version;
2137 });
2138 MCContext &Ctx = getContext();
2139 const MCExpr *E;
2140 if (I == Versions.end())
2142 else
2143 E = MCSymbolRefExpr::create(Ctx.getOrCreateSymbol(I->Symbol), Ctx);
2144
2145 if (W64)
2146 E = MCBinaryExpr::createOr(E, UCVersionW64Expr, Ctx);
2147 if (W32)
2148 E = MCBinaryExpr::createOr(E, UCVersionW32Expr, Ctx);
2149 if (MDP)
2150 E = MCBinaryExpr::createOr(E, UCVersionMDPExpr, Ctx);
2151
2152 return MCOperand::createExpr(E);
2153}
2154
2156 return STI.hasFeature(AMDGPU::FeatureVolcanicIslands);
2157}
2158
2160
2162 return STI.hasFeature(AMDGPU::FeatureGFX90AInsts);
2163}
2164
2166
2168
2172
2174 return STI.hasFeature(AMDGPU::FeatureGFX11);
2175}
2176
2180
2182 return STI.hasFeature(AMDGPU::FeatureGFX12);
2183}
2184
2188
2190
2192 return STI.hasFeature(AMDGPU::FeatureArchitectedFlatScratch);
2193}
2194
2198
2199//===----------------------------------------------------------------------===//
2200// AMDGPU specific symbol handling
2201//===----------------------------------------------------------------------===//
2202
2203/// Print a string describing the reserved bit range specified by Mask with
2204/// offset BaseBytes for use in error comments. Mask is a single continuous
2205/// range of 1s surrounded by zeros. The format here is meant to align with the
2206/// tables that describe these bits in llvm.org/docs/AMDGPUUsage.html.
2207static SmallString<32> getBitRangeFromMask(uint32_t Mask, unsigned BaseBytes) {
2208 SmallString<32> Result;
2209 raw_svector_ostream S(Result);
2210
2211 int TrailingZeros = llvm::countr_zero(Mask);
2212 int PopCount = llvm::popcount(Mask);
2213
2214 if (PopCount == 1) {
2215 S << "bit (" << (TrailingZeros + BaseBytes * CHAR_BIT) << ')';
2216 } else {
2217 S << "bits in range ("
2218 << (TrailingZeros + PopCount - 1 + BaseBytes * CHAR_BIT) << ':'
2219 << (TrailingZeros + BaseBytes * CHAR_BIT) << ')';
2220 }
2221
2222 return Result;
2223}
2224
2225#define GET_FIELD(MASK) (AMDHSA_BITS_GET(FourByteBuffer, MASK))
2226#define PRINT_DIRECTIVE(DIRECTIVE, MASK) \
2227 do { \
2228 KdStream << Indent << DIRECTIVE " " << GET_FIELD(MASK) << '\n'; \
2229 } while (0)
2230#define PRINT_PSEUDO_DIRECTIVE_COMMENT(DIRECTIVE, MASK) \
2231 do { \
2232 KdStream << Indent << MAI.getCommentString() << ' ' << DIRECTIVE " " \
2233 << GET_FIELD(MASK) << '\n'; \
2234 } while (0)
2235
2236#define CHECK_RESERVED_BITS_IMPL(MASK, DESC, MSG) \
2237 do { \
2238 if (FourByteBuffer & (MASK)) { \
2239 return createStringError(std::errc::invalid_argument, \
2240 "kernel descriptor " DESC \
2241 " reserved %s set" MSG, \
2242 getBitRangeFromMask((MASK), 0).c_str()); \
2243 } \
2244 } while (0)
2245
2246#define CHECK_RESERVED_BITS(MASK) CHECK_RESERVED_BITS_IMPL(MASK, #MASK, "")
2247#define CHECK_RESERVED_BITS_MSG(MASK, MSG) \
2248 CHECK_RESERVED_BITS_IMPL(MASK, #MASK, ", " MSG)
2249#define CHECK_RESERVED_BITS_DESC(MASK, DESC) \
2250 CHECK_RESERVED_BITS_IMPL(MASK, DESC, "")
2251#define CHECK_RESERVED_BITS_DESC_MSG(MASK, DESC, MSG) \
2252 CHECK_RESERVED_BITS_IMPL(MASK, DESC, ", " MSG)
2253
2254// NOLINTNEXTLINE(readability-identifier-naming)
2256 uint32_t FourByteBuffer, raw_string_ostream &KdStream) const {
2257 using namespace amdhsa;
2258 StringRef Indent = "\t";
2259
2260 // We cannot accurately backward compute #VGPRs used from
2261 // GRANULATED_WORKITEM_VGPR_COUNT. But we are concerned with getting the same
2262 // value of GRANULATED_WORKITEM_VGPR_COUNT in the reassembled binary. So we
2263 // simply calculate the inverse of what the assembler does.
2264
2265 uint32_t GranulatedWorkitemVGPRCount =
2266 GET_FIELD(COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT);
2267
2268 uint32_t NextFreeVGPR =
2269 (GranulatedWorkitemVGPRCount + 1) *
2270 AMDGPU::IsaInfo::getVGPREncodingGranule(&STI, EnableWavefrontSize32);
2271
2272 KdStream << Indent << ".amdhsa_next_free_vgpr " << NextFreeVGPR << '\n';
2273
2274 // We cannot backward compute values used to calculate
2275 // GRANULATED_WAVEFRONT_SGPR_COUNT. Hence the original values for following
2276 // directives can't be computed:
2277 // .amdhsa_reserve_vcc
2278 // .amdhsa_reserve_flat_scratch
2279 // .amdhsa_reserve_xnack_mask
2280 // They take their respective default values if not specified in the assembly.
2281 //
2282 // GRANULATED_WAVEFRONT_SGPR_COUNT
2283 // = f(NEXT_FREE_SGPR + VCC + FLAT_SCRATCH + XNACK_MASK)
2284 //
2285 // We compute the inverse as though all directives apart from NEXT_FREE_SGPR
2286 // are set to 0. So while disassembling we consider that:
2287 //
2288 // GRANULATED_WAVEFRONT_SGPR_COUNT
2289 // = f(NEXT_FREE_SGPR + 0 + 0 + 0)
2290 //
2291 // The disassembler cannot recover the original values of those 3 directives.
2292
2293 uint32_t GranulatedWavefrontSGPRCount =
2294 GET_FIELD(COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT);
2295
2296 if (isGFX10Plus())
2297 CHECK_RESERVED_BITS_MSG(COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT,
2298 "must be zero on gfx10+");
2299
2300 uint32_t NextFreeSGPR = (GranulatedWavefrontSGPRCount + 1) *
2302
2303 KdStream << Indent << ".amdhsa_reserve_vcc " << 0 << '\n';
2305 KdStream << Indent << ".amdhsa_reserve_flat_scratch " << 0 << '\n';
2306 KdStream << Indent << ".amdhsa_reserve_xnack_mask " << 0 << '\n';
2307 KdStream << Indent << ".amdhsa_next_free_sgpr " << NextFreeSGPR << "\n";
2308
2309 CHECK_RESERVED_BITS(COMPUTE_PGM_RSRC1_PRIORITY);
2310
2311 PRINT_DIRECTIVE(".amdhsa_float_round_mode_32",
2312 COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_32);
2313 PRINT_DIRECTIVE(".amdhsa_float_round_mode_16_64",
2314 COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_16_64);
2315 PRINT_DIRECTIVE(".amdhsa_float_denorm_mode_32",
2316 COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_32);
2317 PRINT_DIRECTIVE(".amdhsa_float_denorm_mode_16_64",
2318 COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_16_64);
2319
2320 CHECK_RESERVED_BITS(COMPUTE_PGM_RSRC1_PRIV);
2321
2322 if (!isGFX12Plus())
2323 PRINT_DIRECTIVE(".amdhsa_dx10_clamp",
2324 COMPUTE_PGM_RSRC1_GFX6_GFX11_ENABLE_DX10_CLAMP);
2325
2326 CHECK_RESERVED_BITS(COMPUTE_PGM_RSRC1_DEBUG_MODE);
2327
2328 if (!isGFX12Plus())
2329 PRINT_DIRECTIVE(".amdhsa_ieee_mode",
2330 COMPUTE_PGM_RSRC1_GFX6_GFX11_ENABLE_IEEE_MODE);
2331
2332 CHECK_RESERVED_BITS(COMPUTE_PGM_RSRC1_BULKY);
2333 CHECK_RESERVED_BITS(COMPUTE_PGM_RSRC1_CDBG_USER);
2334
2335 // Bits [26].
2336 if (isGFX9Plus()) {
2337 PRINT_DIRECTIVE(".amdhsa_fp16_overflow", COMPUTE_PGM_RSRC1_GFX9_PLUS_FP16_OVFL);
2338 } else {
2339 CHECK_RESERVED_BITS_DESC_MSG(COMPUTE_PGM_RSRC1_GFX6_GFX8_RESERVED0,
2340 "COMPUTE_PGM_RSRC1", "must be zero pre-gfx9");
2341 }
2342
2343 // Bits [27].
2344 if (isGFX1250()) {
2345 PRINT_PSEUDO_DIRECTIVE_COMMENT("FLAT_SCRATCH_IS_NV",
2346 COMPUTE_PGM_RSRC1_GFX125_FLAT_SCRATCH_IS_NV);
2347 } else {
2348 CHECK_RESERVED_BITS_DESC(COMPUTE_PGM_RSRC1_GFX6_GFX120_RESERVED1,
2349 "COMPUTE_PGM_RSRC1");
2350 }
2351
2352 // Bits [28].
2353 CHECK_RESERVED_BITS_DESC(COMPUTE_PGM_RSRC1_RESERVED2, "COMPUTE_PGM_RSRC1");
2354
2355 // Bits [29-31].
2356 if (isGFX10Plus()) {
2357 // WGP_MODE is not available on GFX1250.
2358 if (!isGFX1250()) {
2359 PRINT_DIRECTIVE(".amdhsa_workgroup_processor_mode",
2360 COMPUTE_PGM_RSRC1_GFX10_PLUS_WGP_MODE);
2361 }
2362 PRINT_DIRECTIVE(".amdhsa_memory_ordered", COMPUTE_PGM_RSRC1_GFX10_PLUS_MEM_ORDERED);
2363 PRINT_DIRECTIVE(".amdhsa_forward_progress", COMPUTE_PGM_RSRC1_GFX10_PLUS_FWD_PROGRESS);
2364 } else {
2365 CHECK_RESERVED_BITS_DESC(COMPUTE_PGM_RSRC1_GFX6_GFX9_RESERVED3,
2366 "COMPUTE_PGM_RSRC1");
2367 }
2368
2369 if (isGFX12Plus())
2370 PRINT_DIRECTIVE(".amdhsa_round_robin_scheduling",
2371 COMPUTE_PGM_RSRC1_GFX12_PLUS_ENABLE_WG_RR_EN);
2372
2373 return true;
2374}
2375
2376// NOLINTNEXTLINE(readability-identifier-naming)
2378 uint32_t FourByteBuffer, raw_string_ostream &KdStream) const {
2379 using namespace amdhsa;
2380 StringRef Indent = "\t";
2382 PRINT_DIRECTIVE(".amdhsa_enable_private_segment",
2383 COMPUTE_PGM_RSRC2_ENABLE_PRIVATE_SEGMENT);
2384 else
2385 PRINT_DIRECTIVE(".amdhsa_system_sgpr_private_segment_wavefront_offset",
2386 COMPUTE_PGM_RSRC2_ENABLE_PRIVATE_SEGMENT);
2387 PRINT_DIRECTIVE(".amdhsa_system_sgpr_workgroup_id_x",
2388 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_X);
2389 PRINT_DIRECTIVE(".amdhsa_system_sgpr_workgroup_id_y",
2390 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Y);
2391 PRINT_DIRECTIVE(".amdhsa_system_sgpr_workgroup_id_z",
2392 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Z);
2393 PRINT_DIRECTIVE(".amdhsa_system_sgpr_workgroup_info",
2394 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_INFO);
2395 PRINT_DIRECTIVE(".amdhsa_system_vgpr_workitem_id",
2396 COMPUTE_PGM_RSRC2_ENABLE_VGPR_WORKITEM_ID);
2397
2398 CHECK_RESERVED_BITS(COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_ADDRESS_WATCH);
2399 CHECK_RESERVED_BITS(COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_MEMORY);
2400 CHECK_RESERVED_BITS(COMPUTE_PGM_RSRC2_GRANULATED_LDS_SIZE);
2401
2403 ".amdhsa_exception_fp_ieee_invalid_op",
2404 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INVALID_OPERATION);
2405 PRINT_DIRECTIVE(".amdhsa_exception_fp_denorm_src",
2406 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_FP_DENORMAL_SOURCE);
2408 ".amdhsa_exception_fp_ieee_div_zero",
2409 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_DIVISION_BY_ZERO);
2410 PRINT_DIRECTIVE(".amdhsa_exception_fp_ieee_overflow",
2411 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_OVERFLOW);
2412 PRINT_DIRECTIVE(".amdhsa_exception_fp_ieee_underflow",
2413 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_UNDERFLOW);
2414 PRINT_DIRECTIVE(".amdhsa_exception_fp_ieee_inexact",
2415 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INEXACT);
2416 PRINT_DIRECTIVE(".amdhsa_exception_int_div_zero",
2417 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_INT_DIVIDE_BY_ZERO);
2418
2419 CHECK_RESERVED_BITS_DESC(COMPUTE_PGM_RSRC2_RESERVED0, "COMPUTE_PGM_RSRC2");
2420
2421 return true;
2422}
2423
2424// NOLINTNEXTLINE(readability-identifier-naming)
2426 uint32_t FourByteBuffer, raw_string_ostream &KdStream) const {
2427 using namespace amdhsa;
2428 StringRef Indent = "\t";
2429 if (isGFX90A()) {
2430 KdStream << Indent << ".amdhsa_accum_offset "
2431 << (GET_FIELD(COMPUTE_PGM_RSRC3_GFX90A_ACCUM_OFFSET) + 1) * 4
2432 << '\n';
2433
2434 PRINT_DIRECTIVE(".amdhsa_tg_split", COMPUTE_PGM_RSRC3_GFX90A_TG_SPLIT);
2435
2436 CHECK_RESERVED_BITS_DESC_MSG(COMPUTE_PGM_RSRC3_GFX90A_RESERVED0,
2437 "COMPUTE_PGM_RSRC3", "must be zero on gfx90a");
2438 CHECK_RESERVED_BITS_DESC_MSG(COMPUTE_PGM_RSRC3_GFX90A_RESERVED1,
2439 "COMPUTE_PGM_RSRC3", "must be zero on gfx90a");
2440 } else if (isGFX10Plus()) {
2441 // Bits [0-3].
2442 if (!isGFX12Plus()) {
2443 if (!EnableWavefrontSize32 || !*EnableWavefrontSize32) {
2444 PRINT_DIRECTIVE(".amdhsa_shared_vgpr_count",
2445 COMPUTE_PGM_RSRC3_GFX10_GFX11_SHARED_VGPR_COUNT);
2446 } else {
2448 "SHARED_VGPR_COUNT",
2449 COMPUTE_PGM_RSRC3_GFX10_GFX11_SHARED_VGPR_COUNT);
2450 }
2451 } else {
2452 CHECK_RESERVED_BITS_DESC_MSG(COMPUTE_PGM_RSRC3_GFX12_PLUS_RESERVED0,
2453 "COMPUTE_PGM_RSRC3",
2454 "must be zero on gfx12+");
2455 }
2456
2457 // Bits [4-11].
2458 if (isGFX11()) {
2459 PRINT_DIRECTIVE(".amdhsa_inst_pref_size",
2460 COMPUTE_PGM_RSRC3_GFX11_INST_PREF_SIZE);
2461 PRINT_PSEUDO_DIRECTIVE_COMMENT("TRAP_ON_START",
2462 COMPUTE_PGM_RSRC3_GFX11_TRAP_ON_START);
2463 PRINT_PSEUDO_DIRECTIVE_COMMENT("TRAP_ON_END",
2464 COMPUTE_PGM_RSRC3_GFX11_TRAP_ON_END);
2465 } else if (isGFX12Plus()) {
2466 PRINT_DIRECTIVE(".amdhsa_inst_pref_size",
2467 COMPUTE_PGM_RSRC3_GFX12_PLUS_INST_PREF_SIZE);
2468 } else {
2469 CHECK_RESERVED_BITS_DESC_MSG(COMPUTE_PGM_RSRC3_GFX10_RESERVED1,
2470 "COMPUTE_PGM_RSRC3",
2471 "must be zero on gfx10");
2472 }
2473
2474 // Bits [12].
2475 CHECK_RESERVED_BITS_DESC_MSG(COMPUTE_PGM_RSRC3_GFX10_PLUS_RESERVED2,
2476 "COMPUTE_PGM_RSRC3", "must be zero on gfx10+");
2477
2478 // Bits [13].
2479 if (isGFX12Plus()) {
2481 COMPUTE_PGM_RSRC3_GFX12_PLUS_GLG_EN);
2482 } else {
2483 CHECK_RESERVED_BITS_DESC_MSG(COMPUTE_PGM_RSRC3_GFX10_GFX11_RESERVED3,
2484 "COMPUTE_PGM_RSRC3",
2485 "must be zero on gfx10 or gfx11");
2486 }
2487
2488 // Bits [14-21].
2489 if (isGFX1250()) {
2490 PRINT_DIRECTIVE(".amdhsa_named_barrier_count",
2491 COMPUTE_PGM_RSRC3_GFX125_NAMED_BAR_CNT);
2493 "ENABLE_DYNAMIC_VGPR", COMPUTE_PGM_RSRC3_GFX125_ENABLE_DYNAMIC_VGPR);
2495 COMPUTE_PGM_RSRC3_GFX125_TCP_SPLIT);
2497 "ENABLE_DIDT_THROTTLE",
2498 COMPUTE_PGM_RSRC3_GFX125_ENABLE_DIDT_THROTTLE);
2499 } else {
2500 CHECK_RESERVED_BITS_DESC_MSG(COMPUTE_PGM_RSRC3_GFX10_GFX120_RESERVED4,
2501 "COMPUTE_PGM_RSRC3",
2502 "must be zero on gfx10+");
2503 }
2504
2505 // Bits [22-30].
2506 CHECK_RESERVED_BITS_DESC_MSG(COMPUTE_PGM_RSRC3_GFX10_PLUS_RESERVED5,
2507 "COMPUTE_PGM_RSRC3", "must be zero on gfx10+");
2508
2509 // Bits [31].
2510 if (isGFX11Plus()) {
2512 COMPUTE_PGM_RSRC3_GFX11_PLUS_IMAGE_OP);
2513 } else {
2514 CHECK_RESERVED_BITS_DESC_MSG(COMPUTE_PGM_RSRC3_GFX10_RESERVED6,
2515 "COMPUTE_PGM_RSRC3",
2516 "must be zero on gfx10");
2517 }
2518 } else if (FourByteBuffer) {
2519 return createStringError(
2520 std::errc::invalid_argument,
2521 "kernel descriptor COMPUTE_PGM_RSRC3 must be all zero before gfx9");
2522 }
2523 return true;
2524}
2525#undef PRINT_PSEUDO_DIRECTIVE_COMMENT
2526#undef PRINT_DIRECTIVE
2527#undef GET_FIELD
2528#undef CHECK_RESERVED_BITS_IMPL
2529#undef CHECK_RESERVED_BITS
2530#undef CHECK_RESERVED_BITS_MSG
2531#undef CHECK_RESERVED_BITS_DESC
2532#undef CHECK_RESERVED_BITS_DESC_MSG
2533
2534/// Create an error object to return from onSymbolStart for reserved kernel
2535/// descriptor bits being set.
2536static Error createReservedKDBitsError(uint32_t Mask, unsigned BaseBytes,
2537 const char *Msg = "") {
2538 return createStringError(
2539 std::errc::invalid_argument, "kernel descriptor reserved %s set%s%s",
2540 getBitRangeFromMask(Mask, BaseBytes).c_str(), *Msg ? ", " : "", Msg);
2541}
2542
2543/// Create an error object to return from onSymbolStart for reserved kernel
2544/// descriptor bytes being set.
2545static Error createReservedKDBytesError(unsigned BaseInBytes,
2546 unsigned WidthInBytes) {
2547 // Create an error comment in the same format as the "Kernel Descriptor"
2548 // table here: https://llvm.org/docs/AMDGPUUsage.html#kernel-descriptor .
2549 return createStringError(
2550 std::errc::invalid_argument,
2551 "kernel descriptor reserved bits in range (%u:%u) set",
2552 (BaseInBytes + WidthInBytes) * CHAR_BIT - 1, BaseInBytes * CHAR_BIT);
2553}
2554
2557 raw_string_ostream &KdStream) const {
2558#define PRINT_DIRECTIVE(DIRECTIVE, MASK) \
2559 do { \
2560 KdStream << Indent << DIRECTIVE " " \
2561 << ((TwoByteBuffer & MASK) >> (MASK##_SHIFT)) << '\n'; \
2562 } while (0)
2563
2564 uint16_t TwoByteBuffer = 0;
2565 uint32_t FourByteBuffer = 0;
2566
2567 StringRef ReservedBytes;
2568 StringRef Indent = "\t";
2569
2570 assert(Bytes.size() == 64);
2571 DataExtractor DE(Bytes, /*IsLittleEndian=*/true, /*AddressSize=*/8);
2572
2573 switch (Cursor.tell()) {
2575 FourByteBuffer = DE.getU32(Cursor);
2576 KdStream << Indent << ".amdhsa_group_segment_fixed_size " << FourByteBuffer
2577 << '\n';
2578 return true;
2579
2581 FourByteBuffer = DE.getU32(Cursor);
2582 KdStream << Indent << ".amdhsa_private_segment_fixed_size "
2583 << FourByteBuffer << '\n';
2584 return true;
2585
2587 FourByteBuffer = DE.getU32(Cursor);
2588 KdStream << Indent << ".amdhsa_kernarg_size "
2589 << FourByteBuffer << '\n';
2590 return true;
2591
2593 // 4 reserved bytes, must be 0.
2594 ReservedBytes = DE.getBytes(Cursor, 4);
2595 for (int I = 0; I < 4; ++I) {
2596 if (ReservedBytes[I] != 0)
2598 }
2599 return true;
2600
2602 // KERNEL_CODE_ENTRY_BYTE_OFFSET
2603 // So far no directive controls this for Code Object V3, so simply skip for
2604 // disassembly.
2605 DE.skip(Cursor, 8);
2606 return true;
2607
2609 // 20 reserved bytes, must be 0.
2610 ReservedBytes = DE.getBytes(Cursor, 20);
2611 for (int I = 0; I < 20; ++I) {
2612 if (ReservedBytes[I] != 0)
2614 }
2615 return true;
2616
2618 FourByteBuffer = DE.getU32(Cursor);
2619 return decodeCOMPUTE_PGM_RSRC3(FourByteBuffer, KdStream);
2620
2622 FourByteBuffer = DE.getU32(Cursor);
2623 return decodeCOMPUTE_PGM_RSRC1(FourByteBuffer, KdStream);
2624
2626 FourByteBuffer = DE.getU32(Cursor);
2627 return decodeCOMPUTE_PGM_RSRC2(FourByteBuffer, KdStream);
2628
2630 using namespace amdhsa;
2631 TwoByteBuffer = DE.getU16(Cursor);
2632
2634 PRINT_DIRECTIVE(".amdhsa_user_sgpr_private_segment_buffer",
2635 KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER);
2636 PRINT_DIRECTIVE(".amdhsa_user_sgpr_dispatch_ptr",
2637 KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR);
2638 PRINT_DIRECTIVE(".amdhsa_user_sgpr_queue_ptr",
2639 KERNEL_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR);
2640 PRINT_DIRECTIVE(".amdhsa_user_sgpr_kernarg_segment_ptr",
2641 KERNEL_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR);
2642 PRINT_DIRECTIVE(".amdhsa_user_sgpr_dispatch_id",
2643 KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID);
2645 PRINT_DIRECTIVE(".amdhsa_user_sgpr_flat_scratch_init",
2646 KERNEL_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT);
2647 PRINT_DIRECTIVE(".amdhsa_user_sgpr_private_segment_size",
2648 KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE);
2649
2650 if (TwoByteBuffer & KERNEL_CODE_PROPERTY_RESERVED0)
2651 return createReservedKDBitsError(KERNEL_CODE_PROPERTY_RESERVED0,
2653
2654 // Reserved for GFX9
2655 if (isGFX9() &&
2656 (TwoByteBuffer & KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32)) {
2658 KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32,
2659 amdhsa::KERNEL_CODE_PROPERTIES_OFFSET, "must be zero on gfx9");
2660 }
2661 if (isGFX10Plus()) {
2662 PRINT_DIRECTIVE(".amdhsa_wavefront_size32",
2663 KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32);
2664 }
2665
2666 if (CodeObjectVersion >= AMDGPU::AMDHSA_COV5)
2667 PRINT_DIRECTIVE(".amdhsa_uses_dynamic_stack",
2668 KERNEL_CODE_PROPERTY_USES_DYNAMIC_STACK);
2669
2670 if (TwoByteBuffer & KERNEL_CODE_PROPERTY_RESERVED1) {
2671 return createReservedKDBitsError(KERNEL_CODE_PROPERTY_RESERVED1,
2673 }
2674
2675 return true;
2676
2678 using namespace amdhsa;
2679 TwoByteBuffer = DE.getU16(Cursor);
2680 if (TwoByteBuffer & KERNARG_PRELOAD_SPEC_LENGTH) {
2681 PRINT_DIRECTIVE(".amdhsa_user_sgpr_kernarg_preload_length",
2682 KERNARG_PRELOAD_SPEC_LENGTH);
2683 }
2684
2685 if (TwoByteBuffer & KERNARG_PRELOAD_SPEC_OFFSET) {
2686 PRINT_DIRECTIVE(".amdhsa_user_sgpr_kernarg_preload_offset",
2687 KERNARG_PRELOAD_SPEC_OFFSET);
2688 }
2689 return true;
2690
2692 // 4 bytes from here are reserved, must be 0.
2693 ReservedBytes = DE.getBytes(Cursor, 4);
2694 for (int I = 0; I < 4; ++I) {
2695 if (ReservedBytes[I] != 0)
2697 }
2698 return true;
2699
2700 default:
2701 llvm_unreachable("Unhandled index. Case statements cover everything.");
2702 return true;
2703 }
2704#undef PRINT_DIRECTIVE
2705}
2706
2708 StringRef KdName, ArrayRef<uint8_t> Bytes, uint64_t KdAddress) const {
2709
2710 // CP microcode requires the kernel descriptor to be 64 aligned.
2711 if (Bytes.size() != 64 || KdAddress % 64 != 0)
2712 return createStringError(std::errc::invalid_argument,
2713 "kernel descriptor must be 64-byte aligned");
2714
2715 // FIXME: We can't actually decode "in order" as is done below, as e.g. GFX10
2716 // requires us to know the setting of .amdhsa_wavefront_size32 in order to
2717 // accurately produce .amdhsa_next_free_vgpr, and they appear in the wrong
2718 // order. Workaround this by first looking up .amdhsa_wavefront_size32 here
2719 // when required.
2720 if (isGFX10Plus()) {
2721 uint16_t KernelCodeProperties =
2724 EnableWavefrontSize32 =
2725 AMDHSA_BITS_GET(KernelCodeProperties,
2726 amdhsa::KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32);
2727 }
2728
2729 std::string Kd;
2730 raw_string_ostream KdStream(Kd);
2731 KdStream << ".amdhsa_kernel " << KdName << '\n';
2732
2734 while (C && C.tell() < Bytes.size()) {
2735 Expected<bool> Res = decodeKernelDescriptorDirective(C, Bytes, KdStream);
2736
2737 cantFail(C.takeError());
2738
2739 if (!Res)
2740 return Res;
2741 }
2742 KdStream << ".end_amdhsa_kernel\n";
2743 outs() << KdStream.str();
2744 return true;
2745}
2746
2748 uint64_t &Size,
2749 ArrayRef<uint8_t> Bytes,
2750 uint64_t Address) const {
2751 // Right now only kernel descriptor needs to be handled.
2752 // We ignore all other symbols for target specific handling.
2753 // TODO:
2754 // Fix the spurious symbol issue for AMDGPU kernels. Exists for both Code
2755 // Object V2 and V3 when symbols are marked protected.
2756
2757 // amd_kernel_code_t for Code Object V2.
2758 if (Symbol.Type == ELF::STT_AMDGPU_HSA_KERNEL) {
2759 Size = 256;
2760 return createStringError(std::errc::invalid_argument,
2761 "code object v2 is not supported");
2762 }
2763
2764 // Code Object V3 kernel descriptors.
2765 StringRef Name = Symbol.Name;
2766 if (Symbol.Type == ELF::STT_OBJECT && Name.ends_with(StringRef(".kd"))) {
2767 Size = 64; // Size = 64 regardless of success or failure.
2768 return decodeKernelDescriptor(Name.drop_back(3), Bytes, Address);
2769 }
2770
2771 return false;
2772}
2773
2774const MCExpr *AMDGPUDisassembler::createConstantSymbolExpr(StringRef Id,
2775 int64_t Val) {
2776 MCContext &Ctx = getContext();
2777 MCSymbol *Sym = Ctx.getOrCreateSymbol(Id);
2778 // Note: only set value to Val on a new symbol in case an dissassembler
2779 // has already been initialized in this context.
2780 if (!Sym->isVariable()) {
2782 } else {
2783 int64_t Res = ~Val;
2784 bool Valid = Sym->getVariableValue()->evaluateAsAbsolute(Res);
2785 if (!Valid || Res != Val)
2786 Ctx.reportWarning(SMLoc(), "unsupported redefinition of " + Id);
2787 }
2788 return MCSymbolRefExpr::create(Sym, Ctx);
2789}
2790
2792 const uint64_t TSFlags = MCII->get(MI.getOpcode()).TSFlags;
2793
2794 // Check for MUBUF and MTBUF instructions
2795 if (TSFlags & (SIInstrFlags::MTBUF | SIInstrFlags::MUBUF))
2796 return true;
2797
2798 // Check for SMEM buffer instructions (S_BUFFER_* instructions)
2799 if ((TSFlags & SIInstrFlags::SMRD) && AMDGPU::getSMEMIsBuffer(MI.getOpcode()))
2800 return true;
2801
2802 return false;
2803}
2804
2805//===----------------------------------------------------------------------===//
2806// AMDGPUSymbolizer
2807//===----------------------------------------------------------------------===//
2808
2809// Try to find symbol name for specified label
2811 MCInst &Inst, raw_ostream & /*cStream*/, int64_t Value,
2812 uint64_t /*Address*/, bool IsBranch, uint64_t /*Offset*/,
2813 uint64_t /*OpSize*/, uint64_t /*InstSize*/) {
2814
2815 if (!IsBranch) {
2816 return false;
2817 }
2818
2819 auto *Symbols = static_cast<SectionSymbolsTy *>(DisInfo);
2820 if (!Symbols)
2821 return false;
2822
2823 auto Result = llvm::find_if(*Symbols, [Value](const SymbolInfoTy &Val) {
2824 return Val.Addr == static_cast<uint64_t>(Value) &&
2825 Val.Type == ELF::STT_NOTYPE;
2826 });
2827 if (Result != Symbols->end()) {
2828 auto *Sym = Ctx.getOrCreateSymbol(Result->Name);
2829 const auto *Add = MCSymbolRefExpr::create(Sym, Ctx);
2831 return true;
2832 }
2833 // Add to list of referenced addresses, so caller can synthesize a label.
2834 ReferencedAddresses.push_back(static_cast<uint64_t>(Value));
2835 return false;
2836}
2837
2839 int64_t Value,
2840 uint64_t Address) {
2841 llvm_unreachable("unimplemented");
2842}
2843
2844//===----------------------------------------------------------------------===//
2845// Initialization
2846//===----------------------------------------------------------------------===//
2847
2849 LLVMOpInfoCallback /*GetOpInfo*/,
2850 LLVMSymbolLookupCallback /*SymbolLookUp*/,
2851 void *DisInfo,
2852 MCContext *Ctx,
2853 std::unique_ptr<MCRelocationInfo> &&RelInfo) {
2854 return new AMDGPUSymbolizer(*Ctx, std::move(RelInfo), DisInfo);
2855}
2856
2858 const MCSubtargetInfo &STI,
2859 MCContext &Ctx) {
2860 return new AMDGPUDisassembler(STI, Ctx, T.createMCInstrInfo());
2861}
2862
2863extern "C" LLVM_ABI LLVM_EXTERNAL_VISIBILITY void
unsigned const MachineRegisterInfo * MRI
MCDisassembler::DecodeStatus DecodeStatus
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
aarch64 promote const
#define CHECK_RESERVED_BITS_DESC(MASK, DESC)
static VOPModifiers collectVOPModifiers(const MCInst &MI, bool IsVOP3P=false)
static int insertNamedMCOperand(MCInst &MI, const MCOperand &Op, AMDGPU::OpName Name)
LLVM_ABI LLVM_EXTERNAL_VISIBILITY void LLVMInitializeAMDGPUDisassembler()
static DecodeStatus decodeOperand_VSrcT16_Lo128(MCInst &Inst, unsigned Imm, uint64_t, const MCDisassembler *Decoder)
static DecodeStatus decodeOperand_KImmFP64(MCInst &Inst, uint64_t Imm, uint64_t Addr, const MCDisassembler *Decoder)
static SmallString< 32 > getBitRangeFromMask(uint32_t Mask, unsigned BaseBytes)
Print a string describing the reserved bit range specified by Mask with offset BaseBytes for use in e...
#define DECODE_OPERAND_SREG_8(RegClass, OpWidth)
static DecodeStatus decodeSMEMOffset(MCInst &Inst, unsigned Imm, uint64_t Addr, const MCDisassembler *Decoder)
static std::bitset< 128 > eat16Bytes(ArrayRef< uint8_t > &Bytes)
static DecodeStatus decodeVersionImm(MCInst &Inst, unsigned Imm, uint64_t, const MCDisassembler *Decoder)
#define DECODE_OPERAND_SREG_7(RegClass, OpWidth)
static DecodeStatus decodeSrcA9(MCInst &Inst, unsigned Imm, uint64_t, const MCDisassembler *Decoder)
static DecodeStatus decodeOperand_VGPR_16(MCInst &Inst, unsigned Imm, uint64_t, const MCDisassembler *Decoder)
#define PRINT_PSEUDO_DIRECTIVE_COMMENT(DIRECTIVE, MASK)
static DecodeStatus decodeSrcOp(MCInst &Inst, unsigned EncSize, unsigned OpWidth, unsigned Imm, unsigned EncImm, const MCDisassembler *Decoder)
static DecodeStatus decodeDpp8FI(MCInst &Inst, unsigned Val, uint64_t Addr, const MCDisassembler *Decoder)
static DecodeStatus decodeOperand_VSrc_f64(MCInst &Inst, unsigned Imm, uint64_t Addr, const MCDisassembler *Decoder)
static int64_t getInlineImmValBF16(unsigned Imm)
#define DECODE_SDWA(DecName)
static DecodeStatus decodeSOPPBrTarget(MCInst &Inst, unsigned Imm, uint64_t Addr, const MCDisassembler *Decoder)
#define DECODE_OPERAND_REG_8(RegClass)
static unsigned CheckVGPROverflow(unsigned Reg, const MCRegisterClass &RC, const MCRegisterInfo &MRI)
#define PRINT_DIRECTIVE(DIRECTIVE, MASK)
static DecodeStatus decodeSrcRegOrImm9(MCInst &Inst, unsigned Imm, uint64_t, const MCDisassembler *Decoder)
static DecodeStatus DecodeVGPR_16RegisterClass(MCInst &Inst, unsigned Imm, uint64_t, const MCDisassembler *Decoder)
static DecodeStatus decodeSrcReg9(MCInst &Inst, unsigned Imm, uint64_t, const MCDisassembler *Decoder)
static int64_t getInlineImmVal32(unsigned Imm)
static MCDisassembler::DecodeStatus addOperand(MCInst &Inst, const MCOperand &Opnd)
#define CHECK_RESERVED_BITS(MASK)
static DecodeStatus decodeSrcAV10(MCInst &Inst, unsigned Imm, uint64_t, const MCDisassembler *Decoder)
#define SGPR_MAX
static int64_t getInlineImmVal64(unsigned Imm)
static T eatBytes(ArrayRef< uint8_t > &Bytes)
static DecodeStatus decodeOperand_KImmFP(MCInst &Inst, unsigned Imm, uint64_t Addr, const MCDisassembler *Decoder)
static DecodeStatus decodeAVLdSt(MCInst &Inst, unsigned Imm, unsigned Opw, const MCDisassembler *Decoder)
static MCDisassembler * createAMDGPUDisassembler(const Target &T, const MCSubtargetInfo &STI, MCContext &Ctx)
static DecodeStatus decodeSrcRegOrImmA9(MCInst &Inst, unsigned Imm, uint64_t, const MCDisassembler *Decoder)
static DecodeStatus DecodeVGPR_16_Lo128RegisterClass(MCInst &Inst, unsigned Imm, uint64_t, const MCDisassembler *Decoder)
#define CHECK_RESERVED_BITS_MSG(MASK, MSG)
static DecodeStatus decodeOperandVOPDDstY(MCInst &Inst, unsigned Val, uint64_t Addr, const void *Decoder)
static MCSymbolizer * createAMDGPUSymbolizer(const Triple &, LLVMOpInfoCallback, LLVMSymbolLookupCallback, void *DisInfo, MCContext *Ctx, std::unique_ptr< MCRelocationInfo > &&RelInfo)
static DecodeStatus decodeBoolReg(MCInst &Inst, unsigned Val, uint64_t Addr, const MCDisassembler *Decoder)
static int64_t getInlineImmValF16(unsigned Imm)
#define GET_FIELD(MASK)
static std::bitset< 96 > eat12Bytes(ArrayRef< uint8_t > &Bytes)
static DecodeStatus decodeOperand_VSrcT16(MCInst &Inst, unsigned Imm, uint64_t, const MCDisassembler *Decoder)
static Error createReservedKDBytesError(unsigned BaseInBytes, unsigned WidthInBytes)
Create an error object to return from onSymbolStart for reserved kernel descriptor bytes being set.
static DecodeStatus decodeSplitBarrier(MCInst &Inst, unsigned Val, uint64_t Addr, const MCDisassembler *Decoder)
static DecodeStatus decodeAV10(MCInst &Inst, unsigned Imm, uint64_t, const MCDisassembler *Decoder)
#define CHECK_RESERVED_BITS_DESC_MSG(MASK, DESC, MSG)
static Error createReservedKDBitsError(uint32_t Mask, unsigned BaseBytes, const char *Msg="")
Create an error object to return from onSymbolStart for reserved kernel descriptor bits being set.
static void adjustMFMA_F8F6F4OpRegClass(const MCRegisterInfo &MRI, MCOperand &MO, uint8_t NumRegs)
Adjust the register values used by V_MFMA_F8F6F4_f8_f8 instructions to the appropriate subregister fo...
This file contains declaration for AMDGPU ISA disassembler.
Provides AMDGPU specific target descriptions.
AMDHSA kernel descriptor definitions.
#define AMDHSA_BITS_GET(SRC, MSK)
#define LLVM_ABI
Definition Compiler.h:213
#define LLVM_EXTERNAL_VISIBILITY
Definition Compiler.h:132
IRTranslator LLVM IR MI
#define I(x, y, z)
Definition MD5.cpp:58
Register Reg
#define T
MachineInstr unsigned OpIdx
Interface definition for SIRegisterInfo.
MCOperand decodeNonVGPRSrcOp(const MCInst &Inst, unsigned Width, unsigned Val) const
void convertVOPC64DPPInst(MCInst &MI) const
bool isBufferInstruction(const MCInst &MI) const
Check if the instruction is a buffer operation (MUBUF, MTBUF, or S_BUFFER)
void convertEXPInst(MCInst &MI) const
MCOperand createRegOperand(unsigned int RegId) const
MCOperand decodeSpecialReg64(unsigned Val) const
const char * getRegClassName(unsigned RegClassID) const
Expected< bool > decodeCOMPUTE_PGM_RSRC1(uint32_t FourByteBuffer, raw_string_ostream &KdStream) const
Decode as directives that handle COMPUTE_PGM_RSRC1.
MCOperand decodeSplitBarrier(const MCInst &Inst, unsigned Val) const
Expected< bool > decodeKernelDescriptorDirective(DataExtractor::Cursor &Cursor, ArrayRef< uint8_t > Bytes, raw_string_ostream &KdStream) const
void convertVOPCDPPInst(MCInst &MI) const
MCOperand decodeLiteral64Constant(const MCInst &Inst) const
MCOperand decodeLiteralConstant(const MCInstrDesc &Desc, const MCOperandInfo &OpDesc, bool ExtendFP64) const
MCOperand decodeSpecialReg96Plus(unsigned Val) const
MCOperand decodeSDWASrc32(unsigned Val) const
void setABIVersion(unsigned Version) override
ELF-specific, set the ABI version from the object header.
Expected< bool > decodeCOMPUTE_PGM_RSRC2(uint32_t FourByteBuffer, raw_string_ostream &KdStream) const
Decode as directives that handle COMPUTE_PGM_RSRC2.
unsigned getAgprClassId(unsigned Width) const
MCOperand decodeDpp8FI(unsigned Val) const
MCOperand decodeSDWASrc(unsigned Width, unsigned Val) const
void convertFMAanyK(MCInst &MI) const
DecodeStatus tryDecodeInst(const uint8_t *Table, MCInst &MI, InsnType Inst, uint64_t Address, raw_ostream &Comments) const
void convertMacDPPInst(MCInst &MI) const
MCOperand decodeVOPDDstYOp(MCInst &Inst, unsigned Val) const
void convertDPP8Inst(MCInst &MI) const
MCOperand createVGPR16Operand(unsigned RegIdx, bool IsHi) const
MCOperand errOperand(unsigned V, const Twine &ErrMsg) const
MCOperand decodeVersionImm(unsigned Imm) const
Expected< bool > decodeKernelDescriptor(StringRef KdName, ArrayRef< uint8_t > Bytes, uint64_t KdAddress) const
void convertVOP3DPPInst(MCInst &MI) const
void convertTrue16OpSel(MCInst &MI) const
MCOperand decodeSrcOp(const MCInst &Inst, unsigned Width, unsigned Val) const
MCOperand decodeMandatoryLiteralConstant(unsigned Imm) const
Expected< bool > decodeCOMPUTE_PGM_RSRC3(uint32_t FourByteBuffer, raw_string_ostream &KdStream) const
Decode as directives that handle COMPUTE_PGM_RSRC3.
AMDGPUDisassembler(const MCSubtargetInfo &STI, MCContext &Ctx, MCInstrInfo const *MCII)
MCOperand decodeSpecialReg32(unsigned Val) const
MCOperand decodeSDWAVopcDst(unsigned Val) const
void convertVINTERPInst(MCInst &MI) const
void convertSDWAInst(MCInst &MI) const
unsigned getSgprClassId(unsigned Width) const
static MCOperand decodeIntImmed(unsigned Imm)
void convertWMMAInst(MCInst &MI) const
MCOperand decodeBoolReg(const MCInst &Inst, unsigned Val) const
unsigned getVgprClassId(unsigned Width) const
void convertMAIInst(MCInst &MI) const
f8f6f4 instructions have different pseudos depending on the used formats.
unsigned getTtmpClassId(unsigned Width) const
DecodeStatus getInstruction(MCInst &MI, uint64_t &Size, ArrayRef< uint8_t > Bytes, uint64_t Address, raw_ostream &CS) const override
Returns the disassembly of a single instruction.
MCOperand decodeMandatoryLiteral64Constant(uint64_t Imm) const
void convertMIMGInst(MCInst &MI) const
bool isMacDPP(MCInst &MI) const
int getTTmpIdx(unsigned Val) const
void convertVOP3PDPPInst(MCInst &MI) const
MCOperand createSRegOperand(unsigned SRegClassID, unsigned Val) const
MCOperand decodeSDWASrc16(unsigned Val) const
Expected< bool > onSymbolStart(SymbolInfoTy &Symbol, uint64_t &Size, ArrayRef< uint8_t > Bytes, uint64_t Address) const override
Used to perform separate target specific disassembly for a particular symbol.
static const AMDGPUMCExpr * createLit(LitModifier Lit, int64_t Value, MCContext &Ctx)
bool tryAddingSymbolicOperand(MCInst &Inst, raw_ostream &cStream, int64_t Value, uint64_t Address, bool IsBranch, uint64_t Offset, uint64_t OpSize, uint64_t InstSize) override
Try to add a symbolic operand instead of Value to the MCInst.
void tryAddingPcLoadReferenceComment(raw_ostream &cStream, int64_t Value, uint64_t Address) override
Try to add a comment on the PC-relative load.
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition ArrayRef.h:41
size_t size() const
size - Get the array size.
Definition ArrayRef.h:147
const T * data() const
Definition ArrayRef.h:144
ArrayRef< T > slice(size_t N, size_t M) const
slice(n, m) - Chop off the first N elements of the array, and keep M elements in the array.
Definition ArrayRef.h:191
A class representing a position in a DataExtractor, as well as any error encountered during extractio...
LLVM_ABI uint32_t getU32(uint64_t *offset_ptr, Error *Err=nullptr) const
Extract a uint32_t value from *offset_ptr.
LLVM_ABI uint16_t getU16(uint64_t *offset_ptr, Error *Err=nullptr) const
Extract a uint16_t value from *offset_ptr.
LLVM_ABI void skip(Cursor &C, uint64_t Length) const
Advance the Cursor position by the given number of bytes.
LLVM_ABI StringRef getBytes(uint64_t *OffsetPtr, uint64_t Length, Error *Err=nullptr) const
Extract a fixed number of bytes from the specified offset.
Lightweight error class with error context and mandatory checking.
Definition Error.h:159
Tagged union holding either a T or a Error.
Definition Error.h:485
static const MCBinaryExpr * createOr(const MCExpr *LHS, const MCExpr *RHS, MCContext &Ctx)
Definition MCExpr.h:408
static LLVM_ABI const MCConstantExpr * create(int64_t Value, MCContext &Ctx, bool PrintInHex=false, unsigned SizeInBytes=0)
Definition MCExpr.cpp:212
Context object for machine code objects.
Definition MCContext.h:83
const MCRegisterInfo * getRegisterInfo() const
Definition MCContext.h:414
Superclass for all disassemblers.
MCDisassembler(const MCSubtargetInfo &STI, MCContext &Ctx)
MCContext & getContext() const
const MCSubtargetInfo & STI
raw_ostream * CommentStream
DecodeStatus
Ternary decode status.
Base class for the full range of assembler expressions which are needed for parsing.
Definition MCExpr.h:34
Instances of this class represent a single low-level machine instruction.
Definition MCInst.h:188
unsigned getNumOperands() const
Definition MCInst.h:212
unsigned getOpcode() const
Definition MCInst.h:202
void addOperand(const MCOperand Op)
Definition MCInst.h:215
const MCOperand & getOperand(unsigned i) const
Definition MCInst.h:210
Describe properties that are true of each instruction in the target description file.
Interface to description of machine instruction set.
Definition MCInstrInfo.h:27
This holds information about one operand of a machine instruction, indicating the register class for ...
Definition MCInstrDesc.h:87
uint8_t OperandType
Information about the type of the operand.
Instances of this class represent operands of the MCInst class.
Definition MCInst.h:40
static MCOperand createExpr(const MCExpr *Val)
Definition MCInst.h:166
int64_t getImm() const
Definition MCInst.h:84
static MCOperand createReg(MCRegister Reg)
Definition MCInst.h:138
static MCOperand createImm(int64_t Val)
Definition MCInst.h:145
void setReg(MCRegister Reg)
Set the register number.
Definition MCInst.h:79
bool isReg() const
Definition MCInst.h:65
MCRegister getReg() const
Returns the register number.
Definition MCInst.h:73
bool isValid() const
Definition MCInst.h:64
MCRegisterClass - Base class of TargetRegisterClass.
MCRegister getRegister(unsigned i) const
getRegister - Return the specified register in the class.
unsigned getSizeInBits() const
Return the size of the physical register in bits if we are able to determine it.
bool contains(MCRegister Reg) const
contains - Return true if the specified register is included in this register class.
MCRegisterInfo base class - We assume that the target defines a static array of MCRegisterDesc object...
Wrapper class representing physical registers. Should be passed by value.
Definition MCRegister.h:33
Generic base class for all target subtargets.
static const MCSymbolRefExpr * create(const MCSymbol *Symbol, MCContext &Ctx, SMLoc Loc=SMLoc())
Definition MCExpr.h:214
MCSymbol - Instances of this class represent a symbol name in the MC file, and MCSymbols are created ...
Definition MCSymbol.h:42
bool isVariable() const
isVariable - Check if this is a variable symbol.
Definition MCSymbol.h:267
LLVM_ABI void setVariableValue(const MCExpr *Value)
Definition MCSymbol.cpp:50
const MCExpr * getVariableValue() const
Get the expression of the variable symbol.
Definition MCSymbol.h:270
Symbolize and annotate disassembled instructions.
Represents a location in source code.
Definition SMLoc.h:23
SmallString - A SmallString is just a SmallVector with methods and accessors that make it work better...
Definition SmallString.h:26
StringRef - Represent a constant reference to a string, i.e.
Definition StringRef.h:55
Target - Wrapper for Target specific information.
Triple - Helper class for working with autoconf configuration names.
Definition Triple.h:47
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition Twine.h:82
LLVM Value Representation.
Definition Value.h:75
This class implements an extremely fast bulk output stream that can only output to a stream.
Definition raw_ostream.h:53
A raw_ostream that writes to an std::string.
std::string & str()
Returns the string's reference.
A raw_ostream that writes to an SmallVector or SmallString.
const char *(* LLVMSymbolLookupCallback)(void *DisInfo, uint64_t ReferenceValue, uint64_t *ReferenceType, uint64_t ReferencePC, const char **ReferenceName)
The type for the symbol lookup function.
int(* LLVMOpInfoCallback)(void *DisInfo, uint64_t PC, uint64_t Offset, uint64_t OpSize, uint64_t InstSize, int TagType, void *TagBuf)
The type for the operand information call back function.
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
unsigned getVGPREncodingGranule(const MCSubtargetInfo *STI, std::optional< bool > EnableWavefrontSize32)
unsigned getSGPREncodingGranule(const MCSubtargetInfo *STI)
ArrayRef< GFXVersion > getGFXVersions()
EncodingField< Bit, Bit, D > EncodingBit
LLVM_READONLY const MIMGInfo * getMIMGInfo(unsigned Opc)
MCRegister getMCReg(MCRegister Reg, const MCSubtargetInfo &STI)
If Reg is a pseudo reg, return the correct hardware register given STI otherwise return Reg.
int getMIMGOpcode(unsigned BaseOpcode, unsigned MIMGEncoding, unsigned VDataDwords, unsigned VAddrDwords)
bool isGFX10(const MCSubtargetInfo &STI)
bool isGFX12Plus(const MCSubtargetInfo &STI)
bool hasPackedD16(const MCSubtargetInfo &STI)
bool getSMEMIsBuffer(unsigned Opc)
bool isVOPC64DPP(unsigned Opc)
unsigned getAMDHSACodeObjectVersion(const Module &M)
LLVM_READONLY bool hasNamedOperand(uint64_t Opcode, OpName NamedIdx)
bool isGFX9(const MCSubtargetInfo &STI)
LLVM_READONLY const MIMGDimInfo * getMIMGDimInfoByEncoding(uint8_t DimEnc)
const MFMA_F8F6F4_Info * getWMMA_F8F6F4_WithFormatArgs(unsigned FmtA, unsigned FmtB, unsigned F8F8Opcode)
bool hasG16(const MCSubtargetInfo &STI)
unsigned getAddrSizeMIMGOp(const MIMGBaseOpcodeInfo *BaseOpcode, const MIMGDimInfo *Dim, bool IsA16, bool IsG16Supported)
bool isGFX11Plus(const MCSubtargetInfo &STI)
bool isGFX10Plus(const MCSubtargetInfo &STI)
@ OPERAND_REG_IMM_INT64
Definition SIDefines.h:202
@ OPERAND_REG_IMM_V2FP16
Definition SIDefines.h:209
@ OPERAND_REG_INLINE_C_FP64
Definition SIDefines.h:222
@ OPERAND_REG_INLINE_C_BF16
Definition SIDefines.h:219
@ OPERAND_REG_INLINE_C_V2BF16
Definition SIDefines.h:224
@ OPERAND_REG_IMM_BF16
Definition SIDefines.h:206
@ OPERAND_REG_IMM_V2BF16
Definition SIDefines.h:208
@ OPERAND_REG_IMM_FP16
Definition SIDefines.h:207
@ OPERAND_REG_INLINE_C_INT64
Definition SIDefines.h:218
@ OPERAND_REG_INLINE_C_INT16
Operands with register or inline constant.
Definition SIDefines.h:216
@ OPERAND_REG_IMM_FP64
Definition SIDefines.h:205
@ OPERAND_REG_INLINE_C_V2FP16
Definition SIDefines.h:225
@ OPERAND_REG_INLINE_AC_FP64
Definition SIDefines.h:238
@ OPERAND_REG_INLINE_C_FP16
Definition SIDefines.h:220
@ OPERAND_REG_IMM_INT16
Definition SIDefines.h:203
bool hasGDS(const MCSubtargetInfo &STI)
bool isGFX9Plus(const MCSubtargetInfo &STI)
bool isGFX1250(const MCSubtargetInfo &STI)
unsigned hasKernargPreload(const MCSubtargetInfo &STI)
bool isMAC(unsigned Opc)
LLVM_READONLY const MIMGBaseOpcodeInfo * getMIMGBaseOpcodeInfo(unsigned BaseOpcode)
bool hasVOPD(const MCSubtargetInfo &STI)
const MFMA_F8F6F4_Info * getMFMA_F8F6F4_WithFormatArgs(unsigned CBSZ, unsigned BLGP, unsigned F8F8Opcode)
@ C
The default llvm calling convention, compatible with C.
Definition CallingConv.h:34
@ STT_NOTYPE
Definition ELF.h:1411
@ STT_AMDGPU_HSA_KERNEL
Definition ELF.h:1425
@ STT_OBJECT
Definition ELF.h:1412
value_type read(const void *memory, endianness endian)
Read a value of a particular endianness from memory.
Definition Endian.h:58
uint16_t read16(const void *P, endianness E)
Definition Endian.h:405
This is an optimization pass for GlobalISel generic memory operations.
@ Offset
Definition DWP.cpp:477
constexpr bool isInt(int64_t x)
Checks if an integer fits into the given bit width.
Definition MathExtras.h:174
auto enumerate(FirstRange &&First, RestRanges &&...Rest)
Given two or more input ranges, returns a new range whose values are tuples (A, B,...
Definition STLExtras.h:2452
LLVM_ABI raw_fd_ostream & outs()
This returns a reference to a raw_fd_ostream for standard output.
SmallVectorImpl< T >::const_pointer c_str(SmallVectorImpl< T > &str)
Error createStringError(std::error_code EC, char const *Fmt, const Ts &... Vals)
Create formatted StringError object.
Definition Error.h:1305
Op::Description Desc
int countr_zero(T Val)
Count number of 0's from the least significant bit to the most stopping at the first 1.
Definition bit.h:186
FunctionAddr VTableAddr uintptr_t uintptr_t Version
Definition InstrProf.h:302
MachineInstr * getImm(const MachineOperand &MO, const MachineRegisterInfo *MRI)
constexpr bool isUInt(uint64_t x)
Checks if an unsigned integer fits into the given bit width.
Definition MathExtras.h:198
constexpr uint32_t Lo_32(uint64_t Value)
Return the low 32 bits of a 64 bit value.
Definition MathExtras.h:164
void cantFail(Error Err, const char *Msg=nullptr)
Report a fatal error if Err is a failure value.
Definition Error.h:769
Target & getTheGCNTarget()
The target for GCN GPUs.
To bit_cast(const From &from) noexcept
Definition bit.h:90
@ Add
Sum of integers.
DWARFExpression::Operation Op
unsigned M0(unsigned Val)
Definition VE.h:376
auto find_if(R &&Range, UnaryPredicate P)
Provide wrappers to std::find_if which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1738
std::vector< SymbolInfoTy > SectionSymbolsTy
constexpr int64_t SignExtend64(uint64_t x)
Sign-extend the number in the bottom B bits of X to a 64-bit integer.
Definition MathExtras.h:583
int popcount(T Value) noexcept
Count the number of set bits in a value.
Definition bit.h:154
LLVM_ABI void reportFatalUsageError(Error Err)
Report a fatal error that does not indicate a bug in LLVM.
Definition Error.cpp:180
static void RegisterMCSymbolizer(Target &T, Target::MCSymbolizerCtorTy Fn)
RegisterMCSymbolizer - Register an MCSymbolizer implementation for the given target.
static void RegisterMCDisassembler(Target &T, Target::MCDisassemblerCtorTy Fn)
RegisterMCDisassembler - Register a MCDisassembler implementation for the given target.