AMDGPUDisassembler.cpp
//===- AMDGPUDisassembler.cpp - Disassembler for AMDGPU ISA ---------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
//===----------------------------------------------------------------------===//
//
/// \file
///
/// This file contains the definition of the AMDGPU ISA disassembler.
//
//===----------------------------------------------------------------------===//

// ToDo: What to do with instruction suffixes (v_mov_b32 vs v_mov_b32_e32)?

#include "Disassembler/AMDGPUDisassembler.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "SIDefines.h"
#include "SIRegisterInfo.h"
#include "TargetInfo/AMDGPUTargetInfo.h"
#include "Utils/AMDGPUAsmUtils.h"
#include "Utils/AMDGPUBaseInfo.h"
#include "llvm-c/DisassemblerTypes.h"
#include "llvm/BinaryFormat/ELF.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCDecoderOps.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCInstrDesc.h"
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/MC/TargetRegistry.h"
#include "llvm/Support/AMDHSAKernelDescriptor.h"

using namespace llvm;

#define DEBUG_TYPE "amdgpu-disassembler"

#define SGPR_MAX                                                               \
  (isGFX10Plus() ? AMDGPU::EncValues::SGPR_MAX_GFX10                           \
                 : AMDGPU::EncValues::SGPR_MAX_SI)

static const MCSubtargetInfo &addDefaultWaveSize(const MCSubtargetInfo &STI,
                                                 MCContext &Ctx) {
  if (!STI.hasFeature(AMDGPU::FeatureWavefrontSize64) &&
      !STI.hasFeature(AMDGPU::FeatureWavefrontSize32)) {
    MCSubtargetInfo &STICopy = Ctx.getSubtargetCopy(STI);
    // If there is no default wave size it must be a generation before gfx10,
    // these have FeatureWavefrontSize64 in their definition already. For
    // gfx10+ set wave32 as a default.
    STICopy.ToggleFeature(AMDGPU::FeatureWavefrontSize32);
    return STICopy;
  }

  return STI;
}

AMDGPUDisassembler::AMDGPUDisassembler(const MCSubtargetInfo &STI,
                                       MCContext &Ctx, MCInstrInfo const *MCII)
    : MCDisassembler(addDefaultWaveSize(STI, Ctx), Ctx), MCII(MCII),
      MRI(*Ctx.getRegisterInfo()), MAI(*Ctx.getAsmInfo()),
      TargetMaxInstBytes(MAI.getMaxInstLength(&STI)),
      CodeObjectVersion(AMDGPU::getDefaultAMDHSACodeObjectVersion()) {
  // ToDo: AMDGPUDisassembler supports only VI ISA.
  if (!STI.hasFeature(AMDGPU::FeatureGCN3Encoding) && !isGFX10Plus())
    report_fatal_error("Disassembly not yet supported for subtarget");

  for (auto [Symbol, Code] : AMDGPU::UCVersion::getGFXVersions())
    createConstantSymbolExpr(Symbol, Code);

  UCVersionW64Expr = createConstantSymbolExpr("UC_VERSION_W64_BIT", 0x2000);
  UCVersionW32Expr = createConstantSymbolExpr("UC_VERSION_W32_BIT", 0x4000);
  UCVersionMDPExpr = createConstantSymbolExpr("UC_VERSION_MDP_BIT", 0x8000);
}

void AMDGPUDisassembler::setABIVersion(unsigned Version) {
  CodeObjectVersion = AMDGPU::getAMDHSACodeObjectVersion(Version);
}

inline static MCDisassembler::DecodeStatus
addOperand(MCInst &Inst, const MCOperand& Opnd) {
  Inst.addOperand(Opnd);
  return Opnd.isValid() ?
    MCDisassembler::Success :
    MCDisassembler::Fail;
}

static int insertNamedMCOperand(MCInst &MI, const MCOperand &Op,
                                uint16_t NameIdx) {
  int OpIdx = AMDGPU::getNamedOperandIdx(MI.getOpcode(), NameIdx);
  if (OpIdx != -1) {
    auto I = MI.begin();
    std::advance(I, OpIdx);
    MI.insert(I, Op);
  }
  return OpIdx;
}

static DecodeStatus decodeSOPPBrTarget(MCInst &Inst, unsigned Imm,
                                       uint64_t Addr,
                                       const MCDisassembler *Decoder) {
  auto DAsm = static_cast<const AMDGPUDisassembler*>(Decoder);

  // Our branches take a simm16, but we need two extra bits to account for the
  // factor of 4.
  APInt SignedOffset(18, Imm * 4, true);
  int64_t Offset = (SignedOffset.sext(64) + 4 + Addr).getSExtValue();

  if (DAsm->tryAddingSymbolicOperand(Inst, Offset, Addr, true, 2, 2, 0))
    return MCDisassembler::Success;
  return addOperand(Inst, MCOperand::createImm(Imm));
}
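
// Worked example (illustrative): a stored simm16 of 0xFFFF is -1, so the
// 18-bit SignedOffset is -4 bytes; adding the 4-byte instruction size and
// Addr yields a branch back to the branch instruction itself. The offset is
// counted in dwords from the end of the SOPP instruction.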
118
120 const MCDisassembler *Decoder) {
121 auto DAsm = static_cast<const AMDGPUDisassembler*>(Decoder);
122 int64_t Offset;
123 if (DAsm->isGFX12Plus()) { // GFX12 supports 24-bit signed offsets.
124 Offset = SignExtend64<24>(Imm);
125 } else if (DAsm->isVI()) { // VI supports 20-bit unsigned offsets.
126 Offset = Imm & 0xFFFFF;
127 } else { // GFX9+ supports 21-bit signed offsets.
128 Offset = SignExtend64<21>(Imm);
129 }
131}
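
// Illustrative values: a raw Imm of 0x1FFFFF decodes to -1 on GFX9 (21-bit
// signed), to 0xFFFFF on VI (20-bit unsigned), and stays 0x1FFFFF on GFX12
// (still positive within 24 bits).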

static DecodeStatus decodeBoolReg(MCInst &Inst, unsigned Val, uint64_t Addr,
                                  const MCDisassembler *Decoder) {
  auto DAsm = static_cast<const AMDGPUDisassembler*>(Decoder);
  return addOperand(Inst, DAsm->decodeBoolReg(Val));
}

static DecodeStatus decodeSplitBarrier(MCInst &Inst, unsigned Val,
                                       uint64_t Addr,
                                       const MCDisassembler *Decoder) {
  auto DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
  return addOperand(Inst, DAsm->decodeSplitBarrier(Val));
}

static DecodeStatus decodeDpp8FI(MCInst &Inst, unsigned Val, uint64_t Addr,
                                 const MCDisassembler *Decoder) {
  auto DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
  return addOperand(Inst, DAsm->decodeDpp8FI(Val));
}

#define DECODE_OPERAND(StaticDecoderName, DecoderName)                         \
  static DecodeStatus StaticDecoderName(MCInst &Inst, unsigned Imm,            \
                                        uint64_t /*Addr*/,                     \
                                        const MCDisassembler *Decoder) {       \
    auto DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);              \
    return addOperand(Inst, DAsm->DecoderName(Imm));                           \
  }

// Decoder for registers, decoding directly via RegClassID. Imm (8-bit) is the
// register number. Used by VGPR-only and AGPR-only operands.
#define DECODE_OPERAND_REG_8(RegClass)                                         \
  static DecodeStatus Decode##RegClass##RegisterClass(                         \
      MCInst &Inst, unsigned Imm, uint64_t /*Addr*/,                           \
      const MCDisassembler *Decoder) {                                         \
    assert(Imm < (1 << 8) && "8-bit encoding");                                \
    auto DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);              \
    return addOperand(                                                         \
        Inst, DAsm->createRegOperand(AMDGPU::RegClass##RegClassID, Imm));      \
  }

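// For instance, DECODE_OPERAND_REG_8(VGPR_32) expands to a
// DecodeVGPR_32RegisterClass() callback that maps the 8-bit encoded register
// number straight to a register in AMDGPU::VGPR_32RegClassID.
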
#define DECODE_SrcOp(Name, EncSize, OpWidth, EncImm, MandatoryLiteral,         \
                     ImmWidth)                                                 \
  static DecodeStatus Name(MCInst &Inst, unsigned Imm, uint64_t /*Addr*/,      \
                           const MCDisassembler *Decoder) {                    \
    assert(Imm < (1 << EncSize) && #EncSize "-bit encoding");                  \
    auto DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);              \
    return addOperand(Inst,                                                    \
                      DAsm->decodeSrcOp(AMDGPUDisassembler::OpWidth, EncImm,   \
                                        MandatoryLiteral, ImmWidth));          \
  }

static DecodeStatus decodeSrcOp(MCInst &Inst, unsigned EncSize,
                                AMDGPUDisassembler::OpWidthTy OpWidth,
                                unsigned Imm, unsigned EncImm,
                                bool MandatoryLiteral, unsigned ImmWidth,
                                AMDGPU::OperandSemantics Sema,
                                const MCDisassembler *Decoder) {
  assert(Imm < (1U << EncSize) && "Operand doesn't fit encoding!");
  auto DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
  return addOperand(Inst, DAsm->decodeSrcOp(OpWidth, EncImm, MandatoryLiteral,
                                            ImmWidth, Sema));
}

// Decoder for registers. Imm (7-bit) is the register number; uses decodeSrcOp
// to get the register class. Used by SGPR-only operands.
#define DECODE_OPERAND_REG_7(RegClass, OpWidth)                                \
  DECODE_SrcOp(Decode##RegClass##RegisterClass, 7, OpWidth, Imm, false, 0)

// Decoder for registers. Imm (10-bit): Imm{7-0} is the register number,
// Imm{9} is acc (agpr or vgpr), and Imm{8} should be 0 (see VOP3Pe_SMFMAC).
// Set Imm{8} to 1 (IS_VGPR) to decode using 'enum10' from decodeSrcOp.
// Used by AV_ register classes (AGPR or VGPR only register operands).
template <AMDGPUDisassembler::OpWidthTy OpWidth>
static DecodeStatus decodeAV10(MCInst &Inst, unsigned Imm, uint64_t /* Addr */,
                               const MCDisassembler *Decoder) {
  return decodeSrcOp(Inst, 10, OpWidth, Imm, Imm | AMDGPU::EncValues::IS_VGPR,
                     false, 0, AMDGPU::OperandSemantics::INT, Decoder);
}

// Decoder for Src(9-bit encoding) registers only.
template <AMDGPUDisassembler::OpWidthTy OpWidth>
static DecodeStatus decodeSrcReg9(MCInst &Inst, unsigned Imm,
                                  uint64_t /* Addr */,
                                  const MCDisassembler *Decoder) {
  return decodeSrcOp(Inst, 9, OpWidth, Imm, Imm, false, 0,
                     AMDGPU::OperandSemantics::INT, Decoder);
}

// Decoder for Src(9-bit encoding) AGPR, register number encoded in 9 bits; set
// Imm{9} to 1 (set acc) and decode using 'enum10' from decodeSrcOp, registers
// only.
template <AMDGPUDisassembler::OpWidthTy OpWidth>
static DecodeStatus decodeSrcA9(MCInst &Inst, unsigned Imm, uint64_t /* Addr */,
                                const MCDisassembler *Decoder) {
  return decodeSrcOp(Inst, 9, OpWidth, Imm, Imm | 512, false, 0,
                     AMDGPU::OperandSemantics::INT, Decoder);
}

// Decoder for 'enum10' from decodeSrcOp: Imm{0-8} is the 9-bit Src encoding
// and Imm{9} is acc; registers only.
template <AMDGPUDisassembler::OpWidthTy OpWidth>
static DecodeStatus decodeSrcAV10(MCInst &Inst, unsigned Imm,
                                  uint64_t /* Addr */,
                                  const MCDisassembler *Decoder) {
  return decodeSrcOp(Inst, 10, OpWidth, Imm, Imm, false, 0,
                     AMDGPU::OperandSemantics::INT, Decoder);
}

// Decoder for RegisterOperands using 9-bit Src encoding. The operand can be a
// register from RegClass or an immediate. Registers that don't belong to
// RegClass will still be decoded, and InstPrinter will report a warning. An
// immediate will be decoded into a constant of size ImmWidth, which should
// match the width of the immediate used by the OperandType (important for
// floating point types).
template <AMDGPUDisassembler::OpWidthTy OpWidth, unsigned ImmWidth,
          unsigned OperandSemantics>
static DecodeStatus decodeSrcRegOrImm9(MCInst &Inst, unsigned Imm,
                                       uint64_t /* Addr */,
                                       const MCDisassembler *Decoder) {
  return decodeSrcOp(Inst, 9, OpWidth, Imm, Imm, false, ImmWidth,
                     (AMDGPU::OperandSemantics)OperandSemantics, Decoder);
}

// Decoder for Src(9-bit encoding) AGPR or immediate. Set Imm{9} to 1 (set acc)
// and decode using 'enum10' from decodeSrcOp.
template <AMDGPUDisassembler::OpWidthTy OpWidth, unsigned ImmWidth,
          unsigned OperandSemantics>
static DecodeStatus decodeSrcRegOrImmA9(MCInst &Inst, unsigned Imm,
                                        uint64_t /* Addr */,
                                        const MCDisassembler *Decoder) {
  return decodeSrcOp(Inst, 9, OpWidth, Imm, Imm | 512, false, ImmWidth,
                     (AMDGPU::OperandSemantics)OperandSemantics, Decoder);
}

template <AMDGPUDisassembler::OpWidthTy OpWidth, unsigned ImmWidth,
          unsigned OperandSemantics>
static DecodeStatus decodeSrcRegOrImmDeferred9(MCInst &Inst, unsigned Imm,
                                               uint64_t /* Addr */,
                                               const MCDisassembler *Decoder) {
  return decodeSrcOp(Inst, 9, OpWidth, Imm, Imm, true, ImmWidth,
                     (AMDGPU::OperandSemantics)OperandSemantics, Decoder);
}

// Default decoders generated by tablegen: 'Decode<RegClass>RegisterClass'
// when RegisterClass is used as an operand. Most often used for destination
// operands.

DECODE_OPERAND_REG_8(VGPR_32)
DECODE_OPERAND_REG_8(VGPR_32_Lo128)
DECODE_OPERAND_REG_8(VReg_64)
DECODE_OPERAND_REG_8(VReg_96)
DECODE_OPERAND_REG_8(VReg_128)
DECODE_OPERAND_REG_8(VReg_256)
DECODE_OPERAND_REG_8(VReg_288)
DECODE_OPERAND_REG_8(VReg_352)
DECODE_OPERAND_REG_8(VReg_384)
DECODE_OPERAND_REG_8(VReg_512)
DECODE_OPERAND_REG_8(VReg_1024)

DECODE_OPERAND_REG_7(SReg_32, OPW32)
DECODE_OPERAND_REG_7(SReg_32_XEXEC, OPW32)
DECODE_OPERAND_REG_7(SReg_32_XM0_XEXEC, OPW32)
DECODE_OPERAND_REG_7(SReg_32_XEXEC_HI, OPW32)
DECODE_OPERAND_REG_7(SReg_64, OPW64)
DECODE_OPERAND_REG_7(SReg_64_XEXEC, OPW64)
DECODE_OPERAND_REG_7(SReg_96, OPW96)
DECODE_OPERAND_REG_7(SReg_128, OPW128)
DECODE_OPERAND_REG_7(SReg_256, OPW256)
DECODE_OPERAND_REG_7(SReg_512, OPW512)

DECODE_OPERAND_REG_8(AGPR_32)
DECODE_OPERAND_REG_8(AReg_64)
DECODE_OPERAND_REG_8(AReg_128)
DECODE_OPERAND_REG_8(AReg_256)
DECODE_OPERAND_REG_8(AReg_512)
DECODE_OPERAND_REG_8(AReg_1024)

static DecodeStatus DecodeVGPR_16RegisterClass(MCInst &Inst, unsigned Imm,
                                               uint64_t /*Addr*/,
                                               const MCDisassembler *Decoder) {
  assert(isUInt<10>(Imm) && "10-bit encoding expected");
  assert((Imm & (1 << 8)) == 0 && "Imm{8} should not be used");

  bool IsHi = Imm & (1 << 9);
  unsigned RegIdx = Imm & 0xff;
  auto DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
  return addOperand(Inst, DAsm->createVGPR16Operand(RegIdx, IsHi));
}

static DecodeStatus
DecodeVGPR_16_Lo128RegisterClass(MCInst &Inst, unsigned Imm, uint64_t /*Addr*/,
                                 const MCDisassembler *Decoder) {
  assert(isUInt<8>(Imm) && "8-bit encoding expected");

  bool IsHi = Imm & (1 << 7);
  unsigned RegIdx = Imm & 0x7f;
  auto DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
  return addOperand(Inst, DAsm->createVGPR16Operand(RegIdx, IsHi));
}

static DecodeStatus decodeOperand_VSrcT16_Lo128(MCInst &Inst, unsigned Imm,
                                                uint64_t /*Addr*/,
                                                const MCDisassembler *Decoder) {
  assert(isUInt<9>(Imm) && "9-bit encoding expected");

  const auto *DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
  bool IsVGPR = Imm & (1 << 8);
  if (IsVGPR) {
    bool IsHi = Imm & (1 << 7);
    unsigned RegIdx = Imm & 0x7f;
    return addOperand(Inst, DAsm->createVGPR16Operand(RegIdx, IsHi));
  }
  return addOperand(Inst, DAsm->decodeNonVGPRSrcOp(AMDGPUDisassembler::OPW16,
                                                   Imm & 0xFF, false, 16));
}

static DecodeStatus decodeOperand_VSrcT16(MCInst &Inst, unsigned Imm,
                                          uint64_t /*Addr*/,
                                          const MCDisassembler *Decoder) {
  assert(isUInt<10>(Imm) && "10-bit encoding expected");

  const auto *DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
  bool IsVGPR = Imm & (1 << 8);
  if (IsVGPR) {
    bool IsHi = Imm & (1 << 9);
    unsigned RegIdx = Imm & 0xff;
    return addOperand(Inst, DAsm->createVGPR16Operand(RegIdx, IsHi));
  }
  return addOperand(Inst, DAsm->decodeNonVGPRSrcOp(AMDGPUDisassembler::OPW16,
                                                   Imm & 0xFF, false, 16));
}

static DecodeStatus decodeOperand_KImmFP(MCInst &Inst, unsigned Imm,
                                         uint64_t Addr,
                                         const MCDisassembler *Decoder) {
  const auto *DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
  return addOperand(Inst, DAsm->decodeMandatoryLiteralConstant(Imm));
}

static DecodeStatus decodeOperandVOPDDstY(MCInst &Inst, unsigned Val,
                                          uint64_t Addr, const void *Decoder) {
  const auto *DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
  return addOperand(Inst, DAsm->decodeVOPDDstYOp(Inst, Val));
}

static bool IsAGPROperand(const MCInst &Inst, int OpIdx,
                          const MCRegisterInfo *MRI) {
  if (OpIdx < 0)
    return false;

  const MCOperand &Op = Inst.getOperand(OpIdx);
  if (!Op.isReg())
    return false;

  unsigned Sub = MRI->getSubReg(Op.getReg(), AMDGPU::sub0);
  auto Reg = Sub ? Sub : Op.getReg();
  return Reg >= AMDGPU::AGPR0 && Reg <= AMDGPU::AGPR255;
}

static DecodeStatus decodeAVLdSt(MCInst &Inst, unsigned Imm,
                                 AMDGPUDisassembler::OpWidthTy Opw,
                                 const MCDisassembler *Decoder) {
  auto DAsm = static_cast<const AMDGPUDisassembler*>(Decoder);
  if (!DAsm->isGFX90A()) {
    Imm &= 511;
  } else {
    // If an atomic has both vdata and vdst, their register classes are tied.
    // The bit is decoded along with the vdst, the first operand. We need to
    // change the register class to AGPR if vdst was an AGPR.
    // If a DS instruction has both data0 and data1, their register classes
    // are also tied.
    unsigned Opc = Inst.getOpcode();
    uint64_t TSFlags = DAsm->getMCII()->get(Opc).TSFlags;
    uint16_t DataNameIdx = (TSFlags & SIInstrFlags::DS) ? AMDGPU::OpName::data0
                                                        : AMDGPU::OpName::vdata;
    const MCRegisterInfo *MRI = DAsm->getContext().getRegisterInfo();
    int DataIdx = AMDGPU::getNamedOperandIdx(Opc, DataNameIdx);
    if ((int)Inst.getNumOperands() == DataIdx) {
      int DstIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdst);
      if (IsAGPROperand(Inst, DstIdx, MRI))
        Imm |= 512;
    }

    if (TSFlags & SIInstrFlags::DS) {
      int Data2Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::data1);
      if ((int)Inst.getNumOperands() == Data2Idx &&
          IsAGPROperand(Inst, DataIdx, MRI))
        Imm |= 512;
    }
  }
  return addOperand(Inst, DAsm->decodeSrcOp(Opw, Imm | 256));
}

template <AMDGPUDisassembler::OpWidthTy Opw>
static DecodeStatus decodeAVLdSt(MCInst &Inst, unsigned Imm,
                                 uint64_t /* Addr */,
                                 const MCDisassembler *Decoder) {
  return decodeAVLdSt(Inst, Imm, Opw, Decoder);
}

static DecodeStatus decodeOperand_VSrc_f64(MCInst &Inst, unsigned Imm,
                                           uint64_t Addr,
                                           const MCDisassembler *Decoder) {
  assert(Imm < (1 << 9) && "9-bit encoding");
  auto DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
  return addOperand(Inst,
                    DAsm->decodeSrcOp(AMDGPUDisassembler::OPW64, Imm, false, 64,
                                      AMDGPU::OperandSemantics::FP64));
}

#define DECODE_SDWA(DecName) \
DECODE_OPERAND(decodeSDWA##DecName, decodeSDWA##DecName)

DECODE_SDWA(Src32)
DECODE_SDWA(Src16)
DECODE_SDWA(VopcDst)

static DecodeStatus decodeVersionImm(MCInst &Inst, unsigned Imm,
                                     uint64_t /* Addr */,
                                     const MCDisassembler *Decoder) {
  auto DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
  return addOperand(Inst, DAsm->decodeVersionImm(Imm));
}

#include "AMDGPUGenDisassemblerTables.inc"

//===----------------------------------------------------------------------===//
//
//===----------------------------------------------------------------------===//
template <typename T> static inline T eatBytes(ArrayRef<uint8_t>& Bytes) {
  assert(Bytes.size() >= sizeof(T));
  const auto Res =
      support::endian::read<T, llvm::endianness::little>(Bytes.data());
  Bytes = Bytes.slice(sizeof(T));
  return Res;
}

static inline DecoderUInt128 eat12Bytes(ArrayRef<uint8_t> &Bytes) {
  assert(Bytes.size() >= 12);
  uint64_t Lo =
      support::endian::read<uint64_t, llvm::endianness::little>(Bytes.data());
  Bytes = Bytes.slice(8);
  uint64_t Hi =
      support::endian::read<uint32_t, llvm::endianness::little>(Bytes.data());
  Bytes = Bytes.slice(4);
  return DecoderUInt128(Lo, Hi);
}
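
// Illustrative layout: for a 96-bit encoding the first two little-endian
// dwords land in Lo and the third in Hi, so DecoderUInt128(Lo, Hi) preserves
// the memory order of the instruction words.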

DecodeStatus AMDGPUDisassembler::getInstruction(MCInst &MI, uint64_t &Size,
                                                ArrayRef<uint8_t> Bytes_,
                                                uint64_t Address,
                                                raw_ostream &CS) const {
  unsigned MaxInstBytesNum = std::min((size_t)TargetMaxInstBytes, Bytes_.size());
  Bytes = Bytes_.slice(0, MaxInstBytesNum);

  // In case the opcode is not recognized we'll assume a Size of 4 bytes (unless
  // there are fewer bytes left). This will be overridden on success.
  Size = std::min((size_t)4, Bytes_.size());

  do {
    // ToDo: better to switch encoding length using some bit predicate
    // but it is unknown yet, so try all we can

    // Try to decode DPP and SDWA first to solve conflict with VOP1 and VOP2
    // encodings
    if (isGFX11Plus() && Bytes.size() >= 12) {
      DecoderUInt128 DecW = eat12Bytes(Bytes);

      if (isGFX11() &&
          tryDecodeInst(DecoderTableGFX1196, DecoderTableGFX11_FAKE1696, MI,
                        DecW, Address, CS))
        break;

      if (isGFX12() &&
          tryDecodeInst(DecoderTableGFX1296, DecoderTableGFX12_FAKE1696, MI,
                        DecW, Address, CS))
        break;

      if (isGFX12() &&
          tryDecodeInst(DecoderTableGFX12W6496, MI, DecW, Address, CS))
        break;

      // Reinitialize Bytes
      Bytes = Bytes_.slice(0, MaxInstBytesNum);
    }

    if (Bytes.size() >= 8) {
      const uint64_t QW = eatBytes<uint64_t>(Bytes);

      if (STI.hasFeature(AMDGPU::FeatureGFX10_BEncoding) &&
          tryDecodeInst(DecoderTableGFX10_B64, MI, QW, Address, CS))
        break;

      if (STI.hasFeature(AMDGPU::FeatureUnpackedD16VMem) &&
          tryDecodeInst(DecoderTableGFX80_UNPACKED64, MI, QW, Address, CS))
        break;

      // Some GFX9 subtargets repurposed the v_mad_mix_f32, v_mad_mixlo_f16 and
      // v_mad_mixhi_f16 for FMA variants. Try to decode using this special
      // table first so we print the correct name.
      if (STI.hasFeature(AMDGPU::FeatureFmaMixInsts) &&
          tryDecodeInst(DecoderTableGFX9_DL64, MI, QW, Address, CS))
        break;

      if (STI.hasFeature(AMDGPU::FeatureGFX940Insts) &&
          tryDecodeInst(DecoderTableGFX94064, MI, QW, Address, CS))
        break;

      if (STI.hasFeature(AMDGPU::FeatureGFX90AInsts) &&
          tryDecodeInst(DecoderTableGFX90A64, MI, QW, Address, CS))
        break;

      if ((isVI() || isGFX9()) &&
          tryDecodeInst(DecoderTableGFX864, MI, QW, Address, CS))
        break;

      if (isGFX9() && tryDecodeInst(DecoderTableGFX964, MI, QW, Address, CS))
        break;

      if (isGFX10() && tryDecodeInst(DecoderTableGFX1064, MI, QW, Address, CS))
        break;

      if (isGFX12() &&
          tryDecodeInst(DecoderTableGFX1264, DecoderTableGFX12_FAKE1664, MI, QW,
                        Address, CS))
        break;

      if (isGFX11() &&
          tryDecodeInst(DecoderTableGFX1164, DecoderTableGFX11_FAKE1664, MI, QW,
                        Address, CS))
        break;

      if (isGFX11() &&
          tryDecodeInst(DecoderTableGFX11W6464, MI, QW, Address, CS))
        break;

      if (isGFX12() &&
          tryDecodeInst(DecoderTableGFX12W6464, MI, QW, Address, CS))
        break;

      // Reinitialize Bytes
      Bytes = Bytes_.slice(0, MaxInstBytesNum);
    }

    // Try decode 32-bit instruction
    if (Bytes.size() >= 4) {
      const uint32_t DW = eatBytes<uint32_t>(Bytes);

      if ((isVI() || isGFX9()) &&
          tryDecodeInst(DecoderTableGFX832, MI, DW, Address, CS))
        break;

      if (tryDecodeInst(DecoderTableAMDGPU32, MI, DW, Address, CS))
        break;

      if (isGFX9() && tryDecodeInst(DecoderTableGFX932, MI, DW, Address, CS))
        break;

      if (STI.hasFeature(AMDGPU::FeatureGFX90AInsts) &&
          tryDecodeInst(DecoderTableGFX90A32, MI, DW, Address, CS))
        break;

      if (STI.hasFeature(AMDGPU::FeatureGFX10_BEncoding) &&
          tryDecodeInst(DecoderTableGFX10_B32, MI, DW, Address, CS))
        break;

      if (isGFX10() && tryDecodeInst(DecoderTableGFX1032, MI, DW, Address, CS))
        break;

      if (isGFX11() &&
          tryDecodeInst(DecoderTableGFX1132, DecoderTableGFX11_FAKE1632, MI, DW,
                        Address, CS))
        break;

      if (isGFX12() &&
          tryDecodeInst(DecoderTableGFX1232, DecoderTableGFX12_FAKE1632, MI, DW,
                        Address, CS))
        break;
    }

    return MCDisassembler::Fail;
  } while (false);

  if (MCII->get(MI.getOpcode()).TSFlags & SIInstrFlags::DPP) {
    if (isMacDPP(MI))
      convertMacDPPInst(MI);

    if (MCII->get(MI.getOpcode()).TSFlags & SIInstrFlags::VOP3P)
      convertVOP3PDPPInst(MI);
    else if ((MCII->get(MI.getOpcode()).TSFlags & SIInstrFlags::VOPC) ||
             AMDGPU::isVOPC64DPP(MI.getOpcode()))
      convertVOPCDPPInst(MI); // Special VOP3 case
    else if (AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::dpp8) !=
             -1)
      convertDPP8Inst(MI);
    else if (MCII->get(MI.getOpcode()).TSFlags & SIInstrFlags::VOP3)
      convertVOP3DPPInst(MI); // Regular VOP3 case
  }

  if (AMDGPU::isMAC(MI.getOpcode())) {
    // Insert dummy unused src2_modifiers.
    insertNamedMCOperand(MI, MCOperand::createImm(0),
                         AMDGPU::OpName::src2_modifiers);
  }

  if (MI.getOpcode() == AMDGPU::V_CVT_SR_BF8_F32_e64_dpp ||
      MI.getOpcode() == AMDGPU::V_CVT_SR_FP8_F32_e64_dpp) {
    // Insert dummy unused src2_modifiers.
    insertNamedMCOperand(MI, MCOperand::createImm(0),
                         AMDGPU::OpName::src2_modifiers);
  }

  if ((MCII->get(MI.getOpcode()).TSFlags & SIInstrFlags::DS) &&
      !AMDGPU::hasGDS(STI)) {
    insertNamedMCOperand(MI, MCOperand::createImm(0), AMDGPU::OpName::gds);
  }

  if (MCII->get(MI.getOpcode()).TSFlags &
      (SIInstrFlags::MUBUF | SIInstrFlags::FLAT | SIInstrFlags::SMRD)) {
    int CPolPos = AMDGPU::getNamedOperandIdx(MI.getOpcode(),
                                             AMDGPU::OpName::cpol);
    if (CPolPos != -1) {
      unsigned CPol =
          (MCII->get(MI.getOpcode()).TSFlags & SIInstrFlags::IsAtomicRet) ?
              AMDGPU::CPol::GLC : 0;
      if (MI.getNumOperands() <= (unsigned)CPolPos) {
        insertNamedMCOperand(MI, MCOperand::createImm(CPol),
                             AMDGPU::OpName::cpol);
      } else if (CPol) {
        MI.getOperand(CPolPos).setImm(MI.getOperand(CPolPos).getImm() | CPol);
      }
    }
  }

  if ((MCII->get(MI.getOpcode()).TSFlags &
       (SIInstrFlags::MTBUF | SIInstrFlags::MUBUF)) &&
      (STI.hasFeature(AMDGPU::FeatureGFX90AInsts))) {
    // GFX90A lost TFE, its place is occupied by ACC.
    int TFEOpIdx =
        AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::tfe);
    if (TFEOpIdx != -1) {
      auto TFEIter = MI.begin();
      std::advance(TFEIter, TFEOpIdx);
      MI.insert(TFEIter, MCOperand::createImm(0));
    }
  }

  if (MCII->get(MI.getOpcode()).TSFlags &
      (SIInstrFlags::MTBUF | SIInstrFlags::MUBUF)) {
    int SWZOpIdx =
        AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::swz);
    if (SWZOpIdx != -1) {
      auto SWZIter = MI.begin();
      std::advance(SWZIter, SWZOpIdx);
      MI.insert(SWZIter, MCOperand::createImm(0));
    }
  }

  if (MCII->get(MI.getOpcode()).TSFlags & SIInstrFlags::MIMG) {
    int VAddr0Idx =
        AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::vaddr0);
    int RsrcIdx =
        AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::srsrc);
    unsigned NSAArgs = RsrcIdx - VAddr0Idx - 1;
    if (VAddr0Idx >= 0 && NSAArgs > 0) {
      unsigned NSAWords = (NSAArgs + 3) / 4;
      if (Bytes.size() < 4 * NSAWords)
        return MCDisassembler::Fail;
      for (unsigned i = 0; i < NSAArgs; ++i) {
        const unsigned VAddrIdx = VAddr0Idx + 1 + i;
        auto VAddrRCID =
            MCII->get(MI.getOpcode()).operands()[VAddrIdx].RegClass;
        MI.insert(MI.begin() + VAddrIdx, createRegOperand(VAddrRCID, Bytes[i]));
      }
      Bytes = Bytes.slice(4 * NSAWords);
    }

    convertMIMGInst(MI);
  }

  if (MCII->get(MI.getOpcode()).TSFlags &
      (SIInstrFlags::VIMAGE | SIInstrFlags::VSAMPLE))
    convertMIMGInst(MI);

  if (MCII->get(MI.getOpcode()).TSFlags & SIInstrFlags::EXP)
    convertEXPInst(MI);

  if (MCII->get(MI.getOpcode()).TSFlags & SIInstrFlags::VINTERP)
    convertVINTERPInst(MI);

  if (MCII->get(MI.getOpcode()).TSFlags & SIInstrFlags::SDWA)
    convertSDWAInst(MI);

  int VDstIn_Idx = AMDGPU::getNamedOperandIdx(MI.getOpcode(),
                                              AMDGPU::OpName::vdst_in);
  if (VDstIn_Idx != -1) {
    int Tied = MCII->get(MI.getOpcode()).getOperandConstraint(VDstIn_Idx,
                           MCOI::TIED_TO);
    if (Tied != -1 && (MI.getNumOperands() <= (unsigned)VDstIn_Idx ||
         !MI.getOperand(VDstIn_Idx).isReg() ||
         MI.getOperand(VDstIn_Idx).getReg() != MI.getOperand(Tied).getReg())) {
      if (MI.getNumOperands() > (unsigned)VDstIn_Idx)
        MI.erase(&MI.getOperand(VDstIn_Idx));
      insertNamedMCOperand(MI,
        MCOperand::createReg(MI.getOperand(Tied).getReg()),
        AMDGPU::OpName::vdst_in);
    }
  }

  int ImmLitIdx =
      AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::imm);
  bool IsSOPK = MCII->get(MI.getOpcode()).TSFlags & SIInstrFlags::SOPK;
  if (ImmLitIdx != -1 && !IsSOPK)
    convertFMAanyK(MI, ImmLitIdx);

  Size = MaxInstBytesNum - Bytes.size();
  return MCDisassembler::Success;
}

void AMDGPUDisassembler::convertEXPInst(MCInst &MI) const {
  if (STI.hasFeature(AMDGPU::FeatureGFX11Insts)) {
    // The MCInst still has these fields even though they are no longer encoded
    // in the GFX11 instruction.
    insertNamedMCOperand(MI, MCOperand::createImm(0), AMDGPU::OpName::vm);
    insertNamedMCOperand(MI, MCOperand::createImm(0), AMDGPU::OpName::compr);
  }
}

void AMDGPUDisassembler::convertVINTERPInst(MCInst &MI) const {
  if (MI.getOpcode() == AMDGPU::V_INTERP_P10_F16_F32_inreg_gfx11 ||
      MI.getOpcode() == AMDGPU::V_INTERP_P10_F16_F32_inreg_gfx12 ||
      MI.getOpcode() == AMDGPU::V_INTERP_P10_RTZ_F16_F32_inreg_gfx11 ||
      MI.getOpcode() == AMDGPU::V_INTERP_P10_RTZ_F16_F32_inreg_gfx12 ||
      MI.getOpcode() == AMDGPU::V_INTERP_P2_F16_F32_inreg_gfx11 ||
      MI.getOpcode() == AMDGPU::V_INTERP_P2_F16_F32_inreg_gfx12 ||
      MI.getOpcode() == AMDGPU::V_INTERP_P2_RTZ_F16_F32_inreg_gfx11 ||
      MI.getOpcode() == AMDGPU::V_INTERP_P2_RTZ_F16_F32_inreg_gfx12) {
    // The MCInst has this field that is not directly encoded in the
    // instruction.
    insertNamedMCOperand(MI, MCOperand::createImm(0), AMDGPU::OpName::op_sel);
  }
}

void AMDGPUDisassembler::convertSDWAInst(MCInst &MI) const {
  if (STI.hasFeature(AMDGPU::FeatureGFX9) ||
      STI.hasFeature(AMDGPU::FeatureGFX10)) {
    if (AMDGPU::hasNamedOperand(MI.getOpcode(), AMDGPU::OpName::sdst))
      // VOPC - insert clamp
      insertNamedMCOperand(MI, MCOperand::createImm(0), AMDGPU::OpName::clamp);
  } else if (STI.hasFeature(AMDGPU::FeatureVolcanicIslands)) {
    int SDst = AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::sdst);
    if (SDst != -1) {
      // VOPC - insert VCC register as sdst
      insertNamedMCOperand(MI, createRegOperand(AMDGPU::VCC),
                           AMDGPU::OpName::sdst);
    } else {
      // VOP1/2 - insert omod if present in instruction
      insertNamedMCOperand(MI, MCOperand::createImm(0), AMDGPU::OpName::omod);
    }
  }
}

struct VOPModifiers {
  unsigned OpSel = 0;
  unsigned OpSelHi = 0;
  unsigned NegLo = 0;
  unsigned NegHi = 0;
};

// Reconstruct values of VOP3/VOP3P operands such as op_sel.
// Note that these values do not affect disassembler output,
// so this is only necessary for consistency with src_modifiers.
static VOPModifiers collectVOPModifiers(const MCInst &MI,
                                        bool IsVOP3P = false) {
  VOPModifiers Modifiers;
  unsigned Opc = MI.getOpcode();
  const int ModOps[] = {AMDGPU::OpName::src0_modifiers,
                        AMDGPU::OpName::src1_modifiers,
                        AMDGPU::OpName::src2_modifiers};
  for (int J = 0; J < 3; ++J) {
    int OpIdx = AMDGPU::getNamedOperandIdx(Opc, ModOps[J]);
    if (OpIdx == -1)
      continue;

    unsigned Val = MI.getOperand(OpIdx).getImm();

    Modifiers.OpSel |= !!(Val & SISrcMods::OP_SEL_0) << J;
    if (IsVOP3P) {
      Modifiers.OpSelHi |= !!(Val & SISrcMods::OP_SEL_1) << J;
      Modifiers.NegLo |= !!(Val & SISrcMods::NEG) << J;
      Modifiers.NegHi |= !!(Val & SISrcMods::NEG_HI) << J;
    } else if (J == 0) {
      Modifiers.OpSel |= !!(Val & SISrcMods::DST_OP_SEL) << 3;
    }
  }

  return Modifiers;
}
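
// Worked example (illustrative): with OP_SEL_0 set only in src1_modifiers,
// OpSel becomes 0b0010 (bit J == 1); for non-VOP3P opcodes, DST_OP_SEL from
// src0_modifiers is folded into bit 3 of OpSel.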

// Instructions decode the op_sel/suffix bits into the src_modifier
// operands. Copy those bits into the src operands for true16 VGPRs.
void AMDGPUDisassembler::convertTrue16OpSel(MCInst &MI) const {
  const unsigned Opc = MI.getOpcode();
  const MCRegisterClass &ConversionRC =
      MRI.getRegClass(AMDGPU::VGPR_16RegClassID);
  constexpr std::array<std::tuple<int, int, unsigned>, 4> OpAndOpMods = {
      {{AMDGPU::OpName::src0, AMDGPU::OpName::src0_modifiers,
        SISrcMods::OP_SEL_0},
       {AMDGPU::OpName::src1, AMDGPU::OpName::src1_modifiers,
        SISrcMods::OP_SEL_0},
       {AMDGPU::OpName::src2, AMDGPU::OpName::src2_modifiers,
        SISrcMods::OP_SEL_0},
       {AMDGPU::OpName::vdst, AMDGPU::OpName::src0_modifiers,
        SISrcMods::DST_OP_SEL}}};
  for (const auto &[OpName, OpModsName, OpSelMask] : OpAndOpMods) {
    int OpIdx = AMDGPU::getNamedOperandIdx(Opc, OpName);
    int OpModsIdx = AMDGPU::getNamedOperandIdx(Opc, OpModsName);
    if (OpIdx == -1 || OpModsIdx == -1)
      continue;
    MCOperand &Op = MI.getOperand(OpIdx);
    if (!Op.isReg())
      continue;
    if (!ConversionRC.contains(Op.getReg()))
      continue;
    unsigned OpEnc = MRI.getEncodingValue(Op.getReg());
    const MCOperand &OpMods = MI.getOperand(OpModsIdx);
    unsigned ModVal = OpMods.getImm();
    if (ModVal & OpSelMask) { // isHi
      unsigned RegIdx = OpEnc & AMDGPU::HWEncoding::REG_IDX_MASK;
      Op.setReg(ConversionRC.getRegister(RegIdx * 2 + 1));
    }
  }
}

// MAC opcodes have special old and src2 operands.
// src2 is tied to dst, while old is not tied (but assumed to be).
bool AMDGPUDisassembler::isMacDPP(MCInst &MI) const {
  constexpr int DST_IDX = 0;
  auto Opcode = MI.getOpcode();
  const auto &Desc = MCII->get(Opcode);
  auto OldIdx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::old);

  if (OldIdx != -1 && Desc.getOperandConstraint(
                          OldIdx, MCOI::OperandConstraint::TIED_TO) == -1) {
    assert(AMDGPU::hasNamedOperand(Opcode, AMDGPU::OpName::src2));
    assert(Desc.getOperandConstraint(
               AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2),
               MCOI::OperandConstraint::TIED_TO) == DST_IDX);
    (void)DST_IDX;
    return true;
  }

  return false;
}

// Create dummy old operand and insert dummy unused src2_modifiers
void AMDGPUDisassembler::convertMacDPPInst(MCInst &MI) const {
  assert(MI.getNumOperands() + 1 < MCII->get(MI.getOpcode()).getNumOperands());
  insertNamedMCOperand(MI, MCOperand::createReg(0), AMDGPU::OpName::old);
  insertNamedMCOperand(MI, MCOperand::createImm(0),
                       AMDGPU::OpName::src2_modifiers);
}

void AMDGPUDisassembler::convertDPP8Inst(MCInst &MI) const {
  unsigned Opc = MI.getOpcode();

  int VDstInIdx =
      AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::vdst_in);
  if (VDstInIdx != -1)
    insertNamedMCOperand(MI, MI.getOperand(0), AMDGPU::OpName::vdst_in);

  unsigned DescNumOps = MCII->get(Opc).getNumOperands();
  if (MI.getNumOperands() < DescNumOps &&
      AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::op_sel)) {
    convertTrue16OpSel(MI);
    auto Mods = collectVOPModifiers(MI);
    insertNamedMCOperand(MI, MCOperand::createImm(Mods.OpSel),
                         AMDGPU::OpName::op_sel);
  } else {
    // Insert dummy unused src modifiers.
    if (MI.getNumOperands() < DescNumOps &&
        AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::src0_modifiers))
      insertNamedMCOperand(MI, MCOperand::createImm(0),
                           AMDGPU::OpName::src0_modifiers);

    if (MI.getNumOperands() < DescNumOps &&
        AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::src1_modifiers))
      insertNamedMCOperand(MI, MCOperand::createImm(0),
                           AMDGPU::OpName::src1_modifiers);
  }
}

void AMDGPUDisassembler::convertVOP3DPPInst(MCInst &MI) const {
  convertTrue16OpSel(MI);

  int VDstInIdx =
      AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::vdst_in);
  if (VDstInIdx != -1)
    insertNamedMCOperand(MI, MI.getOperand(0), AMDGPU::OpName::vdst_in);

  unsigned Opc = MI.getOpcode();
  unsigned DescNumOps = MCII->get(Opc).getNumOperands();
  if (MI.getNumOperands() < DescNumOps &&
      AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::op_sel)) {
    auto Mods = collectVOPModifiers(MI);
    insertNamedMCOperand(MI, MCOperand::createImm(Mods.OpSel),
                         AMDGPU::OpName::op_sel);
  }
}
941
942// Note that before gfx10, the MIMG encoding provided no information about
943// VADDR size. Consequently, decoded instructions always show address as if it
944// has 1 dword, which could be not really so.
946 auto TSFlags = MCII->get(MI.getOpcode()).TSFlags;
947
948 int VDstIdx = AMDGPU::getNamedOperandIdx(MI.getOpcode(),
949 AMDGPU::OpName::vdst);
950
951 int VDataIdx = AMDGPU::getNamedOperandIdx(MI.getOpcode(),
952 AMDGPU::OpName::vdata);
953 int VAddr0Idx =
954 AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::vaddr0);
955 int RsrcOpName = (TSFlags & SIInstrFlags::MIMG) ? AMDGPU::OpName::srsrc
956 : AMDGPU::OpName::rsrc;
957 int RsrcIdx = AMDGPU::getNamedOperandIdx(MI.getOpcode(), RsrcOpName);
958 int DMaskIdx = AMDGPU::getNamedOperandIdx(MI.getOpcode(),
959 AMDGPU::OpName::dmask);
960
961 int TFEIdx = AMDGPU::getNamedOperandIdx(MI.getOpcode(),
962 AMDGPU::OpName::tfe);
963 int D16Idx = AMDGPU::getNamedOperandIdx(MI.getOpcode(),
964 AMDGPU::OpName::d16);
965
966 const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(MI.getOpcode());
967 const AMDGPU::MIMGBaseOpcodeInfo *BaseOpcode =
969
970 assert(VDataIdx != -1);
971 if (BaseOpcode->BVH) {
972 // Add A16 operand for intersect_ray instructions
973 addOperand(MI, MCOperand::createImm(BaseOpcode->A16));
974 return;
975 }
976
977 bool IsAtomic = (VDstIdx != -1);
978 bool IsGather4 = TSFlags & SIInstrFlags::Gather4;
979 bool IsVSample = TSFlags & SIInstrFlags::VSAMPLE;
980 bool IsNSA = false;
981 bool IsPartialNSA = false;
982 unsigned AddrSize = Info->VAddrDwords;
983
984 if (isGFX10Plus()) {
985 unsigned DimIdx =
986 AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::dim);
987 int A16Idx =
988 AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::a16);
989 const AMDGPU::MIMGDimInfo *Dim =
990 AMDGPU::getMIMGDimInfoByEncoding(MI.getOperand(DimIdx).getImm());
991 const bool IsA16 = (A16Idx != -1 && MI.getOperand(A16Idx).getImm());
992
993 AddrSize =
994 AMDGPU::getAddrSizeMIMGOp(BaseOpcode, Dim, IsA16, AMDGPU::hasG16(STI));
995
996 // VSAMPLE insts that do not use vaddr3 behave the same as NSA forms.
997 // VIMAGE insts other than BVH never use vaddr4.
998 IsNSA = Info->MIMGEncoding == AMDGPU::MIMGEncGfx10NSA ||
999 Info->MIMGEncoding == AMDGPU::MIMGEncGfx11NSA ||
1000 Info->MIMGEncoding == AMDGPU::MIMGEncGfx12;
1001 if (!IsNSA) {
1002 if (!IsVSample && AddrSize > 12)
1003 AddrSize = 16;
1004 } else {
1005 if (AddrSize > Info->VAddrDwords) {
1006 if (!STI.hasFeature(AMDGPU::FeaturePartialNSAEncoding)) {
1007 // The NSA encoding does not contain enough operands for the
1008 // combination of base opcode / dimension. Should this be an error?
1009 return;
1010 }
1011 IsPartialNSA = true;
1012 }
1013 }
1014 }
1015
1016 unsigned DMask = MI.getOperand(DMaskIdx).getImm() & 0xf;
1017 unsigned DstSize = IsGather4 ? 4 : std::max(llvm::popcount(DMask), 1);
1018
1019 bool D16 = D16Idx >= 0 && MI.getOperand(D16Idx).getImm();
1020 if (D16 && AMDGPU::hasPackedD16(STI)) {
1021 DstSize = (DstSize + 1) / 2;
1022 }
1023
1024 if (TFEIdx != -1 && MI.getOperand(TFEIdx).getImm())
1025 DstSize += 1;
1026
1027 if (DstSize == Info->VDataDwords && AddrSize == Info->VAddrDwords)
1028 return;
1029
1030 int NewOpcode =
1031 AMDGPU::getMIMGOpcode(Info->BaseOpcode, Info->MIMGEncoding, DstSize, AddrSize);
1032 if (NewOpcode == -1)
1033 return;
1034
1035 // Widen the register to the correct number of enabled channels.
1036 unsigned NewVdata = AMDGPU::NoRegister;
1037 if (DstSize != Info->VDataDwords) {
1038 auto DataRCID = MCII->get(NewOpcode).operands()[VDataIdx].RegClass;
1039
1040 // Get first subregister of VData
1041 unsigned Vdata0 = MI.getOperand(VDataIdx).getReg();
1042 unsigned VdataSub0 = MRI.getSubReg(Vdata0, AMDGPU::sub0);
1043 Vdata0 = (VdataSub0 != 0)? VdataSub0 : Vdata0;
1044
1045 NewVdata = MRI.getMatchingSuperReg(Vdata0, AMDGPU::sub0,
1046 &MRI.getRegClass(DataRCID));
1047 if (NewVdata == AMDGPU::NoRegister) {
1048 // It's possible to encode this such that the low register + enabled
1049 // components exceeds the register count.
1050 return;
1051 }
1052 }
1053
1054 // If not using NSA on GFX10+, widen vaddr0 address register to correct size.
1055 // If using partial NSA on GFX11+ widen last address register.
1056 int VAddrSAIdx = IsPartialNSA ? (RsrcIdx - 1) : VAddr0Idx;
1057 unsigned NewVAddrSA = AMDGPU::NoRegister;
1058 if (STI.hasFeature(AMDGPU::FeatureNSAEncoding) && (!IsNSA || IsPartialNSA) &&
1059 AddrSize != Info->VAddrDwords) {
1060 unsigned VAddrSA = MI.getOperand(VAddrSAIdx).getReg();
1061 unsigned VAddrSubSA = MRI.getSubReg(VAddrSA, AMDGPU::sub0);
1062 VAddrSA = VAddrSubSA ? VAddrSubSA : VAddrSA;
1063
1064 auto AddrRCID = MCII->get(NewOpcode).operands()[VAddrSAIdx].RegClass;
1065 NewVAddrSA = MRI.getMatchingSuperReg(VAddrSA, AMDGPU::sub0,
1066 &MRI.getRegClass(AddrRCID));
1067 if (!NewVAddrSA)
1068 return;
1069 }
1070
1071 MI.setOpcode(NewOpcode);
1072
1073 if (NewVdata != AMDGPU::NoRegister) {
1074 MI.getOperand(VDataIdx) = MCOperand::createReg(NewVdata);
1075
1076 if (IsAtomic) {
1077 // Atomic operations have an additional operand (a copy of data)
1078 MI.getOperand(VDstIdx) = MCOperand::createReg(NewVdata);
1079 }
1080 }
1081
1082 if (NewVAddrSA) {
1083 MI.getOperand(VAddrSAIdx) = MCOperand::createReg(NewVAddrSA);
1084 } else if (IsNSA) {
1085 assert(AddrSize <= Info->VAddrDwords);
1086 MI.erase(MI.begin() + VAddr0Idx + AddrSize,
1087 MI.begin() + VAddr0Idx + Info->VAddrDwords);
1088 }
1089}
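
// Illustrative example: dmask = 0b0111 enables three channels, so DstSize is
// 3 dwords; with d16 on a packed-D16 target it shrinks to (3 + 1) / 2 = 2,
// and the opcode is rewritten to the variant whose VDataDwords matches.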

// Opsel and neg bits are used in src_modifiers and standalone operands. The
// autogenerated decoder only adds to src_modifiers, so manually add the bits
// to the other operands.
void AMDGPUDisassembler::convertVOP3PDPPInst(MCInst &MI) const {
  unsigned Opc = MI.getOpcode();
  unsigned DescNumOps = MCII->get(Opc).getNumOperands();
  auto Mods = collectVOPModifiers(MI, true);

  if (MI.getNumOperands() < DescNumOps &&
      AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::vdst_in))
    insertNamedMCOperand(MI, MCOperand::createImm(0), AMDGPU::OpName::vdst_in);

  if (MI.getNumOperands() < DescNumOps &&
      AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::op_sel))
    insertNamedMCOperand(MI, MCOperand::createImm(Mods.OpSel),
                         AMDGPU::OpName::op_sel);
  if (MI.getNumOperands() < DescNumOps &&
      AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::op_sel_hi))
    insertNamedMCOperand(MI, MCOperand::createImm(Mods.OpSelHi),
                         AMDGPU::OpName::op_sel_hi);
  if (MI.getNumOperands() < DescNumOps &&
      AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::neg_lo))
    insertNamedMCOperand(MI, MCOperand::createImm(Mods.NegLo),
                         AMDGPU::OpName::neg_lo);
  if (MI.getNumOperands() < DescNumOps &&
      AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::neg_hi))
    insertNamedMCOperand(MI, MCOperand::createImm(Mods.NegHi),
                         AMDGPU::OpName::neg_hi);
}

// Create dummy old operand and insert optional operands
void AMDGPUDisassembler::convertVOPCDPPInst(MCInst &MI) const {
  unsigned Opc = MI.getOpcode();
  unsigned DescNumOps = MCII->get(Opc).getNumOperands();

  if (MI.getNumOperands() < DescNumOps &&
      AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::old))
    insertNamedMCOperand(MI, MCOperand::createReg(0), AMDGPU::OpName::old);

  if (MI.getNumOperands() < DescNumOps &&
      AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::src0_modifiers))
    insertNamedMCOperand(MI, MCOperand::createImm(0),
                         AMDGPU::OpName::src0_modifiers);

  if (MI.getNumOperands() < DescNumOps &&
      AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::src1_modifiers))
    insertNamedMCOperand(MI, MCOperand::createImm(0),
                         AMDGPU::OpName::src1_modifiers);
}

void AMDGPUDisassembler::convertFMAanyK(MCInst &MI, int ImmLitIdx) const {
  assert(HasLiteral && "Should have decoded a literal");
  const MCInstrDesc &Desc = MCII->get(MI.getOpcode());
  unsigned DescNumOps = Desc.getNumOperands();
  insertNamedMCOperand(MI, MCOperand::createImm(Literal),
                       AMDGPU::OpName::immDeferred);
  assert(DescNumOps == MI.getNumOperands());
  for (unsigned I = 0; I < DescNumOps; ++I) {
    auto &Op = MI.getOperand(I);
    auto OpType = Desc.operands()[I].OperandType;
    bool IsDeferredOp = (OpType == AMDGPU::OPERAND_REG_IMM_FP32_DEFERRED ||
                         OpType == AMDGPU::OPERAND_REG_IMM_FP16_DEFERRED);
    if (Op.isImm() && Op.getImm() == AMDGPU::EncValues::LITERAL_CONST &&
        IsDeferredOp)
      Op.setImm(Literal);
  }
}

const char* AMDGPUDisassembler::getRegClassName(unsigned RegClassID) const {
  return getContext().getRegisterInfo()->
      getRegClassName(&AMDGPUMCRegisterClasses[RegClassID]);
}

inline
MCOperand AMDGPUDisassembler::errOperand(unsigned V,
                                         const Twine& ErrMsg) const {
  *CommentStream << "Error: " + ErrMsg;

  // ToDo: add support for error operands to MCInst.h
  // return MCOperand::createError(V);
  return MCOperand();
}

inline
MCOperand AMDGPUDisassembler::createRegOperand(unsigned int RegId) const {
  return MCOperand::createReg(AMDGPU::getMCReg(RegId, STI));
}

inline
MCOperand AMDGPUDisassembler::createRegOperand(unsigned RegClassID,
                                               unsigned Val) const {
  const auto& RegCl = AMDGPUMCRegisterClasses[RegClassID];
  if (Val >= RegCl.getNumRegs())
    return errOperand(Val, Twine(getRegClassName(RegClassID)) +
                           ": unknown register " + Twine(Val));
  return createRegOperand(RegCl.getRegister(Val));
}

inline
MCOperand AMDGPUDisassembler::createSRegOperand(unsigned SRegClassID,
                                                unsigned Val) const {
  // ToDo: SI/CI have 104 SGPRs, VI - 102
  // Valery: here we accept as much as we can, let the assembler sort it out
  int shift = 0;
  switch (SRegClassID) {
  case AMDGPU::SGPR_32RegClassID:
  case AMDGPU::TTMP_32RegClassID:
    break;
  case AMDGPU::SGPR_64RegClassID:
  case AMDGPU::TTMP_64RegClassID:
    shift = 1;
    break;
  case AMDGPU::SGPR_96RegClassID:
  case AMDGPU::TTMP_96RegClassID:
  case AMDGPU::SGPR_128RegClassID:
  case AMDGPU::TTMP_128RegClassID:
  // ToDo: unclear if s[100:104] is available on VI. Can we use VCC as SGPR in
  // this bundle?
  case AMDGPU::SGPR_256RegClassID:
  case AMDGPU::TTMP_256RegClassID:
  // ToDo: unclear if s[96:104] is available on VI. Can we use VCC as SGPR in
  // this bundle?
  case AMDGPU::SGPR_288RegClassID:
  case AMDGPU::TTMP_288RegClassID:
  case AMDGPU::SGPR_320RegClassID:
  case AMDGPU::TTMP_320RegClassID:
  case AMDGPU::SGPR_352RegClassID:
  case AMDGPU::TTMP_352RegClassID:
  case AMDGPU::SGPR_384RegClassID:
  case AMDGPU::TTMP_384RegClassID:
  case AMDGPU::SGPR_512RegClassID:
  case AMDGPU::TTMP_512RegClassID:
    shift = 2;
    break;
  // ToDo: unclear if s[88:104] is available on VI. Can we use VCC as SGPR in
  // this bundle?
  default:
    llvm_unreachable("unhandled register class");
  }

  if (Val % (1 << shift)) {
    *CommentStream << "Warning: " << getRegClassName(SRegClassID)
                   << ": scalar reg isn't aligned " << Val;
  }

  return createRegOperand(SRegClassID, Val >> shift);
}

MCOperand AMDGPUDisassembler::createVGPR16Operand(unsigned RegIdx,
                                                  bool IsHi) const {
  unsigned RegIdxInVGPR16 = RegIdx * 2 + (IsHi ? 1 : 0);
  return createRegOperand(AMDGPU::VGPR_16RegClassID, RegIdxInVGPR16);
}

// Decode Literals for insts which always have a literal in the encoding
MCOperand
AMDGPUDisassembler::decodeMandatoryLiteralConstant(unsigned Val) const {
  if (HasLiteral) {
    assert(
        AMDGPU::hasVOPD(STI) &&
        "Should only decode multiple kimm with VOPD, check VSrc operand types");
    if (Literal != Val)
      return errOperand(Val, "More than one unique literal is illegal");
  }
  HasLiteral = true;
  Literal = Val;
  return MCOperand::createImm(Literal);
}

MCOperand AMDGPUDisassembler::decodeLiteralConstant(bool ExtendFP64) const {
  // For now all literal constants are supposed to be unsigned integer
  // ToDo: deal with signed/unsigned 64-bit integer constants
  // ToDo: deal with float/double constants
  if (!HasLiteral) {
    if (Bytes.size() < 4) {
      return errOperand(0, "cannot read literal, inst bytes left " +
                           Twine(Bytes.size()));
    }
    HasLiteral = true;
    Literal = Literal64 = eatBytes<uint32_t>(Bytes);
    if (ExtendFP64)
      Literal64 <<= 32;
  }
  return MCOperand::createImm(ExtendFP64 ? Literal64 : Literal);
}

MCOperand AMDGPUDisassembler::decodeIntImmed(unsigned Imm) {
  using namespace AMDGPU::EncValues;

  assert(Imm >= INLINE_INTEGER_C_MIN && Imm <= INLINE_INTEGER_C_MAX);
  return MCOperand::createImm((Imm <= INLINE_INTEGER_C_POSITIVE_MAX) ?
    (static_cast<int64_t>(Imm) - INLINE_INTEGER_C_MIN) :
    (INLINE_INTEGER_C_POSITIVE_MAX - static_cast<int64_t>(Imm)));
      // Cast prevents negative overflow.
}

static int64_t getInlineImmVal32(unsigned Imm) {
  switch (Imm) {
  case 240:
    return llvm::bit_cast<uint32_t>(0.5f);
  case 241:
    return llvm::bit_cast<uint32_t>(-0.5f);
  case 242:
    return llvm::bit_cast<uint32_t>(1.0f);
  case 243:
    return llvm::bit_cast<uint32_t>(-1.0f);
  case 244:
    return llvm::bit_cast<uint32_t>(2.0f);
  case 245:
    return llvm::bit_cast<uint32_t>(-2.0f);
  case 246:
    return llvm::bit_cast<uint32_t>(4.0f);
  case 247:
    return llvm::bit_cast<uint32_t>(-4.0f);
  case 248: // 1 / (2 * PI)
    return 0x3e22f983;
  default:
    llvm_unreachable("invalid fp inline imm");
  }
}
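
// For example, encoding 242 is +1.0: 0x3F800000 here for 32-bit operands,
// 0x3C00 in the f16 table and 0x3F80 in the bf16 table below.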

static int64_t getInlineImmVal64(unsigned Imm) {
  switch (Imm) {
  case 240:
    return llvm::bit_cast<uint64_t>(0.5);
  case 241:
    return llvm::bit_cast<uint64_t>(-0.5);
  case 242:
    return llvm::bit_cast<uint64_t>(1.0);
  case 243:
    return llvm::bit_cast<uint64_t>(-1.0);
  case 244:
    return llvm::bit_cast<uint64_t>(2.0);
  case 245:
    return llvm::bit_cast<uint64_t>(-2.0);
  case 246:
    return llvm::bit_cast<uint64_t>(4.0);
  case 247:
    return llvm::bit_cast<uint64_t>(-4.0);
  case 248: // 1 / (2 * PI)
    return 0x3fc45f306dc9c882;
  default:
    llvm_unreachable("invalid fp inline imm");
  }
}

static int64_t getInlineImmValF16(unsigned Imm) {
  switch (Imm) {
  case 240:
    return 0x3800;
  case 241:
    return 0xB800;
  case 242:
    return 0x3C00;
  case 243:
    return 0xBC00;
  case 244:
    return 0x4000;
  case 245:
    return 0xC000;
  case 246:
    return 0x4400;
  case 247:
    return 0xC400;
  case 248: // 1 / (2 * PI)
    return 0x3118;
  default:
    llvm_unreachable("invalid fp inline imm");
  }
}

static int64_t getInlineImmValBF16(unsigned Imm) {
  switch (Imm) {
  case 240:
    return 0x3F00;
  case 241:
    return 0xBF00;
  case 242:
    return 0x3F80;
  case 243:
    return 0xBF80;
  case 244:
    return 0x4000;
  case 245:
    return 0xC000;
  case 246:
    return 0x4080;
  case 247:
    return 0xC080;
  case 248: // 1 / (2 * PI)
    return 0x3E22;
  default:
    llvm_unreachable("invalid fp inline imm");
  }
}

static int64_t getInlineImmVal16(unsigned Imm, AMDGPU::OperandSemantics Sema) {
  return (Sema == AMDGPU::OperandSemantics::BF16) ? getInlineImmValBF16(Imm)
                                                  : getInlineImmValF16(Imm);
}

MCOperand AMDGPUDisassembler::decodeFPImmed(unsigned ImmWidth, unsigned Imm,
                                            AMDGPU::OperandSemantics Sema) {
  assert(Imm >= AMDGPU::EncValues::INLINE_FLOATING_C_MIN &&
         Imm <= AMDGPU::EncValues::INLINE_FLOATING_C_MAX);

  // ToDo: case 248: 1/(2*PI) - is allowed only on VI
  // ImmWidth 0 is a default case where operand should not allow immediates.
  // Imm value is still decoded into 32 bit immediate operand, inst printer will
  // use it to print verbose error message.
  switch (ImmWidth) {
  case 0:
  case 32:
    return MCOperand::createImm(getInlineImmVal32(Imm));
  case 64:
    return MCOperand::createImm(getInlineImmVal64(Imm));
  case 16:
    return MCOperand::createImm(getInlineImmVal16(Imm, Sema));
  default:
    llvm_unreachable("implement me");
  }
}

unsigned AMDGPUDisassembler::getVgprClassId(const OpWidthTy Width) const {
  using namespace AMDGPU;

  assert(OPW_FIRST_ <= Width && Width < OPW_LAST_);
  switch (Width) {
  default: // fall
  case OPW32:
  case OPW16:
  case OPWV216:
    return VGPR_32RegClassID;
  case OPW64:
  case OPWV232: return VReg_64RegClassID;
  case OPW96: return VReg_96RegClassID;
  case OPW128: return VReg_128RegClassID;
  case OPW160: return VReg_160RegClassID;
  case OPW256: return VReg_256RegClassID;
  case OPW288: return VReg_288RegClassID;
  case OPW320: return VReg_320RegClassID;
  case OPW352: return VReg_352RegClassID;
  case OPW384: return VReg_384RegClassID;
  case OPW512: return VReg_512RegClassID;
  case OPW1024: return VReg_1024RegClassID;
  }
}

unsigned AMDGPUDisassembler::getAgprClassId(const OpWidthTy Width) const {
  using namespace AMDGPU;

  assert(OPW_FIRST_ <= Width && Width < OPW_LAST_);
  switch (Width) {
  default: // fall
  case OPW32:
  case OPW16:
  case OPWV216:
    return AGPR_32RegClassID;
  case OPW64:
  case OPWV232: return AReg_64RegClassID;
  case OPW96: return AReg_96RegClassID;
  case OPW128: return AReg_128RegClassID;
  case OPW160: return AReg_160RegClassID;
  case OPW256: return AReg_256RegClassID;
  case OPW288: return AReg_288RegClassID;
  case OPW320: return AReg_320RegClassID;
  case OPW352: return AReg_352RegClassID;
  case OPW384: return AReg_384RegClassID;
  case OPW512: return AReg_512RegClassID;
  case OPW1024: return AReg_1024RegClassID;
  }
}

unsigned AMDGPUDisassembler::getSgprClassId(const OpWidthTy Width) const {
  using namespace AMDGPU;

  assert(OPW_FIRST_ <= Width && Width < OPW_LAST_);
  switch (Width) {
  default: // fall
  case OPW32:
  case OPW16:
  case OPWV216:
    return SGPR_32RegClassID;
  case OPW64:
  case OPWV232: return SGPR_64RegClassID;
  case OPW96: return SGPR_96RegClassID;
  case OPW128: return SGPR_128RegClassID;
  case OPW160: return SGPR_160RegClassID;
  case OPW256: return SGPR_256RegClassID;
  case OPW288: return SGPR_288RegClassID;
  case OPW320: return SGPR_320RegClassID;
  case OPW352: return SGPR_352RegClassID;
  case OPW384: return SGPR_384RegClassID;
  case OPW512: return SGPR_512RegClassID;
  }
}

unsigned AMDGPUDisassembler::getTtmpClassId(const OpWidthTy Width) const {
  using namespace AMDGPU;

  assert(OPW_FIRST_ <= Width && Width < OPW_LAST_);
  switch (Width) {
  default: // fall
  case OPW32:
  case OPW16:
  case OPWV216:
    return TTMP_32RegClassID;
  case OPW64:
  case OPWV232: return TTMP_64RegClassID;
  case OPW128: return TTMP_128RegClassID;
  case OPW256: return TTMP_256RegClassID;
  case OPW288: return TTMP_288RegClassID;
  case OPW320: return TTMP_320RegClassID;
  case OPW352: return TTMP_352RegClassID;
  case OPW384: return TTMP_384RegClassID;
  case OPW512: return TTMP_512RegClassID;
  }
}

int AMDGPUDisassembler::getTTmpIdx(unsigned Val) const {
  using namespace AMDGPU::EncValues;

  unsigned TTmpMin = isGFX9Plus() ? TTMP_GFX9PLUS_MIN : TTMP_VI_MIN;
  unsigned TTmpMax = isGFX9Plus() ? TTMP_GFX9PLUS_MAX : TTMP_VI_MAX;

  return (TTmpMin <= Val && Val <= TTmpMax) ? Val - TTmpMin : -1;
}
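
// Illustrative example: on GFX9+ the ttmp range starts at encoding 108, so
// getTTmpIdx(110) returns 2 (ttmp2); values outside the range return -1.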

MCOperand AMDGPUDisassembler::decodeSrcOp(const OpWidthTy Width, unsigned Val,
                                          bool MandatoryLiteral,
                                          unsigned ImmWidth,
                                          AMDGPU::OperandSemantics Sema) const {
  using namespace AMDGPU::EncValues;

  assert(Val < 1024); // enum10

  bool IsAGPR = Val & 512;
  Val &= 511;

  if (VGPR_MIN <= Val && Val <= VGPR_MAX) {
    return createRegOperand(IsAGPR ? getAgprClassId(Width)
                                   : getVgprClassId(Width), Val - VGPR_MIN);
  }
  return decodeNonVGPRSrcOp(Width, Val & 0xFF, MandatoryLiteral, ImmWidth,
                            Sema);
}

MCOperand
AMDGPUDisassembler::decodeNonVGPRSrcOp(const OpWidthTy Width, unsigned Val,
                                       bool MandatoryLiteral, unsigned ImmWidth,
                                       AMDGPU::OperandSemantics Sema) const {
  // Cases when Val{8} is 1 (vgpr, agpr or true 16 vgpr) should have been
  // decoded earlier.
  assert(Val < (1 << 8) && "9-bit Src encoding when Val{8} is 0");
  using namespace AMDGPU::EncValues;

  if (Val <= SGPR_MAX) {
    // "SGPR_MIN <= Val" is always true and causes compilation warning.
    static_assert(SGPR_MIN == 0);
    return createSRegOperand(getSgprClassId(Width), Val - SGPR_MIN);
  }

  int TTmpIdx = getTTmpIdx(Val);
  if (TTmpIdx >= 0) {
    return createSRegOperand(getTtmpClassId(Width), TTmpIdx);
  }

  if (INLINE_INTEGER_C_MIN <= Val && Val <= INLINE_INTEGER_C_MAX)
    return decodeIntImmed(Val);

  if (INLINE_FLOATING_C_MIN <= Val && Val <= INLINE_FLOATING_C_MAX)
    return decodeFPImmed(ImmWidth, Val, Sema);

  if (Val == LITERAL_CONST) {
    if (MandatoryLiteral)
      // Keep a sentinel value for deferred setting
      return MCOperand::createImm(LITERAL_CONST);
    else
      return decodeLiteralConstant(Sema == AMDGPU::OperandSemantics::FP64);
  }

  switch (Width) {
  case OPW32:
  case OPW16:
  case OPWV216:
    return decodeSpecialReg32(Val);
  case OPW64:
  case OPWV232:
    return decodeSpecialReg64(Val);
  default:
    llvm_unreachable("unexpected immediate type");
  }
}

// Bit 0 of DstY isn't stored in the instruction, because it's always the
// opposite of bit 0 of DstX.
MCOperand AMDGPUDisassembler::decodeVOPDDstYOp(MCInst &Inst,
                                               unsigned Val) const {
  int VDstXInd =
      AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::vdstX);
  assert(VDstXInd != -1);
  assert(Inst.getOperand(VDstXInd).isReg());
  unsigned XDstReg = MRI.getEncodingValue(Inst.getOperand(VDstXInd).getReg());
  Val |= ~XDstReg & 1;
  auto Width = llvm::AMDGPUDisassembler::OPW32;
  return createRegOperand(getVgprClassId(Width), Val);
}
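
// E.g. if vdstX landed on an even VGPR (encoding bit 0 == 0), the decoded
// DstY value gets bit 0 forced to 1, giving a register of opposite parity.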

MCOperand AMDGPUDisassembler::decodeSpecialReg32(unsigned Val) const {
  using namespace AMDGPU;

  switch (Val) {
  // clang-format off
  case 102: return createRegOperand(FLAT_SCR_LO);
  case 103: return createRegOperand(FLAT_SCR_HI);
  case 104: return createRegOperand(XNACK_MASK_LO);
  case 105: return createRegOperand(XNACK_MASK_HI);
  case 106: return createRegOperand(VCC_LO);
  case 107: return createRegOperand(VCC_HI);
  case 108: return createRegOperand(TBA_LO);
  case 109: return createRegOperand(TBA_HI);
  case 110: return createRegOperand(TMA_LO);
  case 111: return createRegOperand(TMA_HI);
  case 124:
    return isGFX11Plus() ? createRegOperand(SGPR_NULL) : createRegOperand(M0);
  case 125:
    return isGFX11Plus() ? createRegOperand(M0) : createRegOperand(SGPR_NULL);
  case 126: return createRegOperand(EXEC_LO);
  case 127: return createRegOperand(EXEC_HI);
  case 235: return createRegOperand(SRC_SHARED_BASE_LO);
  case 236: return createRegOperand(SRC_SHARED_LIMIT_LO);
  case 237: return createRegOperand(SRC_PRIVATE_BASE_LO);
  case 238: return createRegOperand(SRC_PRIVATE_LIMIT_LO);
  case 239: return createRegOperand(SRC_POPS_EXITING_WAVE_ID);
  case 251: return createRegOperand(SRC_VCCZ);
  case 252: return createRegOperand(SRC_EXECZ);
  case 253: return createRegOperand(SRC_SCC);
  case 254: return createRegOperand(LDS_DIRECT);
  default: break;
  // clang-format on
  }
  return errOperand(Val, "unknown operand encoding " + Twine(Val));
}

MCOperand AMDGPUDisassembler::decodeSpecialReg64(unsigned Val) const {
  using namespace AMDGPU;

  switch (Val) {
  case 102: return createRegOperand(FLAT_SCR);
  case 104: return createRegOperand(XNACK_MASK);
  case 106: return createRegOperand(VCC);
  case 108: return createRegOperand(TBA);
  case 110: return createRegOperand(TMA);
  case 124:
    if (isGFX11Plus())
      return createRegOperand(SGPR_NULL);
    break;
  case 125:
    if (!isGFX11Plus())
      return createRegOperand(SGPR_NULL);
    break;
  case 126: return createRegOperand(EXEC);
  case 235: return createRegOperand(SRC_SHARED_BASE);
  case 236: return createRegOperand(SRC_SHARED_LIMIT);
  case 237: return createRegOperand(SRC_PRIVATE_BASE);
  case 238: return createRegOperand(SRC_PRIVATE_LIMIT);
  case 239: return createRegOperand(SRC_POPS_EXITING_WAVE_ID);
  case 251: return createRegOperand(SRC_VCCZ);
  case 252: return createRegOperand(SRC_EXECZ);
  case 253: return createRegOperand(SRC_SCC);
  default: break;
  }
  return errOperand(Val, "unknown operand encoding " + Twine(Val));
}

MCOperand
AMDGPUDisassembler::decodeSDWASrc(const OpWidthTy Width, const unsigned Val,
                                  unsigned ImmWidth,
                                  AMDGPU::OperandSemantics Sema) const {
  using namespace AMDGPU::SDWA;
  using namespace AMDGPU::EncValues;

  if (STI.hasFeature(AMDGPU::FeatureGFX9) ||
      STI.hasFeature(AMDGPU::FeatureGFX10)) {
    // XXX: cast to int is needed to avoid a spurious warning:
    // comparison with unsigned is always true
    if (int(SDWA9EncValues::SRC_VGPR_MIN) <= int(Val) &&
        Val <= SDWA9EncValues::SRC_VGPR_MAX) {
      return createRegOperand(getVgprClassId(Width),
                              Val - SDWA9EncValues::SRC_VGPR_MIN);
    }
    if (SDWA9EncValues::SRC_SGPR_MIN <= Val &&
        Val <= (isGFX10Plus() ? SDWA9EncValues::SRC_SGPR_MAX_GFX10
                              : SDWA9EncValues::SRC_SGPR_MAX_SI)) {
      return createSRegOperand(getSgprClassId(Width),
                               Val - SDWA9EncValues::SRC_SGPR_MIN);
    }
    if (SDWA9EncValues::SRC_TTMP_MIN <= Val &&
        Val <= SDWA9EncValues::SRC_TTMP_MAX) {
      return createSRegOperand(getTtmpClassId(Width),
                               Val - SDWA9EncValues::SRC_TTMP_MIN);
    }

    const unsigned SVal = Val - SDWA9EncValues::SRC_SGPR_MIN;

    if (INLINE_INTEGER_C_MIN <= SVal && SVal <= INLINE_INTEGER_C_MAX)
      return decodeIntImmed(SVal);

    if (INLINE_FLOATING_C_MIN <= SVal && SVal <= INLINE_FLOATING_C_MAX)
      return decodeFPImmed(ImmWidth, SVal, Sema);

    return decodeSpecialReg32(SVal);
  } else if (STI.hasFeature(AMDGPU::FeatureVolcanicIslands)) {
    return createRegOperand(getVgprClassId(Width), Val);
  }
  llvm_unreachable("unsupported target");
}

MCOperand AMDGPUDisassembler::decodeSDWASrc16(unsigned Val) const {
  return decodeSDWASrc(OPW16, Val, 16, AMDGPU::OperandSemantics::FP16);
}

MCOperand AMDGPUDisassembler::decodeSDWASrc32(unsigned Val) const {
  return decodeSDWASrc(OPW32, Val, 32, AMDGPU::OperandSemantics::FP32);
}

MCOperand AMDGPUDisassembler::decodeSDWAVopcDst(unsigned Val) const {
  using namespace AMDGPU::SDWA;

  assert((STI.hasFeature(AMDGPU::FeatureGFX9) ||
          STI.hasFeature(AMDGPU::FeatureGFX10)) &&
         "SDWAVopcDst should be present only on GFX9+");

  bool IsWave64 = STI.hasFeature(AMDGPU::FeatureWavefrontSize64);

  if (Val & SDWA9EncValues::VOPC_DST_VCC_MASK) {
    Val &= SDWA9EncValues::VOPC_DST_SGPR_MASK;

    int TTmpIdx = getTTmpIdx(Val);
    if (TTmpIdx >= 0) {
      auto TTmpClsId = getTtmpClassId(IsWave64 ? OPW64 : OPW32);
      return createSRegOperand(TTmpClsId, TTmpIdx);
    } else if (Val > SGPR_MAX) {
      return IsWave64 ? decodeSpecialReg64(Val)
                      : decodeSpecialReg32(Val);
    } else {
      return createSRegOperand(getSgprClassId(IsWave64 ? OPW64 : OPW32), Val);
    }
  } else {
    return createRegOperand(IsWave64 ? AMDGPU::VCC : AMDGPU::VCC_LO);
  }
}

MCOperand AMDGPUDisassembler::decodeBoolReg(unsigned Val) const {
  return STI.hasFeature(AMDGPU::FeatureWavefrontSize64)
             ? decodeSrcOp(OPW64, Val)
             : decodeSrcOp(OPW32, Val);
}

MCOperand AMDGPUDisassembler::decodeSplitBarrier(unsigned Val) const {
  return decodeSrcOp(OPW32, Val);
}

MCOperand AMDGPUDisassembler::decodeDpp8FI(unsigned Val) const {
  if (Val != AMDGPU::DPP::DPP8_FI_0 && Val != AMDGPU::DPP::DPP8_FI_1)
    return MCOperand();
  return MCOperand::createImm(Val);
}
1760
1762 using VersionField = AMDGPU::EncodingField<7, 0>;
1763 using W64Bit = AMDGPU::EncodingBit<13>;
1764 using W32Bit = AMDGPU::EncodingBit<14>;
1765 using MDPBit = AMDGPU::EncodingBit<15>;
1766 using Encoding = AMDGPU::EncodingFields<VersionField, W64Bit, W32Bit, MDPBit>;
1767
1768 auto [Version, W64, W32, MDP] = Encoding::decode(Imm);
1769
1770 // Decode into a plain immediate if any unused bits are set.
1771 if (Encoding::encode(Version, W64, W32, MDP) != Imm)
1772 return MCOperand::createImm(Imm);
1773
1774 const auto &Versions = AMDGPU::UCVersion::getGFXVersions();
1775 auto I = find_if(Versions,
1776 [Version = Version](const AMDGPU::UCVersion::GFXVersion &V) {
1777 return V.Code == Version;
1778 });
1779 MCContext &Ctx = getContext();
1780 const MCExpr *E;
1781 if (I == Versions.end())
1782 E = MCConstantExpr::create(Version, Ctx);
1783 else
1784 E = MCSymbolRefExpr::create(Ctx.getOrCreateSymbol(I->Symbol), Ctx);
1785
1786 if (W64)
1787 E = MCBinaryExpr::createOr(E, UCVersionW64Expr, Ctx);
1788 if (W32)
1789 E = MCBinaryExpr::createOr(E, UCVersionW32Expr, Ctx);
1790 if (MDP)
1791 E = MCBinaryExpr::createOr(E, UCVersionMDPExpr, Ctx);
1792
1793 return MCOperand::createExpr(E);
1794}
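// Illustrative walkthrough (editor's sketch, not part of the LLVM source):
// with the field layout above, Imm = 0x4001 decodes to Version = 1 with only
// W32 set, and Encoding::encode(1, 0, 1, 0) == 0x4001, so the round-trip
// check passes and a symbolic expression ORed with UCVersionW32Expr is
// emitted. Imm = 0x4101 sets unused bit 8, fails the round-trip check, and
// is printed as a plain immediate instead.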
1795
1796bool AMDGPUDisassembler::isVI() const {
1797 return STI.hasFeature(AMDGPU::FeatureVolcanicIslands);
1798}
1799
1800bool AMDGPUDisassembler::isGFX9() const { return AMDGPU::isGFX9(STI); }
1801
1802bool AMDGPUDisassembler::isGFX90A() const {
1803 return STI.hasFeature(AMDGPU::FeatureGFX90AInsts);
1804}
1805
1806bool AMDGPUDisassembler::isGFX9Plus() const { return AMDGPU::isGFX9Plus(STI); }
1807
1808bool AMDGPUDisassembler::isGFX10() const { return AMDGPU::isGFX10(STI); }
1809
1810bool AMDGPUDisassembler::isGFX10Plus() const {
1811 return AMDGPU::isGFX10Plus(STI);
1812}
1813
1814bool AMDGPUDisassembler::isGFX11() const {
1815 return STI.hasFeature(AMDGPU::FeatureGFX11);
1816}
1817
1818bool AMDGPUDisassembler::isGFX11Plus() const {
1819 return AMDGPU::isGFX11Plus(STI);
1820}
1821
1822bool AMDGPUDisassembler::isGFX12() const {
1823 return STI.hasFeature(AMDGPU::FeatureGFX12);
1824}
1825
1826bool AMDGPUDisassembler::isGFX12Plus() const {
1827 return AMDGPU::isGFX12Plus(STI);
1828}
1829
1830bool AMDGPUDisassembler::hasArchitectedFlatScratch() const {
1831 return STI.hasFeature(AMDGPU::FeatureArchitectedFlatScratch);
1832}
1833
1834bool AMDGPUDisassembler::hasKernargPreload() const {
1835 return AMDGPU::hasKernargPreload(STI);
1836}
1837
1838//===----------------------------------------------------------------------===//
1839// AMDGPU specific symbol handling
1840//===----------------------------------------------------------------------===//
1841
1842/// Print a string describing the reserved bit range specified by Mask with
1843 /// offset BaseBytes for use in error comments. Mask is a single contiguous
1844/// range of 1s surrounded by zeros. The format here is meant to align with the
1845/// tables that describe these bits in llvm.org/docs/AMDGPUUsage.html.
1846static SmallString<32> getBitRangeFromMask(uint32_t Mask, unsigned BaseBytes) {
1847 SmallString<32> Result;
1848 raw_svector_ostream S(Result);
1849
1850 int TrailingZeros = llvm::countr_zero(Mask);
1851 int PopCount = llvm::popcount(Mask);
1852
1853 if (PopCount == 1) {
1854 S << "bit (" << (TrailingZeros + BaseBytes * CHAR_BIT) << ')';
1855 } else {
1856 S << "bits in range ("
1857 << (TrailingZeros + PopCount - 1 + BaseBytes * CHAR_BIT) << ':'
1858 << (TrailingZeros + BaseBytes * CHAR_BIT) << ')';
1859 }
1860
1861 return Result;
1862}
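// Worked example (editor's illustration): Mask = 0x00000C00 with BaseBytes = 0
// has countr_zero == 10 and popcount == 2, so this prints
// "bits in range (11:10)"; Mask = 0x00000001 with BaseBytes = 4 prints
// "bit (32)".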
1863
1864#define GET_FIELD(MASK) (AMDHSA_BITS_GET(FourByteBuffer, MASK))
1865#define PRINT_DIRECTIVE(DIRECTIVE, MASK) \
1866 do { \
1867 KdStream << Indent << DIRECTIVE " " << GET_FIELD(MASK) << '\n'; \
1868 } while (0)
1869#define PRINT_PSEUDO_DIRECTIVE_COMMENT(DIRECTIVE, MASK) \
1870 do { \
1871 KdStream << Indent << MAI.getCommentString() << ' ' << DIRECTIVE " " \
1872 << GET_FIELD(MASK) << '\n'; \
1873 } while (0)
1874
1875#define CHECK_RESERVED_BITS_IMPL(MASK, DESC, MSG) \
1876 do { \
1877 if (FourByteBuffer & (MASK)) { \
1878 return createStringError(std::errc::invalid_argument, \
1879 "kernel descriptor " DESC \
1880 " reserved %s set" MSG, \
1881 getBitRangeFromMask((MASK), 0).c_str()); \
1882 } \
1883 } while (0)
1884
1885#define CHECK_RESERVED_BITS(MASK) CHECK_RESERVED_BITS_IMPL(MASK, #MASK, "")
1886#define CHECK_RESERVED_BITS_MSG(MASK, MSG) \
1887 CHECK_RESERVED_BITS_IMPL(MASK, #MASK, ", " MSG)
1888#define CHECK_RESERVED_BITS_DESC(MASK, DESC) \
1889 CHECK_RESERVED_BITS_IMPL(MASK, DESC, "")
1890#define CHECK_RESERVED_BITS_DESC_MSG(MASK, DESC, MSG) \
1891 CHECK_RESERVED_BITS_IMPL(MASK, DESC, ", " MSG)
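// Expansion sketch (editor's note; SOME_MASK is a placeholder, not a real
// field): CHECK_RESERVED_BITS_MSG(SOME_MASK, "must be zero") expands to
// CHECK_RESERVED_BITS_IMPL(SOME_MASK, "SOME_MASK", ", must be zero"), i.e. an
// early return of createStringError(...) whose message reads
// "kernel descriptor SOME_MASK reserved <bit range> set, must be zero"
// whenever FourByteBuffer has any of the masked bits set.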
1892
1893// NOLINTNEXTLINE(readability-identifier-naming)
1894Expected<bool> AMDGPUDisassembler::decodeCOMPUTE_PGM_RSRC1(
1895 uint32_t FourByteBuffer, raw_string_ostream &KdStream) const {
1896 using namespace amdhsa;
1897 StringRef Indent = "\t";
1898
1899 // We cannot accurately backward compute #VGPRs used from
1900 // GRANULATED_WORKITEM_VGPR_COUNT. But we are concerned with getting the same
1901 // value of GRANULATED_WORKITEM_VGPR_COUNT in the reassembled binary. So we
1902 // simply calculate the inverse of what the assembler does.
1903
1904 uint32_t GranulatedWorkitemVGPRCount =
1905 GET_FIELD(COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT);
1906
1907 uint32_t NextFreeVGPR =
1908 (GranulatedWorkitemVGPRCount + 1) *
1909 AMDGPU::IsaInfo::getVGPREncodingGranule(&STI, EnableWavefrontSize32);
1910
1911 KdStream << Indent << ".amdhsa_next_free_vgpr " << NextFreeVGPR << '\n';
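// Editor's illustration (assuming a VGPR encoding granule of 8, e.g. gfx10
// with wave32): GRANULATED_WORKITEM_VGPR_COUNT = 3 prints
// .amdhsa_next_free_vgpr (3 + 1) * 8 = 32; reassembling 32 granulates back
// to ceil(32 / 8) - 1 = 3, so the field round-trips even though any true
// VGPR count in 25..32 would have produced the same encoding.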
1912
1913 // We cannot backward compute values used to calculate
1914 // GRANULATED_WAVEFRONT_SGPR_COUNT. Hence the original values for the following
1915 // directives can't be computed:
1916 // .amdhsa_reserve_vcc
1917 // .amdhsa_reserve_flat_scratch
1918 // .amdhsa_reserve_xnack_mask
1919 // They take their respective default values if not specified in the assembly.
1920 //
1921 // GRANULATED_WAVEFRONT_SGPR_COUNT
1922 // = f(NEXT_FREE_SGPR + VCC + FLAT_SCRATCH + XNACK_MASK)
1923 //
1924 // We compute the inverse as though all directives apart from NEXT_FREE_SGPR
1925 // are set to 0. So while disassembling we consider that:
1926 //
1927 // GRANULATED_WAVEFRONT_SGPR_COUNT
1928 // = f(NEXT_FREE_SGPR + 0 + 0 + 0)
1929 //
1930 // The disassembler cannot recover the original values of those 3 directives.
1931
1932 uint32_t GranulatedWavefrontSGPRCount =
1933 GET_FIELD(COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT);
1934
1935 if (isGFX10Plus())
1936 CHECK_RESERVED_BITS_MSG(COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT,
1937 "must be zero on gfx10+");
1938
1939 uint32_t NextFreeSGPR = (GranulatedWavefrontSGPRCount + 1) *
1940 AMDGPU::IsaInfo::getSGPREncodingGranule(&STI);
1941
1942 KdStream << Indent << ".amdhsa_reserve_vcc " << 0 << '\n';
1943 if (!hasArchitectedFlatScratch())
1944 KdStream << Indent << ".amdhsa_reserve_flat_scratch " << 0 << '\n';
1945 KdStream << Indent << ".amdhsa_reserve_xnack_mask " << 0 << '\n';
1946 KdStream << Indent << ".amdhsa_next_free_sgpr " << NextFreeSGPR << "\n";
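// Editor's illustration (assuming an SGPR encoding granule of 8): a kernel
// assembled with .amdhsa_next_free_sgpr 10 and VCC reserved granulates
// (10 + 2) to GRANULATED_WAVEFRONT_SGPR_COUNT = 1; the disassembler prints
// .amdhsa_next_free_sgpr (1 + 1) * 8 = 16 with all three reserve directives
// at 0, which re-encodes to the same granulated count of 1.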
1947
1948 CHECK_RESERVED_BITS(COMPUTE_PGM_RSRC1_PRIORITY);
1949
1950 PRINT_DIRECTIVE(".amdhsa_float_round_mode_32",
1951 COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_32);
1952 PRINT_DIRECTIVE(".amdhsa_float_round_mode_16_64",
1953 COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_16_64);
1954 PRINT_DIRECTIVE(".amdhsa_float_denorm_mode_32",
1955 COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_32);
1956 PRINT_DIRECTIVE(".amdhsa_float_denorm_mode_16_64",
1957 COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_16_64);
1958
1959 CHECK_RESERVED_BITS(COMPUTE_PGM_RSRC1_PRIV);
1960
1961 if (!isGFX12Plus())
1962 PRINT_DIRECTIVE(".amdhsa_dx10_clamp",
1963 COMPUTE_PGM_RSRC1_GFX6_GFX11_ENABLE_DX10_CLAMP);
1964
1965 CHECK_RESERVED_BITS(COMPUTE_PGM_RSRC1_DEBUG_MODE);
1966
1967 if (!isGFX12Plus())
1968 PRINT_DIRECTIVE(".amdhsa_ieee_mode",
1969 COMPUTE_PGM_RSRC1_GFX6_GFX11_ENABLE_IEEE_MODE);
1970
1971 CHECK_RESERVED_BITS(COMPUTE_PGM_RSRC1_BULKY);
1972 CHECK_RESERVED_BITS(COMPUTE_PGM_RSRC1_CDBG_USER);
1973
1974 if (isGFX9Plus())
1975 PRINT_DIRECTIVE(".amdhsa_fp16_overflow", COMPUTE_PGM_RSRC1_GFX9_PLUS_FP16_OVFL);
1976
1977 if (!isGFX9Plus())
1978 CHECK_RESERVED_BITS_DESC_MSG(COMPUTE_PGM_RSRC1_GFX6_GFX8_RESERVED0,
1979 "COMPUTE_PGM_RSRC1", "must be zero pre-gfx9");
1980
1981 CHECK_RESERVED_BITS_DESC(COMPUTE_PGM_RSRC1_RESERVED1, "COMPUTE_PGM_RSRC1");
1982
1983 if (!isGFX10Plus())
1984 CHECK_RESERVED_BITS_DESC_MSG(COMPUTE_PGM_RSRC1_GFX6_GFX9_RESERVED2,
1985 "COMPUTE_PGM_RSRC1", "must be zero pre-gfx10");
1986
1987 if (isGFX10Plus()) {
1988 PRINT_DIRECTIVE(".amdhsa_workgroup_processor_mode",
1989 COMPUTE_PGM_RSRC1_GFX10_PLUS_WGP_MODE);
1990 PRINT_DIRECTIVE(".amdhsa_memory_ordered", COMPUTE_PGM_RSRC1_GFX10_PLUS_MEM_ORDERED);
1991 PRINT_DIRECTIVE(".amdhsa_forward_progress", COMPUTE_PGM_RSRC1_GFX10_PLUS_FWD_PROGRESS);
1992 }
1993
1994 if (isGFX12Plus())
1995 PRINT_DIRECTIVE(".amdhsa_round_robin_scheduling",
1996 COMPUTE_PGM_RSRC1_GFX12_PLUS_ENABLE_WG_RR_EN);
1997
1998 return true;
1999}
2000
2001// NOLINTNEXTLINE(readability-identifier-naming)
2002Expected<bool> AMDGPUDisassembler::decodeCOMPUTE_PGM_RSRC2(
2003 uint32_t FourByteBuffer, raw_string_ostream &KdStream) const {
2004 using namespace amdhsa;
2005 StringRef Indent = "\t";
2006 if (isGFX12Plus())
2007 PRINT_DIRECTIVE(".amdhsa_enable_private_segment",
2008 COMPUTE_PGM_RSRC2_ENABLE_PRIVATE_SEGMENT);
2009 else
2010 PRINT_DIRECTIVE(".amdhsa_system_sgpr_private_segment_wavefront_offset",
2011 COMPUTE_PGM_RSRC2_ENABLE_PRIVATE_SEGMENT);
2012 PRINT_DIRECTIVE(".amdhsa_system_sgpr_workgroup_id_x",
2013 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_X);
2014 PRINT_DIRECTIVE(".amdhsa_system_sgpr_workgroup_id_y",
2015 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Y);
2016 PRINT_DIRECTIVE(".amdhsa_system_sgpr_workgroup_id_z",
2017 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Z);
2018 PRINT_DIRECTIVE(".amdhsa_system_sgpr_workgroup_info",
2019 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_INFO);
2020 PRINT_DIRECTIVE(".amdhsa_system_vgpr_workitem_id",
2021 COMPUTE_PGM_RSRC2_ENABLE_VGPR_WORKITEM_ID);
2022
2023 CHECK_RESERVED_BITS(COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_ADDRESS_WATCH);
2024 CHECK_RESERVED_BITS(COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_MEMORY);
2025 CHECK_RESERVED_BITS(COMPUTE_PGM_RSRC2_GRANULATED_LDS_SIZE);
2026
2028 ".amdhsa_exception_fp_ieee_invalid_op",
2029 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INVALID_OPERATION);
2030 PRINT_DIRECTIVE(".amdhsa_exception_fp_denorm_src",
2031 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_FP_DENORMAL_SOURCE);
2033 ".amdhsa_exception_fp_ieee_div_zero",
2034 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_DIVISION_BY_ZERO);
2035 PRINT_DIRECTIVE(".amdhsa_exception_fp_ieee_overflow",
2036 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_OVERFLOW);
2037 PRINT_DIRECTIVE(".amdhsa_exception_fp_ieee_underflow",
2038 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_UNDERFLOW);
2039 PRINT_DIRECTIVE(".amdhsa_exception_fp_ieee_inexact",
2040 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INEXACT);
2041 PRINT_DIRECTIVE(".amdhsa_exception_int_div_zero",
2042 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_INT_DIVIDE_BY_ZERO);
2043
2044 CHECK_RESERVED_BITS_DESC(COMPUTE_PGM_RSRC2_RESERVED0, "COMPUTE_PGM_RSRC2");
2045
2046 return true;
2047}
2048
2049// NOLINTNEXTLINE(readability-identifier-naming)
2050Expected<bool> AMDGPUDisassembler::decodeCOMPUTE_PGM_RSRC3(
2051 uint32_t FourByteBuffer, raw_string_ostream &KdStream) const {
2052 using namespace amdhsa;
2053 StringRef Indent = "\t";
2054 if (isGFX90A()) {
2055 KdStream << Indent << ".amdhsa_accum_offset "
2056 << (GET_FIELD(COMPUTE_PGM_RSRC3_GFX90A_ACCUM_OFFSET) + 1) * 4
2057 << '\n';
2058
2059 PRINT_DIRECTIVE(".amdhsa_tg_split", COMPUTE_PGM_RSRC3_GFX90A_TG_SPLIT);
2060
2061 CHECK_RESERVED_BITS_DESC_MSG(COMPUTE_PGM_RSRC3_GFX90A_RESERVED0,
2062 "COMPUTE_PGM_RSRC3", "must be zero on gfx90a");
2063 CHECK_RESERVED_BITS_DESC_MSG(COMPUTE_PGM_RSRC3_GFX90A_RESERVED1,
2064 "COMPUTE_PGM_RSRC3", "must be zero on gfx90a");
2065 } else if (isGFX10Plus()) {
2066 // Bits [0-3].
2067 if (!isGFX12Plus()) {
2068 if (!EnableWavefrontSize32 || !*EnableWavefrontSize32) {
2069 PRINT_DIRECTIVE(".amdhsa_shared_vgpr_count",
2070 COMPUTE_PGM_RSRC3_GFX10_GFX11_SHARED_VGPR_COUNT);
2071 } else {
2073 "SHARED_VGPR_COUNT",
2074 COMPUTE_PGM_RSRC3_GFX10_GFX11_SHARED_VGPR_COUNT);
2075 }
2076 } else {
2077 CHECK_RESERVED_BITS_DESC_MSG(COMPUTE_PGM_RSRC3_GFX12_PLUS_RESERVED0,
2078 "COMPUTE_PGM_RSRC3",
2079 "must be zero on gfx12+");
2080 }
2081
2082 // Bits [4-11].
2083 if (isGFX11()) {
2084 PRINT_PSEUDO_DIRECTIVE_COMMENT("INST_PREF_SIZE",
2085 COMPUTE_PGM_RSRC3_GFX11_INST_PREF_SIZE);
2086 PRINT_PSEUDO_DIRECTIVE_COMMENT("TRAP_ON_START",
2087 COMPUTE_PGM_RSRC3_GFX11_TRAP_ON_START);
2088 PRINT_PSEUDO_DIRECTIVE_COMMENT("TRAP_ON_END",
2089 COMPUTE_PGM_RSRC3_GFX11_TRAP_ON_END);
2090 } else if (isGFX12Plus()) {
2092 "INST_PREF_SIZE", COMPUTE_PGM_RSRC3_GFX12_PLUS_INST_PREF_SIZE);
2093 } else {
2094 CHECK_RESERVED_BITS_DESC_MSG(COMPUTE_PGM_RSRC3_GFX10_RESERVED1,
2095 "COMPUTE_PGM_RSRC3",
2096 "must be zero on gfx10");
2097 }
2098
2099 // Bit [12].
2100 CHECK_RESERVED_BITS_DESC_MSG(COMPUTE_PGM_RSRC3_GFX10_PLUS_RESERVED2,
2101 "COMPUTE_PGM_RSRC3", "must be zero on gfx10+");
2102
2103 // Bit [13].
2104 if (isGFX12Plus()) {
2105 PRINT_PSEUDO_DIRECTIVE_COMMENT("GLG_EN",
2106 COMPUTE_PGM_RSRC3_GFX12_PLUS_GLG_EN);
2107 } else {
2108 CHECK_RESERVED_BITS_DESC_MSG(COMPUTE_PGM_RSRC3_GFX10_GFX11_RESERVED3,
2109 "COMPUTE_PGM_RSRC3",
2110 "must be zero on gfx10 or gfx11");
2111 }
2112
2113 // Bits [14-30].
2114 CHECK_RESERVED_BITS_DESC_MSG(COMPUTE_PGM_RSRC3_GFX10_PLUS_RESERVED4,
2115 "COMPUTE_PGM_RSRC3", "must be zero on gfx10+");
2116
2117 // Bit [31].
2118 if (isGFX11Plus()) {
2119 PRINT_PSEUDO_DIRECTIVE_COMMENT("IMAGE_OP",
2120 COMPUTE_PGM_RSRC3_GFX11_PLUS_IMAGE_OP);
2121 } else {
2122 CHECK_RESERVED_BITS_DESC_MSG(COMPUTE_PGM_RSRC3_GFX10_RESERVED5,
2123 "COMPUTE_PGM_RSRC3",
2124 "must be zero on gfx10");
2125 }
2126 } else if (FourByteBuffer) {
2127 return createStringError(
2128 std::errc::invalid_argument,
2129 "kernel descriptor COMPUTE_PGM_RSRC3 must be all zero before gfx9");
2130 }
2131 return true;
2132}
2133#undef PRINT_PSEUDO_DIRECTIVE_COMMENT
2134#undef PRINT_DIRECTIVE
2135#undef GET_FIELD
2136#undef CHECK_RESERVED_BITS_IMPL
2137#undef CHECK_RESERVED_BITS
2138#undef CHECK_RESERVED_BITS_MSG
2139#undef CHECK_RESERVED_BITS_DESC
2140#undef CHECK_RESERVED_BITS_DESC_MSG
2141
2142/// Create an error object to return from onSymbolStart for reserved kernel
2143/// descriptor bits being set.
2144static Error createReservedKDBitsError(uint32_t Mask, unsigned BaseBytes,
2145 const char *Msg = "") {
2146 return createStringError(
2147 std::errc::invalid_argument, "kernel descriptor reserved %s set%s%s",
2148 getBitRangeFromMask(Mask, BaseBytes).c_str(), *Msg ? ", " : "", Msg);
2149}
2150
2151/// Create an error object to return from onSymbolStart for reserved kernel
2152/// descriptor bytes being set.
2153static Error createReservedKDBytesError(unsigned BaseInBytes,
2154 unsigned WidthInBytes) {
2155 // Create an error comment in the same format as the "Kernel Descriptor"
2156 // table here: https://llvm.org/docs/AMDGPUUsage.html#kernel-descriptor .
2157 return createStringError(
2158 std::errc::invalid_argument,
2159 "kernel descriptor reserved bits in range (%u:%u) set",
2160 (BaseInBytes + WidthInBytes) * CHAR_BIT - 1, BaseInBytes * CHAR_BIT);
2161}
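// For instance (editor's illustration): createReservedKDBytesError(12, 4),
// describing 4 reserved bytes at byte offset 12, produces
// "kernel descriptor reserved bits in range (127:96) set", matching the
// byte-offset rows of the kernel descriptor table in AMDGPUUsage.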
2162
2163Expected<bool> AMDGPUDisassembler::decodeKernelDescriptorDirective(
2164 DataExtractor::Cursor &Cursor, ArrayRef<uint8_t> Bytes,
2165 raw_string_ostream &KdStream) const {
2166#define PRINT_DIRECTIVE(DIRECTIVE, MASK) \
2167 do { \
2168 KdStream << Indent << DIRECTIVE " " \
2169 << ((TwoByteBuffer & MASK) >> (MASK##_SHIFT)) << '\n'; \
2170 } while (0)
2171
2172 uint16_t TwoByteBuffer = 0;
2173 uint32_t FourByteBuffer = 0;
2174
2175 StringRef ReservedBytes;
2176 StringRef Indent = "\t";
2177
2178 assert(Bytes.size() == 64);
2179 DataExtractor DE(Bytes, /*IsLittleEndian=*/true, /*AddressSize=*/8);
2180
2181 switch (Cursor.tell()) {
2182 case amdhsa::GROUP_SEGMENT_FIXED_SIZE_OFFSET:
2183 FourByteBuffer = DE.getU32(Cursor);
2184 KdStream << Indent << ".amdhsa_group_segment_fixed_size " << FourByteBuffer
2185 << '\n';
2186 return true;
2187
2188 case amdhsa::PRIVATE_SEGMENT_FIXED_SIZE_OFFSET:
2189 FourByteBuffer = DE.getU32(Cursor);
2190 KdStream << Indent << ".amdhsa_private_segment_fixed_size "
2191 << FourByteBuffer << '\n';
2192 return true;
2193
2194 case amdhsa::KERNARG_SIZE_OFFSET:
2195 FourByteBuffer = DE.getU32(Cursor);
2196 KdStream << Indent << ".amdhsa_kernarg_size "
2197 << FourByteBuffer << '\n';
2198 return true;
2199
2200 case amdhsa::RESERVED0_OFFSET:
2201 // 4 reserved bytes, must be 0.
2202 ReservedBytes = DE.getBytes(Cursor, 4);
2203 for (int I = 0; I < 4; ++I) {
2204 if (ReservedBytes[I] != 0)
2205 return createReservedKDBytesError(amdhsa::RESERVED0_OFFSET, 4);
2206 }
2207 return true;
2208
2209 case amdhsa::KERNEL_CODE_ENTRY_BYTE_OFFSET_OFFSET:
2210 // KERNEL_CODE_ENTRY_BYTE_OFFSET
2211 // So far no directive controls this for Code Object V3, so simply skip for
2212 // disassembly.
2213 DE.skip(Cursor, 8);
2214 return true;
2215
2216 case amdhsa::RESERVED1_OFFSET:
2217 // 20 reserved bytes, must be 0.
2218 ReservedBytes = DE.getBytes(Cursor, 20);
2219 for (int I = 0; I < 20; ++I) {
2220 if (ReservedBytes[I] != 0)
2221 return createReservedKDBytesError(amdhsa::RESERVED1_OFFSET, 20);
2222 }
2223 return true;
2224
2225 case amdhsa::COMPUTE_PGM_RSRC3_OFFSET:
2226 FourByteBuffer = DE.getU32(Cursor);
2227 return decodeCOMPUTE_PGM_RSRC3(FourByteBuffer, KdStream);
2228
2229 case amdhsa::COMPUTE_PGM_RSRC1_OFFSET:
2230 FourByteBuffer = DE.getU32(Cursor);
2231 return decodeCOMPUTE_PGM_RSRC1(FourByteBuffer, KdStream);
2232
2233 case amdhsa::COMPUTE_PGM_RSRC2_OFFSET:
2234 FourByteBuffer = DE.getU32(Cursor);
2235 return decodeCOMPUTE_PGM_RSRC2(FourByteBuffer, KdStream);
2236
2237 case amdhsa::KERNEL_CODE_PROPERTIES_OFFSET:
2238 using namespace amdhsa;
2239 TwoByteBuffer = DE.getU16(Cursor);
2240
2241 if (!hasArchitectedFlatScratch())
2242 PRINT_DIRECTIVE(".amdhsa_user_sgpr_private_segment_buffer",
2243 KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER);
2244 PRINT_DIRECTIVE(".amdhsa_user_sgpr_dispatch_ptr",
2245 KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR);
2246 PRINT_DIRECTIVE(".amdhsa_user_sgpr_queue_ptr",
2247 KERNEL_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR);
2248 PRINT_DIRECTIVE(".amdhsa_user_sgpr_kernarg_segment_ptr",
2249 KERNEL_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR);
2250 PRINT_DIRECTIVE(".amdhsa_user_sgpr_dispatch_id",
2251 KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID);
2252 if (!hasArchitectedFlatScratch())
2253 PRINT_DIRECTIVE(".amdhsa_user_sgpr_flat_scratch_init",
2254 KERNEL_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT);
2255 PRINT_DIRECTIVE(".amdhsa_user_sgpr_private_segment_size",
2256 KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE);
2257
2258 if (TwoByteBuffer & KERNEL_CODE_PROPERTY_RESERVED0)
2259 return createReservedKDBitsError(KERNEL_CODE_PROPERTY_RESERVED0,
2260 amdhsa::KERNEL_CODE_PROPERTIES_OFFSET);
2261
2262 // Reserved for GFX9
2263 if (isGFX9() &&
2264 (TwoByteBuffer & KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32)) {
2265 return createReservedKDBitsError(
2266 KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32,
2267 amdhsa::KERNEL_CODE_PROPERTIES_OFFSET, "must be zero on gfx9");
2268 } else if (isGFX10Plus()) {
2269 PRINT_DIRECTIVE(".amdhsa_wavefront_size32",
2270 KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32);
2271 }
2272
2273 if (CodeObjectVersion >= AMDGPU::AMDHSA_COV5)
2274 PRINT_DIRECTIVE(".amdhsa_uses_dynamic_stack",
2275 KERNEL_CODE_PROPERTY_USES_DYNAMIC_STACK);
2276
2277 if (TwoByteBuffer & KERNEL_CODE_PROPERTY_RESERVED1) {
2278 return createReservedKDBitsError(KERNEL_CODE_PROPERTY_RESERVED1,
2279 amdhsa::KERNEL_CODE_PROPERTIES_OFFSET);
2280 }
2281
2282 return true;
2283
2284 case amdhsa::KERNARG_PRELOAD_OFFSET:
2285 using namespace amdhsa;
2286 TwoByteBuffer = DE.getU16(Cursor);
2287 if (TwoByteBuffer & KERNARG_PRELOAD_SPEC_LENGTH) {
2288 PRINT_DIRECTIVE(".amdhsa_user_sgpr_kernarg_preload_length",
2289 KERNARG_PRELOAD_SPEC_LENGTH);
2290 }
2291
2292 if (TwoByteBuffer & KERNARG_PRELOAD_SPEC_OFFSET) {
2293 PRINT_DIRECTIVE(".amdhsa_user_sgpr_kernarg_preload_offset",
2294 KERNARG_PRELOAD_SPEC_OFFSET);
2295 }
2296 return true;
2297
2298 case amdhsa::RESERVED3_OFFSET:
2299 // 4 bytes from here are reserved, must be 0.
2300 ReservedBytes = DE.getBytes(Cursor, 4);
2301 for (int I = 0; I < 4; ++I) {
2302 if (ReservedBytes[I] != 0)
2303 return createReservedKDBytesError(amdhsa::RESERVED3_OFFSET, 4);
2304 }
2305 return true;
2306
2307 default:
2308 llvm_unreachable("Unhandled index. Case statements cover everything.");
2309 return true;
2310 }
2311#undef PRINT_DIRECTIVE
2312}
2313
2314Expected<bool> AMDGPUDisassembler::decodeKernelDescriptor(
2315 StringRef KdName, ArrayRef<uint8_t> Bytes, uint64_t KdAddress) const {
2316
2317 // CP microcode requires the kernel descriptor to be 64-byte aligned.
2318 if (Bytes.size() != 64 || KdAddress % 64 != 0)
2319 return createStringError(std::errc::invalid_argument,
2320 "kernel descriptor must be 64-byte aligned");
2321
2322 // FIXME: We can't actually decode "in order" as is done below, as e.g. GFX10
2323 // requires us to know the setting of .amdhsa_wavefront_size32 in order to
2324 // accurately produce .amdhsa_next_free_vgpr, and they appear in the wrong
2325 // order. Work around this by first looking up .amdhsa_wavefront_size32 here
2326 // when required.
2327 if (isGFX10Plus()) {
2328 uint16_t KernelCodeProperties =
2329 support::endian::read16(&Bytes[amdhsa::KERNEL_CODE_PROPERTIES_OFFSET],
2330 llvm::endianness::little);
2331 EnableWavefrontSize32 =
2332 AMDHSA_BITS_GET(KernelCodeProperties,
2333 amdhsa::KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32);
2334 }
2335
2336 std::string Kd;
2337 raw_string_ostream KdStream(Kd);
2338 KdStream << ".amdhsa_kernel " << KdName << '\n';
2339
2340 DataExtractor::Cursor C(0);
2341 while (C && C.tell() < Bytes.size()) {
2342 Expected<bool> Res = decodeKernelDescriptorDirective(C, Bytes, KdStream);
2343
2344 cantFail(C.takeError());
2345
2346 if (!Res)
2347 return Res;
2348 }
2349 KdStream << ".end_amdhsa_kernel\n";
2350 outs() << KdStream.str();
2351 return true;
2352}
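// The emitted directive block has this overall shape (editor's abbreviated
// illustration; names and values depend on the descriptor bytes):
// .amdhsa_kernel my_kernel
// .amdhsa_group_segment_fixed_size 0
// .amdhsa_private_segment_fixed_size 0
// .amdhsa_kernarg_size 64
// ...
// .amdhsa_next_free_vgpr 32
// .amdhsa_next_free_sgpr 16
// .end_amdhsa_kernel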
2353
2354Expected<bool> AMDGPUDisassembler::onSymbolStart(SymbolInfoTy &Symbol,
2355 uint64_t &Size,
2356 ArrayRef<uint8_t> Bytes,
2357 uint64_t Address) const {
2358 // Right now only the kernel descriptor needs to be handled.
2359 // We ignore all other symbols for target-specific handling.
2360 // TODO:
2361 // Fix the spurious symbol issue for AMDGPU kernels. Exists for both Code
2362 // Object V2 and V3 when symbols are marked protected.
2363
2364 // amd_kernel_code_t for Code Object V2.
2365 if (Symbol.Type == ELF::STT_AMDGPU_HSA_KERNEL) {
2366 Size = 256;
2367 return createStringError(std::errc::invalid_argument,
2368 "code object v2 is not supported");
2369 }
2370
2371 // Code Object V3 kernel descriptors.
2372 StringRef Name = Symbol.Name;
2373 if (Symbol.Type == ELF::STT_OBJECT && Name.ends_with(StringRef(".kd"))) {
2374 Size = 64; // Size = 64 regardless of success or failure.
2375 return decodeKernelDescriptor(Name.drop_back(3), Bytes, Address);
2376 }
2377
2378 return false;
2379}
2380
2381const MCExpr *AMDGPUDisassembler::createConstantSymbolExpr(StringRef Id,
2382 int64_t Val) {
2383 MCContext &Ctx = getContext();
2384 MCSymbol *Sym = Ctx.getOrCreateSymbol(Id);
2385 // Note: only set the value to Val on a new symbol, in case a disassembler
2386 // has already been initialized in this context.
2387 if (!Sym->isVariable()) {
2388 Sym->setVariableValue(MCConstantExpr::create(Val, Ctx));
2389 } else {
2390 int64_t Res = ~Val;
2391 bool Valid = Sym->getVariableValue()->evaluateAsAbsolute(Res);
2392 if (!Valid || Res != Val)
2393 Ctx.reportWarning(SMLoc(), "unsupported redefinition of " + Id);
2394 }
2395 return MCSymbolRefExpr::create(Sym, Ctx);
2396}
2397
2398//===----------------------------------------------------------------------===//
2399// AMDGPUSymbolizer
2400//===----------------------------------------------------------------------===//
2401
2402// Try to find symbol name for specified label
2403bool AMDGPUSymbolizer::tryAddingSymbolicOperand(
2404 MCInst &Inst, raw_ostream & /*cStream*/, int64_t Value,
2405 uint64_t /*Address*/, bool IsBranch, uint64_t /*Offset*/,
2406 uint64_t /*OpSize*/, uint64_t /*InstSize*/) {
2407
2408 if (!IsBranch) {
2409 return false;
2410 }
2411
2412 auto *Symbols = static_cast<SectionSymbolsTy *>(DisInfo);
2413 if (!Symbols)
2414 return false;
2415
2416 auto Result = llvm::find_if(*Symbols, [Value](const SymbolInfoTy &Val) {
2417 return Val.Addr == static_cast<uint64_t>(Value) &&
2418 Val.Type == ELF::STT_NOTYPE;
2419 });
2420 if (Result != Symbols->end()) {
2421 auto *Sym = Ctx.getOrCreateSymbol(Result->Name);
2422 const auto *Add = MCSymbolRefExpr::create(Sym, Ctx);
2423 Inst.addOperand(MCOperand::createExpr(Add));
2424 return true;
2425 }
2426 // Add to list of referenced addresses, so caller can synthesize a label.
2427 ReferencedAddresses.push_back(static_cast<uint64_t>(Value));
2428 return false;
2429}
2430
2431void AMDGPUSymbolizer::tryAddingPcLoadReferenceComment(raw_ostream &cStream,
2432 int64_t Value,
2433 uint64_t Address) {
2434 llvm_unreachable("unimplemented");
2435}
2436
2437//===----------------------------------------------------------------------===//
2438// Initialization
2439//===----------------------------------------------------------------------===//
2440
2441static MCSymbolizer *createAMDGPUSymbolizer(const Triple &/*TT*/,
2442 LLVMOpInfoCallback /*GetOpInfo*/,
2443 LLVMSymbolLookupCallback /*SymbolLookUp*/,
2444 void *DisInfo,
2445 MCContext *Ctx,
2446 std::unique_ptr<MCRelocationInfo> &&RelInfo) {
2447 return new AMDGPUSymbolizer(*Ctx, std::move(RelInfo), DisInfo);
2448}
2449
2450static MCDisassembler *createAMDGPUDisassembler(const Target &T,
2451 const MCSubtargetInfo &STI,
2452 MCContext &Ctx) {
2453 return new AMDGPUDisassembler(STI, Ctx, T.createMCInstrInfo());
2454}
2455
2456extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeAMDGPUDisassembler() {
2457 TargetRegistry::RegisterMCDisassembler(getTheGCNTarget(),
2458 createAMDGPUDisassembler);
2459 TargetRegistry::RegisterMCSymbolizer(getTheGCNTarget(),
2460 createAMDGPUSymbolizer);
2461}