LLVM 19.0.0git
AMDGPUDisassembler.cpp
Go to the documentation of this file.
1//===- AMDGPUDisassembler.cpp - Disassembler for AMDGPU ISA ---------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9//===----------------------------------------------------------------------===//
10//
11/// \file
12///
13/// This file contains definition for AMDGPU ISA disassembler
14//
15//===----------------------------------------------------------------------===//
16
17// ToDo: What to do with instruction suffixes (v_mov_b32 vs v_mov_b32_e32)?
18
#include "Disassembler/AMDGPUDisassembler.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "SIDefines.h"
#include "SIRegisterInfo.h"
#include "TargetInfo/AMDGPUTargetInfo.h"
#include "Utils/AMDGPUAsmUtils.h"
#include "Utils/AMDGPUBaseInfo.h"
#include "llvm-c/DisassemblerTypes.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCDecoderOps.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCInstrDesc.h"
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/MC/TargetRegistry.h"
#include "llvm/Support/AMDHSAKernelDescriptor.h"
36
37using namespace llvm;
38
39#define DEBUG_TYPE "amdgpu-disassembler"
40
// Highest SGPR source encoding accepted by the current subtarget; GFX10+
// widened the SGPR encoding range relative to SI..GFX9.
#define SGPR_MAX \
  (isGFX10Plus() ? AMDGPU::EncValues::SGPR_MAX_GFX10 \
                 : AMDGPU::EncValues::SGPR_MAX_SI)
44
46
48 MCContext &Ctx, MCInstrInfo const *MCII)
49 : MCDisassembler(STI, Ctx), MCII(MCII), MRI(*Ctx.getRegisterInfo()),
50 MAI(*Ctx.getAsmInfo()), TargetMaxInstBytes(MAI.getMaxInstLength(&STI)),
51 CodeObjectVersion(AMDGPU::getDefaultAMDHSACodeObjectVersion()) {
52 // ToDo: AMDGPUDisassembler supports only VI ISA.
53 if (!STI.hasFeature(AMDGPU::FeatureGCN3Encoding) && !isGFX10Plus())
54 report_fatal_error("Disassembly not yet supported for subtarget");
55}
56
59}
60
62addOperand(MCInst &Inst, const MCOperand& Opnd) {
63 Inst.addOperand(Opnd);
64 return Opnd.isValid() ?
67}
68
70 uint16_t NameIdx) {
71 int OpIdx = AMDGPU::getNamedOperandIdx(MI.getOpcode(), NameIdx);
72 if (OpIdx != -1) {
73 auto I = MI.begin();
74 std::advance(I, OpIdx);
75 MI.insert(I, Op);
76 }
77 return OpIdx;
78}
79
80static DecodeStatus decodeSOPPBrTarget(MCInst &Inst, unsigned Imm,
82 const MCDisassembler *Decoder) {
83 auto DAsm = static_cast<const AMDGPUDisassembler*>(Decoder);
84
85 // Our branches take a simm16, but we need two extra bits to account for the
86 // factor of 4.
87 APInt SignedOffset(18, Imm * 4, true);
88 int64_t Offset = (SignedOffset.sext(64) + 4 + Addr).getSExtValue();
89
90 if (DAsm->tryAddingSymbolicOperand(Inst, Offset, Addr, true, 2, 2, 0))
92 return addOperand(Inst, MCOperand::createImm(Imm));
93}
94
95static DecodeStatus decodeSMEMOffset(MCInst &Inst, unsigned Imm, uint64_t Addr,
96 const MCDisassembler *Decoder) {
97 auto DAsm = static_cast<const AMDGPUDisassembler*>(Decoder);
98 int64_t Offset;
99 if (DAsm->isGFX12Plus()) { // GFX12 supports 24-bit signed offsets.
100 Offset = SignExtend64<24>(Imm);
101 } else if (DAsm->isVI()) { // VI supports 20-bit unsigned offsets.
102 Offset = Imm & 0xFFFFF;
103 } else { // GFX9+ supports 21-bit signed offsets.
104 Offset = SignExtend64<21>(Imm);
105 }
107}
108
109static DecodeStatus decodeBoolReg(MCInst &Inst, unsigned Val, uint64_t Addr,
110 const MCDisassembler *Decoder) {
111 auto DAsm = static_cast<const AMDGPUDisassembler*>(Decoder);
112 return addOperand(Inst, DAsm->decodeBoolReg(Val));
113}
114
115static DecodeStatus decodeSplitBarrier(MCInst &Inst, unsigned Val,
117 const MCDisassembler *Decoder) {
118 auto DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
119 return addOperand(Inst, DAsm->decodeSplitBarrier(Val));
120}
121
122static DecodeStatus decodeDpp8FI(MCInst &Inst, unsigned Val, uint64_t Addr,
123 const MCDisassembler *Decoder) {
124 auto DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
125 return addOperand(Inst, DAsm->decodeDpp8FI(Val));
126}
127
// Defines a static tablegen decoder entry point `StaticDecoderName` that
// forwards the raw immediate to AMDGPUDisassembler::DecoderName(Imm) and
// appends the resulting operand to the instruction.
#define DECODE_OPERAND(StaticDecoderName, DecoderName) \
  static DecodeStatus StaticDecoderName(MCInst &Inst, unsigned Imm, \
                                        uint64_t /*Addr*/, \
                                        const MCDisassembler *Decoder) { \
    auto DAsm = static_cast<const AMDGPUDisassembler *>(Decoder); \
    return addOperand(Inst, DAsm->DecoderName(Imm)); \
  }
135
// Decoder for registers, decode directly using RegClassID. Imm(8-bit) is
// number of register. Used by VGPR only and AGPR only operands.
// Expands to the `Decode<RegClass>RegisterClass` symbol that the generated
// disassembler tables reference by name.
#define DECODE_OPERAND_REG_8(RegClass) \
  static DecodeStatus Decode##RegClass##RegisterClass( \
      MCInst &Inst, unsigned Imm, uint64_t /*Addr*/, \
      const MCDisassembler *Decoder) { \
    assert(Imm < (1 << 8) && "8-bit encoding"); \
    auto DAsm = static_cast<const AMDGPUDisassembler *>(Decoder); \
    return addOperand( \
        Inst, DAsm->createRegOperand(AMDGPU::RegClass##RegClassID, Imm)); \
  }
147
// Defines decoder `Name` for an EncSize-bit source-operand encoding: the raw
// field EncImm is resolved by decodeSrcOp into a register or an ImmWidth-bit
// immediate (MandatoryLiteral forces a literal-constant interpretation).
#define DECODE_SrcOp(Name, EncSize, OpWidth, EncImm, MandatoryLiteral, \
                     ImmWidth) \
  static DecodeStatus Name(MCInst &Inst, unsigned Imm, uint64_t /*Addr*/, \
                           const MCDisassembler *Decoder) { \
    assert(Imm < (1 << EncSize) && #EncSize "-bit encoding"); \
    auto DAsm = static_cast<const AMDGPUDisassembler *>(Decoder); \
    return addOperand(Inst, \
                      DAsm->decodeSrcOp(AMDGPUDisassembler::OpWidth, EncImm, \
                                        MandatoryLiteral, ImmWidth)); \
  }
158
159static DecodeStatus decodeSrcOp(MCInst &Inst, unsigned EncSize,
161 unsigned Imm, unsigned EncImm,
162 bool MandatoryLiteral, unsigned ImmWidth,
164 const MCDisassembler *Decoder) {
165 assert(Imm < (1U << EncSize) && "Operand doesn't fit encoding!");
166 auto DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
167 return addOperand(Inst, DAsm->decodeSrcOp(OpWidth, EncImm, MandatoryLiteral,
168 ImmWidth, Sema));
169}
170
// Decoder for registers. Imm(7-bit) is number of register, uses decodeSrcOp to
// get register class. Used by SGPR only operands.
// (No immediate is possible here: ImmWidth is 0 and the literal flag false.)
#define DECODE_OPERAND_REG_7(RegClass, OpWidth) \
  DECODE_SrcOp(Decode##RegClass##RegisterClass, 7, OpWidth, Imm, false, 0)
175
176// Decoder for registers. Imm(10-bit): Imm{7-0} is number of register,
177// Imm{9} is acc(agpr or vgpr) Imm{8} should be 0 (see VOP3Pe_SMFMAC).
178// Set Imm{8} to 1 (IS_VGPR) to decode using 'enum10' from decodeSrcOp.
179// Used by AV_ register classes (AGPR or VGPR only register operands).
180template <AMDGPUDisassembler::OpWidthTy OpWidth>
181static DecodeStatus decodeAV10(MCInst &Inst, unsigned Imm, uint64_t /* Addr */,
182 const MCDisassembler *Decoder) {
183 return decodeSrcOp(Inst, 10, OpWidth, Imm, Imm | AMDGPU::EncValues::IS_VGPR,
184 false, 0, AMDGPU::OperandSemantics::INT, Decoder);
185}
186
187// Decoder for Src(9-bit encoding) registers only.
188template <AMDGPUDisassembler::OpWidthTy OpWidth>
189static DecodeStatus decodeSrcReg9(MCInst &Inst, unsigned Imm,
190 uint64_t /* Addr */,
191 const MCDisassembler *Decoder) {
192 return decodeSrcOp(Inst, 9, OpWidth, Imm, Imm, false, 0,
194}
195
196// Decoder for Src(9-bit encoding) AGPR, register number encoded in 9bits, set
197// Imm{9} to 1 (set acc) and decode using 'enum10' from decodeSrcOp, registers
198// only.
199template <AMDGPUDisassembler::OpWidthTy OpWidth>
200static DecodeStatus decodeSrcA9(MCInst &Inst, unsigned Imm, uint64_t /* Addr */,
201 const MCDisassembler *Decoder) {
202 return decodeSrcOp(Inst, 9, OpWidth, Imm, Imm | 512, false, 0,
204}
205
206// Decoder for 'enum10' from decodeSrcOp, Imm{0-8} is 9-bit Src encoding
207// Imm{9} is acc, registers only.
208template <AMDGPUDisassembler::OpWidthTy OpWidth>
209static DecodeStatus decodeSrcAV10(MCInst &Inst, unsigned Imm,
210 uint64_t /* Addr */,
211 const MCDisassembler *Decoder) {
212 return decodeSrcOp(Inst, 10, OpWidth, Imm, Imm, false, 0,
214}
215
216// Decoder for RegisterOperands using 9-bit Src encoding. Operand can be
217// register from RegClass or immediate. Registers that don't belong to RegClass
218// will be decoded and InstPrinter will report warning. Immediate will be
219// decoded into constant of size ImmWidth, should match width of immediate used
220// by OperandType (important for floating point types).
221template <AMDGPUDisassembler::OpWidthTy OpWidth, unsigned ImmWidth,
222 unsigned OperandSemantics>
223static DecodeStatus decodeSrcRegOrImm9(MCInst &Inst, unsigned Imm,
224 uint64_t /* Addr */,
225 const MCDisassembler *Decoder) {
226 return decodeSrcOp(Inst, 9, OpWidth, Imm, Imm, false, ImmWidth,
227 (AMDGPU::OperandSemantics)OperandSemantics, Decoder);
228}
229
230// Decoder for Src(9-bit encoding) AGPR or immediate. Set Imm{9} to 1 (set acc)
231// and decode using 'enum10' from decodeSrcOp.
232template <AMDGPUDisassembler::OpWidthTy OpWidth, unsigned ImmWidth,
233 unsigned OperandSemantics>
234static DecodeStatus decodeSrcRegOrImmA9(MCInst &Inst, unsigned Imm,
235 uint64_t /* Addr */,
236 const MCDisassembler *Decoder) {
237 return decodeSrcOp(Inst, 9, OpWidth, Imm, Imm | 512, false, ImmWidth,
238 (AMDGPU::OperandSemantics)OperandSemantics, Decoder);
239}
240
241template <AMDGPUDisassembler::OpWidthTy OpWidth, unsigned ImmWidth,
242 unsigned OperandSemantics>
244 uint64_t /* Addr */,
245 const MCDisassembler *Decoder) {
246 return decodeSrcOp(Inst, 9, OpWidth, Imm, Imm, true, ImmWidth,
247 (AMDGPU::OperandSemantics)OperandSemantics, Decoder);
248}
249
250// Default decoders generated by tablegen: 'Decode<RegClass>RegisterClass'
251// when RegisterClass is used as an operand. Most often used for destination
252// operands.
253
255DECODE_OPERAND_REG_8(VGPR_32_Lo128)
258DECODE_OPERAND_REG_8(VReg_128)
259DECODE_OPERAND_REG_8(VReg_256)
260DECODE_OPERAND_REG_8(VReg_288)
261DECODE_OPERAND_REG_8(VReg_352)
262DECODE_OPERAND_REG_8(VReg_384)
263DECODE_OPERAND_REG_8(VReg_512)
264DECODE_OPERAND_REG_8(VReg_1024)
265
// SGPR-only register classes (7-bit encodings), paired with their operand
// widths for decodeSrcOp.
DECODE_OPERAND_REG_7(SReg_32, OPW32)
DECODE_OPERAND_REG_7(SReg_32_XEXEC, OPW32)
DECODE_OPERAND_REG_7(SReg_32_XM0_XEXEC, OPW32)
DECODE_OPERAND_REG_7(SReg_32_XEXEC_HI, OPW32)
DECODE_OPERAND_REG_7(SReg_64, OPW64)
DECODE_OPERAND_REG_7(SReg_64_XEXEC, OPW64)
DECODE_OPERAND_REG_7(SReg_96, OPW96)
DECODE_OPERAND_REG_7(SReg_128, OPW128)
DECODE_OPERAND_REG_7(SReg_256, OPW256)
DECODE_OPERAND_REG_7(SReg_512, OPW512)
276
279DECODE_OPERAND_REG_8(AReg_128)
280DECODE_OPERAND_REG_8(AReg_256)
281DECODE_OPERAND_REG_8(AReg_512)
282DECODE_OPERAND_REG_8(AReg_1024)
283
285 uint64_t /*Addr*/,
286 const MCDisassembler *Decoder) {
287 assert(isUInt<10>(Imm) && "10-bit encoding expected");
288 assert((Imm & (1 << 8)) == 0 && "Imm{8} should not be used");
289
290 bool IsHi = Imm & (1 << 9);
291 unsigned RegIdx = Imm & 0xff;
292 auto DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
293 return addOperand(Inst, DAsm->createVGPR16Operand(RegIdx, IsHi));
294}
295
296static DecodeStatus
298 const MCDisassembler *Decoder) {
299 assert(isUInt<8>(Imm) && "8-bit encoding expected");
300
301 bool IsHi = Imm & (1 << 7);
302 unsigned RegIdx = Imm & 0x7f;
303 auto DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
304 return addOperand(Inst, DAsm->createVGPR16Operand(RegIdx, IsHi));
305}
306
308 uint64_t /*Addr*/,
309 const MCDisassembler *Decoder) {
310 assert(isUInt<9>(Imm) && "9-bit encoding expected");
311
312 const auto *DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
313 bool IsVGPR = Imm & (1 << 8);
314 if (IsVGPR) {
315 bool IsHi = Imm & (1 << 7);
316 unsigned RegIdx = Imm & 0x7f;
317 return addOperand(Inst, DAsm->createVGPR16Operand(RegIdx, IsHi));
318 }
319 return addOperand(Inst, DAsm->decodeNonVGPRSrcOp(AMDGPUDisassembler::OPW16,
320 Imm & 0xFF, false, 16));
321}
322
323static DecodeStatus decodeOperand_VSrcT16(MCInst &Inst, unsigned Imm,
324 uint64_t /*Addr*/,
325 const MCDisassembler *Decoder) {
326 assert(isUInt<10>(Imm) && "10-bit encoding expected");
327
328 const auto *DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
329 bool IsVGPR = Imm & (1 << 8);
330 if (IsVGPR) {
331 bool IsHi = Imm & (1 << 9);
332 unsigned RegIdx = Imm & 0xff;
333 return addOperand(Inst, DAsm->createVGPR16Operand(RegIdx, IsHi));
334 }
335 return addOperand(Inst, DAsm->decodeNonVGPRSrcOp(AMDGPUDisassembler::OPW16,
336 Imm & 0xFF, false, 16));
337}
338
339static DecodeStatus decodeOperand_KImmFP(MCInst &Inst, unsigned Imm,
341 const MCDisassembler *Decoder) {
342 const auto *DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
343 return addOperand(Inst, DAsm->decodeMandatoryLiteralConstant(Imm));
344}
345
346static DecodeStatus decodeOperandVOPDDstY(MCInst &Inst, unsigned Val,
347 uint64_t Addr, const void *Decoder) {
348 const auto *DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
349 return addOperand(Inst, DAsm->decodeVOPDDstYOp(Inst, Val));
350}
351
352static bool IsAGPROperand(const MCInst &Inst, int OpIdx,
353 const MCRegisterInfo *MRI) {
354 if (OpIdx < 0)
355 return false;
356
357 const MCOperand &Op = Inst.getOperand(OpIdx);
358 if (!Op.isReg())
359 return false;
360
361 unsigned Sub = MRI->getSubReg(Op.getReg(), AMDGPU::sub0);
362 auto Reg = Sub ? Sub : Op.getReg();
363 return Reg >= AMDGPU::AGPR0 && Reg <= AMDGPU::AGPR255;
364}
365
366static DecodeStatus decodeAVLdSt(MCInst &Inst, unsigned Imm,
368 const MCDisassembler *Decoder) {
369 auto DAsm = static_cast<const AMDGPUDisassembler*>(Decoder);
370 if (!DAsm->isGFX90A()) {
371 Imm &= 511;
372 } else {
373 // If atomic has both vdata and vdst their register classes are tied.
374 // The bit is decoded along with the vdst, first operand. We need to
375 // change register class to AGPR if vdst was AGPR.
376 // If a DS instruction has both data0 and data1 their register classes
377 // are also tied.
378 unsigned Opc = Inst.getOpcode();
379 uint64_t TSFlags = DAsm->getMCII()->get(Opc).TSFlags;
380 uint16_t DataNameIdx = (TSFlags & SIInstrFlags::DS) ? AMDGPU::OpName::data0
381 : AMDGPU::OpName::vdata;
382 const MCRegisterInfo *MRI = DAsm->getContext().getRegisterInfo();
383 int DataIdx = AMDGPU::getNamedOperandIdx(Opc, DataNameIdx);
384 if ((int)Inst.getNumOperands() == DataIdx) {
385 int DstIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdst);
386 if (IsAGPROperand(Inst, DstIdx, MRI))
387 Imm |= 512;
388 }
389
390 if (TSFlags & SIInstrFlags::DS) {
391 int Data2Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::data1);
392 if ((int)Inst.getNumOperands() == Data2Idx &&
393 IsAGPROperand(Inst, DataIdx, MRI))
394 Imm |= 512;
395 }
396 }
397 return addOperand(Inst, DAsm->decodeSrcOp(Opw, Imm | 256));
398}
399
// Tablegen entry point: binds the operand-width template parameter and
// forwards to the shared decodeAVLdSt() helper above.
template <AMDGPUDisassembler::OpWidthTy Opw>
static DecodeStatus decodeAVLdSt(MCInst &Inst, unsigned Imm,
                                 uint64_t /* Addr */,
                                 const MCDisassembler *Decoder) {
  return decodeAVLdSt(Inst, Imm, Opw, Decoder);
}
406
407static DecodeStatus decodeOperand_VSrc_f64(MCInst &Inst, unsigned Imm,
409 const MCDisassembler *Decoder) {
410 assert(Imm < (1 << 9) && "9-bit encoding");
411 auto DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
412 return addOperand(Inst,
413 DAsm->decodeSrcOp(AMDGPUDisassembler::OPW64, Imm, false, 64,
415}
416
// SDWA operand decoders: each expansion produces a decodeSDWA<DecName>
// entry point that forwards to the AMDGPUDisassembler method of the same
// name via DECODE_OPERAND.
#define DECODE_SDWA(DecName) \
DECODE_OPERAND(decodeSDWA##DecName, decodeSDWA##DecName)

DECODE_SDWA(Src32)
DECODE_SDWA(Src16)
DECODE_SDWA(VopcDst)
423
424#include "AMDGPUGenDisassemblerTables.inc"
425
426//===----------------------------------------------------------------------===//
427//
428//===----------------------------------------------------------------------===//
429
430template <typename T> static inline T eatBytes(ArrayRef<uint8_t>& Bytes) {
431 assert(Bytes.size() >= sizeof(T));
432 const auto Res =
433 support::endian::read<T, llvm::endianness::little>(Bytes.data());
434 Bytes = Bytes.slice(sizeof(T));
435 return Res;
436}
437
439 assert(Bytes.size() >= 12);
440 uint64_t Lo =
441 support::endian::read<uint64_t, llvm::endianness::little>(Bytes.data());
442 Bytes = Bytes.slice(8);
443 uint64_t Hi =
444 support::endian::read<uint32_t, llvm::endianness::little>(Bytes.data());
445 Bytes = Bytes.slice(4);
446 return DecoderUInt128(Lo, Hi);
447}
448
450 ArrayRef<uint8_t> Bytes_,
452 raw_ostream &CS) const {
453 unsigned MaxInstBytesNum = std::min((size_t)TargetMaxInstBytes, Bytes_.size());
454 Bytes = Bytes_.slice(0, MaxInstBytesNum);
455
456 // In case the opcode is not recognized we'll assume a Size of 4 bytes (unless
457 // there are fewer bytes left). This will be overridden on success.
458 Size = std::min((size_t)4, Bytes_.size());
459
460 do {
461 // ToDo: better to switch encoding length using some bit predicate
462 // but it is unknown yet, so try all we can
463
464 // Try to decode DPP and SDWA first to solve conflict with VOP1 and VOP2
465 // encodings
466 if (isGFX11Plus() && Bytes.size() >= 12 ) {
467 DecoderUInt128 DecW = eat12Bytes(Bytes);
468
469 if (isGFX11() &&
470 tryDecodeInst(DecoderTableGFX1196, DecoderTableGFX11_FAKE1696, MI,
471 DecW, Address, CS))
472 break;
473
474 if (isGFX12() &&
475 tryDecodeInst(DecoderTableGFX1296, DecoderTableGFX12_FAKE1696, MI,
476 DecW, Address, CS))
477 break;
478
479 if (isGFX12() &&
480 tryDecodeInst(DecoderTableGFX12W6496, MI, DecW, Address, CS))
481 break;
482 }
483
484 // Reinitialize Bytes
485 Bytes = Bytes_.slice(0, MaxInstBytesNum);
486
487 if (Bytes.size() >= 8) {
488 const uint64_t QW = eatBytes<uint64_t>(Bytes);
489
490 if (STI.hasFeature(AMDGPU::FeatureGFX10_BEncoding) &&
491 tryDecodeInst(DecoderTableGFX10_B64, MI, QW, Address, CS))
492 break;
493
494 if (STI.hasFeature(AMDGPU::FeatureUnpackedD16VMem) &&
495 tryDecodeInst(DecoderTableGFX80_UNPACKED64, MI, QW, Address, CS))
496 break;
497
498 // Some GFX9 subtargets repurposed the v_mad_mix_f32, v_mad_mixlo_f16 and
499 // v_mad_mixhi_f16 for FMA variants. Try to decode using this special
500 // table first so we print the correct name.
501 if (STI.hasFeature(AMDGPU::FeatureFmaMixInsts) &&
502 tryDecodeInst(DecoderTableGFX9_DL64, MI, QW, Address, CS))
503 break;
504
505 if (STI.hasFeature(AMDGPU::FeatureGFX940Insts) &&
506 tryDecodeInst(DecoderTableGFX94064, MI, QW, Address, CS))
507 break;
508
509 if (STI.hasFeature(AMDGPU::FeatureGFX90AInsts) &&
510 tryDecodeInst(DecoderTableGFX90A64, MI, QW, Address, CS))
511 break;
512
513 if ((isVI() || isGFX9()) &&
514 tryDecodeInst(DecoderTableGFX864, MI, QW, Address, CS))
515 break;
516
517 if (isGFX9() && tryDecodeInst(DecoderTableGFX964, MI, QW, Address, CS))
518 break;
519
520 if (isGFX10() && tryDecodeInst(DecoderTableGFX1064, MI, QW, Address, CS))
521 break;
522
523 if (isGFX12() &&
524 tryDecodeInst(DecoderTableGFX1264, DecoderTableGFX12_FAKE1664, MI, QW,
525 Address, CS))
526 break;
527
528 if (isGFX11() &&
529 tryDecodeInst(DecoderTableGFX1164, DecoderTableGFX11_FAKE1664, MI, QW,
530 Address, CS))
531 break;
532
533 if (isGFX11() &&
534 tryDecodeInst(DecoderTableGFX11W6464, MI, QW, Address, CS))
535 break;
536
537 if (isGFX12() &&
538 tryDecodeInst(DecoderTableGFX12W6464, MI, QW, Address, CS))
539 break;
540 }
541
542 // Reinitialize Bytes
543 Bytes = Bytes_.slice(0, MaxInstBytesNum);
544
545 // Try decode 32-bit instruction
546 if (Bytes.size() >= 4) {
547 const uint32_t DW = eatBytes<uint32_t>(Bytes);
548
549 if ((isVI() || isGFX9()) &&
550 tryDecodeInst(DecoderTableGFX832, MI, DW, Address, CS))
551 break;
552
553 if (tryDecodeInst(DecoderTableAMDGPU32, MI, DW, Address, CS))
554 break;
555
556 if (isGFX9() && tryDecodeInst(DecoderTableGFX932, MI, DW, Address, CS))
557 break;
558
559 if (STI.hasFeature(AMDGPU::FeatureGFX90AInsts) &&
560 tryDecodeInst(DecoderTableGFX90A32, MI, DW, Address, CS))
561 break;
562
563 if (STI.hasFeature(AMDGPU::FeatureGFX10_BEncoding) &&
564 tryDecodeInst(DecoderTableGFX10_B32, MI, DW, Address, CS))
565 break;
566
567 if (isGFX10() && tryDecodeInst(DecoderTableGFX1032, MI, DW, Address, CS))
568 break;
569
570 if (isGFX11() &&
571 tryDecodeInst(DecoderTableGFX1132, DecoderTableGFX11_FAKE1632, MI, DW,
572 Address, CS))
573 break;
574
575 if (isGFX12() &&
576 tryDecodeInst(DecoderTableGFX1232, DecoderTableGFX12_FAKE1632, MI, DW,
577 Address, CS))
578 break;
579 }
580
582 } while (false);
583
584 if (MCII->get(MI.getOpcode()).TSFlags & SIInstrFlags::DPP) {
585 if (isMacDPP(MI))
587
588 if (MCII->get(MI.getOpcode()).TSFlags & SIInstrFlags::VOP3P)
590 else if ((MCII->get(MI.getOpcode()).TSFlags & SIInstrFlags::VOPC) ||
591 AMDGPU::isVOPC64DPP(MI.getOpcode()))
592 convertVOPCDPPInst(MI); // Special VOP3 case
593 else if (AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::dpp8) !=
594 -1)
596 else if (MCII->get(MI.getOpcode()).TSFlags & SIInstrFlags::VOP3)
597 convertVOP3DPPInst(MI); // Regular VOP3 case
598 }
599
600 if (AMDGPU::isMAC(MI.getOpcode())) {
601 // Insert dummy unused src2_modifiers.
603 AMDGPU::OpName::src2_modifiers);
604 }
605
606 if (MI.getOpcode() == AMDGPU::V_CVT_SR_BF8_F32_e64_dpp ||
607 MI.getOpcode() == AMDGPU::V_CVT_SR_FP8_F32_e64_dpp) {
608 // Insert dummy unused src2_modifiers.
610 AMDGPU::OpName::src2_modifiers);
611 }
612
613 if ((MCII->get(MI.getOpcode()).TSFlags & SIInstrFlags::DS) &&
615 insertNamedMCOperand(MI, MCOperand::createImm(0), AMDGPU::OpName::gds);
616 }
617
618 if (MCII->get(MI.getOpcode()).TSFlags &
620 int CPolPos = AMDGPU::getNamedOperandIdx(MI.getOpcode(),
621 AMDGPU::OpName::cpol);
622 if (CPolPos != -1) {
623 unsigned CPol =
624 (MCII->get(MI.getOpcode()).TSFlags & SIInstrFlags::IsAtomicRet) ?
626 if (MI.getNumOperands() <= (unsigned)CPolPos) {
628 AMDGPU::OpName::cpol);
629 } else if (CPol) {
630 MI.getOperand(CPolPos).setImm(MI.getOperand(CPolPos).getImm() | CPol);
631 }
632 }
633 }
634
635 if ((MCII->get(MI.getOpcode()).TSFlags &
637 (STI.hasFeature(AMDGPU::FeatureGFX90AInsts))) {
638 // GFX90A lost TFE, its place is occupied by ACC.
639 int TFEOpIdx =
640 AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::tfe);
641 if (TFEOpIdx != -1) {
642 auto TFEIter = MI.begin();
643 std::advance(TFEIter, TFEOpIdx);
644 MI.insert(TFEIter, MCOperand::createImm(0));
645 }
646 }
647
648 if (MCII->get(MI.getOpcode()).TSFlags &
650 int SWZOpIdx =
651 AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::swz);
652 if (SWZOpIdx != -1) {
653 auto SWZIter = MI.begin();
654 std::advance(SWZIter, SWZOpIdx);
655 MI.insert(SWZIter, MCOperand::createImm(0));
656 }
657 }
658
659 if (MCII->get(MI.getOpcode()).TSFlags & SIInstrFlags::MIMG) {
660 int VAddr0Idx =
661 AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::vaddr0);
662 int RsrcIdx =
663 AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::srsrc);
664 unsigned NSAArgs = RsrcIdx - VAddr0Idx - 1;
665 if (VAddr0Idx >= 0 && NSAArgs > 0) {
666 unsigned NSAWords = (NSAArgs + 3) / 4;
667 if (Bytes.size() < 4 * NSAWords)
669 for (unsigned i = 0; i < NSAArgs; ++i) {
670 const unsigned VAddrIdx = VAddr0Idx + 1 + i;
671 auto VAddrRCID =
672 MCII->get(MI.getOpcode()).operands()[VAddrIdx].RegClass;
673 MI.insert(MI.begin() + VAddrIdx, createRegOperand(VAddrRCID, Bytes[i]));
674 }
675 Bytes = Bytes.slice(4 * NSAWords);
676 }
677
679 }
680
681 if (MCII->get(MI.getOpcode()).TSFlags &
684
685 if (MCII->get(MI.getOpcode()).TSFlags & SIInstrFlags::EXP)
687
688 if (MCII->get(MI.getOpcode()).TSFlags & SIInstrFlags::VINTERP)
690
691 if (MCII->get(MI.getOpcode()).TSFlags & SIInstrFlags::SDWA)
693
694 int VDstIn_Idx = AMDGPU::getNamedOperandIdx(MI.getOpcode(),
695 AMDGPU::OpName::vdst_in);
696 if (VDstIn_Idx != -1) {
697 int Tied = MCII->get(MI.getOpcode()).getOperandConstraint(VDstIn_Idx,
699 if (Tied != -1 && (MI.getNumOperands() <= (unsigned)VDstIn_Idx ||
700 !MI.getOperand(VDstIn_Idx).isReg() ||
701 MI.getOperand(VDstIn_Idx).getReg() != MI.getOperand(Tied).getReg())) {
702 if (MI.getNumOperands() > (unsigned)VDstIn_Idx)
703 MI.erase(&MI.getOperand(VDstIn_Idx));
705 MCOperand::createReg(MI.getOperand(Tied).getReg()),
706 AMDGPU::OpName::vdst_in);
707 }
708 }
709
710 int ImmLitIdx =
711 AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::imm);
712 bool IsSOPK = MCII->get(MI.getOpcode()).TSFlags & SIInstrFlags::SOPK;
713 if (ImmLitIdx != -1 && !IsSOPK)
714 convertFMAanyK(MI, ImmLitIdx);
715
716 Size = MaxInstBytesNum - Bytes.size();
718}
719
721 if (STI.hasFeature(AMDGPU::FeatureGFX11Insts)) {
722 // The MCInst still has these fields even though they are no longer encoded
723 // in the GFX11 instruction.
724 insertNamedMCOperand(MI, MCOperand::createImm(0), AMDGPU::OpName::vm);
725 insertNamedMCOperand(MI, MCOperand::createImm(0), AMDGPU::OpName::compr);
726 }
727}
728
730 if (MI.getOpcode() == AMDGPU::V_INTERP_P10_F16_F32_inreg_gfx11 ||
731 MI.getOpcode() == AMDGPU::V_INTERP_P10_F16_F32_inreg_gfx12 ||
732 MI.getOpcode() == AMDGPU::V_INTERP_P10_RTZ_F16_F32_inreg_gfx11 ||
733 MI.getOpcode() == AMDGPU::V_INTERP_P10_RTZ_F16_F32_inreg_gfx12 ||
734 MI.getOpcode() == AMDGPU::V_INTERP_P2_F16_F32_inreg_gfx11 ||
735 MI.getOpcode() == AMDGPU::V_INTERP_P2_F16_F32_inreg_gfx12 ||
736 MI.getOpcode() == AMDGPU::V_INTERP_P2_RTZ_F16_F32_inreg_gfx11 ||
737 MI.getOpcode() == AMDGPU::V_INTERP_P2_RTZ_F16_F32_inreg_gfx12) {
738 // The MCInst has this field that is not directly encoded in the
739 // instruction.
740 insertNamedMCOperand(MI, MCOperand::createImm(0), AMDGPU::OpName::op_sel);
741 }
742}
743
745 if (STI.hasFeature(AMDGPU::FeatureGFX9) ||
746 STI.hasFeature(AMDGPU::FeatureGFX10)) {
747 if (AMDGPU::hasNamedOperand(MI.getOpcode(), AMDGPU::OpName::sdst))
748 // VOPC - insert clamp
749 insertNamedMCOperand(MI, MCOperand::createImm(0), AMDGPU::OpName::clamp);
750 } else if (STI.hasFeature(AMDGPU::FeatureVolcanicIslands)) {
751 int SDst = AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::sdst);
752 if (SDst != -1) {
753 // VOPC - insert VCC register as sdst
755 AMDGPU::OpName::sdst);
756 } else {
757 // VOP1/2 - insert omod if present in instruction
758 insertNamedMCOperand(MI, MCOperand::createImm(0), AMDGPU::OpName::omod);
759 }
760 }
761}
762
// Aggregated VOP3/VOP3P modifier bit-vectors, one bit per source operand
// (plus the destination bit in OpSel{3}). The dropped `struct` header line
// is restored.
struct VOPModifiers {
  unsigned OpSel = 0;
  unsigned OpSelHi = 0;
  unsigned NegLo = 0;
  unsigned NegHi = 0;
};
769
770// Reconstruct values of VOP3/VOP3P operands such as op_sel.
771// Note that these values do not affect disassembler output,
772// so this is only necessary for consistency with src_modifiers.
774 bool IsVOP3P = false) {
775 VOPModifiers Modifiers;
776 unsigned Opc = MI.getOpcode();
777 const int ModOps[] = {AMDGPU::OpName::src0_modifiers,
778 AMDGPU::OpName::src1_modifiers,
779 AMDGPU::OpName::src2_modifiers};
780 for (int J = 0; J < 3; ++J) {
781 int OpIdx = AMDGPU::getNamedOperandIdx(Opc, ModOps[J]);
782 if (OpIdx == -1)
783 continue;
784
785 unsigned Val = MI.getOperand(OpIdx).getImm();
786
787 Modifiers.OpSel |= !!(Val & SISrcMods::OP_SEL_0) << J;
788 if (IsVOP3P) {
789 Modifiers.OpSelHi |= !!(Val & SISrcMods::OP_SEL_1) << J;
790 Modifiers.NegLo |= !!(Val & SISrcMods::NEG) << J;
791 Modifiers.NegHi |= !!(Val & SISrcMods::NEG_HI) << J;
792 } else if (J == 0) {
793 Modifiers.OpSel |= !!(Val & SISrcMods::DST_OP_SEL) << 3;
794 }
795 }
796
797 return Modifiers;
798}
799
800// Instructions decode the op_sel/suffix bits into the src_modifier
801// operands. Copy those bits into the src operands for true16 VGPRs.
803 const unsigned Opc = MI.getOpcode();
804 const MCRegisterClass &ConversionRC =
805 MRI.getRegClass(AMDGPU::VGPR_16RegClassID);
806 constexpr std::array<std::tuple<int, int, unsigned>, 4> OpAndOpMods = {
807 {{AMDGPU::OpName::src0, AMDGPU::OpName::src0_modifiers,
809 {AMDGPU::OpName::src1, AMDGPU::OpName::src1_modifiers,
811 {AMDGPU::OpName::src2, AMDGPU::OpName::src2_modifiers,
813 {AMDGPU::OpName::vdst, AMDGPU::OpName::src0_modifiers,
815 for (const auto &[OpName, OpModsName, OpSelMask] : OpAndOpMods) {
816 int OpIdx = AMDGPU::getNamedOperandIdx(Opc, OpName);
817 int OpModsIdx = AMDGPU::getNamedOperandIdx(Opc, OpModsName);
818 if (OpIdx == -1 || OpModsIdx == -1)
819 continue;
820 MCOperand &Op = MI.getOperand(OpIdx);
821 if (!Op.isReg())
822 continue;
823 if (!ConversionRC.contains(Op.getReg()))
824 continue;
825 unsigned OpEnc = MRI.getEncodingValue(Op.getReg());
826 const MCOperand &OpMods = MI.getOperand(OpModsIdx);
827 unsigned ModVal = OpMods.getImm();
828 if (ModVal & OpSelMask) { // isHi
829 unsigned RegIdx = OpEnc & AMDGPU::HWEncoding::REG_IDX_MASK;
830 Op.setReg(ConversionRC.getRegister(RegIdx * 2 + 1));
831 }
832 }
833}
834
835// MAC opcodes have special old and src2 operands.
836// src2 is tied to dst, while old is not tied (but assumed to be).
838 constexpr int DST_IDX = 0;
839 auto Opcode = MI.getOpcode();
840 const auto &Desc = MCII->get(Opcode);
841 auto OldIdx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::old);
842
843 if (OldIdx != -1 && Desc.getOperandConstraint(
844 OldIdx, MCOI::OperandConstraint::TIED_TO) == -1) {
845 assert(AMDGPU::hasNamedOperand(Opcode, AMDGPU::OpName::src2));
846 assert(Desc.getOperandConstraint(
847 AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2),
849 (void)DST_IDX;
850 return true;
851 }
852
853 return false;
854}
855
856// Create dummy old operand and insert dummy unused src2_modifiers
858 assert(MI.getNumOperands() + 1 < MCII->get(MI.getOpcode()).getNumOperands());
859 insertNamedMCOperand(MI, MCOperand::createReg(0), AMDGPU::OpName::old);
861 AMDGPU::OpName::src2_modifiers);
862}
863
865 unsigned Opc = MI.getOpcode();
866
867 int VDstInIdx =
868 AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::vdst_in);
869 if (VDstInIdx != -1)
870 insertNamedMCOperand(MI, MI.getOperand(0), AMDGPU::OpName::vdst_in);
871
872 unsigned DescNumOps = MCII->get(Opc).getNumOperands();
873 if (MI.getNumOperands() < DescNumOps &&
874 AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::op_sel)) {
876 auto Mods = collectVOPModifiers(MI);
878 AMDGPU::OpName::op_sel);
879 } else {
880 // Insert dummy unused src modifiers.
881 if (MI.getNumOperands() < DescNumOps &&
882 AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::src0_modifiers))
884 AMDGPU::OpName::src0_modifiers);
885
886 if (MI.getNumOperands() < DescNumOps &&
887 AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::src1_modifiers))
889 AMDGPU::OpName::src1_modifiers);
890 }
891}
892
895
896 int VDstInIdx =
897 AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::vdst_in);
898 if (VDstInIdx != -1)
899 insertNamedMCOperand(MI, MI.getOperand(0), AMDGPU::OpName::vdst_in);
900
901 unsigned Opc = MI.getOpcode();
902 unsigned DescNumOps = MCII->get(Opc).getNumOperands();
903 if (MI.getNumOperands() < DescNumOps &&
904 AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::op_sel)) {
905 auto Mods = collectVOPModifiers(MI);
907 AMDGPU::OpName::op_sel);
908 }
909}
910
911// Note that before gfx10, the MIMG encoding provided no information about
912// VADDR size. Consequently, decoded instructions always show address as if it
913// has 1 dword, which could be not really so.
// NOTE(review): the function signature line (original line 914) was lost in
// extraction; from the body this is presumably
// void AMDGPUDisassembler::convertMIMGInst(MCInst &MI) const — confirm
// against upstream. It post-processes a freshly decoded image instruction:
// it recomputes the data/address sizes implied by dmask/d16/tfe (and, on
// gfx10+, dim/a16), picks the matching opcode variant, and widens the
// vdata/vaddr register operands accordingly.
 915 auto TSFlags = MCII->get(MI.getOpcode()).TSFlags;
 916
 917 int VDstIdx = AMDGPU::getNamedOperandIdx(MI.getOpcode(),
 918 AMDGPU::OpName::vdst);
 919
 920 int VDataIdx = AMDGPU::getNamedOperandIdx(MI.getOpcode(),
 921 AMDGPU::OpName::vdata);
 922 int VAddr0Idx =
 923 AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::vaddr0);
 924 int RsrcOpName = (TSFlags & SIInstrFlags::MIMG) ? AMDGPU::OpName::srsrc
 925 : AMDGPU::OpName::rsrc;
 926 int RsrcIdx = AMDGPU::getNamedOperandIdx(MI.getOpcode(), RsrcOpName);
 927 int DMaskIdx = AMDGPU::getNamedOperandIdx(MI.getOpcode(),
 928 AMDGPU::OpName::dmask);
 929
 930 int TFEIdx = AMDGPU::getNamedOperandIdx(MI.getOpcode(),
 931 AMDGPU::OpName::tfe);
 932 int D16Idx = AMDGPU::getNamedOperandIdx(MI.getOpcode(),
 933 AMDGPU::OpName::d16);
 934
 935 const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(MI.getOpcode());
 936 const AMDGPU::MIMGBaseOpcodeInfo *BaseOpcode =
// NOTE(review): the initializer line for BaseOpcode (original line 937) was
// dropped by extraction; presumably AMDGPU::getMIMGBaseOpcodeInfo(...).
 938
 939 assert(VDataIdx != -1);
 // BVH (ray-intersect) opcodes only get the A16 flag appended; none of the
 // register-widening below applies to them.
 940 if (BaseOpcode->BVH) {
 941 // Add A16 operand for intersect_ray instructions
 942 addOperand(MI, MCOperand::createImm(BaseOpcode->A16));
 943 return;
 944 }
 945
 946 bool IsAtomic = (VDstIdx != -1);
 947 bool IsGather4 = TSFlags & SIInstrFlags::Gather4;
 948 bool IsVSample = TSFlags & SIInstrFlags::VSAMPLE;
 949 bool IsNSA = false;
 950 bool IsPartialNSA = false;
 951 unsigned AddrSize = Info->VAddrDwords;
 952
 953 if (isGFX10Plus()) {
 954 unsigned DimIdx =
 955 AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::dim);
 956 int A16Idx =
 957 AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::a16);
 958 const AMDGPU::MIMGDimInfo *Dim =
 959 AMDGPU::getMIMGDimInfoByEncoding(MI.getOperand(DimIdx).getImm());
 960 const bool IsA16 = (A16Idx != -1 && MI.getOperand(A16Idx).getImm());
 961
 962 AddrSize =
 963 AMDGPU::getAddrSizeMIMGOp(BaseOpcode, Dim, IsA16, AMDGPU::hasG16(STI));
 964
 965 // VSAMPLE insts that do not use vaddr3 behave the same as NSA forms.
 966 // VIMAGE insts other than BVH never use vaddr4.
 967 IsNSA = Info->MIMGEncoding == AMDGPU::MIMGEncGfx10NSA ||
 968 Info->MIMGEncoding == AMDGPU::MIMGEncGfx11NSA ||
 969 Info->MIMGEncoding == AMDGPU::MIMGEncGfx12;
 970 if (!IsNSA) {
 971 if (!IsVSample && AddrSize > 12)
 972 AddrSize = 16;
 973 } else {
 974 if (AddrSize > Info->VAddrDwords) {
 975 if (!STI.hasFeature(AMDGPU::FeaturePartialNSAEncoding)) {
 976 // The NSA encoding does not contain enough operands for the
 977 // combination of base opcode / dimension. Should this be an error?
 978 return;
 979 }
 980 IsPartialNSA = true;
 981 }
 982 }
 983 }
 984
 // Recompute the destination size from dmask (gather4 always writes 4),
 // then account for packed D16 and the extra TFE status dword.
 985 unsigned DMask = MI.getOperand(DMaskIdx).getImm() & 0xf;
 986 unsigned DstSize = IsGather4 ? 4 : std::max(llvm::popcount(DMask), 1);
 987
 988 bool D16 = D16Idx >= 0 && MI.getOperand(D16Idx).getImm();
 989 if (D16 && AMDGPU::hasPackedD16(STI)) {
 990 DstSize = (DstSize + 1) / 2;
 991 }
 992
 993 if (TFEIdx != -1 && MI.getOperand(TFEIdx).getImm())
 994 DstSize += 1;
 995
 // Fast path: the decoded opcode already matches the implied sizes.
 996 if (DstSize == Info->VDataDwords && AddrSize == Info->VAddrDwords)
 997 return;
 998
 999 int NewOpcode =
1000 AMDGPU::getMIMGOpcode(Info->BaseOpcode, Info->MIMGEncoding, DstSize, AddrSize);
1001 if (NewOpcode == -1)
1002 return;
1003
1004 // Widen the register to the correct number of enabled channels.
1005 unsigned NewVdata = AMDGPU::NoRegister;
1006 if (DstSize != Info->VDataDwords) {
1007 auto DataRCID = MCII->get(NewOpcode).operands()[VDataIdx].RegClass;
1008
1009 // Get first subregister of VData
1010 unsigned Vdata0 = MI.getOperand(VDataIdx).getReg();
1011 unsigned VdataSub0 = MRI.getSubReg(Vdata0, AMDGPU::sub0);
1012 Vdata0 = (VdataSub0 != 0)? VdataSub0 : Vdata0;
1013
1014 NewVdata = MRI.getMatchingSuperReg(Vdata0, AMDGPU::sub0,
1015 &MRI.getRegClass(DataRCID));
1016 if (NewVdata == AMDGPU::NoRegister) {
1017 // It's possible to encode this such that the low register + enabled
1018 // components exceeds the register count.
1019 return;
1020 }
1021 }
1022
1023 // If not using NSA on GFX10+, widen vaddr0 address register to correct size.
1024 // If using partial NSA on GFX11+ widen last address register.
1025 int VAddrSAIdx = IsPartialNSA ? (RsrcIdx - 1) : VAddr0Idx;
1026 unsigned NewVAddrSA = AMDGPU::NoRegister;
1027 if (STI.hasFeature(AMDGPU::FeatureNSAEncoding) && (!IsNSA || IsPartialNSA) &&
1028 AddrSize != Info->VAddrDwords) {
1029 unsigned VAddrSA = MI.getOperand(VAddrSAIdx).getReg();
1030 unsigned VAddrSubSA = MRI.getSubReg(VAddrSA, AMDGPU::sub0);
1031 VAddrSA = VAddrSubSA ? VAddrSubSA : VAddrSA;
1032
1033 auto AddrRCID = MCII->get(NewOpcode).operands()[VAddrSAIdx].RegClass;
1034 NewVAddrSA = MRI.getMatchingSuperReg(VAddrSA, AMDGPU::sub0,
1035 &MRI.getRegClass(AddrRCID));
1036 if (!NewVAddrSA)
1037 return;
1038 }
1039
1040 MI.setOpcode(NewOpcode);
1041
1042 if (NewVdata != AMDGPU::NoRegister) {
1043 MI.getOperand(VDataIdx) = MCOperand::createReg(NewVdata);
1044
1045 if (IsAtomic) {
1046 // Atomic operations have an additional operand (a copy of data)
1047 MI.getOperand(VDstIdx) = MCOperand::createReg(NewVdata);
1048 }
1049 }
1050
1051 if (NewVAddrSA) {
1052 MI.getOperand(VAddrSAIdx) = MCOperand::createReg(NewVAddrSA);
1053 } else if (IsNSA) {
 // Full NSA with a narrower address: drop the now-unused trailing
 // vaddr operands instead of widening.
1054 assert(AddrSize <= Info->VAddrDwords);
1055 MI.erase(MI.begin() + VAddr0Idx + AddrSize,
1056 MI.begin() + VAddr0Idx + Info->VAddrDwords);
1057 }
1058}
1059
1060// Opsel and neg bits are used in src_modifiers and standalone operands. Autogen
1061// decoder only adds to src_modifiers, so manually add the bits to the other
1062// operands.
// NOTE(review): the signature line (original 1063) was lost in extraction;
// presumably void AMDGPUDisassembler::convertVOP3PDPPInst(MCInst &MI) const.
// Also note each of the four op_sel/neg insertions below lost its opening
// call line (originals 1074/1078/1082/1086), which presumably invoked
// insertNamedMCOperand(MI, MCOperand::createImm(Mods.<field>), ...).
1064 unsigned Opc = MI.getOpcode();
1065 unsigned DescNumOps = MCII->get(Opc).getNumOperands();
1066 auto Mods = collectVOPModifiers(MI, true);
1067
 // Only insert an operand when the decoded instruction is still short of
 // the descriptor's operand count and the opcode actually has that operand.
1068 if (MI.getNumOperands() < DescNumOps &&
1069 AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::vdst_in))
1070 insertNamedMCOperand(MI, MCOperand::createImm(0), AMDGPU::OpName::vdst_in);
1071
1072 if (MI.getNumOperands() < DescNumOps &&
1073 AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::op_sel))
1075 AMDGPU::OpName::op_sel);
1076 if (MI.getNumOperands() < DescNumOps &&
1077 AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::op_sel_hi))
1079 AMDGPU::OpName::op_sel_hi);
1080 if (MI.getNumOperands() < DescNumOps &&
1081 AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::neg_lo))
1083 AMDGPU::OpName::neg_lo);
1084 if (MI.getNumOperands() < DescNumOps &&
1085 AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::neg_hi))
1087 AMDGPU::OpName::neg_hi);
1088}
1089
1090// Create dummy old operand and insert optional operands
// NOTE(review): the signature line (original 1091) was lost in extraction;
// presumably void AMDGPUDisassembler::convertVOPCDPPInst(MCInst &MI) const.
// The two src-modifier insertions below each lost their opening call line
// (originals 1101/1106), presumably
// insertNamedMCOperand(MI, MCOperand::createImm(0), ...).
1092 unsigned Opc = MI.getOpcode();
1093 unsigned DescNumOps = MCII->get(Opc).getNumOperands();
1094
 // Placeholder "old" register operand (reg 0) so the operand list matches
 // the instruction descriptor.
1095 if (MI.getNumOperands() < DescNumOps &&
1096 AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::old))
1097 insertNamedMCOperand(MI, MCOperand::createReg(0), AMDGPU::OpName::old);
1098
1099 if (MI.getNumOperands() < DescNumOps &&
1100 AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::src0_modifiers))
1102 AMDGPU::OpName::src0_modifiers);
1103
1104 if (MI.getNumOperands() < DescNumOps &&
1105 AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::src1_modifiers))
1107 AMDGPU::OpName::src1_modifiers);
1108}
1109
// Replace the LITERAL_CONST sentinel in deferred-immediate operands of an
// FMA "anyk" instruction with the literal value that was decoded earlier
// (stored in the HasLiteral/Literal members).
1110void AMDGPUDisassembler::convertFMAanyK(MCInst &MI, int ImmLitIdx) const {
1111 assert(HasLiteral && "Should have decoded a literal");
1112 const MCInstrDesc &Desc = MCII->get(MI.getOpcode());
1113 unsigned DescNumOps = Desc.getNumOperands();
// NOTE(review): original line 1114 was lost in extraction; from the trailing
// argument it presumably inserted/queried the immDeferred named operand.
1115 AMDGPU::OpName::immDeferred);
1116 assert(DescNumOps == MI.getNumOperands());
1117 for (unsigned I = 0; I < DescNumOps; ++I) {
1118 auto &Op = MI.getOperand(I);
1119 auto OpType = Desc.operands()[I].OperandType;
 // NOTE(review): the second operand-type in this disjunction (original
 // line 1121) was lost in extraction.
1120 bool IsDeferredOp = (OpType == AMDGPU::OPERAND_REG_IMM_FP32_DEFERRED ||
1122 if (Op.isImm() && Op.getImm() == AMDGPU::EncValues::LITERAL_CONST &&
1123 IsDeferredOp)
1124 Op.setImm(Literal);
1125 }
1126}
1127
1128const char* AMDGPUDisassembler::getRegClassName(unsigned RegClassID) const {
1129 return getContext().getRegisterInfo()->
1130 getRegClassName(&AMDGPUMCRegisterClasses[RegClassID]);
1131}
1132
// Emit an error comment into the disassembly stream and return an invalid
// MCOperand. NOTE(review): the first signature line (original 1134) was lost
// in extraction; presumably
// MCOperand AMDGPUDisassembler::errOperand(unsigned V, ...).
1133inline
1135 const Twine& ErrMsg) const {
1136 *CommentStream << "Error: " + ErrMsg;
1137
1138 // ToDo: add support for error operands to MCInst.h
1139 // return MCOperand::createError(V);
1140 return MCOperand();
1141}
1142
// NOTE(review): the body of this next inline helper (original line 1144,
// presumably createRegOperand(unsigned RegId) returning
// MCOperand::createReg(...)) was lost in extraction; only its "inline"
// keyword and closing brace remain visible here.
1143 inline
1145 }
1147
// Create a register operand for register number \p Val inside register class
// \p RegClassID, or an error operand if Val is out of range for the class.
// NOTE(review): the first signature line (original 1149) was lost in
// extraction; presumably
// MCOperand AMDGPUDisassembler::createRegOperand(unsigned RegClassID, ...).
1148inline
1150 unsigned Val) const {
1151 const auto& RegCl = AMDGPUMCRegisterClasses[RegClassID];
1152 if (Val >= RegCl.getNumRegs())
1153 return errOperand(Val, Twine(getRegClassName(RegClassID)) +
1154 ": unknown register " + Twine(Val));
1155 return createRegOperand(RegCl.getRegister(Val));
1156}
1157
// Create a scalar (SGPR/TTMP) register operand. The raw encoding counts in
// 32-bit units, so wider classes divide the value by the class width (the
// "shift" below) and warn when the encoding is not aligned to that width.
// NOTE(review): the first signature line (original 1159) was lost in
// extraction; presumably
// MCOperand AMDGPUDisassembler::createSRegOperand(unsigned SRegClassID, ...).
1158inline
1160 unsigned Val) const {
1161 // ToDo: SI/CI have 104 SGPRs, VI - 102
1162 // Valery: here we accepting as much as we can, let assembler sort it out
1163 int shift = 0;
1164 switch (SRegClassID) {
1165 case AMDGPU::SGPR_32RegClassID:
1166 case AMDGPU::TTMP_32RegClassID:
1167 break;
1168 case AMDGPU::SGPR_64RegClassID:
1169 case AMDGPU::TTMP_64RegClassID:
1170 shift = 1;
1171 break;
1172 case AMDGPU::SGPR_96RegClassID:
1173 case AMDGPU::TTMP_96RegClassID:
1174 case AMDGPU::SGPR_128RegClassID:
1175 case AMDGPU::TTMP_128RegClassID:
1176 // ToDo: unclear if s[100:104] is available on VI. Can we use VCC as SGPR in
1177 // this bundle?
1178 case AMDGPU::SGPR_256RegClassID:
1179 case AMDGPU::TTMP_256RegClassID:
1180 // ToDo: unclear if s[96:104] is available on VI. Can we use VCC as SGPR in
1181 // this bundle?
1182 case AMDGPU::SGPR_288RegClassID:
1183 case AMDGPU::TTMP_288RegClassID:
1184 case AMDGPU::SGPR_320RegClassID:
1185 case AMDGPU::TTMP_320RegClassID:
1186 case AMDGPU::SGPR_352RegClassID:
1187 case AMDGPU::TTMP_352RegClassID:
1188 case AMDGPU::SGPR_384RegClassID:
1189 case AMDGPU::TTMP_384RegClassID:
1190 case AMDGPU::SGPR_512RegClassID:
1191 case AMDGPU::TTMP_512RegClassID:
1192 shift = 2;
1193 break;
1194 // ToDo: unclear if s[88:104] is available on VI. Can we use VCC as SGPR in
1195 // this bundle?
1196 default:
1197 llvm_unreachable("unhandled register class");
1198 }
1199
 // Misaligned scalar encodings are accepted but flagged in the output.
1200 if (Val % (1 << shift)) {
1201 *CommentStream << "Warning: " << getRegClassName(SRegClassID)
1202 << ": scalar reg isn't aligned " << Val;
1203 }
1204
1205 return createRegOperand(SRegClassID, Val >> shift);
1206}
1207
// Create a 16-bit VGPR operand: each 32-bit VGPR index maps to two VGPR_16
// registers (low half at 2*RegIdx, high half at 2*RegIdx+1).
// NOTE(review): the first signature line (original 1208) was lost in
// extraction; presumably
// MCOperand AMDGPUDisassembler::createVGPR16Operand(unsigned RegIdx, ...).
1209 bool IsHi) const {
1210 unsigned RegIdxInVGPR16 = RegIdx * 2 + (IsHi ? 1 : 0);
1211 return createRegOperand(AMDGPU::VGPR_16RegClassID, RegIdxInVGPR16);
1212}
1213
1214// Decode Literals for insts which always have a literal in the encoding
// NOTE(review): the signature (original 1215-1216) was lost in extraction;
// presumably MCOperand
// AMDGPUDisassembler::decodeMandatoryLiteralConstant(unsigned Val) const.
// The assert's condition line (original 1219) is also missing below.
// Caches the literal in HasLiteral/Literal and rejects a second, different
// literal in the same instruction.
1217 if (HasLiteral) {
1218 assert(
1220 "Should only decode multiple kimm with VOPD, check VSrc operand types");
1221 if (Literal != Val)
1222 return errOperand(Val, "More than one unique literal is illegal");
1223 }
1224 HasLiteral = true;
1225 Literal = Val;
1226 return MCOperand::createImm(Literal);
1227}
1228
// Consume the 32-bit literal dword that follows the instruction (if not
// already consumed) and return it as an immediate operand. With ExtendFP64
// the 32 bits are placed in the high half of a 64-bit value.
// NOTE(review): the signature line (original 1229) was lost in extraction;
// presumably MCOperand
// AMDGPUDisassembler::decodeLiteralConstant(bool ExtendFP64) const.
1230 // For now all literal constants are supposed to be unsigned integer
1231 // ToDo: deal with signed/unsigned 64-bit integer constants
1232 // ToDo: deal with float/double constants
1233 if (!HasLiteral) {
1234 if (Bytes.size() < 4) {
1235 return errOperand(0, "cannot read literal, inst bytes left " +
1236 Twine(Bytes.size()));
1237 }
1238 HasLiteral = true;
1239 Literal = Literal64 = eatBytes<uint32_t>(Bytes);
1240 if (ExtendFP64)
1241 Literal64 <<= 32;
1242 }
1243 return MCOperand::createImm(ExtendFP64 ? Literal64 : Literal);
1244}
1245
// Map an inline-integer encoding to its value: encodings up to
// INLINE_INTEGER_C_POSITIVE_MAX are the non-negative values, the rest are
// the negative values counting down from there.
// NOTE(review): the signature line (original 1246) was lost in extraction;
// presumably static MCOperand decodeIntImmed(unsigned Imm).
1247 using namespace AMDGPU::EncValues;
1248
1249 assert(Imm >= INLINE_INTEGER_C_MIN && Imm <= INLINE_INTEGER_C_MAX);
1250 return MCOperand::createImm((Imm <= INLINE_INTEGER_C_POSITIVE_MAX) ?
1251 (static_cast<int64_t>(Imm) - INLINE_INTEGER_C_MIN) :
1252 (INLINE_INTEGER_C_POSITIVE_MAX - static_cast<int64_t>(Imm)));
1253 // Cast prevents negative overflow.
1254}
1255
1256static int64_t getInlineImmVal32(unsigned Imm) {
1257 switch (Imm) {
1258 case 240:
1259 return llvm::bit_cast<uint32_t>(0.5f);
1260 case 241:
1261 return llvm::bit_cast<uint32_t>(-0.5f);
1262 case 242:
1263 return llvm::bit_cast<uint32_t>(1.0f);
1264 case 243:
1265 return llvm::bit_cast<uint32_t>(-1.0f);
1266 case 244:
1267 return llvm::bit_cast<uint32_t>(2.0f);
1268 case 245:
1269 return llvm::bit_cast<uint32_t>(-2.0f);
1270 case 246:
1271 return llvm::bit_cast<uint32_t>(4.0f);
1272 case 247:
1273 return llvm::bit_cast<uint32_t>(-4.0f);
1274 case 248: // 1 / (2 * PI)
1275 return 0x3e22f983;
1276 default:
1277 llvm_unreachable("invalid fp inline imm");
1278 }
1279}
1280
1281static int64_t getInlineImmVal64(unsigned Imm) {
1282 switch (Imm) {
1283 case 240:
1284 return llvm::bit_cast<uint64_t>(0.5);
1285 case 241:
1286 return llvm::bit_cast<uint64_t>(-0.5);
1287 case 242:
1288 return llvm::bit_cast<uint64_t>(1.0);
1289 case 243:
1290 return llvm::bit_cast<uint64_t>(-1.0);
1291 case 244:
1292 return llvm::bit_cast<uint64_t>(2.0);
1293 case 245:
1294 return llvm::bit_cast<uint64_t>(-2.0);
1295 case 246:
1296 return llvm::bit_cast<uint64_t>(4.0);
1297 case 247:
1298 return llvm::bit_cast<uint64_t>(-4.0);
1299 case 248: // 1 / (2 * PI)
1300 return 0x3fc45f306dc9c882;
1301 default:
1302 llvm_unreachable("invalid fp inline imm");
1303 }
1304}
1305
1306static int64_t getInlineImmValF16(unsigned Imm) {
1307 switch (Imm) {
1308 case 240:
1309 return 0x3800;
1310 case 241:
1311 return 0xB800;
1312 case 242:
1313 return 0x3C00;
1314 case 243:
1315 return 0xBC00;
1316 case 244:
1317 return 0x4000;
1318 case 245:
1319 return 0xC000;
1320 case 246:
1321 return 0x4400;
1322 case 247:
1323 return 0xC400;
1324 case 248: // 1 / (2 * PI)
1325 return 0x3118;
1326 default:
1327 llvm_unreachable("invalid fp inline imm");
1328 }
1329}
1330
1331static int64_t getInlineImmValBF16(unsigned Imm) {
1332 switch (Imm) {
1333 case 240:
1334 return 0x3F00;
1335 case 241:
1336 return 0xBF00;
1337 case 242:
1338 return 0x3F80;
1339 case 243:
1340 return 0xBF80;
1341 case 244:
1342 return 0x4000;
1343 case 245:
1344 return 0xC000;
1345 case 246:
1346 return 0x4080;
1347 case 247:
1348 return 0xC080;
1349 case 248: // 1 / (2 * PI)
1350 return 0x3E22;
1351 default:
1352 llvm_unreachable("invalid fp inline imm");
1353 }
1354}
1355
// Dispatch a 16-bit inline FP constant to the BF16 or F16 table depending
// on the operand semantics.
// NOTE(review): the condition line (original 1357) was lost in extraction;
// presumably `return (Sema == BF16) ? getInlineImmValBF16(Imm)` — confirm
// against upstream.
1356static int64_t getInlineImmVal16(unsigned Imm, AMDGPU::OperandSemantics Sema) {
1358 : getInlineImmValF16(Imm);
1359}
1360
// Decode an inline floating-point constant encoding into an immediate
// operand of the requested width.
// NOTE(review): extraction dropped the remaining signature lines (originals
// 1362-1364, presumably the Sema parameter and assert/using lines) and the
// return statements for the 32- and 64-bit cases (originals 1373 and 1375,
// presumably returning getInlineImmVal32(Imm) / getInlineImmVal64(Imm)).
1361MCOperand AMDGPUDisassembler::decodeFPImmed(unsigned ImmWidth, unsigned Imm,
1365
1366 // ToDo: case 248: 1/(2*PI) - is allowed only on VI
1367 // ImmWidth 0 is a default case where operand should not allow immediates.
1368 // Imm value is still decoded into 32 bit immediate operand, inst printer will
1369 // use it to print verbose error message.
1370 switch (ImmWidth) {
1371 case 0:
1372 case 32:
1374 case 64:
1376 case 16:
1377 return MCOperand::createImm(getInlineImmVal16(Imm, Sema));
1378 default:
1379 llvm_unreachable("implement me");
1380 }
1381}
1382
// Map an operand width to the corresponding VGPR register-class ID.
// NOTE(review): the signature line (original 1383) was lost in extraction;
// presumably unsigned AMDGPUDisassembler::getVgprClassId(const OpWidthTy
// Width) const.
1384 using namespace AMDGPU;
1385
1386 assert(OPW_FIRST_ <= Width && Width < OPW_LAST_);
1387 switch (Width) {
1388 default: // fall
1389 case OPW32:
1390 case OPW16:
1391 case OPWV216:
1392 return VGPR_32RegClassID;
1393 case OPW64:
1394 case OPWV232: return VReg_64RegClassID;
1395 case OPW96: return VReg_96RegClassID;
1396 case OPW128: return VReg_128RegClassID;
1397 case OPW160: return VReg_160RegClassID;
1398 case OPW256: return VReg_256RegClassID;
1399 case OPW288: return VReg_288RegClassID;
1400 case OPW320: return VReg_320RegClassID;
1401 case OPW352: return VReg_352RegClassID;
1402 case OPW384: return VReg_384RegClassID;
1403 case OPW512: return VReg_512RegClassID;
1404 case OPW1024: return VReg_1024RegClassID;
1405 }
1406}
1407
// Map an operand width to the corresponding AGPR register-class ID.
// NOTE(review): the signature line (original 1408) was lost in extraction;
// presumably unsigned AMDGPUDisassembler::getAgprClassId(const OpWidthTy
// Width) const.
1409 using namespace AMDGPU;
1410
1411 assert(OPW_FIRST_ <= Width && Width < OPW_LAST_);
1412 switch (Width) {
1413 default: // fall
1414 case OPW32:
1415 case OPW16:
1416 case OPWV216:
1417 return AGPR_32RegClassID;
1418 case OPW64:
1419 case OPWV232: return AReg_64RegClassID;
1420 case OPW96: return AReg_96RegClassID;
1421 case OPW128: return AReg_128RegClassID;
1422 case OPW160: return AReg_160RegClassID;
1423 case OPW256: return AReg_256RegClassID;
1424 case OPW288: return AReg_288RegClassID;
1425 case OPW320: return AReg_320RegClassID;
1426 case OPW352: return AReg_352RegClassID;
1427 case OPW384: return AReg_384RegClassID;
1428 case OPW512: return AReg_512RegClassID;
1429 case OPW1024: return AReg_1024RegClassID;
1430 }
1431}
1432
// Map an operand width to the corresponding SGPR register-class ID.
// NOTE(review): the signature line (original 1434) was lost in extraction;
// presumably unsigned AMDGPUDisassembler::getSgprClassId(const OpWidthTy
// Width) const. Note there is no OPW1024 case here (SGPRs top out at 512).
1433
1435 using namespace AMDGPU;
1436
1437 assert(OPW_FIRST_ <= Width && Width < OPW_LAST_);
1438 switch (Width) {
1439 default: // fall
1440 case OPW32:
1441 case OPW16:
1442 case OPWV216:
1443 return SGPR_32RegClassID;
1444 case OPW64:
1445 case OPWV232: return SGPR_64RegClassID;
1446 case OPW96: return SGPR_96RegClassID;
1447 case OPW128: return SGPR_128RegClassID;
1448 case OPW160: return SGPR_160RegClassID;
1449 case OPW256: return SGPR_256RegClassID;
1450 case OPW288: return SGPR_288RegClassID;
1451 case OPW320: return SGPR_320RegClassID;
1452 case OPW352: return SGPR_352RegClassID;
1453 case OPW384: return SGPR_384RegClassID;
1454 case OPW512: return SGPR_512RegClassID;
1455 }
1456}
1457
// Map an operand width to the corresponding TTMP (trap temporary)
// register-class ID.
// NOTE(review): the signature line (original 1458) was lost in extraction;
// presumably unsigned AMDGPUDisassembler::getTtmpClassId(const OpWidthTy
// Width) const. Unlike the SGPR table there are no 96/160-dword cases.
1459 using namespace AMDGPU;
1460
1461 assert(OPW_FIRST_ <= Width && Width < OPW_LAST_);
1462 switch (Width) {
1463 default: // fall
1464 case OPW32:
1465 case OPW16:
1466 case OPWV216:
1467 return TTMP_32RegClassID;
1468 case OPW64:
1469 case OPWV232: return TTMP_64RegClassID;
1470 case OPW128: return TTMP_128RegClassID;
1471 case OPW256: return TTMP_256RegClassID;
1472 case OPW288: return TTMP_288RegClassID;
1473 case OPW320: return TTMP_320RegClassID;
1474 case OPW352: return TTMP_352RegClassID;
1475 case OPW384: return TTMP_384RegClassID;
1476 case OPW512: return TTMP_512RegClassID;
1477 }
1478}
1479
1480int AMDGPUDisassembler::getTTmpIdx(unsigned Val) const {
1481 using namespace AMDGPU::EncValues;
1482
1483 unsigned TTmpMin = isGFX9Plus() ? TTMP_GFX9PLUS_MIN : TTMP_VI_MIN;
1484 unsigned TTmpMax = isGFX9Plus() ? TTMP_GFX9PLUS_MAX : TTMP_VI_MAX;
1485
1486 return (TTmpMin <= Val && Val <= TTmpMax)? Val - TTmpMin : -1;
1487}
1488
// Decode a 10-bit (enum10) source operand encoding. Bit 9 selects AGPR vs
// VGPR; values in the VGPR range become vector registers, everything else is
// handed to decodeNonVGPRSrcOp.
// NOTE(review): the first signature line (original 1489) was lost in
// extraction; presumably
// MCOperand AMDGPUDisassembler::decodeSrcOp(const OpWidthTy Width, ...).
1490 bool MandatoryLiteral,
1491 unsigned ImmWidth,
1492 AMDGPU::OperandSemantics Sema) const {
1493 using namespace AMDGPU::EncValues;
1494
1495 assert(Val < 1024); // enum10
1496
1497 bool IsAGPR = Val & 512;
1498 Val &= 511;
1499
1500 if (VGPR_MIN <= Val && Val <= VGPR_MAX) {
1501 return createRegOperand(IsAGPR ? getAgprClassId(Width)
1502 : getVgprClassId(Width), Val - VGPR_MIN);
1503 }
1504 return decodeNonVGPRSrcOp(Width, Val & 0xFF, MandatoryLiteral, ImmWidth,
1505 Sema);
1506}
1507
// Decode an 8-bit non-VGPR source encoding: SGPR, TTMP, inline integer or
// FP constant, literal, or a special register.
// NOTE(review): the signature lines (originals 1508-1509) were lost in
// extraction; presumably
// MCOperand AMDGPUDisassembler::decodeNonVGPRSrcOp(const OpWidthTy Width,
// unsigned Val, ...).
1510 bool MandatoryLiteral, unsigned ImmWidth,
1511 AMDGPU::OperandSemantics Sema) const {
1512 // Cases when Val{8} is 1 (vgpr, agpr or true 16 vgpr) should have been
1513 // decoded earlier.
1514 assert(Val < (1 << 8) && "9-bit Src encoding when Val{8} is 0");
1515 using namespace AMDGPU::EncValues;
1516
1517 if (Val <= SGPR_MAX) {
1518 // "SGPR_MIN <= Val" is always true and causes compilation warning.
1519 static_assert(SGPR_MIN == 0);
1520 return createSRegOperand(getSgprClassId(Width), Val - SGPR_MIN);
1521 }
1522
1523 int TTmpIdx = getTTmpIdx(Val);
1524 if (TTmpIdx >= 0) {
1525 return createSRegOperand(getTtmpClassId(Width), TTmpIdx);
1526 }
1527
1528 if (INLINE_INTEGER_C_MIN <= Val && Val <= INLINE_INTEGER_C_MAX)
1529 return decodeIntImmed(Val);
1530
1531 if (INLINE_FLOATING_C_MIN <= Val && Val <= INLINE_FLOATING_C_MAX)
1532 return decodeFPImmed(ImmWidth, Val, Sema);
1533
1534 if (Val == LITERAL_CONST) {
1535 if (MandatoryLiteral)
1536 // Keep a sentinel value for deferred setting
1537 return MCOperand::createImm(LITERAL_CONST);
1538 else
// NOTE(review): the else-branch return (original line 1539) was lost in
// extraction; presumably it returns decodeLiteralConstant(...) — confirm
// against upstream.
1540 }
1541
1542 switch (Width) {
1543 case OPW32:
1544 case OPW16:
1545 case OPWV216:
1546 return decodeSpecialReg32(Val);
1547 case OPW64:
1548 case OPWV232:
1549 return decodeSpecialReg64(Val);
1550 default:
1551 llvm_unreachable("unexpected immediate type");
1552 }
1553}
1554
1555// Bit 0 of DstY isn't stored in the instruction, because it's always the
1556// opposite of bit 0 of DstX.
// NOTE(review): the signature line (original 1557) was lost in extraction;
// presumably
// MCOperand AMDGPUDisassembler::decodeVOPDDstYOp(MCInst &Inst, ...).
// Original line 1565 (between the bit fix-up and the return) is also
// missing; presumably it inserts the reconstructed operand into the
// instruction — confirm against upstream.
1558 unsigned Val) const {
1559 int VDstXInd =
1560 AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::vdstX);
1561 assert(VDstXInd != -1);
1562 assert(Inst.getOperand(VDstXInd).isReg());
1563 unsigned XDstReg = MRI.getEncodingValue(Inst.getOperand(VDstXInd).getReg());
 // Reconstruct DstY bit 0 as the complement of DstX bit 0.
1564 Val |= ~XDstReg & 1;
1566 return createRegOperand(getVgprClassId(Width), Val);
1567}
1568
// Decode a 32-bit special/architected register encoding (FLAT_SCR halves,
// VCC halves, M0, EXEC halves, source-operand pseudo registers, ...).
// NOTE(review): the signature line (original 1569) was lost in extraction;
// presumably MCOperand AMDGPUDisassembler::decodeSpecialReg32(unsigned Val)
// const.
1570 using namespace AMDGPU;
1571
1572 switch (Val) {
1573 // clang-format off
1574 case 102: return createRegOperand(FLAT_SCR_LO);
1575 case 103: return createRegOperand(FLAT_SCR_HI);
1576 case 104: return createRegOperand(XNACK_MASK_LO);
1577 case 105: return createRegOperand(XNACK_MASK_HI);
1578 case 106: return createRegOperand(VCC_LO);
1579 case 107: return createRegOperand(VCC_HI);
1580 case 108: return createRegOperand(TBA_LO);
1581 case 109: return createRegOperand(TBA_HI);
1582 case 110: return createRegOperand(TMA_LO);
1583 case 111: return createRegOperand(TMA_HI);
 // Encodings 124/125 swap meaning (M0 vs SGPR_NULL) at gfx11.
1584 case 124:
1585 return isGFX11Plus() ? createRegOperand(SGPR_NULL) : createRegOperand(M0);
1586 case 125:
1587 return isGFX11Plus() ? createRegOperand(M0) : createRegOperand(SGPR_NULL);
1588 case 126: return createRegOperand(EXEC_LO);
1589 case 127: return createRegOperand(EXEC_HI);
1590 case 235: return createRegOperand(SRC_SHARED_BASE_LO);
1591 case 236: return createRegOperand(SRC_SHARED_LIMIT_LO);
1592 case 237: return createRegOperand(SRC_PRIVATE_BASE_LO);
1593 case 238: return createRegOperand(SRC_PRIVATE_LIMIT_LO);
1594 case 239: return createRegOperand(SRC_POPS_EXITING_WAVE_ID);
1595 case 251: return createRegOperand(SRC_VCCZ);
1596 case 252: return createRegOperand(SRC_EXECZ);
1597 case 253: return createRegOperand(SRC_SCC);
1598 case 254: return createRegOperand(LDS_DIRECT);
1599 default: break;
1600 // clang-format on
1601 }
1602 return errOperand(Val, "unknown operand encoding " + Twine(Val));
1603}
1604
// Decode a 64-bit special register encoding (full register pairs of the
// 32-bit table above).
// NOTE(review): the signature line (original 1605) was lost in extraction;
// presumably MCOperand AMDGPUDisassembler::decodeSpecialReg64(unsigned Val)
// const.
1606 using namespace AMDGPU;
1607
1608 switch (Val) {
1609 case 102: return createRegOperand(FLAT_SCR);
1610 case 104: return createRegOperand(XNACK_MASK);
1611 case 106: return createRegOperand(VCC);
1612 case 108: return createRegOperand(TBA);
1613 case 110: return createRegOperand(TMA);
 // SGPR_NULL moves from encoding 125 to 124 at gfx11; the other encoding
 // falls through to the error path.
1614 case 124:
1615 if (isGFX11Plus())
1616 return createRegOperand(SGPR_NULL);
1617 break;
1618 case 125:
1619 if (!isGFX11Plus())
1620 return createRegOperand(SGPR_NULL);
1621 break;
1622 case 126: return createRegOperand(EXEC);
1623 case 235: return createRegOperand(SRC_SHARED_BASE);
1624 case 236: return createRegOperand(SRC_SHARED_LIMIT);
1625 case 237: return createRegOperand(SRC_PRIVATE_BASE);
1626 case 238: return createRegOperand(SRC_PRIVATE_LIMIT);
1627 case 239: return createRegOperand(SRC_POPS_EXITING_WAVE_ID);
1628 case 251: return createRegOperand(SRC_VCCZ);
1629 case 252: return createRegOperand(SRC_EXECZ);
1630 case 253: return createRegOperand(SRC_SCC);
1631 default: break;
1632 }
1633 return errOperand(Val, "unknown operand encoding " + Twine(Val));
1634}
1635
// Decode an SDWA source operand. On GFX9/GFX10 the 9-bit encoding covers
// VGPRs, SGPRs, TTMPs, inline constants and special registers; on VI the
// value is always a VGPR index.
// NOTE(review): the return-type line of the signature (original 1636,
// presumably "MCOperand") was lost in extraction.
1637AMDGPUDisassembler::decodeSDWASrc(const OpWidthTy Width, const unsigned Val,
1638 unsigned ImmWidth,
1639 AMDGPU::OperandSemantics Sema) const {
1640 using namespace AMDGPU::SDWA;
1641 using namespace AMDGPU::EncValues;
1642
1643 if (STI.hasFeature(AMDGPU::FeatureGFX9) ||
1644 STI.hasFeature(AMDGPU::FeatureGFX10)) {
1645 // XXX: cast to int is needed to avoid stupid warning:
1646 // compare with unsigned is always true
1647 if (int(SDWA9EncValues::SRC_VGPR_MIN) <= int(Val) &&
1648 Val <= SDWA9EncValues::SRC_VGPR_MAX) {
1649 return createRegOperand(getVgprClassId(Width),
1650 Val - SDWA9EncValues::SRC_VGPR_MIN);
1651 }
1652 if (SDWA9EncValues::SRC_SGPR_MIN <= Val &&
1653 Val <= (isGFX10Plus() ? SDWA9EncValues::SRC_SGPR_MAX_GFX10
1654 : SDWA9EncValues::SRC_SGPR_MAX_SI)) {
1655 return createSRegOperand(getSgprClassId(Width),
1656 Val - SDWA9EncValues::SRC_SGPR_MIN);
1657 }
1658 if (SDWA9EncValues::SRC_TTMP_MIN <= Val &&
1659 Val <= SDWA9EncValues::SRC_TTMP_MAX) {
1660 return createSRegOperand(getTtmpClassId(Width),
1661 Val - SDWA9EncValues::SRC_TTMP_MIN);
1662 }
1663
 // Remaining encodings are rebased to the scalar-source value space
 // before being interpreted as inline constants or special registers.
1664 const unsigned SVal = Val - SDWA9EncValues::SRC_SGPR_MIN;
1665
1666 if (INLINE_INTEGER_C_MIN <= SVal && SVal <= INLINE_INTEGER_C_MAX)
1667 return decodeIntImmed(SVal);
1668
1669 if (INLINE_FLOATING_C_MIN <= SVal && SVal <= INLINE_FLOATING_C_MAX)
1670 return decodeFPImmed(ImmWidth, SVal, Sema);
1671
1672 return decodeSpecialReg32(SVal);
1673 } else if (STI.hasFeature(AMDGPU::FeatureVolcanicIslands)) {
1674 return createRegOperand(getVgprClassId(Width), Val);
1675 }
1676 llvm_unreachable("unsupported target");
1677}
1678
1681}
1682
1685}
1686
// Decode the SDWA VOPC destination: either VCC (when the VCC bit is clear)
// or an explicit SGPR/TTMP/special destination, sized by the wavefront
// width.
// NOTE(review): the signature line (original 1687) was lost in extraction;
// presumably
// MCOperand AMDGPUDisassembler::decodeSDWAVopcDst(unsigned Val) const.
1688 using namespace AMDGPU::SDWA;
1689
1690 assert((STI.hasFeature(AMDGPU::FeatureGFX9) ||
1691 STI.hasFeature(AMDGPU::FeatureGFX10)) &&
1692 "SDWAVopcDst should be present only on GFX9+");
1693
1694 bool IsWave64 = STI.hasFeature(AMDGPU::FeatureWavefrontSize64);
1695
1696 if (Val & SDWA9EncValues::VOPC_DST_VCC_MASK) {
1697 Val &= SDWA9EncValues::VOPC_DST_SGPR_MASK;
1698
1699 int TTmpIdx = getTTmpIdx(Val);
1700 if (TTmpIdx >= 0) {
1701 auto TTmpClsId = getTtmpClassId(IsWave64 ? OPW64 : OPW32);
1702 return createSRegOperand(TTmpClsId, TTmpIdx);
1703 } else if (Val > SGPR_MAX) {
1704 return IsWave64 ? decodeSpecialReg64(Val)
1705 : decodeSpecialReg32(Val);
1706 } else {
1707 return createSRegOperand(getSgprClassId(IsWave64 ? OPW64 : OPW32), Val);
1708 }
1709 } else {
1710 return createRegOperand(IsWave64 ? AMDGPU::VCC : AMDGPU::VCC_LO);
1711 }
1712}
1713
// NOTE(review): extraction dropped the one-line signatures of every short
// helper in this run (originals 1714, 1720, 1724-1725, 1730, 1734-1736,
// 1740-1744, 1748, 1752, 1756, 1760, 1764, 1768-1769). From the visible
// bodies these are, presumably: decodeBoolReg, decodeSplitBarrier,
// decodeDpp8FI, and the isVI/isGFX9x/isGFX10x/isGFX11x/isGFX12x /
// hasArchitectedFlatScratch / hasKernargPreload subtarget queries — confirm
// names against upstream before relying on them. Each body simply forwards
// to decodeSrcOp or tests an STI feature bit / AMDGPU helper.
1715 return STI.hasFeature(AMDGPU::FeatureWavefrontSize64)
1716 ? decodeSrcOp(OPW64, Val)
1717 : decodeSrcOp(OPW32, Val);
1718}
1719
1721 return decodeSrcOp(OPW32, Val);
1722}
1723
1726 return MCOperand();
1727 return MCOperand::createImm(Val);
1728}
1729
1731 return STI.hasFeature(AMDGPU::FeatureVolcanicIslands);
1732}
1733
1735
1737 return STI.hasFeature(AMDGPU::FeatureGFX90AInsts);
1738}
1739
1741
1743
1745 return AMDGPU::isGFX10Plus(STI);
1746}
1747
1749 return STI.hasFeature(AMDGPU::FeatureGFX11);
1750}
1751
1753 return AMDGPU::isGFX11Plus(STI);
1754}
1755
1757 return STI.hasFeature(AMDGPU::FeatureGFX12);
1758}
1759
1761 return AMDGPU::isGFX12Plus(STI);
1762}
1763
1765 return STI.hasFeature(AMDGPU::FeatureArchitectedFlatScratch);
1766}
1767
1770}
1771
1772//===----------------------------------------------------------------------===//
1773// AMDGPU specific symbol handling
1774//===----------------------------------------------------------------------===//
1775
1776/// Print a string describing the reserved bit range specified by Mask with
1777/// offset BaseBytes for use in error comments. Mask is a single continuous
1778/// range of 1s surrounded by zeros. The format here is meant to align with the
1779/// tables that describe these bits in llvm.org/docs/AMDGPUUsage.html.
1780static SmallString<32> getBitRangeFromMask(uint32_t Mask, unsigned BaseBytes) {
1781 SmallString<32> Result;
1782 raw_svector_ostream S(Result);
1783
1784 int TrailingZeros = llvm::countr_zero(Mask);
1785 int PopCount = llvm::popcount(Mask);
1786
1787 if (PopCount == 1) {
1788 S << "bit (" << (TrailingZeros + BaseBytes * CHAR_BIT) << ')';
1789 } else {
1790 S << "bits in range ("
1791 << (TrailingZeros + PopCount - 1 + BaseBytes * CHAR_BIT) << ':'
1792 << (TrailingZeros + BaseBytes * CHAR_BIT) << ')';
1793 }
1794
1795 return Result;
1796}
1797
1798#define GET_FIELD(MASK) (AMDHSA_BITS_GET(FourByteBuffer, MASK))
1799#define PRINT_DIRECTIVE(DIRECTIVE, MASK) \
1800 do { \
1801 KdStream << Indent << DIRECTIVE " " << GET_FIELD(MASK) << '\n'; \
1802 } while (0)
1803#define PRINT_PSEUDO_DIRECTIVE_COMMENT(DIRECTIVE, MASK) \
1804 do { \
1805 KdStream << Indent << MAI.getCommentString() << ' ' << DIRECTIVE " " \
1806 << GET_FIELD(MASK) << '\n'; \
1807 } while (0)
1808
1809#define CHECK_RESERVED_BITS_IMPL(MASK, DESC, MSG) \
1810 do { \
1811 if (FourByteBuffer & (MASK)) { \
1812 return createStringError(std::errc::invalid_argument, \
1813 "kernel descriptor " DESC \
1814 " reserved %s set" MSG, \
1815 getBitRangeFromMask((MASK), 0).c_str()); \
1816 } \
1817 } while (0)
1818
1819#define CHECK_RESERVED_BITS(MASK) CHECK_RESERVED_BITS_IMPL(MASK, #MASK, "")
1820#define CHECK_RESERVED_BITS_MSG(MASK, MSG) \
1821 CHECK_RESERVED_BITS_IMPL(MASK, #MASK, ", " MSG)
1822#define CHECK_RESERVED_BITS_DESC(MASK, DESC) \
1823 CHECK_RESERVED_BITS_IMPL(MASK, DESC, "")
1824#define CHECK_RESERVED_BITS_DESC_MSG(MASK, DESC, MSG) \
1825 CHECK_RESERVED_BITS_IMPL(MASK, DESC, ", " MSG)
1826
1827// NOLINTNEXTLINE(readability-identifier-naming)
// Disassemble the COMPUTE_PGM_RSRC1 dword of a kernel descriptor into
// .amdhsa_* assembler directives written to KdStream, validating reserved
// bits along the way.
// NOTE(review): the first signature line (original 1828, presumably
// Expected<bool> AMDGPUDisassembler::decodeCOMPUTE_PGM_RSRC1(...) was lost
// in extraction.
1829 uint32_t FourByteBuffer, raw_string_ostream &KdStream) const {
1830 using namespace amdhsa;
1831 StringRef Indent = "\t";
1832
1833 // We cannot accurately backward compute #VGPRs used from
1834 // GRANULATED_WORKITEM_VGPR_COUNT. But we are concerned with getting the same
1835 // value of GRANULATED_WORKITEM_VGPR_COUNT in the reassembled binary. So we
1836 // simply calculate the inverse of what the assembler does.
1837
1838 uint32_t GranulatedWorkitemVGPRCount =
1839 GET_FIELD(COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT);
1840
1841 uint32_t NextFreeVGPR =
1842 (GranulatedWorkitemVGPRCount + 1) *
1843 AMDGPU::IsaInfo::getVGPREncodingGranule(&STI, EnableWavefrontSize32);
1844
1845 KdStream << Indent << ".amdhsa_next_free_vgpr " << NextFreeVGPR << '\n';
1846
1847 // We cannot backward compute values used to calculate
1848 // GRANULATED_WAVEFRONT_SGPR_COUNT. Hence the original values for following
1849 // directives can't be computed:
1850 // .amdhsa_reserve_vcc
1851 // .amdhsa_reserve_flat_scratch
1852 // .amdhsa_reserve_xnack_mask
1853 // They take their respective default values if not specified in the assembly.
1854 //
1855 // GRANULATED_WAVEFRONT_SGPR_COUNT
1856 // = f(NEXT_FREE_SGPR + VCC + FLAT_SCRATCH + XNACK_MASK)
1857 //
1858 // We compute the inverse as though all directives apart from NEXT_FREE_SGPR
1859 // are set to 0. So while disassembling we consider that:
1860 //
1861 // GRANULATED_WAVEFRONT_SGPR_COUNT
1862 // = f(NEXT_FREE_SGPR + 0 + 0 + 0)
1863 //
1864 // The disassembler cannot recover the original values of those 3 directives.
1865
1866 uint32_t GranulatedWavefrontSGPRCount =
1867 GET_FIELD(COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT);
1868
1869 if (isGFX10Plus())
1870 CHECK_RESERVED_BITS_MSG(COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT,
1871 "must be zero on gfx10+");
1872
// NOTE(review): the multiplier expression line (original 1874, presumably
// AMDGPU::IsaInfo::getSGPREncodingGranule(&STI)) was lost in extraction.
1873 uint32_t NextFreeSGPR = (GranulatedWavefrontSGPRCount + 1) *
1875
// NOTE(review): a conditional line (original 1877, presumably guarding the
// flat-scratch directive with !hasArchitectedFlatScratch()) was lost in
// extraction between the next two stream writes.
1876 KdStream << Indent << ".amdhsa_reserve_vcc " << 0 << '\n';
1878 KdStream << Indent << ".amdhsa_reserve_flat_scratch " << 0 << '\n';
1879 KdStream << Indent << ".amdhsa_reserve_xnack_mask " << 0 << '\n';
1880 KdStream << Indent << ".amdhsa_next_free_sgpr " << NextFreeSGPR << "\n";
1881
1882 CHECK_RESERVED_BITS(COMPUTE_PGM_RSRC1_PRIORITY);
1883
1884 PRINT_DIRECTIVE(".amdhsa_float_round_mode_32",
1885 COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_32);
1886 PRINT_DIRECTIVE(".amdhsa_float_round_mode_16_64",
1887 COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_16_64);
1888 PRINT_DIRECTIVE(".amdhsa_float_denorm_mode_32",
1889 COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_32);
1890 PRINT_DIRECTIVE(".amdhsa_float_denorm_mode_16_64",
1891 COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_16_64);
1892
1893 CHECK_RESERVED_BITS(COMPUTE_PGM_RSRC1_PRIV);
1894
1895 if (!isGFX12Plus())
1896 PRINT_DIRECTIVE(".amdhsa_dx10_clamp",
1897 COMPUTE_PGM_RSRC1_GFX6_GFX11_ENABLE_DX10_CLAMP);
1898
1899 CHECK_RESERVED_BITS(COMPUTE_PGM_RSRC1_DEBUG_MODE);
1900
1901 if (!isGFX12Plus())
1902 PRINT_DIRECTIVE(".amdhsa_ieee_mode",
1903 COMPUTE_PGM_RSRC1_GFX6_GFX11_ENABLE_IEEE_MODE);
1904
1905 CHECK_RESERVED_BITS(COMPUTE_PGM_RSRC1_BULKY);
1906 CHECK_RESERVED_BITS(COMPUTE_PGM_RSRC1_CDBG_USER);
1907
1908 if (isGFX9Plus())
1909 PRINT_DIRECTIVE(".amdhsa_fp16_overflow", COMPUTE_PGM_RSRC1_GFX9_PLUS_FP16_OVFL);
1910
1911 if (!isGFX9Plus())
1912 CHECK_RESERVED_BITS_DESC_MSG(COMPUTE_PGM_RSRC1_GFX6_GFX8_RESERVED0,
1913 "COMPUTE_PGM_RSRC1", "must be zero pre-gfx9");
1914
1915 CHECK_RESERVED_BITS_DESC(COMPUTE_PGM_RSRC1_RESERVED1, "COMPUTE_PGM_RSRC1");
1916
1917 if (!isGFX10Plus())
1918 CHECK_RESERVED_BITS_DESC_MSG(COMPUTE_PGM_RSRC1_GFX6_GFX9_RESERVED2,
1919 "COMPUTE_PGM_RSRC1", "must be zero pre-gfx10");
1920
1921 if (isGFX10Plus()) {
1922 PRINT_DIRECTIVE(".amdhsa_workgroup_processor_mode",
1923 COMPUTE_PGM_RSRC1_GFX10_PLUS_WGP_MODE);
1924 PRINT_DIRECTIVE(".amdhsa_memory_ordered", COMPUTE_PGM_RSRC1_GFX10_PLUS_MEM_ORDERED);
1925 PRINT_DIRECTIVE(".amdhsa_forward_progress", COMPUTE_PGM_RSRC1_GFX10_PLUS_FWD_PROGRESS);
1926 }
1927
1928 if (isGFX12Plus())
1929 PRINT_DIRECTIVE(".amdhsa_round_robin_scheduling",
1930 COMPUTE_PGM_RSRC1_GFX12_PLUS_ENABLE_WG_RR_EN);
1931
1932 return true;
1933}
1934
1935// NOLINTNEXTLINE(readability-identifier-naming)
// Disassemble the COMPUTE_PGM_RSRC2 dword of a kernel descriptor into
// .amdhsa_* directives, validating reserved bits.
// NOTE(review): the signature line (original 1936, presumably
// Expected<bool> AMDGPUDisassembler::decodeCOMPUTE_PGM_RSRC2(...) was lost
// in extraction, as were the condition line before the first directive
// (original 1940, presumably an if on hasArchitectedFlatScratch()) and the
// PRINT_DIRECTIVE( opener lines of two exception directives (originals
// 1961 and 1966).
1937 uint32_t FourByteBuffer, raw_string_ostream &KdStream) const {
1938 using namespace amdhsa;
1939 StringRef Indent = "\t";
1941 PRINT_DIRECTIVE(".amdhsa_enable_private_segment",
1942 COMPUTE_PGM_RSRC2_ENABLE_PRIVATE_SEGMENT);
1943 else
1944 PRINT_DIRECTIVE(".amdhsa_system_sgpr_private_segment_wavefront_offset",
1945 COMPUTE_PGM_RSRC2_ENABLE_PRIVATE_SEGMENT);
1946 PRINT_DIRECTIVE(".amdhsa_system_sgpr_workgroup_id_x",
1947 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_X);
1948 PRINT_DIRECTIVE(".amdhsa_system_sgpr_workgroup_id_y",
1949 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Y);
1950 PRINT_DIRECTIVE(".amdhsa_system_sgpr_workgroup_id_z",
1951 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Z);
1952 PRINT_DIRECTIVE(".amdhsa_system_sgpr_workgroup_info",
1953 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_INFO);
1954 PRINT_DIRECTIVE(".amdhsa_system_vgpr_workitem_id",
1955 COMPUTE_PGM_RSRC2_ENABLE_VGPR_WORKITEM_ID);
1956
1957 CHECK_RESERVED_BITS(COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_ADDRESS_WATCH);
1958 CHECK_RESERVED_BITS(COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_MEMORY);
1959 CHECK_RESERVED_BITS(COMPUTE_PGM_RSRC2_GRANULATED_LDS_SIZE);
1960
1962 ".amdhsa_exception_fp_ieee_invalid_op",
1963 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INVALID_OPERATION);
1964 PRINT_DIRECTIVE(".amdhsa_exception_fp_denorm_src",
1965 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_FP_DENORMAL_SOURCE);
1967 ".amdhsa_exception_fp_ieee_div_zero",
1968 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_DIVISION_BY_ZERO);
1969 PRINT_DIRECTIVE(".amdhsa_exception_fp_ieee_overflow",
1970 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_OVERFLOW);
1971 PRINT_DIRECTIVE(".amdhsa_exception_fp_ieee_underflow",
1972 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_UNDERFLOW);
1973 PRINT_DIRECTIVE(".amdhsa_exception_fp_ieee_inexact",
1974 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INEXACT);
1975 PRINT_DIRECTIVE(".amdhsa_exception_int_div_zero",
1976 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_INT_DIVIDE_BY_ZERO);
1977
1978 CHECK_RESERVED_BITS_DESC(COMPUTE_PGM_RSRC2_RESERVED0, "COMPUTE_PGM_RSRC2");
1979
1980 return true;
1981}
1982
// Decodes the COMPUTE_PGM_RSRC3 word into .amdhsa_* directives on KdStream.
// The meaning of RSRC3 bits differs by generation, hence the three-way split
// below: gfx90a, gfx10+, and (for older targets) the word must be all zero.
// NOTE(review): this extracted listing dropped original lines 1984, 2006,
// 2025, 2039 and 2053 (the function signature and the opening of several
// PRINT_PSEUDO_DIRECTIVE_COMMENT calls); consult the upstream file before
// editing this block.
1983// NOLINTNEXTLINE(readability-identifier-naming)
1985 uint32_t FourByteBuffer, raw_string_ostream &KdStream) const {
1986 using namespace amdhsa;
1987 StringRef Indent = "\t";
1988 if (isGFX90A()) {
// ACCUM_OFFSET is encoded as (value/4)-1; invert that here for the
// directive.
1989 KdStream << Indent << ".amdhsa_accum_offset "
1990 << (GET_FIELD(COMPUTE_PGM_RSRC3_GFX90A_ACCUM_OFFSET) + 1) * 4
1991 << '\n';
1992
1993 PRINT_DIRECTIVE(".amdhsa_tg_split", COMPUTE_PGM_RSRC3_GFX90A_TG_SPLIT);
1994
1995 CHECK_RESERVED_BITS_DESC_MSG(COMPUTE_PGM_RSRC3_GFX90A_RESERVED0,
1996 "COMPUTE_PGM_RSRC3", "must be zero on gfx90a");
1997 CHECK_RESERVED_BITS_DESC_MSG(COMPUTE_PGM_RSRC3_GFX90A_RESERVED1,
1998 "COMPUTE_PGM_RSRC3", "must be zero on gfx90a");
1999 } else if (isGFX10Plus()) {
2000 // Bits [0-3].
2001 if (!isGFX12Plus()) {
// With wave32 enabled there is no shared-VGPR directive, so the field is
// only surfaced as a comment (call opening on dropped line 2006).
2002 if (!EnableWavefrontSize32 || !*EnableWavefrontSize32) {
2003 PRINT_DIRECTIVE(".amdhsa_shared_vgpr_count",
2004 COMPUTE_PGM_RSRC3_GFX10_GFX11_SHARED_VGPR_COUNT);
2005 } else {
2007 "SHARED_VGPR_COUNT",
2008 COMPUTE_PGM_RSRC3_GFX10_GFX11_SHARED_VGPR_COUNT);
2009 }
2010 } else {
2011 CHECK_RESERVED_BITS_DESC_MSG(COMPUTE_PGM_RSRC3_GFX12_PLUS_RESERVED0,
2012 "COMPUTE_PGM_RSRC3",
2013 "must be zero on gfx12+");
2014 }
2015
2016 // Bits [4-11].
2017 if (isGFX11()) {
2018 PRINT_PSEUDO_DIRECTIVE_COMMENT("INST_PREF_SIZE",
2019 COMPUTE_PGM_RSRC3_GFX11_INST_PREF_SIZE);
2020 PRINT_PSEUDO_DIRECTIVE_COMMENT("TRAP_ON_START",
2021 COMPUTE_PGM_RSRC3_GFX11_TRAP_ON_START);
2022 PRINT_PSEUDO_DIRECTIVE_COMMENT("TRAP_ON_END",
2023 COMPUTE_PGM_RSRC3_GFX11_TRAP_ON_END);
2024 } else if (isGFX12Plus()) {
2026 "INST_PREF_SIZE", COMPUTE_PGM_RSRC3_GFX12_PLUS_INST_PREF_SIZE);
2027 } else {
2028 CHECK_RESERVED_BITS_DESC_MSG(COMPUTE_PGM_RSRC3_GFX10_RESERVED1,
2029 "COMPUTE_PGM_RSRC3",
2030 "must be zero on gfx10");
2031 }
2032
2033 // Bits [12].
2034 CHECK_RESERVED_BITS_DESC_MSG(COMPUTE_PGM_RSRC3_GFX10_PLUS_RESERVED2,
2035 "COMPUTE_PGM_RSRC3", "must be zero on gfx10+");
2036
2037 // Bits [13].
2038 if (isGFX12Plus()) {
2040 COMPUTE_PGM_RSRC3_GFX12_PLUS_GLG_EN);
2041 } else {
2042 CHECK_RESERVED_BITS_DESC_MSG(COMPUTE_PGM_RSRC3_GFX10_GFX11_RESERVED3,
2043 "COMPUTE_PGM_RSRC3",
2044 "must be zero on gfx10 or gfx11");
2045 }
2046
2047 // Bits [14-30].
2048 CHECK_RESERVED_BITS_DESC_MSG(COMPUTE_PGM_RSRC3_GFX10_PLUS_RESERVED4,
2049 "COMPUTE_PGM_RSRC3", "must be zero on gfx10+");
2050
2051 // Bits [31].
2052 if (isGFX11Plus()) {
2054 COMPUTE_PGM_RSRC3_GFX11_PLUS_IMAGE_OP);
2055 } else {
2056 CHECK_RESERVED_BITS_DESC_MSG(COMPUTE_PGM_RSRC3_GFX10_RESERVED5,
2057 "COMPUTE_PGM_RSRC3",
2058 "must be zero on gfx10");
2059 }
// Pre-gfx10 / non-gfx90a targets define no RSRC3 fields at all.
2060 } else if (FourByteBuffer) {
2061 return createStringError(
2062 std::errc::invalid_argument,
2063 "kernel descriptor COMPUTE_PGM_RSRC3 must be all zero before gfx9");
2064 }
2065 return true;
2066}
2067#undef PRINT_PSEUDO_DIRECTIVE_COMMENT
2068#undef PRINT_DIRECTIVE
2069#undef GET_FIELD
2070#undef CHECK_RESERVED_BITS_IMPL
2071#undef CHECK_RESERVED_BITS
2072#undef CHECK_RESERVED_BITS_MSG
2073#undef CHECK_RESERVED_BITS_DESC
2074#undef CHECK_RESERVED_BITS_DESC_MSG
2075
2076/// Create an error object to return from onSymbolStart for reserved kernel
2077/// descriptor bits being set.
2078static Error createReservedKDBitsError(uint32_t Mask, unsigned BaseBytes,
2079 const char *Msg = "") {
2080 return createStringError(
2081 std::errc::invalid_argument, "kernel descriptor reserved %s set%s%s",
2082 getBitRangeFromMask(Mask, BaseBytes).c_str(), *Msg ? ", " : "", Msg);
2083}
2084
2085/// Create an error object to return from onSymbolStart for reserved kernel
2086/// descriptor bytes being set.
2087static Error createReservedKDBytesError(unsigned BaseInBytes,
2088 unsigned WidthInBytes) {
2089 // Create an error comment in the same format as the "Kernel Descriptor"
2090 // table here: https://llvm.org/docs/AMDGPUUsage.html#kernel-descriptor .
2091 return createStringError(
2092 std::errc::invalid_argument,
2093 "kernel descriptor reserved bits in range (%u:%u) set",
2094 (BaseInBytes + WidthInBytes) * CHAR_BIT - 1, BaseInBytes * CHAR_BIT);
2095}
2096
// Decodes the kernel-descriptor field at the Cursor's current byte offset
// into assembler directives on KdStream, advancing Cursor past the field.
// Bytes must be the full 64-byte descriptor (asserted below).
// NOTE(review): the extraction dropped this switch's `case` label lines
// (each `case amdhsa::..._OFFSET:`) as well as the function signature line
// (2097/2098) and a few statement-opening lines (e.g. 2139, 2155, 2175,
// 2186, 2194, 2199, 2213, 2237); consult the upstream file before editing.
2099 raw_string_ostream &KdStream) const {
// Local directive printer over the 16-bit buffer read in the cases below.
2100#define PRINT_DIRECTIVE(DIRECTIVE, MASK) \
2101 do { \
2102 KdStream << Indent << DIRECTIVE " " \
2103 << ((TwoByteBuffer & MASK) >> (MASK##_SHIFT)) << '\n'; \
2104 } while (0)
2105
2106 uint16_t TwoByteBuffer = 0;
2107 uint32_t FourByteBuffer = 0;
2108
2109 StringRef ReservedBytes;
2110 StringRef Indent = "\t";
2111
// The kernel descriptor is exactly 64 bytes; read it little-endian.
2112 assert(Bytes.size() == 64);
2113 DataExtractor DE(Bytes, /*IsLittleEndian=*/true, /*AddressSize=*/8);
2114
// Dispatch on byte offset within the descriptor (case labels dropped by the
// extraction; each case corresponds to one descriptor field offset).
2115 switch (Cursor.tell()) {
2117 FourByteBuffer = DE.getU32(Cursor);
2118 KdStream << Indent << ".amdhsa_group_segment_fixed_size " << FourByteBuffer
2119 << '\n';
2120 return true;
2121
2123 FourByteBuffer = DE.getU32(Cursor);
2124 KdStream << Indent << ".amdhsa_private_segment_fixed_size "
2125 << FourByteBuffer << '\n';
2126 return true;
2127
2129 FourByteBuffer = DE.getU32(Cursor);
2130 KdStream << Indent << ".amdhsa_kernarg_size "
2131 << FourByteBuffer << '\n';
2132 return true;
2133
2135 // 4 reserved bytes, must be 0.
2136 ReservedBytes = DE.getBytes(Cursor, 4);
2137 for (int I = 0; I < 4; ++I) {
// Dropped line 2139 presumably returns a reserved-bytes error here.
2138 if (ReservedBytes[I] != 0)
2140 }
2141 return true;
2142
2144 // KERNEL_CODE_ENTRY_BYTE_OFFSET
2145 // So far no directive controls this for Code Object V3, so simply skip for
2146 // disassembly.
2147 DE.skip(Cursor, 8);
2148 return true;
2149
2151 // 20 reserved bytes, must be 0.
2152 ReservedBytes = DE.getBytes(Cursor, 20);
2153 for (int I = 0; I < 20; ++I) {
// Dropped line 2155 presumably returns a reserved-bytes error here.
2154 if (ReservedBytes[I] != 0)
2156 }
2157 return true;
2158
// The three COMPUTE_PGM_RSRC words delegate to their dedicated decoders.
2160 FourByteBuffer = DE.getU32(Cursor);
2161 return decodeCOMPUTE_PGM_RSRC3(FourByteBuffer, KdStream);
2162
2164 FourByteBuffer = DE.getU32(Cursor);
2165 return decodeCOMPUTE_PGM_RSRC1(FourByteBuffer, KdStream);
2166
2168 FourByteBuffer = DE.getU32(Cursor);
2169 return decodeCOMPUTE_PGM_RSRC2(FourByteBuffer, KdStream);
2170
// KERNEL_CODE_PROPERTIES: user-SGPR enables plus a few reserved bits.
2172 using namespace amdhsa;
2173 TwoByteBuffer = DE.getU16(Cursor);
2174
2176 PRINT_DIRECTIVE(".amdhsa_user_sgpr_private_segment_buffer",
2177 KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER);
2178 PRINT_DIRECTIVE(".amdhsa_user_sgpr_dispatch_ptr",
2179 KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR);
2180 PRINT_DIRECTIVE(".amdhsa_user_sgpr_queue_ptr",
2181 KERNEL_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR);
2182 PRINT_DIRECTIVE(".amdhsa_user_sgpr_kernarg_segment_ptr",
2183 KERNEL_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR);
2184 PRINT_DIRECTIVE(".amdhsa_user_sgpr_dispatch_id",
2185 KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID);
// A guarding condition on dropped line 2186 applies to the directive below.
2187 PRINT_DIRECTIVE(".amdhsa_user_sgpr_flat_scratch_init",
2188 KERNEL_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT);
2189 PRINT_DIRECTIVE(".amdhsa_user_sgpr_private_segment_size",
2190 KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE);
2191
2192 if (TwoByteBuffer & KERNEL_CODE_PROPERTY_RESERVED0)
2193 return createReservedKDBitsError(KERNEL_CODE_PROPERTY_RESERVED0,
2195
2196 // Reserved for GFX9
2197 if (isGFX9() &&
2198 (TwoByteBuffer & KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32)) {
2200 KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32,
2201 amdhsa::KERNEL_CODE_PROPERTIES_OFFSET, "must be zero on gfx9");
2202 } else if (isGFX10Plus()) {
2203 PRINT_DIRECTIVE(".amdhsa_wavefront_size32",
2204 KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32);
2205 }
2206
// The dynamic-stack directive only exists from code object v5 onward.
2207 if (CodeObjectVersion >= AMDGPU::AMDHSA_COV5)
2208 PRINT_DIRECTIVE(".amdhsa_uses_dynamic_stack",
2209 KERNEL_CODE_PROPERTY_USES_DYNAMIC_STACK);
2210
2211 if (TwoByteBuffer & KERNEL_CODE_PROPERTY_RESERVED1) {
2212 return createReservedKDBitsError(KERNEL_CODE_PROPERTY_RESERVED1,
2214 }
2215
2216 return true;
2217
// KERNARG_PRELOAD: only emit directives for nonzero fields.
2219 using namespace amdhsa;
2220 TwoByteBuffer = DE.getU16(Cursor);
2221 if (TwoByteBuffer & KERNARG_PRELOAD_SPEC_LENGTH) {
2222 PRINT_DIRECTIVE(".amdhsa_user_sgpr_kernarg_preload_length",
2223 KERNARG_PRELOAD_SPEC_LENGTH);
2224 }
2225
2226 if (TwoByteBuffer & KERNARG_PRELOAD_SPEC_OFFSET) {
2227 PRINT_DIRECTIVE(".amdhsa_user_sgpr_kernarg_preload_offset",
2228 KERNARG_PRELOAD_SPEC_OFFSET);
2229 }
2230 return true;
2231
2233 // 4 bytes from here are reserved, must be 0.
2234 ReservedBytes = DE.getBytes(Cursor, 4);
2235 for (int I = 0; I < 4; ++I) {
// Dropped line 2237 presumably returns a reserved-bytes error here.
2236 if (ReservedBytes[I] != 0)
2238 }
2239 return true;
2240
2241 default:
2242 llvm_unreachable("Unhandled index. Case statements cover everything.");
2243 return true;
2244 }
2245#undef PRINT_DIRECTIVE
2246}
2247
// Disassembles a full 64-byte kernel descriptor named KdName at KdAddress,
// printing an .amdhsa_kernel/.end_amdhsa_kernel block to stdout. Each field
// is handled by decodeKernelDescriptorDirective via the cursor loop below.
// NOTE(review): the extraction dropped original lines 2248 (signature),
// 2262-2264 (the read of KERNEL_CODE_PROPERTIES into KernelCodeProperties)
// and 2274 (the cursor declaration); consult the upstream file before
// editing this block.
2249 StringRef KdName, ArrayRef<uint8_t> Bytes, uint64_t KdAddress) const {
2250
2251 // CP microcode requires the kernel descriptor to be 64 aligned.
2252 if (Bytes.size() != 64 || KdAddress % 64 != 0)
2253 return createStringError(std::errc::invalid_argument,
2254 "kernel descriptor must be 64-byte aligned");
2255
2256 // FIXME: We can't actually decode "in order" as is done below, as e.g. GFX10
2257 // requires us to know the setting of .amdhsa_wavefront_size32 in order to
2258 // accurately produce .amdhsa_next_free_vgpr, and they appear in the wrong
2259 // order. Workaround this by first looking up .amdhsa_wavefront_size32 here
2260 // when required.
2261 if (isGFX10Plus()) {
// KernelCodeProperties is filled from the descriptor bytes on the dropped
// lines 2263-2264 before the wave32 bit is extracted below.
2262 uint16_t KernelCodeProperties =
2265 EnableWavefrontSize32 =
2266 AMDHSA_BITS_GET(KernelCodeProperties,
2267 amdhsa::KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32);
2268 }
2269
2270 std::string Kd;
2271 raw_string_ostream KdStream(Kd);
2272 KdStream << ".amdhsa_kernel " << KdName << '\n';
2273
// Walk the descriptor field-by-field; `C` is the cursor declared on the
// dropped line 2274. Any extraction error is fatal (cantFail), while a
// semantic decode failure is propagated to the caller.
2275 while (C && C.tell() < Bytes.size()) {
2276 Expected<bool> Res = decodeKernelDescriptorDirective(C, Bytes, KdStream);
2277
2278 cantFail(C.takeError());
2279
2280 if (!Res)
2281 return Res;
2282 }
2283 KdStream << ".end_amdhsa_kernel\n";
2284 outs() << KdStream.str();
2285 return true;
2286}
2287
// Target hook invoked at the start of each symbol: performs kernel-descriptor
// disassembly for Code Object V3 ".kd" symbols, rejects V2 kernel symbols,
// and returns false (no special handling) for everything else.
// NOTE(review): the extraction dropped original line 2288 (the signature
// line naming this as AMDGPUDisassembler::onSymbolStart).
2289 uint64_t &Size,
2290 ArrayRef<uint8_t> Bytes,
2291 uint64_t Address) const {
2292 // Right now only kernel descriptor needs to be handled.
2293 // We ignore all other symbols for target specific handling.
2294 // TODO:
2295 // Fix the spurious symbol issue for AMDGPU kernels. Exists for both Code
2296 // Object V2 and V3 when symbols are marked protected.
2297
2298 // amd_kernel_code_t for Code Object V2.
2299 if (Symbol.Type == ELF::STT_AMDGPU_HSA_KERNEL) {
// amd_kernel_code_t is 256 bytes; report the size even though V2 decoding
// is unsupported.
2300 Size = 256;
2301 return createStringError(std::errc::invalid_argument,
2302 "code object v2 is not supported");
2303 }
2304
2305 // Code Object V3 kernel descriptors.
2306 StringRef Name = Symbol.Name;
2307 if (Symbol.Type == ELF::STT_OBJECT && Name.ends_with(StringRef(".kd"))) {
2308 Size = 64; // Size = 64 regardless of success or failure.
// Strip the ".kd" suffix to recover the kernel's name for the directive.
2309 return decodeKernelDescriptor(Name.drop_back(3), Bytes, Address);
2310 }
2311
2312 return false;
2313}
2314
2315//===----------------------------------------------------------------------===//
2316// AMDGPUSymbolizer
2317//===----------------------------------------------------------------------===//
2318
// Symbolizer hook: for branch targets, replaces the raw immediate with a
// symbol reference when an STT_NOTYPE symbol exists at that address;
// otherwise records the address so the caller can synthesize a label.
// NOTE(review): the extraction dropped original lines 2320 (the signature
// line naming this as AMDGPUSymbolizer::tryAddingSymbolicOperand) and 2340
// (presumably the addOperand call attaching `Add` to Inst).
2319// Try to find symbol name for specified label
2321 MCInst &Inst, raw_ostream & /*cStream*/, int64_t Value,
2322 uint64_t /*Address*/, bool IsBranch, uint64_t /*Offset*/,
2323 uint64_t /*OpSize*/, uint64_t /*InstSize*/) {
2324
// Only branch targets are symbolized.
2325 if (!IsBranch) {
2326 return false;
2327 }
2328
// DisInfo carries the section's symbol table (see createAMDGPUSymbolizer).
2329 auto *Symbols = static_cast<SectionSymbolsTy *>(DisInfo);
2330 if (!Symbols)
2331 return false;
2332
// Look for an untyped symbol whose address equals the branch target.
2333 auto Result = llvm::find_if(*Symbols, [Value](const SymbolInfoTy &Val) {
2334 return Val.Addr == static_cast<uint64_t>(Value) &&
2335 Val.Type == ELF::STT_NOTYPE;
2336 });
2337 if (Result != Symbols->end()) {
2338 auto *Sym = Ctx.getOrCreateSymbol(Result->Name);
2339 const auto *Add = MCSymbolRefExpr::create(Sym, Ctx);
2341 return true;
2342 }
2343 // Add to list of referenced addresses, so caller can synthesize a label.
2344 ReferencedAddresses.push_back(static_cast<uint64_t>(Value));
2345 return false;
2346}
2347
// Symbolizer hook for PC-relative load comments — not implemented for
// AMDGPU; reaching it is a programming error.
// NOTE(review): the extraction dropped original line 2348 (the signature
// line naming this as AMDGPUSymbolizer::tryAddingPcLoadReferenceComment).
2349 int64_t Value,
2350 uint64_t Address) {
2351 llvm_unreachable("unimplemented");
2352}
2353
2354//===----------------------------------------------------------------------===//
2355// Initialization
2356//===----------------------------------------------------------------------===//
2357
// Factory registered with the target registry; constructs the AMDGPU
// symbolizer, forwarding DisInfo (the section symbol table) and taking
// ownership of RelInfo.
// NOTE(review): the extraction dropped original line 2358 (the signature
// line naming this as createAMDGPUSymbolizer).
2359 LLVMOpInfoCallback /*GetOpInfo*/,
2360 LLVMSymbolLookupCallback /*SymbolLookUp*/,
2361 void *DisInfo,
2362 MCContext *Ctx,
2363 std::unique_ptr<MCRelocationInfo> &&RelInfo) {
// Ownership passes to the caller (the disassembler framework).
2364 return new AMDGPUSymbolizer(*Ctx, std::move(RelInfo), DisInfo);
2365}
2366
// Factory registered with the target registry; constructs the disassembler
// with a fresh MCInstrInfo for the target. Ownership passes to the caller.
// NOTE(review): the extraction dropped original line 2367 (the signature
// line naming this as createAMDGPUDisassembler).
2368 const MCSubtargetInfo &STI,
2369 MCContext &Ctx) {
2370 return new AMDGPUDisassembler(STI, Ctx, T.createMCInstrInfo());
2371}
2372
2378}
unsigned const MachineRegisterInfo * MRI
aarch64 promote const
static int IsAGPROperand(const MCInst &Inst, uint16_t NameIdx, const MCRegisterInfo *MRI)
#define CHECK_RESERVED_BITS_DESC(MASK, DESC)
static VOPModifiers collectVOPModifiers(const MCInst &MI, bool IsVOP3P=false)
static DecodeStatus decodeSrcRegOrImm9(MCInst &Inst, unsigned Imm, uint64_t, const MCDisassembler *Decoder)
static SmallString< 32 > getBitRangeFromMask(uint32_t Mask, unsigned BaseBytes)
Print a string describing the reserved bit range specified by Mask with offset BaseBytes for use in e...
static DecodeStatus decodeSMEMOffset(MCInst &Inst, unsigned Imm, uint64_t Addr, const MCDisassembler *Decoder)
static DecodeStatus decodeSrcA9(MCInst &Inst, unsigned Imm, uint64_t, const MCDisassembler *Decoder)
static int insertNamedMCOperand(MCInst &MI, const MCOperand &Op, uint16_t NameIdx)
static DecoderUInt128 eat12Bytes(ArrayRef< uint8_t > &Bytes)
#define PRINT_PSEUDO_DIRECTIVE_COMMENT(DIRECTIVE, MASK)
static DecodeStatus decodeSrcRegOrImmDeferred9(MCInst &Inst, unsigned Imm, uint64_t, const MCDisassembler *Decoder)
static DecodeStatus decodeDpp8FI(MCInst &Inst, unsigned Val, uint64_t Addr, const MCDisassembler *Decoder)
static DecodeStatus decodeOperand_VSrc_f64(MCInst &Inst, unsigned Imm, uint64_t Addr, const MCDisassembler *Decoder)
static int64_t getInlineImmVal16(unsigned Imm, AMDGPU::OperandSemantics Sema)
static int64_t getInlineImmValBF16(unsigned Imm)
#define DECODE_SDWA(DecName)
static DecodeStatus decodeSOPPBrTarget(MCInst &Inst, unsigned Imm, uint64_t Addr, const MCDisassembler *Decoder)
static DecodeStatus decodeSrcRegOrImmA9(MCInst &Inst, unsigned Imm, uint64_t, const MCDisassembler *Decoder)
#define DECODE_OPERAND_REG_8(RegClass)
static DecodeStatus decodeSrcOp(MCInst &Inst, unsigned EncSize, AMDGPUDisassembler::OpWidthTy OpWidth, unsigned Imm, unsigned EncImm, bool MandatoryLiteral, unsigned ImmWidth, AMDGPU::OperandSemantics Sema, const MCDisassembler *Decoder)
static DecodeStatus decodeOperand_VSrcT16_Lo128(MCInst &Inst, unsigned Imm, uint64_t, const MCDisassembler *Decoder)
#define PRINT_DIRECTIVE(DIRECTIVE, MASK)
static DecodeStatus DecodeVGPR_16RegisterClass(MCInst &Inst, unsigned Imm, uint64_t, const MCDisassembler *Decoder)
static DecodeStatus decodeSrcReg9(MCInst &Inst, unsigned Imm, uint64_t, const MCDisassembler *Decoder)
static int64_t getInlineImmVal32(unsigned Imm)
#define DECODE_OPERAND_REG_7(RegClass, OpWidth)
static MCDisassembler::DecodeStatus addOperand(MCInst &Inst, const MCOperand &Opnd)
#define CHECK_RESERVED_BITS(MASK)
static DecodeStatus decodeSrcAV10(MCInst &Inst, unsigned Imm, uint64_t, const MCDisassembler *Decoder)
#define SGPR_MAX
static int64_t getInlineImmVal64(unsigned Imm)
static T eatBytes(ArrayRef< uint8_t > &Bytes)
static DecodeStatus decodeOperand_KImmFP(MCInst &Inst, unsigned Imm, uint64_t Addr, const MCDisassembler *Decoder)
static MCDisassembler * createAMDGPUDisassembler(const Target &T, const MCSubtargetInfo &STI, MCContext &Ctx)
LLVM_EXTERNAL_VISIBILITY void LLVMInitializeAMDGPUDisassembler()
static DecodeStatus decodeOperand_VSrcT16(MCInst &Inst, unsigned Imm, uint64_t, const MCDisassembler *Decoder)
static DecodeStatus decodeAVLdSt(MCInst &Inst, unsigned Imm, AMDGPUDisassembler::OpWidthTy Opw, const MCDisassembler *Decoder)
static DecodeStatus DecodeVGPR_16_Lo128RegisterClass(MCInst &Inst, unsigned Imm, uint64_t, const MCDisassembler *Decoder)
#define CHECK_RESERVED_BITS_MSG(MASK, MSG)
static DecodeStatus decodeOperandVOPDDstY(MCInst &Inst, unsigned Val, uint64_t Addr, const void *Decoder)
static MCSymbolizer * createAMDGPUSymbolizer(const Triple &, LLVMOpInfoCallback, LLVMSymbolLookupCallback, void *DisInfo, MCContext *Ctx, std::unique_ptr< MCRelocationInfo > &&RelInfo)
static DecodeStatus decodeBoolReg(MCInst &Inst, unsigned Val, uint64_t Addr, const MCDisassembler *Decoder)
static int64_t getInlineImmValF16(unsigned Imm)
#define GET_FIELD(MASK)
static Error createReservedKDBytesError(unsigned BaseInBytes, unsigned WidthInBytes)
Create an error object to return from onSymbolStart for reserved kernel descriptor bytes being set.
static DecodeStatus decodeSplitBarrier(MCInst &Inst, unsigned Val, uint64_t Addr, const MCDisassembler *Decoder)
static DecodeStatus decodeAV10(MCInst &Inst, unsigned Imm, uint64_t, const MCDisassembler *Decoder)
#define CHECK_RESERVED_BITS_DESC_MSG(MASK, DESC, MSG)
static Error createReservedKDBitsError(uint32_t Mask, unsigned BaseBytes, const char *Msg="")
Create an error object to return from onSymbolStart for reserved kernel descriptor bits being set.
This file contains declaration for AMDGPU ISA disassembler.
Provides AMDGPU specific target descriptions.
AMDHSA kernel descriptor definitions.
#define AMDHSA_BITS_GET(SRC, MSK)
Analysis containing CSE Info
Definition: CSEInfo.cpp:27
#define LLVM_EXTERNAL_VISIBILITY
Definition: Compiler.h:135
uint64_t Addr
std::string Name
uint64_t Size
Symbol * Sym
Definition: ELF_riscv.cpp:479
IRTranslator LLVM IR MI
#define I(x, y, z)
Definition: MD5.cpp:58
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
Interface definition for SIRegisterInfo.
void convertEXPInst(MCInst &MI) const
MCOperand createRegOperand(unsigned int RegId) const
MCOperand decodeSpecialReg64(unsigned Val) const
const char * getRegClassName(unsigned RegClassID) const
Expected< bool > decodeCOMPUTE_PGM_RSRC1(uint32_t FourByteBuffer, raw_string_ostream &KdStream) const
Decode as directives that handle COMPUTE_PGM_RSRC1.
Expected< bool > decodeKernelDescriptorDirective(DataExtractor::Cursor &Cursor, ArrayRef< uint8_t > Bytes, raw_string_ostream &KdStream) const
void convertVOPCDPPInst(MCInst &MI) const
unsigned getVgprClassId(const OpWidthTy Width) const
unsigned getAgprClassId(const OpWidthTy Width) const
MCOperand decodeSDWASrc32(unsigned Val) const
void setABIVersion(unsigned Version) override
ELF-specific, set the ABI version from the object header.
Expected< bool > decodeCOMPUTE_PGM_RSRC2(uint32_t FourByteBuffer, raw_string_ostream &KdStream) const
Decode as directives that handle COMPUTE_PGM_RSRC2.
MCOperand decodeDpp8FI(unsigned Val) const
void convertMacDPPInst(MCInst &MI) const
MCOperand decodeVOPDDstYOp(MCInst &Inst, unsigned Val) const
MCOperand decodeBoolReg(unsigned Val) const
void convertDPP8Inst(MCInst &MI) const
MCOperand createVGPR16Operand(unsigned RegIdx, bool IsHi) const
MCOperand errOperand(unsigned V, const Twine &ErrMsg) const
MCOperand decodeSDWASrc(const OpWidthTy Width, unsigned Val, unsigned ImmWidth, AMDGPU::OperandSemantics Sema) const
Expected< bool > decodeKernelDescriptor(StringRef KdName, ArrayRef< uint8_t > Bytes, uint64_t KdAddress) const
MCOperand decodeSplitBarrier(unsigned Val) const
void convertVOP3DPPInst(MCInst &MI) const
void convertTrue16OpSel(MCInst &MI) const
void convertFMAanyK(MCInst &MI, int ImmLitIdx) const
MCOperand decodeMandatoryLiteralConstant(unsigned Imm) const
MCOperand decodeNonVGPRSrcOp(const OpWidthTy Width, unsigned Val, bool MandatoryLiteral=false, unsigned ImmWidth=0, AMDGPU::OperandSemantics Sema=AMDGPU::OperandSemantics::INT) const
Expected< bool > decodeCOMPUTE_PGM_RSRC3(uint32_t FourByteBuffer, raw_string_ostream &KdStream) const
Decode as directives that handle COMPUTE_PGM_RSRC3.
static MCOperand decodeFPImmed(unsigned ImmWidth, unsigned Imm, AMDGPU::OperandSemantics Sema)
MCOperand decodeSrcOp(const OpWidthTy Width, unsigned Val, bool MandatoryLiteral=false, unsigned ImmWidth=0, AMDGPU::OperandSemantics Sema=AMDGPU::OperandSemantics::INT) const
AMDGPUDisassembler(const MCSubtargetInfo &STI, MCContext &Ctx, MCInstrInfo const *MCII)
MCOperand decodeSpecialReg32(unsigned Val) const
MCOperand decodeLiteralConstant(bool ExtendFP64) const
MCOperand decodeSDWAVopcDst(unsigned Val) const
void convertVINTERPInst(MCInst &MI) const
void convertSDWAInst(MCInst &MI) const
DecodeStatus tryDecodeInst(const uint8_t *Table, MCInst &MI, InsnType Inst, uint64_t Address, raw_ostream &Comments) const
unsigned getSgprClassId(const OpWidthTy Width) const
static MCOperand decodeIntImmed(unsigned Imm)
DecodeStatus getInstruction(MCInst &MI, uint64_t &Size, ArrayRef< uint8_t > Bytes, uint64_t Address, raw_ostream &CS) const override
Returns the disassembly of a single instruction.
unsigned getTtmpClassId(const OpWidthTy Width) const
void convertMIMGInst(MCInst &MI) const
bool isMacDPP(MCInst &MI) const
int getTTmpIdx(unsigned Val) const
void convertVOP3PDPPInst(MCInst &MI) const
MCOperand createSRegOperand(unsigned SRegClassID, unsigned Val) const
MCOperand decodeSDWASrc16(unsigned Val) const
Expected< bool > onSymbolStart(SymbolInfoTy &Symbol, uint64_t &Size, ArrayRef< uint8_t > Bytes, uint64_t Address) const override
Used to perform separate target specific disassembly for a particular symbol.
bool tryAddingSymbolicOperand(MCInst &Inst, raw_ostream &cStream, int64_t Value, uint64_t Address, bool IsBranch, uint64_t Offset, uint64_t OpSize, uint64_t InstSize) override
Try to add a symbolic operand instead of Value to the MCInst.
void tryAddingPcLoadReferenceComment(raw_ostream &cStream, int64_t Value, uint64_t Address) override
Try to add a comment on the PC-relative load.
Class for arbitrary precision integers.
Definition: APInt.h:77
APInt sext(unsigned width) const
Sign extend to a new width.
Definition: APInt.cpp:954
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition: ArrayRef.h:41
size_t size() const
size - Get the array size.
Definition: ArrayRef.h:165
const T * data() const
Definition: ArrayRef.h:162
ArrayRef< T > slice(size_t N, size_t M) const
slice(n, m) - Chop off the first N elements of the array, and keep M elements in the array.
Definition: ArrayRef.h:195
This class represents an Operation in the Expression.
A class representing a position in a DataExtractor, as well as any error encountered during extractio...
Definition: DataExtractor.h:54
uint64_t tell() const
Return the current position of this Cursor.
Definition: DataExtractor.h:71
uint32_t getU32(uint64_t *offset_ptr, Error *Err=nullptr) const
Extract a uint32_t value from *offset_ptr.
uint16_t getU16(uint64_t *offset_ptr, Error *Err=nullptr) const
Extract a uint16_t value from *offset_ptr.
void skip(Cursor &C, uint64_t Length) const
Advance the Cursor position by the given number of bytes.
StringRef getBytes(uint64_t *OffsetPtr, uint64_t Length, Error *Err=nullptr) const
Extract a fixed number of bytes from the specified offset.
Lightweight error class with error context and mandatory checking.
Definition: Error.h:160
Tagged union holding either a T or a Error.
Definition: Error.h:474
Context object for machine code objects.
Definition: MCContext.h:81
const MCRegisterInfo * getRegisterInfo() const
Definition: MCContext.h:439
MCSymbol * getOrCreateSymbol(const Twine &Name)
Lookup the symbol inside with the specified Name.
Definition: MCContext.cpp:201
Superclass for all disassemblers.
MCContext & getContext() const
const MCSubtargetInfo & STI
raw_ostream * CommentStream
DecodeStatus
Ternary decode status.
Instances of this class represent a single low-level machine instruction.
Definition: MCInst.h:184
unsigned getNumOperands() const
Definition: MCInst.h:208
unsigned getOpcode() const
Definition: MCInst.h:198
void addOperand(const MCOperand Op)
Definition: MCInst.h:210
const MCOperand & getOperand(unsigned i) const
Definition: MCInst.h:206
Describe properties that are true of each instruction in the target description file.
Definition: MCInstrDesc.h:198
Interface to description of machine instruction set.
Definition: MCInstrInfo.h:26
Instances of this class represent operands of the MCInst class.
Definition: MCInst.h:36
static MCOperand createReg(unsigned Reg)
Definition: MCInst.h:134
static MCOperand createExpr(const MCExpr *Val)
Definition: MCInst.h:162
int64_t getImm() const
Definition: MCInst.h:80
static MCOperand createImm(int64_t Val)
Definition: MCInst.h:141
unsigned getReg() const
Returns the register number.
Definition: MCInst.h:69
bool isReg() const
Definition: MCInst.h:61
bool isValid() const
Definition: MCInst.h:60
MCRegisterClass - Base class of TargetRegisterClass.
unsigned getRegister(unsigned i) const
getRegister - Return the specified register in the class.
bool contains(MCRegister Reg) const
contains - Return true if the specified register is included in this register class.
MCRegisterInfo base class - We assume that the target defines a static array of MCRegisterDesc object...
MCRegister getMatchingSuperReg(MCRegister Reg, unsigned SubIdx, const MCRegisterClass *RC) const
Return a super-register of the specified register Reg so its sub-register of index SubIdx is Reg.
uint16_t getEncodingValue(MCRegister RegNo) const
Returns the encoding for RegNo.
const MCRegisterClass & getRegClass(unsigned i) const
Returns the register class associated with the enumeration value.
MCRegister getSubReg(MCRegister Reg, unsigned Idx) const
Returns the physical register number of sub-register "Index" for physical register RegNo.
Generic base class for all target subtargets.
bool hasFeature(unsigned Feature) const
static const MCSymbolRefExpr * create(const MCSymbol *Symbol, MCContext &Ctx)
Definition: MCExpr.h:397
Symbolize and annotate disassembled instructions.
Definition: MCSymbolizer.h:39
MCContext & Ctx
Definition: MCSymbolizer.h:41
SmallString - A SmallString is just a SmallVector with methods and accessors that make it work better...
Definition: SmallString.h:26
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:50
Target - Wrapper for Target specific information.
Triple - Helper class for working with autoconf configuration names.
Definition: Triple.h:44
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition: Twine.h:81
LLVM Value Representation.
Definition: Value.h:74
This class implements an extremely fast bulk output stream that can only output to a stream.
Definition: raw_ostream.h:52
A raw_ostream that writes to an std::string.
Definition: raw_ostream.h:660
std::string & str()
Returns the string's reference.
Definition: raw_ostream.h:678
A raw_ostream that writes to an SmallVector or SmallString.
Definition: raw_ostream.h:690
const char *(* LLVMSymbolLookupCallback)(void *DisInfo, uint64_t ReferenceValue, uint64_t *ReferenceType, uint64_t ReferencePC, const char **ReferenceName)
The type for the symbol lookup function.
int(* LLVMOpInfoCallback)(void *DisInfo, uint64_t PC, uint64_t Offset, uint64_t OpSize, uint64_t InstSize, int TagType, void *TagBuf)
The type for the operand information call back function.
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
unsigned getVGPREncodingGranule(const MCSubtargetInfo *STI, std::optional< bool > EnableWavefrontSize32)
unsigned getSGPREncodingGranule(const MCSubtargetInfo *STI)
LLVM_READONLY const MIMGInfo * getMIMGInfo(unsigned Opc)
int getMIMGOpcode(unsigned BaseOpcode, unsigned MIMGEncoding, unsigned VDataDwords, unsigned VAddrDwords)
bool isGFX10(const MCSubtargetInfo &STI)
LLVM_READONLY int16_t getNamedOperandIdx(uint16_t Opcode, uint16_t NamedIdx)
bool isGFX12Plus(const MCSubtargetInfo &STI)
bool hasPackedD16(const MCSubtargetInfo &STI)
bool isVOPC64DPP(unsigned Opc)
unsigned getAMDHSACodeObjectVersion(const Module &M)
bool isGFX9(const MCSubtargetInfo &STI)
LLVM_READONLY const MIMGDimInfo * getMIMGDimInfoByEncoding(uint8_t DimEnc)
LLVM_READONLY bool hasNamedOperand(uint64_t Opcode, uint64_t NamedIdx)
bool hasG16(const MCSubtargetInfo &STI)
unsigned getAddrSizeMIMGOp(const MIMGBaseOpcodeInfo *BaseOpcode, const MIMGDimInfo *Dim, bool IsA16, bool IsG16Supported)
bool isGFX11Plus(const MCSubtargetInfo &STI)
bool isGFX10Plus(const MCSubtargetInfo &STI)
unsigned getMCReg(unsigned Reg, const MCSubtargetInfo &STI)
If Reg is a pseudo reg, return the correct hardware register given STI otherwise return Reg.
@ OPERAND_REG_IMM_FP32_DEFERRED
Definition: SIDefines.h:209
@ OPERAND_REG_IMM_FP16_DEFERRED
Definition: SIDefines.h:208
bool hasGDS(const MCSubtargetInfo &STI)
bool isGFX9Plus(const MCSubtargetInfo &STI)
unsigned hasKernargPreload(const MCSubtargetInfo &STI)
bool isMAC(unsigned Opc)
LLVM_READONLY const MIMGBaseOpcodeInfo * getMIMGBaseOpcodeInfo(unsigned BaseOpcode)
bool hasVOPD(const MCSubtargetInfo &STI)
@ C
The default llvm calling convention, compatible with C.
Definition: CallingConv.h:34
@ STT_NOTYPE
Definition: ELF.h:1324
@ STT_AMDGPU_HSA_KERNEL
Definition: ELF.h:1338
@ STT_OBJECT
Definition: ELF.h:1325
uint16_t read16(const void *P, endianness E)
Definition: Endian.h:401
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
@ Offset
Definition: DWP.cpp:456
int popcount(T Value) noexcept
Count the number of set bits in a value.
Definition: bit.h:385
raw_fd_ostream & outs()
This returns a reference to a raw_fd_ostream for standard output.
SmallVectorImpl< T >::const_pointer c_str(SmallVectorImpl< T > &str)
Error createStringError(std::error_code EC, char const *Fmt, const Ts &... Vals)
Create formatted StringError object.
Definition: Error.h:1258
int countr_zero(T Val)
Count number of 0's from the least significant bit to the most stopping at the first 1.
Definition: bit.h:215
void report_fatal_error(Error Err, bool gen_crash_diag=true)
Report a serious error, calling any installed error handler.
Definition: Error.cpp:159
void cantFail(Error Err, const char *Msg=nullptr)
Report a fatal error if Err is a failure value.
Definition: Error.h:749
Target & getTheGCNTarget()
The target for GCN GPUs.
@ Add
Sum of integers.
std::vector< SymbolInfoTy > SectionSymbolsTy
unsigned M0(unsigned Val)
Definition: VE.h:375
auto find_if(R &&Range, UnaryPredicate P)
Provide wrappers to std::find_if which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1749
Description of the encoding of one expression Op.
static void RegisterMCSymbolizer(Target &T, Target::MCSymbolizerCtorTy Fn)
RegisterMCSymbolizer - Register an MCSymbolizer implementation for the given target.
static void RegisterMCDisassembler(Target &T, Target::MCDisassemblerCtorTy Fn)
RegisterMCDisassembler - Register a MCDisassembler implementation for the given target.