//===- AMDGPUDisassembler.cpp - Disassembler for AMDGPU ISA ---------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
//===----------------------------------------------------------------------===//
//
/// \file
///
/// This file contains the definition of the AMDGPU ISA disassembler.
//
//===----------------------------------------------------------------------===//

// ToDo: What to do with instruction suffixes (v_mov_b32 vs v_mov_b32_e32)?

#include "Disassembler/AMDGPUDisassembler.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "SIDefines.h"
#include "SIRegisterInfo.h"
#include "TargetInfo/AMDGPUTargetInfo.h"
#include "Utils/AMDGPUAsmUtils.h"
#include "Utils/AMDGPUBaseInfo.h"
#include "llvm-c/DisassemblerTypes.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCDecoderOps.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCInstrDesc.h"
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/MC/TargetRegistry.h"
#include "llvm/Support/AMDHSAKernelDescriptor.h"

using namespace llvm;

#define DEBUG_TYPE "amdgpu-disassembler"

#define SGPR_MAX                                                               \
  (isGFX10Plus() ? AMDGPU::EncValues::SGPR_MAX_GFX10                           \
                 : AMDGPU::EncValues::SGPR_MAX_SI)

using DecodeStatus = llvm::MCDisassembler::DecodeStatus;

AMDGPUDisassembler::AMDGPUDisassembler(const MCSubtargetInfo &STI,
                                       MCContext &Ctx, MCInstrInfo const *MCII)
    : MCDisassembler(STI, Ctx), MCII(MCII), MRI(*Ctx.getRegisterInfo()),
      MAI(*Ctx.getAsmInfo()), TargetMaxInstBytes(MAI.getMaxInstLength(&STI)),
      CodeObjectVersion(AMDGPU::getDefaultAMDHSACodeObjectVersion()) {
  // ToDo: AMDGPUDisassembler supports only VI ISA.
  if (!STI.hasFeature(AMDGPU::FeatureGCN3Encoding) && !isGFX10Plus())
    report_fatal_error("Disassembly not yet supported for subtarget");
}

void AMDGPUDisassembler::setABIVersion(unsigned Version) {
  CodeObjectVersion = AMDGPU::getAMDHSACodeObjectVersion(Version);
}

static DecodeStatus addOperand(MCInst &Inst, const MCOperand& Opnd) {
  Inst.addOperand(Opnd);
  return Opnd.isValid() ?
    MCDisassembler::Success :
    MCDisassembler::Fail;
}

static int insertNamedMCOperand(MCInst &MI, const MCOperand &Op,
                                uint16_t NameIdx) {
  int OpIdx = AMDGPU::getNamedOperandIdx(MI.getOpcode(), NameIdx);
  if (OpIdx != -1) {
    auto I = MI.begin();
    std::advance(I, OpIdx);
    MI.insert(I, Op);
  }
  return OpIdx;
}

static DecodeStatus decodeSOPPBrTarget(MCInst &Inst, unsigned Imm,
                                       uint64_t Addr,
                                       const MCDisassembler *Decoder) {
  auto DAsm = static_cast<const AMDGPUDisassembler*>(Decoder);

  // Our branches take a simm16, but we need two extra bits to account for the
  // factor of 4.
  APInt SignedOffset(18, Imm * 4, true);
  int64_t Offset = (SignedOffset.sext(64) + 4 + Addr).getSExtValue();

  if (DAsm->tryAddingSymbolicOperand(Inst, Offset, Addr, true, 2, 2, 0))
    return MCDisassembler::Success;
  return addOperand(Inst, MCOperand::createImm(Imm));
}
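
// Worked example (illustrative, not from the original source): a branch
// encoded with Imm = 0xFFFD (simm16 = -3) at Addr = 0x120 gives
// SignedOffset = -12, so Offset = 0x120 + 4 - 12 = 0x118; branch targets are
// relative to the instruction following the branch.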

static DecodeStatus decodeSMEMOffset(MCInst &Inst, unsigned Imm, uint64_t Addr,
                                     const MCDisassembler *Decoder) {
  auto DAsm = static_cast<const AMDGPUDisassembler*>(Decoder);
  int64_t Offset;
  if (DAsm->isGFX12Plus()) { // GFX12 supports 24-bit signed offsets.
    Offset = SignExtend64<24>(Imm);
  } else if (DAsm->isVI()) { // VI supports 20-bit unsigned offsets.
    Offset = Imm & 0xFFFFF;
  } else { // GFX9+ supports 21-bit signed offsets.
    Offset = SignExtend64<21>(Imm);
  }
  return addOperand(Inst, MCOperand::createImm(Offset));
}
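
// Illustrative: Imm = 0x1FFFFF sign-extends to -1 under the 21-bit GFX9+ rule
// above, while VI masks to 20 bits and would print the unsigned value 0xFFFFF.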

static DecodeStatus decodeBoolReg(MCInst &Inst, unsigned Val, uint64_t Addr,
                                  const MCDisassembler *Decoder) {
  auto DAsm = static_cast<const AMDGPUDisassembler*>(Decoder);
  return addOperand(Inst, DAsm->decodeBoolReg(Val));
}

static DecodeStatus decodeSplitBarrier(MCInst &Inst, unsigned Val,
                                       uint64_t Addr,
                                       const MCDisassembler *Decoder) {
  auto DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
  return addOperand(Inst, DAsm->decodeSplitBarrier(Val));
}

static DecodeStatus decodeDpp8FI(MCInst &Inst, unsigned Val, uint64_t Addr,
                                 const MCDisassembler *Decoder) {
  auto DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
  return addOperand(Inst, DAsm->decodeDpp8FI(Val));
}

#define DECODE_OPERAND(StaticDecoderName, DecoderName)                         \
  static DecodeStatus StaticDecoderName(MCInst &Inst, unsigned Imm,            \
                                        uint64_t /*Addr*/,                     \
                                        const MCDisassembler *Decoder) {       \
    auto DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);              \
    return addOperand(Inst, DAsm->DecoderName(Imm));                           \
  }
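
// Illustrative expansion (not part of the original file):
// DECODE_OPERAND(decodeSDWASrc32, decodeSDWASrc32) defines a static wrapper
// that forwards Imm to AMDGPUDisassembler::decodeSDWASrc32() and appends the
// resulting operand to the MCInst.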

// Decoder for registers, decoding directly by RegClassID. The 8-bit Imm is the
// register number. Used by VGPR-only and AGPR-only operands.
#define DECODE_OPERAND_REG_8(RegClass)                                         \
  static DecodeStatus Decode##RegClass##RegisterClass(                         \
      MCInst &Inst, unsigned Imm, uint64_t /*Addr*/,                           \
      const MCDisassembler *Decoder) {                                         \
    assert(Imm < (1 << 8) && "8-bit encoding");                                \
    auto DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);              \
    return addOperand(                                                         \
        Inst, DAsm->createRegOperand(AMDGPU::RegClass##RegClassID, Imm));      \
  }

#define DECODE_SrcOp(Name, EncSize, OpWidth, EncImm, MandatoryLiteral,         \
                     ImmWidth)                                                 \
  static DecodeStatus Name(MCInst &Inst, unsigned Imm, uint64_t /*Addr*/,      \
                           const MCDisassembler *Decoder) {                    \
    assert(Imm < (1 << EncSize) && #EncSize "-bit encoding");                  \
    auto DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);              \
    return addOperand(Inst,                                                    \
                      DAsm->decodeSrcOp(AMDGPUDisassembler::OpWidth, EncImm,   \
                                        MandatoryLiteral, ImmWidth));          \
  }

static DecodeStatus decodeSrcOp(MCInst &Inst, unsigned EncSize,
                                AMDGPUDisassembler::OpWidthTy OpWidth,
                                unsigned Imm, unsigned EncImm,
                                bool MandatoryLiteral, unsigned ImmWidth,
                                AMDGPU::OperandSemantics Sema,
                                const MCDisassembler *Decoder) {
  assert(Imm < (1U << EncSize) && "Operand doesn't fit encoding!");
  auto DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
  return addOperand(Inst, DAsm->decodeSrcOp(OpWidth, EncImm, MandatoryLiteral,
                                            ImmWidth, Sema));
}

// Decoder for registers. The 7-bit Imm is the register number; decodeSrcOp
// supplies the register class. Used by SGPR-only operands.
#define DECODE_OPERAND_REG_7(RegClass, OpWidth)                                \
  DECODE_SrcOp(Decode##RegClass##RegisterClass, 7, OpWidth, Imm, false, 0)

// Decoder for registers. Imm(10-bit): Imm{7-0} is the register number,
// Imm{9} is acc (agpr or vgpr), and Imm{8} should be 0 (see VOP3Pe_SMFMAC).
// Set Imm{8} to 1 (IS_VGPR) to decode using 'enum10' from decodeSrcOp.
// Used by AV_ register classes (AGPR or VGPR only register operands).
template <AMDGPUDisassembler::OpWidthTy OpWidth>
static DecodeStatus decodeAV10(MCInst &Inst, unsigned Imm, uint64_t /* Addr */,
                               const MCDisassembler *Decoder) {
  return decodeSrcOp(Inst, 10, OpWidth, Imm, Imm | AMDGPU::EncValues::IS_VGPR,
                     false, 0, AMDGPU::OperandSemantics::INT, Decoder);
}
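
// Summary of the source-operand encodings handled below (as used by the
// surrounding decoders): the 9-bit "Src" field selects SGPRs, ttmps, inline
// constants, or VGPRs (bit 8 set); the 10-bit "enum10" form adds bit 9 to
// select AGPRs instead of VGPRs.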

// Decoder for Src(9-bit encoding) registers only.
template <AMDGPUDisassembler::OpWidthTy OpWidth>
static DecodeStatus decodeSrcReg9(MCInst &Inst, unsigned Imm,
                                  uint64_t /* Addr */,
                                  const MCDisassembler *Decoder) {
  return decodeSrcOp(Inst, 9, OpWidth, Imm, Imm, false, 0,
                     AMDGPU::OperandSemantics::INT, Decoder);
}

// Decoder for Src(9-bit encoding) AGPRs only: the register number is encoded
// in 9 bits; set Imm{9} to 1 (set acc) and decode using 'enum10' from
// decodeSrcOp.
template <AMDGPUDisassembler::OpWidthTy OpWidth>
static DecodeStatus decodeSrcA9(MCInst &Inst, unsigned Imm, uint64_t /* Addr */,
                                const MCDisassembler *Decoder) {
  return decodeSrcOp(Inst, 9, OpWidth, Imm, Imm | 512, false, 0,
                     AMDGPU::OperandSemantics::INT, Decoder);
}

// Decoder for 'enum10' from decodeSrcOp: Imm{8-0} is the 9-bit Src encoding
// and Imm{9} is acc; registers only.
template <AMDGPUDisassembler::OpWidthTy OpWidth>
static DecodeStatus decodeSrcAV10(MCInst &Inst, unsigned Imm,
                                  uint64_t /* Addr */,
                                  const MCDisassembler *Decoder) {
  return decodeSrcOp(Inst, 10, OpWidth, Imm, Imm, false, 0,
                     AMDGPU::OperandSemantics::INT, Decoder);
}

// Decoder for RegisterOperands using the 9-bit Src encoding. The operand can
// be a register from RegClass or an immediate. Registers that don't belong to
// RegClass are still decoded, and the InstPrinter will report a warning. An
// immediate is decoded into a constant of size ImmWidth, which should match
// the width of the immediate used by the OperandType (important for floating
// point types).
template <AMDGPUDisassembler::OpWidthTy OpWidth, unsigned ImmWidth,
          unsigned OperandSemantics>
static DecodeStatus decodeSrcRegOrImm9(MCInst &Inst, unsigned Imm,
                                       uint64_t /* Addr */,
                                       const MCDisassembler *Decoder) {
  return decodeSrcOp(Inst, 9, OpWidth, Imm, Imm, false, ImmWidth,
                     (AMDGPU::OperandSemantics)OperandSemantics, Decoder);
}

// Decoder for Src(9-bit encoding) AGPR or immediate. Set Imm{9} to 1 (set acc)
// and decode using 'enum10' from decodeSrcOp.
template <AMDGPUDisassembler::OpWidthTy OpWidth, unsigned ImmWidth,
          unsigned OperandSemantics>
static DecodeStatus decodeSrcRegOrImmA9(MCInst &Inst, unsigned Imm,
                                        uint64_t /* Addr */,
                                        const MCDisassembler *Decoder) {
  return decodeSrcOp(Inst, 9, OpWidth, Imm, Imm | 512, false, ImmWidth,
                     (AMDGPU::OperandSemantics)OperandSemantics, Decoder);
}

template <AMDGPUDisassembler::OpWidthTy OpWidth, unsigned ImmWidth,
          unsigned OperandSemantics>
static DecodeStatus decodeSrcRegOrImmDeferred9(MCInst &Inst, unsigned Imm,
                                               uint64_t /* Addr */,
                                               const MCDisassembler *Decoder) {
  return decodeSrcOp(Inst, 9, OpWidth, Imm, Imm, true, ImmWidth,
                     (AMDGPU::OperandSemantics)OperandSemantics, Decoder);
}

// Default decoders generated by tablegen: 'Decode<RegClass>RegisterClass'
// when RegisterClass is used as an operand. Most often used for destination
// operands.

DECODE_OPERAND_REG_8(VGPR_32)
DECODE_OPERAND_REG_8(VGPR_32_Lo128)
DECODE_OPERAND_REG_8(VReg_64)
DECODE_OPERAND_REG_8(VReg_96)
DECODE_OPERAND_REG_8(VReg_128)
DECODE_OPERAND_REG_8(VReg_256)
DECODE_OPERAND_REG_8(VReg_288)
DECODE_OPERAND_REG_8(VReg_352)
DECODE_OPERAND_REG_8(VReg_384)
DECODE_OPERAND_REG_8(VReg_512)
DECODE_OPERAND_REG_8(VReg_1024)

DECODE_OPERAND_REG_7(SReg_32, OPW32)
DECODE_OPERAND_REG_7(SReg_32_XEXEC, OPW32)
DECODE_OPERAND_REG_7(SReg_32_XM0_XEXEC, OPW32)
DECODE_OPERAND_REG_7(SReg_32_XEXEC_HI, OPW32)
DECODE_OPERAND_REG_7(SReg_64, OPW64)
DECODE_OPERAND_REG_7(SReg_64_XEXEC, OPW64)
DECODE_OPERAND_REG_7(SReg_96, OPW96)
DECODE_OPERAND_REG_7(SReg_128, OPW128)
DECODE_OPERAND_REG_7(SReg_256, OPW256)
DECODE_OPERAND_REG_7(SReg_512, OPW512)

DECODE_OPERAND_REG_8(AGPR_32)
DECODE_OPERAND_REG_8(AReg_64)
DECODE_OPERAND_REG_8(AReg_128)
DECODE_OPERAND_REG_8(AReg_256)
DECODE_OPERAND_REG_8(AReg_512)
DECODE_OPERAND_REG_8(AReg_1024)

static DecodeStatus DecodeVGPR_16RegisterClass(MCInst &Inst, unsigned Imm,
                                               uint64_t /*Addr*/,
                                               const MCDisassembler *Decoder) {
  assert(isUInt<10>(Imm) && "10-bit encoding expected");
  assert((Imm & (1 << 8)) == 0 && "Imm{8} should not be used");

  bool IsHi = Imm & (1 << 9);
  unsigned RegIdx = Imm & 0xff;
  auto DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
  return addOperand(Inst, DAsm->createVGPR16Operand(RegIdx, IsHi));
}

static DecodeStatus
DecodeVGPR_16_Lo128RegisterClass(MCInst &Inst, unsigned Imm, uint64_t /*Addr*/,
                                 const MCDisassembler *Decoder) {
  assert(isUInt<8>(Imm) && "8-bit encoding expected");

  bool IsHi = Imm & (1 << 7);
  unsigned RegIdx = Imm & 0x7f;
  auto DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
  return addOperand(Inst, DAsm->createVGPR16Operand(RegIdx, IsHi));
}

static DecodeStatus decodeOperand_VSrcT16_Lo128(MCInst &Inst, unsigned Imm,
                                                uint64_t /*Addr*/,
                                                const MCDisassembler *Decoder) {
  assert(isUInt<9>(Imm) && "9-bit encoding expected");

  const auto *DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
  bool IsVGPR = Imm & (1 << 8);
  if (IsVGPR) {
    bool IsHi = Imm & (1 << 7);
    unsigned RegIdx = Imm & 0x7f;
    return addOperand(Inst, DAsm->createVGPR16Operand(RegIdx, IsHi));
  }
  return addOperand(Inst, DAsm->decodeNonVGPRSrcOp(AMDGPUDisassembler::OPW16,
                                                   Imm & 0xFF, false, 16));
}

static DecodeStatus decodeOperand_VSrcT16(MCInst &Inst, unsigned Imm,
                                          uint64_t /*Addr*/,
                                          const MCDisassembler *Decoder) {
  assert(isUInt<10>(Imm) && "10-bit encoding expected");

  const auto *DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
  bool IsVGPR = Imm & (1 << 8);
  if (IsVGPR) {
    bool IsHi = Imm & (1 << 9);
    unsigned RegIdx = Imm & 0xff;
    return addOperand(Inst, DAsm->createVGPR16Operand(RegIdx, IsHi));
  }
  return addOperand(Inst, DAsm->decodeNonVGPRSrcOp(AMDGPUDisassembler::OPW16,
                                                   Imm & 0xFF, false, 16));
}

static DecodeStatus decodeOperand_KImmFP(MCInst &Inst, unsigned Imm,
                                         uint64_t Addr,
                                         const MCDisassembler *Decoder) {
  const auto *DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
  return addOperand(Inst, DAsm->decodeMandatoryLiteralConstant(Imm));
}

static DecodeStatus decodeOperandVOPDDstY(MCInst &Inst, unsigned Val,
                                          uint64_t Addr, const void *Decoder) {
  const auto *DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
  return addOperand(Inst, DAsm->decodeVOPDDstYOp(Inst, Val));
}

static bool IsAGPROperand(const MCInst &Inst, int OpIdx,
                          const MCRegisterInfo *MRI) {
  if (OpIdx < 0)
    return false;

  const MCOperand &Op = Inst.getOperand(OpIdx);
  if (!Op.isReg())
    return false;

  unsigned Sub = MRI->getSubReg(Op.getReg(), AMDGPU::sub0);
  auto Reg = Sub ? Sub : Op.getReg();
  return Reg >= AMDGPU::AGPR0 && Reg <= AMDGPU::AGPR255;
}

static DecodeStatus decodeAVLdSt(MCInst &Inst, unsigned Imm,
                                 AMDGPUDisassembler::OpWidthTy Opw,
                                 const MCDisassembler *Decoder) {
  auto DAsm = static_cast<const AMDGPUDisassembler*>(Decoder);
  if (!DAsm->isGFX90A()) {
    Imm &= 511;
  } else {
    // If atomic has both vdata and vdst their register classes are tied.
    // The bit is decoded along with the vdst, first operand. We need to
    // change register class to AGPR if vdst was AGPR.
    // If a DS instruction has both data0 and data1 their register classes
    // are also tied.
    unsigned Opc = Inst.getOpcode();
    uint64_t TSFlags = DAsm->getMCII()->get(Opc).TSFlags;
    uint16_t DataNameIdx = (TSFlags & SIInstrFlags::DS) ? AMDGPU::OpName::data0
                                                        : AMDGPU::OpName::vdata;
    const MCRegisterInfo *MRI = DAsm->getContext().getRegisterInfo();
    int DataIdx = AMDGPU::getNamedOperandIdx(Opc, DataNameIdx);
    if ((int)Inst.getNumOperands() == DataIdx) {
      int DstIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdst);
      if (IsAGPROperand(Inst, DstIdx, MRI))
        Imm |= 512;
    }

    if (TSFlags & SIInstrFlags::DS) {
      int Data2Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::data1);
      if ((int)Inst.getNumOperands() == Data2Idx &&
          IsAGPROperand(Inst, DataIdx, MRI))
        Imm |= 512;
    }
  }
  return addOperand(Inst, DAsm->decodeSrcOp(Opw, Imm | 256));
}

template <AMDGPUDisassembler::OpWidthTy Opw>
static DecodeStatus decodeAVLdSt(MCInst &Inst, unsigned Imm,
                                 uint64_t /* Addr */,
                                 const MCDisassembler *Decoder) {
  return decodeAVLdSt(Inst, Imm, Opw, Decoder);
}

static DecodeStatus decodeOperand_VSrc_f64(MCInst &Inst, unsigned Imm,
                                           uint64_t Addr,
                                           const MCDisassembler *Decoder) {
  assert(Imm < (1 << 9) && "9-bit encoding");
  auto DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
  return addOperand(Inst,
                    DAsm->decodeSrcOp(AMDGPUDisassembler::OPW64, Imm, false, 64,
                                      AMDGPU::OperandSemantics::FP64));
}

#define DECODE_SDWA(DecName) \
DECODE_OPERAND(decodeSDWA##DecName, decodeSDWA##DecName)

DECODE_SDWA(Src32)
DECODE_SDWA(Src16)
DECODE_SDWA(VopcDst)

#include "AMDGPUGenDisassemblerTables.inc"

//===----------------------------------------------------------------------===//
//
//===----------------------------------------------------------------------===//

template <typename T> static inline T eatBytes(ArrayRef<uint8_t>& Bytes) {
  assert(Bytes.size() >= sizeof(T));
  const auto Res =
      support::endian::read<T, llvm::endianness::little>(Bytes.data());
  Bytes = Bytes.slice(sizeof(T));
  return Res;
}
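
// Illustrative usage (not from the original file): with Bytes = {0x01, 0x02,
// 0x03, 0x04, ...}, eatBytes<uint32_t>(Bytes) returns 0x04030201 (little
// endian) and advances Bytes past the four consumed bytes.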

static inline DecoderUInt128 eat12Bytes(ArrayRef<uint8_t> &Bytes) {
  assert(Bytes.size() >= 12);
  uint64_t Lo =
      support::endian::read<uint64_t, llvm::endianness::little>(Bytes.data());
  Bytes = Bytes.slice(8);
  uint64_t Hi =
      support::endian::read<uint32_t, llvm::endianness::little>(Bytes.data());
  Bytes = Bytes.slice(4);
  return DecoderUInt128(Lo, Hi);
}
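
// A 96-bit encoding is consumed as one 64-bit word plus one 32-bit word and
// packed into a DecoderUInt128 so that the GFX11+/GFX12 96-bit decoder tables
// below can match it as a single bit pattern.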

DecodeStatus AMDGPUDisassembler::getInstruction(MCInst &MI, uint64_t &Size,
                                                ArrayRef<uint8_t> Bytes_,
                                                uint64_t Address,
                                                raw_ostream &CS) const {
  unsigned MaxInstBytesNum = std::min((size_t)TargetMaxInstBytes, Bytes_.size());
  Bytes = Bytes_.slice(0, MaxInstBytesNum);

  // In case the opcode is not recognized we'll assume a Size of 4 bytes (unless
  // there are fewer bytes left). This will be overridden on success.
  Size = std::min((size_t)4, Bytes_.size());

  do {
    // ToDo: better to switch encoding length using some bit predicate
    // but it is unknown yet, so try all we can

    // Try to decode DPP and SDWA first to solve conflict with VOP1 and VOP2
    // encodings
    if (isGFX11Plus() && Bytes.size() >= 12 ) {
      DecoderUInt128 DecW = eat12Bytes(Bytes);

      if (isGFX11() &&
          tryDecodeInst(DecoderTableGFX1196, DecoderTableGFX11_FAKE1696, MI,
                        DecW, Address, CS))
        break;

      if (isGFX12() &&
          tryDecodeInst(DecoderTableGFX1296, DecoderTableGFX12_FAKE1696, MI,
                        DecW, Address, CS))
        break;

      if (isGFX12() &&
          tryDecodeInst(DecoderTableGFX12W6496, MI, DecW, Address, CS))
        break;
    }

    // Reinitialize Bytes
    Bytes = Bytes_.slice(0, MaxInstBytesNum);

    if (Bytes.size() >= 8) {
      const uint64_t QW = eatBytes<uint64_t>(Bytes);

      if (STI.hasFeature(AMDGPU::FeatureGFX10_BEncoding) &&
          tryDecodeInst(DecoderTableGFX10_B64, MI, QW, Address, CS))
        break;

      if (STI.hasFeature(AMDGPU::FeatureUnpackedD16VMem) &&
          tryDecodeInst(DecoderTableGFX80_UNPACKED64, MI, QW, Address, CS))
        break;

      // Some GFX9 subtargets repurposed the v_mad_mix_f32, v_mad_mixlo_f16 and
      // v_mad_mixhi_f16 for FMA variants. Try to decode using this special
      // table first so we print the correct name.
      if (STI.hasFeature(AMDGPU::FeatureFmaMixInsts) &&
          tryDecodeInst(DecoderTableGFX9_DL64, MI, QW, Address, CS))
        break;

      if (STI.hasFeature(AMDGPU::FeatureGFX940Insts) &&
          tryDecodeInst(DecoderTableGFX94064, MI, QW, Address, CS))
        break;

      if (STI.hasFeature(AMDGPU::FeatureGFX90AInsts) &&
          tryDecodeInst(DecoderTableGFX90A64, MI, QW, Address, CS))
        break;

      if ((isVI() || isGFX9()) &&
          tryDecodeInst(DecoderTableGFX864, MI, QW, Address, CS))
        break;

      if (isGFX9() && tryDecodeInst(DecoderTableGFX964, MI, QW, Address, CS))
        break;

      if (isGFX10() && tryDecodeInst(DecoderTableGFX1064, MI, QW, Address, CS))
        break;

      if (isGFX12() &&
          tryDecodeInst(DecoderTableGFX1264, DecoderTableGFX12_FAKE1664, MI, QW,
                        Address, CS))
        break;

      if (isGFX11() &&
          tryDecodeInst(DecoderTableGFX1164, DecoderTableGFX11_FAKE1664, MI, QW,
                        Address, CS))
        break;

      if (isGFX11() &&
          tryDecodeInst(DecoderTableGFX11W6464, MI, QW, Address, CS))
        break;

      if (isGFX12() &&
          tryDecodeInst(DecoderTableGFX12W6464, MI, QW, Address, CS))
        break;
    }

    // Reinitialize Bytes
    Bytes = Bytes_.slice(0, MaxInstBytesNum);

    // Try decode 32-bit instruction
    if (Bytes.size() >= 4) {
      const uint32_t DW = eatBytes<uint32_t>(Bytes);

      if ((isVI() || isGFX9()) &&
          tryDecodeInst(DecoderTableGFX832, MI, DW, Address, CS))
        break;

      if (tryDecodeInst(DecoderTableAMDGPU32, MI, DW, Address, CS))
        break;

      if (isGFX9() && tryDecodeInst(DecoderTableGFX932, MI, DW, Address, CS))
        break;

      if (STI.hasFeature(AMDGPU::FeatureGFX90AInsts) &&
          tryDecodeInst(DecoderTableGFX90A32, MI, DW, Address, CS))
        break;

      if (STI.hasFeature(AMDGPU::FeatureGFX10_BEncoding) &&
          tryDecodeInst(DecoderTableGFX10_B32, MI, DW, Address, CS))
        break;

      if (isGFX10() && tryDecodeInst(DecoderTableGFX1032, MI, DW, Address, CS))
        break;

      if (isGFX11() &&
          tryDecodeInst(DecoderTableGFX1132, DecoderTableGFX11_FAKE1632, MI, DW,
                        Address, CS))
        break;

      if (isGFX12() &&
          tryDecodeInst(DecoderTableGFX1232, DecoderTableGFX12_FAKE1632, MI, DW,
                        Address, CS))
        break;
    }

    return MCDisassembler::Fail;
  } while (false);

  if (MCII->get(MI.getOpcode()).TSFlags & SIInstrFlags::DPP) {
    if (isMacDPP(MI))
      convertMacDPPInst(MI);

    if (MCII->get(MI.getOpcode()).TSFlags & SIInstrFlags::VOP3P)
      convertVOP3PDPPInst(MI);
    else if ((MCII->get(MI.getOpcode()).TSFlags & SIInstrFlags::VOPC) ||
             AMDGPU::isVOPC64DPP(MI.getOpcode()))
      convertVOPCDPPInst(MI); // Special VOP3 case
    else if (AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::dpp8) !=
             -1)
      convertDPP8Inst(MI);
    else if (MCII->get(MI.getOpcode()).TSFlags & SIInstrFlags::VOP3)
      convertVOP3DPPInst(MI); // Regular VOP3 case
  }

  if (AMDGPU::isMAC(MI.getOpcode())) {
    // Insert dummy unused src2_modifiers.
    insertNamedMCOperand(MI, MCOperand::createImm(0),
                         AMDGPU::OpName::src2_modifiers);
  }

  if (MI.getOpcode() == AMDGPU::V_CVT_SR_BF8_F32_e64_dpp ||
      MI.getOpcode() == AMDGPU::V_CVT_SR_FP8_F32_e64_dpp) {
    // Insert dummy unused src2_modifiers.
    insertNamedMCOperand(MI, MCOperand::createImm(0),
                         AMDGPU::OpName::src2_modifiers);
  }

  if ((MCII->get(MI.getOpcode()).TSFlags & SIInstrFlags::DS) &&
      !AMDGPU::hasGDS(STI)) {
    insertNamedMCOperand(MI, MCOperand::createImm(0), AMDGPU::OpName::gds);
  }

  if (MCII->get(MI.getOpcode()).TSFlags &
      (SIInstrFlags::MUBUF | SIInstrFlags::FLAT | SIInstrFlags::SMRD)) {
    int CPolPos = AMDGPU::getNamedOperandIdx(MI.getOpcode(),
                                             AMDGPU::OpName::cpol);
    if (CPolPos != -1) {
      unsigned CPol =
          (MCII->get(MI.getOpcode()).TSFlags & SIInstrFlags::IsAtomicRet) ?
              AMDGPU::CPol::GLC : 0;
      if (MI.getNumOperands() <= (unsigned)CPolPos) {
        insertNamedMCOperand(MI, MCOperand::createImm(CPol),
                             AMDGPU::OpName::cpol);
      } else if (CPol) {
        MI.getOperand(CPolPos).setImm(MI.getOperand(CPolPos).getImm() | CPol);
      }
    }
  }

  if ((MCII->get(MI.getOpcode()).TSFlags &
       (SIInstrFlags::MTBUF | SIInstrFlags::MUBUF)) &&
      (STI.hasFeature(AMDGPU::FeatureGFX90AInsts))) {
    // GFX90A lost TFE, its place is occupied by ACC.
    int TFEOpIdx =
        AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::tfe);
    if (TFEOpIdx != -1) {
      auto TFEIter = MI.begin();
      std::advance(TFEIter, TFEOpIdx);
      MI.insert(TFEIter, MCOperand::createImm(0));
    }
  }

  if (MCII->get(MI.getOpcode()).TSFlags &
      (SIInstrFlags::MTBUF | SIInstrFlags::MUBUF)) {
    int SWZOpIdx =
        AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::swz);
    if (SWZOpIdx != -1) {
      auto SWZIter = MI.begin();
      std::advance(SWZIter, SWZOpIdx);
      MI.insert(SWZIter, MCOperand::createImm(0));
    }
  }

  if (MCII->get(MI.getOpcode()).TSFlags & SIInstrFlags::MIMG) {
    int VAddr0Idx =
        AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::vaddr0);
    int RsrcIdx =
        AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::srsrc);
    unsigned NSAArgs = RsrcIdx - VAddr0Idx - 1;
    if (VAddr0Idx >= 0 && NSAArgs > 0) {
      unsigned NSAWords = (NSAArgs + 3) / 4;
      if (Bytes.size() < 4 * NSAWords)
        return MCDisassembler::Fail;
      for (unsigned i = 0; i < NSAArgs; ++i) {
        const unsigned VAddrIdx = VAddr0Idx + 1 + i;
        auto VAddrRCID =
            MCII->get(MI.getOpcode()).operands()[VAddrIdx].RegClass;
        MI.insert(MI.begin() + VAddrIdx, createRegOperand(VAddrRCID, Bytes[i]));
      }
      Bytes = Bytes.slice(4 * NSAWords);
    }

    convertMIMGInst(MI);
  }

  if (MCII->get(MI.getOpcode()).TSFlags &
      (SIInstrFlags::VIMAGE | SIInstrFlags::VSAMPLE))
    convertMIMGInst(MI);

  if (MCII->get(MI.getOpcode()).TSFlags & SIInstrFlags::EXP)
    convertEXPInst(MI);

  if (MCII->get(MI.getOpcode()).TSFlags & SIInstrFlags::VINTERP)
    convertVINTERPInst(MI);

  if (MCII->get(MI.getOpcode()).TSFlags & SIInstrFlags::SDWA)
    convertSDWAInst(MI);

  int VDstIn_Idx = AMDGPU::getNamedOperandIdx(MI.getOpcode(),
                                              AMDGPU::OpName::vdst_in);
  if (VDstIn_Idx != -1) {
    int Tied = MCII->get(MI.getOpcode()).getOperandConstraint(VDstIn_Idx,
                           MCOI::OperandConstraint::TIED_TO);
    if (Tied != -1 && (MI.getNumOperands() <= (unsigned)VDstIn_Idx ||
         !MI.getOperand(VDstIn_Idx).isReg() ||
         MI.getOperand(VDstIn_Idx).getReg() != MI.getOperand(Tied).getReg())) {
      if (MI.getNumOperands() > (unsigned)VDstIn_Idx)
        MI.erase(&MI.getOperand(VDstIn_Idx));
      insertNamedMCOperand(MI,
        MCOperand::createReg(MI.getOperand(Tied).getReg()),
        AMDGPU::OpName::vdst_in);
    }
  }

  int ImmLitIdx =
      AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::imm);
  bool IsSOPK = MCII->get(MI.getOpcode()).TSFlags & SIInstrFlags::SOPK;
  if (ImmLitIdx != -1 && !IsSOPK)
    convertFMAanyK(MI, ImmLitIdx);

  Size = MaxInstBytesNum - Bytes.size();
  return MCDisassembler::Success;
}

void AMDGPUDisassembler::convertEXPInst(MCInst &MI) const {
  if (STI.hasFeature(AMDGPU::FeatureGFX11Insts)) {
    // The MCInst still has these fields even though they are no longer encoded
    // in the GFX11 instruction.
    insertNamedMCOperand(MI, MCOperand::createImm(0), AMDGPU::OpName::vm);
    insertNamedMCOperand(MI, MCOperand::createImm(0), AMDGPU::OpName::compr);
  }
}

void AMDGPUDisassembler::convertVINTERPInst(MCInst &MI) const {
  if (MI.getOpcode() == AMDGPU::V_INTERP_P10_F16_F32_inreg_gfx11 ||
      MI.getOpcode() == AMDGPU::V_INTERP_P10_F16_F32_inreg_gfx12 ||
      MI.getOpcode() == AMDGPU::V_INTERP_P10_RTZ_F16_F32_inreg_gfx11 ||
      MI.getOpcode() == AMDGPU::V_INTERP_P10_RTZ_F16_F32_inreg_gfx12 ||
      MI.getOpcode() == AMDGPU::V_INTERP_P2_F16_F32_inreg_gfx11 ||
      MI.getOpcode() == AMDGPU::V_INTERP_P2_F16_F32_inreg_gfx12 ||
      MI.getOpcode() == AMDGPU::V_INTERP_P2_RTZ_F16_F32_inreg_gfx11 ||
      MI.getOpcode() == AMDGPU::V_INTERP_P2_RTZ_F16_F32_inreg_gfx12) {
    // The MCInst has this field that is not directly encoded in the
    // instruction.
    insertNamedMCOperand(MI, MCOperand::createImm(0), AMDGPU::OpName::op_sel);
  }
}

void AMDGPUDisassembler::convertSDWAInst(MCInst &MI) const {
  if (STI.hasFeature(AMDGPU::FeatureGFX9) ||
      STI.hasFeature(AMDGPU::FeatureGFX10)) {
    if (AMDGPU::hasNamedOperand(MI.getOpcode(), AMDGPU::OpName::sdst))
      // VOPC - insert clamp
      insertNamedMCOperand(MI, MCOperand::createImm(0), AMDGPU::OpName::clamp);
  } else if (STI.hasFeature(AMDGPU::FeatureVolcanicIslands)) {
    int SDst = AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::sdst);
    if (SDst != -1) {
      // VOPC - insert VCC register as sdst
      insertNamedMCOperand(MI, createRegOperand(AMDGPU::VCC),
                           AMDGPU::OpName::sdst);
    } else {
      // VOP1/2 - insert omod if present in instruction
      insertNamedMCOperand(MI, MCOperand::createImm(0), AMDGPU::OpName::omod);
    }
  }
}

struct VOPModifiers {
  unsigned OpSel = 0;
  unsigned OpSelHi = 0;
  unsigned NegLo = 0;
  unsigned NegHi = 0;
};

// Reconstruct values of VOP3/VOP3P operands such as op_sel.
// Note that these values do not affect disassembler output,
// so this is only necessary for consistency with src_modifiers.
static VOPModifiers collectVOPModifiers(const MCInst &MI,
                                        bool IsVOP3P = false) {
  VOPModifiers Modifiers;
  unsigned Opc = MI.getOpcode();
  const int ModOps[] = {AMDGPU::OpName::src0_modifiers,
                        AMDGPU::OpName::src1_modifiers,
                        AMDGPU::OpName::src2_modifiers};
  for (int J = 0; J < 3; ++J) {
    int OpIdx = AMDGPU::getNamedOperandIdx(Opc, ModOps[J]);
    if (OpIdx == -1)
      continue;

    unsigned Val = MI.getOperand(OpIdx).getImm();

    Modifiers.OpSel |= !!(Val & SISrcMods::OP_SEL_0) << J;
    if (IsVOP3P) {
      Modifiers.OpSelHi |= !!(Val & SISrcMods::OP_SEL_1) << J;
      Modifiers.NegLo |= !!(Val & SISrcMods::NEG) << J;
      Modifiers.NegHi |= !!(Val & SISrcMods::NEG_HI) << J;
    } else if (J == 0) {
      Modifiers.OpSel |= !!(Val & SISrcMods::DST_OP_SEL) << 3;
    }
  }

  return Modifiers;
}
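
// Reading of the loop above (illustrative): bit J of each reconstructed field
// corresponds to srcJ, so OP_SEL_0 set only in src1_modifiers yields
// Modifiers.OpSel == 0b010; for non-VOP3P opcodes the dst op_sel bit travels
// in src0_modifiers and lands in bit 3.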

// Instructions decode the op_sel/suffix bits into the src_modifiers
// operands. Copy those bits into the src operands for true16 VGPRs.
void AMDGPUDisassembler::convertTrue16OpSel(MCInst &MI) const {
  const unsigned Opc = MI.getOpcode();
  const MCRegisterClass &ConversionRC =
      MRI.getRegClass(AMDGPU::VGPR_16RegClassID);
  constexpr std::array<std::tuple<int, int, unsigned>, 4> OpAndOpMods = {
      {{AMDGPU::OpName::src0, AMDGPU::OpName::src0_modifiers,
        SISrcMods::OP_SEL_0},
       {AMDGPU::OpName::src1, AMDGPU::OpName::src1_modifiers,
        SISrcMods::OP_SEL_0},
       {AMDGPU::OpName::src2, AMDGPU::OpName::src2_modifiers,
        SISrcMods::OP_SEL_0},
       {AMDGPU::OpName::vdst, AMDGPU::OpName::src0_modifiers,
        SISrcMods::DST_OP_SEL}}};
  for (const auto &[OpName, OpModsName, OpSelMask] : OpAndOpMods) {
    int OpIdx = AMDGPU::getNamedOperandIdx(Opc, OpName);
    int OpModsIdx = AMDGPU::getNamedOperandIdx(Opc, OpModsName);
    if (OpIdx == -1 || OpModsIdx == -1)
      continue;
    MCOperand &Op = MI.getOperand(OpIdx);
    if (!Op.isReg())
      continue;
    if (!ConversionRC.contains(Op.getReg()))
      continue;
    unsigned OpEnc = MRI.getEncodingValue(Op.getReg());
    const MCOperand &OpMods = MI.getOperand(OpModsIdx);
    unsigned ModVal = OpMods.getImm();
    if (ModVal & OpSelMask) { // isHi
      unsigned RegIdx = OpEnc & AMDGPU::HWEncoding::REG_IDX_MASK;
      Op.setReg(ConversionRC.getRegister(RegIdx * 2 + 1));
    }
  }
}

// MAC opcodes have special old and src2 operands.
// src2 is tied to dst, while old is not tied (but assumed to be).
bool AMDGPUDisassembler::isMacDPP(MCInst &MI) const {
  constexpr int DST_IDX = 0;
  auto Opcode = MI.getOpcode();
  const auto &Desc = MCII->get(Opcode);
  auto OldIdx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::old);

  if (OldIdx != -1 && Desc.getOperandConstraint(
                          OldIdx, MCOI::OperandConstraint::TIED_TO) == -1) {
    assert(AMDGPU::hasNamedOperand(Opcode, AMDGPU::OpName::src2));
    assert(Desc.getOperandConstraint(
               AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2),
               MCOI::OperandConstraint::TIED_TO) == DST_IDX);
    (void)DST_IDX;
    return true;
  }

  return false;
}

// Create dummy old operand and insert dummy unused src2_modifiers
void AMDGPUDisassembler::convertMacDPPInst(MCInst &MI) const {
  assert(MI.getNumOperands() + 1 < MCII->get(MI.getOpcode()).getNumOperands());
  insertNamedMCOperand(MI, MCOperand::createReg(0), AMDGPU::OpName::old);
  insertNamedMCOperand(MI, MCOperand::createImm(0),
                       AMDGPU::OpName::src2_modifiers);
}

void AMDGPUDisassembler::convertDPP8Inst(MCInst &MI) const {
  unsigned Opc = MI.getOpcode();

  int VDstInIdx =
      AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::vdst_in);
  if (VDstInIdx != -1)
    insertNamedMCOperand(MI, MI.getOperand(0), AMDGPU::OpName::vdst_in);

  if (MI.getOpcode() == AMDGPU::V_CVT_SR_BF8_F32_e64_dpp8_gfx12 ||
      MI.getOpcode() == AMDGPU::V_CVT_SR_FP8_F32_e64_dpp8_gfx12)
    insertNamedMCOperand(MI, MI.getOperand(0), AMDGPU::OpName::src2);

  unsigned DescNumOps = MCII->get(Opc).getNumOperands();
  if (MI.getNumOperands() < DescNumOps &&
      AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::op_sel)) {
    convertTrue16OpSel(MI);
    auto Mods = collectVOPModifiers(MI);
    insertNamedMCOperand(MI, MCOperand::createImm(Mods.OpSel),
                         AMDGPU::OpName::op_sel);
  } else {
    // Insert dummy unused src modifiers.
    if (MI.getNumOperands() < DescNumOps &&
        AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::src0_modifiers))
      insertNamedMCOperand(MI, MCOperand::createImm(0),
                           AMDGPU::OpName::src0_modifiers);

    if (MI.getNumOperands() < DescNumOps &&
        AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::src1_modifiers))
      insertNamedMCOperand(MI, MCOperand::createImm(0),
                           AMDGPU::OpName::src1_modifiers);
  }
}

void AMDGPUDisassembler::convertVOP3DPPInst(MCInst &MI) const {
  convertTrue16OpSel(MI);

  int VDstInIdx =
      AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::vdst_in);
  if (VDstInIdx != -1)
    insertNamedMCOperand(MI, MI.getOperand(0), AMDGPU::OpName::vdst_in);

  if (MI.getOpcode() == AMDGPU::V_CVT_SR_BF8_F32_e64_dpp_gfx12 ||
      MI.getOpcode() == AMDGPU::V_CVT_SR_FP8_F32_e64_dpp_gfx12)
    insertNamedMCOperand(MI, MI.getOperand(0), AMDGPU::OpName::src2);

  unsigned Opc = MI.getOpcode();
  unsigned DescNumOps = MCII->get(Opc).getNumOperands();
  if (MI.getNumOperands() < DescNumOps &&
      AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::op_sel)) {
    auto Mods = collectVOPModifiers(MI);
    insertNamedMCOperand(MI, MCOperand::createImm(Mods.OpSel),
                         AMDGPU::OpName::op_sel);
  }
}

// Note that before gfx10, the MIMG encoding provided no information about
// VADDR size. Consequently, decoded instructions always show address as if it
// has 1 dword, which may not actually be the case.
void AMDGPUDisassembler::convertMIMGInst(MCInst &MI) const {
  auto TSFlags = MCII->get(MI.getOpcode()).TSFlags;

  int VDstIdx = AMDGPU::getNamedOperandIdx(MI.getOpcode(),
                                           AMDGPU::OpName::vdst);

  int VDataIdx = AMDGPU::getNamedOperandIdx(MI.getOpcode(),
                                            AMDGPU::OpName::vdata);
  int VAddr0Idx =
      AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::vaddr0);
  int RsrcOpName = TSFlags & SIInstrFlags::MIMG ? AMDGPU::OpName::srsrc
                                                : AMDGPU::OpName::rsrc;
  int RsrcIdx = AMDGPU::getNamedOperandIdx(MI.getOpcode(), RsrcOpName);
  int DMaskIdx = AMDGPU::getNamedOperandIdx(MI.getOpcode(),
                                            AMDGPU::OpName::dmask);

  int TFEIdx = AMDGPU::getNamedOperandIdx(MI.getOpcode(),
                                          AMDGPU::OpName::tfe);
  int D16Idx = AMDGPU::getNamedOperandIdx(MI.getOpcode(),
                                          AMDGPU::OpName::d16);

  const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(MI.getOpcode());
  const AMDGPU::MIMGBaseOpcodeInfo *BaseOpcode =
      AMDGPU::getMIMGBaseOpcodeInfo(Info->BaseOpcode);

  assert(VDataIdx != -1);
  if (BaseOpcode->BVH) {
    // Add A16 operand for intersect_ray instructions
    addOperand(MI, MCOperand::createImm(BaseOpcode->A16));
    return;
  }

  bool IsAtomic = (VDstIdx != -1);
  bool IsGather4 = TSFlags & SIInstrFlags::Gather4;
  bool IsVSample = TSFlags & SIInstrFlags::VSAMPLE;
  bool IsNSA = false;
  bool IsPartialNSA = false;
  unsigned AddrSize = Info->VAddrDwords;

  if (isGFX10Plus()) {
    unsigned DimIdx =
        AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::dim);
    int A16Idx =
        AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::a16);
    const AMDGPU::MIMGDimInfo *Dim =
        AMDGPU::getMIMGDimInfoByEncoding(MI.getOperand(DimIdx).getImm());
    const bool IsA16 = (A16Idx != -1 && MI.getOperand(A16Idx).getImm());

    AddrSize =
        AMDGPU::getAddrSizeMIMGOp(BaseOpcode, Dim, IsA16, AMDGPU::hasG16(STI));

    // VSAMPLE insts that do not use vaddr3 behave the same as NSA forms.
    // VIMAGE insts other than BVH never use vaddr4.
    IsNSA = Info->MIMGEncoding == AMDGPU::MIMGEncGfx10NSA ||
            Info->MIMGEncoding == AMDGPU::MIMGEncGfx11NSA ||
            Info->MIMGEncoding == AMDGPU::MIMGEncGfx12;
    if (!IsNSA) {
      if (!IsVSample && AddrSize > 12)
        AddrSize = 16;
    } else {
      if (AddrSize > Info->VAddrDwords) {
        if (!STI.hasFeature(AMDGPU::FeaturePartialNSAEncoding)) {
          // The NSA encoding does not contain enough operands for the
          // combination of base opcode / dimension. Should this be an error?
          return;
        }
        IsPartialNSA = true;
      }
    }
  }

  unsigned DMask = MI.getOperand(DMaskIdx).getImm() & 0xf;
  unsigned DstSize = IsGather4 ? 4 : std::max(llvm::popcount(DMask), 1);

  bool D16 = D16Idx >= 0 && MI.getOperand(D16Idx).getImm();
  if (D16 && AMDGPU::hasPackedD16(STI)) {
    DstSize = (DstSize + 1) / 2;
  }

  if (TFEIdx != -1 && MI.getOperand(TFEIdx).getImm())
    DstSize += 1;

  if (DstSize == Info->VDataDwords && AddrSize == Info->VAddrDwords)
    return;

  int NewOpcode =
      AMDGPU::getMIMGOpcode(Info->BaseOpcode, Info->MIMGEncoding, DstSize, AddrSize);
  if (NewOpcode == -1)
    return;

  // Widen the register to the correct number of enabled channels.
  unsigned NewVdata = AMDGPU::NoRegister;
  if (DstSize != Info->VDataDwords) {
    auto DataRCID = MCII->get(NewOpcode).operands()[VDataIdx].RegClass;

    // Get first subregister of VData
    unsigned Vdata0 = MI.getOperand(VDataIdx).getReg();
    unsigned VdataSub0 = MRI.getSubReg(Vdata0, AMDGPU::sub0);
    Vdata0 = (VdataSub0 != 0)? VdataSub0 : Vdata0;

    NewVdata = MRI.getMatchingSuperReg(Vdata0, AMDGPU::sub0,
                                       &MRI.getRegClass(DataRCID));
    if (NewVdata == AMDGPU::NoRegister) {
      // It's possible to encode this such that the low register + enabled
      // components exceeds the register count.
      return;
    }
  }

  // If not using NSA on GFX10+, widen vaddr0 address register to correct size.
  // If using partial NSA on GFX11+ widen last address register.
  int VAddrSAIdx = IsPartialNSA ? (RsrcIdx - 1) : VAddr0Idx;
  unsigned NewVAddrSA = AMDGPU::NoRegister;
  if (STI.hasFeature(AMDGPU::FeatureNSAEncoding) && (!IsNSA || IsPartialNSA) &&
      AddrSize != Info->VAddrDwords) {
    unsigned VAddrSA = MI.getOperand(VAddrSAIdx).getReg();
    unsigned VAddrSubSA = MRI.getSubReg(VAddrSA, AMDGPU::sub0);
    VAddrSA = VAddrSubSA ? VAddrSubSA : VAddrSA;

    auto AddrRCID = MCII->get(NewOpcode).operands()[VAddrSAIdx].RegClass;
    NewVAddrSA = MRI.getMatchingSuperReg(VAddrSA, AMDGPU::sub0,
                                         &MRI.getRegClass(AddrRCID));
    if (!NewVAddrSA)
      return;
  }

  MI.setOpcode(NewOpcode);

  if (NewVdata != AMDGPU::NoRegister) {
    MI.getOperand(VDataIdx) = MCOperand::createReg(NewVdata);

    if (IsAtomic) {
      // Atomic operations have an additional operand (a copy of data)
      MI.getOperand(VDstIdx) = MCOperand::createReg(NewVdata);
    }
  }

  if (NewVAddrSA) {
    MI.getOperand(VAddrSAIdx) = MCOperand::createReg(NewVAddrSA);
  } else if (IsNSA) {
    assert(AddrSize <= Info->VAddrDwords);
    MI.erase(MI.begin() + VAddr0Idx + AddrSize,
             MI.begin() + VAddr0Idx + Info->VAddrDwords);
  }
}

// op_sel and neg bits are used in src_modifiers and standalone operands. The
// autogenerated decoder only adds them to src_modifiers, so manually add the
// bits to the other operands.
void AMDGPUDisassembler::convertVOP3PDPPInst(MCInst &MI) const {
  unsigned Opc = MI.getOpcode();
  unsigned DescNumOps = MCII->get(Opc).getNumOperands();
  auto Mods = collectVOPModifiers(MI, true);

  if (MI.getNumOperands() < DescNumOps &&
      AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::vdst_in))
    insertNamedMCOperand(MI, MCOperand::createImm(0), AMDGPU::OpName::vdst_in);

  if (MI.getNumOperands() < DescNumOps &&
      AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::op_sel))
    insertNamedMCOperand(MI, MCOperand::createImm(Mods.OpSel),
                         AMDGPU::OpName::op_sel);
  if (MI.getNumOperands() < DescNumOps &&
      AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::op_sel_hi))
    insertNamedMCOperand(MI, MCOperand::createImm(Mods.OpSelHi),
                         AMDGPU::OpName::op_sel_hi);
  if (MI.getNumOperands() < DescNumOps &&
      AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::neg_lo))
    insertNamedMCOperand(MI, MCOperand::createImm(Mods.NegLo),
                         AMDGPU::OpName::neg_lo);
  if (MI.getNumOperands() < DescNumOps &&
      AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::neg_hi))
    insertNamedMCOperand(MI, MCOperand::createImm(Mods.NegHi),
                         AMDGPU::OpName::neg_hi);
}

// Create dummy old operand and insert optional operands
void AMDGPUDisassembler::convertVOPCDPPInst(MCInst &MI) const {
  unsigned Opc = MI.getOpcode();
  unsigned DescNumOps = MCII->get(Opc).getNumOperands();

  if (MI.getNumOperands() < DescNumOps &&
      AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::old))
    insertNamedMCOperand(MI, MCOperand::createReg(0), AMDGPU::OpName::old);

  if (MI.getNumOperands() < DescNumOps &&
      AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::src0_modifiers))
    insertNamedMCOperand(MI, MCOperand::createImm(0),
                         AMDGPU::OpName::src0_modifiers);

  if (MI.getNumOperands() < DescNumOps &&
      AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::src1_modifiers))
    insertNamedMCOperand(MI, MCOperand::createImm(0),
                         AMDGPU::OpName::src1_modifiers);
}

void AMDGPUDisassembler::convertFMAanyK(MCInst &MI, int ImmLitIdx) const {
  assert(HasLiteral && "Should have decoded a literal");
  const MCInstrDesc &Desc = MCII->get(MI.getOpcode());
  unsigned DescNumOps = Desc.getNumOperands();
  insertNamedMCOperand(MI, MCOperand::createImm(Literal),
                       AMDGPU::OpName::immDeferred);
  assert(DescNumOps == MI.getNumOperands());
  for (unsigned I = 0; I < DescNumOps; ++I) {
    auto &Op = MI.getOperand(I);
    auto OpType = Desc.operands()[I].OperandType;
    bool IsDeferredOp = (OpType == AMDGPU::OPERAND_REG_IMM_FP32_DEFERRED ||
                         OpType == AMDGPU::OPERAND_REG_IMM_FP16_DEFERRED);
    if (Op.isImm() && Op.getImm() == AMDGPU::EncValues::LITERAL_CONST &&
        IsDeferredOp)
      Op.setImm(Literal);
  }
}

const char* AMDGPUDisassembler::getRegClassName(unsigned RegClassID) const {
  return getContext().getRegisterInfo()->
    getRegClassName(&AMDGPUMCRegisterClasses[RegClassID]);
}

inline
MCOperand AMDGPUDisassembler::errOperand(unsigned V,
                                         const Twine& ErrMsg) const {
  *CommentStream << "Error: " + ErrMsg;

  // ToDo: add support for error operands to MCInst.h
  // return MCOperand::createError(V);
  return MCOperand();
}

inline
MCOperand AMDGPUDisassembler::createRegOperand(unsigned int RegId) const {
  return MCOperand::createReg(AMDGPU::getMCReg(RegId, STI));
}

inline
MCOperand AMDGPUDisassembler::createRegOperand(unsigned RegClassID,
                                               unsigned Val) const {
  const auto& RegCl = AMDGPUMCRegisterClasses[RegClassID];
  if (Val >= RegCl.getNumRegs())
    return errOperand(Val, Twine(getRegClassName(RegClassID)) +
                           ": unknown register " + Twine(Val));
  return createRegOperand(RegCl.getRegister(Val));
}

inline
MCOperand AMDGPUDisassembler::createSRegOperand(unsigned SRegClassID,
                                                unsigned Val) const {
  // ToDo: SI/CI have 104 SGPRs, VI - 102
  // Valery: here we are accepting as much as we can; let the assembler sort it
  // out
  int shift = 0;
  switch (SRegClassID) {
  case AMDGPU::SGPR_32RegClassID:
  case AMDGPU::TTMP_32RegClassID:
    break;
  case AMDGPU::SGPR_64RegClassID:
  case AMDGPU::TTMP_64RegClassID:
    shift = 1;
    break;
  case AMDGPU::SGPR_96RegClassID:
  case AMDGPU::TTMP_96RegClassID:
  case AMDGPU::SGPR_128RegClassID:
  case AMDGPU::TTMP_128RegClassID:
  // ToDo: unclear if s[100:104] is available on VI. Can we use VCC as SGPR in
  // this bundle?
  case AMDGPU::SGPR_256RegClassID:
  case AMDGPU::TTMP_256RegClassID:
  // ToDo: unclear if s[96:104] is available on VI. Can we use VCC as SGPR in
  // this bundle?
  case AMDGPU::SGPR_288RegClassID:
  case AMDGPU::TTMP_288RegClassID:
  case AMDGPU::SGPR_320RegClassID:
  case AMDGPU::TTMP_320RegClassID:
  case AMDGPU::SGPR_352RegClassID:
  case AMDGPU::TTMP_352RegClassID:
  case AMDGPU::SGPR_384RegClassID:
  case AMDGPU::TTMP_384RegClassID:
  case AMDGPU::SGPR_512RegClassID:
  case AMDGPU::TTMP_512RegClassID:
    shift = 2;
    break;
  // ToDo: unclear if s[88:104] is available on VI. Can we use VCC as SGPR in
  // this bundle?
  default:
    llvm_unreachable("unhandled register class");
  }

  if (Val % (1 << shift)) {
    *CommentStream << "Warning: " << getRegClassName(SRegClassID)
                   << ": scalar reg isn't aligned " << Val;
  }

  return createRegOperand(SRegClassID, Val >> shift);
}

MCOperand AMDGPUDisassembler::createVGPR16Operand(unsigned RegIdx,
                                                  bool IsHi) const {
  unsigned RegIdxInVGPR16 = RegIdx * 2 + (IsHi ? 1 : 0);
  return createRegOperand(AMDGPU::VGPR_16RegClassID, RegIdxInVGPR16);
}

// Decode Literals for insts which always have a literal in the encoding
MCOperand
AMDGPUDisassembler::decodeMandatoryLiteralConstant(unsigned Val) const {
  if (HasLiteral) {
    assert(
        AMDGPU::hasVOPD(STI) &&
        "Should only decode multiple kimm with VOPD, check VSrc operand types");
    if (Literal != Val)
      return errOperand(Val, "More than one unique literal is illegal");
  }
  HasLiteral = true;
  Literal = Val;
  return MCOperand::createImm(Literal);
}

MCOperand AMDGPUDisassembler::decodeLiteralConstant(bool ExtendFP64) const {
  // For now all literal constants are supposed to be unsigned integer
  // ToDo: deal with signed/unsigned 64-bit integer constants
  // ToDo: deal with float/double constants
  if (!HasLiteral) {
    if (Bytes.size() < 4) {
      return errOperand(0, "cannot read literal, inst bytes left " +
                        Twine(Bytes.size()));
    }
    HasLiteral = true;
    Literal = Literal64 = eatBytes<uint32_t>(Bytes);
    if (ExtendFP64)
      Literal64 <<= 32;
  }
  return MCOperand::createImm(ExtendFP64 ? Literal64 : Literal);
}

MCOperand AMDGPUDisassembler::decodeIntImmed(unsigned Imm) {
  using namespace AMDGPU::EncValues;

  assert(Imm >= INLINE_INTEGER_C_MIN && Imm <= INLINE_INTEGER_C_MAX);
  return MCOperand::createImm((Imm <= INLINE_INTEGER_C_POSITIVE_MAX) ?
    (static_cast<int64_t>(Imm) - INLINE_INTEGER_C_MIN) :
    (INLINE_INTEGER_C_POSITIVE_MAX - static_cast<int64_t>(Imm)));
  // Cast prevents negative overflow.
}
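
// Worked example (assuming the usual inline-constant encoding where 128 maps
// to 0, 129..192 to 1..64, and 193..208 to -1..-16): Imm = 130 decodes to
// 130 - 128 = 2, and Imm = 194 decodes to 192 - 194 = -2.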

static int64_t getInlineImmVal32(unsigned Imm) {
  switch (Imm) {
  case 240:
    return llvm::bit_cast<uint32_t>(0.5f);
  case 241:
    return llvm::bit_cast<uint32_t>(-0.5f);
  case 242:
    return llvm::bit_cast<uint32_t>(1.0f);
  case 243:
    return llvm::bit_cast<uint32_t>(-1.0f);
  case 244:
    return llvm::bit_cast<uint32_t>(2.0f);
  case 245:
    return llvm::bit_cast<uint32_t>(-2.0f);
  case 246:
    return llvm::bit_cast<uint32_t>(4.0f);
  case 247:
    return llvm::bit_cast<uint32_t>(-4.0f);
  case 248: // 1 / (2 * PI)
    return 0x3e22f983;
  default:
    llvm_unreachable("invalid fp inline imm");
  }
}
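
// The returned values are IEEE-754 bit patterns, e.g. 0x3e22f983 is the f32
// encoding of approximately 0.15915494, i.e. 1/(2*pi) as noted on case 248.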

static int64_t getInlineImmVal64(unsigned Imm) {
  switch (Imm) {
  case 240:
    return llvm::bit_cast<uint64_t>(0.5);
  case 241:
    return llvm::bit_cast<uint64_t>(-0.5);
  case 242:
    return llvm::bit_cast<uint64_t>(1.0);
  case 243:
    return llvm::bit_cast<uint64_t>(-1.0);
  case 244:
    return llvm::bit_cast<uint64_t>(2.0);
  case 245:
    return llvm::bit_cast<uint64_t>(-2.0);
  case 246:
    return llvm::bit_cast<uint64_t>(4.0);
  case 247:
    return llvm::bit_cast<uint64_t>(-4.0);
  case 248: // 1 / (2 * PI)
    return 0x3fc45f306dc9c882;
  default:
    llvm_unreachable("invalid fp inline imm");
  }
}

static int64_t getInlineImmValF16(unsigned Imm) {
  switch (Imm) {
  case 240:
    return 0x3800;
  case 241:
    return 0xB800;
  case 242:
    return 0x3C00;
  case 243:
    return 0xBC00;
  case 244:
    return 0x4000;
  case 245:
    return 0xC000;
  case 246:
    return 0x4400;
  case 247:
    return 0xC400;
  case 248: // 1 / (2 * PI)
    return 0x3118;
  default:
    llvm_unreachable("invalid fp inline imm");
  }
}

static int64_t getInlineImmValBF16(unsigned Imm) {
  switch (Imm) {
  case 240:
    return 0x3F00;
  case 241:
    return 0xBF00;
  case 242:
    return 0x3F80;
  case 243:
    return 0xBF80;
  case 244:
    return 0x4000;
  case 245:
    return 0xC000;
  case 246:
    return 0x4080;
  case 247:
    return 0xC080;
  case 248: // 1 / (2 * PI)
    return 0x3E22;
  default:
    llvm_unreachable("invalid fp inline imm");
  }
}

static int64_t getInlineImmVal16(unsigned Imm, AMDGPU::OperandSemantics Sema) {
  return (Sema == AMDGPU::OperandSemantics::BF16) ? getInlineImmValBF16(Imm)
                                                  : getInlineImmValF16(Imm);
}

MCOperand AMDGPUDisassembler::decodeFPImmed(unsigned ImmWidth, unsigned Imm,
                                            AMDGPU::OperandSemantics Sema) {
  assert(Imm >= AMDGPU::EncValues::INLINE_FLOATING_C_MIN &&
         Imm <= AMDGPU::EncValues::INLINE_FLOATING_C_MAX);

  // ToDo: case 248: 1/(2*PI) - is allowed only on VI
  // ImmWidth 0 is a default case where operand should not allow immediates.
  // Imm value is still decoded into 32 bit immediate operand, inst printer will
  // use it to print verbose error message.
  switch (ImmWidth) {
  case 0:
  case 32:
    return MCOperand::createImm(getInlineImmVal32(Imm));
  case 64:
    return MCOperand::createImm(getInlineImmVal64(Imm));
  case 16:
    return MCOperand::createImm(getInlineImmVal16(Imm, Sema));
  default:
    llvm_unreachable("implement me");
  }
}

unsigned AMDGPUDisassembler::getVgprClassId(const OpWidthTy Width) const {
  using namespace AMDGPU;

  assert(OPW_FIRST_ <= Width && Width < OPW_LAST_);
  switch (Width) {
  default: // fall
  case OPW32:
  case OPW16:
  case OPWV216:
    return VGPR_32RegClassID;
  case OPW64:
  case OPWV232: return VReg_64RegClassID;
  case OPW96: return VReg_96RegClassID;
  case OPW128: return VReg_128RegClassID;
  case OPW160: return VReg_160RegClassID;
  case OPW256: return VReg_256RegClassID;
  case OPW288: return VReg_288RegClassID;
  case OPW320: return VReg_320RegClassID;
  case OPW352: return VReg_352RegClassID;
  case OPW384: return VReg_384RegClassID;
  case OPW512: return VReg_512RegClassID;
  case OPW1024: return VReg_1024RegClassID;
  }
}

unsigned AMDGPUDisassembler::getAgprClassId(const OpWidthTy Width) const {
  using namespace AMDGPU;

  assert(OPW_FIRST_ <= Width && Width < OPW_LAST_);
  switch (Width) {
  default: // fall
  case OPW32:
  case OPW16:
  case OPWV216:
    return AGPR_32RegClassID;
  case OPW64:
  case OPWV232: return AReg_64RegClassID;
  case OPW96: return AReg_96RegClassID;
  case OPW128: return AReg_128RegClassID;
  case OPW160: return AReg_160RegClassID;
  case OPW256: return AReg_256RegClassID;
  case OPW288: return AReg_288RegClassID;
  case OPW320: return AReg_320RegClassID;
  case OPW352: return AReg_352RegClassID;
  case OPW384: return AReg_384RegClassID;
  case OPW512: return AReg_512RegClassID;
  case OPW1024: return AReg_1024RegClassID;
  }
}


unsigned AMDGPUDisassembler::getSgprClassId(const OpWidthTy Width) const {
  using namespace AMDGPU;

  assert(OPW_FIRST_ <= Width && Width < OPW_LAST_);
  switch (Width) {
  default: // fall
  case OPW32:
  case OPW16:
  case OPWV216:
    return SGPR_32RegClassID;
  case OPW64:
  case OPWV232: return SGPR_64RegClassID;
  case OPW96: return SGPR_96RegClassID;
  case OPW128: return SGPR_128RegClassID;
  case OPW160: return SGPR_160RegClassID;
  case OPW256: return SGPR_256RegClassID;
  case OPW288: return SGPR_288RegClassID;
  case OPW320: return SGPR_320RegClassID;
  case OPW352: return SGPR_352RegClassID;
  case OPW384: return SGPR_384RegClassID;
  case OPW512: return SGPR_512RegClassID;
  }
}

unsigned AMDGPUDisassembler::getTtmpClassId(const OpWidthTy Width) const {
  using namespace AMDGPU;

  assert(OPW_FIRST_ <= Width && Width < OPW_LAST_);
  switch (Width) {
  default: // fall
  case OPW32:
  case OPW16:
  case OPWV216:
    return TTMP_32RegClassID;
  case OPW64:
  case OPWV232: return TTMP_64RegClassID;
  case OPW128: return TTMP_128RegClassID;
  case OPW256: return TTMP_256RegClassID;
  case OPW288: return TTMP_288RegClassID;
  case OPW320: return TTMP_320RegClassID;
  case OPW352: return TTMP_352RegClassID;
  case OPW384: return TTMP_384RegClassID;
  case OPW512: return TTMP_512RegClassID;
  }
}

int AMDGPUDisassembler::getTTmpIdx(unsigned Val) const {
  using namespace AMDGPU::EncValues;

  unsigned TTmpMin = isGFX9Plus() ? TTMP_GFX9PLUS_MIN : TTMP_VI_MIN;
  unsigned TTmpMax = isGFX9Plus() ? TTMP_GFX9PLUS_MAX : TTMP_VI_MAX;

  return (TTmpMin <= Val && Val <= TTmpMax)? Val - TTmpMin : -1;
}
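
// Example (assuming ttmp0 is encoded at TTMP_GFX9PLUS_MIN = 108 on GFX9+):
// Val = 110 yields index 2 (ttmp2); any Val outside [TTmpMin, TTmpMax]
// returns -1.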

MCOperand AMDGPUDisassembler::decodeSrcOp(const OpWidthTy Width, unsigned Val,
                                          bool MandatoryLiteral,
                                          unsigned ImmWidth,
                                          AMDGPU::OperandSemantics Sema) const {
  using namespace AMDGPU::EncValues;

  assert(Val < 1024); // enum10

  bool IsAGPR = Val & 512;
  Val &= 511;

  if (VGPR_MIN <= Val && Val <= VGPR_MAX) {
    return createRegOperand(IsAGPR ? getAgprClassId(Width)
                                   : getVgprClassId(Width), Val - VGPR_MIN);
  }
  return decodeNonVGPRSrcOp(Width, Val & 0xFF, MandatoryLiteral, ImmWidth,
                            Sema);
}

MCOperand
AMDGPUDisassembler::decodeNonVGPRSrcOp(const OpWidthTy Width, unsigned Val,
                                       bool MandatoryLiteral, unsigned ImmWidth,
                                       AMDGPU::OperandSemantics Sema) const {
  // Cases when Val{8} is 1 (vgpr, agpr or true 16 vgpr) should have been
  // decoded earlier.
  assert(Val < (1 << 8) && "9-bit Src encoding when Val{8} is 0");
  using namespace AMDGPU::EncValues;

  if (Val <= SGPR_MAX) {
    // "SGPR_MIN <= Val" is always true and causes compilation warning.
    static_assert(SGPR_MIN == 0);
    return createSRegOperand(getSgprClassId(Width), Val - SGPR_MIN);
  }

  int TTmpIdx = getTTmpIdx(Val);
  if (TTmpIdx >= 0) {
    return createSRegOperand(getTtmpClassId(Width), TTmpIdx);
  }

  if (INLINE_INTEGER_C_MIN <= Val && Val <= INLINE_INTEGER_C_MAX)
    return decodeIntImmed(Val);

  if (INLINE_FLOATING_C_MIN <= Val && Val <= INLINE_FLOATING_C_MAX)
    return decodeFPImmed(ImmWidth, Val, Sema);

  if (Val == LITERAL_CONST) {
    if (MandatoryLiteral)
      // Keep a sentinel value for deferred setting
      return MCOperand::createImm(LITERAL_CONST);
    else
      return decodeLiteralConstant(Sema == AMDGPU::OperandSemantics::FP64);
  }

  switch (Width) {
  case OPW32:
  case OPW16:
  case OPWV216:
    return decodeSpecialReg32(Val);
  case OPW64:
  case OPWV232:
    return decodeSpecialReg64(Val);
  default:
    llvm_unreachable("unexpected immediate type");
  }
}

// Bit 0 of DstY isn't stored in the instruction, because it's always the
// opposite of bit 0 of DstX.
MCOperand AMDGPUDisassembler::decodeVOPDDstYOp(MCInst &Inst,
                                               unsigned Val) const {
  int VDstXInd =
      AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::vdstX);
  assert(VDstXInd != -1);
  assert(Inst.getOperand(VDstXInd).isReg());
  unsigned XDstReg = MRI.getEncodingValue(Inst.getOperand(VDstXInd).getReg());
  Val |= ~XDstReg & 1;
  auto Width = llvm::AMDGPUDisassembler::OPW32;
  return createRegOperand(getVgprClassId(Width), Val);
}

MCOperand AMDGPUDisassembler::decodeSpecialReg32(unsigned Val) const {
  using namespace AMDGPU;

  switch (Val) {
  // clang-format off
  case 102: return createRegOperand(FLAT_SCR_LO);
  case 103: return createRegOperand(FLAT_SCR_HI);
  case 104: return createRegOperand(XNACK_MASK_LO);
  case 105: return createRegOperand(XNACK_MASK_HI);
  case 106: return createRegOperand(VCC_LO);
  case 107: return createRegOperand(VCC_HI);
  case 108: return createRegOperand(TBA_LO);
  case 109: return createRegOperand(TBA_HI);
  case 110: return createRegOperand(TMA_LO);
  case 111: return createRegOperand(TMA_HI);
  case 124:
    return isGFX11Plus() ? createRegOperand(SGPR_NULL) : createRegOperand(M0);
  case 125:
    return isGFX11Plus() ? createRegOperand(M0) : createRegOperand(SGPR_NULL);
  case 126: return createRegOperand(EXEC_LO);
  case 127: return createRegOperand(EXEC_HI);
  case 235: return createRegOperand(SRC_SHARED_BASE_LO);
  case 236: return createRegOperand(SRC_SHARED_LIMIT_LO);
  case 237: return createRegOperand(SRC_PRIVATE_BASE_LO);
  case 238: return createRegOperand(SRC_PRIVATE_LIMIT_LO);
  case 239: return createRegOperand(SRC_POPS_EXITING_WAVE_ID);
  case 251: return createRegOperand(SRC_VCCZ);
  case 252: return createRegOperand(SRC_EXECZ);
  case 253: return createRegOperand(SRC_SCC);
  case 254: return createRegOperand(LDS_DIRECT);
  default: break;
  // clang-format on
  }
  return errOperand(Val, "unknown operand encoding " + Twine(Val));
}

MCOperand AMDGPUDisassembler::decodeSpecialReg64(unsigned Val) const {
  using namespace AMDGPU;

  switch (Val) {
  case 102: return createRegOperand(FLAT_SCR);
  case 104: return createRegOperand(XNACK_MASK);
  case 106: return createRegOperand(VCC);
  case 108: return createRegOperand(TBA);
  case 110: return createRegOperand(TMA);
  case 124:
    if (isGFX11Plus())
      return createRegOperand(SGPR_NULL);
    break;
  case 125:
    if (!isGFX11Plus())
      return createRegOperand(SGPR_NULL);
    break;
  case 126: return createRegOperand(EXEC);
  case 235: return createRegOperand(SRC_SHARED_BASE);
  case 236: return createRegOperand(SRC_SHARED_LIMIT);
  case 237: return createRegOperand(SRC_PRIVATE_BASE);
  case 238: return createRegOperand(SRC_PRIVATE_LIMIT);
  case 239: return createRegOperand(SRC_POPS_EXITING_WAVE_ID);
  case 251: return createRegOperand(SRC_VCCZ);
  case 252: return createRegOperand(SRC_EXECZ);
  case 253: return createRegOperand(SRC_SCC);
  default: break;
  }
  return errOperand(Val, "unknown operand encoding " + Twine(Val));
}

MCOperand
AMDGPUDisassembler::decodeSDWASrc(const OpWidthTy Width, const unsigned Val,
                                  unsigned ImmWidth,
                                  AMDGPU::OperandSemantics Sema) const {
  using namespace AMDGPU::SDWA;
  using namespace AMDGPU::EncValues;

  if (STI.hasFeature(AMDGPU::FeatureGFX9) ||
      STI.hasFeature(AMDGPU::FeatureGFX10)) {
    // XXX: cast to int is needed to avoid stupid warning:
    // compare with unsigned is always true
    if (int(SDWA9EncValues::SRC_VGPR_MIN) <= int(Val) &&
        Val <= SDWA9EncValues::SRC_VGPR_MAX) {
      return createRegOperand(getVgprClassId(Width),
                              Val - SDWA9EncValues::SRC_VGPR_MIN);
    }
    if (SDWA9EncValues::SRC_SGPR_MIN <= Val &&
        Val <= (isGFX10Plus() ? SDWA9EncValues::SRC_SGPR_MAX_GFX10
                              : SDWA9EncValues::SRC_SGPR_MAX_SI)) {
      return createSRegOperand(getSgprClassId(Width),
                               Val - SDWA9EncValues::SRC_SGPR_MIN);
    }
    if (SDWA9EncValues::SRC_TTMP_MIN <= Val &&
        Val <= SDWA9EncValues::SRC_TTMP_MAX) {
      return createSRegOperand(getTtmpClassId(Width),
                               Val - SDWA9EncValues::SRC_TTMP_MIN);
    }

    const unsigned SVal = Val - SDWA9EncValues::SRC_SGPR_MIN;

    if (INLINE_INTEGER_C_MIN <= SVal && SVal <= INLINE_INTEGER_C_MAX)
      return decodeIntImmed(SVal);

    if (INLINE_FLOATING_C_MIN <= SVal && SVal <= INLINE_FLOATING_C_MAX)
      return decodeFPImmed(ImmWidth, SVal, Sema);

    return decodeSpecialReg32(SVal);
  } else if (STI.hasFeature(AMDGPU::FeatureVolcanicIslands)) {
    return createRegOperand(getVgprClassId(Width), Val);
  }
  llvm_unreachable("unsupported target");
}

MCOperand AMDGPUDisassembler::decodeSDWASrc16(unsigned Val) const {
  return decodeSDWASrc(OPW16, Val, 16, AMDGPU::OperandSemantics::FP16);
}

MCOperand AMDGPUDisassembler::decodeSDWASrc32(unsigned Val) const {
  return decodeSDWASrc(OPW32, Val, 32, AMDGPU::OperandSemantics::FP32);
}

MCOperand AMDGPUDisassembler::decodeSDWAVopcDst(unsigned Val) const {
  using namespace AMDGPU::SDWA;

  assert((STI.hasFeature(AMDGPU::FeatureGFX9) ||
          STI.hasFeature(AMDGPU::FeatureGFX10)) &&
         "SDWAVopcDst should be present only on GFX9+");

  bool IsWave64 = STI.hasFeature(AMDGPU::FeatureWavefrontSize64);

  if (Val & SDWA9EncValues::VOPC_DST_VCC_MASK) {
    Val &= SDWA9EncValues::VOPC_DST_SGPR_MASK;

    int TTmpIdx = getTTmpIdx(Val);
    if (TTmpIdx >= 0) {
      auto TTmpClsId = getTtmpClassId(IsWave64 ? OPW64 : OPW32);
      return createSRegOperand(TTmpClsId, TTmpIdx);
    } else if (Val > SGPR_MAX) {
      return IsWave64 ? decodeSpecialReg64(Val)
                      : decodeSpecialReg32(Val);
    } else {
      return createSRegOperand(getSgprClassId(IsWave64 ? OPW64 : OPW32), Val);
    }
  } else {
    return createRegOperand(IsWave64 ? AMDGPU::VCC : AMDGPU::VCC_LO);
  }
}
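
// Note on the logic above: VOPC_DST_VCC_MASK distinguishes an explicit SGPR
// (or ttmp/special) destination from the implicit one; when the bit is clear
// the result simply lands in VCC (VCC_LO for wave32).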

MCOperand AMDGPUDisassembler::decodeBoolReg(unsigned Val) const {
  return STI.hasFeature(AMDGPU::FeatureWavefrontSize64)
             ? decodeSrcOp(OPW64, Val)
             : decodeSrcOp(OPW32, Val);
}

MCOperand AMDGPUDisassembler::decodeSplitBarrier(unsigned Val) const {
  return decodeSrcOp(OPW32, Val);
}

MCOperand AMDGPUDisassembler::decodeDpp8FI(unsigned Val) const {
  if (Val != AMDGPU::DPP::DPP8_FI_0 && Val != AMDGPU::DPP::DPP8_FI_1)
    return MCOperand();
  return MCOperand::createImm(Val);
}

bool AMDGPUDisassembler::isVI() const {
  return STI.hasFeature(AMDGPU::FeatureVolcanicIslands);
}

bool AMDGPUDisassembler::isGFX9() const { return AMDGPU::isGFX9(STI); }

bool AMDGPUDisassembler::isGFX90A() const {
  return STI.hasFeature(AMDGPU::FeatureGFX90AInsts);
}

bool AMDGPUDisassembler::isGFX9Plus() const { return AMDGPU::isGFX9Plus(STI); }

bool AMDGPUDisassembler::isGFX10() const { return AMDGPU::isGFX10(STI); }

bool AMDGPUDisassembler::isGFX10Plus() const {
  return AMDGPU::isGFX10Plus(STI);
}

bool AMDGPUDisassembler::isGFX11() const {
  return STI.hasFeature(AMDGPU::FeatureGFX11);
}

bool AMDGPUDisassembler::isGFX11Plus() const {
  return AMDGPU::isGFX11Plus(STI);
}

bool AMDGPUDisassembler::isGFX12() const {
  return STI.hasFeature(AMDGPU::FeatureGFX12);
}

bool AMDGPUDisassembler::isGFX12Plus() const {
  return AMDGPU::isGFX12Plus(STI);
}

bool AMDGPUDisassembler::hasArchitectedFlatScratch() const {
  return STI.hasFeature(AMDGPU::FeatureArchitectedFlatScratch);
}

bool AMDGPUDisassembler::hasKernargPreload() const {
  return AMDGPU::hasKernargPreload(STI);
}

1780//===----------------------------------------------------------------------===//
1781// AMDGPU specific symbol handling
1782//===----------------------------------------------------------------------===//
1783
1784/// Print a string describing the reserved bit range specified by Mask with
1785/// offset BaseBytes for use in error comments. Mask is a single contiguous
1786/// range of 1s surrounded by zeros. The format here is meant to align with the
1787/// tables that describe these bits in llvm.org/docs/AMDGPUUsage.html.
1788static SmallString<32> getBitRangeFromMask(uint32_t Mask, unsigned BaseBytes) {
1789 SmallString<32> Result;
1790 raw_svector_ostream S(Result);
1791
1792 int TrailingZeros = llvm::countr_zero(Mask);
1793 int PopCount = llvm::popcount(Mask);
1794
1795 if (PopCount == 1) {
1796 S << "bit (" << (TrailingZeros + BaseBytes * CHAR_BIT) << ')';
1797 } else {
1798 S << "bits in range ("
1799 << (TrailingZeros + PopCount - 1 + BaseBytes * CHAR_BIT) << ':'
1800 << (TrailingZeros + BaseBytes * CHAR_BIT) << ')';
1801 }
1802
1803 return Result;
1804}
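// Two worked examples of the arithmetic above (values chosen purely for
// illustration):
//   getBitRangeFromMask(0x000000C0, /*BaseBytes=*/4)
//     TrailingZeros = 6, PopCount = 2, base bit = 4 * CHAR_BIT = 32
//     -> "bits in range (39:38)"
//   getBitRangeFromMask(0x00000100, /*BaseBytes=*/0)
//     -> "bit (8)"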
1805
1806#define GET_FIELD(MASK) (AMDHSA_BITS_GET(FourByteBuffer, MASK))
1807#define PRINT_DIRECTIVE(DIRECTIVE, MASK) \
1808 do { \
1809 KdStream << Indent << DIRECTIVE " " << GET_FIELD(MASK) << '\n'; \
1810 } while (0)
1811#define PRINT_PSEUDO_DIRECTIVE_COMMENT(DIRECTIVE, MASK) \
1812 do { \
1813 KdStream << Indent << MAI.getCommentString() << ' ' << DIRECTIVE " " \
1814 << GET_FIELD(MASK) << '\n'; \
1815 } while (0)
1816
1817#define CHECK_RESERVED_BITS_IMPL(MASK, DESC, MSG) \
1818 do { \
1819 if (FourByteBuffer & (MASK)) { \
1820 return createStringError(std::errc::invalid_argument, \
1821 "kernel descriptor " DESC \
1822 " reserved %s set" MSG, \
1823 getBitRangeFromMask((MASK), 0).c_str()); \
1824 } \
1825 } while (0)
1826
1827#define CHECK_RESERVED_BITS(MASK) CHECK_RESERVED_BITS_IMPL(MASK, #MASK, "")
1828#define CHECK_RESERVED_BITS_MSG(MASK, MSG) \
1829 CHECK_RESERVED_BITS_IMPL(MASK, #MASK, ", " MSG)
1830#define CHECK_RESERVED_BITS_DESC(MASK, DESC) \
1831 CHECK_RESERVED_BITS_IMPL(MASK, DESC, "")
1832#define CHECK_RESERVED_BITS_DESC_MSG(MASK, DESC, MSG) \
1833 CHECK_RESERVED_BITS_IMPL(MASK, DESC, ", " MSG)
1834
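// To make the macro layering concrete: CHECK_RESERVED_BITS(
// COMPUTE_PGM_RSRC1_PRIORITY) goes through CHECK_RESERVED_BITS_IMPL(
// COMPUTE_PGM_RSRC1_PRIORITY, "COMPUTE_PGM_RSRC1_PRIORITY", "") and
// expands to roughly:
//
//   do {
//     if (FourByteBuffer & (COMPUTE_PGM_RSRC1_PRIORITY)) {
//       return createStringError(
//           std::errc::invalid_argument,
//           "kernel descriptor COMPUTE_PGM_RSRC1_PRIORITY reserved %s set",
//           getBitRangeFromMask((COMPUTE_PGM_RSRC1_PRIORITY), 0).c_str());
//     }
//   } while (0)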
1835// NOLINTNEXTLINE(readability-identifier-naming)
1836Expected<bool> AMDGPUDisassembler::decodeCOMPUTE_PGM_RSRC1(
1837 uint32_t FourByteBuffer, raw_string_ostream &KdStream) const {
1838 using namespace amdhsa;
1839 StringRef Indent = "\t";
1840
1841 // We cannot accurately backward compute #VGPRs used from
1842 // GRANULATED_WORKITEM_VGPR_COUNT. But we are concerned with getting the same
1843 // value of GRANULATED_WORKITEM_VGPR_COUNT in the reassembled binary. So we
1844 // simply calculate the inverse of what the assembler does.
1845
1846 uint32_t GranulatedWorkitemVGPRCount =
1847 GET_FIELD(COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT);
1848
1849 uint32_t NextFreeVGPR =
1850 (GranulatedWorkitemVGPRCount + 1) *
1851 AMDGPU::IsaInfo::getVGPREncodingGranule(&STI, EnableWavefrontSize32);
1852
1853 KdStream << Indent << ".amdhsa_next_free_vgpr " << NextFreeVGPR << '\n';
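// Numeric sketch of the inverse computation (the granule value is assumed
// here; it depends on the target and on EnableWavefrontSize32):
//   granule = 4, GRANULATED_WORKITEM_VGPR_COUNT = 3
//   NextFreeVGPR = (3 + 1) * 4 = 16   -> .amdhsa_next_free_vgpr 16
//   reassembly recovers 16 / 4 - 1 = 3, the same field value.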
1854
1855 // We cannot backward compute values used to calculate
1856 // GRANULATED_WAVEFRONT_SGPR_COUNT. Hence the original values for the following
1857 // directives can't be computed:
1858 // .amdhsa_reserve_vcc
1859 // .amdhsa_reserve_flat_scratch
1860 // .amdhsa_reserve_xnack_mask
1861 // They take their respective default values if not specified in the assembly.
1862 //
1863 // GRANULATED_WAVEFRONT_SGPR_COUNT
1864 // = f(NEXT_FREE_SGPR + VCC + FLAT_SCRATCH + XNACK_MASK)
1865 //
1866 // We compute the inverse as though all directives apart from NEXT_FREE_SGPR
1867 // are set to 0. So while disassembling we consider that:
1868 //
1869 // GRANULATED_WAVEFRONT_SGPR_COUNT
1870 // = f(NEXT_FREE_SGPR + 0 + 0 + 0)
1871 //
1872 // The disassembler cannot recover the original values of those 3 directives.
1873
1874 uint32_t GranulatedWavefrontSGPRCount =
1875 GET_FIELD(COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT);
1876
1877 if (isGFX10Plus())
1878 CHECK_RESERVED_BITS_MSG(COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT,
1879 "must be zero on gfx10+");
1880
1881 uint32_t NextFreeSGPR = (GranulatedWavefrontSGPRCount + 1) *
1882 AMDGPU::IsaInfo::getSGPREncodingGranule(&STI);
1883
1884 KdStream << Indent << ".amdhsa_reserve_vcc " << 0 << '\n';
1885 if (!hasArchitectedFlatScratch())
1886 KdStream << Indent << ".amdhsa_reserve_flat_scratch " << 0 << '\n';
1887 KdStream << Indent << ".amdhsa_reserve_xnack_mask " << 0 << '\n';
1888 KdStream << Indent << ".amdhsa_next_free_sgpr " << NextFreeSGPR << "\n";
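// Matching numeric sketch for the SGPR side (granule assumed to be 8):
//   GRANULATED_WAVEFRONT_SGPR_COUNT = 2
//   NextFreeSGPR = (2 + 1) * 8 = 24   -> .amdhsa_next_free_sgpr 24
//   Any SGPRs the original kernel reserved for VCC / FLAT_SCRATCH /
//   XNACK_MASK are folded into this count, since the three reserve
//   directives above are always printed as 0.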
1889
1890 CHECK_RESERVED_BITS(COMPUTE_PGM_RSRC1_PRIORITY);
1891
1892 PRINT_DIRECTIVE(".amdhsa_float_round_mode_32",
1893 COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_32);
1894 PRINT_DIRECTIVE(".amdhsa_float_round_mode_16_64",
1895 COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_16_64);
1896 PRINT_DIRECTIVE(".amdhsa_float_denorm_mode_32",
1897 COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_32);
1898 PRINT_DIRECTIVE(".amdhsa_float_denorm_mode_16_64",
1899 COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_16_64);
1900
1901 CHECK_RESERVED_BITS(COMPUTE_PGM_RSRC1_PRIV);
1902
1903 if (!isGFX12Plus())
1904 PRINT_DIRECTIVE(".amdhsa_dx10_clamp",
1905 COMPUTE_PGM_RSRC1_GFX6_GFX11_ENABLE_DX10_CLAMP);
1906
1907 CHECK_RESERVED_BITS(COMPUTE_PGM_RSRC1_DEBUG_MODE);
1908
1909 if (!isGFX12Plus())
1910 PRINT_DIRECTIVE(".amdhsa_ieee_mode",
1911 COMPUTE_PGM_RSRC1_GFX6_GFX11_ENABLE_IEEE_MODE);
1912
1913 CHECK_RESERVED_BITS(COMPUTE_PGM_RSRC1_BULKY);
1914 CHECK_RESERVED_BITS(COMPUTE_PGM_RSRC1_CDBG_USER);
1915
1916 if (isGFX9Plus())
1917 PRINT_DIRECTIVE(".amdhsa_fp16_overflow", COMPUTE_PGM_RSRC1_GFX9_PLUS_FP16_OVFL);
1918
1919 if (!isGFX9Plus())
1920 CHECK_RESERVED_BITS_DESC_MSG(COMPUTE_PGM_RSRC1_GFX6_GFX8_RESERVED0,
1921 "COMPUTE_PGM_RSRC1", "must be zero pre-gfx9");
1922
1923 CHECK_RESERVED_BITS_DESC(COMPUTE_PGM_RSRC1_RESERVED1, "COMPUTE_PGM_RSRC1");
1924
1925 if (!isGFX10Plus())
1926 CHECK_RESERVED_BITS_DESC_MSG(COMPUTE_PGM_RSRC1_GFX6_GFX9_RESERVED2,
1927 "COMPUTE_PGM_RSRC1", "must be zero pre-gfx10");
1928
1929 if (isGFX10Plus()) {
1930 PRINT_DIRECTIVE(".amdhsa_workgroup_processor_mode",
1931 COMPUTE_PGM_RSRC1_GFX10_PLUS_WGP_MODE);
1932 PRINT_DIRECTIVE(".amdhsa_memory_ordered", COMPUTE_PGM_RSRC1_GFX10_PLUS_MEM_ORDERED);
1933 PRINT_DIRECTIVE(".amdhsa_forward_progress", COMPUTE_PGM_RSRC1_GFX10_PLUS_FWD_PROGRESS);
1934 }
1935
1936 if (isGFX12Plus())
1937 PRINT_DIRECTIVE(".amdhsa_round_robin_scheduling",
1938 COMPUTE_PGM_RSRC1_GFX12_PLUS_ENABLE_WG_RR_EN);
1939
1940 return true;
1941}
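// On success, the directives above accumulate in KdStream as a block like
// the following (field values illustrative, gfx9-style target assumed):
//
//   .amdhsa_next_free_vgpr 16
//   .amdhsa_reserve_vcc 0
//   .amdhsa_reserve_flat_scratch 0
//   .amdhsa_reserve_xnack_mask 0
//   .amdhsa_next_free_sgpr 24
//   .amdhsa_float_round_mode_32 0
//   .amdhsa_float_round_mode_16_64 0
//   .amdhsa_float_denorm_mode_32 3
//   .amdhsa_float_denorm_mode_16_64 3
//   .amdhsa_dx10_clamp 1
//   .amdhsa_ieee_mode 1
//   .amdhsa_fp16_overflow 0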
1942
1943// NOLINTNEXTLINE(readability-identifier-naming)
1944Expected<bool> AMDGPUDisassembler::decodeCOMPUTE_PGM_RSRC2(
1945 uint32_t FourByteBuffer, raw_string_ostream &KdStream) const {
1946 using namespace amdhsa;
1947 StringRef Indent = "\t";
1948 if (isGFX12Plus())
1949 PRINT_DIRECTIVE(".amdhsa_enable_private_segment",
1950 COMPUTE_PGM_RSRC2_ENABLE_PRIVATE_SEGMENT);
1951 else
1952 PRINT_DIRECTIVE(".amdhsa_system_sgpr_private_segment_wavefront_offset",
1953 COMPUTE_PGM_RSRC2_ENABLE_PRIVATE_SEGMENT);
1954 PRINT_DIRECTIVE(".amdhsa_system_sgpr_workgroup_id_x",
1955 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_X);
1956 PRINT_DIRECTIVE(".amdhsa_system_sgpr_workgroup_id_y",
1957 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Y);
1958 PRINT_DIRECTIVE(".amdhsa_system_sgpr_workgroup_id_z",
1959 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Z);
1960 PRINT_DIRECTIVE(".amdhsa_system_sgpr_workgroup_info",
1961 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_INFO);
1962 PRINT_DIRECTIVE(".amdhsa_system_vgpr_workitem_id",
1963 COMPUTE_PGM_RSRC2_ENABLE_VGPR_WORKITEM_ID);
1964
1965 CHECK_RESERVED_BITS(COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_ADDRESS_WATCH);
1966 CHECK_RESERVED_BITS(COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_MEMORY);
1967 CHECK_RESERVED_BITS(COMPUTE_PGM_RSRC2_GRANULATED_LDS_SIZE);
1968
1969 PRINT_DIRECTIVE(
1970 ".amdhsa_exception_fp_ieee_invalid_op",
1971 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INVALID_OPERATION);
1972 PRINT_DIRECTIVE(".amdhsa_exception_fp_denorm_src",
1973 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_FP_DENORMAL_SOURCE);
1974 PRINT_DIRECTIVE(
1975 ".amdhsa_exception_fp_ieee_div_zero",
1976 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_DIVISION_BY_ZERO);
1977 PRINT_DIRECTIVE(".amdhsa_exception_fp_ieee_overflow",
1978 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_OVERFLOW);
1979 PRINT_DIRECTIVE(".amdhsa_exception_fp_ieee_underflow",
1980 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_UNDERFLOW);
1981 PRINT_DIRECTIVE(".amdhsa_exception_fp_ieee_inexact",
1982 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INEXACT);
1983 PRINT_DIRECTIVE(".amdhsa_exception_int_div_zero",
1984 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_INT_DIVIDE_BY_ZERO);
1985
1986 CHECK_RESERVED_BITS_DESC(COMPUTE_PGM_RSRC2_RESERVED0, "COMPUTE_PGM_RSRC2");
1987
1988 return true;
1989}
1990
1991// NOLINTNEXTLINE(readability-identifier-naming)
1992Expected<bool> AMDGPUDisassembler::decodeCOMPUTE_PGM_RSRC3(
1993 uint32_t FourByteBuffer, raw_string_ostream &KdStream) const {
1994 using namespace amdhsa;
1995 StringRef Indent = "\t";
1996 if (isGFX90A()) {
1997 KdStream << Indent << ".amdhsa_accum_offset "
1998 << (GET_FIELD(COMPUTE_PGM_RSRC3_GFX90A_ACCUM_OFFSET) + 1) * 4
1999 << '\n';
2000
2001 PRINT_DIRECTIVE(".amdhsa_tg_split", COMPUTE_PGM_RSRC3_GFX90A_TG_SPLIT);
2002
2003 CHECK_RESERVED_BITS_DESC_MSG(COMPUTE_PGM_RSRC3_GFX90A_RESERVED0,
2004 "COMPUTE_PGM_RSRC3", "must be zero on gfx90a");
2005 CHECK_RESERVED_BITS_DESC_MSG(COMPUTE_PGM_RSRC3_GFX90A_RESERVED1,
2006 "COMPUTE_PGM_RSRC3", "must be zero on gfx90a");
2007 } else if (isGFX10Plus()) {
2008 // Bits [0-3].
2009 if (!isGFX12Plus()) {
2010 if (!EnableWavefrontSize32 || !*EnableWavefrontSize32) {
2011 PRINT_DIRECTIVE(".amdhsa_shared_vgpr_count",
2012 COMPUTE_PGM_RSRC3_GFX10_GFX11_SHARED_VGPR_COUNT);
2013 } else {
2014 PRINT_PSEUDO_DIRECTIVE_COMMENT(
2015 "SHARED_VGPR_COUNT",
2016 COMPUTE_PGM_RSRC3_GFX10_GFX11_SHARED_VGPR_COUNT);
2017 }
2018 } else {
2019 CHECK_RESERVED_BITS_DESC_MSG(COMPUTE_PGM_RSRC3_GFX12_PLUS_RESERVED0,
2020 "COMPUTE_PGM_RSRC3",
2021 "must be zero on gfx12+");
2022 }
2023
2024 // Bits [4-11].
2025 if (isGFX11()) {
2026 PRINT_PSEUDO_DIRECTIVE_COMMENT("INST_PREF_SIZE",
2027 COMPUTE_PGM_RSRC3_GFX11_INST_PREF_SIZE);
2028 PRINT_PSEUDO_DIRECTIVE_COMMENT("TRAP_ON_START",
2029 COMPUTE_PGM_RSRC3_GFX11_TRAP_ON_START);
2030 PRINT_PSEUDO_DIRECTIVE_COMMENT("TRAP_ON_END",
2031 COMPUTE_PGM_RSRC3_GFX11_TRAP_ON_END);
2032 } else if (isGFX12Plus()) {
2033 PRINT_PSEUDO_DIRECTIVE_COMMENT(
2034 "INST_PREF_SIZE", COMPUTE_PGM_RSRC3_GFX12_PLUS_INST_PREF_SIZE);
2035 } else {
2036 CHECK_RESERVED_BITS_DESC_MSG(COMPUTE_PGM_RSRC3_GFX10_RESERVED1,
2037 "COMPUTE_PGM_RSRC3",
2038 "must be zero on gfx10");
2039 }
2040
2041 // Bit [12].
2042 CHECK_RESERVED_BITS_DESC_MSG(COMPUTE_PGM_RSRC3_GFX10_PLUS_RESERVED2,
2043 "COMPUTE_PGM_RSRC3", "must be zero on gfx10+");
2044
2045 // Bit [13].
2046 if (isGFX12Plus()) {
2047 PRINT_PSEUDO_DIRECTIVE_COMMENT("GLG_EN",
2048 COMPUTE_PGM_RSRC3_GFX12_PLUS_GLG_EN);
2049 } else {
2050 CHECK_RESERVED_BITS_DESC_MSG(COMPUTE_PGM_RSRC3_GFX10_GFX11_RESERVED3,
2051 "COMPUTE_PGM_RSRC3",
2052 "must be zero on gfx10 or gfx11");
2053 }
2054
2055 // Bits [14-30].
2056 CHECK_RESERVED_BITS_DESC_MSG(COMPUTE_PGM_RSRC3_GFX10_PLUS_RESERVED4,
2057 "COMPUTE_PGM_RSRC3", "must be zero on gfx10+");
2058
2059 // Bit [31].
2060 if (isGFX11Plus()) {
2061 PRINT_PSEUDO_DIRECTIVE_COMMENT("IMAGE_OP",
2062 COMPUTE_PGM_RSRC3_GFX11_PLUS_IMAGE_OP);
2063 } else {
2064 CHECK_RESERVED_BITS_DESC_MSG(COMPUTE_PGM_RSRC3_GFX10_RESERVED5,
2065 "COMPUTE_PGM_RSRC3",
2066 "must be zero on gfx10");
2067 }
2068 } else if (FourByteBuffer) {
2069 return createStringError(
2070 std::errc::invalid_argument,
2071 "kernel descriptor COMPUTE_PGM_RSRC3 must be all zero before gfx9");
2072 }
2073 return true;
2074}
2075#undef PRINT_PSEUDO_DIRECTIVE_COMMENT
2076#undef PRINT_DIRECTIVE
2077#undef GET_FIELD
2078#undef CHECK_RESERVED_BITS_IMPL
2079#undef CHECK_RESERVED_BITS
2080#undef CHECK_RESERVED_BITS_MSG
2081#undef CHECK_RESERVED_BITS_DESC
2082#undef CHECK_RESERVED_BITS_DESC_MSG
2083
2084/// Create an error object to return from onSymbolStart for reserved kernel
2085/// descriptor bits being set.
2086static Error createReservedKDBitsError(uint32_t Mask, unsigned BaseBytes,
2087 const char *Msg = "") {
2088 return createStringError(
2089 std::errc::invalid_argument, "kernel descriptor reserved %s set%s%s",
2090 getBitRangeFromMask(Mask, BaseBytes).c_str(), *Msg ? ", " : "", Msg);
2091}
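// Example (mask chosen for illustration): Mask = 0x3, BaseBytes = 12, and
// an empty Msg yield:
//   "kernel descriptor reserved bits in range (97:96) set"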
2092
2093/// Create an error object to return from onSymbolStart for reserved kernel
2094/// descriptor bytes being set.
2095static Error createReservedKDBytesError(unsigned BaseInBytes,
2096 unsigned WidthInBytes) {
2097 // Create an error comment in the same format as the "Kernel Descriptor"
2098 // table here: https://llvm.org/docs/AMDGPUUsage.html#kernel-descriptor .
2099 return createStringError(
2100 std::errc::invalid_argument,
2101 "kernel descriptor reserved bits in range (%u:%u) set",
2102 (BaseInBytes + WidthInBytes) * CHAR_BIT - 1, BaseInBytes * CHAR_BIT);
2103}
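// Example: createReservedKDBytesError(/*BaseInBytes=*/58, /*WidthInBytes=*/6)
//   -> "kernel descriptor reserved bits in range (511:464) set"
// which matches the row layout of the kernel descriptor table in
// AMDGPUUsage.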
2104
2105Expected<bool> AMDGPUDisassembler::decodeKernelDescriptorDirective(
2106 DataExtractor::Cursor &Cursor, ArrayRef<uint8_t> Bytes,
2107 raw_string_ostream &KdStream) const {
2108#define PRINT_DIRECTIVE(DIRECTIVE, MASK) \
2109 do { \
2110 KdStream << Indent << DIRECTIVE " " \
2111 << ((TwoByteBuffer & MASK) >> (MASK##_SHIFT)) << '\n'; \
2112 } while (0)
2113
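// Unlike the file-level PRINT_DIRECTIVE above, this local redefinition reads
// the field out of TwoByteBuffer using the mask's paired *_SHIFT constant.
// One use expands to roughly:
//
//   PRINT_DIRECTIVE(".amdhsa_user_sgpr_dispatch_ptr",
//                   KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR);
//   // becomes:
//   KdStream << Indent << ".amdhsa_user_sgpr_dispatch_ptr "
//            << ((TwoByteBuffer & KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR)
//                >> (KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR_SHIFT))
//            << '\n';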
2114 uint16_t TwoByteBuffer = 0;
2115 uint32_t FourByteBuffer = 0;
2116
2117 StringRef ReservedBytes;
2118 StringRef Indent = "\t";
2119
2120 assert(Bytes.size() == 64);
2121 DataExtractor DE(Bytes, /*IsLittleEndian=*/true, /*AddressSize=*/8);
2122
2123 switch (Cursor.tell()) {
2124 case amdhsa::GROUP_SEGMENT_FIXED_SIZE_OFFSET:
2125 FourByteBuffer = DE.getU32(Cursor);
2126 KdStream << Indent << ".amdhsa_group_segment_fixed_size " << FourByteBuffer
2127 << '\n';
2128 return true;
2129
2130 case amdhsa::PRIVATE_SEGMENT_FIXED_SIZE_OFFSET:
2131 FourByteBuffer = DE.getU32(Cursor);
2132 KdStream << Indent << ".amdhsa_private_segment_fixed_size "
2133 << FourByteBuffer << '\n';
2134 return true;
2135
2136 case amdhsa::KERNARG_SIZE_OFFSET:
2137 FourByteBuffer = DE.getU32(Cursor);
2138 KdStream << Indent << ".amdhsa_kernarg_size "
2139 << FourByteBuffer << '\n';
2140 return true;
2141
2142 case amdhsa::RESERVED0_OFFSET:
2143 // 4 reserved bytes, must be 0.
2144 ReservedBytes = DE.getBytes(Cursor, 4);
2145 for (int I = 0; I < 4; ++I) {
2146 if (ReservedBytes[I] != 0)
2147 return createReservedKDBytesError(amdhsa::RESERVED0_OFFSET, 4);
2148 }
2149 return true;
2150
2151 case amdhsa::KERNEL_CODE_ENTRY_BYTE_OFFSET_OFFSET:
2152 // KERNEL_CODE_ENTRY_BYTE_OFFSET
2153 // So far no directive controls this for Code Object V3, so simply skip for
2154 // disassembly.
2155 DE.skip(Cursor, 8);
2156 return true;
2157
2158 case amdhsa::RESERVED1_OFFSET:
2159 // 20 reserved bytes, must be 0.
2160 ReservedBytes = DE.getBytes(Cursor, 20);
2161 for (int I = 0; I < 20; ++I) {
2162 if (ReservedBytes[I] != 0)
2163 return createReservedKDBytesError(amdhsa::RESERVED1_OFFSET, 20);
2164 }
2165 return true;
2166
2167 case amdhsa::COMPUTE_PGM_RSRC3_OFFSET:
2168 FourByteBuffer = DE.getU32(Cursor);
2169 return decodeCOMPUTE_PGM_RSRC3(FourByteBuffer, KdStream);
2170
2171 case amdhsa::COMPUTE_PGM_RSRC1_OFFSET:
2172 FourByteBuffer = DE.getU32(Cursor);
2173 return decodeCOMPUTE_PGM_RSRC1(FourByteBuffer, KdStream);
2174
2175 case amdhsa::COMPUTE_PGM_RSRC2_OFFSET:
2176 FourByteBuffer = DE.getU32(Cursor);
2177 return decodeCOMPUTE_PGM_RSRC2(FourByteBuffer, KdStream);
2178
2179 case amdhsa::KERNEL_CODE_PROPERTIES_OFFSET:
2180 using namespace amdhsa;
2181 TwoByteBuffer = DE.getU16(Cursor);
2182
2183 if (!hasArchitectedFlatScratch())
2184 PRINT_DIRECTIVE(".amdhsa_user_sgpr_private_segment_buffer",
2185 KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER);
2186 PRINT_DIRECTIVE(".amdhsa_user_sgpr_dispatch_ptr",
2187 KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR);
2188 PRINT_DIRECTIVE(".amdhsa_user_sgpr_queue_ptr",
2189 KERNEL_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR);
2190 PRINT_DIRECTIVE(".amdhsa_user_sgpr_kernarg_segment_ptr",
2191 KERNEL_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR);
2192 PRINT_DIRECTIVE(".amdhsa_user_sgpr_dispatch_id",
2193 KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID);
2194 if (!hasArchitectedFlatScratch())
2195 PRINT_DIRECTIVE(".amdhsa_user_sgpr_flat_scratch_init",
2196 KERNEL_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT);
2197 PRINT_DIRECTIVE(".amdhsa_user_sgpr_private_segment_size",
2198 KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE);
2199
2200 if (TwoByteBuffer & KERNEL_CODE_PROPERTY_RESERVED0)
2201 return createReservedKDBitsError(KERNEL_CODE_PROPERTY_RESERVED0,
2202 amdhsa::KERNEL_CODE_PROPERTIES_OFFSET);
2203
2204 // Reserved for GFX9
2205 if (isGFX9() &&
2206 (TwoByteBuffer & KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32)) {
2207 return createReservedKDBitsError(
2208 KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32,
2209 amdhsa::KERNEL_CODE_PROPERTIES_OFFSET, "must be zero on gfx9");
2210 } else if (isGFX10Plus()) {
2211 PRINT_DIRECTIVE(".amdhsa_wavefront_size32",
2212 KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32);
2213 }
2214
2215 if (CodeObjectVersion >= AMDGPU::AMDHSA_COV5)
2216 PRINT_DIRECTIVE(".amdhsa_uses_dynamic_stack",
2217 KERNEL_CODE_PROPERTY_USES_DYNAMIC_STACK);
2218
2219 if (TwoByteBuffer & KERNEL_CODE_PROPERTY_RESERVED1) {
2220 return createReservedKDBitsError(KERNEL_CODE_PROPERTY_RESERVED1,
2221 amdhsa::KERNEL_CODE_PROPERTIES_OFFSET);
2222 }
2223
2224 return true;
2225
2226 case amdhsa::KERNARG_PRELOAD_OFFSET:
2227 using namespace amdhsa;
2228 TwoByteBuffer = DE.getU16(Cursor);
2229 if (TwoByteBuffer & KERNARG_PRELOAD_SPEC_LENGTH) {
2230 PRINT_DIRECTIVE(".amdhsa_user_sgpr_kernarg_preload_length",
2231 KERNARG_PRELOAD_SPEC_LENGTH);
2232 }
2233
2234 if (TwoByteBuffer & KERNARG_PRELOAD_SPEC_OFFSET) {
2235 PRINT_DIRECTIVE(".amdhsa_user_sgpr_kernarg_preload_offset",
2236 KERNARG_PRELOAD_SPEC_OFFSET);
2237 }
2238 return true;
2239
2240 case amdhsa::RESERVED3_OFFSET:
2241 // 4 bytes from here are reserved, must be 0.
2242 ReservedBytes = DE.getBytes(Cursor, 4);
2243 for (int I = 0; I < 4; ++I) {
2244 if (ReservedBytes[I] != 0)
2245 return createReservedKDBytesError(amdhsa::RESERVED3_OFFSET, 4);
2246 }
2247 return true;
2248
2249 default:
2250 llvm_unreachable("Unhandled index. Case statements cover everything.");
2251 return true;
2252 }
2253#undef PRINT_DIRECTIVE
2254}
2255
2256Expected<bool> AMDGPUDisassembler::decodeKernelDescriptor(
2257 StringRef KdName, ArrayRef<uint8_t> Bytes, uint64_t KdAddress) const {
2258
2259 // CP microcode requires the kernel descriptor to be 64-byte aligned.
2260 if (Bytes.size() != 64 || KdAddress % 64 != 0)
2261 return createStringError(std::errc::invalid_argument,
2262 "kernel descriptor must be 64-byte aligned");
2263
2264 // FIXME: We can't actually decode "in order" as is done below, as e.g. GFX10
2265 // requires us to know the setting of .amdhsa_wavefront_size32 in order to
2266 // accurately produce .amdhsa_next_free_vgpr, and they appear in the wrong
2267 // order. Work around this by first looking up .amdhsa_wavefront_size32 here
2268 // when required.
2269 if (isGFX10Plus()) {
2270 uint16_t KernelCodeProperties =
2271 support::endian::read16(&Bytes[amdhsa::KERNEL_CODE_PROPERTIES_OFFSET],
2272 llvm::endianness::little);
2273 EnableWavefrontSize32 =
2274 AMDHSA_BITS_GET(KernelCodeProperties,
2275 amdhsa::KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32);
2276 }
2277
2278 std::string Kd;
2279 raw_string_ostream KdStream(Kd);
2280 KdStream << ".amdhsa_kernel " << KdName << '\n';
2281
2282 DataExtractor::Cursor C(0);
2283 while (C && C.tell() < Bytes.size()) {
2284 Expected<bool> Res = decodeKernelDescriptorDirective(C, Bytes, KdStream);
2285
2286 cantFail(C.takeError());
2287
2288 if (!Res)
2289 return Res;
2290 }
2291 KdStream << ".end_amdhsa_kernel\n";
2292 outs() << KdStream.str();
2293 return true;
2294}
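// A minimal standalone sketch of the DataExtractor::Cursor idiom used above
// (function name and buffer contents invented for illustration): a failed
// read poisons the Cursor, later reads become no-ops, and the accumulated
// error must be taken exactly once.
#include "llvm/ADT/ArrayRef.h"
#include "llvm/Support/DataExtractor.h"
#include "llvm/Support/Error.h"

static void walkWords(llvm::ArrayRef<uint8_t> Bytes) {
  llvm::DataExtractor DE(Bytes, /*IsLittleEndian=*/true, /*AddressSize=*/8);
  llvm::DataExtractor::Cursor C(0);
  while (C && C.tell() < Bytes.size())
    (void)DE.getU32(C);              // decode 4 bytes at a time
  llvm::consumeError(C.takeError()); // consume the Cursor's error state
}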
2295
2296Expected<bool> AMDGPUDisassembler::onSymbolStart(SymbolInfoTy &Symbol,
2297 uint64_t &Size,
2298 ArrayRef<uint8_t> Bytes,
2299 uint64_t Address) const {
2300 // Right now only the kernel descriptor needs to be handled;
2301 // all other symbols are ignored by the target-specific handling.
2302 // TODO:
2303 // Fix the spurious symbol issue for AMDGPU kernels. Exists for both Code
2304 // Object V2 and V3 when symbols are marked protected.
2305
2306 // amd_kernel_code_t for Code Object V2.
2307 if (Symbol.Type == ELF::STT_AMDGPU_HSA_KERNEL) {
2308 Size = 256;
2309 return createStringError(std::errc::invalid_argument,
2310 "code object v2 is not supported");
2311 }
2312
2313 // Code Object V3 kernel descriptors.
2314 StringRef Name = Symbol.Name;
2315 if (Symbol.Type == ELF::STT_OBJECT && Name.ends_with(StringRef(".kd"))) {
2316 Size = 64; // Size = 64 regardless of success or failure.
2317 return decodeKernelDescriptor(Name.drop_back(3), Bytes, Address);
2318 }
2319
2320 return false;
2321}
2322
2323//===----------------------------------------------------------------------===//
2324// AMDGPUSymbolizer
2325//===----------------------------------------------------------------------===//
2326
2327// Try to find the symbol name for the specified label.
2328bool AMDGPUSymbolizer::tryAddingSymbolicOperand(
2329 MCInst &Inst, raw_ostream & /*cStream*/, int64_t Value,
2330 uint64_t /*Address*/, bool IsBranch, uint64_t /*Offset*/,
2331 uint64_t /*OpSize*/, uint64_t /*InstSize*/) {
2332
2333 if (!IsBranch) {
2334 return false;
2335 }
2336
2337 auto *Symbols = static_cast<SectionSymbolsTy *>(DisInfo);
2338 if (!Symbols)
2339 return false;
2340
2341 auto Result = llvm::find_if(*Symbols, [Value](const SymbolInfoTy &Val) {
2342 return Val.Addr == static_cast<uint64_t>(Value) &&
2343 Val.Type == ELF::STT_NOTYPE;
2344 });
2345 if (Result != Symbols->end()) {
2346 auto *Sym = Ctx.getOrCreateSymbol(Result->Name);
2347 const auto *Add = MCSymbolRefExpr::create(Sym, Ctx);
2348 Inst.addOperand(MCOperand::createExpr(Add));
2349 return true;
2350 }
2351 // Add to list of referenced addresses, so caller can synthesize a label.
2352 ReferencedAddresses.push_back(static_cast<uint64_t>(Value));
2353 return false;
2354}
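// The DisInfo opaque pointer is expected to carry the section's symbol
// table; schematically (symbol name and address invented for illustration):
//   SectionSymbolsTy Symbols = {
//       {/*Addr=*/0x100, /*Name=*/"loop_header", /*Type=*/ELF::STT_NOTYPE}};
//   A branch whose target Value resolves to 0x100 gets its immediate
//   replaced by an MCSymbolRefExpr to "loop_header"; any unmatched target
//   address is appended to ReferencedAddresses so the caller can
//   synthesize a label on a second pass.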
2355
2356void AMDGPUSymbolizer::tryAddingPcLoadReferenceComment(raw_ostream &cStream,
2357 int64_t Value,
2358 uint64_t Address) {
2359 llvm_unreachable("unimplemented");
2360}
2361
2362//===----------------------------------------------------------------------===//
2363// Initialization
2364//===----------------------------------------------------------------------===//
2365
2366static MCSymbolizer *createAMDGPUSymbolizer(const Triple &/*TT*/,
2367 LLVMOpInfoCallback /*GetOpInfo*/,
2368 LLVMSymbolLookupCallback /*SymbolLookUp*/,
2369 void *DisInfo,
2370 MCContext *Ctx,
2371 std::unique_ptr<MCRelocationInfo> &&RelInfo) {
2372 return new AMDGPUSymbolizer(*Ctx, std::move(RelInfo), DisInfo);
2373}
2374
2375static MCDisassembler *createAMDGPUDisassembler(const Target &T,
2376 const MCSubtargetInfo &STI,
2377 MCContext &Ctx) {
2378 return new AMDGPUDisassembler(STI, Ctx, T.createMCInstrInfo());
2379}
2380
2381extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeAMDGPUDisassembler() {
2382 TargetRegistry::RegisterMCDisassembler(getTheGCNTarget(),
2383 createAMDGPUDisassembler);
2384 TargetRegistry::RegisterMCSymbolizer(getTheGCNTarget(),
2385 createAMDGPUSymbolizer);
2386}