94#define DEBUG_TYPE "x86-disassembler"
96#define debug(s) LLVM_DEBUG(dbgs() << __LINE__ << ": " << s);
121#include "X86GenDisassemblerTables.inc"
124 uint8_t opcode, uint8_t modRM) {
129 dec = &
ONEBYTE_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode];
132 dec = &
TWOBYTE_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode];
135 dec = &
THREEBYTE38_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode];
138 dec = &
THREEBYTE3A_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode];
141 dec = &
XOP8_MAP_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode];
144 dec = &
XOP9_MAP_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode];
147 dec = &
XOPA_MAP_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode];
154 dec = &
MAP4_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode];
157 dec = &
MAP5_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode];
160 dec = &
MAP6_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode];
163 dec = &
MAP7_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode];
179 return modRMTable[dec->
instructionIDs + ((modRM & 0x38) >> 3) + 8];
181 case MODRM_SPLITMISC:
194 byte = insn->
bytes[offset];
199 auto r = insn->
bytes;
201 if (offset +
sizeof(
T) > r.size())
209 return insn->
mode ==
MODE_64BIT && prefix >= 0x40 && prefix <= 0x4f;
239 if ((
byte == 0xf2 ||
byte == 0xf3) && !
peek(insn,
nextByte)) {
248 if (!(
byte == 0xf3 &&
nextByte == 0x90))
266 if (
peek(insn, nnextByte))
336 uint8_t byte1, byte2;
342 if (
peek(insn, byte2)) {
383 "Found EVEX prefix 0x%hhx 0x%hhx 0x%hhx 0x%hhx",
387 }
else if (
byte == 0xc4) {
389 if (
peek(insn, byte1)) {
418 }
else if (
byte == 0xc5) {
420 if (
peek(insn, byte1)) {
450 }
else if (
byte == 0x8f) {
452 if (
peek(insn, byte1)) {
457 if ((byte1 & 0x38) != 0x0)
489 }
else if (
isREX2(insn,
byte)) {
491 if (
peek(insn, byte1)) {
506 }
else if (
isREX(insn,
byte)) {
553 sibBaseBase = SIB_BASE_EAX;
557 sibBaseBase = SIB_BASE_RAX;
638 uint8_t mod, rm, reg;
682 EABase eaBaseBase = EA_BASE_BX_SI;
751 insn->
eaBase = EA_BASE_sib;
774#define GENERIC_FIXUP_FUNC(name, base, prefix) \
775 static uint16_t name(struct InternalInstruction *insn, OperandType type, \
776 uint8_t index, uint8_t *valid) { \
780 debug("Unhandled register type"); \
784 return base + index; \
786 if (insn->rexPrefix && index >= 4 && index <= 7) \
787 return prefix##_SPL + (index - 4); \
789 return prefix##_AL + index; \
791 return prefix##_AX + index; \
793 return prefix##_EAX + index; \
795 return prefix##_RAX + index; \
797 return prefix##_ZMM0 + index; \
799 return prefix##_YMM0 + index; \
801 return prefix##_XMM0 + index; \
805 return prefix##_TMM0 + index; \
810 return prefix##_K0 + index; \
814 return prefix##_K0_K1 + (index / 2); \
816 return prefix##_MM0 + (index & 0x7); \
817 case TYPE_SEGMENTREG: \
818 if ((index & 7) > 5) \
820 return prefix##_ES + (index & 7); \
821 case TYPE_DEBUGREG: \
824 return prefix##_DR0 + index; \
825 case TYPE_CONTROLREG: \
828 return prefix##_CR0 + index; \
830 return prefix##_XMM0 + index; \
832 return prefix##_YMM0 + index; \
834 return prefix##_ZMM0 + index; \
865 debug(
"Expected a REG or R/M encoding in fixupReg");
875 insn->reg - insn->regBase, &valid);
904 if (insn->eaBase >= insn->eaRegBase) {
905 insn->eaBase = (
EABase)fixupRMValue(
906 insn, (
OperandType)
op->type, insn->eaBase - insn->eaRegBase, &valid);
927 dbgs() <<
format(
"Unhandled mmm field for instruction (0x%hhx)",
956 dbgs() <<
format(
"Unhandled m-mmmm field for instruction (0x%hhx)",
985 dbgs() <<
format(
"Unhandled m-mmmm field for instruction (0x%hhx)",
1007 if (current == 0x0f) {
1009 dbgs() <<
format(
"Found a two-byte escape prefix (0x%hhx)", current));
1013 if (current == 0x38) {
1020 }
else if (current == 0x3a) {
1027 }
else if (current == 0x0f) {
1029 dbgs() <<
format(
"Found a 3dnow escape prefix (0x%hhx)", current));
1057 for (
int i = 0;; i++) {
1058 if (orig[i] ==
'\0' && equiv[i] ==
'\0')
1060 if (orig[i] ==
'\0' || equiv[i] ==
'\0')
1062 if (orig[i] != equiv[i]) {
1063 if ((orig[i] ==
'Q' || orig[i] ==
'L') && equiv[i] ==
'W')
1065 if ((orig[i] ==
'6' || orig[i] ==
'3') && equiv[i] ==
'1')
1067 if ((orig[i] ==
'4' || orig[i] ==
'2') && equiv[i] ==
'6')
1076 for (
int i = 0;; ++i) {
1077 if (
name[i] ==
'\0')
1079 if (
name[i] ==
'6' &&
name[i + 1] ==
'4')
1149 switch (insn->
opcode & 0xfe) {
1318 attrMask &= ~ATTR_ADSIZE;
1323 (insn->
opcode == 0xA1 || (insn->
opcode & 0xf0) == 0x50))
1368 auto SpecName = mii->
getName(instructionIDWithREXW);
1370 if (!
is64Bit(SpecName.data())) {
1432 specName = mii->
getName(instructionID);
1433 specWithOpSizeName = mii->
getName(instructionIDWithOpsize);
1451 uint16_t instructionIDWithNewOpcode;
1474 insn->
spec = specWithNewOpcode;
1499 auto setOpcodeRegister = [&](
unsigned base) {
1508 setOpcodeRegister(MODRM_REG_AL);
1517 setOpcodeRegister(MODRM_REG_AX);
1520 setOpcodeRegister(MODRM_REG_EAX);
1523 setOpcodeRegister(MODRM_REG_RAX);
1600 insn->
vvvv =
static_cast<Reg>(vvvv);
1622 int hasVVVV, needVVVV;
1629 needVVVV = hasVVVV && (insn->
vvvv != 0);
1632 switch (
Op.encoding) {
1640 needVVVV = hasVVVV & ((insn->
vvvv & 0xf) != 0);
1645 if (insn->
eaBase != EA_BASE_sib && insn->
eaBase != EA_BASE_sib64)
1660 debug(
"Unhandled VSIB index type");
1682 if (insn->
eaBase != EA_BASE_sib && insn->
eaBase != EA_BASE_sib64)
1710 if (
Op.type == TYPE_XMM ||
Op.type == TYPE_YMM)
1778 case ENCODING_WRITEMASK:
1785 LLVM_DEBUG(
dbgs() <<
"Encountered an operand with an unknown encoding.");
1825 std::unique_ptr<const MCInstrInfo> MII;
1828 std::unique_ptr<const MCInstrInfo> MII);
1840X86GenericDisassembler::X86GenericDisassembler(
1843 std::unique_ptr<const MCInstrInfo> MII)
1846 if (FB[X86::Is16Bit]) {
1849 }
else if (FB[X86::Is32Bit]) {
1852 }
else if (FB[X86::Is64Bit]) {
1863 CommentStream = &CStream;
1879 Insn.operands = x86OperandSets[
Insn.spec->operands];
1890 if (!
Insn.mandatoryPrefix) {
1893 if (
Insn.repeatPrefix == 0xf2)
1895 else if (
Insn.repeatPrefix == 0xf3 &&
1897 Insn.opcode != 0x90)
1899 if (
Insn.hasLockPrefix)
1902 Instr.setFlags(Flags);
1917#define ENTRY(x) X86::x,
1921 MCPhysReg llvmRegnum = llvmRegnums[reg];
1943 baseRegNo = insn.
hasAdSize ? X86::ESI : X86::RSI;
1945 baseRegNo = insn.
hasAdSize ? X86::SI : X86::ESI;
1948 baseRegNo = insn.
hasAdSize ? X86::ESI : X86::SI;
1968 baseRegNo = insn.
hasAdSize ? X86::EDI : X86::RDI;
1970 baseRegNo = insn.
hasAdSize ? X86::DI : X86::EDI;
1973 baseRegNo = insn.
hasAdSize ? X86::EDI : X86::DI;
1996 if (type == TYPE_REL) {
2007 if(immediate & 0x80)
2008 immediate |= ~(0xffull);
2011 if(immediate & 0x8000)
2012 immediate |= ~(0xffffull);
2015 if(immediate & 0x80000000)
2016 immediate |= ~(0xffffffffull);
2023 if(immediate & 0x80)
2024 immediate |= ~(0xffull);
2027 if(immediate & 0x8000)
2028 immediate |= ~(0xffffull);
2031 if(immediate & 0x80000000)
2032 immediate |= ~(0xffffffffull);
2037 else if (type == TYPE_IMM) {
2042 if(immediate & 0x80)
2043 immediate |= ~(0xffull);
2046 if(immediate & 0x8000)
2047 immediate |= ~(0xffffull);
2050 if(immediate & 0x80000000)
2051 immediate |= ~(0xffffffffull);
2078 if (type == TYPE_MOFFS) {
2093 if (insn.
eaBase == EA_BASE_sib || insn.
eaBase == EA_BASE_sib64) {
2094 debug(
"A R/M register operand may not have a SIB byte");
2100 debug(
"Unexpected EA base register");
2103 debug(
"EA_BASE_NONE for ModR/M base");
2105#define ENTRY(x) case EA_BASE_##x:
2108 debug(
"A R/M register operand may not have a base; "
2109 "the operand must be a register.");
2113 mcInst.addOperand(MCOperand::createReg(X86::x)); break;
2132 bool ForceSIB =
false) {
2152 if (insn.
eaBase == EA_BASE_sib || insn.
eaBase == EA_BASE_sib64) {
2156 debug(
"Unexpected sibBase");
2159 case SIB_BASE_##x: \
2160 baseReg = MCOperand::createReg(X86::x); break;
2171 debug(
"Unexpected sibIndex");
2174 case SIB_INDEX_##x: \
2175 indexReg = MCOperand::createReg(X86::x); break;
2196 insn.
sibBase != SIB_BASE_R12D && insn.
sibBase != SIB_BASE_R12))) {
2208 debug(
"EA_BASE_NONE and EA_DISP_NONE for ModR/M base");
2245 debug(
"Unexpected eaBase");
2253 baseReg = MCOperand::createReg(X86::x); break;
2256#define ENTRY(x) case EA_REG_##x:
2259 debug(
"A R/M memory operand may not be a register; "
2260 "the base field must be a base.");
2276 const uint8_t dispSize =
2297 switch (operand.
type) {
2299 debug(
"Unexpected type for a R/M operand");
2314 case TYPE_CONTROLREG:
2344 uint8_t maskRegNum) {
2345 if (maskRegNum >= 8) {
2346 debug(
"Invalid mask register number");
2366 debug(
"Unhandled operand encoding during translation");
2371 case ENCODING_WRITEMASK:
2434 debug(
"Instruction has no specification");
2444 if(mcInst.
getOpcode() == X86::REP_PREFIX)
2446 else if(mcInst.
getOpcode() == X86::REPNE_PREFIX)
2453 if (
Op.encoding != ENCODING_NONE) {
2466 std::unique_ptr<const MCInstrInfo> MII(
T.createMCInstrInfo());
2467 return new X86GenericDisassembler(STI, Ctx, std::move(MII));
SmallVector< AArch64_IMM::ImmInsnModel, 4 > Insn
#define LLVM_EXTERNAL_VISIBILITY
static bool isBranch(unsigned Opcode)
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
static uint8_t readOpcode(WasmObjectFile::ReadContext &Ctx)
static int nextByte(ArrayRef< uint8_t > Bytes, uint64_t &Size)
static bool isPrefix(const MCInst &MI, const MCInstrInfo &MCII)
Check if the instruction is a prefix.
#define CASE_ENCODING_VSIB
#define THREEDNOW_MAP_SYM
#define rFromEVEX2of4(evex)
#define lFromEVEX4of4(evex)
#define l2FromEVEX4of4(evex)
#define rFromVEX2of3(vex)
#define zFromEVEX4of4(evex)
#define bFromXOP2of3(xop)
#define xFromVEX2of3(vex)
#define mmmmmFromVEX2of3(vex)
#define rmFromModRM(modRM)
#define bFromEVEX4of4(evex)
#define rFromVEX2of2(vex)
#define ppFromEVEX3of4(evex)
#define v2FromEVEX4of4(evex)
#define modFromModRM(modRM)
#define rFromXOP2of3(xop)
#define lFromXOP3of3(xop)
#define lFromVEX2of2(vex)
#define scFromEVEX4of4(evex)
#define scaleFromSIB(sib)
#define regFromModRM(modRM)
#define b2FromEVEX2of4(evex)
#define vvvvFromVEX2of2(vex)
#define nfFromEVEX4of4(evex)
#define ppFromXOP3of3(xop)
#define vvvvFromVEX3of3(vex)
#define r2FromEVEX2of4(evex)
#define x2FromEVEX3of4(evex)
#define xFromXOP2of3(xop)
#define wFromEVEX3of4(evex)
#define bFromVEX2of3(vex)
#define wFromVEX3of3(vex)
#define mmmmmFromXOP2of3(xop)
#define aaaFromEVEX4of4(evex)
#define lFromVEX3of3(vex)
#define mmmFromEVEX2of4(evex)
#define ppFromVEX3of3(vex)
#define bFromEVEX2of4(evex)
#define xFromEVEX2of4(evex)
#define ppFromVEX2of2(vex)
#define indexFromSIB(sib)
#define vvvvFromXOP3of3(xop)
#define wFromXOP3of3(xop)
#define oszcFromEVEX3of4(evex)
#define vvvvFromEVEX3of4(evex)
static void translateRegister(MCInst &mcInst, Reg reg)
translateRegister - Translates an internal register to the appropriate LLVM register,...
static bool isREX2(struct InternalInstruction *insn, uint8_t prefix)
static int getInstructionID(struct InternalInstruction *insn, const MCInstrInfo *mii)
static bool readOpcode(struct InternalInstruction *insn)
static MCDisassembler * createX86Disassembler(const Target &T, const MCSubtargetInfo &STI, MCContext &Ctx)
static bool translateMaskRegister(MCInst &mcInst, uint8_t maskRegNum)
translateMaskRegister - Translates a 3-bit mask register number to LLVM form, and appends it to an MC...
static bool translateDstIndex(MCInst &mcInst, InternalInstruction &insn)
translateDstIndex - Appends a destination index operand to an MCInst.
static void translateImmediate(MCInst &mcInst, uint64_t immediate, const OperandSpecifier &operand, InternalInstruction &insn, const MCDisassembler *Dis)
translateImmediate - Appends an immediate operand to an MCInst.
LLVM_EXTERNAL_VISIBILITY void LLVMInitializeX86Disassembler()
static int readOperands(struct InternalInstruction *insn)
static void translateFPRegister(MCInst &mcInst, uint8_t stackPos)
translateFPRegister - Translates a stack position on the FPU stack to its LLVM form,...
static bool is64Bit(const char *name)
static const uint8_t segmentRegnums[SEG_OVERRIDE_max]
static int readImmediate(struct InternalInstruction *insn, uint8_t size)
static int getInstructionIDWithAttrMask(uint16_t *instructionID, struct InternalInstruction *insn, uint16_t attrMask)
static int readSIB(struct InternalInstruction *insn)
static bool isREX(struct InternalInstruction *insn, uint8_t prefix)
static int readVVVV(struct InternalInstruction *insn)
static bool isNF(InternalInstruction *insn)
static bool translateSrcIndex(MCInst &mcInst, InternalInstruction &insn)
translateSrcIndex - Appends a source index operand to an MCInst.
#define GENERIC_FIXUP_FUNC(name, base, prefix)
static int readMaskRegister(struct InternalInstruction *insn)
static bool translateRM(MCInst &mcInst, const OperandSpecifier &operand, InternalInstruction &insn, const MCDisassembler *Dis)
translateRM - Translates an operand stored in the R/M (and possibly SIB) byte of an instruction to LL...
static InstrUID decode(OpcodeType type, InstructionContext insnContext, uint8_t opcode, uint8_t modRM)
static int readOpcodeRegister(struct InternalInstruction *insn, uint8_t size)
static int readDisplacement(struct InternalInstruction *insn)
static bool isCCMPOrCTEST(InternalInstruction *insn)
static int fixupReg(struct InternalInstruction *insn, const struct OperandSpecifier *op)
static int readModRM(struct InternalInstruction *insn)
static bool is16BitEquivalent(const char *orig, const char *equiv)
static bool translateRMMemory(MCInst &mcInst, InternalInstruction &insn, const MCDisassembler *Dis, bool ForceSIB=false)
translateRMMemory - Translates a memory operand stored in the Mod and R/M fields of an internal instr...
static bool translateInstruction(MCInst &target, InternalInstruction &source, const MCDisassembler *Dis)
translateInstruction - Translates an internal instruction and all its operands to an MCInst.
static bool translateRMRegister(MCInst &mcInst, InternalInstruction &insn)
translateRMRegister - Translates a register stored in the R/M field of the ModR/M byte to its LLVM eq...
static bool translateOperand(MCInst &mcInst, const OperandSpecifier &operand, InternalInstruction &insn, const MCDisassembler *Dis)
translateOperand - Translates an operand stored in an internal instruction to LLVM's format and appen...
static int readPrefixes(struct InternalInstruction *insn)
static bool peek(struct InternalInstruction *insn, uint8_t &byte)
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
size_t size() const
size - Get the array size.
bool empty() const
empty - Check if the array is empty.
This class represents an Operation in the Expression.
Container class for subtarget features.
Context object for machine code objects.
Superclass for all disassemblers.
bool tryAddingSymbolicOperand(MCInst &Inst, int64_t Value, uint64_t Address, bool IsBranch, uint64_t Offset, uint64_t OpSize, uint64_t InstSize) const
void tryAddingPcLoadReferenceComment(int64_t Value, uint64_t Address) const
DecodeStatus
Ternary decode status.
virtual DecodeStatus getInstruction(MCInst &Instr, uint64_t &Size, ArrayRef< uint8_t > Bytes, uint64_t Address, raw_ostream &CStream) const =0
Returns the disassembly of a single instruction.
Instances of this class represent a single low-level machine instruction.
unsigned getOpcode() const
void addOperand(const MCOperand Op)
void setOpcode(unsigned Op)
Interface to description of machine instruction set.
StringRef getName(unsigned Opcode) const
Returns the name for the instructions with the given opcode.
Instances of this class represent operands of the MCInst class.
static MCOperand createReg(unsigned Reg)
static MCOperand createImm(int64_t Val)
Generic base class for all target subtargets.
const FeatureBitset & getFeatureBits() const
StringRef - Represent a constant reference to a string, i.e.
constexpr const char * data() const
data - Get a pointer to the start of the string (which may not be null terminated).
Target - Wrapper for Target specific information.
This class implements an extremely fast bulk output stream that can only output to a stream.
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
@ X86
Windows x64, Windows Itanium (IA-64)
EABase
All possible values of the base field for effective-address computations, a.k.a.
Reg
All possible values of the reg field in the ModR/M byte.
DisassemblerMode
Decoding mode for the Intel disassembler.
SIBBase
All possible values of the SIB base field.
SIBIndex
All possible values of the SIB index field.
NodeAddr< InstrNode * > Instr
This is an optimization pass for GlobalISel generic memory operations.
auto size(R &&Range, std::enable_if_t< std::is_base_of< std::random_access_iterator_tag, typename std::iterator_traits< decltype(Range.begin())>::iterator_category >::value, void > *=nullptr)
Get the size of a range.
Target & getTheX86_32Target()
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
format_object< Ts... > format(const char *Fmt, const Ts &... Vals)
These are helper functions used to produce formatted output.
OutputIt move(R &&Range, OutputIt Out)
Provide wrappers to std::move which take ranges instead of having to pass begin/end explicitly.
Target & getTheX86_64Target()
Implement std::hash so that hash_code can be used in STL containers.
OpcodeDecision opcodeDecisions[IC_max]
ModRMDecision modRMDecisions[256]
static void RegisterMCDisassembler(Target &T, Target::MCDisassemblerCtorTy Fn)
RegisterMCDisassembler - Register a MCDisassembler implementation for the given target.
The specification for how to extract and interpret a full instruction and its operands.
The x86 internal instruction, which is produced by the decoder.
ArrayRef< OperandSpecifier > operands
EADisplacement eaDisplacement
uint8_t rex2ExtensionPrefix[2]
uint8_t vectorExtensionPrefix[4]
SegmentOverride segmentOverride
uint8_t numImmediatesConsumed
llvm::ArrayRef< uint8_t > bytes
uint8_t numImmediatesTranslated
const InstructionSpecifier * spec
VectorExtensionType vectorExtensionType
uint8_t displacementOffset
The specification for how to extract and interpret one operand.