96#define DEBUG_TYPE "x86-disassembler"
98#define debug(s) LLVM_DEBUG(dbgs() << __LINE__ << ": " << s);
123#include "X86GenDisassemblerTables.inc"
143 unsigned DecisionIndex =
165 return modRMTable[Decision.
instructionIDs + ((ModRM & 0x38) >> 3) + 8];
166 return modRMTable[Decision.
instructionIDs + ((ModRM & 0x38) >> 3)];
167 case MODRM_SPLITMISC:
170 return modRMTable[Decision.
instructionIDs + ((ModRM & 0x38) >> 3)];
180 byte = insn->
bytes[offset];
185 auto r = insn->
bytes;
187 if (offset +
sizeof(
T) > r.size())
195 return insn->
mode ==
MODE_64BIT && prefix >= 0x40 && prefix <= 0x4f;
225 if ((
byte == 0xf2 ||
byte == 0xf3) && !
peek(insn,
nextByte)) {
234 if (!(
byte == 0xf3 &&
nextByte == 0x90))
252 if (
peek(insn, nnextByte))
318 if (
isREX(insn,
byte)) {
339 if (
peek(insn, byte2)) {
380 "Found EVEX prefix 0x%hhx 0x%hhx 0x%hhx 0x%hhx",
384 }
else if (
byte == 0xc4) {
386 if (
peek(insn, byte1)) {
415 }
else if (
byte == 0xc5) {
417 if (
peek(insn, byte1)) {
447 }
else if (
byte == 0x8f) {
449 if (
peek(insn, byte1)) {
454 if ((byte1 & 0x38) != 0x0)
486 }
else if (
isREX2(insn,
byte)) {
488 if (
peek(insn, byte1)) {
545 sibBaseBase = SIB_BASE_EAX;
549 sibBaseBase = SIB_BASE_RAX;
674 EABase eaBaseBase = EA_BASE_BX_SI;
743 insn->
eaBase = EA_BASE_sib;
766#define GENERIC_FIXUP_FUNC(name, base, prefix) \
767 static uint16_t name(struct InternalInstruction *insn, OperandType type, \
768 uint8_t index, uint8_t *valid) { \
772 debug("Unhandled register type"); \
776 return base + index; \
778 if (insn->rexPrefix && index >= 4 && index <= 7) \
779 return prefix##_SPL + (index - 4); \
781 return prefix##_AL + index; \
783 return prefix##_AX + index; \
785 return prefix##_EAX + index; \
787 return prefix##_RAX + index; \
789 return prefix##_ZMM0 + index; \
791 return prefix##_YMM0 + index; \
793 return prefix##_XMM0 + index; \
797 return prefix##_TMM0 + index; \
802 return prefix##_K0 + index; \
806 return prefix##_K0_K1 + (index / 2); \
808 return prefix##_MM0 + (index & 0x7); \
809 case TYPE_SEGMENTREG: \
810 if ((index & 7) > 5) \
812 return prefix##_ES + (index & 7); \
813 case TYPE_DEBUGREG: \
816 return prefix##_DR0 + index; \
817 case TYPE_CONTROLREG: \
820 return prefix##_CR0 + index; \
822 return prefix##_XMM0 + index; \
824 return prefix##_YMM0 + index; \
826 return prefix##_ZMM0 + index; \
857 debug(
"Expected a REG or R/M encoding in fixupReg");
867 insn->reg - insn->regBase, &valid);
896 if (insn->eaBase >= insn->eaRegBase) {
897 insn->eaBase = (
EABase)fixupRMValue(
898 insn, (
OperandType)
op->type, insn->eaBase - insn->eaRegBase, &valid);
919 dbgs() <<
format(
"Unhandled mmm field for instruction (0x%hhx)",
948 dbgs() <<
format(
"Unhandled m-mmmm field for instruction (0x%hhx)",
977 dbgs() <<
format(
"Unhandled m-mmmm field for instruction (0x%hhx)",
999 if (current == 0x0f) {
1001 dbgs() <<
format(
"Found a two-byte escape prefix (0x%hhx)", current));
1005 if (current == 0x38) {
1012 }
else if (current == 0x3a) {
1019 }
else if (current == 0x0f) {
1021 dbgs() <<
format(
"Found a 3dnow escape prefix (0x%hhx)", current));
1049 for (
int i = 0;; i++) {
1050 if (orig[i] ==
'\0' && equiv[i] ==
'\0')
1052 if (orig[i] ==
'\0' || equiv[i] ==
'\0')
1054 if (orig[i] != equiv[i]) {
1055 if ((orig[i] ==
'Q' || orig[i] ==
'L') && equiv[i] ==
'W')
1057 if ((orig[i] ==
'6' || orig[i] ==
'3') && equiv[i] ==
'1')
1059 if ((orig[i] ==
'4' || orig[i] ==
'2') && equiv[i] ==
'6')
1068 for (
int i = 0;; ++i) {
1069 if (
name[i] ==
'\0')
1071 if (
name[i] ==
'6' &&
name[i + 1] ==
'4')
1101 switch (insn->
opcode & 0xfe) {
1279 (insn->
opcode == 0xA1 || (insn->
opcode & 0xf0) == 0x50))
1324 auto SpecName = mii->
getName(instructionIDWithREXW);
1326 if (!
is64Bit(SpecName.data())) {
1388 specName = mii->
getName(instructionID);
1389 specWithOpSizeName = mii->
getName(instructionIDWithOpsize);
1407 uint16_t instructionIDWithNewOpcode;
1430 insn->
spec = specWithNewOpcode;
1455 auto setOpcodeRegister = [&](
unsigned base) {
1464 setOpcodeRegister(MODRM_REG_AL);
1473 setOpcodeRegister(MODRM_REG_AX);
1476 setOpcodeRegister(MODRM_REG_EAX);
1479 setOpcodeRegister(MODRM_REG_RAX);
1556 insn->
vvvv =
static_cast<Reg>(vvvv);
1578 int hasVVVV, needVVVV;
1585 needVVVV = hasVVVV && (insn->
vvvv != 0);
1588 switch (
Op.encoding) {
1596 needVVVV = hasVVVV & ((insn->
vvvv & 0xf) != 0);
1601 if (insn->
eaBase != EA_BASE_sib && insn->
eaBase != EA_BASE_sib64)
1616 debug(
"Unhandled VSIB index type");
1638 if (insn->
eaBase != EA_BASE_sib && insn->
eaBase != EA_BASE_sib64)
1666 if (
Op.type == TYPE_XMM ||
Op.type == TYPE_YMM)
1734 case ENCODING_WRITEMASK:
1741 LLVM_DEBUG(
dbgs() <<
"Encountered an operand with an unknown encoding.");
1781 std::unique_ptr<const MCInstrInfo> MII;
1783 X86GenericDisassembler(
const MCSubtargetInfo &STI, MCContext &Ctx,
1784 std::unique_ptr<const MCInstrInfo> MII);
1787 ArrayRef<uint8_t> Bytes, uint64_t
Address,
1788 raw_ostream &cStream)
const override;
1796X86GenericDisassembler::X86GenericDisassembler(
1799 std::unique_ptr<const MCInstrInfo> MII)
1802 if (FB[X86::Is16Bit]) {
1805 }
else if (FB[X86::Is32Bit]) {
1808 }
else if (FB[X86::Is64Bit]) {
1819 CommentStream = &CStream;
1821 InternalInstruction Insn;
1822 memset(&Insn, 0,
sizeof(InternalInstruction));
1858 Instr.setFlags(Flags);
1873#define ENTRY(x) X86::x,
1877 MCPhysReg llvmRegnum = llvmRegnums[reg];
1899 baseRegNo = insn.
hasAdSize ? X86::ESI : X86::RSI;
1901 baseRegNo = insn.
hasAdSize ? X86::SI : X86::ESI;
1904 baseRegNo = insn.
hasAdSize ? X86::ESI : X86::SI;
1924 baseRegNo = insn.
hasAdSize ? X86::EDI : X86::RDI;
1926 baseRegNo = insn.
hasAdSize ? X86::DI : X86::EDI;
1929 baseRegNo = insn.
hasAdSize ? X86::EDI : X86::DI;
1952 if (type == TYPE_REL) {
1963 if(immediate & 0x80)
1964 immediate |= ~(0xffull);
1967 if(immediate & 0x8000)
1968 immediate |= ~(0xffffull);
1971 if(immediate & 0x80000000)
1972 immediate |= ~(0xffffffffull);
1979 if(immediate & 0x80)
1980 immediate |= ~(0xffull);
1983 if(immediate & 0x8000)
1984 immediate |= ~(0xffffull);
1987 if(immediate & 0x80000000)
1988 immediate |= ~(0xffffffffull);
1993 else if (type == TYPE_IMM) {
1998 if(immediate & 0x80)
1999 immediate |= ~(0xffull);
2002 if(immediate & 0x8000)
2003 immediate |= ~(0xffffull);
2006 if(immediate & 0x80000000)
2007 immediate |= ~(0xffffffffull);
2034 if (type == TYPE_MOFFS) {
2049 if (insn.
eaBase == EA_BASE_sib || insn.
eaBase == EA_BASE_sib64) {
2050 debug(
"A R/M register operand may not have a SIB byte");
2056 debug(
"Unexpected EA base register");
2059 debug(
"EA_BASE_NONE for ModR/M base");
2061#define ENTRY(x) case EA_BASE_##x:
2064 debug(
"A R/M register operand may not have a base; "
2065 "the operand must be a register.");
2069 mcInst.addOperand(MCOperand::createReg(X86::x)); break;
2088 bool ForceSIB =
false) {
2108 if (insn.
eaBase == EA_BASE_sib || insn.
eaBase == EA_BASE_sib64) {
2112 debug(
"Unexpected sibBase");
2115 case SIB_BASE_##x: \
2116 baseReg = MCOperand::createReg(X86::x); break;
2127 debug(
"Unexpected sibIndex");
2130 case SIB_INDEX_##x: \
2131 indexReg = MCOperand::createReg(X86::x); break;
2152 insn.
sibBase != SIB_BASE_R12D && insn.
sibBase != SIB_BASE_R12))) {
2164 debug(
"EA_BASE_NONE and EA_DISP_NONE for ModR/M base");
2201 debug(
"Unexpected eaBase");
2209 baseReg = MCOperand::createReg(X86::x); break;
2212#define ENTRY(x) case EA_REG_##x:
2215 debug(
"A R/M memory operand may not be a register; "
2216 "the base field must be a base.");
2253 switch (operand.
type) {
2255 debug(
"Unexpected type for a R/M operand");
2270 case TYPE_CONTROLREG:
2301 if (maskRegNum >= 8) {
2302 debug(
"Invalid mask register number");
2322 debug(
"Unhandled operand encoding during translation");
2327 case ENCODING_WRITEMASK:
2390 debug(
"Instruction has no specification");
2400 if(mcInst.
getOpcode() == X86::REP_PREFIX)
2402 else if(mcInst.
getOpcode() == X86::REPNE_PREFIX)
2409 if (
Op.encoding != ENCODING_NONE) {
2422 std::unique_ptr<const MCInstrInfo> MII(
T.createMCInstrInfo());
2423 return new X86GenericDisassembler(STI, Ctx, std::move(MII));
MCDisassembler::DecodeStatus DecodeStatus
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
#define LLVM_ATTRIBUTE_NOINLINE
LLVM_ATTRIBUTE_NOINLINE - On compilers where we have a directive to do so, mark a method "not for inl...
static bool isBranch(unsigned Opcode)
#define LLVM_C_ABI
LLVM_C_ABI is the export/visibility macro used to mark symbols declared in llvm-c as exported when bu...
static uint8_t readOpcode(WasmObjectFile::ReadContext &Ctx)
static int nextByte(ArrayRef< uint8_t > Bytes, uint64_t &Size)
static bool isPrefix(unsigned Opcode, const MCInstrInfo &MCII)
Check if the instruction is a prefix.
#define CASE_ENCODING_VSIB
#define SPARSE_OPCODE_DECISION_INDICES_SYM
#define SPARSE_OPCODE_DECISIONS_SYM
#define rFromEVEX2of4(evex)
#define lFromEVEX4of4(evex)
#define l2FromEVEX4of4(evex)
#define rFromVEX2of3(vex)
#define zFromEVEX4of4(evex)
#define bFromXOP2of3(xop)
#define xFromVEX2of3(vex)
#define mmmmmFromVEX2of3(vex)
#define rmFromModRM(modRM)
#define bFromEVEX4of4(evex)
#define rFromVEX2of2(vex)
#define ppFromEVEX3of4(evex)
#define v2FromEVEX4of4(evex)
#define modFromModRM(modRM)
#define rFromXOP2of3(xop)
#define lFromXOP3of3(xop)
#define lFromVEX2of2(vex)
#define scFromEVEX4of4(evex)
#define scaleFromSIB(sib)
#define regFromModRM(modRM)
#define b2FromEVEX2of4(evex)
#define vvvvFromVEX2of2(vex)
#define nfFromEVEX4of4(evex)
#define ppFromXOP3of3(xop)
#define vvvvFromVEX3of3(vex)
#define r2FromEVEX2of4(evex)
#define uFromEVEX3of4(evex)
#define xFromXOP2of3(xop)
#define wFromEVEX3of4(evex)
#define bFromVEX2of3(vex)
#define wFromVEX3of3(vex)
#define mmmmmFromXOP2of3(xop)
#define aaaFromEVEX4of4(evex)
#define lFromVEX3of3(vex)
#define mmmFromEVEX2of4(evex)
#define ppFromVEX3of3(vex)
#define bFromEVEX2of4(evex)
#define xFromEVEX2of4(evex)
#define ppFromVEX2of2(vex)
#define indexFromSIB(sib)
#define vvvvFromXOP3of3(xop)
#define wFromXOP3of3(xop)
#define oszcFromEVEX3of4(evex)
#define vvvvFromEVEX3of4(evex)
static void translateRegister(MCInst &mcInst, Reg reg)
translateRegister - Translates an internal register to the appropriate LLVM register,...
static bool isREX2(struct InternalInstruction *insn, uint8_t prefix)
static int getInstructionID(struct InternalInstruction *insn, const MCInstrInfo *mii)
static bool readOpcode(struct InternalInstruction *insn)
static MCDisassembler * createX86Disassembler(const Target &T, const MCSubtargetInfo &STI, MCContext &Ctx)
static bool translateMaskRegister(MCInst &mcInst, uint8_t maskRegNum)
translateMaskRegister - Translates a 3-bit mask register number to LLVM form, and appends it to an MC...
static bool translateDstIndex(MCInst &mcInst, InternalInstruction &insn)
translateDstIndex - Appends a destination index operand to an MCInst.
static void translateImmediate(MCInst &mcInst, uint64_t immediate, const OperandSpecifier &operand, InternalInstruction &insn, const MCDisassembler *Dis)
translateImmediate - Appends an immediate operand to an MCInst.
static int readOperands(struct InternalInstruction *insn)
static void translateFPRegister(MCInst &mcInst, uint8_t stackPos)
translateFPRegister - Translates a stack position on the FPU stack to its LLVM form,...
static bool is64Bit(const char *name)
static const uint8_t segmentRegnums[SEG_OVERRIDE_max]
static int readImmediate(struct InternalInstruction *insn, uint8_t size)
static int getInstructionIDWithAttrMask(uint16_t *instructionID, struct InternalInstruction *insn, uint16_t attrMask)
static int readSIB(struct InternalInstruction *insn)
static bool isREX(struct InternalInstruction *insn, uint8_t prefix)
static int readVVVV(struct InternalInstruction *insn)
static bool isNF(InternalInstruction *insn)
static bool translateSrcIndex(MCInst &mcInst, InternalInstruction &insn)
translateSrcIndex - Appends a source index operand to an MCInst.
static const ModRMDecision & getDecision(OpcodeType Type, InstructionContext Context, uint8_t Opcode)
#define GENERIC_FIXUP_FUNC(name, base, prefix)
static int readMaskRegister(struct InternalInstruction *insn)
static bool translateRM(MCInst &mcInst, const OperandSpecifier &operand, InternalInstruction &insn, const MCDisassembler *Dis)
translateRM - Translates an operand stored in the R/M (and possibly SIB) byte of an instruction to LL...
static int readOpcodeRegister(struct InternalInstruction *insn, uint8_t size)
static int readDisplacement(struct InternalInstruction *insn)
static bool isCCMPOrCTEST(InternalInstruction *insn)
static LLVM_ATTRIBUTE_NOINLINE InstrUID decodeModRM(const ModRMDecision &Decision, uint8_t ModRM)
LLVM_C_ABI void LLVMInitializeX86Disassembler()
static int fixupReg(struct InternalInstruction *insn, const struct OperandSpecifier *op)
static int readModRM(struct InternalInstruction *insn)
static bool is16BitEquivalent(const char *orig, const char *equiv)
static bool translateRMMemory(MCInst &mcInst, InternalInstruction &insn, const MCDisassembler *Dis, bool ForceSIB=false)
translateRMMemory - Translates a memory operand stored in the Mod and R/M fields of an internal instr...
static bool translateInstruction(MCInst &target, InternalInstruction &source, const MCDisassembler *Dis)
translateInstruction - Translates an internal instruction and all its operands to an MCInst.
static bool translateRMRegister(MCInst &mcInst, InternalInstruction &insn)
translateRMRegister - Translates a register stored in the R/M field of the ModR/M byte to its LLVM eq...
static bool translateOperand(MCInst &mcInst, const OperandSpecifier &operand, InternalInstruction &insn, const MCDisassembler *Dis)
translateOperand - Translates an operand stored in an internal instruction to LLVM's format and appen...
static int readPrefixes(struct InternalInstruction *insn)
static bool peek(struct InternalInstruction *insn, uint8_t &byte)
Represent a constant reference to an array (0 or more elements consecutively in memory),...
size_t size() const
Get the array size.
bool empty() const
Check if the array is empty.
Context object for machine code objects.
Superclass for all disassemblers.
bool tryAddingSymbolicOperand(MCInst &Inst, int64_t Value, uint64_t Address, bool IsBranch, uint64_t Offset, uint64_t OpSize, uint64_t InstSize) const
void tryAddingPcLoadReferenceComment(int64_t Value, uint64_t Address) const
DecodeStatus
Ternary decode status.
Instances of this class represent a single low-level machine instruction.
unsigned getOpcode() const
void addOperand(const MCOperand Op)
void setOpcode(unsigned Op)
Interface to description of machine instruction set.
StringRef getName(unsigned Opcode) const
Returns the name for the instructions with the given opcode.
Instances of this class represent operands of the MCInst class.
static MCOperand createReg(MCRegister Reg)
static MCOperand createImm(int64_t Val)
Generic base class for all target subtargets.
const FeatureBitset & getFeatureBits() const
Represent a constant reference to a string, i.e.
constexpr const char * data() const
Get a pointer to the start of the string (which may not be null terminated).
Target - Wrapper for Target specific information.
The instances of the Type class are immutable: once they are created, they are never changed.
This class implements an extremely fast bulk output stream that can only output to a stream.
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
EABase
All possible values of the base field for effective-address computations, a.k.a.
Reg
All possible values of the reg field in the ModR/M byte.
DisassemblerMode
Decoding mode for the Intel disassembler.
SIBBase
All possible values of the SIB base field.
SIBIndex
All possible values of the SIB index field.
Define some predicates that are used for node matching.
NodeAddr< InstrNode * > Instr
value_type read(const void *memory, endianness endian)
Read a value of a particular endianness from memory.
This is an optimization pass for GlobalISel generic memory operations.
LLVM_ATTRIBUTE_ALWAYS_INLINE DynamicAPInt mod(const DynamicAPInt &LHS, const DynamicAPInt &RHS)
is always non-negative.
auto size(R &&Range, std::enable_if_t< std::is_base_of< std::random_access_iterator_tag, typename std::iterator_traits< decltype(Range.begin())>::iterator_category >::value, void > *=nullptr)
Get the size of a range.
Target & getTheX86_32Target()
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
format_object< Ts... > format(const char *Fmt, const Ts &... Vals)
These are helper functions used to produce formatted output.
@ Success
The lock was released successfully.
uint16_t MCPhysReg
An unsigned integer type large enough to represent all physical registers, but not necessarily virtua...
DWARFExpression::Operation Op
OutputIt move(R &&Range, OutputIt Out)
Provide wrappers to std::move which take ranges instead of having to pass begin/end explicitly.
Target & getTheX86_64Target()
Implement std::hash so that hash_code can be used in STL containers.
OpcodeDecision opcodeDecisions[IC_max]
ModRMDecision modRMDecisions[256]
static void RegisterMCDisassembler(Target &T, Target::MCDisassemblerCtorTy Fn)
RegisterMCDisassembler - Register a MCDisassembler implementation for the given target.
The specification for how to extract and interpret a full instruction and its operands.
The x86 internal instruction, which is produced by the decoder.
ArrayRef< OperandSpecifier > operands
EADisplacement eaDisplacement
uint8_t rex2ExtensionPrefix[2]
uint8_t vectorExtensionPrefix[4]
SegmentOverride segmentOverride
uint8_t numImmediatesConsumed
llvm::ArrayRef< uint8_t > bytes
uint8_t numImmediatesTranslated
const InstructionSpecifier * spec
VectorExtensionType vectorExtensionType
uint8_t displacementOffset
The specification for how to extract and interpret one operand.