23 using namespace llvm::X86Disassembler;
48 #include "X86GenDisassemblerTables.inc"
51 #define debug(s) do { Debug(__FILE__, __LINE__, s); } while (0)
53 #define debug(s) do { } while (0)
110 modrm_type != MODRM_ONEENTRY;
131 dec = &
ONEBYTE_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode];
134 dec = &
TWOBYTE_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode];
137 dec = &
THREEBYTE38_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode];
140 dec = &
THREEBYTE3A_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode];
143 dec = &
XOP8_MAP_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode];
146 dec = &
XOP9_MAP_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode];
149 dec = &
XOPA_MAP_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode];
155 debug(
"Corrupt table! Unknown modrm_type");
167 case MODRM_SPLITMISC:
222 #define CONSUME_FUNC(name, type) \
223 static int name(struct InternalInstruction* insn, type* ptr) { \
226 for (offset = 0; offset < sizeof(type); ++offset) { \
228 int ret = insn->reader(insn->readerArg, \
230 insn->readerCursor + offset); \
233 combined = combined | ((uint64_t)byte << (offset * 8)); \
236 insn->readerCursor += sizeof(type); \
274 va_start(ap, format);
275 (void)vsnprintf(buffer,
sizeof(buffer),
format, ap);
278 insn->dlog(insn->dlogArg, buffer);
328 bool prefixGroups[4] = {
false };
329 uint64_t prefixLocation;
333 bool hasAdSize =
false;
334 bool hasOpSize =
false;
353 && (byte == 0xf2 || byte == 0xf3)
363 if ((byte == 0xf2 || byte == 0xf3) &&
364 ((nextByte == 0xf0) |
365 ((nextByte & 0xfe) == 0x86 || (nextByte & 0xf8) == 0x90)))
374 (nextByte == 0x88 || nextByte == 0x89 ||
375 nextByte == 0xc6 || nextByte == 0xc7))
384 if (nextByte != 0x0f && nextByte != 0x90)
393 dbgprintf(insn,
"Redundant Group 1 prefix");
394 prefixGroups[0] =
true;
423 debug(
"Unhandled override");
427 dbgprintf(insn,
"Redundant Group 2 prefix");
428 prefixGroups[1] =
true;
433 dbgprintf(insn,
"Redundant Group 3 prefix");
434 prefixGroups[2] =
true;
440 dbgprintf(insn,
"Redundant Group 4 prefix");
441 prefixGroups[3] =
true;
451 dbgprintf(insn,
"Found prefix 0x%hhx", byte);
457 uint8_t byte1, byte2;
460 dbgprintf(insn,
"Couldn't read second byte of EVEX prefix");
465 dbgprintf(insn,
"Couldn't read third byte of EVEX prefix");
470 ((~byte1 & 0xc) == 0xc) && ((byte2 & 0x4) == 0x4)) {
482 dbgprintf(insn,
"Couldn't read third byte of EVEX prefix");
486 dbgprintf(insn,
"Couldn't read fourth byte of EVEX prefix");
499 dbgprintf(insn,
"Found EVEX prefix 0x%hhx 0x%hhx 0x%hhx 0x%hhx",
503 }
else if (byte == 0xc4) {
507 dbgprintf(insn,
"Couldn't read second byte of VEX");
534 dbgprintf(insn,
"Found VEX prefix 0x%hhx 0x%hhx 0x%hhx",
538 }
else if (byte == 0xc5) {
542 dbgprintf(insn,
"Couldn't read second byte of VEX");
569 dbgprintf(insn,
"Found VEX prefix 0x%hhx 0x%hhx",
573 }
else if (byte == 0x8f) {
577 dbgprintf(insn,
"Couldn't read second byte of XOP");
581 if ((byte1 & 0x38) != 0x0) {
612 dbgprintf(insn,
"Found XOP prefix 0x%hhx 0x%hhx 0x%hhx",
618 if ((byte & 0xf0) == 0x40) {
621 if (
lookAtByte(insn, &opcodeByte) || ((opcodeByte & 0xf0) == 0x40)) {
629 dbgprintf(insn,
"Found REX prefix 0x%hhx", byte);
691 dbgprintf(insn,
"Unhandled mm field for instruction (0x%hhx)",
707 dbgprintf(insn,
"Unhandled m-mmmm field for instruction (0x%hhx)",
726 dbgprintf(insn,
"Unhandled m-mmmm field for instruction (0x%hhx)",
744 if (current == 0x0f) {
745 dbgprintf(insn,
"Found a two-byte escape prefix (0x%hhx)", current);
750 if (current == 0x38) {
751 dbgprintf(insn,
"Found a three-byte escape prefix (0x%hhx)", current);
757 }
else if (current == 0x3a) {
758 dbgprintf(insn,
"Found a three-byte escape prefix (0x%hhx)", current);
765 dbgprintf(insn,
"Didn't find a three-byte escape prefix");
798 bool hasModRMExtension;
806 if (hasModRMExtension) {
835 if (orig[i] ==
'\0' && equiv[i] ==
'\0')
837 if (orig[i] ==
'\0' || equiv[i] ==
'\0')
839 if (orig[i] != equiv[i]) {
840 if ((orig[i] ==
'Q' || orig[i] ==
'L') && equiv[i] ==
'W')
842 if ((orig[i] ==
'6' || orig[i] ==
'3') && equiv[i] ==
'1')
844 if ((orig[i] ==
'4' || orig[i] ==
'2') && equiv[i] ==
'6')
862 if (name[i] ==
'6' && name[i+1] ==
'4')
878 uint16_t instructionID;
882 attrMask = ATTR_NONE;
885 attrMask |= ATTR_64BIT;
893 attrMask |= ATTR_OPSIZE;
904 attrMask |= ATTR_EVEXKZ;
906 attrMask |= ATTR_EVEXB;
908 attrMask |= ATTR_EVEXK;
910 attrMask |= ATTR_EVEXL;
912 attrMask |= ATTR_EVEXL2;
916 attrMask |= ATTR_OPSIZE;
927 attrMask |= ATTR_VEXL;
931 attrMask |= ATTR_OPSIZE;
942 attrMask |= ATTR_VEXL;
946 attrMask |= ATTR_OPSIZE;
957 attrMask |= ATTR_VEXL;
963 attrMask |= ATTR_OPSIZE;
965 attrMask |= ATTR_ADSIZE;
973 attrMask |= ATTR_REXW;
981 attrMask ^= ATTR_ADSIZE;
1001 uint16_t instructionIDWithREXW;
1003 insn, attrMask | ATTR_REXW)) {
1009 const char *SpecName =
GetInstrName(instructionIDWithREXW, miiArg);
1029 attrMask |= ATTR_ADSIZE;
1031 attrMask |= ATTR_OPSIZE;
1035 attrMask ^= ATTR_ADSIZE | ATTR_OPSIZE;
1046 !(attrMask & ATTR_OPSIZE)) {
1056 uint16_t instructionIDWithOpsize;
1057 const char *specName, *specWithOpSizeName;
1063 attrMask | ATTR_OPSIZE)) {
1075 specWithOpSizeName =
GetInstrName(instructionIDWithOpsize, miiArg);
1096 uint16_t instructionIDWithNewOpcode;
1120 insn->
spec = specWithNewOpcode;
1141 uint8_t index, base;
1152 dbgprintf(insn,
"SIB-based addressing doesn't work in 16-bit mode");
1155 sibIndexBase = SIB_INDEX_EAX;
1156 sibBaseBase = SIB_BASE_EAX;
1159 sibIndexBase = SIB_INDEX_RAX;
1160 sibBaseBase = SIB_BASE_RAX;
1212 debug(
"Cannot have Mod = 0b11 and a SIB byte");
1249 if (consumeInt8(insn, &d8))
1254 if (consumeInt16(insn, &d16))
1259 if (consumeInt32(insn, &d32))
1277 uint8_t mod, rm, reg;
1303 insn->
regBase = MODRM_REG_EAX;
1307 insn->
regBase = MODRM_REG_RAX;
1370 EA_BASE_sib : EA_BASE_sib64);
1392 insn->
eaBase = EA_BASE_sib;
1414 #define GENERIC_FIXUP_FUNC(name, base, prefix) \
1415 static uint8_t name(struct InternalInstruction *insn, \
1422 debug("Unhandled register type"); \
1426 return base + index; \
1428 if (insn->rexPrefix && \
1429 index >= 4 && index <= 7) { \
1430 return prefix##_SPL + (index - 4); \
1432 return prefix##_AL + index; \
1435 return prefix##_AX + index; \
1437 return prefix##_EAX + index; \
1439 return prefix##_RAX + index; \
1441 return prefix##_ZMM0 + index; \
1443 return prefix##_YMM0 + index; \
1448 return prefix##_XMM0 + index; \
1454 return prefix##_K0 + index; \
1456 return prefix##_MM0 + (index & 0x7); \
1457 case TYPE_SEGMENTREG: \
1460 return prefix##_ES + index; \
1461 case TYPE_DEBUGREG: \
1462 return prefix##_DR0 + index; \
1463 case TYPE_CONTROLREG: \
1464 return prefix##_CR0 + index; \
1501 debug(
"Expected a REG or R/M encoding in fixupReg");
1504 insn->vvvv = (
Reg)fixupRegValue(insn,
1512 insn->reg = (
Reg)fixupRegValue(insn,
1514 insn->reg - insn->regBase,
1520 if (insn->eaBase >= insn->eaRegBase) {
1521 insn->eaBase = (
EABase)fixupRMValue(insn,
1523 insn->eaBase - insn->eaRegBase,
1546 dbgprintf(insn,
"readOpcodeRegister()");
1601 debug(
"Already consumed two immediates");
1618 if (consumeUInt16(insn, &imm16))
1623 if (consumeUInt32(insn, &imm32))
1628 if (consumeUInt64(insn, &imm64))
1665 insn->
vvvv =
static_cast<Reg>(vvvv);
1695 int hasVVVV, needVVVV;
1703 needVVVV = hasVVVV && (insn->
vvvv != 0);
1705 for (
const auto &Op : x86OperandSets[insn->
spec->
operands]) {
1706 switch (Op.encoding) {
1719 insn->
displacement *= 1 << (Op.encoding - ENCODING_RM);
1727 dbgprintf(insn,
"We currently don't hande code-offset encodings");
1740 if (Op.type == TYPE_XMM128 ||
1741 Op.type == TYPE_XMM256)
1793 case ENCODING_WRITEMASK:
1800 dbgprintf(insn,
"Encountered an operand with an unknown encoding.");
1806 if (needVVVV)
return -1;
1833 const void *readerArg,
dlog_t logger,
void *loggerArg,
const void *miiArg,
1848 getID(insn, miiArg) ||
1857 dbgprintf(insn,
"Read from 0x%llx to 0x%llx: length %zu",
1861 dbgprintf(insn,
"Instruction exceeds 15-byte limit");
#define bFromEVEX4of4(evex)
bool consumedDisplacement
VectorExtensionType vectorExtensionType
#define wFromEVEX3of4(evex)
The specification for how to extract and interpret a full instruction and its operands.
#define bFromVEX2of3(vex)
static int consumeByte(struct InternalInstruction *insn, uint8_t *byte)
#define rmFromModRM(modRM)
static int readSIB(struct InternalInstruction *insn)
#define zFromEVEX4of4(evex)
static InstrUID decode(OpcodeType type, InstructionContext insnContext, uint8_t opcode, uint8_t modRM)
#define vvvvFromVEX2of2(vex)
#define vvvvFromEVEX3of4(evex)
static int readDisplacement(struct InternalInstruction *insn)
#define r2FromEVEX2of4(evex)
#define aaaFromEVEX4of4(evex)
#define bFromEVEX2of4(evex)
SIBIndex
All possible values of the SIB index field.
static int readOpcode(struct InternalInstruction *insn)
#define rFromEVEX2of4(evex)
#define xFromEVEX2of4(evex)
static bool isPrefixAtLocation(struct InternalInstruction *insn, uint8_t prefix, uint64_t location)
#define rFromVEX2of2(vex)
Reg
All possible values of the reg field in the ModR/M byte.
#define lFromVEX2of2(vex)
static int readVVVV(struct InternalInstruction *insn)
static int lookAtByte(struct InternalInstruction *insn, uint8_t *byte)
#define xFromXOP2of3(xop)
#define v2FromEVEX4of4(evex)
SegmentOverride segmentOverride
static int getID(struct InternalInstruction *insn, const void *miiArg)
int decodeInstruction(InternalInstruction *insn, byteReader_t reader, const void *readerArg, dlog_t logger, void *loggerArg, const void *miiArg, uint64_t startLoc, DisassemblerMode mode)
Decode one instruction and store the decoding results in a buffer provided by the consumer...
ModRMDecision modRMDecisions[256]
uint8_t prefixPresent[0x100]
static void unconsumeByte(struct InternalInstruction *insn)
#define ppFromVEX3of3(vex)
#define mmmmmFromXOP2of3(xop)
EABase
All possible values of the base field for effective-address computations, a.k.a.
#define vvvvFromVEX3of3(vex)
bool isPrefix(MCInstrInfo const &MCII, MCInst const &MCI)
format_object< Ts...> format(const char *Fmt, const Ts &...Vals)
These are helper functions used to produce formatted output.
The specification for how to extract and interpret one operand.
uint64_t necessaryPrefixLocation
static bool is64Bit(const char *name)
static int readOpcodeRegister(struct InternalInstruction *insn, uint8_t size)
const char * GetInstrName(unsigned Opcode, const void *mii)
#define bFromXOP2of3(xop)
static int readImmediate(struct InternalInstruction *insn, uint8_t size)
static int fixupReg(struct InternalInstruction *insn, const struct OperandSpecifier *op)
#define lFromVEX3of3(vex)
#define ppFromVEX2of2(vex)
static int modRMRequired(OpcodeType type, InstructionContext insnContext, uint16_t opcode)
The x86 internal instruction, which is produced by the decoder.
static int readPrefixes(struct InternalInstruction *insn)
static void dbgprintf(struct InternalInstruction *insn, const char *format,...)
#define lFromXOP3of3(xop)
EADisplacement eaDisplacement
#define mmmmmFromVEX2of3(vex)
#define ppFromEVEX3of4(evex)
#define scaleFromSIB(sib)
static int readOperands(struct InternalInstruction *insn)
ArrayRef< OperandSpecifier > operands
const InstructionSpecifier * spec
#define xFromVEX2of3(vex)
static bool is16BitEquivalent(const char *orig, const char *equiv)
#define rFromXOP2of3(xop)
#define CONSUME_FUNC(name, type)
static void setPrefixPresent(struct InternalInstruction *insn, uint8_t prefix, uint64_t location)
void(* dlog_t)(void *arg, const char *log)
Type for the logging function that the consumer can provide to get debugging output from the decoder...
int(* byteReader_t)(const void *arg, uint8_t *byte, uint64_t address)
Type for the byte reader that the consumer must provide to the decoder.
uint64_t prefixLocations[0x100]
#define lFromEVEX4of4(evex)
static InstructionContext contextForAttrs(uint16_t attrMask)
#define l2FromEVEX4of4(evex)
#define wFromVEX3of3(vex)
#define wFromXOP3of3(xop)
uint8_t displacementOffset
#define indexFromSIB(sib)
SIBBase
All possible values of the SIB base field.
OperandType
Types of operands to CF instructions.
uint8_t numImmediatesConsumed
static int readModRM(struct InternalInstruction *insn)
#define mmFromEVEX2of4(evex)
static void logger(void *arg, const char *log)
logger - a callback function that wraps the operator<< method from raw_ostream.
#define rFromVEX2of3(vex)
Specifies which opcode->instruction tables to look at given a particular context (set of attributes)...
#define regFromModRM(modRM)
static const struct InstructionSpecifier * specifierForUID(InstrUID uid)
#define modFromModRM(modRM)
OpcodeDecision opcodeDecisions[IC_max]
static int readMaskRegister(struct InternalInstruction *insn)
uint8_t vectorExtensionPrefix[4]
#define vvvvFromXOP3of3(vex)
#define GENERIC_FIXUP_FUNC(name, base, prefix)
static int getIDWithAttrMask(uint16_t *instructionID, struct InternalInstruction *insn, uint16_t attrMask)
LocationClass< Ty > location(Ty &L)
#define ppFromXOP3of3(xop)
Specifies which set of ModR/M->instruction tables to look at given a particular opcode.
DisassemblerMode
Decoding mode for the Intel disassembler.
Specifies whether a ModR/M byte is needed and (if so) which instruction each possible value of the Mo...