23 using namespace llvm::X86Disassembler;
48 #include "X86GenDisassemblerTables.inc"
51 #define debug(s) do { Debug(__FILE__, __LINE__, s); } while (0)
53 #define debug(s) do { } while (0)
109 modrm_type != MODRM_ONEENTRY;
130 dec = &
ONEBYTE_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode];
133 dec = &
TWOBYTE_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode];
136 dec = &
THREEBYTE38_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode];
139 dec = &
THREEBYTE3A_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode];
142 dec = &
XOP8_MAP_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode];
145 dec = &
XOP9_MAP_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode];
148 dec = &
XOPA_MAP_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode];
154 debug(
"Corrupt table! Unknown modrm_type");
166 case MODRM_SPLITMISC:
221 #define CONSUME_FUNC(name, type) \
222 static int name(struct InternalInstruction* insn, type* ptr) { \
225 for (offset = 0; offset < sizeof(type); ++offset) { \
227 int ret = insn->reader(insn->readerArg, \
229 insn->readerCursor + offset); \
232 combined = combined | ((uint64_t)byte << (offset * 8)); \
235 insn->readerCursor += sizeof(type); \
273 va_start(ap, format);
274 (void)vsnprintf(buffer,
sizeof(buffer),
format, ap);
277 insn->dlog(insn->dlogArg, buffer);
325 bool prefixGroups[4] = {
false };
326 uint64_t prefixLocation;
330 bool hasAdSize =
false;
331 bool hasOpSize =
false;
350 && (byte == 0xf2 || byte == 0xf3)
360 if ((byte == 0xf2 || byte == 0xf3) &&
361 ((nextByte == 0xf0) ||
362 ((nextByte & 0xfe) == 0x86 || (nextByte & 0xf8) == 0x90)))
371 (nextByte == 0x88 || nextByte == 0x89 ||
372 nextByte == 0xc6 || nextByte == 0xc7))
381 if (nextByte != 0x0f && nextByte != 0x90)
390 dbgprintf(insn,
"Redundant Group 1 prefix");
391 prefixGroups[0] =
true;
420 debug(
"Unhandled override");
424 dbgprintf(insn,
"Redundant Group 2 prefix");
425 prefixGroups[1] =
true;
430 dbgprintf(insn,
"Redundant Group 3 prefix");
431 prefixGroups[2] =
true;
437 dbgprintf(insn,
"Redundant Group 4 prefix");
438 prefixGroups[3] =
true;
448 dbgprintf(insn,
"Found prefix 0x%hhx", byte);
454 uint8_t byte1, byte2;
457 dbgprintf(insn,
"Couldn't read second byte of EVEX prefix");
462 dbgprintf(insn,
"Couldn't read third byte of EVEX prefix");
467 ((~byte1 & 0xc) == 0xc) && ((byte2 & 0x4) == 0x4)) {
479 dbgprintf(insn,
"Couldn't read third byte of EVEX prefix");
483 dbgprintf(insn,
"Couldn't read fourth byte of EVEX prefix");
496 dbgprintf(insn,
"Found EVEX prefix 0x%hhx 0x%hhx 0x%hhx 0x%hhx",
500 }
else if (byte == 0xc4) {
504 dbgprintf(insn,
"Couldn't read second byte of VEX");
531 dbgprintf(insn,
"Found VEX prefix 0x%hhx 0x%hhx 0x%hhx",
535 }
else if (byte == 0xc5) {
539 dbgprintf(insn,
"Couldn't read second byte of VEX");
566 dbgprintf(insn,
"Found VEX prefix 0x%hhx 0x%hhx",
570 }
else if (byte == 0x8f) {
574 dbgprintf(insn,
"Couldn't read second byte of XOP");
578 if ((byte1 & 0x38) != 0x0) {
609 dbgprintf(insn,
"Found XOP prefix 0x%hhx 0x%hhx 0x%hhx",
615 if ((byte & 0xf0) == 0x40) {
618 if (
lookAtByte(insn, &opcodeByte) || ((opcodeByte & 0xf0) == 0x40)) {
626 dbgprintf(insn,
"Found REX prefix 0x%hhx", byte);
688 dbgprintf(insn,
"Unhandled mm field for instruction (0x%hhx)",
704 dbgprintf(insn,
"Unhandled m-mmmm field for instruction (0x%hhx)",
723 dbgprintf(insn,
"Unhandled m-mmmm field for instruction (0x%hhx)",
741 if (current == 0x0f) {
742 dbgprintf(insn,
"Found a two-byte escape prefix (0x%hhx)", current);
747 if (current == 0x38) {
748 dbgprintf(insn,
"Found a three-byte escape prefix (0x%hhx)", current);
754 }
else if (current == 0x3a) {
755 dbgprintf(insn,
"Found a three-byte escape prefix (0x%hhx)", current);
762 dbgprintf(insn,
"Didn't find a three-byte escape prefix");
795 bool hasModRMExtension;
803 if (hasModRMExtension) {
832 if (orig[i] ==
'\0' && equiv[i] ==
'\0')
834 if (orig[i] ==
'\0' || equiv[i] ==
'\0')
836 if (orig[i] != equiv[i]) {
837 if ((orig[i] ==
'Q' || orig[i] ==
'L') && equiv[i] ==
'W')
839 if ((orig[i] ==
'6' || orig[i] ==
'3') && equiv[i] ==
'1')
841 if ((orig[i] ==
'4' || orig[i] ==
'2') && equiv[i] ==
'6')
859 if (name[i] ==
'6' && name[i+1] ==
'4')
875 uint16_t instructionID;
879 attrMask = ATTR_NONE;
882 attrMask |= ATTR_64BIT;
890 attrMask |= ATTR_OPSIZE;
901 attrMask |= ATTR_EVEXKZ;
903 attrMask |= ATTR_EVEXB;
905 attrMask |= ATTR_EVEXK;
907 attrMask |= ATTR_EVEXL;
909 attrMask |= ATTR_EVEXL2;
913 attrMask |= ATTR_OPSIZE;
924 attrMask |= ATTR_VEXL;
928 attrMask |= ATTR_OPSIZE;
939 attrMask |= ATTR_VEXL;
943 attrMask |= ATTR_OPSIZE;
954 attrMask |= ATTR_VEXL;
960 attrMask |= ATTR_OPSIZE;
962 attrMask |= ATTR_ADSIZE;
970 attrMask |= ATTR_REXW;
978 attrMask ^= ATTR_ADSIZE;
992 attrMask ^= ATTR_OPSIZE;
1013 attrMask ^= ATTR_OPSIZE;
1039 uint16_t instructionIDWithREXW;
1041 insn, attrMask | ATTR_REXW)) {
1047 auto SpecName =
GetInstrName(instructionIDWithREXW, miiArg);
1049 if (!
is64Bit(SpecName.data())) {
1067 attrMask |= ATTR_ADSIZE;
1069 attrMask |= ATTR_OPSIZE;
1073 attrMask ^= ATTR_ADSIZE | ATTR_OPSIZE;
1084 !(attrMask & ATTR_OPSIZE)) {
1094 uint16_t instructionIDWithOpsize;
1101 attrMask | ATTR_OPSIZE)) {
1113 specWithOpSizeName =
GetInstrName(instructionIDWithOpsize, miiArg);
1134 uint16_t instructionIDWithNewOpcode;
1158 insn->
spec = specWithNewOpcode;
1179 uint8_t index, base;
1190 dbgprintf(insn,
"SIB-based addressing doesn't work in 16-bit mode");
1193 sibIndexBase = SIB_INDEX_EAX;
1194 sibBaseBase = SIB_BASE_EAX;
1197 sibIndexBase = SIB_INDEX_RAX;
1198 sibBaseBase = SIB_BASE_RAX;
1250 debug(
"Cannot have Mod = 0b11 and a SIB byte");
1287 if (consumeInt8(insn, &d8))
1292 if (consumeInt16(insn, &d16))
1297 if (consumeInt32(insn, &d32))
1315 uint8_t mod, rm, reg;
1341 insn->
regBase = MODRM_REG_EAX;
1345 insn->
regBase = MODRM_REG_RAX;
1408 EA_BASE_sib : EA_BASE_sib64);
1430 insn->
eaBase = EA_BASE_sib;
1452 #define GENERIC_FIXUP_FUNC(name, base, prefix) \
1453 static uint16_t name(struct InternalInstruction *insn, \
1460 debug("Unhandled register type"); \
1464 return base + index; \
1466 if (insn->rexPrefix && \
1467 index >= 4 && index <= 7) { \
1468 return prefix##_SPL + (index - 4); \
1470 return prefix##_AL + index; \
1473 return prefix##_AX + index; \
1475 return prefix##_EAX + index; \
1477 return prefix##_RAX + index; \
1479 return prefix##_ZMM0 + index; \
1481 return prefix##_YMM0 + index; \
1485 return prefix##_XMM0 + index; \
1495 return prefix##_K0 + index; \
1497 return prefix##_MM0 + (index & 0x7); \
1498 case TYPE_SEGMENTREG: \
1501 return prefix##_ES + index; \
1502 case TYPE_DEBUGREG: \
1503 return prefix##_DR0 + index; \
1504 case TYPE_CONTROLREG: \
1505 return prefix##_CR0 + index; \
1509 return prefix##_BND0 + index; \
1546 debug(
"Expected a REG or R/M encoding in fixupReg");
1549 insn->vvvv = (
Reg)fixupRegValue(insn,
1557 insn->reg = (
Reg)fixupRegValue(insn,
1559 insn->reg - insn->regBase,
1565 if (insn->eaBase >= insn->eaRegBase) {
1566 insn->eaBase = (
EABase)fixupRMValue(insn,
1568 insn->eaBase - insn->eaRegBase,
1591 dbgprintf(insn,
"readOpcodeRegister()");
1646 debug(
"Already consumed two immediates");
1663 if (consumeUInt16(insn, &imm16))
1668 if (consumeUInt32(insn, &imm32))
1673 if (consumeUInt64(insn, &imm64))
1710 insn->
vvvv =
static_cast<Reg>(vvvv);
1740 int hasVVVV, needVVVV;
1748 needVVVV = hasVVVV && (insn->
vvvv != 0);
1751 switch (
Op.encoding) {
1777 if (
Op.type == TYPE_XMM128 ||
1778 Op.type == TYPE_XMM256)
1830 case ENCODING_WRITEMASK:
1837 dbgprintf(insn,
"Encountered an operand with an unknown encoding.");
1843 if (needVVVV)
return -1;
1870 const void *readerArg,
dlog_t logger,
void *loggerArg,
const void *miiArg,
1885 getID(insn, miiArg) ||
1894 dbgprintf(insn,
"Read from 0x%llx to 0x%llx: length %zu",
1898 dbgprintf(insn,
"Instruction exceeds 15-byte limit");
#define bFromEVEX4of4(evex)
bool consumedDisplacement
VectorExtensionType vectorExtensionType
#define wFromEVEX3of4(evex)
The specification for how to extract and interpret a full instruction and its operands.
#define bFromVEX2of3(vex)
static int consumeByte(struct InternalInstruction *insn, uint8_t *byte)
#define rmFromModRM(modRM)
static int readSIB(struct InternalInstruction *insn)
#define zFromEVEX4of4(evex)
opt Optimize addressing mode
static InstrUID decode(OpcodeType type, InstructionContext insnContext, uint8_t opcode, uint8_t modRM)
#define vvvvFromVEX2of2(vex)
#define vvvvFromEVEX3of4(evex)
static int readDisplacement(struct InternalInstruction *insn)
#define r2FromEVEX2of4(evex)
#define aaaFromEVEX4of4(evex)
#define bFromEVEX2of4(evex)
SIBIndex
All possible values of the SIB index field.
static int readOpcode(struct InternalInstruction *insn)
#define rFromEVEX2of4(evex)
#define xFromEVEX2of4(evex)
static bool isPrefixAtLocation(struct InternalInstruction *insn, uint8_t prefix, uint64_t location)
#define rFromVEX2of2(vex)
Reg
All possible values of the reg field in the ModR/M byte.
#define lFromVEX2of2(vex)
static int readVVVV(struct InternalInstruction *insn)
static int lookAtByte(struct InternalInstruction *insn, uint8_t *byte)
#define xFromXOP2of3(xop)
#define v2FromEVEX4of4(evex)
SegmentOverride segmentOverride
static int getID(struct InternalInstruction *insn, const void *miiArg)
int decodeInstruction(InternalInstruction *insn, byteReader_t reader, const void *readerArg, dlog_t logger, void *loggerArg, const void *miiArg, uint64_t startLoc, DisassemblerMode mode)
Decode one instruction and store the decoding results in a buffer provided by the consumer...
ModRMDecision modRMDecisions[256]
uint8_t prefixPresent[0x100]
static void unconsumeByte(struct InternalInstruction *insn)
#define ppFromVEX3of3(vex)
#define mmmmmFromXOP2of3(xop)
EABase
All possible values of the base field for effective-address computations, a.k.a.
#define vvvvFromVEX3of3(vex)
bool isPrefix(MCInstrInfo const &MCII, MCInst const &MCI)
format_object< Ts...> format(const char *Fmt, const Ts &...Vals)
These are helper functions used to produce formatted output.
The specification for how to extract and interpret one operand.
uint64_t necessaryPrefixLocation
static bool is64Bit(const char *name)
static int readOpcodeRegister(struct InternalInstruction *insn, uint8_t size)
#define bFromXOP2of3(xop)
static int readImmediate(struct InternalInstruction *insn, uint8_t size)
static int fixupReg(struct InternalInstruction *insn, const struct OperandSpecifier *op)
#define lFromVEX3of3(vex)
#define ppFromVEX2of2(vex)
static int modRMRequired(OpcodeType type, InstructionContext insnContext, uint16_t opcode)
The x86 internal instruction, which is produced by the decoder.
static int readPrefixes(struct InternalInstruction *insn)
static void dbgprintf(struct InternalInstruction *insn, const char *format,...)
StringRef GetInstrName(unsigned Opcode, const void *mii)
#define lFromXOP3of3(xop)
EADisplacement eaDisplacement
#define mmmmmFromVEX2of3(vex)
#define ppFromEVEX3of4(evex)
#define scaleFromSIB(sib)
static int readOperands(struct InternalInstruction *insn)
ArrayRef< OperandSpecifier > operands
const InstructionSpecifier * spec
#define xFromVEX2of3(vex)
static bool is16BitEquivalent(const char *orig, const char *equiv)
#define rFromXOP2of3(xop)
#define CONSUME_FUNC(name, type)
static void setPrefixPresent(struct InternalInstruction *insn, uint8_t prefix, uint64_t location)
void(* dlog_t)(void *arg, const char *log)
Type for the logging function that the consumer can provide to get debugging output from the decoder...
int(* byteReader_t)(const void *arg, uint8_t *byte, uint64_t address)
Type for the byte reader that the consumer must provide to the decoder.
uint64_t prefixLocations[0x100]
#define lFromEVEX4of4(evex)
static InstructionContext contextForAttrs(uint16_t attrMask)
#define l2FromEVEX4of4(evex)
#define wFromVEX3of3(vex)
#define wFromXOP3of3(xop)
uint8_t displacementOffset
#define indexFromSIB(sib)
SIBBase
All possible values of the SIB base field.
OperandType
Types of operands to CF instructions.
uint8_t numImmediatesConsumed
static int readModRM(struct InternalInstruction *insn)
#define mmFromEVEX2of4(evex)
static void logger(void *arg, const char *log)
logger - a callback function that wraps the operator<< method from raw_ostream.
#define rFromVEX2of3(vex)
Specifies which opcode->instruction tables to look at given a particular context (set of attributes)...
#define regFromModRM(modRM)
static const struct InstructionSpecifier * specifierForUID(InstrUID uid)
#define modFromModRM(modRM)
OpcodeDecision opcodeDecisions[IC_max]
static int readMaskRegister(struct InternalInstruction *insn)
uint8_t vectorExtensionPrefix[4]
LLVM_NODISCARD LLVM_ATTRIBUTE_ALWAYS_INLINE const char * data() const
data - Get a pointer to the start of the string (which may not be null terminated).
StringRef - Represent a constant reference to a string, i.e.
#define vvvvFromXOP3of3(vex)
#define GENERIC_FIXUP_FUNC(name, base, prefix)
static int getIDWithAttrMask(uint16_t *instructionID, struct InternalInstruction *insn, uint16_t attrMask)
LocationClass< Ty > location(Ty &L)
#define ppFromXOP3of3(xop)
Specifies which set of ModR/M->instruction tables to look at given a particular opcode.
DisassemblerMode
Decoding mode for the Intel disassembler.
Specifies whether a ModR/M byte is needed and (if so) which instruction each possible value of the Mo...