LLVM API Documentation

X86DisassemblerDecoderCommon.h
Go to the documentation of this file.
00001 /*===-- X86DisassemblerDecoderCommon.h - Disassembler decoder -----*- C -*-===*
00002  *
00003  *                     The LLVM Compiler Infrastructure
00004  *
00005  * This file is distributed under the University of Illinois Open Source
00006  * License. See LICENSE.TXT for details.
00007  *
00008  *===----------------------------------------------------------------------===*
00009  *
00010  * This file is part of the X86 Disassembler.
00011  * It contains common definitions used by both the disassembler and the table
00012  *  generator.
00013  * Documentation for the disassembler can be found in X86Disassembler.h.
00014  *
00015  *===----------------------------------------------------------------------===*/
00016 
00017 /*
00018  * This header file provides those definitions that need to be shared between
00019  * the decoder and the table generator in a C-friendly manner.
00020  */
00021 
00022 #ifndef X86DISASSEMBLERDECODERCOMMON_H
00023 #define X86DISASSEMBLERDECODERCOMMON_H
00024 
00025 #include "llvm/Support/DataTypes.h"
00026 
00027 #define INSTRUCTIONS_SYM  x86DisassemblerInstrSpecifiers
00028 #define CONTEXTS_SYM      x86DisassemblerContexts
00029 #define ONEBYTE_SYM       x86DisassemblerOneByteOpcodes
00030 #define TWOBYTE_SYM       x86DisassemblerTwoByteOpcodes
00031 #define THREEBYTE38_SYM   x86DisassemblerThreeByte38Opcodes
00032 #define THREEBYTE3A_SYM   x86DisassemblerThreeByte3AOpcodes
00033 #define THREEBYTEA6_SYM   x86DisassemblerThreeByteA6Opcodes
00034 #define THREEBYTEA7_SYM   x86DisassemblerThreeByteA7Opcodes
00035 /* Each *_STR below is the quoted spelling of the *_SYM identifier above. */
00036 #define INSTRUCTIONS_STR  "x86DisassemblerInstrSpecifiers"
00037 #define CONTEXTS_STR      "x86DisassemblerContexts"
00038 #define ONEBYTE_STR       "x86DisassemblerOneByteOpcodes"
00039 #define TWOBYTE_STR       "x86DisassemblerTwoByteOpcodes"
00040 #define THREEBYTE38_STR   "x86DisassemblerThreeByte38Opcodes"
00041 #define THREEBYTE3A_STR   "x86DisassemblerThreeByte3AOpcodes"
00042 #define THREEBYTEA6_STR   "x86DisassemblerThreeByteA6Opcodes"
00043 #define THREEBYTEA7_STR   "x86DisassemblerThreeByteA7Opcodes"
00044 
00045 /*
00046  * Attribute flags (one bit each; ATTR_NONE is 0) that must be known before
00047  * the opcode can be processed correctly.  Most mark the presence of a prefix;
00048  * ATTR_64BIT describes the decoding context.  ATTR_max is ATTR_VEXL + 1.
00049  */
00050 #define ATTRIBUTE_BITS          \
00051   ENUM_ENTRY(ATTR_NONE,   0x00) \
00052   ENUM_ENTRY(ATTR_64BIT,  0x01) \
00053   ENUM_ENTRY(ATTR_XS,     0x02) \
00054   ENUM_ENTRY(ATTR_XD,     0x04) \
00055   ENUM_ENTRY(ATTR_REXW,   0x08) \
00056   ENUM_ENTRY(ATTR_OPSIZE, 0x10) \
00057   ENUM_ENTRY(ATTR_ADSIZE, 0x20) \
00058   ENUM_ENTRY(ATTR_VEX,    0x40) \
00059   ENUM_ENTRY(ATTR_VEXL,   0x80)
00060 
00061 #define ENUM_ENTRY(n, v) n = v,
00062 enum attributeBits {
00063   ATTRIBUTE_BITS
00064   ATTR_max  /* 0x81: one past ATTR_VEXL, neither a flag nor a count */
00065 };
00066 #undef ENUM_ENTRY
00067 
00068 /*
00069  * Combinations of the above attributes that are relevant to instruction
00070  * decode.  Although other combinations are possible, they can be reduced to
00071  * these without affecting the ultimately decoded instruction.
00072  */
00073 
00074 /*           Class name           Rank  Rationale for rank assignment         */
00075 #define INSTRUCTION_CONTEXTS                                                   \
00076   ENUM_ENTRY(IC,                    0,  "says nothing about the instruction")  \
00077   ENUM_ENTRY(IC_64BIT,              1,  "says the instruction applies in "     \
00078                                         "64-bit mode but no more")             \
00079   ENUM_ENTRY(IC_OPSIZE,             3,  "requires an OPSIZE prefix, so "       \
00080                                         "operands change width")               \
00081   ENUM_ENTRY(IC_ADSIZE,             3,  "requires an ADSIZE prefix, so "       \
00082                                         "operands change width")               \
00083   ENUM_ENTRY(IC_XD,                 2,  "may say something about the opcode "  \
00084                                         "but not the operands")                \
00085   ENUM_ENTRY(IC_XS,                 2,  "may say something about the opcode "  \
00086                                         "but not the operands")                \
00087   ENUM_ENTRY(IC_XD_OPSIZE,          3,  "requires an OPSIZE prefix, so "       \
00088                                         "operands change width")               \
00089   ENUM_ENTRY(IC_XS_OPSIZE,          3,  "requires an OPSIZE prefix, so "       \
00090                                         "operands change width")               \
00091   ENUM_ENTRY(IC_64BIT_REXW,         4,  "requires a REX.W prefix, so operands "\
00092                                         "change width; overrides IC_OPSIZE")   \
00093   ENUM_ENTRY(IC_64BIT_OPSIZE,       3,  "Just as meaningful as IC_OPSIZE")     \
00094   ENUM_ENTRY(IC_64BIT_ADSIZE,       3,  "Just as meaningful as IC_ADSIZE")     \
00095   ENUM_ENTRY(IC_64BIT_XD,           5,  "XD instructions are SSE; REX.W is "   \
00096                                         "secondary")                           \
00097   ENUM_ENTRY(IC_64BIT_XS,           5,  "Just as meaningful as IC_64BIT_XD")   \
00098   ENUM_ENTRY(IC_64BIT_XD_OPSIZE,    3,  "Just as meaningful as IC_XD_OPSIZE")  \
00099   ENUM_ENTRY(IC_64BIT_XS_OPSIZE,    3,  "Just as meaningful as IC_XS_OPSIZE")  \
00100   ENUM_ENTRY(IC_64BIT_REXW_XS,      6,  "OPSIZE could mean a different "       \
00101                                         "opcode")                              \
00102   ENUM_ENTRY(IC_64BIT_REXW_XD,      6,  "Just as meaningful as "               \
00103                                         "IC_64BIT_REXW_XS")                    \
00104   ENUM_ENTRY(IC_64BIT_REXW_OPSIZE,  7,  "The Dynamic Duo!  Prefer over all "   \
00105                                         "else because this changes most "      \
00106                                         "operands' meaning")                   \
00107   ENUM_ENTRY(IC_VEX,                1,  "requires a VEX prefix")               \
00108   ENUM_ENTRY(IC_VEX_XS,             2,  "requires VEX and the XS prefix")      \
00109   ENUM_ENTRY(IC_VEX_XD,             2,  "requires VEX and the XD prefix")      \
00110   ENUM_ENTRY(IC_VEX_OPSIZE,         2,  "requires VEX and the OpSize prefix")  \
00111   ENUM_ENTRY(IC_VEX_W,              3,  "requires VEX and the W prefix")       \
00112   ENUM_ENTRY(IC_VEX_W_XS,           4,  "requires VEX, W, and XS prefix")      \
00113   ENUM_ENTRY(IC_VEX_W_XD,           4,  "requires VEX, W, and XD prefix")      \
00114   ENUM_ENTRY(IC_VEX_W_OPSIZE,       4,  "requires VEX, W, and OpSize")         \
00115   ENUM_ENTRY(IC_VEX_L,              3,  "requires VEX and the L prefix")       \
00116   ENUM_ENTRY(IC_VEX_L_XS,           4,  "requires VEX and the L and XS prefix")\
00117   ENUM_ENTRY(IC_VEX_L_XD,           4,  "requires VEX and the L and XD prefix")\
00118   ENUM_ENTRY(IC_VEX_L_OPSIZE,       4,  "requires VEX, L, and OpSize")         \
00119   ENUM_ENTRY(IC_VEX_L_W_OPSIZE,     5,  "requires VEX, L, W and OpSize")
00120 
00121 
00122 #define ENUM_ENTRY(n, r, d) n,  /* rank and rationale are dropped here */
00123 typedef enum {
00124   INSTRUCTION_CONTEXTS
00125   IC_max  /* number of contexts listed above */
00126 } InstructionContext;
00127 #undef ENUM_ENTRY
00128 
00129 /*
00130  * Opcode types, which determine which decode table to use, both in the Intel
00131  * manual and also for the decoder.  One type per *_SYM opcode table name.
00132  */
00133 typedef enum {
00134   ONEBYTE       = 0,
00135   TWOBYTE       = 1,
00136   THREEBYTE_38  = 2,
00137   THREEBYTE_3A  = 3,
00138   THREEBYTE_A6  = 4,
00139   THREEBYTE_A7  = 5
00140 } OpcodeType;
00141 
00142 /*
00143  * The following structs are used for the hierarchical decode table.  After
00144  * determining the instruction's class (i.e., which IC_* constant applies to
00145  * it), the decoder reads the opcode.  Some instructions require specific
00146  * values of the ModR/M byte, so the ModR/M byte indexes into the final table.
00147  *
00148  * If a ModR/M byte is not required, the decision is of type MODRM_ONEENTRY,
00149  * and the same instruction ID applies to every ModR/M value.
00150  */
00151 
00152 typedef uint16_t InstrUID;  /* instruction identifier; 16 bits wide */
00153 
00154 /*
00155  * ModRMDecisionType - describes the type of ModR/M decision, allowing the
00156  * consumer to determine the number of entries in it.
00157  *
00158  * MODRM_ONEENTRY - No matter what the value of the ModR/M byte is, the decoded
00159  *                  instruction is the same.
00160  * MODRM_SPLITRM  - If the ModR/M byte is between 0x00 and 0xbf, the opcode
00161  *                  corresponds to one instruction; otherwise, it corresponds to
00162  *                  a different instruction.
00163  * MODRM_SPLITMISC- If the ModR/M byte is between 0x00 and 0xbf, ModR/M byte
00164  *                  divided by 8 is used to select instruction; otherwise, each
00165  *                  value of the ModR/M byte could correspond to a different
00166  *                  instruction.
00167  * MODRM_SPLITREG - ModR/M byte divided by 8 is used to select instruction. This
00168  *                  corresponds to instructions that use reg field as opcode.
00169  * MODRM_FULL     - Potentially, each value of the ModR/M byte could correspond
00170  *                  to a different instruction.
00171  */
00172 
00173 #define MODRMTYPES            \
00174   ENUM_ENTRY(MODRM_ONEENTRY)  \
00175   ENUM_ENTRY(MODRM_SPLITRM)   \
00176   ENUM_ENTRY(MODRM_SPLITMISC)  \
00177   ENUM_ENTRY(MODRM_SPLITREG)  \
00178   ENUM_ENTRY(MODRM_FULL)
00179 
00180 #define ENUM_ENTRY(n) n,
00181 typedef enum {
00182   MODRMTYPES
00183   MODRM_max  /* number of decision types listed above */
00184 } ModRMDecisionType;
00185 #undef ENUM_ENTRY
00186 
00187 /*
00188  * ModRMDecision - Specifies whether a ModR/M byte is needed and (if so) which
00189  *  instruction each possible value of the ModR/M byte corresponds to.  Once
00190  *  this information is known, we have narrowed down to a single instruction.
00191  */
00192 struct ModRMDecision {
00193   uint8_t     modrm_type;  /* presumably a ModRMDecisionType value -- confirm */
00194 
00195   /* The macro below must be defined wherever this file is included. */
00196   INSTRUCTION_IDS
00197 };
00198 
00199 /*
00200  * OpcodeDecision - Specifies which set of ModR/M->instruction tables to look at
00201  *   given a particular opcode.
00202  */
00203 struct OpcodeDecision {
00204   struct ModRMDecision modRMDecisions[256];  /* presumably indexed by opcode */
00205 };
00206 
00207 /*
00208  * ContextDecision - Specifies which opcode->instruction tables to look at given
00209  *   a particular context (set of attributes).  Since there are many possible
00210  *   contexts, the decoder first uses CONTEXTS_SYM to determine which context
00211  *   applies given a specific set of attributes.  Hence there are only IC_max
00212  *   entries in this table, rather than one per combination of attribute bits.
00213  */
00214 struct ContextDecision {
00215   struct OpcodeDecision opcodeDecisions[IC_max];  /* one per InstructionContext */
00216 };
00217 
00218 /*
00219  * Physical encodings of operands; terse entries extend the one above.
00220  */
00221 
00222 #define ENCODINGS                                                              \
00223   ENUM_ENTRY(ENCODING_NONE,   "")                                              \
00224   ENUM_ENTRY(ENCODING_REG,    "Register operand in ModR/M byte.")              \
00225   ENUM_ENTRY(ENCODING_RM,     "R/M operand in ModR/M byte.")                   \
00226   ENUM_ENTRY(ENCODING_VVVV,   "Register operand in VEX.vvvv byte.")            \
00227   ENUM_ENTRY(ENCODING_CB,     "1-byte code offset (possible new CS value)")    \
00228   ENUM_ENTRY(ENCODING_CW,     "2-byte")                                        \
00229   ENUM_ENTRY(ENCODING_CD,     "4-byte")                                        \
00230   ENUM_ENTRY(ENCODING_CP,     "6-byte")                                        \
00231   ENUM_ENTRY(ENCODING_CO,     "8-byte")                                        \
00232   ENUM_ENTRY(ENCODING_CT,     "10-byte")                                       \
00233   ENUM_ENTRY(ENCODING_IB,     "1-byte immediate")                              \
00234   ENUM_ENTRY(ENCODING_IW,     "2-byte")                                        \
00235   ENUM_ENTRY(ENCODING_ID,     "4-byte")                                        \
00236   ENUM_ENTRY(ENCODING_IO,     "8-byte")                                        \
00237   ENUM_ENTRY(ENCODING_RB,     "(AL..DIL, R8L..R15L) Register code added to "   \
00238                               "the opcode byte")                               \
00239   ENUM_ENTRY(ENCODING_RW,     "(AX..DI, R8W..R15W)")                           \
00240   ENUM_ENTRY(ENCODING_RD,     "(EAX..EDI, R8D..R15D)")                         \
00241   ENUM_ENTRY(ENCODING_RO,     "(RAX..RDI, R8..R15)")                           \
00242   ENUM_ENTRY(ENCODING_I,      "Position on floating-point stack added to the " \
00243                               "opcode byte")                                   \
00244                                                                                \
00245   ENUM_ENTRY(ENCODING_Iv,     "Immediate of operand size")                     \
00246   ENUM_ENTRY(ENCODING_Ia,     "Immediate of address size")                     \
00247   ENUM_ENTRY(ENCODING_Rv,     "Register code of operand size added to the "    \
00248                               "opcode byte")                                   \
00249   ENUM_ENTRY(ENCODING_DUP,    "Duplicate of another operand; ID is encoded "   \
00250                               "in type")
00251 
00252 #define ENUM_ENTRY(n, d) n,  /* the description string is dropped here */
00253   typedef enum {
00254     ENCODINGS
00255     ENCODING_max  /* number of encodings listed above */
00256   } OperandEncoding;
00257 #undef ENUM_ENTRY
00258 
00259 /*
00260  * Semantic interpretations of operands; terse entries extend the one above.
00261  */
00262 
00263 #define TYPES                                                                  \
00264   ENUM_ENTRY(TYPE_NONE,       "")                                              \
00265   ENUM_ENTRY(TYPE_REL8,       "1-byte immediate address")                      \
00266   ENUM_ENTRY(TYPE_REL16,      "2-byte")                                        \
00267   ENUM_ENTRY(TYPE_REL32,      "4-byte")                                        \
00268   ENUM_ENTRY(TYPE_REL64,      "8-byte")                                        \
00269   ENUM_ENTRY(TYPE_PTR1616,    "2+2-byte segment+offset address")               \
00270   ENUM_ENTRY(TYPE_PTR1632,    "2+4-byte")                                      \
00271   ENUM_ENTRY(TYPE_PTR1664,    "2+8-byte")                                      \
00272   ENUM_ENTRY(TYPE_R8,         "1-byte register operand")                       \
00273   ENUM_ENTRY(TYPE_R16,        "2-byte")                                        \
00274   ENUM_ENTRY(TYPE_R32,        "4-byte")                                        \
00275   ENUM_ENTRY(TYPE_R64,        "8-byte")                                        \
00276   ENUM_ENTRY(TYPE_IMM8,       "1-byte immediate operand")                      \
00277   ENUM_ENTRY(TYPE_IMM16,      "2-byte")                                        \
00278   ENUM_ENTRY(TYPE_IMM32,      "4-byte")                                        \
00279   ENUM_ENTRY(TYPE_IMM64,      "8-byte")                                        \
00280   ENUM_ENTRY(TYPE_IMM3,       "1-byte immediate operand between 0 and 7")      \
00281   ENUM_ENTRY(TYPE_IMM5,       "1-byte immediate operand between 0 and 31")     \
00282   ENUM_ENTRY(TYPE_RM8,        "1-byte register or memory operand")             \
00283   ENUM_ENTRY(TYPE_RM16,       "2-byte")                                        \
00284   ENUM_ENTRY(TYPE_RM32,       "4-byte")                                        \
00285   ENUM_ENTRY(TYPE_RM64,       "8-byte")                                        \
00286   ENUM_ENTRY(TYPE_M,          "Memory operand")                                \
00287   ENUM_ENTRY(TYPE_M8,         "1-byte")                                        \
00288   ENUM_ENTRY(TYPE_M16,        "2-byte")                                        \
00289   ENUM_ENTRY(TYPE_M32,        "4-byte")                                        \
00290   ENUM_ENTRY(TYPE_M64,        "8-byte")                                        \
00291   ENUM_ENTRY(TYPE_LEA,        "Effective address")                             \
00292   ENUM_ENTRY(TYPE_M128,       "16-byte (SSE/SSE2)")                            \
00293   ENUM_ENTRY(TYPE_M256,       "32-byte (AVX)")                                 \
00294   ENUM_ENTRY(TYPE_M1616,      "2+2-byte segment+offset address")               \
00295   ENUM_ENTRY(TYPE_M1632,      "2+4-byte")                                      \
00296   ENUM_ENTRY(TYPE_M1664,      "2+8-byte")                                      \
00297   ENUM_ENTRY(TYPE_M16_32,     "2+4-byte two-part memory operand (LIDT, LGDT)") \
00298   ENUM_ENTRY(TYPE_M16_16,     "2+2-byte (BOUND)")                              \
00299   ENUM_ENTRY(TYPE_M32_32,     "4+4-byte (BOUND)")                              \
00300   ENUM_ENTRY(TYPE_M16_64,     "2+8-byte (LIDT, LGDT)")                         \
00301   ENUM_ENTRY(TYPE_MOFFS8,     "1-byte memory offset (relative to segment "     \
00302                               "base)")                                         \
00303   ENUM_ENTRY(TYPE_MOFFS16,    "2-byte")                                        \
00304   ENUM_ENTRY(TYPE_MOFFS32,    "4-byte")                                        \
00305   ENUM_ENTRY(TYPE_MOFFS64,    "8-byte")                                        \
00306   ENUM_ENTRY(TYPE_SREG,       "Byte with single bit set: 0 = ES, 1 = CS, "     \
00307                               "2 = SS, 3 = DS, 4 = FS, 5 = GS")                \
00308   ENUM_ENTRY(TYPE_M32FP,      "32-bit IEEE 754 memory floating-point operand") \
00309   ENUM_ENTRY(TYPE_M64FP,      "64-bit")                                        \
00310   ENUM_ENTRY(TYPE_M80FP,      "80-bit extended")                               \
00311   ENUM_ENTRY(TYPE_M16INT,     "2-byte memory integer operand for use in "      \
00312                               "floating-point instructions")                   \
00313   ENUM_ENTRY(TYPE_M32INT,     "4-byte")                                        \
00314   ENUM_ENTRY(TYPE_M64INT,     "8-byte")                                        \
00315   ENUM_ENTRY(TYPE_ST,         "Position on the floating-point stack")          \
00316   ENUM_ENTRY(TYPE_MM,         "MMX register operand")                          \
00317   ENUM_ENTRY(TYPE_MM32,       "4-byte MMX register or memory operand")         \
00318   ENUM_ENTRY(TYPE_MM64,       "8-byte")                                        \
00319   ENUM_ENTRY(TYPE_XMM,        "XMM register operand")                          \
00320   ENUM_ENTRY(TYPE_XMM32,      "4-byte XMM register or memory operand")         \
00321   ENUM_ENTRY(TYPE_XMM64,      "8-byte")                                        \
00322   ENUM_ENTRY(TYPE_XMM128,     "16-byte")                                       \
00323   ENUM_ENTRY(TYPE_XMM256,     "32-byte")                                       \
00324   ENUM_ENTRY(TYPE_XMM0,       "Implicit use of XMM0")                          \
00325   ENUM_ENTRY(TYPE_SEGMENTREG, "Segment register operand")                      \
00326   ENUM_ENTRY(TYPE_DEBUGREG,   "Debug register operand")                        \
00327   ENUM_ENTRY(TYPE_CONTROLREG, "Control register operand")                      \
00328                                                                                \
00329   ENUM_ENTRY(TYPE_Mv,         "Memory operand of operand size")                \
00330   ENUM_ENTRY(TYPE_Rv,         "Register operand of operand size")              \
00331   ENUM_ENTRY(TYPE_IMMv,       "Immediate operand of operand size")             \
00332   ENUM_ENTRY(TYPE_RELv,       "Immediate address of operand size")             \
00333   ENUM_ENTRY(TYPE_DUP0,       "Duplicate of operand 0")                        \
00334   ENUM_ENTRY(TYPE_DUP1,       "operand 1")                                     \
00335   ENUM_ENTRY(TYPE_DUP2,       "operand 2")                                     \
00336   ENUM_ENTRY(TYPE_DUP3,       "operand 3")                                     \
00337   ENUM_ENTRY(TYPE_DUP4,       "operand 4")                                     \
00338   ENUM_ENTRY(TYPE_M512,       "512-byte FPU/MMX/XMM/MXCSR state")
00339 
00340 #define ENUM_ENTRY(n, d) n,  /* the description string is dropped here */
00341 typedef enum {
00342   TYPES
00343   TYPE_max  /* number of operand types listed above */
00344 } OperandType;
00345 #undef ENUM_ENTRY
00346 
00347 /*
00348  * OperandSpecifier - The specification for how to extract and interpret one
00349  *   operand.
00350  */
00351 struct OperandSpecifier {
00352   uint8_t encoding;  /* presumably an OperandEncoding value -- confirm */
00353   uint8_t type;      /* presumably an OperandType value -- confirm */
00354 };
00355 
00356 /*
00357  * Indicates where the opcode modifier (if any) is to be found.  Extended
00358  * opcodes with AddRegFrm have the opcode modifier in the ModR/M byte.
00359  */
00360 
00361 #define MODIFIER_TYPES        \
00362   ENUM_ENTRY(MODIFIER_NONE)   \
00363   ENUM_ENTRY(MODIFIER_OPCODE) \
00364   ENUM_ENTRY(MODIFIER_MODRM)
00365 
00366 #define ENUM_ENTRY(n) n,
00367 typedef enum {
00368   MODIFIER_TYPES
00369   MODIFIER_max  /* number of modifier types listed above */
00370 } ModifierType;
00371 #undef ENUM_ENTRY
00372 
00373 #define X86_MAX_OPERANDS 5
00374 
00375 /*
00376  * The specification for how to extract and interpret a full instruction and
00377  * its operands.
00378  */
00379 struct InstructionSpecifier {
00380   uint8_t modifierType;  /* presumably a ModifierType value -- confirm */
00381   uint8_t modifierBase;  /* NOTE(review): meaning not visible here -- confirm */
00382 
00383   /* The macro below must be defined wherever this file is included. */
00384   INSTRUCTION_SPECIFIER_FIELDS
00385 };
00386 
00387 /*
00388  * Decoding mode for the Intel disassembler.  16-bit, 32-bit, and 64-bit mode
00389  * are supported, and represent real mode, IA-32e, and IA-32e in 64-bit mode,
00390  * respectively.  NOTE(review): "IA-32e" for 32-bit looks imprecise -- confirm.
00391  */
00392 typedef enum {
00393   MODE_16BIT,
00394   MODE_32BIT,
00395   MODE_64BIT
00396 } DisassemblerMode;
00397 
00398 #endif