LLVM API Documentation
00001 /*===-- X86DisassemblerDecoderCommon.h - Disassembler decoder -----*- C -*-===* 00002 * 00003 * The LLVM Compiler Infrastructure 00004 * 00005 * This file is distributed under the University of Illinois Open Source 00006 * License. See LICENSE.TXT for details. 00007 * 00008 *===----------------------------------------------------------------------===* 00009 * 00010 * This file is part of the X86 Disassembler. 00011 * It contains common definitions used by both the disassembler and the table 00012 * generator. 00013 * Documentation for the disassembler can be found in X86Disassembler.h. 00014 * 00015 *===----------------------------------------------------------------------===*/ 00016 00017 /* 00018 * This header file provides those definitions that need to be shared between 00019 * the decoder and the table generator in a C-friendly manner. 00020 */ 00021 00022 #ifndef X86DISASSEMBLERDECODERCOMMON_H 00023 #define X86DISASSEMBLERDECODERCOMMON_H 00024 00025 #include "llvm/Support/DataTypes.h" 00026 00027 #define INSTRUCTIONS_SYM x86DisassemblerInstrSpecifiers 00028 #define CONTEXTS_SYM x86DisassemblerContexts 00029 #define ONEBYTE_SYM x86DisassemblerOneByteOpcodes 00030 #define TWOBYTE_SYM x86DisassemblerTwoByteOpcodes 00031 #define THREEBYTE38_SYM x86DisassemblerThreeByte38Opcodes 00032 #define THREEBYTE3A_SYM x86DisassemblerThreeByte3AOpcodes 00033 #define THREEBYTEA6_SYM x86DisassemblerThreeByteA6Opcodes 00034 #define THREEBYTEA7_SYM x86DisassemblerThreeByteA7Opcodes 00035 00036 #define INSTRUCTIONS_STR "x86DisassemblerInstrSpecifiers" 00037 #define CONTEXTS_STR "x86DisassemblerContexts" 00038 #define ONEBYTE_STR "x86DisassemblerOneByteOpcodes" 00039 #define TWOBYTE_STR "x86DisassemblerTwoByteOpcodes" 00040 #define THREEBYTE38_STR "x86DisassemblerThreeByte38Opcodes" 00041 #define THREEBYTE3A_STR "x86DisassemblerThreeByte3AOpcodes" 00042 #define THREEBYTEA6_STR "x86DisassemblerThreeByteA6Opcodes" 00043 #define THREEBYTEA7_STR "x86DisassemblerThreeByteA7Opcodes" 00044 00045 /* 00046 * Attributes of an instruction that must be known before the opcode can be 00047 * processed correctly. Most of these indicate the presence of particular 00048 * prefixes, but ATTR_64BIT is simply an attribute of the decoding context. 00049 */ 00050 #define ATTRIBUTE_BITS \ 00051 ENUM_ENTRY(ATTR_NONE, 0x00) \ 00052 ENUM_ENTRY(ATTR_64BIT, 0x01) \ 00053 ENUM_ENTRY(ATTR_XS, 0x02) \ 00054 ENUM_ENTRY(ATTR_XD, 0x04) \ 00055 ENUM_ENTRY(ATTR_REXW, 0x08) \ 00056 ENUM_ENTRY(ATTR_OPSIZE, 0x10) \ 00057 ENUM_ENTRY(ATTR_ADSIZE, 0x20) \ 00058 ENUM_ENTRY(ATTR_VEX, 0x40) \ 00059 ENUM_ENTRY(ATTR_VEXL, 0x80) 00060 00061 #define ENUM_ENTRY(n, v) n = v, 00062 enum attributeBits { 00063 ATTRIBUTE_BITS 00064 ATTR_max 00065 }; 00066 #undef ENUM_ENTRY 00067 00068 /* 00069 * Combinations of the above attributes that are relevant to instruction 00070 * decode. Although other combinations are possible, they can be reduced to 00071 * these without affecting the ultimately decoded instruction. 00072 */ 00073 00074 /* Class name Rank Rationale for rank assignment */ 00075 #define INSTRUCTION_CONTEXTS \ 00076 ENUM_ENTRY(IC, 0, "says nothing about the instruction") \ 00077 ENUM_ENTRY(IC_64BIT, 1, "says the instruction applies in " \ 00078 "64-bit mode but no more") \ 00079 ENUM_ENTRY(IC_OPSIZE, 3, "requires an OPSIZE prefix, so " \ 00080 "operands change width") \ 00081 ENUM_ENTRY(IC_ADSIZE, 3, "requires an ADSIZE prefix, so " \ 00082 "operands change width") \ 00083 ENUM_ENTRY(IC_XD, 2, "may say something about the opcode " \ 00084 "but not the operands") \ 00085 ENUM_ENTRY(IC_XS, 2, "may say something about the opcode " \ 00086 "but not the operands") \ 00087 ENUM_ENTRY(IC_XD_OPSIZE, 3, "requires an OPSIZE prefix, so " \ 00088 "operands change width") \ 00089 ENUM_ENTRY(IC_XS_OPSIZE, 3, "requires an OPSIZE prefix, so " \ 00090 "operands change width") \ 00091 ENUM_ENTRY(IC_64BIT_REXW, 4, "requires a REX.W prefix, so operands "\ 00092 "change width; overrides IC_OPSIZE") \ 00093 ENUM_ENTRY(IC_64BIT_OPSIZE, 3, "Just as meaningful as IC_OPSIZE") \ 00094 ENUM_ENTRY(IC_64BIT_ADSIZE, 3, "Just as meaningful as IC_ADSIZE") \ 00095 ENUM_ENTRY(IC_64BIT_XD, 5, "XD instructions are SSE; REX.W is " \ 00096 "secondary") \ 00097 ENUM_ENTRY(IC_64BIT_XS, 5, "Just as meaningful as IC_64BIT_XD") \ 00098 ENUM_ENTRY(IC_64BIT_XD_OPSIZE, 3, "Just as meaningful as IC_XD_OPSIZE") \ 00099 ENUM_ENTRY(IC_64BIT_XS_OPSIZE, 3, "Just as meaningful as IC_XS_OPSIZE") \ 00100 ENUM_ENTRY(IC_64BIT_REXW_XS, 6, "OPSIZE could mean a different " \ 00101 "opcode") \ 00102 ENUM_ENTRY(IC_64BIT_REXW_XD, 6, "Just as meaningful as " \ 00103 "IC_64BIT_REXW_XS") \ 00104 ENUM_ENTRY(IC_64BIT_REXW_OPSIZE, 7, "The Dynamic Duo! Prefer over all " \ 00105 "else because this changes most " \ 00106 "operands' meaning") \ 00107 ENUM_ENTRY(IC_VEX, 1, "requires a VEX prefix") \ 00108 ENUM_ENTRY(IC_VEX_XS, 2, "requires VEX and the XS prefix") \ 00109 ENUM_ENTRY(IC_VEX_XD, 2, "requires VEX and the XD prefix") \ 00110 ENUM_ENTRY(IC_VEX_OPSIZE, 2, "requires VEX and the OpSize prefix") \ 00111 ENUM_ENTRY(IC_VEX_W, 3, "requires VEX and the W prefix") \ 00112 ENUM_ENTRY(IC_VEX_W_XS, 4, "requires VEX, W, and XS prefix") \ 00113 ENUM_ENTRY(IC_VEX_W_XD, 4, "requires VEX, W, and XD prefix") \ 00114 ENUM_ENTRY(IC_VEX_W_OPSIZE, 4, "requires VEX, W, and OpSize") \ 00115 ENUM_ENTRY(IC_VEX_L, 3, "requires VEX and the L prefix") \ 00116 ENUM_ENTRY(IC_VEX_L_XS, 4, "requires VEX and the L and XS prefix")\ 00117 ENUM_ENTRY(IC_VEX_L_XD, 4, "requires VEX and the L and XD prefix")\ 00118 ENUM_ENTRY(IC_VEX_L_OPSIZE, 4, "requires VEX, L, and OpSize") \ 00119 ENUM_ENTRY(IC_VEX_L_W_OPSIZE, 5, "requires VEX, L, W and OpSize") 00120 00121 00122 #define ENUM_ENTRY(n, r, d) n, 00123 typedef enum { 00124 INSTRUCTION_CONTEXTS 00125 IC_max 00126 } InstructionContext; 00127 #undef ENUM_ENTRY 00128 00129 /* 00130 * Opcode types, which determine which decode table to use, both in the Intel 00131 * manual and also for the decoder. 00132 */ 00133 typedef enum { 00134 ONEBYTE = 0, 00135 TWOBYTE = 1, 00136 THREEBYTE_38 = 2, 00137 THREEBYTE_3A = 3, 00138 THREEBYTE_A6 = 4, 00139 THREEBYTE_A7 = 5 00140 } OpcodeType; 00141 00142 /* 00143 * The following structs are used for the hierarchical decode table. After 00144 * determining the instruction's class (i.e., which IC_* constant applies to 00145 * it), the decoder reads the opcode. Some instructions require specific 00146 * values of the ModR/M byte, so the ModR/M byte indexes into the final table. 00147 * 00148 * If a ModR/M byte is not required, "required" is left unset, and the values 00149 * for each instructionID are identical. 00150 */ 00151 00152 typedef uint16_t InstrUID; 00153 00154 /* 00155 * ModRMDecisionType - describes the type of ModR/M decision, allowing the 00156 * consumer to determine the number of entries in it. 00157 * 00158 * MODRM_ONEENTRY - No matter what the value of the ModR/M byte is, the decoded 00159 * instruction is the same. 00160 * MODRM_SPLITRM - If the ModR/M byte is between 0x00 and 0xbf, the opcode 00161 * corresponds to one instruction; otherwise, it corresponds to 00162 * a different instruction. 00163 * MODRM_SPLITMISC- If the ModR/M byte is between 0x00 and 0xbf, ModR/M byte 00164 * divided by 8 is used to select instruction; otherwise, each 00165 * value of the ModR/M byte could correspond to a different 00166 * instruction. 00167 * MODRM_SPLITREG - ModR/M byte divided by 8 is used to select instruction. This 00168 corresponds to instructions that use reg field as opcode 00169 * MODRM_FULL - Potentially, each value of the ModR/M byte could correspond 00170 * to a different instruction. 00171 */ 00172 00173 #define MODRMTYPES \ 00174 ENUM_ENTRY(MODRM_ONEENTRY) \ 00175 ENUM_ENTRY(MODRM_SPLITRM) \ 00176 ENUM_ENTRY(MODRM_SPLITMISC) \ 00177 ENUM_ENTRY(MODRM_SPLITREG) \ 00178 ENUM_ENTRY(MODRM_FULL) 00179 00180 #define ENUM_ENTRY(n) n, 00181 typedef enum { 00182 MODRMTYPES 00183 MODRM_max 00184 } ModRMDecisionType; 00185 #undef ENUM_ENTRY 00186 00187 /* 00188 * ModRMDecision - Specifies whether a ModR/M byte is needed and (if so) which 00189 * instruction each possible value of the ModR/M byte corresponds to. Once 00190 * this information is known, we have narrowed down to a single instruction. 00191 */ 00192 struct ModRMDecision { 00193 uint8_t modrm_type; 00194 00195 /* The macro below must be defined wherever this file is included. */ 00196 INSTRUCTION_IDS 00197 }; 00198 00199 /* 00200 * OpcodeDecision - Specifies which set of ModR/M->instruction tables to look at 00201 * given a particular opcode. 00202 */ 00203 struct OpcodeDecision { 00204 struct ModRMDecision modRMDecisions[256]; 00205 }; 00206 00207 /* 00208 * ContextDecision - Specifies which opcode->instruction tables to look at given 00209 * a particular context (set of attributes). Since there are many possible 00210 * contexts, the decoder first uses CONTEXTS_SYM to determine which context 00211 * applies given a specific set of attributes. Hence there are only IC_max 00212 * entries in this table, rather than 2^(ATTR_max). 00213 */ 00214 struct ContextDecision { 00215 struct OpcodeDecision opcodeDecisions[IC_max]; 00216 }; 00217 00218 /* 00219 * Physical encodings of instruction operands. 00220 */ 00221 00222 #define ENCODINGS \ 00223 ENUM_ENTRY(ENCODING_NONE, "") \ 00224 ENUM_ENTRY(ENCODING_REG, "Register operand in ModR/M byte.") \ 00225 ENUM_ENTRY(ENCODING_RM, "R/M operand in ModR/M byte.") \ 00226 ENUM_ENTRY(ENCODING_VVVV, "Register operand in VEX.vvvv byte.") \ 00227 ENUM_ENTRY(ENCODING_CB, "1-byte code offset (possible new CS value)") \ 00228 ENUM_ENTRY(ENCODING_CW, "2-byte") \ 00229 ENUM_ENTRY(ENCODING_CD, "4-byte") \ 00230 ENUM_ENTRY(ENCODING_CP, "6-byte") \ 00231 ENUM_ENTRY(ENCODING_CO, "8-byte") \ 00232 ENUM_ENTRY(ENCODING_CT, "10-byte") \ 00233 ENUM_ENTRY(ENCODING_IB, "1-byte immediate") \ 00234 ENUM_ENTRY(ENCODING_IW, "2-byte") \ 00235 ENUM_ENTRY(ENCODING_ID, "4-byte") \ 00236 ENUM_ENTRY(ENCODING_IO, "8-byte") \ 00237 ENUM_ENTRY(ENCODING_RB, "(AL..DIL, R8L..R15L) Register code added to " \ 00238 "the opcode byte") \ 00239 ENUM_ENTRY(ENCODING_RW, "(AX..DI, R8W..R15W)") \ 00240 ENUM_ENTRY(ENCODING_RD, "(EAX..EDI, R8D..R15D)") \ 00241 ENUM_ENTRY(ENCODING_RO, "(RAX..RDI, R8..R15)") \ 00242 ENUM_ENTRY(ENCODING_I, "Position on floating-point stack added to the " \ 00243 "opcode byte") \ 00244 \ 00245 ENUM_ENTRY(ENCODING_Iv, "Immediate of operand size") \ 00246 ENUM_ENTRY(ENCODING_Ia, "Immediate of address size") \ 00247 ENUM_ENTRY(ENCODING_Rv, "Register code of operand size added to the " \ 00248 "opcode byte") \ 00249 ENUM_ENTRY(ENCODING_DUP, "Duplicate of another operand; ID is encoded " \ 00250 "in type") 00251 00252 #define ENUM_ENTRY(n, d) n, 00253 typedef enum { 00254 ENCODINGS 00255 ENCODING_max 00256 } OperandEncoding; 00257 #undef ENUM_ENTRY 00258 00259 /* 00260 * Semantic interpretations of instruction operands. 00261 */ 00262 00263 #define TYPES \ 00264 ENUM_ENTRY(TYPE_NONE, "") \ 00265 ENUM_ENTRY(TYPE_REL8, "1-byte immediate address") \ 00266 ENUM_ENTRY(TYPE_REL16, "2-byte") \ 00267 ENUM_ENTRY(TYPE_REL32, "4-byte") \ 00268 ENUM_ENTRY(TYPE_REL64, "8-byte") \ 00269 ENUM_ENTRY(TYPE_PTR1616, "2+2-byte segment+offset address") \ 00270 ENUM_ENTRY(TYPE_PTR1632, "2+4-byte") \ 00271 ENUM_ENTRY(TYPE_PTR1664, "2+8-byte") \ 00272 ENUM_ENTRY(TYPE_R8, "1-byte register operand") \ 00273 ENUM_ENTRY(TYPE_R16, "2-byte") \ 00274 ENUM_ENTRY(TYPE_R32, "4-byte") \ 00275 ENUM_ENTRY(TYPE_R64, "8-byte") \ 00276 ENUM_ENTRY(TYPE_IMM8, "1-byte immediate operand") \ 00277 ENUM_ENTRY(TYPE_IMM16, "2-byte") \ 00278 ENUM_ENTRY(TYPE_IMM32, "4-byte") \ 00279 ENUM_ENTRY(TYPE_IMM64, "8-byte") \ 00280 ENUM_ENTRY(TYPE_IMM3, "1-byte immediate operand between 0 and 7") \ 00281 ENUM_ENTRY(TYPE_IMM5, "1-byte immediate operand between 0 and 31") \ 00282 ENUM_ENTRY(TYPE_RM8, "1-byte register or memory operand") \ 00283 ENUM_ENTRY(TYPE_RM16, "2-byte") \ 00284 ENUM_ENTRY(TYPE_RM32, "4-byte") \ 00285 ENUM_ENTRY(TYPE_RM64, "8-byte") \ 00286 ENUM_ENTRY(TYPE_M, "Memory operand") \ 00287 ENUM_ENTRY(TYPE_M8, "1-byte") \ 00288 ENUM_ENTRY(TYPE_M16, "2-byte") \ 00289 ENUM_ENTRY(TYPE_M32, "4-byte") \ 00290 ENUM_ENTRY(TYPE_M64, "8-byte") \ 00291 ENUM_ENTRY(TYPE_LEA, "Effective address") \ 00292 ENUM_ENTRY(TYPE_M128, "16-byte (SSE/SSE2)") \ 00293 ENUM_ENTRY(TYPE_M256, "256-byte (AVX)") \ 00294 ENUM_ENTRY(TYPE_M1616, "2+2-byte segment+offset address") \ 00295 ENUM_ENTRY(TYPE_M1632, "2+4-byte") \ 00296 ENUM_ENTRY(TYPE_M1664, "2+8-byte") \ 00297 ENUM_ENTRY(TYPE_M16_32, "2+4-byte two-part memory operand (LIDT, LGDT)") \ 00298 ENUM_ENTRY(TYPE_M16_16, "2+2-byte (BOUND)") \ 00299 ENUM_ENTRY(TYPE_M32_32, "4+4-byte (BOUND)") \ 00300 ENUM_ENTRY(TYPE_M16_64, "2+8-byte (LIDT, LGDT)") \ 00301 ENUM_ENTRY(TYPE_MOFFS8, "1-byte memory offset (relative to segment " \ 00302 "base)") \ 00303 ENUM_ENTRY(TYPE_MOFFS16, "2-byte") \ 00304 ENUM_ENTRY(TYPE_MOFFS32, "4-byte") \ 00305 ENUM_ENTRY(TYPE_MOFFS64, "8-byte") \ 00306 ENUM_ENTRY(TYPE_SREG, "Byte with single bit set: 0 = ES, 1 = CS, " \ 00307 "2 = SS, 3 = DS, 4 = FS, 5 = GS") \ 00308 ENUM_ENTRY(TYPE_M32FP, "32-bit IEE754 memory floating-point operand") \ 00309 ENUM_ENTRY(TYPE_M64FP, "64-bit") \ 00310 ENUM_ENTRY(TYPE_M80FP, "80-bit extended") \ 00311 ENUM_ENTRY(TYPE_M16INT, "2-byte memory integer operand for use in " \ 00312 "floating-point instructions") \ 00313 ENUM_ENTRY(TYPE_M32INT, "4-byte") \ 00314 ENUM_ENTRY(TYPE_M64INT, "8-byte") \ 00315 ENUM_ENTRY(TYPE_ST, "Position on the floating-point stack") \ 00316 ENUM_ENTRY(TYPE_MM, "MMX register operand") \ 00317 ENUM_ENTRY(TYPE_MM32, "4-byte MMX register or memory operand") \ 00318 ENUM_ENTRY(TYPE_MM64, "8-byte") \ 00319 ENUM_ENTRY(TYPE_XMM, "XMM register operand") \ 00320 ENUM_ENTRY(TYPE_XMM32, "4-byte XMM register or memory operand") \ 00321 ENUM_ENTRY(TYPE_XMM64, "8-byte") \ 00322 ENUM_ENTRY(TYPE_XMM128, "16-byte") \ 00323 ENUM_ENTRY(TYPE_XMM256, "32-byte") \ 00324 ENUM_ENTRY(TYPE_XMM0, "Implicit use of XMM0") \ 00325 ENUM_ENTRY(TYPE_SEGMENTREG, "Segment register operand") \ 00326 ENUM_ENTRY(TYPE_DEBUGREG, "Debug register operand") \ 00327 ENUM_ENTRY(TYPE_CONTROLREG, "Control register operand") \ 00328 \ 00329 ENUM_ENTRY(TYPE_Mv, "Memory operand of operand size") \ 00330 ENUM_ENTRY(TYPE_Rv, "Register operand of operand size") \ 00331 ENUM_ENTRY(TYPE_IMMv, "Immediate operand of operand size") \ 00332 ENUM_ENTRY(TYPE_RELv, "Immediate address of operand size") \ 00333 ENUM_ENTRY(TYPE_DUP0, "Duplicate of operand 0") \ 00334 ENUM_ENTRY(TYPE_DUP1, "operand 1") \ 00335 ENUM_ENTRY(TYPE_DUP2, "operand 2") \ 00336 ENUM_ENTRY(TYPE_DUP3, "operand 3") \ 00337 ENUM_ENTRY(TYPE_DUP4, "operand 4") \ 00338 ENUM_ENTRY(TYPE_M512, "512-bit FPU/MMX/XMM/MXCSR state") 00339 00340 #define ENUM_ENTRY(n, d) n, 00341 typedef enum { 00342 TYPES 00343 TYPE_max 00344 } OperandType; 00345 #undef ENUM_ENTRY 00346 00347 /* 00348 * OperandSpecifier - The specification for how to extract and interpret one 00349 * operand. 00350 */ 00351 struct OperandSpecifier { 00352 uint8_t encoding; 00353 uint8_t type; 00354 }; 00355 00356 /* 00357 * Indicates where the opcode modifier (if any) is to be found. Extended 00358 * opcodes with AddRegFrm have the opcode modifier in the ModR/M byte. 00359 */ 00360 00361 #define MODIFIER_TYPES \ 00362 ENUM_ENTRY(MODIFIER_NONE) \ 00363 ENUM_ENTRY(MODIFIER_OPCODE) \ 00364 ENUM_ENTRY(MODIFIER_MODRM) 00365 00366 #define ENUM_ENTRY(n) n, 00367 typedef enum { 00368 MODIFIER_TYPES 00369 MODIFIER_max 00370 } ModifierType; 00371 #undef ENUM_ENTRY 00372 00373 #define X86_MAX_OPERANDS 5 00374 00375 /* 00376 * The specification for how to extract and interpret a full instruction and 00377 * its operands. 00378 */ 00379 struct InstructionSpecifier { 00380 uint8_t modifierType; 00381 uint8_t modifierBase; 00382 00383 /* The macro below must be defined wherever this file is included. */ 00384 INSTRUCTION_SPECIFIER_FIELDS 00385 }; 00386 00387 /* 00388 * Decoding mode for the Intel disassembler. 16-bit, 32-bit, and 64-bit mode 00389 * are supported, and represent real mode, IA-32e, and IA-32e in 64-bit mode, 00390 * respectively. 00391 */ 00392 typedef enum { 00393 MODE_16BIT, 00394 MODE_32BIT, 00395 MODE_64BIT 00396 } DisassemblerMode; 00397 00398 #endif