LLVM API Documentation
00001 /*===-- X86DisassemblerDecoderInternal.h - Disassembler decoder ---*- C -*-===* 00002 * 00003 * The LLVM Compiler Infrastructure 00004 * 00005 * This file is distributed under the University of Illinois Open Source 00006 * License. See LICENSE.TXT for details. 00007 * 00008 *===----------------------------------------------------------------------===* 00009 * 00010 * This file is part of the X86 Disassembler. 00011 * It contains the public interface of the instruction decoder. 00012 * Documentation for the disassembler can be found in X86Disassembler.h. 00013 * 00014 *===----------------------------------------------------------------------===*/ 00015 00016 #ifndef X86DISASSEMBLERDECODER_H 00017 #define X86DISASSEMBLERDECODER_H 00018 00019 #ifdef __cplusplus 00020 extern "C" { 00021 #endif 00022 00023 #define INSTRUCTION_SPECIFIER_FIELDS \ 00024 uint16_t operands; 00025 00026 #define INSTRUCTION_IDS \ 00027 uint16_t instructionIDs; 00028 00029 #include "X86DisassemblerDecoderCommon.h" 00030 00031 #undef INSTRUCTION_SPECIFIER_FIELDS 00032 #undef INSTRUCTION_IDS 00033 00034 /* 00035 * Accessor functions for various fields of an Intel instruction 00036 */ 00037 #define modFromModRM(modRM) (((modRM) & 0xc0) >> 6) 00038 #define regFromModRM(modRM) (((modRM) & 0x38) >> 3) 00039 #define rmFromModRM(modRM) ((modRM) & 0x7) 00040 #define scaleFromSIB(sib) (((sib) & 0xc0) >> 6) 00041 #define indexFromSIB(sib) (((sib) & 0x38) >> 3) 00042 #define baseFromSIB(sib) ((sib) & 0x7) 00043 #define wFromREX(rex) (((rex) & 0x8) >> 3) 00044 #define rFromREX(rex) (((rex) & 0x4) >> 2) 00045 #define xFromREX(rex) (((rex) & 0x2) >> 1) 00046 #define bFromREX(rex) ((rex) & 0x1) 00047 00048 #define rFromVEX2of3(vex) (((~(vex)) & 0x80) >> 7) 00049 #define xFromVEX2of3(vex) (((~(vex)) & 0x40) >> 6) 00050 #define bFromVEX2of3(vex) (((~(vex)) & 0x20) >> 5) 00051 #define mmmmmFromVEX2of3(vex) ((vex) & 0x1f) 00052 #define wFromVEX3of3(vex) (((vex) & 0x80) >> 7) 00053 #define vvvvFromVEX3of3(vex) (((~(vex)) & 0x78) >> 3) 00054 #define lFromVEX3of3(vex) (((vex) & 0x4) >> 2) 00055 #define ppFromVEX3of3(vex) ((vex) & 0x3) 00056 00057 #define rFromVEX2of2(vex) (((~(vex)) & 0x80) >> 7) 00058 #define vvvvFromVEX2of2(vex) (((~(vex)) & 0x78) >> 3) 00059 #define lFromVEX2of2(vex) (((vex) & 0x4) >> 2) 00060 #define ppFromVEX2of2(vex) ((vex) & 0x3) 00061 00062 /* 00063 * These enums represent Intel registers for use by the decoder. 00064 */ 00065 00066 #define REGS_8BIT \ 00067 ENTRY(AL) \ 00068 ENTRY(CL) \ 00069 ENTRY(DL) \ 00070 ENTRY(BL) \ 00071 ENTRY(AH) \ 00072 ENTRY(CH) \ 00073 ENTRY(DH) \ 00074 ENTRY(BH) \ 00075 ENTRY(R8B) \ 00076 ENTRY(R9B) \ 00077 ENTRY(R10B) \ 00078 ENTRY(R11B) \ 00079 ENTRY(R12B) \ 00080 ENTRY(R13B) \ 00081 ENTRY(R14B) \ 00082 ENTRY(R15B) \ 00083 ENTRY(SPL) \ 00084 ENTRY(BPL) \ 00085 ENTRY(SIL) \ 00086 ENTRY(DIL) 00087 00088 #define EA_BASES_16BIT \ 00089 ENTRY(BX_SI) \ 00090 ENTRY(BX_DI) \ 00091 ENTRY(BP_SI) \ 00092 ENTRY(BP_DI) \ 00093 ENTRY(SI) \ 00094 ENTRY(DI) \ 00095 ENTRY(BP) \ 00096 ENTRY(BX) \ 00097 ENTRY(R8W) \ 00098 ENTRY(R9W) \ 00099 ENTRY(R10W) \ 00100 ENTRY(R11W) \ 00101 ENTRY(R12W) \ 00102 ENTRY(R13W) \ 00103 ENTRY(R14W) \ 00104 ENTRY(R15W) 00105 00106 #define REGS_16BIT \ 00107 ENTRY(AX) \ 00108 ENTRY(CX) \ 00109 ENTRY(DX) \ 00110 ENTRY(BX) \ 00111 ENTRY(SP) \ 00112 ENTRY(BP) \ 00113 ENTRY(SI) \ 00114 ENTRY(DI) \ 00115 ENTRY(R8W) \ 00116 ENTRY(R9W) \ 00117 ENTRY(R10W) \ 00118 ENTRY(R11W) \ 00119 ENTRY(R12W) \ 00120 ENTRY(R13W) \ 00121 ENTRY(R14W) \ 00122 ENTRY(R15W) 00123 00124 #define EA_BASES_32BIT \ 00125 ENTRY(EAX) \ 00126 ENTRY(ECX) \ 00127 ENTRY(EDX) \ 00128 ENTRY(EBX) \ 00129 ENTRY(sib) \ 00130 ENTRY(EBP) \ 00131 ENTRY(ESI) \ 00132 ENTRY(EDI) \ 00133 ENTRY(R8D) \ 00134 ENTRY(R9D) \ 00135 ENTRY(R10D) \ 00136 ENTRY(R11D) \ 00137 ENTRY(R12D) \ 00138 ENTRY(R13D) \ 00139 ENTRY(R14D) \ 00140 ENTRY(R15D) 00141 00142 #define REGS_32BIT \ 00143 ENTRY(EAX) \ 00144 ENTRY(ECX) \ 00145 ENTRY(EDX) \ 00146 ENTRY(EBX) \ 00147 ENTRY(ESP) \ 00148 ENTRY(EBP) \ 00149 ENTRY(ESI) \ 00150 ENTRY(EDI) \ 00151 ENTRY(R8D) \ 00152 ENTRY(R9D) \ 00153 ENTRY(R10D) \ 00154 ENTRY(R11D) \ 00155 ENTRY(R12D) \ 00156 ENTRY(R13D) \ 00157 ENTRY(R14D) \ 00158 ENTRY(R15D) 00159 00160 #define EA_BASES_64BIT \ 00161 ENTRY(RAX) \ 00162 ENTRY(RCX) \ 00163 ENTRY(RDX) \ 00164 ENTRY(RBX) \ 00165 ENTRY(sib64) \ 00166 ENTRY(RBP) \ 00167 ENTRY(RSI) \ 00168 ENTRY(RDI) \ 00169 ENTRY(R8) \ 00170 ENTRY(R9) \ 00171 ENTRY(R10) \ 00172 ENTRY(R11) \ 00173 ENTRY(R12) \ 00174 ENTRY(R13) \ 00175 ENTRY(R14) \ 00176 ENTRY(R15) 00177 00178 #define REGS_64BIT \ 00179 ENTRY(RAX) \ 00180 ENTRY(RCX) \ 00181 ENTRY(RDX) \ 00182 ENTRY(RBX) \ 00183 ENTRY(RSP) \ 00184 ENTRY(RBP) \ 00185 ENTRY(RSI) \ 00186 ENTRY(RDI) \ 00187 ENTRY(R8) \ 00188 ENTRY(R9) \ 00189 ENTRY(R10) \ 00190 ENTRY(R11) \ 00191 ENTRY(R12) \ 00192 ENTRY(R13) \ 00193 ENTRY(R14) \ 00194 ENTRY(R15) 00195 00196 #define REGS_MMX \ 00197 ENTRY(MM0) \ 00198 ENTRY(MM1) \ 00199 ENTRY(MM2) \ 00200 ENTRY(MM3) \ 00201 ENTRY(MM4) \ 00202 ENTRY(MM5) \ 00203 ENTRY(MM6) \ 00204 ENTRY(MM7) 00205 00206 #define REGS_XMM \ 00207 ENTRY(XMM0) \ 00208 ENTRY(XMM1) \ 00209 ENTRY(XMM2) \ 00210 ENTRY(XMM3) \ 00211 ENTRY(XMM4) \ 00212 ENTRY(XMM5) \ 00213 ENTRY(XMM6) \ 00214 ENTRY(XMM7) \ 00215 ENTRY(XMM8) \ 00216 ENTRY(XMM9) \ 00217 ENTRY(XMM10) \ 00218 ENTRY(XMM11) \ 00219 ENTRY(XMM12) \ 00220 ENTRY(XMM13) \ 00221 ENTRY(XMM14) \ 00222 ENTRY(XMM15) 00223 00224 #define REGS_YMM \ 00225 ENTRY(YMM0) \ 00226 ENTRY(YMM1) \ 00227 ENTRY(YMM2) \ 00228 ENTRY(YMM3) \ 00229 ENTRY(YMM4) \ 00230 ENTRY(YMM5) \ 00231 ENTRY(YMM6) \ 00232 ENTRY(YMM7) \ 00233 ENTRY(YMM8) \ 00234 ENTRY(YMM9) \ 00235 ENTRY(YMM10) \ 00236 ENTRY(YMM11) \ 00237 ENTRY(YMM12) \ 00238 ENTRY(YMM13) \ 00239 ENTRY(YMM14) \ 00240 ENTRY(YMM15) 00241 00242 #define REGS_SEGMENT \ 00243 ENTRY(ES) \ 00244 ENTRY(CS) \ 00245 ENTRY(SS) \ 00246 ENTRY(DS) \ 00247 ENTRY(FS) \ 00248 ENTRY(GS) 00249 00250 #define REGS_DEBUG \ 00251 ENTRY(DR0) \ 00252 ENTRY(DR1) \ 00253 ENTRY(DR2) \ 00254 ENTRY(DR3) \ 00255 ENTRY(DR4) \ 00256 ENTRY(DR5) \ 00257 ENTRY(DR6) \ 00258 ENTRY(DR7) 00259 00260 #define REGS_CONTROL \ 00261 ENTRY(CR0) \ 00262 ENTRY(CR1) \ 00263 ENTRY(CR2) \ 00264 ENTRY(CR3) \ 00265 ENTRY(CR4) \ 00266 ENTRY(CR5) \ 00267 ENTRY(CR6) \ 00268 ENTRY(CR7) \ 00269 ENTRY(CR8) 00270 00271 #define ALL_EA_BASES \ 00272 EA_BASES_16BIT \ 00273 EA_BASES_32BIT \ 00274 EA_BASES_64BIT 00275 00276 #define ALL_SIB_BASES \ 00277 REGS_32BIT \ 00278 REGS_64BIT 00279 00280 #define ALL_REGS \ 00281 REGS_8BIT \ 00282 REGS_16BIT \ 00283 REGS_32BIT \ 00284 REGS_64BIT \ 00285 REGS_MMX \ 00286 REGS_XMM \ 00287 REGS_YMM \ 00288 REGS_SEGMENT \ 00289 REGS_DEBUG \ 00290 REGS_CONTROL \ 00291 ENTRY(RIP) 00292 00293 /* 00294 * EABase - All possible values of the base field for effective-address 00295 * computations, a.k.a. the Mod and R/M fields of the ModR/M byte. We 00296 * distinguish between bases (EA_BASE_*) and registers that just happen to be 00297 * referred to when Mod == 0b11 (EA_REG_*). 00298 */ 00299 typedef enum { 00300 EA_BASE_NONE, 00301 #define ENTRY(x) EA_BASE_##x, 00302 ALL_EA_BASES 00303 #undef ENTRY 00304 #define ENTRY(x) EA_REG_##x, 00305 ALL_REGS 00306 #undef ENTRY 00307 EA_max 00308 } EABase; 00309 00310 /* 00311 * SIBIndex - All possible values of the SIB index field. 00312 * Borrows entries from ALL_EA_BASES with the special case that 00313 * sib is synonymous with NONE. 00314 * Vector SIB: index can be XMM or YMM. 00315 */ 00316 typedef enum { 00317 SIB_INDEX_NONE, 00318 #define ENTRY(x) SIB_INDEX_##x, 00319 ALL_EA_BASES 00320 REGS_XMM 00321 REGS_YMM 00322 #undef ENTRY 00323 SIB_INDEX_max 00324 } SIBIndex; 00325 00326 /* 00327 * SIBBase - All possible values of the SIB base field. 00328 */ 00329 typedef enum { 00330 SIB_BASE_NONE, 00331 #define ENTRY(x) SIB_BASE_##x, 00332 ALL_SIB_BASES 00333 #undef ENTRY 00334 SIB_BASE_max 00335 } SIBBase; 00336 00337 /* 00338 * EADisplacement - Possible displacement types for effective-address 00339 * computations. 00340 */ 00341 typedef enum { 00342 EA_DISP_NONE, 00343 EA_DISP_8, 00344 EA_DISP_16, 00345 EA_DISP_32 00346 } EADisplacement; 00347 00348 /* 00349 * Reg - All possible values of the reg field in the ModR/M byte. 00350 */ 00351 typedef enum { 00352 #define ENTRY(x) MODRM_REG_##x, 00353 ALL_REGS 00354 #undef ENTRY 00355 MODRM_REG_max 00356 } Reg; 00357 00358 /* 00359 * SegmentOverride - All possible segment overrides. 00360 */ 00361 typedef enum { 00362 SEG_OVERRIDE_NONE, 00363 SEG_OVERRIDE_CS, 00364 SEG_OVERRIDE_SS, 00365 SEG_OVERRIDE_DS, 00366 SEG_OVERRIDE_ES, 00367 SEG_OVERRIDE_FS, 00368 SEG_OVERRIDE_GS, 00369 SEG_OVERRIDE_max 00370 } SegmentOverride; 00371 00372 /* 00373 * VEXLeadingOpcodeByte - Possible values for the VEX.m-mmmm field 00374 */ 00375 00376 typedef enum { 00377 VEX_LOB_0F = 0x1, 00378 VEX_LOB_0F38 = 0x2, 00379 VEX_LOB_0F3A = 0x3 00380 } VEXLeadingOpcodeByte; 00381 00382 /* 00383 * VEXPrefixCode - Possible values for the VEX.pp field 00384 */ 00385 00386 typedef enum { 00387 VEX_PREFIX_NONE = 0x0, 00388 VEX_PREFIX_66 = 0x1, 00389 VEX_PREFIX_F3 = 0x2, 00390 VEX_PREFIX_F2 = 0x3 00391 } VEXPrefixCode; 00392 00393 typedef uint8_t BOOL; 00394 00395 /* 00396 * byteReader_t - Type for the byte reader that the consumer must provide to 00397 * the decoder. Reads a single byte from the instruction's address space. 00398 * @param arg - A baton that the consumer can associate with any internal 00399 * state that it needs. 00400 * @param byte - A pointer to a single byte in memory that should be set to 00401 * contain the value at address. 00402 * @param address - The address in the instruction's address space that should 00403 * be read from. 00404 * @return - -1 if the byte cannot be read for any reason; 0 otherwise. 00405 */ 00406 typedef int (*byteReader_t)(const void* arg, uint8_t* byte, uint64_t address); 00407 00408 /* 00409 * dlog_t - Type for the logging function that the consumer can provide to 00410 * get debugging output from the decoder. 00411 * @param arg - A baton that the consumer can associate with any internal 00412 * state that it needs. 00413 * @param log - A string that contains the message. Will be reused after 00414 * the logger returns. 00415 */ 00416 typedef void (*dlog_t)(void* arg, const char *log); 00417 00418 /* 00419 * The x86 internal instruction, which is produced by the decoder. 00420 */ 00421 struct InternalInstruction { 00422 /* Reader interface (C) */ 00423 byteReader_t reader; 00424 /* Opaque value passed to the reader */ 00425 const void* readerArg; 00426 /* The address of the next byte to read via the reader */ 00427 uint64_t readerCursor; 00428 00429 /* Logger interface (C) */ 00430 dlog_t dlog; 00431 /* Opaque value passed to the logger */ 00432 void* dlogArg; 00433 00434 /* General instruction information */ 00435 00436 /* The mode to disassemble for (64-bit, protected, real) */ 00437 DisassemblerMode mode; 00438 /* The start of the instruction, usable with the reader */ 00439 uint64_t startLocation; 00440 /* The length of the instruction, in bytes */ 00441 size_t length; 00442 00443 /* Prefix state */ 00444 00445 /* 1 if the prefix byte corresponding to the entry is present; 0 if not */ 00446 uint8_t prefixPresent[0x100]; 00447 /* contains the location (for use with the reader) of the prefix byte */ 00448 uint64_t prefixLocations[0x100]; 00449 /* The value of the VEX prefix, if present */ 00450 uint8_t vexPrefix[3]; 00451 /* The length of the VEX prefix (0 if not present) */ 00452 uint8_t vexSize; 00453 /* The value of the REX prefix, if present */ 00454 uint8_t rexPrefix; 00455 /* The location where a mandatory prefix would have to be (i.e., right before 00456 the opcode, or right before the REX prefix if one is present) */ 00457 uint64_t necessaryPrefixLocation; 00458 /* The segment override type */ 00459 SegmentOverride segmentOverride; 00460 00461 /* Sizes of various critical pieces of data, in bytes */ 00462 uint8_t registerSize; 00463 uint8_t addressSize; 00464 uint8_t displacementSize; 00465 uint8_t immediateSize; 00466 00467 /* Offsets from the start of the instruction to the pieces of data, which is 00468 needed to find relocation entries for adding symbolic operands */ 00469 uint8_t displacementOffset; 00470 uint8_t immediateOffset; 00471 00472 /* opcode state */ 00473 00474 /* The value of the two-byte escape prefix (usually 0x0f) */ 00475 uint8_t twoByteEscape; 00476 /* The value of the three-byte escape prefix (usually 0x38 or 0x3a) */ 00477 uint8_t threeByteEscape; 00478 /* The last byte of the opcode, not counting any ModR/M extension */ 00479 uint8_t opcode; 00480 /* The ModR/M byte of the instruction, if it is an opcode extension */ 00481 uint8_t modRMExtension; 00482 00483 /* decode state */ 00484 00485 /* The type of opcode, used for indexing into the array of decode tables */ 00486 OpcodeType opcodeType; 00487 /* The instruction ID, extracted from the decode table */ 00488 uint16_t instructionID; 00489 /* The specifier for the instruction, from the instruction info table */ 00490 const struct InstructionSpecifier *spec; 00491 00492 /* state for additional bytes, consumed during operand decode. Pattern: 00493 consumed___ indicates that the byte was already consumed and does not 00494 need to be consumed again */ 00495 00496 /* The VEX.vvvv field, which contains a third register operand for some AVX 00497 instructions */ 00498 Reg vvvv; 00499 00500 /* The ModR/M byte, which contains most register operands and some portion of 00501 all memory operands */ 00502 BOOL consumedModRM; 00503 uint8_t modRM; 00504 00505 /* The SIB byte, used for more complex 32- or 64-bit memory operands */ 00506 BOOL consumedSIB; 00507 uint8_t sib; 00508 00509 /* The displacement, used for memory operands */ 00510 BOOL consumedDisplacement; 00511 int32_t displacement; 00512 00513 /* Immediates. There can be two in some cases */ 00514 uint8_t numImmediatesConsumed; 00515 uint8_t numImmediatesTranslated; 00516 uint64_t immediates[2]; 00517 00518 /* A register or immediate operand encoded into the opcode */ 00519 BOOL consumedOpcodeModifier; 00520 uint8_t opcodeModifier; 00521 Reg opcodeRegister; 00522 00523 /* Portions of the ModR/M byte */ 00524 00525 /* These fields determine the allowable values for the ModR/M fields, which 00526 depend on operand and address widths */ 00527 EABase eaBaseBase; 00528 EABase eaRegBase; 00529 Reg regBase; 00530 00531 /* The Mod and R/M fields can encode a base for an effective address, or a 00532 register. These are separated into two fields here */ 00533 EABase eaBase; 00534 EADisplacement eaDisplacement; 00535 /* The reg field always encodes a register */ 00536 Reg reg; 00537 00538 /* SIB state */ 00539 SIBIndex sibIndex; 00540 uint8_t sibScale; 00541 SIBBase sibBase; 00542 00543 const struct OperandSpecifier *operands; 00544 }; 00545 00546 /* decodeInstruction - Decode one instruction and store the decoding results in 00547 * a buffer provided by the consumer. 00548 * @param insn - The buffer to store the instruction in. Allocated by the 00549 * consumer. 00550 * @param reader - The byteReader_t for the bytes to be read. 00551 * @param readerArg - An argument to pass to the reader for storing context 00552 * specific to the consumer. May be NULL. 00553 * @param logger - The dlog_t to be used in printing status messages from the 00554 * disassembler. May be NULL. 00555 * @param loggerArg - An argument to pass to the logger for storing context 00556 * specific to the logger. May be NULL. 00557 * @param startLoc - The address (in the reader's address space) of the first 00558 * byte in the instruction. 00559 * @param mode - The mode (16-bit, 32-bit, 64-bit) to decode in. 00560 * @return - Nonzero if there was an error during decode, 0 otherwise. 00561 */ 00562 int decodeInstruction(struct InternalInstruction* insn, 00563 byteReader_t reader, 00564 const void* readerArg, 00565 dlog_t logger, 00566 void* loggerArg, 00567 const void* miiArg, 00568 uint64_t startLoc, 00569 DisassemblerMode mode); 00570 00571 /* x86DisassemblerDebug - C-accessible function for printing a message to 00572 * debugs() 00573 * @param file - The name of the file printing the debug message. 00574 * @param line - The line number that printed the debug message. 00575 * @param s - The message to print. 00576 */ 00577 00578 void x86DisassemblerDebug(const char *file, 00579 unsigned line, 00580 const char *s); 00581 00582 const char *x86DisassemblerGetInstrName(unsigned Opcode, const void *mii); 00583 00584 #ifdef __cplusplus 00585 } 00586 #endif 00587 00588 #endif