LLVM API Documentation

X86DisassemblerDecoder.h
Go to the documentation of this file.
00001 /*===-- X86DisassemblerDecoder.h - Disassembler decoder -----------*- C -*-===*
00002  *
00003  *                     The LLVM Compiler Infrastructure
00004  *
00005  * This file is distributed under the University of Illinois Open Source
00006  * License. See LICENSE.TXT for details.
00007  *
00008  *===----------------------------------------------------------------------===*
00009  *
00010  * This file is part of the X86 Disassembler.
00011  * It contains the public interface of the instruction decoder.
00012  * Documentation for the disassembler can be found in X86Disassembler.h.
00013  *
00014  *===----------------------------------------------------------------------===*/
00015 
00016 #ifndef X86DISASSEMBLERDECODER_H
00017 #define X86DISASSEMBLERDECODER_H
00018 
00019 #ifdef __cplusplus
00020 extern "C" {
00021 #endif
00022 
00023 #define INSTRUCTION_SPECIFIER_FIELDS \
00024   uint16_t operands;
00025 
00026 #define INSTRUCTION_IDS     \
00027   uint16_t instructionIDs;
00028 
00029 #include "X86DisassemblerDecoderCommon.h"
00030 
00031 #undef INSTRUCTION_SPECIFIER_FIELDS
00032 #undef INSTRUCTION_IDS
00033 
00034 /*
00035  * Accessor macros for various fields of an Intel instruction
00036  */
00037 #define modFromModRM(modRM)  (((modRM) & 0xc0) >> 6)
00038 #define regFromModRM(modRM)  (((modRM) & 0x38) >> 3)
00039 #define rmFromModRM(modRM)   ((modRM) & 0x7)
00040 #define scaleFromSIB(sib)    (((sib) & 0xc0) >> 6)
00041 #define indexFromSIB(sib)    (((sib) & 0x38) >> 3)
00042 #define baseFromSIB(sib)     ((sib) & 0x7)
00043 #define wFromREX(rex)        (((rex) & 0x8) >> 3)
00044 #define rFromREX(rex)        (((rex) & 0x4) >> 2)
00045 #define xFromREX(rex)        (((rex) & 0x2) >> 1)
00046 #define bFromREX(rex)        ((rex) & 0x1)
00047 
00048 #define rFromVEX2of3(vex)       (((~(vex)) & 0x80) >> 7)
00049 #define xFromVEX2of3(vex)       (((~(vex)) & 0x40) >> 6)
00050 #define bFromVEX2of3(vex)       (((~(vex)) & 0x20) >> 5)
00051 #define mmmmmFromVEX2of3(vex)   ((vex) & 0x1f)
00052 #define wFromVEX3of3(vex)       (((vex) & 0x80) >> 7)
00053 #define vvvvFromVEX3of3(vex)    (((~(vex)) & 0x78) >> 3)
00054 #define lFromVEX3of3(vex)       (((vex) & 0x4) >> 2)
00055 #define ppFromVEX3of3(vex)      ((vex) & 0x3)
00056 
00057 #define rFromVEX2of2(vex)       (((~(vex)) & 0x80) >> 7)
00058 #define vvvvFromVEX2of2(vex)    (((~(vex)) & 0x78) >> 3)
00059 #define lFromVEX2of2(vex)       (((vex) & 0x4) >> 2)
00060 #define ppFromVEX2of2(vex)      ((vex) & 0x3)
00061 
00062 /*
00063  * These ENTRY() lists name Intel registers; the enums below expand them.
00064  */
00065 
00066 #define REGS_8BIT     \
00067   ENTRY(AL)           \
00068   ENTRY(CL)           \
00069   ENTRY(DL)           \
00070   ENTRY(BL)           \
00071   ENTRY(AH)           \
00072   ENTRY(CH)           \
00073   ENTRY(DH)           \
00074   ENTRY(BH)           \
00075   ENTRY(R8B)          \
00076   ENTRY(R9B)          \
00077   ENTRY(R10B)         \
00078   ENTRY(R11B)         \
00079   ENTRY(R12B)         \
00080   ENTRY(R13B)         \
00081   ENTRY(R14B)         \
00082   ENTRY(R15B)         \
00083   ENTRY(SPL)          \
00084   ENTRY(BPL)          \
00085   ENTRY(SIL)          \
00086   ENTRY(DIL)
00087 
00088 #define EA_BASES_16BIT  \
00089   ENTRY(BX_SI)          \
00090   ENTRY(BX_DI)          \
00091   ENTRY(BP_SI)          \
00092   ENTRY(BP_DI)          \
00093   ENTRY(SI)             \
00094   ENTRY(DI)             \
00095   ENTRY(BP)             \
00096   ENTRY(BX)             \
00097   ENTRY(R8W)            \
00098   ENTRY(R9W)            \
00099   ENTRY(R10W)           \
00100   ENTRY(R11W)           \
00101   ENTRY(R12W)           \
00102   ENTRY(R13W)           \
00103   ENTRY(R14W)           \
00104   ENTRY(R15W)
00105 
00106 #define REGS_16BIT    \
00107   ENTRY(AX)           \
00108   ENTRY(CX)           \
00109   ENTRY(DX)           \
00110   ENTRY(BX)           \
00111   ENTRY(SP)           \
00112   ENTRY(BP)           \
00113   ENTRY(SI)           \
00114   ENTRY(DI)           \
00115   ENTRY(R8W)          \
00116   ENTRY(R9W)          \
00117   ENTRY(R10W)         \
00118   ENTRY(R11W)         \
00119   ENTRY(R12W)         \
00120   ENTRY(R13W)         \
00121   ENTRY(R14W)         \
00122   ENTRY(R15W)
00123 
00124 #define EA_BASES_32BIT  \
00125   ENTRY(EAX)            \
00126   ENTRY(ECX)            \
00127   ENTRY(EDX)            \
00128   ENTRY(EBX)            \
00129   ENTRY(sib)            \
00130   ENTRY(EBP)            \
00131   ENTRY(ESI)            \
00132   ENTRY(EDI)            \
00133   ENTRY(R8D)            \
00134   ENTRY(R9D)            \
00135   ENTRY(R10D)           \
00136   ENTRY(R11D)           \
00137   ENTRY(R12D)           \
00138   ENTRY(R13D)           \
00139   ENTRY(R14D)           \
00140   ENTRY(R15D)
00141 
00142 #define REGS_32BIT  \
00143   ENTRY(EAX)        \
00144   ENTRY(ECX)        \
00145   ENTRY(EDX)        \
00146   ENTRY(EBX)        \
00147   ENTRY(ESP)        \
00148   ENTRY(EBP)        \
00149   ENTRY(ESI)        \
00150   ENTRY(EDI)        \
00151   ENTRY(R8D)        \
00152   ENTRY(R9D)        \
00153   ENTRY(R10D)       \
00154   ENTRY(R11D)       \
00155   ENTRY(R12D)       \
00156   ENTRY(R13D)       \
00157   ENTRY(R14D)       \
00158   ENTRY(R15D)
00159 
00160 #define EA_BASES_64BIT  \
00161   ENTRY(RAX)            \
00162   ENTRY(RCX)            \
00163   ENTRY(RDX)            \
00164   ENTRY(RBX)            \
00165   ENTRY(sib64)          \
00166   ENTRY(RBP)            \
00167   ENTRY(RSI)            \
00168   ENTRY(RDI)            \
00169   ENTRY(R8)             \
00170   ENTRY(R9)             \
00171   ENTRY(R10)            \
00172   ENTRY(R11)            \
00173   ENTRY(R12)            \
00174   ENTRY(R13)            \
00175   ENTRY(R14)            \
00176   ENTRY(R15)
00177 
00178 #define REGS_64BIT  \
00179   ENTRY(RAX)        \
00180   ENTRY(RCX)        \
00181   ENTRY(RDX)        \
00182   ENTRY(RBX)        \
00183   ENTRY(RSP)        \
00184   ENTRY(RBP)        \
00185   ENTRY(RSI)        \
00186   ENTRY(RDI)        \
00187   ENTRY(R8)         \
00188   ENTRY(R9)         \
00189   ENTRY(R10)        \
00190   ENTRY(R11)        \
00191   ENTRY(R12)        \
00192   ENTRY(R13)        \
00193   ENTRY(R14)        \
00194   ENTRY(R15)
00195 
00196 #define REGS_MMX  \
00197   ENTRY(MM0)      \
00198   ENTRY(MM1)      \
00199   ENTRY(MM2)      \
00200   ENTRY(MM3)      \
00201   ENTRY(MM4)      \
00202   ENTRY(MM5)      \
00203   ENTRY(MM6)      \
00204   ENTRY(MM7)
00205 
00206 #define REGS_XMM  \
00207   ENTRY(XMM0)     \
00208   ENTRY(XMM1)     \
00209   ENTRY(XMM2)     \
00210   ENTRY(XMM3)     \
00211   ENTRY(XMM4)     \
00212   ENTRY(XMM5)     \
00213   ENTRY(XMM6)     \
00214   ENTRY(XMM7)     \
00215   ENTRY(XMM8)     \
00216   ENTRY(XMM9)     \
00217   ENTRY(XMM10)    \
00218   ENTRY(XMM11)    \
00219   ENTRY(XMM12)    \
00220   ENTRY(XMM13)    \
00221   ENTRY(XMM14)    \
00222   ENTRY(XMM15)
00223 
00224 #define REGS_YMM  \
00225   ENTRY(YMM0)     \
00226   ENTRY(YMM1)     \
00227   ENTRY(YMM2)     \
00228   ENTRY(YMM3)     \
00229   ENTRY(YMM4)     \
00230   ENTRY(YMM5)     \
00231   ENTRY(YMM6)     \
00232   ENTRY(YMM7)     \
00233   ENTRY(YMM8)     \
00234   ENTRY(YMM9)     \
00235   ENTRY(YMM10)    \
00236   ENTRY(YMM11)    \
00237   ENTRY(YMM12)    \
00238   ENTRY(YMM13)    \
00239   ENTRY(YMM14)    \
00240   ENTRY(YMM15)
00241 
00242 #define REGS_SEGMENT \
00243   ENTRY(ES)          \
00244   ENTRY(CS)          \
00245   ENTRY(SS)          \
00246   ENTRY(DS)          \
00247   ENTRY(FS)          \
00248   ENTRY(GS)
00249 
00250 #define REGS_DEBUG  \
00251   ENTRY(DR0)        \
00252   ENTRY(DR1)        \
00253   ENTRY(DR2)        \
00254   ENTRY(DR3)        \
00255   ENTRY(DR4)        \
00256   ENTRY(DR5)        \
00257   ENTRY(DR6)        \
00258   ENTRY(DR7)
00259 
00260 #define REGS_CONTROL  \
00261   ENTRY(CR0)          \
00262   ENTRY(CR1)          \
00263   ENTRY(CR2)          \
00264   ENTRY(CR3)          \
00265   ENTRY(CR4)          \
00266   ENTRY(CR5)          \
00267   ENTRY(CR6)          \
00268   ENTRY(CR7)          \
00269   ENTRY(CR8)
00270 
00271 #define ALL_EA_BASES  \
00272   EA_BASES_16BIT      \
00273   EA_BASES_32BIT      \
00274   EA_BASES_64BIT
00275 
00276 #define ALL_SIB_BASES \
00277   REGS_32BIT          \
00278   REGS_64BIT
00279 
00280 #define ALL_REGS      \
00281   REGS_8BIT           \
00282   REGS_16BIT          \
00283   REGS_32BIT          \
00284   REGS_64BIT          \
00285   REGS_MMX            \
00286   REGS_XMM            \
00287   REGS_YMM            \
00288   REGS_SEGMENT        \
00289   REGS_DEBUG          \
00290   REGS_CONTROL        \
00291   ENTRY(RIP)
00292 
00293 /*
00294  * EABase - All possible values of the base field for effective-address
00295  *   computations, a.k.a. the Mod and R/M fields of the ModR/M byte.  We
00296  *   distinguish between bases (EA_BASE_*) and registers that just happen to be
00297  *   referred to when Mod == 0b11 (EA_REG_*).
00298  */
00299 typedef enum {
00300   EA_BASE_NONE,
00301 #define ENTRY(x) EA_BASE_##x,
00302   ALL_EA_BASES
00303 #undef ENTRY
00304 #define ENTRY(x) EA_REG_##x,
00305   ALL_REGS
00306 #undef ENTRY
00307   EA_max
00308 } EABase;
00309 
00310 /*
00311  * SIBIndex - All possible values of the SIB index field.
00312  *   Borrows entries from ALL_EA_BASES with the special case that
00313  *   sib is synonymous with NONE.
00314  * Vector SIB: index can be XMM or YMM.
00315  */
00316 typedef enum {
00317   SIB_INDEX_NONE,
00318 #define ENTRY(x) SIB_INDEX_##x,
00319   ALL_EA_BASES
00320   REGS_XMM
00321   REGS_YMM
00322 #undef ENTRY
00323   SIB_INDEX_max
00324 } SIBIndex;
00325 
00326 /*
00327  * SIBBase - All possible values of the SIB base field.
00328  */
00329 typedef enum {
00330   SIB_BASE_NONE,
00331 #define ENTRY(x) SIB_BASE_##x,
00332   ALL_SIB_BASES
00333 #undef ENTRY
00334   SIB_BASE_max
00335 } SIBBase;
00336 
00337 /*
00338  * EADisplacement - Possible displacement types for effective-address
00339  *   computations.
00340  */
00341 typedef enum {
00342   EA_DISP_NONE,
00343   EA_DISP_8,
00344   EA_DISP_16,
00345   EA_DISP_32
00346 } EADisplacement;
00347 
00348 /*
00349  * Reg - All possible values of the reg field in the ModR/M byte.
00350  */
00351 typedef enum {
00352 #define ENTRY(x) MODRM_REG_##x,
00353   ALL_REGS
00354 #undef ENTRY
00355   MODRM_REG_max
00356 } Reg;
00357 
00358 /*
00359  * SegmentOverride - All possible segment overrides.
00360  */
00361 typedef enum {
00362   SEG_OVERRIDE_NONE,
00363   SEG_OVERRIDE_CS,
00364   SEG_OVERRIDE_SS,
00365   SEG_OVERRIDE_DS,
00366   SEG_OVERRIDE_ES,
00367   SEG_OVERRIDE_FS,
00368   SEG_OVERRIDE_GS,
00369   SEG_OVERRIDE_max
00370 } SegmentOverride;
00371 
00372 /*
00373  * VEXLeadingOpcodeByte - Possible values for the VEX.m-mmmm field
00374  */
00375 
00376 typedef enum {
00377   VEX_LOB_0F = 0x1,
00378   VEX_LOB_0F38 = 0x2,
00379   VEX_LOB_0F3A = 0x3
00380 } VEXLeadingOpcodeByte;
00381 
00382 /*
00383  * VEXPrefixCode - Possible values for the VEX.pp field
00384  */
00385 
00386 typedef enum {
00387   VEX_PREFIX_NONE = 0x0,
00388   VEX_PREFIX_66 = 0x1,
00389   VEX_PREFIX_F3 = 0x2,
00390   VEX_PREFIX_F2 = 0x3
00391 } VEXPrefixCode;
00392 
00393 typedef uint8_t BOOL;
00394 
00395 /*
00396  * byteReader_t - Type for the byte reader that the consumer must provide to
00397  *   the decoder.  Reads a single byte from the instruction's address space.
00398  * @param arg     - A baton that the consumer can associate with any internal
00399  *                  state that it needs.
00400  * @param byte    - A pointer to a single byte in memory that should be set to
00401  *                  contain the value at address.
00402  * @param address - The address in the instruction's address space that should
00403  *                  be read from.
00404  * @return        - -1 if the byte cannot be read for any reason; 0 otherwise.
00405  */
00406 typedef int (*byteReader_t)(const void* arg, uint8_t* byte, uint64_t address);
00407 
00408 /*
00409  * dlog_t - Type for the logging function that the consumer can provide to
00410  *   get debugging output from the decoder.
00411  * @param arg     - A baton that the consumer can associate with any internal
00412  *                  state that it needs.
00413  * @param log     - The message string.  Its buffer is reused after the logger
00414  *                  returns, so copy it if it must outlive the call.
00415  */
00416 typedef void (*dlog_t)(void* arg, const char *log);
00417 
00418 /*
00419  * The x86 internal instruction, which is produced by the decoder.
00420  */
00421 struct InternalInstruction {
00422   /* Reader interface (C) */
00423   byteReader_t reader;
00424   /* Opaque value passed to the reader */
00425   const void* readerArg;
00426   /* The address of the next byte to read via the reader */
00427   uint64_t readerCursor;
00428 
00429   /* Logger interface (C) */
00430   dlog_t dlog;
00431   /* Opaque value passed to the logger */
00432   void* dlogArg;
00433 
00434   /* General instruction information */
00435 
00436   /* The mode to disassemble for (64-bit, protected, real) */
00437   DisassemblerMode mode;
00438   /* The start of the instruction, usable with the reader */
00439   uint64_t startLocation;
00440   /* The length of the instruction, in bytes */
00441   size_t length;
00442 
00443   /* Prefix state */
00444 
00445   /* 1 if the prefix byte corresponding to the entry is present; 0 if not */
00446   uint8_t prefixPresent[0x100];
00447   /* contains the location (for use with the reader) of the prefix byte */
00448   uint64_t prefixLocations[0x100];
00449   /* The value of the VEX prefix, if present */
00450   uint8_t vexPrefix[3];
00451   /* The length of the VEX prefix (0 if not present) */
00452   uint8_t vexSize;
00453   /* The value of the REX prefix, if present */
00454   uint8_t rexPrefix;
00455   /* The location where a mandatory prefix would have to be (i.e., right before
00456      the opcode, or right before the REX prefix if one is present) */
00457   uint64_t necessaryPrefixLocation;
00458   /* The segment override type */
00459   SegmentOverride segmentOverride;
00460 
00461   /* Sizes of various critical pieces of data, in bytes */
00462   uint8_t registerSize;
00463   uint8_t addressSize;
00464   uint8_t displacementSize;
00465   uint8_t immediateSize;
00466 
00467   /* Offsets from the start of the instruction to the pieces of data, which is
00468      needed to find relocation entries for adding symbolic operands */
00469   uint8_t displacementOffset;
00470   uint8_t immediateOffset;
00471 
00472   /* opcode state */
00473 
00474   /* The value of the two-byte escape prefix (usually 0x0f) */
00475   uint8_t twoByteEscape;
00476   /* The value of the three-byte escape prefix (usually 0x38 or 0x3a) */
00477   uint8_t threeByteEscape;
00478   /* The last byte of the opcode, not counting any ModR/M extension */
00479   uint8_t opcode;
00480   /* The ModR/M byte of the instruction, if it is an opcode extension */
00481   uint8_t modRMExtension;
00482 
00483   /* decode state */
00484 
00485   /* The type of opcode, used for indexing into the array of decode tables */
00486   OpcodeType opcodeType;
00487   /* The instruction ID, extracted from the decode table */
00488   uint16_t instructionID;
00489   /* The specifier for the instruction, from the instruction info table */
00490   const struct InstructionSpecifier *spec;
00491 
00492   /* state for additional bytes, consumed during operand decode.  Pattern:
00493      consumed___ indicates that the byte was already consumed and does not
00494      need to be consumed again */
00495 
00496   /* The VEX.vvvv field, which contains a third register operand for some AVX
00497      instructions */
00498   Reg                           vvvv;
00499 
00500   /* The ModR/M byte, which contains most register operands and some portion of
00501      all memory operands */
00502   BOOL                          consumedModRM;
00503   uint8_t                       modRM;
00504 
00505   /* The SIB byte, used for more complex 32- or 64-bit memory operands */
00506   BOOL                          consumedSIB;
00507   uint8_t                       sib;
00508 
00509   /* The displacement, used for memory operands */
00510   BOOL                          consumedDisplacement;
00511   int32_t                       displacement;
00512 
00513   /* Immediates.  There can be two in some cases */
00514   uint8_t                       numImmediatesConsumed;
00515   uint8_t                       numImmediatesTranslated;
00516   uint64_t                      immediates[2];
00517 
00518   /* A register or immediate operand encoded into the opcode */
00519   BOOL                          consumedOpcodeModifier;
00520   uint8_t                       opcodeModifier;
00521   Reg                           opcodeRegister;
00522 
00523   /* Portions of the ModR/M byte */
00524 
00525   /* These fields determine the allowable values for the ModR/M fields, which
00526      depend on operand and address widths */
00527   EABase                        eaBaseBase;
00528   EABase                        eaRegBase;
00529   Reg                           regBase;
00530 
00531   /* The Mod and R/M fields can encode a base for an effective address, or a
00532      register.  These are separated into two fields here */
00533   EABase                        eaBase;
00534   EADisplacement                eaDisplacement;
00535   /* The reg field always encodes a register */
00536   Reg                           reg;
00537 
00538   /* SIB state */
00539   SIBIndex                      sibIndex;
00540   uint8_t                       sibScale;
00541   SIBBase                       sibBase;
00542 
00543   const struct OperandSpecifier *operands;
00544 };
00545 
00546 /* decodeInstruction - Decode one instruction into a consumer-provided buffer.
00547  * @param insn      - Consumer-allocated buffer that receives the instruction.
00548  * @param reader    - The byteReader_t for the bytes to be read.
00549  * @param readerArg - An argument to pass to the reader for storing context
00550  *                    specific to the consumer.  May be NULL.
00551  * @param logger    - The dlog_t to be used in printing status messages from the
00552  *                    disassembler.  May be NULL.
00553  * @param loggerArg - An argument to pass to the logger for storing context
00554  *                    specific to the logger.  May be NULL.
00555  * @param miiArg    - Opaque pointer, not interpreted in this header; presumably
00556  *                    the mii handed to x86DisassemblerGetInstrName (confirm).
00557  * @param startLoc  - The address (in the reader's address space) of the first
00558  *                    byte in the instruction.
00559  * @param mode      - The mode (16-bit, 32-bit, 64-bit) to decode in.
00560  * @return          - Nonzero if there was an error during decode, 0 otherwise.
00561  */
00562 int decodeInstruction(struct InternalInstruction* insn,
00563                       byteReader_t reader,
00564                       const void* readerArg,
00565                       dlog_t logger,
00566                       void* loggerArg,
00567                       const void* miiArg,
00568                       uint64_t startLoc,
00569                       DisassemblerMode mode);
00570 
00571 /* x86DisassemblerDebug - C-accessible function for printing a message to
00572  *   debugs()
00573  * @param file  - The name of the file printing the debug message.
00574  * @param line  - The line number that printed the debug message.
00575  * @param s     - The message to print.
00576  */
00577 
00578 void x86DisassemblerDebug(const char *file,
00579                           unsigned line,
00580                           const char *s);
00581 /* NOTE(review): undocumented; presumably returns Opcode's name via mii. */
00582 const char *x86DisassemblerGetInstrName(unsigned Opcode, const void *mii);
00583 
00584 #ifdef __cplusplus
00585 }
00586 #endif
00587 
00588 #endif