LLVM API Documentation

X86DisassemblerDecoder.c
Go to the documentation of this file.
00001 /*===-- X86DisassemblerDecoder.c - Disassembler decoder ------------*- C -*-===*
00002  *
00003  *                     The LLVM Compiler Infrastructure
00004  *
00005  * This file is distributed under the University of Illinois Open Source
00006  * License. See LICENSE.TXT for details.
00007  *
00008  *===----------------------------------------------------------------------===*
00009  *
00010  * This file is part of the X86 Disassembler.
00011  * It contains the implementation of the instruction decoder.
00012  * Documentation for the disassembler can be found in X86Disassembler.h.
00013  *
00014  *===----------------------------------------------------------------------===*/
00015 
00016 #include <stdarg.h>   /* for va_*()       */
00017 #include <stdio.h>    /* for vsnprintf()  */
00018 #include <stdlib.h>   /* for exit()       */
00019 #include <string.h>   /* for memset()     */
00020 
00021 #include "X86DisassemblerDecoder.h"
00022 
00023 #include "X86GenDisassemblerTables.inc"
00024 /* TRUE/FALSE are the values returned by the BOOL-typed helpers in this file. */
00025 #define TRUE  1
00026 #define FALSE 0
00027 /* NOTE(review): the code shown here uses BOOL, never this typedef -- confirm it is still needed. */
00028 typedef int8_t bool;
00029 /* debug(s) - passes s with __FILE__/__LINE__ to x86DisassemblerDebug(); empty statement if NDEBUG is defined. */
00030 #ifndef NDEBUG
00031 #define debug(s) do { x86DisassemblerDebug(__FILE__, __LINE__, s); } while (0)
00032 #else
00033 #define debug(s) do { } while (0)
00034 #endif
00035 
00036 
00037 /*
00038  * contextForAttrs - Looks up the decode context for a set of attributes in the
00039  *   instruction context table, using the attribute mask as the table index.
00040  *
00041  * @param attrMask  - Attributes, from the enumeration attributeBits.
00042  * @return          - The InstructionContext to use for these attributes.
00043  */
00044 static InstructionContext contextForAttrs(uint8_t attrMask) {
00045   InstructionContext context = CONTEXTS_SYM[attrMask];
00046   return context;
00047 }
00048 
00049 /*
00050  * modRMRequired - Consults the decision table selected by the opcode type to
00051  *   find out whether decoding the instruction needs its ModR/M byte.
00052  *
00053  * @param type        - The opcode type (i.e., how many bytes it has).
00054  * @param insnContext - The context for the instruction, as returned by
00055  *                      contextForAttrs.
00056  * @param opcode      - The last byte of the instruction's opcode, not counting
00057  *                      ModR/M extensions and escapes.
00058  * @return            - TRUE if the ModR/M byte is required; FALSE if it is
00059  *                      not, or if type selects none of the tables.
00060  */
00061 static int modRMRequired(OpcodeType type,
00062                          InstructionContext insnContext,
00063                          uint8_t opcode) {
00064   const struct ContextDecision* decision = 0;
00065 
00066   switch (type) {
00067   case ONEBYTE:      decision = &ONEBYTE_SYM;     break;
00068   case TWOBYTE:      decision = &TWOBYTE_SYM;     break;
00069   case THREEBYTE_38: decision = &THREEBYTE38_SYM; break;
00070   case THREEBYTE_3A: decision = &THREEBYTE3A_SYM; break;
00071   case THREEBYTE_A6: decision = &THREEBYTEA6_SYM; break;
00072   case THREEBYTE_A7: decision = &THREEBYTEA7_SYM; break;
00073   }
00074 
00075   /* No case matched: answer FALSE instead of dereferencing null. */
00076   if (!decision) {
00077     debug("Unknown opcode type");
00078     return FALSE;
00079   }
00080 
00081   /* Every decision type other than MODRM_ONEENTRY needs the ModR/M byte. */
00082   return decision->opcodeDecisions[insnContext].modRMDecisions[opcode].
00083     modrm_type != MODRM_ONEENTRY;
00084 }
00089 
00090 /*
00091  * decode - Reads the appropriate instruction table to obtain the unique ID of
00092  *   an instruction.
00093  *
00094  * @param type        - See modRMRequired().
00095  * @param insnContext - See modRMRequired().
00096  * @param opcode      - See modRMRequired().
00097  * @param modRM       - The ModR/M byte if required, or any value if not.
00098  * @return            - The UID of the instruction, or 0 on failure (type
00099  *                      selects no table, or an unknown modrm_type is found).
00100  */
00101 static InstrUID decode(OpcodeType type,
00102                        InstructionContext insnContext,
00103                        uint8_t opcode,
00104                        uint8_t modRM) {
00105   const struct ContextDecision* decision = 0;
00106   const struct ModRMDecision* dec;
00107 
00108   switch (type) {
00109   case ONEBYTE:      decision = &ONEBYTE_SYM;     break;
00110   case TWOBYTE:      decision = &TWOBYTE_SYM;     break;
00111   case THREEBYTE_38: decision = &THREEBYTE38_SYM; break;
00112   case THREEBYTE_3A: decision = &THREEBYTE3A_SYM; break;
00113   case THREEBYTE_A6: decision = &THREEBYTEA6_SYM; break;
00114   case THREEBYTE_A7: decision = &THREEBYTEA7_SYM; break;
00115   }
00116 
00117   /* No case matched: fail with UID 0 instead of dereferencing null. */
00118   if (!decision) {
00119     debug("Unknown opcode type");
00120     return 0;
00121   }
00122 
00123   dec = &decision->opcodeDecisions[insnContext].modRMDecisions[opcode];
00124 
00125   /* instructionIDs is the modRMTable index of this decision's first entry. */
00126   switch (dec->modrm_type) {
00127   default:
00128     debug("Corrupt table!  Unknown modrm_type");
00129     return 0;
00130   case MODRM_ONEENTRY:  /* a single entry; modRM is not consulted */
00131     return modRMTable[dec->instructionIDs];
00132   case MODRM_SPLITRM:   /* entry 0 if mod != 3, entry 1 if mod == 3 */
00133     if (modFromModRM(modRM) == 0x3)
00134       return modRMTable[dec->instructionIDs+1];
00135     return modRMTable[dec->instructionIDs];
00136   case MODRM_SPLITREG:  /* by bits 5:3; the mod == 3 entries start at +8 */
00137     if (modFromModRM(modRM) == 0x3)
00138       return modRMTable[dec->instructionIDs+((modRM & 0x38) >> 3)+8];
00139     return modRMTable[dec->instructionIDs+((modRM & 0x38) >> 3)];
00140   case MODRM_SPLITMISC: /* mod != 3: bits 5:3; mod == 3: low 6 bits, at +8 */
00141     if (modFromModRM(modRM) == 0x3)
00142       return modRMTable[dec->instructionIDs+(modRM & 0x3f)+8];
00143     return modRMTable[dec->instructionIDs+((modRM & 0x38) >> 3)];
00144   case MODRM_FULL:      /* one entry per ModR/M value */
00145     return modRMTable[dec->instructionIDs+modRM];
00146   }
00147 }
00149 
00150 /*
00151  * specifierForUID - Returns the name and operand specification for a UID.
00152  *
00153  * @param uid - The unique ID for the instruction.  Pass a value returned by
00154  *              decode(); the table index is not bounds-checked here.
00155  * @return    - A pointer to the specification for that instruction.
00156  */
00157 static const struct InstructionSpecifier *specifierForUID(InstrUID uid) {
00158   const struct InstructionSpecifier *spec = &INSTRUCTIONS_SYM[uid];
00159   return spec;
00160 }
00161 
00162 /*
00163  * consumeByte - Reads the byte under the cursor through the reader function
00164  *   provided by the user and, if that succeeds, moves the cursor past it.
00165  *
00166  * @param insn  - The instruction with the reader function to use.  Its cursor
00167  *                advances by one on success and stays put on failure.
00168  * @param byte  - A pointer to a pre-allocated memory buffer to be populated
00169  *                with the data read.
00170  * @return      - 0 if the read was successful; nonzero otherwise.
00171  */
00172 static int consumeByte(struct InternalInstruction* insn, uint8_t* byte) {
00173   int status = insn->reader(insn->readerArg, byte, insn->readerCursor);
00174 
00175   if (status)
00176     return status;  /* failed read: the cursor stays where it was */
00177   insn->readerCursor += 1;
00178   return 0;
00179 }
00180 
00181 /*
00182  * lookAtByte - Reads the byte under the cursor without moving the cursor.
00183  * @param insn  - See consumeByte().
00184  * @param byte  - See consumeByte().
00185  * @return      - See consumeByte().
00186  */
00187 static int lookAtByte(struct InternalInstruction* insn, uint8_t* byte) {
00188   int status = insn->reader(insn->readerArg, byte, insn->readerCursor);
00189   return status;
00190 }
00191 /* unconsumeByte - Steps the cursor of insn back by one byte; nothing is read. */
00192 static void unconsumeByte(struct InternalInstruction* insn) {
00193   insn->readerCursor -= 1;
00194 }
00195 
00196 /*
00197  * CONSUME_FUNC(name, type) - Defines the reader `name`, which assembles a
00198  *   value of `type` from sizeof(type) bytes at the cursor, treating the byte
00199  *   at the lowest address as the least significant.  The cursor moves past
00200  *   the value only if every byte could be read.
00201  *
00202  * @param insn    - See consumeByte().
00203  * @param ptr     - Where the assembled value is stored on success.
00204  * @return        - 0 on success; otherwise the reader's nonzero result.
00205  */
00206 #define CONSUME_FUNC(name, type)                                  \
00207   static int name(struct InternalInstruction* insn, type* ptr) {  \
00208     type value = 0;                                               \
00209     unsigned index;                                               \
00210     for (index = 0; index < sizeof(type); ++index) {              \
00211       uint8_t next;                                               \
00212       int status = insn->reader(insn->readerArg,                  \
00213                                 &next,                            \
00214                                 insn->readerCursor + index);      \
00215       if (status)                                                 \
00216         return status;                                            \
00217       value |= (uint64_t)next << (index * 8);                     \
00218     }                                                             \
00219     *ptr = value;                                                 \
00220     insn->readerCursor += sizeof(type);                           \
00221     return 0;                                                     \
00222   }
00223 
00224 CONSUME_FUNC(consumeInt8, int8_t)
00225 CONSUME_FUNC(consumeInt16, int16_t)
00226 CONSUME_FUNC(consumeInt32, int32_t)
00227 CONSUME_FUNC(consumeUInt16, uint16_t)
00228 CONSUME_FUNC(consumeUInt32, uint32_t)
00229 CONSUME_FUNC(consumeUInt64, uint64_t)
00230 
00231 /*
00232  * dbgprintf - Formats a message printf-style and hands it to the logging
00233  *   function provided by the user.  Does nothing when no logging function is
00234  *   set.  The text is built in a 256-byte buffer, so vsnprintf() cuts off
00235  *   anything longer.
00236  *
00237  * @param insn    - The instruction containing the logging function.
00238  * @param format  - See printf().
00239  * @param ...     - See printf().
00240  */
00241 static void dbgprintf(struct InternalInstruction* insn,
00242                       const char* format,
00243                       ...) {
00244   if (insn->dlog) {
00245     char message[256];
00246     va_list args;
00247 
00248     va_start(args, format);
00249     (void)vsnprintf(message, sizeof(message), format, args);
00250     va_end(args);
00251 
00252     insn->dlog(insn->dlogArg, message);
00253   }
00254 }
00256 
00257 /*
00258  * setPrefixPresent - Records that a prefix byte was seen, together with the
00259  *   location it was seen at.
00260  *
00261  * @param insn      - The instruction to be marked as having the prefix.
00262  * @param prefix    - The prefix that is present; used as the array index.
00263  * @param location  - The location where the prefix is located (in the address
00264  *                    space of the instruction's reader).
00265  */
00266 static void setPrefixPresent(struct InternalInstruction* insn,
00267                              uint8_t prefix,
00268                              uint64_t location) {
00269   insn->prefixLocations[prefix] = location;
00270   insn->prefixPresent[prefix] = 1;
00271 }
00273 
00274 /*
00275  * isPrefixAtLocation - Tells whether a prefix was recorded by
00276  *   setPrefixPresent() at exactly the given location.
00277  *
00278  * @param insn      - The instruction to be queried.
00279  * @param prefix    - The prefix.
00280  * @param location  - The location to query.
00281  * @return          - TRUE if the prefix is marked present and its recorded
00282  *                    location equals location; FALSE otherwise.
00283  */
00284 static BOOL isPrefixAtLocation(struct InternalInstruction* insn,
00285                                uint8_t prefix,
00286                                uint64_t location) {
00287   if (insn->prefixPresent[prefix] != 1)
00288     return FALSE;
00289   if (insn->prefixLocations[prefix] != location)
00290     return FALSE;
00291   return TRUE;
00292 }
00293 
00294 /*
00295  * readPrefixes - Consumes all of an instruction's prefix bytes, and marks the
00296  *   instruction as having them.  Also sets the instruction's default operand,
00297  *   address, and other relevant data sizes to report operands correctly.
00298  *
00299  * @param insn  - The instruction whose prefixes are to be read.
00300  * @return      - 0 if the instruction could be read until the end of the prefix
00301  *                bytes, and no prefixes conflicted; nonzero otherwise.
00302  */
00303 static int readPrefixes(struct InternalInstruction* insn) {
00304   BOOL isPrefix = TRUE;
00305   BOOL prefixGroups[4] = { FALSE };
00306   uint64_t prefixLocation;
00307   uint8_t byte = 0;
00308 
00309   BOOL hasAdSize = FALSE;
00310   BOOL hasOpSize = FALSE;
00311 
00312   dbgprintf(insn, "readPrefixes()");
00313 
00314   while (isPrefix) {
00315     prefixLocation = insn->readerCursor;
00316 
00317     if (consumeByte(insn, &byte))
00318       return -1;
00319 
00320     /*
00321      * If the byte is a LOCK/REP/REPNE prefix and not a part of the opcode, then
00322      * break and let it be disassembled as a normal "instruction".
00323      */
00324     if (insn->readerCursor - 1 == insn->startLocation
00325         && (byte == 0xf0 || byte == 0xf2 || byte == 0xf3)) {
00326       uint8_t nextByte;
00327       if (byte == 0xf0)
00328         break;
00329       if (lookAtByte(insn, &nextByte))
00330         return -1;
00331       if (insn->mode == MODE_64BIT && (nextByte & 0xf0) == 0x40) {
00332         if (consumeByte(insn, &nextByte))
00333           return -1;
00334         if (lookAtByte(insn, &nextByte))
00335           return -1;
00336         unconsumeByte(insn);
00337       }
00338       if (nextByte != 0x0f && nextByte != 0x90)
00339         break;
00340     }
00341 
00342     switch (byte) {
00343     case 0xf0:  /* LOCK */
00344     case 0xf2:  /* REPNE/REPNZ */
00345     case 0xf3:  /* REP or REPE/REPZ */
00346       if (prefixGroups[0])
00347         dbgprintf(insn, "Redundant Group 1 prefix");
00348       prefixGroups[0] = TRUE;
00349       setPrefixPresent(insn, byte, prefixLocation);
00350       break;
00351     case 0x2e:  /* CS segment override -OR- Branch not taken */
00352     case 0x36:  /* SS segment override -OR- Branch taken */
00353     case 0x3e:  /* DS segment override */
00354     case 0x26:  /* ES segment override */
00355     case 0x64:  /* FS segment override */
00356     case 0x65:  /* GS segment override */
00357       switch (byte) {
00358       case 0x2e:
00359         insn->segmentOverride = SEG_OVERRIDE_CS;
00360         break;
00361       case 0x36:
00362         insn->segmentOverride = SEG_OVERRIDE_SS;
00363         break;
00364       case 0x3e:
00365         insn->segmentOverride = SEG_OVERRIDE_DS;
00366         break;
00367       case 0x26:
00368         insn->segmentOverride = SEG_OVERRIDE_ES;
00369         break;
00370       case 0x64:
00371         insn->segmentOverride = SEG_OVERRIDE_FS;
00372         break;
00373       case 0x65:
00374         insn->segmentOverride = SEG_OVERRIDE_GS;
00375         break;
00376       default:
00377         debug("Unhandled override");
00378         return -1;
00379       }
00380       if (prefixGroups[1])
00381         dbgprintf(insn, "Redundant Group 2 prefix");
00382       prefixGroups[1] = TRUE;
00383       setPrefixPresent(insn, byte, prefixLocation);
00384       break;
00385     case 0x66:  /* Operand-size override */
00386       if (prefixGroups[2])
00387         dbgprintf(insn, "Redundant Group 3 prefix");
00388       prefixGroups[2] = TRUE;
00389       hasOpSize = TRUE;
00390       setPrefixPresent(insn, byte, prefixLocation);
00391       break;
00392     case 0x67:  /* Address-size override */
00393       if (prefixGroups[3])
00394         dbgprintf(insn, "Redundant Group 4 prefix");
00395       prefixGroups[3] = TRUE;
00396       hasAdSize = TRUE;
00397       setPrefixPresent(insn, byte, prefixLocation);
00398       break;
00399     default:    /* Not a prefix byte */
00400       isPrefix = FALSE;
00401       break;
00402     }
00403 
00404     if (isPrefix)
00405       dbgprintf(insn, "Found prefix 0x%hhx", byte);
00406   }
00407 
00408   insn->vexSize = 0;
00409 
00410   if (byte == 0xc4) {
00411     uint8_t byte1;
00412 
00413     if (lookAtByte(insn, &byte1)) {
00414       dbgprintf(insn, "Couldn't read second byte of VEX");
00415       return -1;
00416     }
00417 
00418     if (insn->mode == MODE_64BIT || (byte1 & 0xc0) == 0xc0) {
00419       insn->vexSize = 3;
00420       insn->necessaryPrefixLocation = insn->readerCursor - 1;
00421     }
00422     else {
00423       unconsumeByte(insn);
00424       insn->necessaryPrefixLocation = insn->readerCursor - 1;
00425     }
00426 
00427     if (insn->vexSize == 3) {
00428       insn->vexPrefix[0] = byte;
00429       consumeByte(insn, &insn->vexPrefix[1]);
00430       consumeByte(insn, &insn->vexPrefix[2]);
00431 
00432       /* We simulate the REX prefix for simplicity's sake */
00433 
00434       if (insn->mode == MODE_64BIT) {
00435         insn->rexPrefix = 0x40
00436                         | (wFromVEX3of3(insn->vexPrefix[2]) << 3)
00437                         | (rFromVEX2of3(insn->vexPrefix[1]) << 2)
00438                         | (xFromVEX2of3(insn->vexPrefix[1]) << 1)
00439                         | (bFromVEX2of3(insn->vexPrefix[1]) << 0);
00440       }
00441 
00442       switch (ppFromVEX3of3(insn->vexPrefix[2]))
00443       {
00444       default:
00445         break;
00446       case VEX_PREFIX_66:
00447         hasOpSize = TRUE;
00448         break;
00449       }
00450 
00451       dbgprintf(insn, "Found VEX prefix 0x%hhx 0x%hhx 0x%hhx", insn->vexPrefix[0], insn->vexPrefix[1], insn->vexPrefix[2]);
00452     }
00453   }
00454   else if (byte == 0xc5) {
00455     uint8_t byte1;
00456 
00457     if (lookAtByte(insn, &byte1)) {
00458       dbgprintf(insn, "Couldn't read second byte of VEX");
00459       return -1;
00460     }
00461 
00462     if (insn->mode == MODE_64BIT || (byte1 & 0xc0) == 0xc0) {
00463       insn->vexSize = 2;
00464     }
00465     else {
00466       unconsumeByte(insn);
00467     }
00468 
00469     if (insn->vexSize == 2) {
00470       insn->vexPrefix[0] = byte;
00471       consumeByte(insn, &insn->vexPrefix[1]);
00472 
00473       if (insn->mode == MODE_64BIT) {
00474         insn->rexPrefix = 0x40
00475                         | (rFromVEX2of2(insn->vexPrefix[1]) << 2);
00476       }
00477 
00478       switch (ppFromVEX2of2(insn->vexPrefix[1]))
00479       {
00480       default:
00481         break;
00482       case VEX_PREFIX_66:
00483         hasOpSize = TRUE;
00484         break;
00485       }
00486 
00487       dbgprintf(insn, "Found VEX prefix 0x%hhx 0x%hhx", insn->vexPrefix[0], insn->vexPrefix[1]);
00488     }
00489   }
00490   else {
00491     if (insn->mode == MODE_64BIT) {
00492       if ((byte & 0xf0) == 0x40) {
00493         uint8_t opcodeByte;
00494 
00495         if (lookAtByte(insn, &opcodeByte) || ((opcodeByte & 0xf0) == 0x40)) {
00496           dbgprintf(insn, "Redundant REX prefix");
00497           return -1;
00498         }
00499 
00500         insn->rexPrefix = byte;
00501         insn->necessaryPrefixLocation = insn->readerCursor - 2;
00502 
00503         dbgprintf(insn, "Found REX prefix 0x%hhx", byte);
00504       } else {
00505         unconsumeByte(insn);
00506         insn->necessaryPrefixLocation = insn->readerCursor - 1;
00507       }
00508     } else {
00509       unconsumeByte(insn);
00510       insn->necessaryPrefixLocation = insn->readerCursor - 1;
00511     }
00512   }
00513 
00514   if (insn->mode == MODE_16BIT) {
00515     insn->registerSize       = (hasOpSize ? 4 : 2);
00516     insn->addressSize        = (hasAdSize ? 4 : 2);
00517     insn->displacementSize   = (hasAdSize ? 4 : 2);
00518     insn->immediateSize      = (hasOpSize ? 4 : 2);
00519   } else if (insn->mode == MODE_32BIT) {
00520     insn->registerSize       = (hasOpSize ? 2 : 4);
00521     insn->addressSize        = (hasAdSize ? 2 : 4);
00522     insn->displacementSize   = (hasAdSize ? 2 : 4);
00523     insn->immediateSize      = (hasOpSize ? 2 : 4);
00524   } else if (insn->mode == MODE_64BIT) {
00525     if (insn->rexPrefix && wFromREX(insn->rexPrefix)) {
00526       insn->registerSize       = 8;
00527       insn->addressSize        = (hasAdSize ? 4 : 8);
00528       insn->displacementSize   = 4;
00529       insn->immediateSize      = 4;
00530     } else if (insn->rexPrefix) {
00531       insn->registerSize       = (hasOpSize ? 2 : 4);
00532       insn->addressSize        = (hasAdSize ? 4 : 8);
00533       insn->displacementSize   = (hasOpSize ? 2 : 4);
00534       insn->immediateSize      = (hasOpSize ? 2 : 4);
00535     } else {
00536       insn->registerSize       = (hasOpSize ? 2 : 4);
00537       insn->addressSize        = (hasAdSize ? 4 : 8);
00538       insn->displacementSize   = (hasOpSize ? 2 : 4);
00539       insn->immediateSize      = (hasOpSize ? 2 : 4);
00540     }
00541   }
00542 
00543   return 0;
00544 }
00545 
00546 /*
00547  * readOpcode - Reads the opcode (excepting the ModR/M byte in the case of
00548  *   extended or escape opcodes), recording the opcode type and any escape
00549  *   bytes.  Under a VEX prefix the escape bytes are taken from the prefix
00550  *   rather than read from the instruction stream.
00551  *
00552  * @param insn  - The instruction whose opcode is to be read.
00553  * @return      - 0 if the opcode could be read successfully; nonzero otherwise.
00554  */
00555 static int readOpcode(struct InternalInstruction* insn) {
00556   uint8_t opcodeByte;
00557 
00558   dbgprintf(insn, "readOpcode()");
00559 
00560   insn->opcodeType = ONEBYTE;
00561 
00562   if (insn->vexSize == 2) {
00563     /* With a two-byte VEX prefix the 0x0f escape is implied. */
00564     insn->twoByteEscape = 0x0f;
00565     insn->opcodeType = TWOBYTE;
00566     return consumeByte(insn, &insn->opcode);
00567   }
00568 
00569   if (insn->vexSize == 3) {
00570     /* The m-mmmm field of a three-byte VEX prefix names the escape bytes. */
00571     switch (mmmmmFromVEX2of3(insn->vexPrefix[1])) {
00572     case 0:
00573       break;  /* none implied: read the opcode bytes normally below */
00574     case VEX_LOB_0F:
00575       insn->twoByteEscape = 0x0f;
00576       insn->opcodeType = TWOBYTE;
00577       return consumeByte(insn, &insn->opcode);
00578     case VEX_LOB_0F38:
00579       insn->twoByteEscape = 0x0f;
00580       insn->threeByteEscape = 0x38;
00581       insn->opcodeType = THREEBYTE_38;
00582       return consumeByte(insn, &insn->opcode);
00583     case VEX_LOB_0F3A:
00584       insn->twoByteEscape = 0x0f;
00585       insn->threeByteEscape = 0x3a;
00586       insn->opcodeType = THREEBYTE_3A;
00587       return consumeByte(insn, &insn->opcode);
00588     default:
00589       dbgprintf(insn, "Unhandled m-mmmm field for instruction (0x%hhx)", mmmmmFromVEX2of3(insn->vexPrefix[1]));
00590       return -1;
00591     }
00592   }
00593 
00594   if (consumeByte(insn, &opcodeByte))
00595     return -1;
00596 
00597   if (opcodeByte == 0x0f) {
00598     OpcodeType escapedType = TWOBYTE;
00599 
00600     dbgprintf(insn, "Found a two-byte escape prefix (0x%hhx)", opcodeByte);
00601     insn->twoByteEscape = opcodeByte;
00602 
00603     if (consumeByte(insn, &opcodeByte))
00604       return -1;
00605 
00606     /* A second byte of 0x38, 0x3a, 0xa6 or 0xa7 is a further escape. */
00607     switch (opcodeByte) {
00608     case 0x38: escapedType = THREEBYTE_38; break;
00609     case 0x3a: escapedType = THREEBYTE_3A; break;
00610     case 0xa6: escapedType = THREEBYTE_A6; break;
00611     case 0xa7: escapedType = THREEBYTE_A7; break;
00612     default:   break;
00613     }
00614 
00615     if (escapedType == TWOBYTE) {
00616       dbgprintf(insn, "Didn't find a three-byte escape prefix");
00617     } else {
00618       dbgprintf(insn, "Found a three-byte escape prefix (0x%hhx)", opcodeByte);
00619       insn->threeByteEscape = opcodeByte;
00620 
00621       if (consumeByte(insn, &opcodeByte))
00622         return -1;
00623     }
00624 
00625     /* Stored last, so a failed read above leaves the type as ONEBYTE. */
00626     insn->opcodeType = escapedType;
00627   }
00628 
00629   /*
00630    * At this point the full opcode has been consumed, so any byte consumed
00631    * from here on must be unconsumed again.
00632    */
00633   insn->opcode = opcodeByte;
00634 
00635   return 0;
00636 }
00657 
00658 static int readModRM(struct InternalInstruction* insn);
00659 
00660 /*
00661  * getIDWithAttrMask - Determines the ID of an instruction for a supplied
00662  *   attribute mask, reading the ModR/M byte first when the instruction
00663  *   tables say it is required for this opcode.
00664  *
00665  * @param instructionID - A pointer whose target is filled in with the ID of the
00666  *                        instruction.  Left untouched on failure.
00667  * @param insn          - The instruction whose ID is to be determined.
00668  * @param attrMask      - The attribute mask to search.
00669  * @return              - 0 if the ModR/M could be read when needed or was not
00670  *                        needed; nonzero otherwise.
00671  */
00672 static int getIDWithAttrMask(uint16_t* instructionID,
00673                              struct InternalInstruction* insn,
00674                              uint8_t attrMask) {
00675   uint8_t context = contextForAttrs(attrMask);
00676   uint8_t modRM = 0;  /* passed to decode() as-is when no ModR/M is needed */
00677 
00678   if (modRMRequired(insn->opcodeType, context, insn->opcode)) {
00679     if (readModRM(insn))
00680       return -1;
00681 
00682     modRM = insn->modRM;
00683   }
00684 
00685   *instructionID = decode(insn->opcodeType, context, insn->opcode, modRM);
00686 
00687   return 0;
00688 }
00702 
00703 /*
00704  * is16BitEquivalent - Determines whether equiv names the 16-bit form of orig.
00705  *   The names must match character for character, except that Q or L may
00706  *   become W, 6 or 3 may become 1, and 4 or 2 may become 6 (e.g. 64/32 -> 16).
00707  *
00708  * @param orig  - The name of the instruction that is not 16-bit.
00709  * @param equiv - The name of the instruction that is 16-bit.
00710  * @return      - TRUE if equiv is a 16-bit equivalent of orig, else FALSE.
00711  */
00712 static BOOL is16BitEquivalent(const char* orig, const char* equiv) {
00713   size_t i;
00714 
00715   for (i = 0;; i++) {
00716     if (orig[i] == '\0' || equiv[i] == '\0')
00717       return (orig[i] == equiv[i]) ? TRUE : FALSE;  /* both must end here */
00718     if (orig[i] == equiv[i])
00719       continue;
00720     if ((orig[i] == 'Q' || orig[i] == 'L') && equiv[i] == 'W')
00721       continue;
00722     if ((orig[i] == '6' || orig[i] == '3') && equiv[i] == '1')
00723       continue;
00724     if ((orig[i] == '4' || orig[i] == '2') && equiv[i] == '6')
00725       continue;
00726     return FALSE;
00727   }
00728 }
00729 
00730 /*
00731  * getID - Determines an instruction's attributes and context, then its ID,
00732  *   consuming the ModR/M byte as appropriate for extended and escape opcodes.
00733  *
00734  * @param insn   - The instruction whose ID is to be determined.
00735  * @param miiArg - Passed unchanged to x86DisassemblerGetInstrName().
00736  * @return       - 0 if the ModR/M could be read when needed or was not needed;
00737  *                 nonzero otherwise.
00738  */
00739 static int getID(struct InternalInstruction* insn, const void *miiArg) {
00740   uint8_t attrMask;
00741   uint16_t instructionID;
00742 
00743   dbgprintf(insn, "getID()");
00744 
00745   attrMask = ATTR_NONE;
00746 
00747   if (insn->mode == MODE_64BIT)
00748     attrMask |= ATTR_64BIT;
00749 
00750   if (insn->vexSize) {
00751     attrMask |= ATTR_VEX;
00752 
00753     if (insn->vexSize == 3) {
00754       switch (ppFromVEX3of3(insn->vexPrefix[2])) {
00755       case VEX_PREFIX_66:
00756         attrMask |= ATTR_OPSIZE;
00757         break;
00758       case VEX_PREFIX_F3:
00759         attrMask |= ATTR_XS;
00760         break;
00761       case VEX_PREFIX_F2:
00762         attrMask |= ATTR_XD;
00763         break;
00764       }
00765 
00766       if (lFromVEX3of3(insn->vexPrefix[2]))
00767         attrMask |= ATTR_VEXL;
00768     }
00769     else if (insn->vexSize == 2) {
00770       switch (ppFromVEX2of2(insn->vexPrefix[1])) {
00771       case VEX_PREFIX_66:
00772         attrMask |= ATTR_OPSIZE;
00773         break;
00774       case VEX_PREFIX_F3:
00775         attrMask |= ATTR_XS;
00776         break;
00777       case VEX_PREFIX_F2:
00778         attrMask |= ATTR_XD;
00779         break;
00780       }
00781 
00782       if (lFromVEX2of2(insn->vexPrefix[1]))
00783         attrMask |= ATTR_VEXL;
00784     }
00785     else {
00786       return -1;
00787     }
00788   }
00789   else {
00790     if (isPrefixAtLocation(insn, 0x66, insn->necessaryPrefixLocation))
00791       attrMask |= ATTR_OPSIZE;
00792     else if (isPrefixAtLocation(insn, 0x67, insn->necessaryPrefixLocation))
00793       attrMask |= ATTR_ADSIZE;
00794     else if (isPrefixAtLocation(insn, 0xf3, insn->necessaryPrefixLocation))
00795       attrMask |= ATTR_XS;
00796     else if (isPrefixAtLocation(insn, 0xf2, insn->necessaryPrefixLocation))
00797       attrMask |= ATTR_XD;
00798   }
00799 
00800   if (insn->rexPrefix & 0x08)
00801     attrMask |= ATTR_REXW;
00802 
00803   if (getIDWithAttrMask(&instructionID, insn, attrMask))
00804     return -1;
00805 
00806   /* The following clauses compensate for limitations of the tables. */
00807 
00808   if ((attrMask & ATTR_VEXL) && (attrMask & ATTR_REXW) &&
00809       !(attrMask & ATTR_OPSIZE)) {
00810     /*
00811      * Some VEX instructions ignore the L-bit, but use the W-bit. Normally L-bit
00812      * has precedence since there are no L-bit with W-bit entries in the tables.
00813      * So if the L-bit isn't significant we should use the W-bit instead.
00814      * We only need to do this if the instruction doesn't specify OpSize since
00815      * there is a VEX_L_W_OPSIZE table.
00816      */
00817 
00818     const struct InstructionSpecifier *spec;
00819     uint16_t instructionIDWithWBit;
00820     const struct InstructionSpecifier *specWithWBit;
00821 
00822     spec = specifierForUID(instructionID);
00823 
00824     if (getIDWithAttrMask(&instructionIDWithWBit,
00825                           insn,
00826                           (attrMask & (~ATTR_VEXL)) | ATTR_REXW)) {
00827       insn->instructionID = instructionID;
00828       insn->spec = spec;
00829       return 0;
00830     }
00831 
00832     specWithWBit = specifierForUID(instructionIDWithWBit);
00833 
00834     if (instructionID != instructionIDWithWBit) {
00835       insn->instructionID = instructionIDWithWBit;
00836       insn->spec = specWithWBit;
00837     } else {
00838       insn->instructionID = instructionID;
00839       insn->spec = spec;
00840     }
00841     return 0;
00842   }
00843 
00844   if (insn->prefixPresent[0x66] && !(attrMask & ATTR_OPSIZE)) {
00845     /*
00846      * The instruction tables make no distinction between instructions that
00847      * allow OpSize anywhere (i.e., 16-bit operations) and that need it in a
00848      * particular spot (i.e., many MMX operations).  In general we're
00849      * conservative, but in the specific case where OpSize is present but not
00850      * in the right place we check if there's a 16-bit operation.
00851      */
00852 
00853     const struct InstructionSpecifier *spec;
00854     uint16_t instructionIDWithOpsize;
00855     const char *specName, *specWithOpSizeName;
00856 
00857     spec = specifierForUID(instructionID);
00858 
00859     if (getIDWithAttrMask(&instructionIDWithOpsize,
00860                           insn,
00861                           attrMask | ATTR_OPSIZE)) {
00862       /*
00863        * ModRM required with OpSize but not present; give up and return version
00864        * without OpSize set
00865        */
00866 
00867       insn->instructionID = instructionID;
00868       insn->spec = spec;
00869       return 0;
00870     }
00871 
00872     specName = x86DisassemblerGetInstrName(instructionID, miiArg);
00873     specWithOpSizeName =
00874       x86DisassemblerGetInstrName(instructionIDWithOpsize, miiArg);
00875 
00876     if (is16BitEquivalent(specName, specWithOpSizeName)) {
00877       insn->instructionID = instructionIDWithOpsize;
00878       insn->spec = specifierForUID(instructionIDWithOpsize);
00879     } else {
00880       insn->instructionID = instructionID;
00881       insn->spec = spec;
00882     }
00883     return 0;
00884   }
00885 
00886   if (insn->opcodeType == ONEBYTE && insn->opcode == 0x90 &&
00887       insn->rexPrefix & 0x01) {
00888     /*
00889      * NOOP shouldn't decode as NOOP if REX.b is set. Instead
00890      * it should decode as XCHG %r8, %eax.
00891      */
00892 
00893     const struct InstructionSpecifier *spec;
00894     uint16_t instructionIDWithNewOpcode;
00895     const struct InstructionSpecifier *specWithNewOpcode;
00896 
00897     spec = specifierForUID(instructionID);
00898 
00899     /* Borrow opcode from one of the other XCHGar opcodes */
00900     insn->opcode = 0x91;
00901 
00902     if (getIDWithAttrMask(&instructionIDWithNewOpcode,
00903                           insn,
00904                           attrMask)) {
00905       insn->opcode = 0x90;
00906 
00907       insn->instructionID = instructionID;
00908       insn->spec = spec;
00909       return 0;
00910     }
00911 
00912     specWithNewOpcode = specifierForUID(instructionIDWithNewOpcode);
00913 
00914     /* Change back */
00915     insn->opcode = 0x90;
00916 
00917     insn->instructionID = instructionIDWithNewOpcode;
00918     insn->spec = specWithNewOpcode;
00919 
00920     return 0;
00921   }
00922 
00923   insn->instructionID = instructionID;
00924   insn->spec = specifierForUID(insn->instructionID);
00925 
00926   return 0;
00927 }
00928 
00929 /*
00930  * readSIB - Consumes the SIB byte to determine addressing information for an
00931  *   instruction.
00932  *
00933  * @param insn  - The instruction whose SIB byte is to be read.
00934  * @return      - 0 if the SIB byte was successfully read; nonzero otherwise.
00935  */
00936 static int readSIB(struct InternalInstruction* insn) {
00937   SIBIndex sibIndexBase = 0;
00938   SIBBase sibBaseBase = 0;
00939   uint8_t index, base;
00940 
00941   dbgprintf(insn, "readSIB()");
00942 
00943   if (insn->consumedSIB)
00944     return 0;
00945 
00946   insn->consumedSIB = TRUE;
00947 
00948   switch (insn->addressSize) {
00949   case 2:
00950     dbgprintf(insn, "SIB-based addressing doesn't work in 16-bit mode");
00951     return -1;
00952     break;
00953   case 4:
00954     sibIndexBase = SIB_INDEX_EAX;
00955     sibBaseBase = SIB_BASE_EAX;
00956     break;
00957   case 8:
00958     sibIndexBase = SIB_INDEX_RAX;
00959     sibBaseBase = SIB_BASE_RAX;
00960     break;
00961   }
00962 
00963   if (consumeByte(insn, &insn->sib))
00964     return -1;
00965 
00966   index = indexFromSIB(insn->sib) | (xFromREX(insn->rexPrefix) << 3);
00967 
00968   switch (index) {
00969   case 0x4:
00970     insn->sibIndex = SIB_INDEX_NONE;
00971     break;
00972   default:
00973     insn->sibIndex = (SIBIndex)(sibIndexBase + index);
00974     if (insn->sibIndex == SIB_INDEX_sib ||
00975         insn->sibIndex == SIB_INDEX_sib64)
00976       insn->sibIndex = SIB_INDEX_NONE;
00977     break;
00978   }
00979 
00980   switch (scaleFromSIB(insn->sib)) {
00981   case 0:
00982     insn->sibScale = 1;
00983     break;
00984   case 1:
00985     insn->sibScale = 2;
00986     break;
00987   case 2:
00988     insn->sibScale = 4;
00989     break;
00990   case 3:
00991     insn->sibScale = 8;
00992     break;
00993   }
00994 
00995   base = baseFromSIB(insn->sib) | (bFromREX(insn->rexPrefix) << 3);
00996 
00997   switch (base) {
00998   case 0x5:
00999     switch (modFromModRM(insn->modRM)) {
01000     case 0x0:
01001       insn->eaDisplacement = EA_DISP_32;
01002       insn->sibBase = SIB_BASE_NONE;
01003       break;
01004     case 0x1:
01005       insn->eaDisplacement = EA_DISP_8;
01006       insn->sibBase = (insn->addressSize == 4 ?
01007                        SIB_BASE_EBP : SIB_BASE_RBP);
01008       break;
01009     case 0x2:
01010       insn->eaDisplacement = EA_DISP_32;
01011       insn->sibBase = (insn->addressSize == 4 ?
01012                        SIB_BASE_EBP : SIB_BASE_RBP);
01013       break;
01014     case 0x3:
01015       debug("Cannot have Mod = 0b11 and a SIB byte");
01016       return -1;
01017     }
01018     break;
01019   default:
01020     insn->sibBase = (SIBBase)(sibBaseBase + base);
01021     break;
01022   }
01023 
01024   return 0;
01025 }
01026 
01027 /*
01028  * readDisplacement - Consumes the displacement of an instruction.
01029  *
01030  * @param insn  - The instruction whose displacement is to be read.
01031  * @return      - 0 if the displacement byte was successfully read; nonzero
01032  *                otherwise.
01033  */
01034 static int readDisplacement(struct InternalInstruction* insn) {
01035   int8_t d8;
01036   int16_t d16;
01037   int32_t d32;
01038 
01039   dbgprintf(insn, "readDisplacement()");
01040 
01041   if (insn->consumedDisplacement)
01042     return 0;
01043 
01044   insn->consumedDisplacement = TRUE;
01045   insn->displacementOffset = insn->readerCursor - insn->startLocation;
01046 
01047   switch (insn->eaDisplacement) {
01048   case EA_DISP_NONE:
01049     insn->consumedDisplacement = FALSE;
01050     break;
01051   case EA_DISP_8:
01052     if (consumeInt8(insn, &d8))
01053       return -1;
01054     insn->displacement = d8;
01055     break;
01056   case EA_DISP_16:
01057     if (consumeInt16(insn, &d16))
01058       return -1;
01059     insn->displacement = d16;
01060     break;
01061   case EA_DISP_32:
01062     if (consumeInt32(insn, &d32))
01063       return -1;
01064     insn->displacement = d32;
01065     break;
01066   }
01067 
01068   insn->consumedDisplacement = TRUE;
01069   return 0;
01070 }
01071 
01072 /*
01073  * readModRM - Consumes all addressing information (ModR/M byte, SIB byte, and
01074  *   displacement) for an instruction and interprets it.
01075  *
01076  * @param insn  - The instruction whose addressing information is to be read.
01077  * @return      - 0 if the information was successfully read; nonzero otherwise.
01078  */
01079 static int readModRM(struct InternalInstruction* insn) {
01080   uint8_t mod, rm, reg;
01081 
01082   dbgprintf(insn, "readModRM()");
01083 
01084   if (insn->consumedModRM)
01085     return 0;
01086 
01087   if (consumeByte(insn, &insn->modRM))
01088     return -1;
01089   insn->consumedModRM = TRUE;
01090 
01091   mod     = modFromModRM(insn->modRM);
01092   rm      = rmFromModRM(insn->modRM);
01093   reg     = regFromModRM(insn->modRM);
01094 
01095   /*
01096    * This goes by insn->registerSize to pick the correct register, which messes
01097    * up if we're using (say) XMM or 8-bit register operands.  That gets fixed in
01098    * fixupReg().
01099    */
01100   switch (insn->registerSize) {
01101   case 2:
01102     insn->regBase = MODRM_REG_AX;
01103     insn->eaRegBase = EA_REG_AX;
01104     break;
01105   case 4:
01106     insn->regBase = MODRM_REG_EAX;
01107     insn->eaRegBase = EA_REG_EAX;
01108     break;
01109   case 8:
01110     insn->regBase = MODRM_REG_RAX;
01111     insn->eaRegBase = EA_REG_RAX;
01112     break;
01113   }
01114 
01115   reg |= rFromREX(insn->rexPrefix) << 3;
01116   rm  |= bFromREX(insn->rexPrefix) << 3;
01117 
01118   insn->reg = (Reg)(insn->regBase + reg);
01119 
01120   switch (insn->addressSize) {
01121   case 2:
01122     insn->eaBaseBase = EA_BASE_BX_SI;
01123 
01124     switch (mod) {
01125     case 0x0:
01126       if (rm == 0x6) {
01127         insn->eaBase = EA_BASE_NONE;
01128         insn->eaDisplacement = EA_DISP_16;
01129         if (readDisplacement(insn))
01130           return -1;
01131       } else {
01132         insn->eaBase = (EABase)(insn->eaBaseBase + rm);
01133         insn->eaDisplacement = EA_DISP_NONE;
01134       }
01135       break;
01136     case 0x1:
01137       insn->eaBase = (EABase)(insn->eaBaseBase + rm);
01138       insn->eaDisplacement = EA_DISP_8;
01139       if (readDisplacement(insn))
01140         return -1;
01141       break;
01142     case 0x2:
01143       insn->eaBase = (EABase)(insn->eaBaseBase + rm);
01144       insn->eaDisplacement = EA_DISP_16;
01145       if (readDisplacement(insn))
01146         return -1;
01147       break;
01148     case 0x3:
01149       insn->eaBase = (EABase)(insn->eaRegBase + rm);
01150       if (readDisplacement(insn))
01151         return -1;
01152       break;
01153     }
01154     break;
01155   case 4:
01156   case 8:
01157     insn->eaBaseBase = (insn->addressSize == 4 ? EA_BASE_EAX : EA_BASE_RAX);
01158 
01159     switch (mod) {
01160     case 0x0:
01161       insn->eaDisplacement = EA_DISP_NONE; /* readSIB may override this */
01162       switch (rm) {
01163       case 0x4:
01164       case 0xc:   /* in case REXW.b is set */
01165         insn->eaBase = (insn->addressSize == 4 ?
01166                         EA_BASE_sib : EA_BASE_sib64);
01167         readSIB(insn);
01168         if (readDisplacement(insn))
01169           return -1;
01170         break;
01171       case 0x5:
01172         insn->eaBase = EA_BASE_NONE;
01173         insn->eaDisplacement = EA_DISP_32;
01174         if (readDisplacement(insn))
01175           return -1;
01176         break;
01177       default:
01178         insn->eaBase = (EABase)(insn->eaBaseBase + rm);
01179         break;
01180       }
01181       break;
01182     case 0x1:
01183     case 0x2:
01184       insn->eaDisplacement = (mod == 0x1 ? EA_DISP_8 : EA_DISP_32);
01185       switch (rm) {
01186       case 0x4:
01187       case 0xc:   /* in case REXW.b is set */
01188         insn->eaBase = EA_BASE_sib;
01189         readSIB(insn);
01190         if (readDisplacement(insn))
01191           return -1;
01192         break;
01193       default:
01194         insn->eaBase = (EABase)(insn->eaBaseBase + rm);
01195         if (readDisplacement(insn))
01196           return -1;
01197         break;
01198       }
01199       break;
01200     case 0x3:
01201       insn->eaDisplacement = EA_DISP_NONE;
01202       insn->eaBase = (EABase)(insn->eaRegBase + rm);
01203       break;
01204     }
01205     break;
01206   } /* switch (insn->addressSize) */
01207 
01208   return 0;
01209 }
01210 
01211 #define GENERIC_FIXUP_FUNC(name, base, prefix)            \
01212   static uint8_t name(struct InternalInstruction *insn,   \
01213                       OperandType type,                   \
01214                       uint8_t index,                      \
01215                       uint8_t *valid) {                   \
01216     *valid = 1;                                           \
01217     switch (type) {                                       \
01218     default:                                              \
01219       debug("Unhandled register type");                   \
01220       *valid = 0;                                         \
01221       return 0;                                           \
01222     case TYPE_Rv:                                         \
01223       return base + index;                                \
01224     case TYPE_R8:                                         \
01225       if (insn->rexPrefix &&                              \
01226          index >= 4 && index <= 7) {                      \
01227         return prefix##_SPL + (index - 4);                \
01228       } else {                                            \
01229         return prefix##_AL + index;                       \
01230       }                                                   \
01231     case TYPE_R16:                                        \
01232       return prefix##_AX + index;                         \
01233     case TYPE_R32:                                        \
01234       return prefix##_EAX + index;                        \
01235     case TYPE_R64:                                        \
01236       return prefix##_RAX + index;                        \
01237     case TYPE_XMM256:                                     \
01238       return prefix##_YMM0 + index;                       \
01239     case TYPE_XMM128:                                     \
01240     case TYPE_XMM64:                                      \
01241     case TYPE_XMM32:                                      \
01242     case TYPE_XMM:                                        \
01243       return prefix##_XMM0 + index;                       \
01244     case TYPE_MM64:                                       \
01245     case TYPE_MM32:                                       \
01246     case TYPE_MM:                                         \
01247       if (index > 7)                                      \
01248         *valid = 0;                                       \
01249       return prefix##_MM0 + index;                        \
01250     case TYPE_SEGMENTREG:                                 \
01251       if (index > 5)                                      \
01252         *valid = 0;                                       \
01253       return prefix##_ES + index;                         \
01254     case TYPE_DEBUGREG:                                   \
01255       if (index > 7)                                      \
01256         *valid = 0;                                       \
01257       return prefix##_DR0 + index;                        \
01258     case TYPE_CONTROLREG:                                 \
01259       if (index > 8)                                      \
01260         *valid = 0;                                       \
01261       return prefix##_CR0 + index;                        \
01262     }                                                     \
01263   }
01264 
01265 /*
01266  * fixup*Value - Consults an operand type to determine the meaning of the
01267  *   reg or R/M field.  If the operand is an XMM operand, for example, an
01268  *   operand would be XMM0 instead of AX, which readModRM() would otherwise
01269  *   misinterpret it as.
01270  *
01271  * @param insn  - The instruction containing the operand.
01272  * @param type  - The operand type.
01273  * @param index - The existing value of the field as reported by readModRM().
01274  * @param valid - The address of a uint8_t.  The target is set to 1 if the
01275  *                field is valid for the register class; 0 if not.
01276  * @return      - The proper value.
01277  */
01278 GENERIC_FIXUP_FUNC(fixupRegValue, insn->regBase,    MODRM_REG)
01279 GENERIC_FIXUP_FUNC(fixupRMValue,  insn->eaRegBase,  EA_REG)
01280 
01281 /*
01282  * fixupReg - Consults an operand specifier to determine which of the
01283  *   fixup*Value functions to use in correcting readModRM()'ss interpretation.
01284  *
01285  * @param insn  - See fixup*Value().
01286  * @param op    - The operand specifier.
01287  * @return      - 0 if fixup was successful; -1 if the register returned was
01288  *                invalid for its class.
01289  */
01290 static int fixupReg(struct InternalInstruction *insn,
01291                     const struct OperandSpecifier *op) {
01292   uint8_t valid;
01293 
01294   dbgprintf(insn, "fixupReg()");
01295 
01296   switch ((OperandEncoding)op->encoding) {
01297   default:
01298     debug("Expected a REG or R/M encoding in fixupReg");
01299     return -1;
01300   case ENCODING_VVVV:
01301     insn->vvvv = (Reg)fixupRegValue(insn,
01302                                     (OperandType)op->type,
01303                                     insn->vvvv,
01304                                     &valid);
01305     if (!valid)
01306       return -1;
01307     break;
01308   case ENCODING_REG:
01309     insn->reg = (Reg)fixupRegValue(insn,
01310                                    (OperandType)op->type,
01311                                    insn->reg - insn->regBase,
01312                                    &valid);
01313     if (!valid)
01314       return -1;
01315     break;
01316   case ENCODING_RM:
01317     if (insn->eaBase >= insn->eaRegBase) {
01318       insn->eaBase = (EABase)fixupRMValue(insn,
01319                                           (OperandType)op->type,
01320                                           insn->eaBase - insn->eaRegBase,
01321                                           &valid);
01322       if (!valid)
01323         return -1;
01324     }
01325     break;
01326   }
01327 
01328   return 0;
01329 }
01330 
01331 /*
01332  * readOpcodeModifier - Reads an operand from the opcode field of an
01333  *   instruction.  Handles AddRegFrm instructions.
01334  *
01335  * @param insn    - The instruction whose opcode field is to be read.
01336  * @param inModRM - Indicates that the opcode field is to be read from the
01337  *                  ModR/M extension; useful for escape opcodes
01338  * @return        - 0 on success; nonzero otherwise.
01339  */
01340 static int readOpcodeModifier(struct InternalInstruction* insn) {
01341   dbgprintf(insn, "readOpcodeModifier()");
01342 
01343   if (insn->consumedOpcodeModifier)
01344     return 0;
01345 
01346   insn->consumedOpcodeModifier = TRUE;
01347 
01348   switch (insn->spec->modifierType) {
01349   default:
01350     debug("Unknown modifier type.");
01351     return -1;
01352   case MODIFIER_NONE:
01353     debug("No modifier but an operand expects one.");
01354     return -1;
01355   case MODIFIER_OPCODE:
01356     insn->opcodeModifier = insn->opcode - insn->spec->modifierBase;
01357     return 0;
01358   case MODIFIER_MODRM:
01359     insn->opcodeModifier = insn->modRM - insn->spec->modifierBase;
01360     return 0;
01361   }
01362 }
01363 
01364 /*
01365  * readOpcodeRegister - Reads an operand from the opcode field of an
01366  *   instruction and interprets it appropriately given the operand width.
01367  *   Handles AddRegFrm instructions.
01368  *
01369  * @param insn  - See readOpcodeModifier().
01370  * @param size  - The width (in bytes) of the register being specified.
01371  *                1 means AL and friends, 2 means AX, 4 means EAX, and 8 means
01372  *                RAX.
01373  * @return      - 0 on success; nonzero otherwise.
01374  */
01375 static int readOpcodeRegister(struct InternalInstruction* insn, uint8_t size) {
01376   dbgprintf(insn, "readOpcodeRegister()");
01377 
01378   if (readOpcodeModifier(insn))
01379     return -1;
01380 
01381   if (size == 0)
01382     size = insn->registerSize;
01383 
01384   switch (size) {
01385   case 1:
01386     insn->opcodeRegister = (Reg)(MODRM_REG_AL + ((bFromREX(insn->rexPrefix) << 3)
01387                                                   | insn->opcodeModifier));
01388     if (insn->rexPrefix &&
01389         insn->opcodeRegister >= MODRM_REG_AL + 0x4 &&
01390         insn->opcodeRegister < MODRM_REG_AL + 0x8) {
01391       insn->opcodeRegister = (Reg)(MODRM_REG_SPL
01392                                    + (insn->opcodeRegister - MODRM_REG_AL - 4));
01393     }
01394 
01395     break;
01396   case 2:
01397     insn->opcodeRegister = (Reg)(MODRM_REG_AX
01398                                  + ((bFromREX(insn->rexPrefix) << 3)
01399                                     | insn->opcodeModifier));
01400     break;
01401   case 4:
01402     insn->opcodeRegister = (Reg)(MODRM_REG_EAX
01403                                  + ((bFromREX(insn->rexPrefix) << 3)
01404                                     | insn->opcodeModifier));
01405     break;
01406   case 8:
01407     insn->opcodeRegister = (Reg)(MODRM_REG_RAX
01408                                  + ((bFromREX(insn->rexPrefix) << 3)
01409                                     | insn->opcodeModifier));
01410     break;
01411   }
01412 
01413   return 0;
01414 }
01415 
01416 /*
01417  * readImmediate - Consumes an immediate operand from an instruction, given the
01418  *   desired operand size.
01419  *
01420  * @param insn  - The instruction whose operand is to be read.
01421  * @param size  - The width (in bytes) of the operand.
01422  * @return      - 0 if the immediate was successfully consumed; nonzero
01423  *                otherwise.
01424  */
01425 static int readImmediate(struct InternalInstruction* insn, uint8_t size) {
01426   uint8_t imm8;
01427   uint16_t imm16;
01428   uint32_t imm32;
01429   uint64_t imm64;
01430 
01431   dbgprintf(insn, "readImmediate()");
01432 
01433   if (insn->numImmediatesConsumed == 2) {
01434     debug("Already consumed two immediates");
01435     return -1;
01436   }
01437 
01438   if (size == 0)
01439     size = insn->immediateSize;
01440   else
01441     insn->immediateSize = size;
01442   insn->immediateOffset = insn->readerCursor - insn->startLocation;
01443 
01444   switch (size) {
01445   case 1:
01446     if (consumeByte(insn, &imm8))
01447       return -1;
01448     insn->immediates[insn->numImmediatesConsumed] = imm8;
01449     break;
01450   case 2:
01451     if (consumeUInt16(insn, &imm16))
01452       return -1;
01453     insn->immediates[insn->numImmediatesConsumed] = imm16;
01454     break;
01455   case 4:
01456     if (consumeUInt32(insn, &imm32))
01457       return -1;
01458     insn->immediates[insn->numImmediatesConsumed] = imm32;
01459     break;
01460   case 8:
01461     if (consumeUInt64(insn, &imm64))
01462       return -1;
01463     insn->immediates[insn->numImmediatesConsumed] = imm64;
01464     break;
01465   }
01466 
01467   insn->numImmediatesConsumed++;
01468 
01469   return 0;
01470 }
01471 
01472 /*
01473  * readVVVV - Consumes vvvv from an instruction if it has a VEX prefix.
01474  *
01475  * @param insn  - The instruction whose operand is to be read.
01476  * @return      - 0 if the vvvv was successfully consumed; nonzero
01477  *                otherwise.
01478  */
01479 static int readVVVV(struct InternalInstruction* insn) {
01480   dbgprintf(insn, "readVVVV()");
01481 
01482   if (insn->vexSize == 3)
01483     insn->vvvv = vvvvFromVEX3of3(insn->vexPrefix[2]);
01484   else if (insn->vexSize == 2)
01485     insn->vvvv = vvvvFromVEX2of2(insn->vexPrefix[1]);
01486   else
01487     return -1;
01488 
01489   if (insn->mode != MODE_64BIT)
01490     insn->vvvv &= 0x7;
01491 
01492   return 0;
01493 }
01494 
01495 /*
01496  * readOperands - Consults the specifier for an instruction and consumes all
01497  *   operands for that instruction, interpreting them as it goes.
01498  *
01499  * @param insn  - The instruction whose operands are to be read and interpreted.
01500  * @return      - 0 if all operands could be read; nonzero otherwise.
01501  */
01502 static int readOperands(struct InternalInstruction* insn) {
01503   int index;
01504   int hasVVVV, needVVVV;
01505   int sawRegImm = 0;
01506 
01507   dbgprintf(insn, "readOperands()");
01508 
01509   /* If non-zero vvvv specified, need to make sure one of the operands
01510      uses it. */
01511   hasVVVV = !readVVVV(insn);
01512   needVVVV = hasVVVV && (insn->vvvv != 0);
01513 
01514   for (index = 0; index < X86_MAX_OPERANDS; ++index) {
01515     switch (x86OperandSets[insn->spec->operands][index].encoding) {
01516     case ENCODING_NONE:
01517       break;
01518     case ENCODING_REG:
01519     case ENCODING_RM:
01520       if (readModRM(insn))
01521         return -1;
01522       if (fixupReg(insn, &x86OperandSets[insn->spec->operands][index]))
01523         return -1;
01524       break;
01525     case ENCODING_CB:
01526     case ENCODING_CW:
01527     case ENCODING_CD:
01528     case ENCODING_CP:
01529     case ENCODING_CO:
01530     case ENCODING_CT:
01531       dbgprintf(insn, "We currently don't hande code-offset encodings");
01532       return -1;
01533     case ENCODING_IB:
01534       if (sawRegImm) {
01535         /* Saw a register immediate so don't read again and instead split the
01536            previous immediate.  FIXME: This is a hack. */
01537         insn->immediates[insn->numImmediatesConsumed] =
01538           insn->immediates[insn->numImmediatesConsumed - 1] & 0xf;
01539         ++insn->numImmediatesConsumed;
01540         break;
01541       }
01542       if (readImmediate(insn, 1))
01543         return -1;
01544       if (x86OperandSets[insn->spec->operands][index].type == TYPE_IMM3 &&
01545           insn->immediates[insn->numImmediatesConsumed - 1] > 7)
01546         return -1;
01547       if (x86OperandSets[insn->spec->operands][index].type == TYPE_IMM5 &&
01548           insn->immediates[insn->numImmediatesConsumed - 1] > 31)
01549         return -1;
01550       if (x86OperandSets[insn->spec->operands][index].type == TYPE_XMM128 ||
01551           x86OperandSets[insn->spec->operands][index].type == TYPE_XMM256)
01552         sawRegImm = 1;
01553       break;
01554     case ENCODING_IW:
01555       if (readImmediate(insn, 2))
01556         return -1;
01557       break;
01558     case ENCODING_ID:
01559       if (readImmediate(insn, 4))
01560         return -1;
01561       break;
01562     case ENCODING_IO:
01563       if (readImmediate(insn, 8))
01564         return -1;
01565       break;
01566     case ENCODING_Iv:
01567       if (readImmediate(insn, insn->immediateSize))
01568         return -1;
01569       break;
01570     case ENCODING_Ia:
01571       if (readImmediate(insn, insn->addressSize))
01572         return -1;
01573       break;
01574     case ENCODING_RB:
01575       if (readOpcodeRegister(insn, 1))
01576         return -1;
01577       break;
01578     case ENCODING_RW:
01579       if (readOpcodeRegister(insn, 2))
01580         return -1;
01581       break;
01582     case ENCODING_RD:
01583       if (readOpcodeRegister(insn, 4))
01584         return -1;
01585       break;
01586     case ENCODING_RO:
01587       if (readOpcodeRegister(insn, 8))
01588         return -1;
01589       break;
01590     case ENCODING_Rv:
01591       if (readOpcodeRegister(insn, 0))
01592         return -1;
01593       break;
01594     case ENCODING_I:
01595       if (readOpcodeModifier(insn))
01596         return -1;
01597       break;
01598     case ENCODING_VVVV:
01599       needVVVV = 0; /* Mark that we have found a VVVV operand. */
01600       if (!hasVVVV)
01601         return -1;
01602       if (fixupReg(insn, &x86OperandSets[insn->spec->operands][index]))
01603         return -1;
01604       break;
01605     case ENCODING_DUP:
01606       break;
01607     default:
01608       dbgprintf(insn, "Encountered an operand with an unknown encoding.");
01609       return -1;
01610     }
01611   }
01612 
01613   /* If we didn't find ENCODING_VVVV operand, but non-zero vvvv present, fail */
01614   if (needVVVV) return -1;
01615 
01616   return 0;
01617 }
01618 
01619 /*
01620  * decodeInstruction - Reads and interprets a full instruction provided by the
01621  *   user.
01622  *
01623  * @param insn      - A pointer to the instruction to be populated.  Must be
01624  *                    pre-allocated.
01625  * @param reader    - The function to be used to read the instruction's bytes.
01626  * @param readerArg - A generic argument to be passed to the reader to store
01627  *                    any internal state.
01628  * @param logger    - If non-NULL, the function to be used to write log messages
01629  *                    and warnings.
01630  * @param loggerArg - A generic argument to be passed to the logger to store
01631  *                    any internal state.
01632  * @param startLoc  - The address (in the reader's address space) of the first
01633  *                    byte in the instruction.
01634  * @param mode      - The mode (real mode, IA-32e, or IA-32e in 64-bit mode) to
01635  *                    decode the instruction in.
01636  * @return          - 0 if the instruction's memory could be read; nonzero if
01637  *                    not.
01638  */
01639 int decodeInstruction(struct InternalInstruction* insn,
01640                       byteReader_t reader,
01641                       const void* readerArg,
01642                       dlog_t logger,
01643                       void* loggerArg,
01644                       const void* miiArg,
01645                       uint64_t startLoc,
01646                       DisassemblerMode mode) {
01647   memset(insn, 0, sizeof(struct InternalInstruction));
01648 
01649   insn->reader = reader;
01650   insn->readerArg = readerArg;
01651   insn->dlog = logger;
01652   insn->dlogArg = loggerArg;
01653   insn->startLocation = startLoc;
01654   insn->readerCursor = startLoc;
01655   insn->mode = mode;
01656   insn->numImmediatesConsumed = 0;
01657 
01658   if (readPrefixes(insn)       ||
01659       readOpcode(insn)         ||
01660       getID(insn, miiArg)      ||
01661       insn->instructionID == 0 ||
01662       readOperands(insn))
01663     return -1;
01664 
01665   insn->operands = &x86OperandSets[insn->spec->operands][0];
01666 
01667   insn->length = insn->readerCursor - insn->startLocation;
01668 
01669   dbgprintf(insn, "Read from 0x%llx to 0x%llx: length %zu",
01670             startLoc, insn->readerCursor, insn->length);
01671 
01672   if (insn->length > 15)
01673     dbgprintf(insn, "Instruction exceeds 15-byte limit");
01674 
01675   return 0;
01676 }