LLVM API Documentation
00001 /*===-- X86DisassemblerDecoder.c - Disassembler decoder ------------*- C -*-===* 00002 * 00003 * The LLVM Compiler Infrastructure 00004 * 00005 * This file is distributed under the University of Illinois Open Source 00006 * License. See LICENSE.TXT for details. 00007 * 00008 *===----------------------------------------------------------------------===* 00009 * 00010 * This file is part of the X86 Disassembler. 00011 * It contains the implementation of the instruction decoder. 00012 * Documentation for the disassembler can be found in X86Disassembler.h. 00013 * 00014 *===----------------------------------------------------------------------===*/ 00015 00016 #include <stdarg.h> /* for va_*() */ 00017 #include <stdio.h> /* for vsnprintf() */ 00018 #include <stdlib.h> /* for exit() */ 00019 #include <string.h> /* for memset() */ 00020 00021 #include "X86DisassemblerDecoder.h" 00022 00023 #include "X86GenDisassemblerTables.inc" 00024 00025 #define TRUE 1 00026 #define FALSE 0 00027 00028 typedef int8_t bool; 00029 00030 #ifndef NDEBUG 00031 #define debug(s) do { x86DisassemblerDebug(__FILE__, __LINE__, s); } while (0) 00032 #else 00033 #define debug(s) do { } while (0) 00034 #endif 00035 00036 00037 /* 00038 * contextForAttrs - Client for the instruction context table. Takes a set of 00039 * attributes and returns the appropriate decode context. 00040 * 00041 * @param attrMask - Attributes, from the enumeration attributeBits. 00042 * @return - The InstructionContext to use when looking up an 00043 * an instruction with these attributes. 00044 */ 00045 static InstructionContext contextForAttrs(uint8_t attrMask) { 00046 return CONTEXTS_SYM[attrMask]; 00047 } 00048 00049 /* 00050 * modRMRequired - Reads the appropriate instruction table to determine whether 00051 * the ModR/M byte is required to decode a particular instruction. 00052 * 00053 * @param type - The opcode type (i.e., how many bytes it has). 00054 * @param insnContext - The context for the instruction, as returned by 00055 * contextForAttrs. 00056 * @param opcode - The last byte of the instruction's opcode, not counting 00057 * ModR/M extensions and escapes. 00058 * @return - TRUE if the ModR/M byte is required, FALSE otherwise. 00059 */ 00060 static int modRMRequired(OpcodeType type, 00061 InstructionContext insnContext, 00062 uint8_t opcode) { 00063 const struct ContextDecision* decision = 0; 00064 00065 switch (type) { 00066 case ONEBYTE: 00067 decision = &ONEBYTE_SYM; 00068 break; 00069 case TWOBYTE: 00070 decision = &TWOBYTE_SYM; 00071 break; 00072 case THREEBYTE_38: 00073 decision = &THREEBYTE38_SYM; 00074 break; 00075 case THREEBYTE_3A: 00076 decision = &THREEBYTE3A_SYM; 00077 break; 00078 case THREEBYTE_A6: 00079 decision = &THREEBYTEA6_SYM; 00080 break; 00081 case THREEBYTE_A7: 00082 decision = &THREEBYTEA7_SYM; 00083 break; 00084 } 00085 00086 return decision->opcodeDecisions[insnContext].modRMDecisions[opcode]. 00087 modrm_type != MODRM_ONEENTRY; 00088 } 00089 00090 /* 00091 * decode - Reads the appropriate instruction table to obtain the unique ID of 00092 * an instruction. 00093 * 00094 * @param type - See modRMRequired(). 00095 * @param insnContext - See modRMRequired(). 00096 * @param opcode - See modRMRequired(). 00097 * @param modRM - The ModR/M byte if required, or any value if not. 00098 * @return - The UID of the instruction, or 0 on failure. 00099 */ 00100 static InstrUID decode(OpcodeType type, 00101 InstructionContext insnContext, 00102 uint8_t opcode, 00103 uint8_t modRM) { 00104 const struct ModRMDecision* dec = 0; 00105 00106 switch (type) { 00107 case ONEBYTE: 00108 dec = &ONEBYTE_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode]; 00109 break; 00110 case TWOBYTE: 00111 dec = &TWOBYTE_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode]; 00112 break; 00113 case THREEBYTE_38: 00114 dec = &THREEBYTE38_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode]; 00115 break; 00116 case THREEBYTE_3A: 00117 dec = &THREEBYTE3A_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode]; 00118 break; 00119 case THREEBYTE_A6: 00120 dec = &THREEBYTEA6_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode]; 00121 break; 00122 case THREEBYTE_A7: 00123 dec = &THREEBYTEA7_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode]; 00124 break; 00125 } 00126 00127 switch (dec->modrm_type) { 00128 default: 00129 debug("Corrupt table! Unknown modrm_type"); 00130 return 0; 00131 case MODRM_ONEENTRY: 00132 return modRMTable[dec->instructionIDs]; 00133 case MODRM_SPLITRM: 00134 if (modFromModRM(modRM) == 0x3) 00135 return modRMTable[dec->instructionIDs+1]; 00136 return modRMTable[dec->instructionIDs]; 00137 case MODRM_SPLITREG: 00138 if (modFromModRM(modRM) == 0x3) 00139 return modRMTable[dec->instructionIDs+((modRM & 0x38) >> 3)+8]; 00140 return modRMTable[dec->instructionIDs+((modRM & 0x38) >> 3)]; 00141 case MODRM_SPLITMISC: 00142 if (modFromModRM(modRM) == 0x3) 00143 return modRMTable[dec->instructionIDs+(modRM & 0x3f)+8]; 00144 return modRMTable[dec->instructionIDs+((modRM & 0x38) >> 3)]; 00145 case MODRM_FULL: 00146 return modRMTable[dec->instructionIDs+modRM]; 00147 } 00148 } 00149 00150 /* 00151 * specifierForUID - Given a UID, returns the name and operand specification for 00152 * that instruction. 00153 * 00154 * @param uid - The unique ID for the instruction. This should be returned by 00155 * decode(); specifierForUID will not check bounds. 00156 * @return - A pointer to the specification for that instruction. 00157 */ 00158 static const struct InstructionSpecifier *specifierForUID(InstrUID uid) { 00159 return &INSTRUCTIONS_SYM[uid]; 00160 } 00161 00162 /* 00163 * consumeByte - Uses the reader function provided by the user to consume one 00164 * byte from the instruction's memory and advance the cursor. 00165 * 00166 * @param insn - The instruction with the reader function to use. The cursor 00167 * for this instruction is advanced. 00168 * @param byte - A pointer to a pre-allocated memory buffer to be populated 00169 * with the data read. 00170 * @return - 0 if the read was successful; nonzero otherwise. 00171 */ 00172 static int consumeByte(struct InternalInstruction* insn, uint8_t* byte) { 00173 int ret = insn->reader(insn->readerArg, byte, insn->readerCursor); 00174 00175 if (!ret) 00176 ++(insn->readerCursor); 00177 00178 return ret; 00179 } 00180 00181 /* 00182 * lookAtByte - Like consumeByte, but does not advance the cursor. 00183 * 00184 * @param insn - See consumeByte(). 00185 * @param byte - See consumeByte(). 00186 * @return - See consumeByte(). 00187 */ 00188 static int lookAtByte(struct InternalInstruction* insn, uint8_t* byte) { 00189 return insn->reader(insn->readerArg, byte, insn->readerCursor); 00190 } 00191 00192 static void unconsumeByte(struct InternalInstruction* insn) { 00193 insn->readerCursor--; 00194 } 00195 00196 #define CONSUME_FUNC(name, type) \ 00197 static int name(struct InternalInstruction* insn, type* ptr) { \ 00198 type combined = 0; \ 00199 unsigned offset; \ 00200 for (offset = 0; offset < sizeof(type); ++offset) { \ 00201 uint8_t byte; \ 00202 int ret = insn->reader(insn->readerArg, \ 00203 &byte, \ 00204 insn->readerCursor + offset); \ 00205 if (ret) \ 00206 return ret; \ 00207 combined = combined | ((uint64_t)byte << (offset * 8)); \ 00208 } \ 00209 *ptr = combined; \ 00210 insn->readerCursor += sizeof(type); \ 00211 return 0; \ 00212 } 00213 00214 /* 00215 * consume* - Use the reader function provided by the user to consume data 00216 * values of various sizes from the instruction's memory and advance the 00217 * cursor appropriately. These readers perform endian conversion. 00218 * 00219 * @param insn - See consumeByte(). 00220 * @param ptr - A pointer to a pre-allocated memory of appropriate size to 00221 * be populated with the data read. 00222 * @return - See consumeByte(). 00223 */ 00224 CONSUME_FUNC(consumeInt8, int8_t) 00225 CONSUME_FUNC(consumeInt16, int16_t) 00226 CONSUME_FUNC(consumeInt32, int32_t) 00227 CONSUME_FUNC(consumeUInt16, uint16_t) 00228 CONSUME_FUNC(consumeUInt32, uint32_t) 00229 CONSUME_FUNC(consumeUInt64, uint64_t) 00230 00231 /* 00232 * dbgprintf - Uses the logging function provided by the user to log a single 00233 * message, typically without a carriage-return. 00234 * 00235 * @param insn - The instruction containing the logging function. 00236 * @param format - See printf(). 00237 * @param ... - See printf(). 00238 */ 00239 static void dbgprintf(struct InternalInstruction* insn, 00240 const char* format, 00241 ...) { 00242 char buffer[256]; 00243 va_list ap; 00244 00245 if (!insn->dlog) 00246 return; 00247 00248 va_start(ap, format); 00249 (void)vsnprintf(buffer, sizeof(buffer), format, ap); 00250 va_end(ap); 00251 00252 insn->dlog(insn->dlogArg, buffer); 00253 00254 return; 00255 } 00256 00257 /* 00258 * setPrefixPresent - Marks that a particular prefix is present at a particular 00259 * location. 00260 * 00261 * @param insn - The instruction to be marked as having the prefix. 00262 * @param prefix - The prefix that is present. 00263 * @param location - The location where the prefix is located (in the address 00264 * space of the instruction's reader). 00265 */ 00266 static void setPrefixPresent(struct InternalInstruction* insn, 00267 uint8_t prefix, 00268 uint64_t location) 00269 { 00270 insn->prefixPresent[prefix] = 1; 00271 insn->prefixLocations[prefix] = location; 00272 } 00273 00274 /* 00275 * isPrefixAtLocation - Queries an instruction to determine whether a prefix is 00276 * present at a given location. 00277 * 00278 * @param insn - The instruction to be queried. 00279 * @param prefix - The prefix. 00280 * @param location - The location to query. 00281 * @return - Whether the prefix is at that location. 00282 */ 00283 static BOOL isPrefixAtLocation(struct InternalInstruction* insn, 00284 uint8_t prefix, 00285 uint64_t location) 00286 { 00287 if (insn->prefixPresent[prefix] == 1 && 00288 insn->prefixLocations[prefix] == location) 00289 return TRUE; 00290 else 00291 return FALSE; 00292 } 00293 00294 /* 00295 * readPrefixes - Consumes all of an instruction's prefix bytes, and marks the 00296 * instruction as having them. Also sets the instruction's default operand, 00297 * address, and other relevant data sizes to report operands correctly. 00298 * 00299 * @param insn - The instruction whose prefixes are to be read. 00300 * @return - 0 if the instruction could be read until the end of the prefix 00301 * bytes, and no prefixes conflicted; nonzero otherwise. 00302 */ 00303 static int readPrefixes(struct InternalInstruction* insn) { 00304 BOOL isPrefix = TRUE; 00305 BOOL prefixGroups[4] = { FALSE }; 00306 uint64_t prefixLocation; 00307 uint8_t byte = 0; 00308 00309 BOOL hasAdSize = FALSE; 00310 BOOL hasOpSize = FALSE; 00311 00312 dbgprintf(insn, "readPrefixes()"); 00313 00314 while (isPrefix) { 00315 prefixLocation = insn->readerCursor; 00316 00317 if (consumeByte(insn, &byte)) 00318 return -1; 00319 00320 /* 00321 * If the byte is a LOCK/REP/REPNE prefix and not a part of the opcode, then 00322 * break and let it be disassembled as a normal "instruction". 00323 */ 00324 if (insn->readerCursor - 1 == insn->startLocation 00325 && (byte == 0xf0 || byte == 0xf2 || byte == 0xf3)) { 00326 uint8_t nextByte; 00327 if (byte == 0xf0) 00328 break; 00329 if (lookAtByte(insn, &nextByte)) 00330 return -1; 00331 if (insn->mode == MODE_64BIT && (nextByte & 0xf0) == 0x40) { 00332 if (consumeByte(insn, &nextByte)) 00333 return -1; 00334 if (lookAtByte(insn, &nextByte)) 00335 return -1; 00336 unconsumeByte(insn); 00337 } 00338 if (nextByte != 0x0f && nextByte != 0x90) 00339 break; 00340 } 00341 00342 switch (byte) { 00343 case 0xf0: /* LOCK */ 00344 case 0xf2: /* REPNE/REPNZ */ 00345 case 0xf3: /* REP or REPE/REPZ */ 00346 if (prefixGroups[0]) 00347 dbgprintf(insn, "Redundant Group 1 prefix"); 00348 prefixGroups[0] = TRUE; 00349 setPrefixPresent(insn, byte, prefixLocation); 00350 break; 00351 case 0x2e: /* CS segment override -OR- Branch not taken */ 00352 case 0x36: /* SS segment override -OR- Branch taken */ 00353 case 0x3e: /* DS segment override */ 00354 case 0x26: /* ES segment override */ 00355 case 0x64: /* FS segment override */ 00356 case 0x65: /* GS segment override */ 00357 switch (byte) { 00358 case 0x2e: 00359 insn->segmentOverride = SEG_OVERRIDE_CS; 00360 break; 00361 case 0x36: 00362 insn->segmentOverride = SEG_OVERRIDE_SS; 00363 break; 00364 case 0x3e: 00365 insn->segmentOverride = SEG_OVERRIDE_DS; 00366 break; 00367 case 0x26: 00368 insn->segmentOverride = SEG_OVERRIDE_ES; 00369 break; 00370 case 0x64: 00371 insn->segmentOverride = SEG_OVERRIDE_FS; 00372 break; 00373 case 0x65: 00374 insn->segmentOverride = SEG_OVERRIDE_GS; 00375 break; 00376 default: 00377 debug("Unhandled override"); 00378 return -1; 00379 } 00380 if (prefixGroups[1]) 00381 dbgprintf(insn, "Redundant Group 2 prefix"); 00382 prefixGroups[1] = TRUE; 00383 setPrefixPresent(insn, byte, prefixLocation); 00384 break; 00385 case 0x66: /* Operand-size override */ 00386 if (prefixGroups[2]) 00387 dbgprintf(insn, "Redundant Group 3 prefix"); 00388 prefixGroups[2] = TRUE; 00389 hasOpSize = TRUE; 00390 setPrefixPresent(insn, byte, prefixLocation); 00391 break; 00392 case 0x67: /* Address-size override */ 00393 if (prefixGroups[3]) 00394 dbgprintf(insn, "Redundant Group 4 prefix"); 00395 prefixGroups[3] = TRUE; 00396 hasAdSize = TRUE; 00397 setPrefixPresent(insn, byte, prefixLocation); 00398 break; 00399 default: /* Not a prefix byte */ 00400 isPrefix = FALSE; 00401 break; 00402 } 00403 00404 if (isPrefix) 00405 dbgprintf(insn, "Found prefix 0x%hhx", byte); 00406 } 00407 00408 insn->vexSize = 0; 00409 00410 if (byte == 0xc4) { 00411 uint8_t byte1; 00412 00413 if (lookAtByte(insn, &byte1)) { 00414 dbgprintf(insn, "Couldn't read second byte of VEX"); 00415 return -1; 00416 } 00417 00418 if (insn->mode == MODE_64BIT || (byte1 & 0xc0) == 0xc0) { 00419 insn->vexSize = 3; 00420 insn->necessaryPrefixLocation = insn->readerCursor - 1; 00421 } 00422 else { 00423 unconsumeByte(insn); 00424 insn->necessaryPrefixLocation = insn->readerCursor - 1; 00425 } 00426 00427 if (insn->vexSize == 3) { 00428 insn->vexPrefix[0] = byte; 00429 consumeByte(insn, &insn->vexPrefix[1]); 00430 consumeByte(insn, &insn->vexPrefix[2]); 00431 00432 /* We simulate the REX prefix for simplicity's sake */ 00433 00434 if (insn->mode == MODE_64BIT) { 00435 insn->rexPrefix = 0x40 00436 | (wFromVEX3of3(insn->vexPrefix[2]) << 3) 00437 | (rFromVEX2of3(insn->vexPrefix[1]) << 2) 00438 | (xFromVEX2of3(insn->vexPrefix[1]) << 1) 00439 | (bFromVEX2of3(insn->vexPrefix[1]) << 0); 00440 } 00441 00442 switch (ppFromVEX3of3(insn->vexPrefix[2])) 00443 { 00444 default: 00445 break; 00446 case VEX_PREFIX_66: 00447 hasOpSize = TRUE; 00448 break; 00449 } 00450 00451 dbgprintf(insn, "Found VEX prefix 0x%hhx 0x%hhx 0x%hhx", insn->vexPrefix[0], insn->vexPrefix[1], insn->vexPrefix[2]); 00452 } 00453 } 00454 else if (byte == 0xc5) { 00455 uint8_t byte1; 00456 00457 if (lookAtByte(insn, &byte1)) { 00458 dbgprintf(insn, "Couldn't read second byte of VEX"); 00459 return -1; 00460 } 00461 00462 if (insn->mode == MODE_64BIT || (byte1 & 0xc0) == 0xc0) { 00463 insn->vexSize = 2; 00464 } 00465 else { 00466 unconsumeByte(insn); 00467 } 00468 00469 if (insn->vexSize == 2) { 00470 insn->vexPrefix[0] = byte; 00471 consumeByte(insn, &insn->vexPrefix[1]); 00472 00473 if (insn->mode == MODE_64BIT) { 00474 insn->rexPrefix = 0x40 00475 | (rFromVEX2of2(insn->vexPrefix[1]) << 2); 00476 } 00477 00478 switch (ppFromVEX2of2(insn->vexPrefix[1])) 00479 { 00480 default: 00481 break; 00482 case VEX_PREFIX_66: 00483 hasOpSize = TRUE; 00484 break; 00485 } 00486 00487 dbgprintf(insn, "Found VEX prefix 0x%hhx 0x%hhx", insn->vexPrefix[0], insn->vexPrefix[1]); 00488 } 00489 } 00490 else { 00491 if (insn->mode == MODE_64BIT) { 00492 if ((byte & 0xf0) == 0x40) { 00493 uint8_t opcodeByte; 00494 00495 if (lookAtByte(insn, &opcodeByte) || ((opcodeByte & 0xf0) == 0x40)) { 00496 dbgprintf(insn, "Redundant REX prefix"); 00497 return -1; 00498 } 00499 00500 insn->rexPrefix = byte; 00501 insn->necessaryPrefixLocation = insn->readerCursor - 2; 00502 00503 dbgprintf(insn, "Found REX prefix 0x%hhx", byte); 00504 } else { 00505 unconsumeByte(insn); 00506 insn->necessaryPrefixLocation = insn->readerCursor - 1; 00507 } 00508 } else { 00509 unconsumeByte(insn); 00510 insn->necessaryPrefixLocation = insn->readerCursor - 1; 00511 } 00512 } 00513 00514 if (insn->mode == MODE_16BIT) { 00515 insn->registerSize = (hasOpSize ? 4 : 2); 00516 insn->addressSize = (hasAdSize ? 4 : 2); 00517 insn->displacementSize = (hasAdSize ? 4 : 2); 00518 insn->immediateSize = (hasOpSize ? 4 : 2); 00519 } else if (insn->mode == MODE_32BIT) { 00520 insn->registerSize = (hasOpSize ? 2 : 4); 00521 insn->addressSize = (hasAdSize ? 2 : 4); 00522 insn->displacementSize = (hasAdSize ? 2 : 4); 00523 insn->immediateSize = (hasOpSize ? 2 : 4); 00524 } else if (insn->mode == MODE_64BIT) { 00525 if (insn->rexPrefix && wFromREX(insn->rexPrefix)) { 00526 insn->registerSize = 8; 00527 insn->addressSize = (hasAdSize ? 4 : 8); 00528 insn->displacementSize = 4; 00529 insn->immediateSize = 4; 00530 } else if (insn->rexPrefix) { 00531 insn->registerSize = (hasOpSize ? 2 : 4); 00532 insn->addressSize = (hasAdSize ? 4 : 8); 00533 insn->displacementSize = (hasOpSize ? 2 : 4); 00534 insn->immediateSize = (hasOpSize ? 2 : 4); 00535 } else { 00536 insn->registerSize = (hasOpSize ? 2 : 4); 00537 insn->addressSize = (hasAdSize ? 4 : 8); 00538 insn->displacementSize = (hasOpSize ? 2 : 4); 00539 insn->immediateSize = (hasOpSize ? 2 : 4); 00540 } 00541 } 00542 00543 return 0; 00544 } 00545 00546 /* 00547 * readOpcode - Reads the opcode (excepting the ModR/M byte in the case of 00548 * extended or escape opcodes). 00549 * 00550 * @param insn - The instruction whose opcode is to be read. 00551 * @return - 0 if the opcode could be read successfully; nonzero otherwise. 00552 */ 00553 static int readOpcode(struct InternalInstruction* insn) { 00554 /* Determine the length of the primary opcode */ 00555 00556 uint8_t current; 00557 00558 dbgprintf(insn, "readOpcode()"); 00559 00560 insn->opcodeType = ONEBYTE; 00561 00562 if (insn->vexSize == 3) 00563 { 00564 switch (mmmmmFromVEX2of3(insn->vexPrefix[1])) 00565 { 00566 default: 00567 dbgprintf(insn, "Unhandled m-mmmm field for instruction (0x%hhx)", mmmmmFromVEX2of3(insn->vexPrefix[1])); 00568 return -1; 00569 case 0: 00570 break; 00571 case VEX_LOB_0F: 00572 insn->twoByteEscape = 0x0f; 00573 insn->opcodeType = TWOBYTE; 00574 return consumeByte(insn, &insn->opcode); 00575 case VEX_LOB_0F38: 00576 insn->twoByteEscape = 0x0f; 00577 insn->threeByteEscape = 0x38; 00578 insn->opcodeType = THREEBYTE_38; 00579 return consumeByte(insn, &insn->opcode); 00580 case VEX_LOB_0F3A: 00581 insn->twoByteEscape = 0x0f; 00582 insn->threeByteEscape = 0x3a; 00583 insn->opcodeType = THREEBYTE_3A; 00584 return consumeByte(insn, &insn->opcode); 00585 } 00586 } 00587 else if (insn->vexSize == 2) 00588 { 00589 insn->twoByteEscape = 0x0f; 00590 insn->opcodeType = TWOBYTE; 00591 return consumeByte(insn, &insn->opcode); 00592 } 00593 00594 if (consumeByte(insn, ¤t)) 00595 return -1; 00596 00597 if (current == 0x0f) { 00598 dbgprintf(insn, "Found a two-byte escape prefix (0x%hhx)", current); 00599 00600 insn->twoByteEscape = current; 00601 00602 if (consumeByte(insn, ¤t)) 00603 return -1; 00604 00605 if (current == 0x38) { 00606 dbgprintf(insn, "Found a three-byte escape prefix (0x%hhx)", current); 00607 00608 insn->threeByteEscape = current; 00609 00610 if (consumeByte(insn, ¤t)) 00611 return -1; 00612 00613 insn->opcodeType = THREEBYTE_38; 00614 } else if (current == 0x3a) { 00615 dbgprintf(insn, "Found a three-byte escape prefix (0x%hhx)", current); 00616 00617 insn->threeByteEscape = current; 00618 00619 if (consumeByte(insn, ¤t)) 00620 return -1; 00621 00622 insn->opcodeType = THREEBYTE_3A; 00623 } else if (current == 0xa6) { 00624 dbgprintf(insn, "Found a three-byte escape prefix (0x%hhx)", current); 00625 00626 insn->threeByteEscape = current; 00627 00628 if (consumeByte(insn, ¤t)) 00629 return -1; 00630 00631 insn->opcodeType = THREEBYTE_A6; 00632 } else if (current == 0xa7) { 00633 dbgprintf(insn, "Found a three-byte escape prefix (0x%hhx)", current); 00634 00635 insn->threeByteEscape = current; 00636 00637 if (consumeByte(insn, ¤t)) 00638 return -1; 00639 00640 insn->opcodeType = THREEBYTE_A7; 00641 } else { 00642 dbgprintf(insn, "Didn't find a three-byte escape prefix"); 00643 00644 insn->opcodeType = TWOBYTE; 00645 } 00646 } 00647 00648 /* 00649 * At this point we have consumed the full opcode. 00650 * Anything we consume from here on must be unconsumed. 00651 */ 00652 00653 insn->opcode = current; 00654 00655 return 0; 00656 } 00657 00658 static int readModRM(struct InternalInstruction* insn); 00659 00660 /* 00661 * getIDWithAttrMask - Determines the ID of an instruction, consuming 00662 * the ModR/M byte as appropriate for extended and escape opcodes, 00663 * and using a supplied attribute mask. 00664 * 00665 * @param instructionID - A pointer whose target is filled in with the ID of the 00666 * instruction. 00667 * @param insn - The instruction whose ID is to be determined. 00668 * @param attrMask - The attribute mask to search. 00669 * @return - 0 if the ModR/M could be read when needed or was not 00670 * needed; nonzero otherwise. 00671 */ 00672 static int getIDWithAttrMask(uint16_t* instructionID, 00673 struct InternalInstruction* insn, 00674 uint8_t attrMask) { 00675 BOOL hasModRMExtension; 00676 00677 uint8_t instructionClass; 00678 00679 instructionClass = contextForAttrs(attrMask); 00680 00681 hasModRMExtension = modRMRequired(insn->opcodeType, 00682 instructionClass, 00683 insn->opcode); 00684 00685 if (hasModRMExtension) { 00686 if (readModRM(insn)) 00687 return -1; 00688 00689 *instructionID = decode(insn->opcodeType, 00690 instructionClass, 00691 insn->opcode, 00692 insn->modRM); 00693 } else { 00694 *instructionID = decode(insn->opcodeType, 00695 instructionClass, 00696 insn->opcode, 00697 0); 00698 } 00699 00700 return 0; 00701 } 00702 00703 /* 00704 * is16BitEquivalent - Determines whether two instruction names refer to 00705 * equivalent instructions but one is 16-bit whereas the other is not. 00706 * 00707 * @param orig - The instruction that is not 16-bit 00708 * @param equiv - The instruction that is 16-bit 00709 */ 00710 static BOOL is16BitEquivalent(const char* orig, const char* equiv) { 00711 off_t i; 00712 00713 for (i = 0;; i++) { 00714 if (orig[i] == '\0' && equiv[i] == '\0') 00715 return TRUE; 00716 if (orig[i] == '\0' || equiv[i] == '\0') 00717 return FALSE; 00718 if (orig[i] != equiv[i]) { 00719 if ((orig[i] == 'Q' || orig[i] == 'L') && equiv[i] == 'W') 00720 continue; 00721 if ((orig[i] == '6' || orig[i] == '3') && equiv[i] == '1') 00722 continue; 00723 if ((orig[i] == '4' || orig[i] == '2') && equiv[i] == '6') 00724 continue; 00725 return FALSE; 00726 } 00727 } 00728 } 00729 00730 /* 00731 * getID - Determines the ID of an instruction, consuming the ModR/M byte as 00732 * appropriate for extended and escape opcodes. Determines the attributes and 00733 * context for the instruction before doing so. 00734 * 00735 * @param insn - The instruction whose ID is to be determined. 00736 * @return - 0 if the ModR/M could be read when needed or was not needed; 00737 * nonzero otherwise. 00738 */ 00739 static int getID(struct InternalInstruction* insn, const void *miiArg) { 00740 uint8_t attrMask; 00741 uint16_t instructionID; 00742 00743 dbgprintf(insn, "getID()"); 00744 00745 attrMask = ATTR_NONE; 00746 00747 if (insn->mode == MODE_64BIT) 00748 attrMask |= ATTR_64BIT; 00749 00750 if (insn->vexSize) { 00751 attrMask |= ATTR_VEX; 00752 00753 if (insn->vexSize == 3) { 00754 switch (ppFromVEX3of3(insn->vexPrefix[2])) { 00755 case VEX_PREFIX_66: 00756 attrMask |= ATTR_OPSIZE; 00757 break; 00758 case VEX_PREFIX_F3: 00759 attrMask |= ATTR_XS; 00760 break; 00761 case VEX_PREFIX_F2: 00762 attrMask |= ATTR_XD; 00763 break; 00764 } 00765 00766 if (lFromVEX3of3(insn->vexPrefix[2])) 00767 attrMask |= ATTR_VEXL; 00768 } 00769 else if (insn->vexSize == 2) { 00770 switch (ppFromVEX2of2(insn->vexPrefix[1])) { 00771 case VEX_PREFIX_66: 00772 attrMask |= ATTR_OPSIZE; 00773 break; 00774 case VEX_PREFIX_F3: 00775 attrMask |= ATTR_XS; 00776 break; 00777 case VEX_PREFIX_F2: 00778 attrMask |= ATTR_XD; 00779 break; 00780 } 00781 00782 if (lFromVEX2of2(insn->vexPrefix[1])) 00783 attrMask |= ATTR_VEXL; 00784 } 00785 else { 00786 return -1; 00787 } 00788 } 00789 else { 00790 if (isPrefixAtLocation(insn, 0x66, insn->necessaryPrefixLocation)) 00791 attrMask |= ATTR_OPSIZE; 00792 else if (isPrefixAtLocation(insn, 0x67, insn->necessaryPrefixLocation)) 00793 attrMask |= ATTR_ADSIZE; 00794 else if (isPrefixAtLocation(insn, 0xf3, insn->necessaryPrefixLocation)) 00795 attrMask |= ATTR_XS; 00796 else if (isPrefixAtLocation(insn, 0xf2, insn->necessaryPrefixLocation)) 00797 attrMask |= ATTR_XD; 00798 } 00799 00800 if (insn->rexPrefix & 0x08) 00801 attrMask |= ATTR_REXW; 00802 00803 if (getIDWithAttrMask(&instructionID, insn, attrMask)) 00804 return -1; 00805 00806 /* The following clauses compensate for limitations of the tables. */ 00807 00808 if ((attrMask & ATTR_VEXL) && (attrMask & ATTR_REXW) && 00809 !(attrMask & ATTR_OPSIZE)) { 00810 /* 00811 * Some VEX instructions ignore the L-bit, but use the W-bit. Normally L-bit 00812 * has precedence since there are no L-bit with W-bit entries in the tables. 00813 * So if the L-bit isn't significant we should use the W-bit instead. 00814 * We only need to do this if the instruction doesn't specify OpSize since 00815 * there is a VEX_L_W_OPSIZE table. 00816 */ 00817 00818 const struct InstructionSpecifier *spec; 00819 uint16_t instructionIDWithWBit; 00820 const struct InstructionSpecifier *specWithWBit; 00821 00822 spec = specifierForUID(instructionID); 00823 00824 if (getIDWithAttrMask(&instructionIDWithWBit, 00825 insn, 00826 (attrMask & (~ATTR_VEXL)) | ATTR_REXW)) { 00827 insn->instructionID = instructionID; 00828 insn->spec = spec; 00829 return 0; 00830 } 00831 00832 specWithWBit = specifierForUID(instructionIDWithWBit); 00833 00834 if (instructionID != instructionIDWithWBit) { 00835 insn->instructionID = instructionIDWithWBit; 00836 insn->spec = specWithWBit; 00837 } else { 00838 insn->instructionID = instructionID; 00839 insn->spec = spec; 00840 } 00841 return 0; 00842 } 00843 00844 if (insn->prefixPresent[0x66] && !(attrMask & ATTR_OPSIZE)) { 00845 /* 00846 * The instruction tables make no distinction between instructions that 00847 * allow OpSize anywhere (i.e., 16-bit operations) and that need it in a 00848 * particular spot (i.e., many MMX operations). In general we're 00849 * conservative, but in the specific case where OpSize is present but not 00850 * in the right place we check if there's a 16-bit operation. 00851 */ 00852 00853 const struct InstructionSpecifier *spec; 00854 uint16_t instructionIDWithOpsize; 00855 const char *specName, *specWithOpSizeName; 00856 00857 spec = specifierForUID(instructionID); 00858 00859 if (getIDWithAttrMask(&instructionIDWithOpsize, 00860 insn, 00861 attrMask | ATTR_OPSIZE)) { 00862 /* 00863 * ModRM required with OpSize but not present; give up and return version 00864 * without OpSize set 00865 */ 00866 00867 insn->instructionID = instructionID; 00868 insn->spec = spec; 00869 return 0; 00870 } 00871 00872 specName = x86DisassemblerGetInstrName(instructionID, miiArg); 00873 specWithOpSizeName = 00874 x86DisassemblerGetInstrName(instructionIDWithOpsize, miiArg); 00875 00876 if (is16BitEquivalent(specName, specWithOpSizeName)) { 00877 insn->instructionID = instructionIDWithOpsize; 00878 insn->spec = specifierForUID(instructionIDWithOpsize); 00879 } else { 00880 insn->instructionID = instructionID; 00881 insn->spec = spec; 00882 } 00883 return 0; 00884 } 00885 00886 if (insn->opcodeType == ONEBYTE && insn->opcode == 0x90 && 00887 insn->rexPrefix & 0x01) { 00888 /* 00889 * NOOP shouldn't decode as NOOP if REX.b is set. Instead 00890 * it should decode as XCHG %r8, %eax. 00891 */ 00892 00893 const struct InstructionSpecifier *spec; 00894 uint16_t instructionIDWithNewOpcode; 00895 const struct InstructionSpecifier *specWithNewOpcode; 00896 00897 spec = specifierForUID(instructionID); 00898 00899 /* Borrow opcode from one of the other XCHGar opcodes */ 00900 insn->opcode = 0x91; 00901 00902 if (getIDWithAttrMask(&instructionIDWithNewOpcode, 00903 insn, 00904 attrMask)) { 00905 insn->opcode = 0x90; 00906 00907 insn->instructionID = instructionID; 00908 insn->spec = spec; 00909 return 0; 00910 } 00911 00912 specWithNewOpcode = specifierForUID(instructionIDWithNewOpcode); 00913 00914 /* Change back */ 00915 insn->opcode = 0x90; 00916 00917 insn->instructionID = instructionIDWithNewOpcode; 00918 insn->spec = specWithNewOpcode; 00919 00920 return 0; 00921 } 00922 00923 insn->instructionID = instructionID; 00924 insn->spec = specifierForUID(insn->instructionID); 00925 00926 return 0; 00927 } 00928 00929 /* 00930 * readSIB - Consumes the SIB byte to determine addressing information for an 00931 * instruction. 00932 * 00933 * @param insn - The instruction whose SIB byte is to be read. 00934 * @return - 0 if the SIB byte was successfully read; nonzero otherwise. 00935 */ 00936 static int readSIB(struct InternalInstruction* insn) { 00937 SIBIndex sibIndexBase = 0; 00938 SIBBase sibBaseBase = 0; 00939 uint8_t index, base; 00940 00941 dbgprintf(insn, "readSIB()"); 00942 00943 if (insn->consumedSIB) 00944 return 0; 00945 00946 insn->consumedSIB = TRUE; 00947 00948 switch (insn->addressSize) { 00949 case 2: 00950 dbgprintf(insn, "SIB-based addressing doesn't work in 16-bit mode"); 00951 return -1; 00952 break; 00953 case 4: 00954 sibIndexBase = SIB_INDEX_EAX; 00955 sibBaseBase = SIB_BASE_EAX; 00956 break; 00957 case 8: 00958 sibIndexBase = SIB_INDEX_RAX; 00959 sibBaseBase = SIB_BASE_RAX; 00960 break; 00961 } 00962 00963 if (consumeByte(insn, &insn->sib)) 00964 return -1; 00965 00966 index = indexFromSIB(insn->sib) | (xFromREX(insn->rexPrefix) << 3); 00967 00968 switch (index) { 00969 case 0x4: 00970 insn->sibIndex = SIB_INDEX_NONE; 00971 break; 00972 default: 00973 insn->sibIndex = (SIBIndex)(sibIndexBase + index); 00974 if (insn->sibIndex == SIB_INDEX_sib || 00975 insn->sibIndex == SIB_INDEX_sib64) 00976 insn->sibIndex = SIB_INDEX_NONE; 00977 break; 00978 } 00979 00980 switch (scaleFromSIB(insn->sib)) { 00981 case 0: 00982 insn->sibScale = 1; 00983 break; 00984 case 1: 00985 insn->sibScale = 2; 00986 break; 00987 case 2: 00988 insn->sibScale = 4; 00989 break; 00990 case 3: 00991 insn->sibScale = 8; 00992 break; 00993 } 00994 00995 base = baseFromSIB(insn->sib) | (bFromREX(insn->rexPrefix) << 3); 00996 00997 switch (base) { 00998 case 0x5: 00999 switch (modFromModRM(insn->modRM)) { 01000 case 0x0: 01001 insn->eaDisplacement = EA_DISP_32; 01002 insn->sibBase = SIB_BASE_NONE; 01003 break; 01004 case 0x1: 01005 insn->eaDisplacement = EA_DISP_8; 01006 insn->sibBase = (insn->addressSize == 4 ? 01007 SIB_BASE_EBP : SIB_BASE_RBP); 01008 break; 01009 case 0x2: 01010 insn->eaDisplacement = EA_DISP_32; 01011 insn->sibBase = (insn->addressSize == 4 ? 01012 SIB_BASE_EBP : SIB_BASE_RBP); 01013 break; 01014 case 0x3: 01015 debug("Cannot have Mod = 0b11 and a SIB byte"); 01016 return -1; 01017 } 01018 break; 01019 default: 01020 insn->sibBase = (SIBBase)(sibBaseBase + base); 01021 break; 01022 } 01023 01024 return 0; 01025 } 01026 01027 /* 01028 * readDisplacement - Consumes the displacement of an instruction. 01029 * 01030 * @param insn - The instruction whose displacement is to be read. 01031 * @return - 0 if the displacement byte was successfully read; nonzero 01032 * otherwise. 01033 */ 01034 static int readDisplacement(struct InternalInstruction* insn) { 01035 int8_t d8; 01036 int16_t d16; 01037 int32_t d32; 01038 01039 dbgprintf(insn, "readDisplacement()"); 01040 01041 if (insn->consumedDisplacement) 01042 return 0; 01043 01044 insn->consumedDisplacement = TRUE; 01045 insn->displacementOffset = insn->readerCursor - insn->startLocation; 01046 01047 switch (insn->eaDisplacement) { 01048 case EA_DISP_NONE: 01049 insn->consumedDisplacement = FALSE; 01050 break; 01051 case EA_DISP_8: 01052 if (consumeInt8(insn, &d8)) 01053 return -1; 01054 insn->displacement = d8; 01055 break; 01056 case EA_DISP_16: 01057 if (consumeInt16(insn, &d16)) 01058 return -1; 01059 insn->displacement = d16; 01060 break; 01061 case EA_DISP_32: 01062 if (consumeInt32(insn, &d32)) 01063 return -1; 01064 insn->displacement = d32; 01065 break; 01066 } 01067 01068 insn->consumedDisplacement = TRUE; 01069 return 0; 01070 } 01071 01072 /* 01073 * readModRM - Consumes all addressing information (ModR/M byte, SIB byte, and 01074 * displacement) for an instruction and interprets it. 01075 * 01076 * @param insn - The instruction whose addressing information is to be read. 01077 * @return - 0 if the information was successfully read; nonzero otherwise. 01078 */ 01079 static int readModRM(struct InternalInstruction* insn) { 01080 uint8_t mod, rm, reg; 01081 01082 dbgprintf(insn, "readModRM()"); 01083 01084 if (insn->consumedModRM) 01085 return 0; 01086 01087 if (consumeByte(insn, &insn->modRM)) 01088 return -1; 01089 insn->consumedModRM = TRUE; 01090 01091 mod = modFromModRM(insn->modRM); 01092 rm = rmFromModRM(insn->modRM); 01093 reg = regFromModRM(insn->modRM); 01094 01095 /* 01096 * This goes by insn->registerSize to pick the correct register, which messes 01097 * up if we're using (say) XMM or 8-bit register operands. That gets fixed in 01098 * fixupReg(). 01099 */ 01100 switch (insn->registerSize) { 01101 case 2: 01102 insn->regBase = MODRM_REG_AX; 01103 insn->eaRegBase = EA_REG_AX; 01104 break; 01105 case 4: 01106 insn->regBase = MODRM_REG_EAX; 01107 insn->eaRegBase = EA_REG_EAX; 01108 break; 01109 case 8: 01110 insn->regBase = MODRM_REG_RAX; 01111 insn->eaRegBase = EA_REG_RAX; 01112 break; 01113 } 01114 01115 reg |= rFromREX(insn->rexPrefix) << 3; 01116 rm |= bFromREX(insn->rexPrefix) << 3; 01117 01118 insn->reg = (Reg)(insn->regBase + reg); 01119 01120 switch (insn->addressSize) { 01121 case 2: 01122 insn->eaBaseBase = EA_BASE_BX_SI; 01123 01124 switch (mod) { 01125 case 0x0: 01126 if (rm == 0x6) { 01127 insn->eaBase = EA_BASE_NONE; 01128 insn->eaDisplacement = EA_DISP_16; 01129 if (readDisplacement(insn)) 01130 return -1; 01131 } else { 01132 insn->eaBase = (EABase)(insn->eaBaseBase + rm); 01133 insn->eaDisplacement = EA_DISP_NONE; 01134 } 01135 break; 01136 case 0x1: 01137 insn->eaBase = (EABase)(insn->eaBaseBase + rm); 01138 insn->eaDisplacement = EA_DISP_8; 01139 if (readDisplacement(insn)) 01140 return -1; 01141 break; 01142 case 0x2: 01143 insn->eaBase = (EABase)(insn->eaBaseBase + rm); 01144 insn->eaDisplacement = EA_DISP_16; 01145 if (readDisplacement(insn)) 01146 return -1; 01147 break; 01148 case 0x3: 01149 insn->eaBase = (EABase)(insn->eaRegBase + rm); 01150 if (readDisplacement(insn)) 01151 return -1; 01152 break; 01153 } 01154 break; 01155 case 4: 01156 case 8: 01157 insn->eaBaseBase = (insn->addressSize == 4 ? EA_BASE_EAX : EA_BASE_RAX); 01158 01159 switch (mod) { 01160 case 0x0: 01161 insn->eaDisplacement = EA_DISP_NONE; /* readSIB may override this */ 01162 switch (rm) { 01163 case 0x4: 01164 case 0xc: /* in case REXW.b is set */ 01165 insn->eaBase = (insn->addressSize == 4 ? 01166 EA_BASE_sib : EA_BASE_sib64); 01167 readSIB(insn); 01168 if (readDisplacement(insn)) 01169 return -1; 01170 break; 01171 case 0x5: 01172 insn->eaBase = EA_BASE_NONE; 01173 insn->eaDisplacement = EA_DISP_32; 01174 if (readDisplacement(insn)) 01175 return -1; 01176 break; 01177 default: 01178 insn->eaBase = (EABase)(insn->eaBaseBase + rm); 01179 break; 01180 } 01181 break; 01182 case 0x1: 01183 case 0x2: 01184 insn->eaDisplacement = (mod == 0x1 ? EA_DISP_8 : EA_DISP_32); 01185 switch (rm) { 01186 case 0x4: 01187 case 0xc: /* in case REXW.b is set */ 01188 insn->eaBase = EA_BASE_sib; 01189 readSIB(insn); 01190 if (readDisplacement(insn)) 01191 return -1; 01192 break; 01193 default: 01194 insn->eaBase = (EABase)(insn->eaBaseBase + rm); 01195 if (readDisplacement(insn)) 01196 return -1; 01197 break; 01198 } 01199 break; 01200 case 0x3: 01201 insn->eaDisplacement = EA_DISP_NONE; 01202 insn->eaBase = (EABase)(insn->eaRegBase + rm); 01203 break; 01204 } 01205 break; 01206 } /* switch (insn->addressSize) */ 01207 01208 return 0; 01209 } 01210 01211 #define GENERIC_FIXUP_FUNC(name, base, prefix) \ 01212 static uint8_t name(struct InternalInstruction *insn, \ 01213 OperandType type, \ 01214 uint8_t index, \ 01215 uint8_t *valid) { \ 01216 *valid = 1; \ 01217 switch (type) { \ 01218 default: \ 01219 debug("Unhandled register type"); \ 01220 *valid = 0; \ 01221 return 0; \ 01222 case TYPE_Rv: \ 01223 return base + index; \ 01224 case TYPE_R8: \ 01225 if (insn->rexPrefix && \ 01226 index >= 4 && index <= 7) { \ 01227 return prefix##_SPL + (index - 4); \ 01228 } else { \ 01229 return prefix##_AL + index; \ 01230 } \ 01231 case TYPE_R16: \ 01232 return prefix##_AX + index; \ 01233 case TYPE_R32: \ 01234 return prefix##_EAX + index; \ 01235 case TYPE_R64: \ 01236 return prefix##_RAX + index; \ 01237 case TYPE_XMM256: \ 01238 return prefix##_YMM0 + index; \ 01239 case TYPE_XMM128: \ 01240 case TYPE_XMM64: \ 01241 case TYPE_XMM32: \ 01242 case TYPE_XMM: \ 01243 return prefix##_XMM0 + index; \ 01244 case TYPE_MM64: \ 01245 case TYPE_MM32: \ 01246 case TYPE_MM: \ 01247 if (index > 7) \ 01248 *valid = 0; \ 01249 return prefix##_MM0 + index; \ 01250 case TYPE_SEGMENTREG: \ 01251 if (index > 5) \ 01252 *valid = 0; \ 01253 return prefix##_ES + index; \ 01254 case TYPE_DEBUGREG: \ 01255 if (index > 7) \ 01256 *valid = 0; \ 01257 return prefix##_DR0 + index; \ 01258 case TYPE_CONTROLREG: \ 01259 if (index > 8) \ 01260 *valid = 0; \ 01261 return prefix##_CR0 + index; \ 01262 } \ 01263 } 01264 01265 /* 01266 * fixup*Value - Consults an operand type to determine the meaning of the 01267 * reg or R/M field. If the operand is an XMM operand, for example, an 01268 * operand would be XMM0 instead of AX, which readModRM() would otherwise 01269 * misinterpret it as. 01270 * 01271 * @param insn - The instruction containing the operand. 01272 * @param type - The operand type. 01273 * @param index - The existing value of the field as reported by readModRM(). 01274 * @param valid - The address of a uint8_t. The target is set to 1 if the 01275 * field is valid for the register class; 0 if not. 01276 * @return - The proper value. 01277 */ 01278 GENERIC_FIXUP_FUNC(fixupRegValue, insn->regBase, MODRM_REG) 01279 GENERIC_FIXUP_FUNC(fixupRMValue, insn->eaRegBase, EA_REG) 01280 01281 /* 01282 * fixupReg - Consults an operand specifier to determine which of the 01283 * fixup*Value functions to use in correcting readModRM()'ss interpretation. 01284 * 01285 * @param insn - See fixup*Value(). 01286 * @param op - The operand specifier. 01287 * @return - 0 if fixup was successful; -1 if the register returned was 01288 * invalid for its class. 01289 */ 01290 static int fixupReg(struct InternalInstruction *insn, 01291 const struct OperandSpecifier *op) { 01292 uint8_t valid; 01293 01294 dbgprintf(insn, "fixupReg()"); 01295 01296 switch ((OperandEncoding)op->encoding) { 01297 default: 01298 debug("Expected a REG or R/M encoding in fixupReg"); 01299 return -1; 01300 case ENCODING_VVVV: 01301 insn->vvvv = (Reg)fixupRegValue(insn, 01302 (OperandType)op->type, 01303 insn->vvvv, 01304 &valid); 01305 if (!valid) 01306 return -1; 01307 break; 01308 case ENCODING_REG: 01309 insn->reg = (Reg)fixupRegValue(insn, 01310 (OperandType)op->type, 01311 insn->reg - insn->regBase, 01312 &valid); 01313 if (!valid) 01314 return -1; 01315 break; 01316 case ENCODING_RM: 01317 if (insn->eaBase >= insn->eaRegBase) { 01318 insn->eaBase = (EABase)fixupRMValue(insn, 01319 (OperandType)op->type, 01320 insn->eaBase - insn->eaRegBase, 01321 &valid); 01322 if (!valid) 01323 return -1; 01324 } 01325 break; 01326 } 01327 01328 return 0; 01329 } 01330 01331 /* 01332 * readOpcodeModifier - Reads an operand from the opcode field of an 01333 * instruction. Handles AddRegFrm instructions. 01334 * 01335 * @param insn - The instruction whose opcode field is to be read. 01336 * @param inModRM - Indicates that the opcode field is to be read from the 01337 * ModR/M extension; useful for escape opcodes 01338 * @return - 0 on success; nonzero otherwise. 01339 */ 01340 static int readOpcodeModifier(struct InternalInstruction* insn) { 01341 dbgprintf(insn, "readOpcodeModifier()"); 01342 01343 if (insn->consumedOpcodeModifier) 01344 return 0; 01345 01346 insn->consumedOpcodeModifier = TRUE; 01347 01348 switch (insn->spec->modifierType) { 01349 default: 01350 debug("Unknown modifier type."); 01351 return -1; 01352 case MODIFIER_NONE: 01353 debug("No modifier but an operand expects one."); 01354 return -1; 01355 case MODIFIER_OPCODE: 01356 insn->opcodeModifier = insn->opcode - insn->spec->modifierBase; 01357 return 0; 01358 case MODIFIER_MODRM: 01359 insn->opcodeModifier = insn->modRM - insn->spec->modifierBase; 01360 return 0; 01361 } 01362 } 01363 01364 /* 01365 * readOpcodeRegister - Reads an operand from the opcode field of an 01366 * instruction and interprets it appropriately given the operand width. 01367 * Handles AddRegFrm instructions. 01368 * 01369 * @param insn - See readOpcodeModifier(). 01370 * @param size - The width (in bytes) of the register being specified. 01371 * 1 means AL and friends, 2 means AX, 4 means EAX, and 8 means 01372 * RAX. 01373 * @return - 0 on success; nonzero otherwise. 01374 */ 01375 static int readOpcodeRegister(struct InternalInstruction* insn, uint8_t size) { 01376 dbgprintf(insn, "readOpcodeRegister()"); 01377 01378 if (readOpcodeModifier(insn)) 01379 return -1; 01380 01381 if (size == 0) 01382 size = insn->registerSize; 01383 01384 switch (size) { 01385 case 1: 01386 insn->opcodeRegister = (Reg)(MODRM_REG_AL + ((bFromREX(insn->rexPrefix) << 3) 01387 | insn->opcodeModifier)); 01388 if (insn->rexPrefix && 01389 insn->opcodeRegister >= MODRM_REG_AL + 0x4 && 01390 insn->opcodeRegister < MODRM_REG_AL + 0x8) { 01391 insn->opcodeRegister = (Reg)(MODRM_REG_SPL 01392 + (insn->opcodeRegister - MODRM_REG_AL - 4)); 01393 } 01394 01395 break; 01396 case 2: 01397 insn->opcodeRegister = (Reg)(MODRM_REG_AX 01398 + ((bFromREX(insn->rexPrefix) << 3) 01399 | insn->opcodeModifier)); 01400 break; 01401 case 4: 01402 insn->opcodeRegister = (Reg)(MODRM_REG_EAX 01403 + ((bFromREX(insn->rexPrefix) << 3) 01404 | insn->opcodeModifier)); 01405 break; 01406 case 8: 01407 insn->opcodeRegister = (Reg)(MODRM_REG_RAX 01408 + ((bFromREX(insn->rexPrefix) << 3) 01409 | insn->opcodeModifier)); 01410 break; 01411 } 01412 01413 return 0; 01414 } 01415 01416 /* 01417 * readImmediate - Consumes an immediate operand from an instruction, given the 01418 * desired operand size. 01419 * 01420 * @param insn - The instruction whose operand is to be read. 01421 * @param size - The width (in bytes) of the operand. 01422 * @return - 0 if the immediate was successfully consumed; nonzero 01423 * otherwise. 01424 */ 01425 static int readImmediate(struct InternalInstruction* insn, uint8_t size) { 01426 uint8_t imm8; 01427 uint16_t imm16; 01428 uint32_t imm32; 01429 uint64_t imm64; 01430 01431 dbgprintf(insn, "readImmediate()"); 01432 01433 if (insn->numImmediatesConsumed == 2) { 01434 debug("Already consumed two immediates"); 01435 return -1; 01436 } 01437 01438 if (size == 0) 01439 size = insn->immediateSize; 01440 else 01441 insn->immediateSize = size; 01442 insn->immediateOffset = insn->readerCursor - insn->startLocation; 01443 01444 switch (size) { 01445 case 1: 01446 if (consumeByte(insn, &imm8)) 01447 return -1; 01448 insn->immediates[insn->numImmediatesConsumed] = imm8; 01449 break; 01450 case 2: 01451 if (consumeUInt16(insn, &imm16)) 01452 return -1; 01453 insn->immediates[insn->numImmediatesConsumed] = imm16; 01454 break; 01455 case 4: 01456 if (consumeUInt32(insn, &imm32)) 01457 return -1; 01458 insn->immediates[insn->numImmediatesConsumed] = imm32; 01459 break; 01460 case 8: 01461 if (consumeUInt64(insn, &imm64)) 01462 return -1; 01463 insn->immediates[insn->numImmediatesConsumed] = imm64; 01464 break; 01465 } 01466 01467 insn->numImmediatesConsumed++; 01468 01469 return 0; 01470 } 01471 01472 /* 01473 * readVVVV - Consumes vvvv from an instruction if it has a VEX prefix. 01474 * 01475 * @param insn - The instruction whose operand is to be read. 01476 * @return - 0 if the vvvv was successfully consumed; nonzero 01477 * otherwise. 01478 */ 01479 static int readVVVV(struct InternalInstruction* insn) { 01480 dbgprintf(insn, "readVVVV()"); 01481 01482 if (insn->vexSize == 3) 01483 insn->vvvv = vvvvFromVEX3of3(insn->vexPrefix[2]); 01484 else if (insn->vexSize == 2) 01485 insn->vvvv = vvvvFromVEX2of2(insn->vexPrefix[1]); 01486 else 01487 return -1; 01488 01489 if (insn->mode != MODE_64BIT) 01490 insn->vvvv &= 0x7; 01491 01492 return 0; 01493 } 01494 01495 /* 01496 * readOperands - Consults the specifier for an instruction and consumes all 01497 * operands for that instruction, interpreting them as it goes. 01498 * 01499 * @param insn - The instruction whose operands are to be read and interpreted. 01500 * @return - 0 if all operands could be read; nonzero otherwise. 01501 */ 01502 static int readOperands(struct InternalInstruction* insn) { 01503 int index; 01504 int hasVVVV, needVVVV; 01505 int sawRegImm = 0; 01506 01507 dbgprintf(insn, "readOperands()"); 01508 01509 /* If non-zero vvvv specified, need to make sure one of the operands 01510 uses it. */ 01511 hasVVVV = !readVVVV(insn); 01512 needVVVV = hasVVVV && (insn->vvvv != 0); 01513 01514 for (index = 0; index < X86_MAX_OPERANDS; ++index) { 01515 switch (x86OperandSets[insn->spec->operands][index].encoding) { 01516 case ENCODING_NONE: 01517 break; 01518 case ENCODING_REG: 01519 case ENCODING_RM: 01520 if (readModRM(insn)) 01521 return -1; 01522 if (fixupReg(insn, &x86OperandSets[insn->spec->operands][index])) 01523 return -1; 01524 break; 01525 case ENCODING_CB: 01526 case ENCODING_CW: 01527 case ENCODING_CD: 01528 case ENCODING_CP: 01529 case ENCODING_CO: 01530 case ENCODING_CT: 01531 dbgprintf(insn, "We currently don't hande code-offset encodings"); 01532 return -1; 01533 case ENCODING_IB: 01534 if (sawRegImm) { 01535 /* Saw a register immediate so don't read again and instead split the 01536 previous immediate. FIXME: This is a hack. */ 01537 insn->immediates[insn->numImmediatesConsumed] = 01538 insn->immediates[insn->numImmediatesConsumed - 1] & 0xf; 01539 ++insn->numImmediatesConsumed; 01540 break; 01541 } 01542 if (readImmediate(insn, 1)) 01543 return -1; 01544 if (x86OperandSets[insn->spec->operands][index].type == TYPE_IMM3 && 01545 insn->immediates[insn->numImmediatesConsumed - 1] > 7) 01546 return -1; 01547 if (x86OperandSets[insn->spec->operands][index].type == TYPE_IMM5 && 01548 insn->immediates[insn->numImmediatesConsumed - 1] > 31) 01549 return -1; 01550 if (x86OperandSets[insn->spec->operands][index].type == TYPE_XMM128 || 01551 x86OperandSets[insn->spec->operands][index].type == TYPE_XMM256) 01552 sawRegImm = 1; 01553 break; 01554 case ENCODING_IW: 01555 if (readImmediate(insn, 2)) 01556 return -1; 01557 break; 01558 case ENCODING_ID: 01559 if (readImmediate(insn, 4)) 01560 return -1; 01561 break; 01562 case ENCODING_IO: 01563 if (readImmediate(insn, 8)) 01564 return -1; 01565 break; 01566 case ENCODING_Iv: 01567 if (readImmediate(insn, insn->immediateSize)) 01568 return -1; 01569 break; 01570 case ENCODING_Ia: 01571 if (readImmediate(insn, insn->addressSize)) 01572 return -1; 01573 break; 01574 case ENCODING_RB: 01575 if (readOpcodeRegister(insn, 1)) 01576 return -1; 01577 break; 01578 case ENCODING_RW: 01579 if (readOpcodeRegister(insn, 2)) 01580 return -1; 01581 break; 01582 case ENCODING_RD: 01583 if (readOpcodeRegister(insn, 4)) 01584 return -1; 01585 break; 01586 case ENCODING_RO: 01587 if (readOpcodeRegister(insn, 8)) 01588 return -1; 01589 break; 01590 case ENCODING_Rv: 01591 if (readOpcodeRegister(insn, 0)) 01592 return -1; 01593 break; 01594 case ENCODING_I: 01595 if (readOpcodeModifier(insn)) 01596 return -1; 01597 break; 01598 case ENCODING_VVVV: 01599 needVVVV = 0; /* Mark that we have found a VVVV operand. */ 01600 if (!hasVVVV) 01601 return -1; 01602 if (fixupReg(insn, &x86OperandSets[insn->spec->operands][index])) 01603 return -1; 01604 break; 01605 case ENCODING_DUP: 01606 break; 01607 default: 01608 dbgprintf(insn, "Encountered an operand with an unknown encoding."); 01609 return -1; 01610 } 01611 } 01612 01613 /* If we didn't find ENCODING_VVVV operand, but non-zero vvvv present, fail */ 01614 if (needVVVV) return -1; 01615 01616 return 0; 01617 } 01618 01619 /* 01620 * decodeInstruction - Reads and interprets a full instruction provided by the 01621 * user. 01622 * 01623 * @param insn - A pointer to the instruction to be populated. Must be 01624 * pre-allocated. 01625 * @param reader - The function to be used to read the instruction's bytes. 01626 * @param readerArg - A generic argument to be passed to the reader to store 01627 * any internal state. 01628 * @param logger - If non-NULL, the function to be used to write log messages 01629 * and warnings. 01630 * @param loggerArg - A generic argument to be passed to the logger to store 01631 * any internal state. 01632 * @param startLoc - The address (in the reader's address space) of the first 01633 * byte in the instruction. 01634 * @param mode - The mode (real mode, IA-32e, or IA-32e in 64-bit mode) to 01635 * decode the instruction in. 01636 * @return - 0 if the instruction's memory could be read; nonzero if 01637 * not. 01638 */ 01639 int decodeInstruction(struct InternalInstruction* insn, 01640 byteReader_t reader, 01641 const void* readerArg, 01642 dlog_t logger, 01643 void* loggerArg, 01644 const void* miiArg, 01645 uint64_t startLoc, 01646 DisassemblerMode mode) { 01647 memset(insn, 0, sizeof(struct InternalInstruction)); 01648 01649 insn->reader = reader; 01650 insn->readerArg = readerArg; 01651 insn->dlog = logger; 01652 insn->dlogArg = loggerArg; 01653 insn->startLocation = startLoc; 01654 insn->readerCursor = startLoc; 01655 insn->mode = mode; 01656 insn->numImmediatesConsumed = 0; 01657 01658 if (readPrefixes(insn) || 01659 readOpcode(insn) || 01660 getID(insn, miiArg) || 01661 insn->instructionID == 0 || 01662 readOperands(insn)) 01663 return -1; 01664 01665 insn->operands = &x86OperandSets[insn->spec->operands][0]; 01666 01667 insn->length = insn->readerCursor - insn->startLocation; 01668 01669 dbgprintf(insn, "Read from 0x%llx to 0x%llx: length %zu", 01670 startLoc, insn->readerCursor, insn->length); 01671 01672 if (insn->length > 15) 01673 dbgprintf(insn, "Instruction exceeds 15-byte limit"); 01674 01675 return 0; 01676 }