LLVM  6.0.0svn
X86DisassemblerDecoder.cpp
Go to the documentation of this file.
1 //===-- X86DisassemblerDecoder.cpp - Disassembler decoder -----------------===//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This file is part of the X86 Disassembler.
11 // It contains the implementation of the instruction decoder.
12 // Documentation for the disassembler can be found in X86Disassembler.h.
13 //
14 //===----------------------------------------------------------------------===//
15 
16 #include <cstdarg> /* for va_*() */
17 #include <cstdio> /* for vsnprintf() */
18 #include <cstdlib> /* for exit() */
19 #include <cstring> /* for memset() */
20 
21 #include "X86DisassemblerDecoder.h"
22 
23 using namespace llvm::X86Disassembler;
24 
/// Describes, for one opcode in one context, whether a ModR/M byte takes part
/// in instruction selection and where the candidate instructions live. Once
/// this entry has been resolved the decoder has narrowed the search down to a
/// single instruction.
struct ModRMDecision {
  /// Selection scheme; decode() compares it against the MODRM_* constants
  /// (MODRM_ONEENTRY, MODRM_SPLITRM, MODRM_SPLITREG, MODRM_SPLITMISC,
  /// MODRM_FULL).
  uint8_t modrm_type;
  /// Base index into modRMTable; decode() adds a ModR/M-derived offset to it
  /// for every scheme except MODRM_ONEENTRY.
  uint16_t instructionIDs;
};
32 
33 /// Specifies which set of ModR/M->instruction tables to look at
34 /// given a particular opcode.
36  ModRMDecision modRMDecisions[256];
37 };
38 
39 /// Specifies which opcode->instruction tables to look at given
40 /// a particular context (set of attributes). Since there are many possible
41 /// contexts, the decoder first uses CONTEXTS_SYM to determine which context
42 /// applies given a specific set of attributes. Hence there are only IC_max
43 /// entries in this table, rather than 2^(ATTR_max).
45  OpcodeDecision opcodeDecisions[IC_max];
46 };
47 
48 #include "X86GenDisassemblerTables.inc"
49 
/*
 * debug - Reports a diagnostic string together with the current file and line
 *   through Debug(). Expands to an empty statement when NDEBUG is defined.
 */
#ifdef NDEBUG
#define debug(s) do { } while (0)
#else
#define debug(s) do { Debug(__FILE__, __LINE__, s); } while (0)
#endif
55 
56 /*
57  * contextForAttrs - Client for the instruction context table. Takes a set of
58  * attributes and returns the appropriate decode context.
59  *
60  * @param attrMask - Attributes, from the enumeration attributeBits.
61  * @return - The InstructionContext to use when looking up an
62  * an instruction with these attributes.
63  */
64 static InstructionContext contextForAttrs(uint16_t attrMask) {
65  return static_cast<InstructionContext>(CONTEXTS_SYM[attrMask]);
66 }
67 
68 /*
69  * modRMRequired - Reads the appropriate instruction table to determine whether
70  * the ModR/M byte is required to decode a particular instruction.
71  *
72  * @param type - The opcode type (i.e., how many bytes it has).
73  * @param insnContext - The context for the instruction, as returned by
74  * contextForAttrs.
75  * @param opcode - The last byte of the instruction's opcode, not counting
76  * ModR/M extensions and escapes.
77  * @return - true if the ModR/M byte is required, false otherwise.
78  */
79 static int modRMRequired(OpcodeType type,
80  InstructionContext insnContext,
81  uint16_t opcode) {
82  const struct ContextDecision* decision = nullptr;
83 
84  switch (type) {
85  case ONEBYTE:
86  decision = &ONEBYTE_SYM;
87  break;
88  case TWOBYTE:
89  decision = &TWOBYTE_SYM;
90  break;
91  case THREEBYTE_38:
92  decision = &THREEBYTE38_SYM;
93  break;
94  case THREEBYTE_3A:
95  decision = &THREEBYTE3A_SYM;
96  break;
97  case XOP8_MAP:
98  decision = &XOP8_MAP_SYM;
99  break;
100  case XOP9_MAP:
101  decision = &XOP9_MAP_SYM;
102  break;
103  case XOPA_MAP:
104  decision = &XOPA_MAP_SYM;
105  break;
106  }
107 
108  return decision->opcodeDecisions[insnContext].modRMDecisions[opcode].
109  modrm_type != MODRM_ONEENTRY;
110 }
111 
112 /*
113  * decode - Reads the appropriate instruction table to obtain the unique ID of
114  * an instruction.
115  *
116  * @param type - See modRMRequired().
117  * @param insnContext - See modRMRequired().
118  * @param opcode - See modRMRequired().
119  * @param modRM - The ModR/M byte if required, or any value if not.
120  * @return - The UID of the instruction, or 0 on failure.
121  */
123  InstructionContext insnContext,
124  uint8_t opcode,
125  uint8_t modRM) {
126  const struct ModRMDecision* dec = nullptr;
127 
128  switch (type) {
129  case ONEBYTE:
130  dec = &ONEBYTE_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode];
131  break;
132  case TWOBYTE:
133  dec = &TWOBYTE_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode];
134  break;
135  case THREEBYTE_38:
136  dec = &THREEBYTE38_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode];
137  break;
138  case THREEBYTE_3A:
139  dec = &THREEBYTE3A_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode];
140  break;
141  case XOP8_MAP:
142  dec = &XOP8_MAP_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode];
143  break;
144  case XOP9_MAP:
145  dec = &XOP9_MAP_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode];
146  break;
147  case XOPA_MAP:
148  dec = &XOPA_MAP_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode];
149  break;
150  }
151 
152  switch (dec->modrm_type) {
153  default:
154  debug("Corrupt table! Unknown modrm_type");
155  return 0;
156  case MODRM_ONEENTRY:
157  return modRMTable[dec->instructionIDs];
158  case MODRM_SPLITRM:
159  if (modFromModRM(modRM) == 0x3)
160  return modRMTable[dec->instructionIDs+1];
161  return modRMTable[dec->instructionIDs];
162  case MODRM_SPLITREG:
163  if (modFromModRM(modRM) == 0x3)
164  return modRMTable[dec->instructionIDs+((modRM & 0x38) >> 3)+8];
165  return modRMTable[dec->instructionIDs+((modRM & 0x38) >> 3)];
166  case MODRM_SPLITMISC:
167  if (modFromModRM(modRM) == 0x3)
168  return modRMTable[dec->instructionIDs+(modRM & 0x3f)+8];
169  return modRMTable[dec->instructionIDs+((modRM & 0x38) >> 3)];
170  case MODRM_FULL:
171  return modRMTable[dec->instructionIDs+modRM];
172  }
173 }
174 
175 /*
176  * specifierForUID - Given a UID, returns the name and operand specification for
177  * that instruction.
178  *
179  * @param uid - The unique ID for the instruction. This should be returned by
180  * decode(); specifierForUID will not check bounds.
181  * @return - A pointer to the specification for that instruction.
182  */
183 static const struct InstructionSpecifier *specifierForUID(InstrUID uid) {
184  return &INSTRUCTIONS_SYM[uid];
185 }
186 
187 /*
188  * consumeByte - Uses the reader function provided by the user to consume one
189  * byte from the instruction's memory and advance the cursor.
190  *
191  * @param insn - The instruction with the reader function to use. The cursor
192  * for this instruction is advanced.
193  * @param byte - A pointer to a pre-allocated memory buffer to be populated
194  * with the data read.
195  * @return - 0 if the read was successful; nonzero otherwise.
196  */
197 static int consumeByte(struct InternalInstruction* insn, uint8_t* byte) {
198  int ret = insn->reader(insn->readerArg, byte, insn->readerCursor);
199 
200  if (!ret)
201  ++(insn->readerCursor);
202 
203  return ret;
204 }
205 
206 /*
207  * lookAtByte - Like consumeByte, but does not advance the cursor.
208  *
209  * @param insn - See consumeByte().
210  * @param byte - See consumeByte().
211  * @return - See consumeByte().
212  */
213 static int lookAtByte(struct InternalInstruction* insn, uint8_t* byte) {
214  return insn->reader(insn->readerArg, byte, insn->readerCursor);
215 }
216 
217 static void unconsumeByte(struct InternalInstruction* insn) {
218  insn->readerCursor--;
219 }
220 
/*
 * CONSUME_FUNC - Defines a static function `name` that reads sizeof(type)
 *   bytes starting at the reader cursor and assembles them least-significant
 *   byte first into a value of `type`.  On success the value is stored through
 *   ptr, the cursor is advanced by sizeof(type) and 0 is returned.  If any
 *   byte read fails, the reader's result is returned immediately and neither
 *   *ptr nor the cursor is modified.
 */
#define CONSUME_FUNC(name, type)                                    \
  static int name(struct InternalInstruction* insn, type* ptr) {    \
    type value = 0;                                                 \
    for (unsigned idx = 0; idx < sizeof(type); ++idx) {             \
      uint8_t piece;                                                \
      const int status = insn->reader(insn->readerArg,              \
                                      &piece,                       \
                                      insn->readerCursor + idx);    \
      if (status)                                                   \
        return status;                                              \
      value = value | ((uint64_t)piece << (idx * 8));               \
    }                                                               \
    insn->readerCursor += sizeof(type);                             \
    *ptr = value;                                                   \
    return 0;                                                       \
  }
238 
239 /*
240  * consume* - Use the reader function provided by the user to consume data
241  * values of various sizes from the instruction's memory and advance the
242  * cursor appropriately. These readers perform endian conversion.
243  *
244  * @param insn - See consumeByte().
245  * @param ptr - A pointer to a pre-allocated memory of appropriate size to
246  * be populated with the data read.
247  * @return - See consumeByte().
248  */
249 CONSUME_FUNC(consumeInt8, int8_t)
250 CONSUME_FUNC(consumeInt16, int16_t)
251 CONSUME_FUNC(consumeInt32, int32_t)
252 CONSUME_FUNC(consumeUInt16, uint16_t)
253 CONSUME_FUNC(consumeUInt32, uint32_t)
254 CONSUME_FUNC(consumeUInt64, uint64_t)
255 
256 /*
257  * dbgprintf - Uses the logging function provided by the user to log a single
258  * message, typically without a carriage-return.
259  *
260  * @param insn - The instruction containing the logging function.
261  * @param format - See printf().
262  * @param ... - See printf().
263  */
264 static void dbgprintf(struct InternalInstruction* insn,
265  const char* format,
266  ...) {
267  char buffer[256];
268  va_list ap;
269 
270  if (!insn->dlog)
271  return;
272 
273  va_start(ap, format);
274  (void)vsnprintf(buffer, sizeof(buffer), format, ap);
275  va_end(ap);
276 
277  insn->dlog(insn->dlogArg, buffer);
278 }
279 
280 static bool isREX(struct InternalInstruction *insn, uint8_t prefix) {
281  if (insn->mode == MODE_64BIT)
282  return prefix >= 0x40 && prefix <= 0x4f;
283  return false;
284 }
285 
286 /*
287  * setPrefixPresent - Marks that a particular prefix is present as mandatory
288  *
289  * @param insn - The instruction to be marked as having the prefix.
290  * @param prefix - The prefix that is present.
291  */
292 static void setPrefixPresent(struct InternalInstruction *insn, uint8_t prefix) {
293  uint8_t nextByte;
294  switch (prefix) {
295  case 0xf2:
296  case 0xf3:
297  if (lookAtByte(insn, &nextByte))
298  break;
299  // TODO:
300  // 1. There could be several 0x66
301  // 2. if (nextByte == 0x66) and nextNextByte != 0x0f then
302  // it's not mandatory prefix
303  // 3. if (nextByte >= 0x40 && nextByte <= 0x4f) it's REX and we need
304  // 0x0f exactly after it to be mandatory prefix
305  if (isREX(insn, nextByte) || nextByte == 0x0f || nextByte == 0x66)
306  // The last of 0xf2 /0xf3 is mandatory prefix
307  insn->mandatoryPrefix = prefix;
308  insn->repeatPrefix = prefix;
309  break;
310  case 0x66:
311  if (lookAtByte(insn, &nextByte))
312  break;
313  // 0x66 can't overwrite existing mandatory prefix and should be ignored
314  if (!insn->mandatoryPrefix && (nextByte == 0x0f || isREX(insn, nextByte)))
315  insn->mandatoryPrefix = prefix;
316  break;
317  }
318 }
319 
320 /*
321  * readPrefixes - Consumes all of an instruction's prefix bytes, and marks the
322  * instruction as having them. Also sets the instruction's default operand,
323  * address, and other relevant data sizes to report operands correctly.
324  *
325  * @param insn - The instruction whose prefixes are to be read.
326  * @return - 0 if the instruction could be read until the end of the prefix
327  * bytes, and no prefixes conflicted; nonzero otherwise.
328  */
329 static int readPrefixes(struct InternalInstruction* insn) {
330  bool isPrefix = true;
331  uint8_t byte = 0;
332  uint8_t nextByte;
333 
334  dbgprintf(insn, "readPrefixes()");
335 
336  while (isPrefix) {
337  /* If we fail reading prefixes, just stop here and let the opcode reader deal with it */
338  if (consumeByte(insn, &byte))
339  break;
340 
341  /*
342  * If the byte is a LOCK/REP/REPNE prefix and not a part of the opcode, then
343  * break and let it be disassembled as a normal "instruction".
344  */
345  if (insn->readerCursor - 1 == insn->startLocation && byte == 0xf0) // LOCK
346  break;
347 
348  if ((byte == 0xf2 || byte == 0xf3) && !lookAtByte(insn, &nextByte)) {
349  /*
350  * If the byte is 0xf2 or 0xf3, and any of the following conditions are
351  * met:
352  * - it is followed by a LOCK (0xf0) prefix
353  * - it is followed by an xchg instruction
354  * then it should be disassembled as a xacquire/xrelease not repne/rep.
355  */
356  if (((nextByte == 0xf0) ||
357  ((nextByte & 0xfe) == 0x86 || (nextByte & 0xf8) == 0x90))) {
358  insn->xAcquireRelease = true;
359  if (!(byte == 0xf3 && nextByte == 0x90)) // PAUSE instruction support
360  break;
361  }
362  /*
363  * Also if the byte is 0xf3, and the following condition is met:
364  * - it is followed by a "mov mem, reg" (opcode 0x88/0x89) or
365  * "mov mem, imm" (opcode 0xc6/0xc7) instructions.
366  * then it should be disassembled as an xrelease not rep.
367  */
368  if (byte == 0xf3 && (nextByte == 0x88 || nextByte == 0x89 ||
369  nextByte == 0xc6 || nextByte == 0xc7)) {
370  insn->xAcquireRelease = true;
371  if (nextByte != 0x90) // PAUSE instruction support
372  break;
373  }
374  if (isREX(insn, nextByte)) {
375  uint8_t nnextByte;
376  // Go to REX prefix after the current one
377  if (consumeByte(insn, &nnextByte))
378  return -1;
379  // We should be able to read next byte after REX prefix
380  if (lookAtByte(insn, &nnextByte))
381  return -1;
382  unconsumeByte(insn);
383  }
384  }
385 
386  switch (byte) {
387  case 0xf0: /* LOCK */
388  case 0xf2: /* REPNE/REPNZ */
389  case 0xf3: /* REP or REPE/REPZ */
390  setPrefixPresent(insn, byte);
391  break;
392  case 0x2e: /* CS segment override -OR- Branch not taken */
393  case 0x36: /* SS segment override -OR- Branch taken */
394  case 0x3e: /* DS segment override */
395  case 0x26: /* ES segment override */
396  case 0x64: /* FS segment override */
397  case 0x65: /* GS segment override */
398  switch (byte) {
399  case 0x2e:
401  break;
402  case 0x36:
404  break;
405  case 0x3e:
407  break;
408  case 0x26:
410  break;
411  case 0x64:
413  break;
414  case 0x65:
416  break;
417  default:
418  debug("Unhandled override");
419  return -1;
420  }
421  setPrefixPresent(insn, byte);
422  break;
423  case 0x66: /* Operand-size override */
424  insn->hasOpSize = true;
425  setPrefixPresent(insn, byte);
426  break;
427  case 0x67: /* Address-size override */
428  insn->hasAdSize = true;
429  setPrefixPresent(insn, byte);
430  break;
431  default: /* Not a prefix byte */
432  isPrefix = false;
433  break;
434  }
435 
436  if (isPrefix)
437  dbgprintf(insn, "Found prefix 0x%hhx", byte);
438  }
439 
441 
442  if (byte == 0x62) {
443  uint8_t byte1, byte2;
444 
445  if (consumeByte(insn, &byte1)) {
446  dbgprintf(insn, "Couldn't read second byte of EVEX prefix");
447  return -1;
448  }
449 
450  if (lookAtByte(insn, &byte2)) {
451  dbgprintf(insn, "Couldn't read third byte of EVEX prefix");
452  return -1;
453  }
454 
455  if ((insn->mode == MODE_64BIT || (byte1 & 0xc0) == 0xc0) &&
456  ((~byte1 & 0xc) == 0xc) && ((byte2 & 0x4) == 0x4)) {
458  } else {
459  unconsumeByte(insn); /* unconsume byte1 */
460  unconsumeByte(insn); /* unconsume byte */
461  }
462 
463  if (insn->vectorExtensionType == TYPE_EVEX) {
464  insn->vectorExtensionPrefix[0] = byte;
465  insn->vectorExtensionPrefix[1] = byte1;
466  if (consumeByte(insn, &insn->vectorExtensionPrefix[2])) {
467  dbgprintf(insn, "Couldn't read third byte of EVEX prefix");
468  return -1;
469  }
470  if (consumeByte(insn, &insn->vectorExtensionPrefix[3])) {
471  dbgprintf(insn, "Couldn't read fourth byte of EVEX prefix");
472  return -1;
473  }
474 
475  /* We simulate the REX prefix for simplicity's sake */
476  if (insn->mode == MODE_64BIT) {
477  insn->rexPrefix = 0x40
478  | (wFromEVEX3of4(insn->vectorExtensionPrefix[2]) << 3)
479  | (rFromEVEX2of4(insn->vectorExtensionPrefix[1]) << 2)
480  | (xFromEVEX2of4(insn->vectorExtensionPrefix[1]) << 1)
481  | (bFromEVEX2of4(insn->vectorExtensionPrefix[1]) << 0);
482  }
483 
484  dbgprintf(insn, "Found EVEX prefix 0x%hhx 0x%hhx 0x%hhx 0x%hhx",
485  insn->vectorExtensionPrefix[0], insn->vectorExtensionPrefix[1],
486  insn->vectorExtensionPrefix[2], insn->vectorExtensionPrefix[3]);
487  }
488  } else if (byte == 0xc4) {
489  uint8_t byte1;
490 
491  if (lookAtByte(insn, &byte1)) {
492  dbgprintf(insn, "Couldn't read second byte of VEX");
493  return -1;
494  }
495 
496  if (insn->mode == MODE_64BIT || (byte1 & 0xc0) == 0xc0)
498  else
499  unconsumeByte(insn);
500 
501  if (insn->vectorExtensionType == TYPE_VEX_3B) {
502  insn->vectorExtensionPrefix[0] = byte;
503  consumeByte(insn, &insn->vectorExtensionPrefix[1]);
504  consumeByte(insn, &insn->vectorExtensionPrefix[2]);
505 
506  /* We simulate the REX prefix for simplicity's sake */
507 
508  if (insn->mode == MODE_64BIT)
509  insn->rexPrefix = 0x40
510  | (wFromVEX3of3(insn->vectorExtensionPrefix[2]) << 3)
511  | (rFromVEX2of3(insn->vectorExtensionPrefix[1]) << 2)
512  | (xFromVEX2of3(insn->vectorExtensionPrefix[1]) << 1)
513  | (bFromVEX2of3(insn->vectorExtensionPrefix[1]) << 0);
514 
515  dbgprintf(insn, "Found VEX prefix 0x%hhx 0x%hhx 0x%hhx",
516  insn->vectorExtensionPrefix[0], insn->vectorExtensionPrefix[1],
517  insn->vectorExtensionPrefix[2]);
518  }
519  } else if (byte == 0xc5) {
520  uint8_t byte1;
521 
522  if (lookAtByte(insn, &byte1)) {
523  dbgprintf(insn, "Couldn't read second byte of VEX");
524  return -1;
525  }
526 
527  if (insn->mode == MODE_64BIT || (byte1 & 0xc0) == 0xc0)
529  else
530  unconsumeByte(insn);
531 
532  if (insn->vectorExtensionType == TYPE_VEX_2B) {
533  insn->vectorExtensionPrefix[0] = byte;
534  consumeByte(insn, &insn->vectorExtensionPrefix[1]);
535 
536  if (insn->mode == MODE_64BIT)
537  insn->rexPrefix = 0x40
538  | (rFromVEX2of2(insn->vectorExtensionPrefix[1]) << 2);
539 
540  switch (ppFromVEX2of2(insn->vectorExtensionPrefix[1])) {
541  default:
542  break;
543  case VEX_PREFIX_66:
544  insn->hasOpSize = true;
545  break;
546  }
547 
548  dbgprintf(insn, "Found VEX prefix 0x%hhx 0x%hhx",
549  insn->vectorExtensionPrefix[0],
550  insn->vectorExtensionPrefix[1]);
551  }
552  } else if (byte == 0x8f) {
553  uint8_t byte1;
554 
555  if (lookAtByte(insn, &byte1)) {
556  dbgprintf(insn, "Couldn't read second byte of XOP");
557  return -1;
558  }
559 
560  if ((byte1 & 0x38) != 0x0) /* 0 in these 3 bits is a POP instruction. */
562  else
563  unconsumeByte(insn);
564 
565  if (insn->vectorExtensionType == TYPE_XOP) {
566  insn->vectorExtensionPrefix[0] = byte;
567  consumeByte(insn, &insn->vectorExtensionPrefix[1]);
568  consumeByte(insn, &insn->vectorExtensionPrefix[2]);
569 
570  /* We simulate the REX prefix for simplicity's sake */
571 
572  if (insn->mode == MODE_64BIT)
573  insn->rexPrefix = 0x40
574  | (wFromXOP3of3(insn->vectorExtensionPrefix[2]) << 3)
575  | (rFromXOP2of3(insn->vectorExtensionPrefix[1]) << 2)
576  | (xFromXOP2of3(insn->vectorExtensionPrefix[1]) << 1)
577  | (bFromXOP2of3(insn->vectorExtensionPrefix[1]) << 0);
578 
579  switch (ppFromXOP3of3(insn->vectorExtensionPrefix[2])) {
580  default:
581  break;
582  case VEX_PREFIX_66:
583  insn->hasOpSize = true;
584  break;
585  }
586 
587  dbgprintf(insn, "Found XOP prefix 0x%hhx 0x%hhx 0x%hhx",
588  insn->vectorExtensionPrefix[0], insn->vectorExtensionPrefix[1],
589  insn->vectorExtensionPrefix[2]);
590  }
591  } else if (isREX(insn, byte)) {
592  if (lookAtByte(insn, &nextByte))
593  return -1;
594  insn->rexPrefix = byte;
595  dbgprintf(insn, "Found REX prefix 0x%hhx", byte);
596  } else
597  unconsumeByte(insn);
598 
599  if (insn->mode == MODE_16BIT) {
600  insn->registerSize = (insn->hasOpSize ? 4 : 2);
601  insn->addressSize = (insn->hasAdSize ? 4 : 2);
602  insn->displacementSize = (insn->hasAdSize ? 4 : 2);
603  insn->immediateSize = (insn->hasOpSize ? 4 : 2);
604  } else if (insn->mode == MODE_32BIT) {
605  insn->registerSize = (insn->hasOpSize ? 2 : 4);
606  insn->addressSize = (insn->hasAdSize ? 2 : 4);
607  insn->displacementSize = (insn->hasAdSize ? 2 : 4);
608  insn->immediateSize = (insn->hasOpSize ? 2 : 4);
609  } else if (insn->mode == MODE_64BIT) {
610  if (insn->rexPrefix && wFromREX(insn->rexPrefix)) {
611  insn->registerSize = 8;
612  insn->addressSize = (insn->hasAdSize ? 4 : 8);
613  insn->displacementSize = 4;
614  insn->immediateSize = 4;
615  } else {
616  insn->registerSize = (insn->hasOpSize ? 2 : 4);
617  insn->addressSize = (insn->hasAdSize ? 4 : 8);
618  insn->displacementSize = (insn->hasOpSize ? 2 : 4);
619  insn->immediateSize = (insn->hasOpSize ? 2 : 4);
620  }
621  }
622 
623  return 0;
624 }
625 
626 /*
627  * readOpcode - Reads the opcode (excepting the ModR/M byte in the case of
628  * extended or escape opcodes).
629  *
630  * @param insn - The instruction whose opcode is to be read.
631  * @return - 0 if the opcode could be read successfully; nonzero otherwise.
632  */
633 static int readOpcode(struct InternalInstruction* insn) {
634  /* Determine the length of the primary opcode */
635 
636  uint8_t current;
637 
638  dbgprintf(insn, "readOpcode()");
639 
640  insn->opcodeType = ONEBYTE;
641 
642  if (insn->vectorExtensionType == TYPE_EVEX) {
643  switch (mmFromEVEX2of4(insn->vectorExtensionPrefix[1])) {
644  default:
645  dbgprintf(insn, "Unhandled mm field for instruction (0x%hhx)",
647  return -1;
648  case VEX_LOB_0F:
649  insn->opcodeType = TWOBYTE;
650  return consumeByte(insn, &insn->opcode);
651  case VEX_LOB_0F38:
652  insn->opcodeType = THREEBYTE_38;
653  return consumeByte(insn, &insn->opcode);
654  case VEX_LOB_0F3A:
655  insn->opcodeType = THREEBYTE_3A;
656  return consumeByte(insn, &insn->opcode);
657  }
658  } else if (insn->vectorExtensionType == TYPE_VEX_3B) {
659  switch (mmmmmFromVEX2of3(insn->vectorExtensionPrefix[1])) {
660  default:
661  dbgprintf(insn, "Unhandled m-mmmm field for instruction (0x%hhx)",
663  return -1;
664  case VEX_LOB_0F:
665  insn->opcodeType = TWOBYTE;
666  return consumeByte(insn, &insn->opcode);
667  case VEX_LOB_0F38:
668  insn->opcodeType = THREEBYTE_38;
669  return consumeByte(insn, &insn->opcode);
670  case VEX_LOB_0F3A:
671  insn->opcodeType = THREEBYTE_3A;
672  return consumeByte(insn, &insn->opcode);
673  }
674  } else if (insn->vectorExtensionType == TYPE_VEX_2B) {
675  insn->opcodeType = TWOBYTE;
676  return consumeByte(insn, &insn->opcode);
677  } else if (insn->vectorExtensionType == TYPE_XOP) {
678  switch (mmmmmFromXOP2of3(insn->vectorExtensionPrefix[1])) {
679  default:
680  dbgprintf(insn, "Unhandled m-mmmm field for instruction (0x%hhx)",
682  return -1;
683  case XOP_MAP_SELECT_8:
684  insn->opcodeType = XOP8_MAP;
685  return consumeByte(insn, &insn->opcode);
686  case XOP_MAP_SELECT_9:
687  insn->opcodeType = XOP9_MAP;
688  return consumeByte(insn, &insn->opcode);
689  case XOP_MAP_SELECT_A:
690  insn->opcodeType = XOPA_MAP;
691  return consumeByte(insn, &insn->opcode);
692  }
693  }
694 
695  if (consumeByte(insn, &current))
696  return -1;
697 
698  if (current == 0x0f) {
699  dbgprintf(insn, "Found a two-byte escape prefix (0x%hhx)", current);
700 
701  if (consumeByte(insn, &current))
702  return -1;
703 
704  if (current == 0x38) {
705  dbgprintf(insn, "Found a three-byte escape prefix (0x%hhx)", current);
706 
707  if (consumeByte(insn, &current))
708  return -1;
709 
710  insn->opcodeType = THREEBYTE_38;
711  } else if (current == 0x3a) {
712  dbgprintf(insn, "Found a three-byte escape prefix (0x%hhx)", current);
713 
714  if (consumeByte(insn, &current))
715  return -1;
716 
717  insn->opcodeType = THREEBYTE_3A;
718  } else {
719  dbgprintf(insn, "Didn't find a three-byte escape prefix");
720 
721  insn->opcodeType = TWOBYTE;
722  }
723  } else if (insn->mandatoryPrefix)
724  // The opcode with mandatory prefix must start with opcode escape.
725  // If not it's legacy repeat prefix
726  insn->mandatoryPrefix = 0;
727 
728  /*
729  * At this point we have consumed the full opcode.
730  * Anything we consume from here on must be unconsumed.
731  */
732 
733  insn->opcode = current;
734 
735  return 0;
736 }
737 
738 static int readModRM(struct InternalInstruction* insn);
739 
740 /*
741  * getIDWithAttrMask - Determines the ID of an instruction, consuming
742  * the ModR/M byte as appropriate for extended and escape opcodes,
743  * and using a supplied attribute mask.
744  *
745  * @param instructionID - A pointer whose target is filled in with the ID of the
746  * instruction.
747  * @param insn - The instruction whose ID is to be determined.
748  * @param attrMask - The attribute mask to search.
749  * @return - 0 if the ModR/M could be read when needed or was not
750  * needed; nonzero otherwise.
751  */
752 static int getIDWithAttrMask(uint16_t* instructionID,
753  struct InternalInstruction* insn,
754  uint16_t attrMask) {
755  bool hasModRMExtension;
756 
757  InstructionContext instructionClass = contextForAttrs(attrMask);
758 
759  hasModRMExtension = modRMRequired(insn->opcodeType,
760  instructionClass,
761  insn->opcode);
762 
763  if (hasModRMExtension) {
764  if (readModRM(insn))
765  return -1;
766 
767  *instructionID = decode(insn->opcodeType,
768  instructionClass,
769  insn->opcode,
770  insn->modRM);
771  } else {
772  *instructionID = decode(insn->opcodeType,
773  instructionClass,
774  insn->opcode,
775  0);
776  }
777 
778  return 0;
779 }
780 
/*
 * is16BitEquivalent - Determines whether two instruction names refer to
 *   equivalent instructions but one is 16-bit whereas the other is not.
 *
 *   The names are compared position by position and must have equal length.
 *   A mismatch is tolerated only when it looks like a width marker:
 *     - 'Q' or 'L' in orig against 'W' in equiv;
 *     - '6' or '3' in orig against '1' in equiv;
 *     - '4' or '2' in orig against '6' in equiv.
 *
 * @param orig  - The instruction that is not 16-bit
 * @param equiv - The instruction that is 16-bit
 * @return      - true if the names match under the rules above.
 */
static bool is16BitEquivalent(const char *orig, const char *equiv) {
  for (; *orig != '\0' && *equiv != '\0'; ++orig, ++equiv) {
    const char wide = *orig;
    const char narrow = *equiv;

    if (wide == narrow)
      continue;

    const bool suffixSwap = narrow == 'W' && (wide == 'Q' || wide == 'L');
    const bool firstDigitSwap = narrow == '1' && (wide == '6' || wide == '3');
    const bool secondDigitSwap = narrow == '6' && (wide == '4' || wide == '2');

    if (!suffixSwap && !firstDigitSwap && !secondDigitSwap)
      return false;
  }

  /* Equivalent only if both names ended at the same position. */
  return *orig == '\0' && *equiv == '\0';
}
807 
/*
 * is64Bit - Determines whether this instruction is a 64-bit instruction,
 *   judged solely by whether its name contains the substring "64".
 *
 * @param name - The name of the instruction to inspect.
 * @return     - true if "64" occurs anywhere in name, false otherwise.
 */
static bool is64Bit(const char *name) {
  return std::strstr(name, "64") != nullptr;
}
823 
824 /*
825  * getID - Determines the ID of an instruction, consuming the ModR/M byte as
826  * appropriate for extended and escape opcodes. Determines the attributes and
827  * context for the instruction before doing so.
828  *
829  * @param insn - The instruction whose ID is to be determined.
830  * @return - 0 if the ModR/M could be read when needed or was not needed;
831  * nonzero otherwise.
832  */
833 static int getID(struct InternalInstruction* insn, const void *miiArg) {
834  uint16_t attrMask;
835  uint16_t instructionID;
836 
837  dbgprintf(insn, "getID()");
838 
839  attrMask = ATTR_NONE;
840 
841  if (insn->mode == MODE_64BIT)
842  attrMask |= ATTR_64BIT;
843 
844  if (insn->vectorExtensionType != TYPE_NO_VEX_XOP) {
845  attrMask |= (insn->vectorExtensionType == TYPE_EVEX) ? ATTR_EVEX : ATTR_VEX;
846 
847  if (insn->vectorExtensionType == TYPE_EVEX) {
848  switch (ppFromEVEX3of4(insn->vectorExtensionPrefix[2])) {
849  case VEX_PREFIX_66:
850  attrMask |= ATTR_OPSIZE;
851  break;
852  case VEX_PREFIX_F3:
853  attrMask |= ATTR_XS;
854  break;
855  case VEX_PREFIX_F2:
856  attrMask |= ATTR_XD;
857  break;
858  }
859 
860  if (zFromEVEX4of4(insn->vectorExtensionPrefix[3]))
861  attrMask |= ATTR_EVEXKZ;
862  if (bFromEVEX4of4(insn->vectorExtensionPrefix[3]))
863  attrMask |= ATTR_EVEXB;
865  attrMask |= ATTR_EVEXK;
866  if (lFromEVEX4of4(insn->vectorExtensionPrefix[3]))
867  attrMask |= ATTR_EVEXL;
869  attrMask |= ATTR_EVEXL2;
870  } else if (insn->vectorExtensionType == TYPE_VEX_3B) {
871  switch (ppFromVEX3of3(insn->vectorExtensionPrefix[2])) {
872  case VEX_PREFIX_66:
873  attrMask |= ATTR_OPSIZE;
874  break;
875  case VEX_PREFIX_F3:
876  attrMask |= ATTR_XS;
877  break;
878  case VEX_PREFIX_F2:
879  attrMask |= ATTR_XD;
880  break;
881  }
882 
883  if (lFromVEX3of3(insn->vectorExtensionPrefix[2]))
884  attrMask |= ATTR_VEXL;
885  } else if (insn->vectorExtensionType == TYPE_VEX_2B) {
886  switch (ppFromVEX2of2(insn->vectorExtensionPrefix[1])) {
887  case VEX_PREFIX_66:
888  attrMask |= ATTR_OPSIZE;
889  break;
890  case VEX_PREFIX_F3:
891  attrMask |= ATTR_XS;
892  break;
893  case VEX_PREFIX_F2:
894  attrMask |= ATTR_XD;
895  break;
896  }
897 
898  if (lFromVEX2of2(insn->vectorExtensionPrefix[1]))
899  attrMask |= ATTR_VEXL;
900  } else if (insn->vectorExtensionType == TYPE_XOP) {
901  switch (ppFromXOP3of3(insn->vectorExtensionPrefix[2])) {
902  case VEX_PREFIX_66:
903  attrMask |= ATTR_OPSIZE;
904  break;
905  case VEX_PREFIX_F3:
906  attrMask |= ATTR_XS;
907  break;
908  case VEX_PREFIX_F2:
909  attrMask |= ATTR_XD;
910  break;
911  }
912 
913  if (lFromXOP3of3(insn->vectorExtensionPrefix[2]))
914  attrMask |= ATTR_VEXL;
915  } else {
916  return -1;
917  }
918  } else if (!insn->mandatoryPrefix) {
919  // If we don't have mandatory prefix we should use legacy prefixes here
920  if (insn->hasOpSize && (insn->mode != MODE_16BIT))
921  attrMask |= ATTR_OPSIZE;
922  if (insn->hasAdSize)
923  attrMask |= ATTR_ADSIZE;
924  if (insn->opcodeType == ONEBYTE) {
925  if (insn->repeatPrefix == 0xf3 && (insn->opcode == 0x90))
926  // Special support for PAUSE
927  attrMask |= ATTR_XS;
928  } else {
929  if (insn->repeatPrefix == 0xf2)
930  attrMask |= ATTR_XD;
931  else if (insn->repeatPrefix == 0xf3)
932  attrMask |= ATTR_XS;
933  }
934  } else {
935  switch (insn->mandatoryPrefix) {
936  case 0xf2:
937  attrMask |= ATTR_XD;
938  break;
939  case 0xf3:
940  attrMask |= ATTR_XS;
941  break;
942  case 0x66:
943  if (insn->mode != MODE_16BIT)
944  attrMask |= ATTR_OPSIZE;
945  break;
946  case 0x67:
947  attrMask |= ATTR_ADSIZE;
948  break;
949  }
950  }
951 
952  if (insn->rexPrefix & 0x08) {
953  attrMask |= ATTR_REXW;
954  attrMask &= ~ATTR_ADSIZE;
955  }
956 
957  /*
958  * JCXZ/JECXZ need special handling for 16-bit mode because the meaning
959  * of the AdSize prefix is inverted w.r.t. 32-bit mode.
960  */
961  if (insn->mode == MODE_16BIT && insn->opcodeType == ONEBYTE &&
962  insn->opcode == 0xE3)
963  attrMask ^= ATTR_ADSIZE;
964 
965  /*
966  * In 64-bit mode all f64 superscripted opcodes ignore opcode size prefix
967  * CALL/JMP/JCC instructions need to ignore 0x66 and consume 4 bytes
968  */
969 
970  if ((insn->mode == MODE_64BIT) && insn->hasOpSize) {
971  switch (insn->opcode) {
972  case 0xE8:
973  case 0xE9:
974  // Take care of psubsb and other mmx instructions.
975  if (insn->opcodeType == ONEBYTE) {
976  attrMask ^= ATTR_OPSIZE;
977  insn->immediateSize = 4;
978  insn->displacementSize = 4;
979  }
980  break;
981  case 0x82:
982  case 0x83:
983  case 0x84:
984  case 0x85:
985  case 0x86:
986  case 0x87:
987  case 0x88:
988  case 0x89:
989  case 0x8A:
990  case 0x8B:
991  case 0x8C:
992  case 0x8D:
993  case 0x8E:
994  case 0x8F:
995  // Take care of lea and three byte ops.
996  if (insn->opcodeType == TWOBYTE) {
997  attrMask ^= ATTR_OPSIZE;
998  insn->immediateSize = 4;
999  insn->displacementSize = 4;
1000  }
1001  break;
1002  }
1003  }
1004 
1005  if (getIDWithAttrMask(&instructionID, insn, attrMask))
1006  return -1;
1007 
1008  /* The following clauses compensate for limitations of the tables. */
1009 
1010  if (insn->mode != MODE_64BIT &&
1012  /*
1013  * The tables can't distinquish between cases where the W-bit is used to
1014  * select register size and cases where its a required part of the opcode.
1015  */
1016  if ((insn->vectorExtensionType == TYPE_EVEX &&
1017  wFromEVEX3of4(insn->vectorExtensionPrefix[2])) ||
1018  (insn->vectorExtensionType == TYPE_VEX_3B &&
1019  wFromVEX3of3(insn->vectorExtensionPrefix[2])) ||
1020  (insn->vectorExtensionType == TYPE_XOP &&
1021  wFromXOP3of3(insn->vectorExtensionPrefix[2]))) {
1022 
1023  uint16_t instructionIDWithREXW;
1024  if (getIDWithAttrMask(&instructionIDWithREXW,
1025  insn, attrMask | ATTR_REXW)) {
1026  insn->instructionID = instructionID;
1027  insn->spec = specifierForUID(instructionID);
1028  return 0;
1029  }
1030 
1031  auto SpecName = GetInstrName(instructionIDWithREXW, miiArg);
1032  // If not a 64-bit instruction. Switch the opcode.
1033  if (!is64Bit(SpecName.data())) {
1034  insn->instructionID = instructionIDWithREXW;
1035  insn->spec = specifierForUID(instructionIDWithREXW);
1036  return 0;
1037  }
1038  }
1039  }
1040 
1041  /*
1042  * Absolute moves need special handling.
1043  * -For 16-bit mode because the meanings of the AdSize and OpSize prefixes
1044  * are inverted w.r.t. 32-bit mode.
1045  * -For 32-bit mode we need to ensure the ADSIZE prefix is observed in
1046  * any position.
1047  */
1048  if (insn->opcodeType == ONEBYTE && ((insn->opcode & 0xFC) == 0xA0)) {
1049  /* Make sure we observed the prefixes in any position. */
1050  if (insn->hasAdSize)
1051  attrMask |= ATTR_ADSIZE;
1052  if (insn->hasOpSize)
1053  attrMask |= ATTR_OPSIZE;
1054 
1055  /* In 16-bit, invert the attributes. */
1056  if (insn->mode == MODE_16BIT)
1057  attrMask ^= ATTR_ADSIZE | ATTR_OPSIZE;
1058 
1059  if (getIDWithAttrMask(&instructionID, insn, attrMask))
1060  return -1;
1061 
1062  insn->instructionID = instructionID;
1063  insn->spec = specifierForUID(instructionID);
1064  return 0;
1065  }
1066 
1067  if ((insn->mode == MODE_16BIT || insn->hasOpSize) &&
1068  !(attrMask & ATTR_OPSIZE)) {
1069  /*
1070  * The instruction tables make no distinction between instructions that
1071  * allow OpSize anywhere (i.e., 16-bit operations) and that need it in a
1072  * particular spot (i.e., many MMX operations). In general we're
1073  * conservative, but in the specific case where OpSize is present but not
1074  * in the right place we check if there's a 16-bit operation.
1075  */
1076 
1077  const struct InstructionSpecifier *spec;
1078  uint16_t instructionIDWithOpsize;
1079  llvm::StringRef specName, specWithOpSizeName;
1080 
1081  spec = specifierForUID(instructionID);
1082 
1083  if (getIDWithAttrMask(&instructionIDWithOpsize,
1084  insn,
1085  attrMask | ATTR_OPSIZE)) {
1086  /*
1087  * ModRM required with OpSize but not present; give up and return version
1088  * without OpSize set
1089  */
1090 
1091  insn->instructionID = instructionID;
1092  insn->spec = spec;
1093  return 0;
1094  }
1095 
1096  specName = GetInstrName(instructionID, miiArg);
1097  specWithOpSizeName = GetInstrName(instructionIDWithOpsize, miiArg);
1098 
1099  if (is16BitEquivalent(specName.data(), specWithOpSizeName.data()) &&
1100  (insn->mode == MODE_16BIT) ^ insn->hasOpSize) {
1101  insn->instructionID = instructionIDWithOpsize;
1102  insn->spec = specifierForUID(instructionIDWithOpsize);
1103  } else {
1104  insn->instructionID = instructionID;
1105  insn->spec = spec;
1106  }
1107  return 0;
1108  }
1109 
1110  if (insn->opcodeType == ONEBYTE && insn->opcode == 0x90 &&
1111  insn->rexPrefix & 0x01) {
1112  /*
1113  * NOOP shouldn't decode as NOOP if REX.b is set. Instead
1114  * it should decode as XCHG %r8, %eax.
1115  */
1116 
1117  const struct InstructionSpecifier *spec;
1118  uint16_t instructionIDWithNewOpcode;
1119  const struct InstructionSpecifier *specWithNewOpcode;
1120 
1121  spec = specifierForUID(instructionID);
1122 
1123  /* Borrow opcode from one of the other XCHGar opcodes */
1124  insn->opcode = 0x91;
1125 
1126  if (getIDWithAttrMask(&instructionIDWithNewOpcode,
1127  insn,
1128  attrMask)) {
1129  insn->opcode = 0x90;
1130 
1131  insn->instructionID = instructionID;
1132  insn->spec = spec;
1133  return 0;
1134  }
1135 
1136  specWithNewOpcode = specifierForUID(instructionIDWithNewOpcode);
1137 
1138  /* Change back */
1139  insn->opcode = 0x90;
1140 
1141  insn->instructionID = instructionIDWithNewOpcode;
1142  insn->spec = specWithNewOpcode;
1143 
1144  return 0;
1145  }
1146 
1147  insn->instructionID = instructionID;
1148  insn->spec = specifierForUID(insn->instructionID);
1149 
1150  return 0;
1151 }
1152 
1153 /*
1154  * readSIB - Consumes the SIB byte to determine addressing information for an
1155  * instruction.
1156  *
1157  * @param insn - The instruction whose SIB byte is to be read.
1158  * @return - 0 if the SIB byte was successfully read; nonzero otherwise.
1159  */
1160 static int readSIB(struct InternalInstruction* insn) {
1161  SIBBase sibBaseBase = SIB_BASE_NONE;
1162  uint8_t index, base;
1163 
1164  dbgprintf(insn, "readSIB()");
1165 
1166  if (insn->consumedSIB)
1167  return 0;
1168 
1169  insn->consumedSIB = true;
1170 
1171  switch (insn->addressSize) {
1172  case 2:
1173  dbgprintf(insn, "SIB-based addressing doesn't work in 16-bit mode");
1174  return -1;
1175  case 4:
1176  insn->sibIndexBase = SIB_INDEX_EAX;
1177  sibBaseBase = SIB_BASE_EAX;
1178  break;
1179  case 8:
1180  insn->sibIndexBase = SIB_INDEX_RAX;
1181  sibBaseBase = SIB_BASE_RAX;
1182  break;
1183  }
1184 
1185  if (consumeByte(insn, &insn->sib))
1186  return -1;
1187 
1188  index = indexFromSIB(insn->sib) | (xFromREX(insn->rexPrefix) << 3);
1189 
1190  if (index == 0x4) {
1191  insn->sibIndex = SIB_INDEX_NONE;
1192  } else {
1193  insn->sibIndex = (SIBIndex)(insn->sibIndexBase + index);
1194  }
1195 
1196  insn->sibScale = 1 << scaleFromSIB(insn->sib);
1197 
1198  base = baseFromSIB(insn->sib) | (bFromREX(insn->rexPrefix) << 3);
1199 
1200  switch (base) {
1201  case 0x5:
1202  case 0xd:
1203  switch (modFromModRM(insn->modRM)) {
1204  case 0x0:
1205  insn->eaDisplacement = EA_DISP_32;
1206  insn->sibBase = SIB_BASE_NONE;
1207  break;
1208  case 0x1:
1209  insn->eaDisplacement = EA_DISP_8;
1210  insn->sibBase = (SIBBase)(sibBaseBase + base);
1211  break;
1212  case 0x2:
1213  insn->eaDisplacement = EA_DISP_32;
1214  insn->sibBase = (SIBBase)(sibBaseBase + base);
1215  break;
1216  case 0x3:
1217  debug("Cannot have Mod = 0b11 and a SIB byte");
1218  return -1;
1219  }
1220  break;
1221  default:
1222  insn->sibBase = (SIBBase)(sibBaseBase + base);
1223  break;
1224  }
1225 
1226  return 0;
1227 }
1228 
1229 /*
1230  * readDisplacement - Consumes the displacement of an instruction.
1231  *
1232  * @param insn - The instruction whose displacement is to be read.
1233  * @return - 0 if the displacement byte was successfully read; nonzero
1234  * otherwise.
1235  */
1236 static int readDisplacement(struct InternalInstruction* insn) {
1237  int8_t d8;
1238  int16_t d16;
1239  int32_t d32;
1240 
1241  dbgprintf(insn, "readDisplacement()");
1242 
1243  if (insn->consumedDisplacement)
1244  return 0;
1245 
1246  insn->consumedDisplacement = true;
1247  insn->displacementOffset = insn->readerCursor - insn->startLocation;
1248 
1249  switch (insn->eaDisplacement) {
1250  case EA_DISP_NONE:
1251  insn->consumedDisplacement = false;
1252  break;
1253  case EA_DISP_8:
1254  if (consumeInt8(insn, &d8))
1255  return -1;
1256  insn->displacement = d8;
1257  break;
1258  case EA_DISP_16:
1259  if (consumeInt16(insn, &d16))
1260  return -1;
1261  insn->displacement = d16;
1262  break;
1263  case EA_DISP_32:
1264  if (consumeInt32(insn, &d32))
1265  return -1;
1266  insn->displacement = d32;
1267  break;
1268  }
1269 
1270  insn->consumedDisplacement = true;
1271  return 0;
1272 }
1273 
1274 /*
1275  * readModRM - Consumes all addressing information (ModR/M byte, SIB byte, and
1276  * displacement) for an instruction and interprets it.
1277  *
1278  * @param insn - The instruction whose addressing information is to be read.
1279  * @return - 0 if the information was successfully read; nonzero otherwise.
1280  */
1281 static int readModRM(struct InternalInstruction* insn) {
1282  uint8_t mod, rm, reg;
1283 
1284  dbgprintf(insn, "readModRM()");
1285 
1286  if (insn->consumedModRM)
1287  return 0;
1288 
1289  if (consumeByte(insn, &insn->modRM))
1290  return -1;
1291  insn->consumedModRM = true;
1292 
1293  mod = modFromModRM(insn->modRM);
1294  rm = rmFromModRM(insn->modRM);
1295  reg = regFromModRM(insn->modRM);
1296 
1297  /*
1298  * This goes by insn->registerSize to pick the correct register, which messes
1299  * up if we're using (say) XMM or 8-bit register operands. That gets fixed in
1300  * fixupReg().
1301  */
1302  switch (insn->registerSize) {
1303  case 2:
1304  insn->regBase = MODRM_REG_AX;
1305  insn->eaRegBase = EA_REG_AX;
1306  break;
1307  case 4:
1308  insn->regBase = MODRM_REG_EAX;
1309  insn->eaRegBase = EA_REG_EAX;
1310  break;
1311  case 8:
1312  insn->regBase = MODRM_REG_RAX;
1313  insn->eaRegBase = EA_REG_RAX;
1314  break;
1315  }
1316 
1317  reg |= rFromREX(insn->rexPrefix) << 3;
1318  rm |= bFromREX(insn->rexPrefix) << 3;
1319  if (insn->vectorExtensionType == TYPE_EVEX) {
1320  reg |= r2FromEVEX2of4(insn->vectorExtensionPrefix[1]) << 4;
1321  rm |= xFromEVEX2of4(insn->vectorExtensionPrefix[1]) << 4;
1322  }
1323 
1324  insn->reg = (Reg)(insn->regBase + reg);
1325 
1326  switch (insn->addressSize) {
1327  case 2:
1328  insn->eaBaseBase = EA_BASE_BX_SI;
1329 
1330  switch (mod) {
1331  case 0x0:
1332  if (rm == 0x6) {
1333  insn->eaBase = EA_BASE_NONE;
1334  insn->eaDisplacement = EA_DISP_16;
1335  if (readDisplacement(insn))
1336  return -1;
1337  } else {
1338  insn->eaBase = (EABase)(insn->eaBaseBase + rm);
1339  insn->eaDisplacement = EA_DISP_NONE;
1340  }
1341  break;
1342  case 0x1:
1343  insn->eaBase = (EABase)(insn->eaBaseBase + rm);
1344  insn->eaDisplacement = EA_DISP_8;
1345  insn->displacementSize = 1;
1346  if (readDisplacement(insn))
1347  return -1;
1348  break;
1349  case 0x2:
1350  insn->eaBase = (EABase)(insn->eaBaseBase + rm);
1351  insn->eaDisplacement = EA_DISP_16;
1352  if (readDisplacement(insn))
1353  return -1;
1354  break;
1355  case 0x3:
1356  insn->eaBase = (EABase)(insn->eaRegBase + rm);
1357  if (readDisplacement(insn))
1358  return -1;
1359  break;
1360  }
1361  break;
1362  case 4:
1363  case 8:
1364  insn->eaBaseBase = (insn->addressSize == 4 ? EA_BASE_EAX : EA_BASE_RAX);
1365 
1366  switch (mod) {
1367  case 0x0:
1368  insn->eaDisplacement = EA_DISP_NONE; /* readSIB may override this */
1369  // In determining whether RIP-relative mode is used (rm=5),
1370  // or whether a SIB byte is present (rm=4),
1371  // the extension bits (REX.b and EVEX.x) are ignored.
1372  switch (rm & 7) {
1373  case 0x4: // SIB byte is present
1374  insn->eaBase = (insn->addressSize == 4 ?
1375  EA_BASE_sib : EA_BASE_sib64);
1376  if (readSIB(insn) || readDisplacement(insn))
1377  return -1;
1378  break;
1379  case 0x5: // RIP-relative
1380  insn->eaBase = EA_BASE_NONE;
1381  insn->eaDisplacement = EA_DISP_32;
1382  if (readDisplacement(insn))
1383  return -1;
1384  break;
1385  default:
1386  insn->eaBase = (EABase)(insn->eaBaseBase + rm);
1387  break;
1388  }
1389  break;
1390  case 0x1:
1391  insn->displacementSize = 1;
1392  /* FALLTHROUGH */
1393  case 0x2:
1394  insn->eaDisplacement = (mod == 0x1 ? EA_DISP_8 : EA_DISP_32);
1395  switch (rm & 7) {
1396  case 0x4: // SIB byte is present
1397  insn->eaBase = EA_BASE_sib;
1398  if (readSIB(insn) || readDisplacement(insn))
1399  return -1;
1400  break;
1401  default:
1402  insn->eaBase = (EABase)(insn->eaBaseBase + rm);
1403  if (readDisplacement(insn))
1404  return -1;
1405  break;
1406  }
1407  break;
1408  case 0x3:
1409  insn->eaDisplacement = EA_DISP_NONE;
1410  insn->eaBase = (EABase)(insn->eaRegBase + rm);
1411  break;
1412  }
1413  break;
1414  } /* switch (insn->addressSize) */
1415 
1416  return 0;
1417 }
1418 
1419 #define GENERIC_FIXUP_FUNC(name, base, prefix) \
1420  static uint16_t name(struct InternalInstruction *insn, \
1421  OperandType type, \
1422  uint8_t index, \
1423  uint8_t *valid) { \
1424  *valid = 1; \
1425  switch (type) { \
1426  default: \
1427  debug("Unhandled register type"); \
1428  *valid = 0; \
1429  return 0; \
1430  case TYPE_Rv: \
1431  return base + index; \
1432  case TYPE_R8: \
1433  if (insn->rexPrefix && \
1434  index >= 4 && index <= 7) { \
1435  return prefix##_SPL + (index - 4); \
1436  } else { \
1437  return prefix##_AL + index; \
1438  } \
1439  case TYPE_R16: \
1440  return prefix##_AX + index; \
1441  case TYPE_R32: \
1442  return prefix##_EAX + index; \
1443  case TYPE_R64: \
1444  return prefix##_RAX + index; \
1445  case TYPE_ZMM: \
1446  return prefix##_ZMM0 + index; \
1447  case TYPE_YMM: \
1448  return prefix##_YMM0 + index; \
1449  case TYPE_XMM: \
1450  return prefix##_XMM0 + index; \
1451  case TYPE_VK: \
1452  if (index > 7) \
1453  *valid = 0; \
1454  return prefix##_K0 + index; \
1455  case TYPE_MM64: \
1456  return prefix##_MM0 + (index & 0x7); \
1457  case TYPE_SEGMENTREG: \
1458  if ((index & 7) > 5) \
1459  *valid = 0; \
1460  return prefix##_ES + (index & 7); \
1461  case TYPE_DEBUGREG: \
1462  return prefix##_DR0 + index; \
1463  case TYPE_CONTROLREG: \
1464  return prefix##_CR0 + index; \
1465  case TYPE_BNDR: \
1466  if (index > 3) \
1467  *valid = 0; \
1468  return prefix##_BND0 + index; \
1469  case TYPE_MVSIBX: \
1470  return prefix##_XMM0 + index; \
1471  case TYPE_MVSIBY: \
1472  return prefix##_YMM0 + index; \
1473  case TYPE_MVSIBZ: \
1474  return prefix##_ZMM0 + index; \
1475  } \
1476  }
1477 
1478 /*
1479  * fixup*Value - Consults an operand type to determine the meaning of the
1480  * reg or R/M field. If the operand is an XMM operand, for example, an
1481  * operand would be XMM0 instead of AX, which readModRM() would otherwise
1482  * misinterpret it as.
1483  *
1484  * @param insn - The instruction containing the operand.
1485  * @param type - The operand type.
1486  * @param index - The existing value of the field as reported by readModRM().
1487  * @param valid - The address of a uint8_t. The target is set to 1 if the
1488  * field is valid for the register class; 0 if not.
1489  * @return - The proper value.
1490  */
1491 GENERIC_FIXUP_FUNC(fixupRegValue, insn->regBase, MODRM_REG)
1492 GENERIC_FIXUP_FUNC(fixupRMValue, insn->eaRegBase, EA_REG)
1493 
1494 /*
1495  * fixupReg - Consults an operand specifier to determine which of the
1496  * fixup*Value functions to use in correcting readModRM()'ss interpretation.
1497  *
1498  * @param insn - See fixup*Value().
1499  * @param op - The operand specifier.
1500  * @return - 0 if fixup was successful; -1 if the register returned was
1501  * invalid for its class.
1502  */
1503 static int fixupReg(struct InternalInstruction *insn,
1504  const struct OperandSpecifier *op) {
1505  uint8_t valid;
1506 
1507  dbgprintf(insn, "fixupReg()");
1508 
1509  switch ((OperandEncoding)op->encoding) {
1510  default:
1511  debug("Expected a REG or R/M encoding in fixupReg");
1512  return -1;
1513  case ENCODING_VVVV:
1514  insn->vvvv = (Reg)fixupRegValue(insn,
1515  (OperandType)op->type,
1516  insn->vvvv,
1517  &valid);
1518  if (!valid)
1519  return -1;
1520  break;
1521  case ENCODING_REG:
1522  insn->reg = (Reg)fixupRegValue(insn,
1523  (OperandType)op->type,
1524  insn->reg - insn->regBase,
1525  &valid);
1526  if (!valid)
1527  return -1;
1528  break;
1530  if (insn->eaBase >= insn->eaRegBase) {
1531  insn->eaBase = (EABase)fixupRMValue(insn,
1532  (OperandType)op->type,
1533  insn->eaBase - insn->eaRegBase,
1534  &valid);
1535  if (!valid)
1536  return -1;
1537  }
1538  break;
1539  }
1540 
1541  return 0;
1542 }
1543 
1544 /*
1545  * readOpcodeRegister - Reads an operand from the opcode field of an
1546  * instruction and interprets it appropriately given the operand width.
1547  * Handles AddRegFrm instructions.
1548  *
1549  * @param insn - the instruction whose opcode field is to be read.
1550  * @param size - The width (in bytes) of the register being specified.
1551  * 1 means AL and friends, 2 means AX, 4 means EAX, and 8 means
1552  * RAX.
1553  * @return - 0 on success; nonzero otherwise.
1554  */
1555 static int readOpcodeRegister(struct InternalInstruction* insn, uint8_t size) {
1556  dbgprintf(insn, "readOpcodeRegister()");
1557 
1558  if (size == 0)
1559  size = insn->registerSize;
1560 
1561  switch (size) {
1562  case 1:
1563  insn->opcodeRegister = (Reg)(MODRM_REG_AL + ((bFromREX(insn->rexPrefix) << 3)
1564  | (insn->opcode & 7)));
1565  if (insn->rexPrefix &&
1566  insn->opcodeRegister >= MODRM_REG_AL + 0x4 &&
1567  insn->opcodeRegister < MODRM_REG_AL + 0x8) {
1568  insn->opcodeRegister = (Reg)(MODRM_REG_SPL
1569  + (insn->opcodeRegister - MODRM_REG_AL - 4));
1570  }
1571 
1572  break;
1573  case 2:
1574  insn->opcodeRegister = (Reg)(MODRM_REG_AX
1575  + ((bFromREX(insn->rexPrefix) << 3)
1576  | (insn->opcode & 7)));
1577  break;
1578  case 4:
1579  insn->opcodeRegister = (Reg)(MODRM_REG_EAX
1580  + ((bFromREX(insn->rexPrefix) << 3)
1581  | (insn->opcode & 7)));
1582  break;
1583  case 8:
1584  insn->opcodeRegister = (Reg)(MODRM_REG_RAX
1585  + ((bFromREX(insn->rexPrefix) << 3)
1586  | (insn->opcode & 7)));
1587  break;
1588  }
1589 
1590  return 0;
1591 }
1592 
1593 /*
1594  * readImmediate - Consumes an immediate operand from an instruction, given the
1595  * desired operand size.
1596  *
1597  * @param insn - The instruction whose operand is to be read.
1598  * @param size - The width (in bytes) of the operand.
1599  * @return - 0 if the immediate was successfully consumed; nonzero
1600  * otherwise.
1601  */
1602 static int readImmediate(struct InternalInstruction* insn, uint8_t size) {
1603  uint8_t imm8;
1604  uint16_t imm16;
1605  uint32_t imm32;
1606  uint64_t imm64;
1607 
1608  dbgprintf(insn, "readImmediate()");
1609 
1610  if (insn->numImmediatesConsumed == 2) {
1611  debug("Already consumed two immediates");
1612  return -1;
1613  }
1614 
1615  if (size == 0)
1616  size = insn->immediateSize;
1617  else
1618  insn->immediateSize = size;
1619  insn->immediateOffset = insn->readerCursor - insn->startLocation;
1620 
1621  switch (size) {
1622  case 1:
1623  if (consumeByte(insn, &imm8))
1624  return -1;
1625  insn->immediates[insn->numImmediatesConsumed] = imm8;
1626  break;
1627  case 2:
1628  if (consumeUInt16(insn, &imm16))
1629  return -1;
1630  insn->immediates[insn->numImmediatesConsumed] = imm16;
1631  break;
1632  case 4:
1633  if (consumeUInt32(insn, &imm32))
1634  return -1;
1635  insn->immediates[insn->numImmediatesConsumed] = imm32;
1636  break;
1637  case 8:
1638  if (consumeUInt64(insn, &imm64))
1639  return -1;
1640  insn->immediates[insn->numImmediatesConsumed] = imm64;
1641  break;
1642  }
1643 
1644  insn->numImmediatesConsumed++;
1645 
1646  return 0;
1647 }
1648 
1649 /*
1650  * readVVVV - Consumes vvvv from an instruction if it has a VEX prefix.
1651  *
1652  * @param insn - The instruction whose operand is to be read.
1653  * @return - 0 if the vvvv was successfully consumed; nonzero
1654  * otherwise.
1655  */
1656 static int readVVVV(struct InternalInstruction* insn) {
1657  dbgprintf(insn, "readVVVV()");
1658 
1659  int vvvv;
1660  if (insn->vectorExtensionType == TYPE_EVEX)
1661  vvvv = (v2FromEVEX4of4(insn->vectorExtensionPrefix[3]) << 4 |
1663  else if (insn->vectorExtensionType == TYPE_VEX_3B)
1664  vvvv = vvvvFromVEX3of3(insn->vectorExtensionPrefix[2]);
1665  else if (insn->vectorExtensionType == TYPE_VEX_2B)
1666  vvvv = vvvvFromVEX2of2(insn->vectorExtensionPrefix[1]);
1667  else if (insn->vectorExtensionType == TYPE_XOP)
1668  vvvv = vvvvFromXOP3of3(insn->vectorExtensionPrefix[2]);
1669  else
1670  return -1;
1671 
1672  if (insn->mode != MODE_64BIT)
1673  vvvv &= 0x7;
1674 
1675  insn->vvvv = static_cast<Reg>(vvvv);
1676  return 0;
1677 }
1678 
1679 /*
1680  * readMaskRegister - Reads an mask register from the opcode field of an
1681  * instruction.
1682  *
1683  * @param insn - The instruction whose opcode field is to be read.
1684  * @return - 0 on success; nonzero otherwise.
1685  */
1686 static int readMaskRegister(struct InternalInstruction* insn) {
1687  dbgprintf(insn, "readMaskRegister()");
1688 
1689  if (insn->vectorExtensionType != TYPE_EVEX)
1690  return -1;
1691 
1692  insn->writemask =
1693  static_cast<Reg>(aaaFromEVEX4of4(insn->vectorExtensionPrefix[3]));
1694  return 0;
1695 }
1696 
1697 /*
1698  * readOperands - Consults the specifier for an instruction and consumes all
1699  * operands for that instruction, interpreting them as it goes.
1700  *
1701  * @param insn - The instruction whose operands are to be read and interpreted.
1702  * @return - 0 if all operands could be read; nonzero otherwise.
1703  */
1704 static int readOperands(struct InternalInstruction* insn) {
1705  int hasVVVV, needVVVV;
1706  int sawRegImm = 0;
1707 
1708  dbgprintf(insn, "readOperands()");
1709 
1710  /* If non-zero vvvv specified, need to make sure one of the operands
1711  uses it. */
1712  hasVVVV = !readVVVV(insn);
1713  needVVVV = hasVVVV && (insn->vvvv != 0);
1714 
1715  for (const auto &Op : x86OperandSets[insn->spec->operands]) {
1716  switch (Op.encoding) {
1717  case ENCODING_NONE:
1718  case ENCODING_SI:
1719  case ENCODING_DI:
1720  break;
1722  // VSIB can use the V2 bit so check only the other bits.
1723  if (needVVVV)
1724  needVVVV = hasVVVV & ((insn->vvvv & 0xf) != 0);
1725  if (readModRM(insn))
1726  return -1;
1727 
1728  // Reject if SIB wasn't used.
1729  if (insn->eaBase != EA_BASE_sib && insn->eaBase != EA_BASE_sib64)
1730  return -1;
1731 
1732  // If sibIndex was set to SIB_INDEX_NONE, index offset is 4.
1733  if (insn->sibIndex == SIB_INDEX_NONE)
1734  insn->sibIndex = (SIBIndex)4;
1735 
1736  // If EVEX.v2 is set this is one of the 16-31 registers.
1737  if (insn->vectorExtensionType == TYPE_EVEX &&
1739  insn->sibIndex = (SIBIndex)(insn->sibIndex + 16);
1740 
1741  // Adjust the index register to the correct size.
1742  switch ((OperandType)Op.type) {
1743  default:
1744  debug("Unhandled VSIB index type");
1745  return -1;
1746  case TYPE_MVSIBX:
1747  insn->sibIndex = (SIBIndex)(SIB_INDEX_XMM0 +
1748  (insn->sibIndex - insn->sibIndexBase));
1749  break;
1750  case TYPE_MVSIBY:
1751  insn->sibIndex = (SIBIndex)(SIB_INDEX_YMM0 +
1752  (insn->sibIndex - insn->sibIndexBase));
1753  break;
1754  case TYPE_MVSIBZ:
1755  insn->sibIndex = (SIBIndex)(SIB_INDEX_ZMM0 +
1756  (insn->sibIndex - insn->sibIndexBase));
1757  break;
1758  }
1759 
1760  // Apply the AVX512 compressed displacement scaling factor.
1761  if (Op.encoding != ENCODING_REG && insn->eaDisplacement == EA_DISP_8)
1762  insn->displacement *= 1 << (Op.encoding - ENCODING_VSIB);
1763  break;
1764  case ENCODING_REG:
1766  if (readModRM(insn))
1767  return -1;
1768  if (fixupReg(insn, &Op))
1769  return -1;
1770  // Apply the AVX512 compressed displacement scaling factor.
1771  if (Op.encoding != ENCODING_REG && insn->eaDisplacement == EA_DISP_8)
1772  insn->displacement *= 1 << (Op.encoding - ENCODING_RM);
1773  break;
1774  case ENCODING_IB:
1775  if (sawRegImm) {
1776  /* Saw a register immediate so don't read again and instead split the
1777  previous immediate. FIXME: This is a hack. */
1778  insn->immediates[insn->numImmediatesConsumed] =
1779  insn->immediates[insn->numImmediatesConsumed - 1] & 0xf;
1780  ++insn->numImmediatesConsumed;
1781  break;
1782  }
1783  if (readImmediate(insn, 1))
1784  return -1;
1785  if (Op.type == TYPE_XMM || Op.type == TYPE_YMM)
1786  sawRegImm = 1;
1787  break;
1788  case ENCODING_IW:
1789  if (readImmediate(insn, 2))
1790  return -1;
1791  break;
1792  case ENCODING_ID:
1793  if (readImmediate(insn, 4))
1794  return -1;
1795  break;
1796  case ENCODING_IO:
1797  if (readImmediate(insn, 8))
1798  return -1;
1799  break;
1800  case ENCODING_Iv:
1801  if (readImmediate(insn, insn->immediateSize))
1802  return -1;
1803  break;
1804  case ENCODING_Ia:
1805  if (readImmediate(insn, insn->addressSize))
1806  return -1;
1807  break;
1808  case ENCODING_IRC:
1809  insn->RC = (l2FromEVEX4of4(insn->vectorExtensionPrefix[3]) << 1) |
1811  break;
1812  case ENCODING_RB:
1813  if (readOpcodeRegister(insn, 1))
1814  return -1;
1815  break;
1816  case ENCODING_RW:
1817  if (readOpcodeRegister(insn, 2))
1818  return -1;
1819  break;
1820  case ENCODING_RD:
1821  if (readOpcodeRegister(insn, 4))
1822  return -1;
1823  break;
1824  case ENCODING_RO:
1825  if (readOpcodeRegister(insn, 8))
1826  return -1;
1827  break;
1828  case ENCODING_Rv:
1829  if (readOpcodeRegister(insn, 0))
1830  return -1;
1831  break;
1832  case ENCODING_FP:
1833  break;
1834  case ENCODING_VVVV:
1835  needVVVV = 0; /* Mark that we have found a VVVV operand. */
1836  if (!hasVVVV)
1837  return -1;
1838  if (fixupReg(insn, &Op))
1839  return -1;
1840  break;
1841  case ENCODING_WRITEMASK:
1842  if (readMaskRegister(insn))
1843  return -1;
1844  break;
1845  case ENCODING_DUP:
1846  break;
1847  default:
1848  dbgprintf(insn, "Encountered an operand with an unknown encoding.");
1849  return -1;
1850  }
1851  }
1852 
1853  /* If we didn't find ENCODING_VVVV operand, but non-zero vvvv present, fail */
1854  if (needVVVV) return -1;
1855 
1856  return 0;
1857 }
1858 
1859 /*
1860  * decodeInstruction - Reads and interprets a full instruction provided by the
1861  * user.
1862  *
1863  * @param insn - A pointer to the instruction to be populated. Must be
1864  * pre-allocated.
1865  * @param reader - The function to be used to read the instruction's bytes.
1866  * @param readerArg - A generic argument to be passed to the reader to store
1867  * any internal state.
1868  * @param logger - If non-NULL, the function to be used to write log messages
1869  * and warnings.
1870  * @param loggerArg - A generic argument to be passed to the logger to store
1871  * any internal state.
1872  * @param startLoc - The address (in the reader's address space) of the first
1873  * byte in the instruction.
1874  * @param mode - The mode (real mode, IA-32e, or IA-32e in 64-bit mode) to
1875  * decode the instruction in.
1876  * @return - 0 if the instruction's memory could be read; nonzero if
1877  * not.
1878  */
1880  struct InternalInstruction *insn, byteReader_t reader,
1881  const void *readerArg, dlog_t logger, void *loggerArg, const void *miiArg,
1882  uint64_t startLoc, DisassemblerMode mode) {
1883  memset(insn, 0, sizeof(struct InternalInstruction));
1884 
1885  insn->reader = reader;
1886  insn->readerArg = readerArg;
1887  insn->dlog = logger;
1888  insn->dlogArg = loggerArg;
1889  insn->startLocation = startLoc;
1890  insn->readerCursor = startLoc;
1891  insn->mode = mode;
1892  insn->numImmediatesConsumed = 0;
1893 
1894  if (readPrefixes(insn) ||
1895  readOpcode(insn) ||
1896  getID(insn, miiArg) ||
1897  insn->instructionID == 0 ||
1898  readOperands(insn))
1899  return -1;
1900 
1901  insn->operands = x86OperandSets[insn->spec->operands];
1902 
1903  insn->length = insn->readerCursor - insn->startLocation;
1904 
1905  dbgprintf(insn, "Read from 0x%llx to 0x%llx: length %zu",
1906  startLoc, insn->readerCursor, insn->length);
1907 
1908  if (insn->length > 15)
1909  dbgprintf(insn, "Instruction exceeds 15-byte limit");
1910 
1911  return 0;
1912 }
#define bFromEVEX4of4(evex)
static void setPrefixPresent(struct InternalInstruction *insn, uint8_t prefix)
void(* dlog_t)(void *arg, const char *log)
Type for the logging function that the consumer can provide to get debugging output from the decoder...
#define rFromREX(rex)
#define XOP9_MAP_SYM
#define wFromEVEX3of4(evex)
The specification for how to extract and interpret a full instruction and its operands.
#define bFromVEX2of3(vex)
static int consumeByte(struct InternalInstruction *insn, uint8_t *byte)
int(* byteReader_t)(const void *arg, uint8_t *byte, uint64_t address)
Type for the byte reader that the consumer must provide to the decoder.
#define rmFromModRM(modRM)
static int readSIB(struct InternalInstruction *insn)
#define zFromEVEX4of4(evex)
format_object< Ts... > format(const char *Fmt, const Ts &... Vals)
These are helper functions used to produce formatted output.
Definition: Format.h:124
static InstrUID decode(OpcodeType type, InstructionContext insnContext, uint8_t opcode, uint8_t modRM)
#define vvvvFromVEX2of2(vex)
#define vvvvFromEVEX3of4(evex)
static int readDisplacement(struct InternalInstruction *insn)
#define r2FromEVEX2of4(evex)
#define op(i)
LLVM_NODISCARD LLVM_ATTRIBUTE_ALWAYS_INLINE const char * data() const
data - Get a pointer to the start of the string (which may not be null terminated).
Definition: StringRef.h:128
#define aaaFromEVEX4of4(evex)
amode Optimize addressing mode
#define bFromEVEX2of4(evex)
SIBIndex
All possible values of the SIB index field.
static int readOpcode(struct InternalInstruction *insn)
#define rFromEVEX2of4(evex)
#define xFromEVEX2of4(evex)
#define rFromVEX2of2(vex)
Reg
All possible values of the reg field in the ModR/M byte.
#define lFromVEX2of2(vex)
static int readVVVV(struct InternalInstruction *insn)
static int lookAtByte(struct InternalInstruction *insn, uint8_t *byte)
#define xFromXOP2of3(xop)
#define v2FromEVEX4of4(evex)
static int getID(struct InternalInstruction *insn, const void *miiArg)
int decodeInstruction(InternalInstruction *insn, byteReader_t reader, const void *readerArg, dlog_t logger, void *loggerArg, const void *miiArg, uint64_t startLoc, DisassemblerMode mode)
Decode one instruction and store the decoding results in a buffer provided by the consumer...
ModRMDecision modRMDecisions[256]
static void unconsumeByte(struct InternalInstruction *insn)
#define ppFromVEX3of3(vex)
#define mmmmmFromXOP2of3(xop)
EABase
All possible values of the base field for effective-address computations, a.k.a.
#define vvvvFromVEX3of3(vex)
The specification for how to extract and interpret one operand.
static bool is64Bit(const char *name)
static int readOpcodeRegister(struct InternalInstruction *insn, uint8_t size)
#define CASE_ENCODING_RM
#define bFromXOP2of3(xop)
#define baseFromSIB(sib)
static int readImmediate(struct InternalInstruction *insn, uint8_t size)
#define wFromREX(rex)
static int fixupReg(struct InternalInstruction *insn, const struct OperandSpecifier *op)
#define lFromVEX3of3(vex)
#define ppFromVEX2of2(vex)
static int modRMRequired(OpcodeType type, InstructionContext insnContext, uint16_t opcode)
The x86 internal instruction, which is produced by the decoder.
static int readPrefixes(struct InternalInstruction *insn)
#define THREEBYTE3A_SYM
static void dbgprintf(struct InternalInstruction *insn, const char *format,...)
StringRef GetInstrName(unsigned Opcode, const void *mii)
#define ONEBYTE_SYM
#define lFromXOP3of3(xop)
#define bFromREX(rex)
#define mmmmmFromVEX2of3(vex)
#define ppFromEVEX3of4(evex)
#define scaleFromSIB(sib)
static int readOperands(struct InternalInstruction *insn)
#define xFromVEX2of3(vex)
static bool is16BitEquivalent(const char *orig, const char *equiv)
#define rFromXOP2of3(xop)
#define CONSUME_FUNC(name, type)
#define INSTRUCTIONS_SYM
#define THREEBYTE38_SYM
#define TWOBYTE_SYM
#define CASE_ENCODING_VSIB
static bool isPrefix(const IndicesVector &Prefix, const IndicesVector &Longer)
Returns true if Prefix is a prefix of longer.
#define lFromEVEX4of4(evex)
static InstructionContext contextForAttrs(uint16_t attrMask)
#define l2FromEVEX4of4(evex)
#define wFromVEX3of3(vex)
#define wFromXOP3of3(xop)
#define debug(s)
#define xFromREX(rex)
#define indexFromSIB(sib)
SIBBase
All possible values of the SIB base field.
OperandType
Types of operands to CF instructions.
static int readModRM(struct InternalInstruction *insn)
#define mmFromEVEX2of4(evex)
static void logger(void *arg, const char *log)
logger - a callback function that wraps the operator<< method from raw_ostream.
#define XOP8_MAP_SYM
#define rFromVEX2of3(vex)
Specifies which opcode->instruction tables to look at given a particular context (set of attributes)...
#define regFromModRM(modRM)
static const struct InstructionSpecifier * specifierForUID(InstrUID uid)
#define modFromModRM(modRM)
OpcodeDecision opcodeDecisions[IC_max]
static int readMaskRegister(struct InternalInstruction *insn)
aarch64 promote const
static const char * name
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:49
#define XOPA_MAP_SYM
#define CONTEXTS_SYM
#define vvvvFromXOP3of3(vex)
#define GENERIC_FIXUP_FUNC(name, base, prefix)
static int getIDWithAttrMask(uint16_t *instructionID, struct InternalInstruction *insn, uint16_t attrMask)
#define ppFromXOP3of3(xop)
static bool isREX(struct InternalInstruction *insn, uint8_t prefix)
Specifies which set of ModR/M->instruction tables to look at given a particular opcode.
DisassemblerMode
Decoding mode for the Intel disassembler.
Specifies whether a ModR/M byte is needed and (if so) which instruction each possible value of the Mo...