LLVM  9.0.0svn
X86DisassemblerDecoder.cpp
Go to the documentation of this file.
1 //===-- X86DisassemblerDecoder.cpp - Disassembler decoder -----------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file is part of the X86 Disassembler.
10 // It contains the implementation of the instruction decoder.
11 // Documentation for the disassembler can be found in X86Disassembler.h.
12 //
13 //===----------------------------------------------------------------------===//
14 
15 #include <cstdarg> /* for va_*() */
16 #include <cstdio> /* for vsnprintf() */
17 #include <cstdlib> /* for exit() */
18 #include <cstring> /* for memset() */
19 
20 #include "X86DisassemblerDecoder.h"
21 
22 using namespace llvm::X86Disassembler;
23 
/// Describes, for one opcode, whether a ModR/M byte is needed and (if so)
/// which instruction each possible ModR/M value selects. Once this record is
/// known, decoding has narrowed down to a single instruction.
struct ModRMDecision {
  /// Kind of ModR/M dispatch; the decoder compares it against the MODRM_*
  /// constants.
  uint8_t modrm_type;

  /// Base index into modRMTable from which this opcode's instruction IDs
  /// are looked up.
  uint16_t instructionIDs;
};
31 
32 /// Specifies which set of ModR/M->instruction tables to look at
33 /// given a particular opcode.
35  ModRMDecision modRMDecisions[256];
36 };
37 
38 /// Specifies which opcode->instruction tables to look at given
39 /// a particular context (set of attributes). Since there are many possible
40 /// contexts, the decoder first uses CONTEXTS_SYM to determine which context
41 /// applies given a specific set of attributes. Hence there are only IC_max
42 /// entries in this table, rather than 2^(ATTR_max).
44  OpcodeDecision opcodeDecisions[IC_max];
45 };
46 
47 #include "X86GenDisassemblerTables.inc"
48 
// debug(s) - Forwards the message s, together with the current file name and
// line number, to Debug(). When NDEBUG is defined the macro expands to an
// empty statement so that call sites compile away.
#ifdef NDEBUG
#define debug(s) do { } while (0)
#else
#define debug(s) do { Debug(__FILE__, __LINE__, s); } while (0)
#endif
54 
55 /*
56  * contextForAttrs - Client for the instruction context table. Takes a set of
57  * attributes and returns the appropriate decode context.
58  *
59  * @param attrMask - Attributes, from the enumeration attributeBits.
60  * @return - The InstructionContext to use when looking up an
61  * an instruction with these attributes.
62  */
63 static InstructionContext contextForAttrs(uint16_t attrMask) {
64  return static_cast<InstructionContext>(CONTEXTS_SYM[attrMask]);
65 }
66 
67 /*
68  * modRMRequired - Reads the appropriate instruction table to determine whether
69  * the ModR/M byte is required to decode a particular instruction.
70  *
71  * @param type - The opcode type (i.e., how many bytes it has).
72  * @param insnContext - The context for the instruction, as returned by
73  * contextForAttrs.
74  * @param opcode - The last byte of the instruction's opcode, not counting
75  * ModR/M extensions and escapes.
76  * @return - true if the ModR/M byte is required, false otherwise.
77  */
79  InstructionContext insnContext,
80  uint16_t opcode) {
81  const struct ContextDecision* decision = nullptr;
82 
83  switch (type) {
84  case ONEBYTE:
85  decision = &ONEBYTE_SYM;
86  break;
87  case TWOBYTE:
88  decision = &TWOBYTE_SYM;
89  break;
90  case THREEBYTE_38:
91  decision = &THREEBYTE38_SYM;
92  break;
93  case THREEBYTE_3A:
94  decision = &THREEBYTE3A_SYM;
95  break;
96  case XOP8_MAP:
97  decision = &XOP8_MAP_SYM;
98  break;
99  case XOP9_MAP:
100  decision = &XOP9_MAP_SYM;
101  break;
102  case XOPA_MAP:
103  decision = &XOPA_MAP_SYM;
104  break;
105  case THREEDNOW_MAP:
106  decision = &THREEDNOW_MAP_SYM;
107  break;
108  }
109 
110  return decision->opcodeDecisions[insnContext].modRMDecisions[opcode].
111  modrm_type != MODRM_ONEENTRY;
112 }
113 
114 /*
115  * decode - Reads the appropriate instruction table to obtain the unique ID of
116  * an instruction.
117  *
118  * @param type - See modRMRequired().
119  * @param insnContext - See modRMRequired().
120  * @param opcode - See modRMRequired().
121  * @param modRM - The ModR/M byte if required, or any value if not.
122  * @return - The UID of the instruction, or 0 on failure.
123  */
125  InstructionContext insnContext,
126  uint8_t opcode,
127  uint8_t modRM) {
128  const struct ModRMDecision* dec = nullptr;
129 
130  switch (type) {
131  case ONEBYTE:
132  dec = &ONEBYTE_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode];
133  break;
134  case TWOBYTE:
135  dec = &TWOBYTE_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode];
136  break;
137  case THREEBYTE_38:
138  dec = &THREEBYTE38_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode];
139  break;
140  case THREEBYTE_3A:
141  dec = &THREEBYTE3A_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode];
142  break;
143  case XOP8_MAP:
144  dec = &XOP8_MAP_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode];
145  break;
146  case XOP9_MAP:
147  dec = &XOP9_MAP_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode];
148  break;
149  case XOPA_MAP:
150  dec = &XOPA_MAP_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode];
151  break;
152  case THREEDNOW_MAP:
153  dec = &THREEDNOW_MAP_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode];
154  break;
155  }
156 
157  switch (dec->modrm_type) {
158  default:
159  debug("Corrupt table! Unknown modrm_type");
160  return 0;
161  case MODRM_ONEENTRY:
162  return modRMTable[dec->instructionIDs];
163  case MODRM_SPLITRM:
164  if (modFromModRM(modRM) == 0x3)
165  return modRMTable[dec->instructionIDs+1];
166  return modRMTable[dec->instructionIDs];
167  case MODRM_SPLITREG:
168  if (modFromModRM(modRM) == 0x3)
169  return modRMTable[dec->instructionIDs+((modRM & 0x38) >> 3)+8];
170  return modRMTable[dec->instructionIDs+((modRM & 0x38) >> 3)];
171  case MODRM_SPLITMISC:
172  if (modFromModRM(modRM) == 0x3)
173  return modRMTable[dec->instructionIDs+(modRM & 0x3f)+8];
174  return modRMTable[dec->instructionIDs+((modRM & 0x38) >> 3)];
175  case MODRM_FULL:
176  return modRMTable[dec->instructionIDs+modRM];
177  }
178 }
179 
180 /*
181  * specifierForUID - Given a UID, returns the name and operand specification for
182  * that instruction.
183  *
184  * @param uid - The unique ID for the instruction. This should be returned by
185  * decode(); specifierForUID will not check bounds.
186  * @return - A pointer to the specification for that instruction.
187  */
188 static const struct InstructionSpecifier *specifierForUID(InstrUID uid) {
189  return &INSTRUCTIONS_SYM[uid];
190 }
191 
192 /*
193  * consumeByte - Uses the reader function provided by the user to consume one
194  * byte from the instruction's memory and advance the cursor.
195  *
196  * @param insn - The instruction with the reader function to use. The cursor
197  * for this instruction is advanced.
198  * @param byte - A pointer to a pre-allocated memory buffer to be populated
199  * with the data read.
200  * @return - 0 if the read was successful; nonzero otherwise.
201  */
202 static int consumeByte(struct InternalInstruction* insn, uint8_t* byte) {
203  int ret = insn->reader(insn->readerArg, byte, insn->readerCursor);
204 
205  if (!ret)
206  ++(insn->readerCursor);
207 
208  return ret;
209 }
210 
211 /*
212  * lookAtByte - Like consumeByte, but does not advance the cursor.
213  *
214  * @param insn - See consumeByte().
215  * @param byte - See consumeByte().
216  * @return - See consumeByte().
217  */
218 static int lookAtByte(struct InternalInstruction* insn, uint8_t* byte) {
219  return insn->reader(insn->readerArg, byte, insn->readerCursor);
220 }
221 
222 static void unconsumeByte(struct InternalInstruction* insn) {
223  insn->readerCursor--;
224 }
225 
226 #define CONSUME_FUNC(name, type) \
227  static int name(struct InternalInstruction* insn, type* ptr) { \
228  type combined = 0; \
229  unsigned offset; \
230  for (offset = 0; offset < sizeof(type); ++offset) { \
231  uint8_t byte; \
232  int ret = insn->reader(insn->readerArg, \
233  &byte, \
234  insn->readerCursor + offset); \
235  if (ret) \
236  return ret; \
237  combined = combined | ((uint64_t)byte << (offset * 8)); \
238  } \
239  *ptr = combined; \
240  insn->readerCursor += sizeof(type); \
241  return 0; \
242  }
243 
244 /*
245  * consume* - Use the reader function provided by the user to consume data
246  * values of various sizes from the instruction's memory and advance the
247  * cursor appropriately. These readers perform endian conversion.
248  *
249  * @param insn - See consumeByte().
250  * @param ptr - A pointer to a pre-allocated memory of appropriate size to
251  * be populated with the data read.
252  * @return - See consumeByte().
253  */
254 CONSUME_FUNC(consumeInt8, int8_t)
255 CONSUME_FUNC(consumeInt16, int16_t)
256 CONSUME_FUNC(consumeInt32, int32_t)
257 CONSUME_FUNC(consumeUInt16, uint16_t)
258 CONSUME_FUNC(consumeUInt32, uint32_t)
259 CONSUME_FUNC(consumeUInt64, uint64_t)
260 
261 /*
262  * dbgprintf - Uses the logging function provided by the user to log a single
263  * message, typically without a carriage-return.
264  *
265  * @param insn - The instruction containing the logging function.
266  * @param format - See printf().
267  * @param ... - See printf().
268  */
269 static void dbgprintf(struct InternalInstruction* insn,
270  const char* format,
271  ...) {
272  char buffer[256];
273  va_list ap;
274 
275  if (!insn->dlog)
276  return;
277 
278  va_start(ap, format);
279  (void)vsnprintf(buffer, sizeof(buffer), format, ap);
280  va_end(ap);
281 
282  insn->dlog(insn->dlogArg, buffer);
283 }
284 
285 static bool isREX(struct InternalInstruction *insn, uint8_t prefix) {
286  if (insn->mode == MODE_64BIT)
287  return prefix >= 0x40 && prefix <= 0x4f;
288  return false;
289 }
290 
291 /*
292  * setPrefixPresent - Marks that a particular prefix is present as mandatory
293  *
294  * @param insn - The instruction to be marked as having the prefix.
295  * @param prefix - The prefix that is present.
296  */
297 static void setPrefixPresent(struct InternalInstruction *insn, uint8_t prefix) {
298  uint8_t nextByte;
299  switch (prefix) {
300  case 0xf0:
301  insn->hasLockPrefix = true;
302  break;
303  case 0xf2:
304  case 0xf3:
305  if (lookAtByte(insn, &nextByte))
306  break;
307  // TODO:
308  // 1. There could be several 0x66
309  // 2. if (nextByte == 0x66) and nextNextByte != 0x0f then
310  // it's not mandatory prefix
311  // 3. if (nextByte >= 0x40 && nextByte <= 0x4f) it's REX and we need
312  // 0x0f exactly after it to be mandatory prefix
313  if (isREX(insn, nextByte) || nextByte == 0x0f || nextByte == 0x66)
314  // The last of 0xf2 /0xf3 is mandatory prefix
315  insn->mandatoryPrefix = prefix;
316  insn->repeatPrefix = prefix;
317  break;
318  case 0x66:
319  if (lookAtByte(insn, &nextByte))
320  break;
321  // 0x66 can't overwrite existing mandatory prefix and should be ignored
322  if (!insn->mandatoryPrefix && (nextByte == 0x0f || isREX(insn, nextByte)))
323  insn->mandatoryPrefix = prefix;
324  break;
325  }
326 }
327 
328 /*
329  * readPrefixes - Consumes all of an instruction's prefix bytes, and marks the
330  * instruction as having them. Also sets the instruction's default operand,
331  * address, and other relevant data sizes to report operands correctly.
332  *
333  * @param insn - The instruction whose prefixes are to be read.
334  * @return - 0 if the instruction could be read until the end of the prefix
335  * bytes, and no prefixes conflicted; nonzero otherwise.
336  */
337 static int readPrefixes(struct InternalInstruction* insn) {
338  bool isPrefix = true;
339  uint8_t byte = 0;
340  uint8_t nextByte;
341 
342  dbgprintf(insn, "readPrefixes()");
343 
344  while (isPrefix) {
345  /* If we fail reading prefixes, just stop here and let the opcode reader deal with it */
346  if (consumeByte(insn, &byte))
347  break;
348 
349  /*
350  * If the byte is a LOCK/REP/REPNE prefix and not a part of the opcode, then
351  * break and let it be disassembled as a normal "instruction".
352  */
353  if (insn->readerCursor - 1 == insn->startLocation && byte == 0xf0) // LOCK
354  break;
355 
356  if ((byte == 0xf2 || byte == 0xf3) && !lookAtByte(insn, &nextByte)) {
357  /*
358  * If the byte is 0xf2 or 0xf3, and any of the following conditions are
359  * met:
360  * - it is followed by a LOCK (0xf0) prefix
361  * - it is followed by an xchg instruction
362  * then it should be disassembled as a xacquire/xrelease not repne/rep.
363  */
364  if (((nextByte == 0xf0) ||
365  ((nextByte & 0xfe) == 0x86 || (nextByte & 0xf8) == 0x90))) {
366  insn->xAcquireRelease = true;
367  if (!(byte == 0xf3 && nextByte == 0x90)) // PAUSE instruction support
368  break;
369  }
370  /*
371  * Also if the byte is 0xf3, and the following condition is met:
372  * - it is followed by a "mov mem, reg" (opcode 0x88/0x89) or
373  * "mov mem, imm" (opcode 0xc6/0xc7) instructions.
374  * then it should be disassembled as an xrelease not rep.
375  */
376  if (byte == 0xf3 && (nextByte == 0x88 || nextByte == 0x89 ||
377  nextByte == 0xc6 || nextByte == 0xc7)) {
378  insn->xAcquireRelease = true;
379  if (nextByte != 0x90) // PAUSE instruction support
380  break;
381  }
382  if (isREX(insn, nextByte)) {
383  uint8_t nnextByte;
384  // Go to REX prefix after the current one
385  if (consumeByte(insn, &nnextByte))
386  return -1;
387  // We should be able to read next byte after REX prefix
388  if (lookAtByte(insn, &nnextByte))
389  return -1;
390  unconsumeByte(insn);
391  }
392  }
393 
394  switch (byte) {
395  case 0xf0: /* LOCK */
396  case 0xf2: /* REPNE/REPNZ */
397  case 0xf3: /* REP or REPE/REPZ */
398  setPrefixPresent(insn, byte);
399  break;
400  case 0x2e: /* CS segment override -OR- Branch not taken */
401  case 0x36: /* SS segment override -OR- Branch taken */
402  case 0x3e: /* DS segment override */
403  case 0x26: /* ES segment override */
404  case 0x64: /* FS segment override */
405  case 0x65: /* GS segment override */
406  switch (byte) {
407  case 0x2e:
409  break;
410  case 0x36:
412  break;
413  case 0x3e:
415  break;
416  case 0x26:
418  break;
419  case 0x64:
421  break;
422  case 0x65:
424  break;
425  default:
426  debug("Unhandled override");
427  return -1;
428  }
429  setPrefixPresent(insn, byte);
430  break;
431  case 0x66: /* Operand-size override */
432  insn->hasOpSize = true;
433  setPrefixPresent(insn, byte);
434  break;
435  case 0x67: /* Address-size override */
436  insn->hasAdSize = true;
437  setPrefixPresent(insn, byte);
438  break;
439  default: /* Not a prefix byte */
440  isPrefix = false;
441  break;
442  }
443 
444  if (isPrefix)
445  dbgprintf(insn, "Found prefix 0x%hhx", byte);
446  }
447 
449 
450  if (byte == 0x62) {
451  uint8_t byte1, byte2;
452 
453  if (consumeByte(insn, &byte1)) {
454  dbgprintf(insn, "Couldn't read second byte of EVEX prefix");
455  return -1;
456  }
457 
458  if (lookAtByte(insn, &byte2)) {
459  dbgprintf(insn, "Couldn't read third byte of EVEX prefix");
460  return -1;
461  }
462 
463  if ((insn->mode == MODE_64BIT || (byte1 & 0xc0) == 0xc0) &&
464  ((~byte1 & 0xc) == 0xc) && ((byte2 & 0x4) == 0x4)) {
466  } else {
467  unconsumeByte(insn); /* unconsume byte1 */
468  unconsumeByte(insn); /* unconsume byte */
469  }
470 
471  if (insn->vectorExtensionType == TYPE_EVEX) {
472  insn->vectorExtensionPrefix[0] = byte;
473  insn->vectorExtensionPrefix[1] = byte1;
474  if (consumeByte(insn, &insn->vectorExtensionPrefix[2])) {
475  dbgprintf(insn, "Couldn't read third byte of EVEX prefix");
476  return -1;
477  }
478  if (consumeByte(insn, &insn->vectorExtensionPrefix[3])) {
479  dbgprintf(insn, "Couldn't read fourth byte of EVEX prefix");
480  return -1;
481  }
482 
483  /* We simulate the REX prefix for simplicity's sake */
484  if (insn->mode == MODE_64BIT) {
485  insn->rexPrefix = 0x40
486  | (wFromEVEX3of4(insn->vectorExtensionPrefix[2]) << 3)
487  | (rFromEVEX2of4(insn->vectorExtensionPrefix[1]) << 2)
488  | (xFromEVEX2of4(insn->vectorExtensionPrefix[1]) << 1)
489  | (bFromEVEX2of4(insn->vectorExtensionPrefix[1]) << 0);
490  }
491 
492  dbgprintf(insn, "Found EVEX prefix 0x%hhx 0x%hhx 0x%hhx 0x%hhx",
493  insn->vectorExtensionPrefix[0], insn->vectorExtensionPrefix[1],
494  insn->vectorExtensionPrefix[2], insn->vectorExtensionPrefix[3]);
495  }
496  } else if (byte == 0xc4) {
497  uint8_t byte1;
498 
499  if (lookAtByte(insn, &byte1)) {
500  dbgprintf(insn, "Couldn't read second byte of VEX");
501  return -1;
502  }
503 
504  if (insn->mode == MODE_64BIT || (byte1 & 0xc0) == 0xc0)
506  else
507  unconsumeByte(insn);
508 
509  if (insn->vectorExtensionType == TYPE_VEX_3B) {
510  insn->vectorExtensionPrefix[0] = byte;
511  consumeByte(insn, &insn->vectorExtensionPrefix[1]);
512  consumeByte(insn, &insn->vectorExtensionPrefix[2]);
513 
514  /* We simulate the REX prefix for simplicity's sake */
515 
516  if (insn->mode == MODE_64BIT)
517  insn->rexPrefix = 0x40
518  | (wFromVEX3of3(insn->vectorExtensionPrefix[2]) << 3)
519  | (rFromVEX2of3(insn->vectorExtensionPrefix[1]) << 2)
520  | (xFromVEX2of3(insn->vectorExtensionPrefix[1]) << 1)
521  | (bFromVEX2of3(insn->vectorExtensionPrefix[1]) << 0);
522 
523  dbgprintf(insn, "Found VEX prefix 0x%hhx 0x%hhx 0x%hhx",
524  insn->vectorExtensionPrefix[0], insn->vectorExtensionPrefix[1],
525  insn->vectorExtensionPrefix[2]);
526  }
527  } else if (byte == 0xc5) {
528  uint8_t byte1;
529 
530  if (lookAtByte(insn, &byte1)) {
531  dbgprintf(insn, "Couldn't read second byte of VEX");
532  return -1;
533  }
534 
535  if (insn->mode == MODE_64BIT || (byte1 & 0xc0) == 0xc0)
537  else
538  unconsumeByte(insn);
539 
540  if (insn->vectorExtensionType == TYPE_VEX_2B) {
541  insn->vectorExtensionPrefix[0] = byte;
542  consumeByte(insn, &insn->vectorExtensionPrefix[1]);
543 
544  if (insn->mode == MODE_64BIT)
545  insn->rexPrefix = 0x40
546  | (rFromVEX2of2(insn->vectorExtensionPrefix[1]) << 2);
547 
548  switch (ppFromVEX2of2(insn->vectorExtensionPrefix[1])) {
549  default:
550  break;
551  case VEX_PREFIX_66:
552  insn->hasOpSize = true;
553  break;
554  }
555 
556  dbgprintf(insn, "Found VEX prefix 0x%hhx 0x%hhx",
557  insn->vectorExtensionPrefix[0],
558  insn->vectorExtensionPrefix[1]);
559  }
560  } else if (byte == 0x8f) {
561  uint8_t byte1;
562 
563  if (lookAtByte(insn, &byte1)) {
564  dbgprintf(insn, "Couldn't read second byte of XOP");
565  return -1;
566  }
567 
568  if ((byte1 & 0x38) != 0x0) /* 0 in these 3 bits is a POP instruction. */
570  else
571  unconsumeByte(insn);
572 
573  if (insn->vectorExtensionType == TYPE_XOP) {
574  insn->vectorExtensionPrefix[0] = byte;
575  consumeByte(insn, &insn->vectorExtensionPrefix[1]);
576  consumeByte(insn, &insn->vectorExtensionPrefix[2]);
577 
578  /* We simulate the REX prefix for simplicity's sake */
579 
580  if (insn->mode == MODE_64BIT)
581  insn->rexPrefix = 0x40
582  | (wFromXOP3of3(insn->vectorExtensionPrefix[2]) << 3)
583  | (rFromXOP2of3(insn->vectorExtensionPrefix[1]) << 2)
584  | (xFromXOP2of3(insn->vectorExtensionPrefix[1]) << 1)
585  | (bFromXOP2of3(insn->vectorExtensionPrefix[1]) << 0);
586 
587  switch (ppFromXOP3of3(insn->vectorExtensionPrefix[2])) {
588  default:
589  break;
590  case VEX_PREFIX_66:
591  insn->hasOpSize = true;
592  break;
593  }
594 
595  dbgprintf(insn, "Found XOP prefix 0x%hhx 0x%hhx 0x%hhx",
596  insn->vectorExtensionPrefix[0], insn->vectorExtensionPrefix[1],
597  insn->vectorExtensionPrefix[2]);
598  }
599  } else if (isREX(insn, byte)) {
600  if (lookAtByte(insn, &nextByte))
601  return -1;
602  insn->rexPrefix = byte;
603  dbgprintf(insn, "Found REX prefix 0x%hhx", byte);
604  } else
605  unconsumeByte(insn);
606 
607  if (insn->mode == MODE_16BIT) {
608  insn->registerSize = (insn->hasOpSize ? 4 : 2);
609  insn->addressSize = (insn->hasAdSize ? 4 : 2);
610  insn->displacementSize = (insn->hasAdSize ? 4 : 2);
611  insn->immediateSize = (insn->hasOpSize ? 4 : 2);
612  } else if (insn->mode == MODE_32BIT) {
613  insn->registerSize = (insn->hasOpSize ? 2 : 4);
614  insn->addressSize = (insn->hasAdSize ? 2 : 4);
615  insn->displacementSize = (insn->hasAdSize ? 2 : 4);
616  insn->immediateSize = (insn->hasOpSize ? 2 : 4);
617  } else if (insn->mode == MODE_64BIT) {
618  if (insn->rexPrefix && wFromREX(insn->rexPrefix)) {
619  insn->registerSize = 8;
620  insn->addressSize = (insn->hasAdSize ? 4 : 8);
621  insn->displacementSize = 4;
622  insn->immediateSize = 4;
623  } else {
624  insn->registerSize = (insn->hasOpSize ? 2 : 4);
625  insn->addressSize = (insn->hasAdSize ? 4 : 8);
626  insn->displacementSize = (insn->hasOpSize ? 2 : 4);
627  insn->immediateSize = (insn->hasOpSize ? 2 : 4);
628  }
629  }
630 
631  return 0;
632 }
633 
634 static int readModRM(struct InternalInstruction* insn);
635 
636 /*
637  * readOpcode - Reads the opcode (excepting the ModR/M byte in the case of
638  * extended or escape opcodes).
639  *
640  * @param insn - The instruction whose opcode is to be read.
641  * @return - 0 if the opcode could be read successfully; nonzero otherwise.
642  */
643 static int readOpcode(struct InternalInstruction* insn) {
644  /* Determine the length of the primary opcode */
645 
646  uint8_t current;
647 
648  dbgprintf(insn, "readOpcode()");
649 
650  insn->opcodeType = ONEBYTE;
651 
652  if (insn->vectorExtensionType == TYPE_EVEX) {
653  switch (mmFromEVEX2of4(insn->vectorExtensionPrefix[1])) {
654  default:
655  dbgprintf(insn, "Unhandled mm field for instruction (0x%hhx)",
657  return -1;
658  case VEX_LOB_0F:
659  insn->opcodeType = TWOBYTE;
660  return consumeByte(insn, &insn->opcode);
661  case VEX_LOB_0F38:
662  insn->opcodeType = THREEBYTE_38;
663  return consumeByte(insn, &insn->opcode);
664  case VEX_LOB_0F3A:
665  insn->opcodeType = THREEBYTE_3A;
666  return consumeByte(insn, &insn->opcode);
667  }
668  } else if (insn->vectorExtensionType == TYPE_VEX_3B) {
669  switch (mmmmmFromVEX2of3(insn->vectorExtensionPrefix[1])) {
670  default:
671  dbgprintf(insn, "Unhandled m-mmmm field for instruction (0x%hhx)",
673  return -1;
674  case VEX_LOB_0F:
675  insn->opcodeType = TWOBYTE;
676  return consumeByte(insn, &insn->opcode);
677  case VEX_LOB_0F38:
678  insn->opcodeType = THREEBYTE_38;
679  return consumeByte(insn, &insn->opcode);
680  case VEX_LOB_0F3A:
681  insn->opcodeType = THREEBYTE_3A;
682  return consumeByte(insn, &insn->opcode);
683  }
684  } else if (insn->vectorExtensionType == TYPE_VEX_2B) {
685  insn->opcodeType = TWOBYTE;
686  return consumeByte(insn, &insn->opcode);
687  } else if (insn->vectorExtensionType == TYPE_XOP) {
688  switch (mmmmmFromXOP2of3(insn->vectorExtensionPrefix[1])) {
689  default:
690  dbgprintf(insn, "Unhandled m-mmmm field for instruction (0x%hhx)",
692  return -1;
693  case XOP_MAP_SELECT_8:
694  insn->opcodeType = XOP8_MAP;
695  return consumeByte(insn, &insn->opcode);
696  case XOP_MAP_SELECT_9:
697  insn->opcodeType = XOP9_MAP;
698  return consumeByte(insn, &insn->opcode);
699  case XOP_MAP_SELECT_A:
700  insn->opcodeType = XOPA_MAP;
701  return consumeByte(insn, &insn->opcode);
702  }
703  }
704 
705  if (consumeByte(insn, &current))
706  return -1;
707 
708  if (current == 0x0f) {
709  dbgprintf(insn, "Found a two-byte escape prefix (0x%hhx)", current);
710 
711  if (consumeByte(insn, &current))
712  return -1;
713 
714  if (current == 0x38) {
715  dbgprintf(insn, "Found a three-byte escape prefix (0x%hhx)", current);
716 
717  if (consumeByte(insn, &current))
718  return -1;
719 
720  insn->opcodeType = THREEBYTE_38;
721  } else if (current == 0x3a) {
722  dbgprintf(insn, "Found a three-byte escape prefix (0x%hhx)", current);
723 
724  if (consumeByte(insn, &current))
725  return -1;
726 
727  insn->opcodeType = THREEBYTE_3A;
728  } else if (current == 0x0f) {
729  dbgprintf(insn, "Found a 3dnow escape prefix (0x%hhx)", current);
730 
731  // Consume operands before the opcode to comply with the 3DNow encoding
732  if (readModRM(insn))
733  return -1;
734 
735  if (consumeByte(insn, &current))
736  return -1;
737 
738  insn->opcodeType = THREEDNOW_MAP;
739  } else {
740  dbgprintf(insn, "Didn't find a three-byte escape prefix");
741 
742  insn->opcodeType = TWOBYTE;
743  }
744  } else if (insn->mandatoryPrefix)
745  // The opcode with mandatory prefix must start with opcode escape.
746  // If not it's legacy repeat prefix
747  insn->mandatoryPrefix = 0;
748 
749  /*
750  * At this point we have consumed the full opcode.
751  * Anything we consume from here on must be unconsumed.
752  */
753 
754  insn->opcode = current;
755 
756  return 0;
757 }
758 
759 /*
760  * getIDWithAttrMask - Determines the ID of an instruction, consuming
761  * the ModR/M byte as appropriate for extended and escape opcodes,
762  * and using a supplied attribute mask.
763  *
764  * @param instructionID - A pointer whose target is filled in with the ID of the
765  * instruction.
766  * @param insn - The instruction whose ID is to be determined.
767  * @param attrMask - The attribute mask to search.
768  * @return - 0 if the ModR/M could be read when needed or was not
769  * needed; nonzero otherwise.
770  */
771 static int getIDWithAttrMask(uint16_t* instructionID,
772  struct InternalInstruction* insn,
773  uint16_t attrMask) {
774  bool hasModRMExtension;
775 
776  InstructionContext instructionClass = contextForAttrs(attrMask);
777 
778  hasModRMExtension = modRMRequired(insn->opcodeType,
779  instructionClass,
780  insn->opcode);
781 
782  if (hasModRMExtension) {
783  if (readModRM(insn))
784  return -1;
785 
786  *instructionID = decode(insn->opcodeType,
787  instructionClass,
788  insn->opcode,
789  insn->modRM);
790  } else {
791  *instructionID = decode(insn->opcodeType,
792  instructionClass,
793  insn->opcode,
794  0);
795  }
796 
797  return 0;
798 }
799 
/*
 * is16BitEquivalent - Determines whether two instruction names refer to
 * equivalent instructions but one is 16-bit whereas the other is not.
 *
 * The names must have the same length and may differ only at positions where
 * the non-16-bit name has a size marker and the 16-bit name has the matching
 * 16-bit marker: 'Q' or 'L' against 'W', '6' or '3' against '1', and '4' or
 * '2' against '6' (i.e. "64"/"32" against "16"). Identical names also count
 * as equivalent.
 *
 * @param orig  - The instruction that is not 16-bit
 * @param equiv - The instruction that is 16-bit
 * @return      - true if the names are equivalent in the sense above.
 */
static bool is16BitEquivalent(const char *orig, const char *equiv) {
  /* size_t is the natural string index type; off_t is a POSIX file-offset
     type that none of this file's includes is guaranteed to declare. */
  size_t i;

  for (i = 0;; i++) {
    if (orig[i] == '\0' && equiv[i] == '\0')
      return true;
    if (orig[i] == '\0' || equiv[i] == '\0')
      return false;
    if (orig[i] != equiv[i]) {
      if ((orig[i] == 'Q' || orig[i] == 'L') && equiv[i] == 'W')
        continue;
      if ((orig[i] == '6' || orig[i] == '3') && equiv[i] == '1')
        continue;
      if ((orig[i] == '4' || orig[i] == '2') && equiv[i] == '6')
        continue;
      return false;
    }
  }
}
826 
/*
 * is64Bit - Determines whether this instruction is a 64-bit instruction,
 * judged purely by whether its name contains the substring "64".
 *
 * @param name - The NUL-terminated name of the instruction to examine.
 * @return     - true if "64" occurs anywhere in the name; false otherwise.
 */
static bool is64Bit(const char *name) {
  return strstr(name, "64") != NULL;
}
842 
843 /*
844  * getID - Determines the ID of an instruction, consuming the ModR/M byte as
845  * appropriate for extended and escape opcodes. Determines the attributes and
846  * context for the instruction before doing so.
847  *
848  * @param insn - The instruction whose ID is to be determined.
849  * @return - 0 if the ModR/M could be read when needed or was not needed;
850  * nonzero otherwise.
851  */
852 static int getID(struct InternalInstruction* insn, const void *miiArg) {
853  uint16_t attrMask;
854  uint16_t instructionID;
855 
856  dbgprintf(insn, "getID()");
857 
858  attrMask = ATTR_NONE;
859 
860  if (insn->mode == MODE_64BIT)
861  attrMask |= ATTR_64BIT;
862 
863  if (insn->vectorExtensionType != TYPE_NO_VEX_XOP) {
864  attrMask |= (insn->vectorExtensionType == TYPE_EVEX) ? ATTR_EVEX : ATTR_VEX;
865 
866  if (insn->vectorExtensionType == TYPE_EVEX) {
867  switch (ppFromEVEX3of4(insn->vectorExtensionPrefix[2])) {
868  case VEX_PREFIX_66:
869  attrMask |= ATTR_OPSIZE;
870  break;
871  case VEX_PREFIX_F3:
872  attrMask |= ATTR_XS;
873  break;
874  case VEX_PREFIX_F2:
875  attrMask |= ATTR_XD;
876  break;
877  }
878 
879  if (zFromEVEX4of4(insn->vectorExtensionPrefix[3]))
880  attrMask |= ATTR_EVEXKZ;
881  if (bFromEVEX4of4(insn->vectorExtensionPrefix[3]))
882  attrMask |= ATTR_EVEXB;
884  attrMask |= ATTR_EVEXK;
885  if (lFromEVEX4of4(insn->vectorExtensionPrefix[3]))
886  attrMask |= ATTR_EVEXL;
888  attrMask |= ATTR_EVEXL2;
889  } else if (insn->vectorExtensionType == TYPE_VEX_3B) {
890  switch (ppFromVEX3of3(insn->vectorExtensionPrefix[2])) {
891  case VEX_PREFIX_66:
892  attrMask |= ATTR_OPSIZE;
893  break;
894  case VEX_PREFIX_F3:
895  attrMask |= ATTR_XS;
896  break;
897  case VEX_PREFIX_F2:
898  attrMask |= ATTR_XD;
899  break;
900  }
901 
902  if (lFromVEX3of3(insn->vectorExtensionPrefix[2]))
903  attrMask |= ATTR_VEXL;
904  } else if (insn->vectorExtensionType == TYPE_VEX_2B) {
905  switch (ppFromVEX2of2(insn->vectorExtensionPrefix[1])) {
906  case VEX_PREFIX_66:
907  attrMask |= ATTR_OPSIZE;
908  break;
909  case VEX_PREFIX_F3:
910  attrMask |= ATTR_XS;
911  break;
912  case VEX_PREFIX_F2:
913  attrMask |= ATTR_XD;
914  break;
915  }
916 
917  if (lFromVEX2of2(insn->vectorExtensionPrefix[1]))
918  attrMask |= ATTR_VEXL;
919  } else if (insn->vectorExtensionType == TYPE_XOP) {
920  switch (ppFromXOP3of3(insn->vectorExtensionPrefix[2])) {
921  case VEX_PREFIX_66:
922  attrMask |= ATTR_OPSIZE;
923  break;
924  case VEX_PREFIX_F3:
925  attrMask |= ATTR_XS;
926  break;
927  case VEX_PREFIX_F2:
928  attrMask |= ATTR_XD;
929  break;
930  }
931 
932  if (lFromXOP3of3(insn->vectorExtensionPrefix[2]))
933  attrMask |= ATTR_VEXL;
934  } else {
935  return -1;
936  }
937  } else if (!insn->mandatoryPrefix) {
938  // If we don't have mandatory prefix we should use legacy prefixes here
939  if (insn->hasOpSize && (insn->mode != MODE_16BIT))
940  attrMask |= ATTR_OPSIZE;
941  if (insn->hasAdSize)
942  attrMask |= ATTR_ADSIZE;
943  if (insn->opcodeType == ONEBYTE) {
944  if (insn->repeatPrefix == 0xf3 && (insn->opcode == 0x90))
945  // Special support for PAUSE
946  attrMask |= ATTR_XS;
947  } else {
948  if (insn->repeatPrefix == 0xf2)
949  attrMask |= ATTR_XD;
950  else if (insn->repeatPrefix == 0xf3)
951  attrMask |= ATTR_XS;
952  }
953  } else {
954  switch (insn->mandatoryPrefix) {
955  case 0xf2:
956  attrMask |= ATTR_XD;
957  break;
958  case 0xf3:
959  attrMask |= ATTR_XS;
960  break;
961  case 0x66:
962  if (insn->mode != MODE_16BIT)
963  attrMask |= ATTR_OPSIZE;
964  break;
965  case 0x67:
966  attrMask |= ATTR_ADSIZE;
967  break;
968  }
969 
970  }
971 
972  if (insn->rexPrefix & 0x08) {
973  attrMask |= ATTR_REXW;
974  attrMask &= ~ATTR_ADSIZE;
975  }
976 
977  /*
978  * JCXZ/JECXZ need special handling for 16-bit mode because the meaning
979  * of the AdSize prefix is inverted w.r.t. 32-bit mode.
980  */
981  if (insn->mode == MODE_16BIT && insn->opcodeType == ONEBYTE &&
982  insn->opcode == 0xE3)
983  attrMask ^= ATTR_ADSIZE;
984 
985  // If we're in 16-bit mode and this is one of the relative jumps and opsize
986  // prefix isn't present, we need to force the opsize attribute since the
987  // prefix is inverted relative to 32-bit mode.
988  if (insn->mode == MODE_16BIT && !insn->hasOpSize &&
989  insn->opcodeType == ONEBYTE &&
990  (insn->opcode == 0xE8 || insn->opcode == 0xE9))
991  attrMask |= ATTR_OPSIZE;
992 
993  if (insn->mode == MODE_16BIT && !insn->hasOpSize &&
994  insn->opcodeType == TWOBYTE &&
995  insn->opcode >= 0x80 && insn->opcode <= 0x8F)
996  attrMask |= ATTR_OPSIZE;
997 
998  if (getIDWithAttrMask(&instructionID, insn, attrMask))
999  return -1;
1000 
1001  /* The following clauses compensate for limitations of the tables. */
1002 
1003  if (insn->mode != MODE_64BIT &&
1005  /*
1006  * The tables can't distinguish between cases where the W-bit is used to
1007  * select register size and cases where it's a required part of the opcode.
1008  */
1009  if ((insn->vectorExtensionType == TYPE_EVEX &&
1010  wFromEVEX3of4(insn->vectorExtensionPrefix[2])) ||
1011  (insn->vectorExtensionType == TYPE_VEX_3B &&
1012  wFromVEX3of3(insn->vectorExtensionPrefix[2])) ||
1013  (insn->vectorExtensionType == TYPE_XOP &&
1014  wFromXOP3of3(insn->vectorExtensionPrefix[2]))) {
1015 
1016  uint16_t instructionIDWithREXW;
1017  if (getIDWithAttrMask(&instructionIDWithREXW,
1018  insn, attrMask | ATTR_REXW)) {
1019  insn->instructionID = instructionID;
1020  insn->spec = specifierForUID(instructionID);
1021  return 0;
1022  }
1023 
1024  auto SpecName = GetInstrName(instructionIDWithREXW, miiArg);
1025  // If not a 64-bit instruction. Switch the opcode.
1026  if (!is64Bit(SpecName.data())) {
1027  insn->instructionID = instructionIDWithREXW;
1028  insn->spec = specifierForUID(instructionIDWithREXW);
1029  return 0;
1030  }
1031  }
1032  }
1033 
1034  /*
1035  * Absolute moves, umonitor, and movdir64b need special handling.
1036  * -For 16-bit mode because the meaning of the AdSize and OpSize prefixes are
1037  * inverted w.r.t. 32-bit mode.
1038  * -For 32-bit mode we need to ensure the ADSIZE prefix is observed in
1039  * any position.
1040  */
1041  if ((insn->opcodeType == ONEBYTE && ((insn->opcode & 0xFC) == 0xA0)) ||
1042  (insn->opcodeType == TWOBYTE && (insn->opcode == 0xAE)) ||
1043  (insn->opcodeType == THREEBYTE_38 && insn->opcode == 0xF8)) {
1044  /* Make sure we observed the prefixes in any position. */
1045  if (insn->hasAdSize)
1046  attrMask |= ATTR_ADSIZE;
1047  if (insn->hasOpSize)
1048  attrMask |= ATTR_OPSIZE;
1049 
1050  /* In 16-bit, invert the attributes. */
1051  if (insn->mode == MODE_16BIT) {
1052  attrMask ^= ATTR_ADSIZE;
1053 
1054  /* The OpSize attribute is only valid with the absolute moves. */
1055  if (insn->opcodeType == ONEBYTE && ((insn->opcode & 0xFC) == 0xA0))
1056  attrMask ^= ATTR_OPSIZE;
1057  }
1058 
1059  if (getIDWithAttrMask(&instructionID, insn, attrMask))
1060  return -1;
1061 
1062  insn->instructionID = instructionID;
1063  insn->spec = specifierForUID(instructionID);
1064  return 0;
1065  }
1066 
1067  if ((insn->mode == MODE_16BIT || insn->hasOpSize) &&
1068  !(attrMask & ATTR_OPSIZE)) {
1069  /*
1070  * The instruction tables make no distinction between instructions that
1071  * allow OpSize anywhere (i.e., 16-bit operations) and that need it in a
1072  * particular spot (i.e., many MMX operations). In general we're
1073  * conservative, but in the specific case where OpSize is present but not
1074  * in the right place we check if there's a 16-bit operation.
1075  */
1076 
1077  const struct InstructionSpecifier *spec;
1078  uint16_t instructionIDWithOpsize;
1079  llvm::StringRef specName, specWithOpSizeName;
1080 
1081  spec = specifierForUID(instructionID);
1082 
1083  if (getIDWithAttrMask(&instructionIDWithOpsize,
1084  insn,
1085  attrMask | ATTR_OPSIZE)) {
1086  /*
1087  * ModRM required with OpSize but not present; give up and return version
1088  * without OpSize set
1089  */
1090 
1091  insn->instructionID = instructionID;
1092  insn->spec = spec;
1093  return 0;
1094  }
1095 
1096  specName = GetInstrName(instructionID, miiArg);
1097  specWithOpSizeName = GetInstrName(instructionIDWithOpsize, miiArg);
1098 
1099  if (is16BitEquivalent(specName.data(), specWithOpSizeName.data()) &&
1100  (insn->mode == MODE_16BIT) ^ insn->hasOpSize) {
1101  insn->instructionID = instructionIDWithOpsize;
1102  insn->spec = specifierForUID(instructionIDWithOpsize);
1103  } else {
1104  insn->instructionID = instructionID;
1105  insn->spec = spec;
1106  }
1107  return 0;
1108  }
1109 
1110  if (insn->opcodeType == ONEBYTE && insn->opcode == 0x90 &&
1111  insn->rexPrefix & 0x01) {
1112  /*
1113  * NOOP shouldn't decode as NOOP if REX.b is set. Instead
1114  * it should decode as XCHG %r8, %eax.
1115  */
1116 
1117  const struct InstructionSpecifier *spec;
1118  uint16_t instructionIDWithNewOpcode;
1119  const struct InstructionSpecifier *specWithNewOpcode;
1120 
1121  spec = specifierForUID(instructionID);
1122 
1123  /* Borrow opcode from one of the other XCHGar opcodes */
1124  insn->opcode = 0x91;
1125 
1126  if (getIDWithAttrMask(&instructionIDWithNewOpcode,
1127  insn,
1128  attrMask)) {
1129  insn->opcode = 0x90;
1130 
1131  insn->instructionID = instructionID;
1132  insn->spec = spec;
1133  return 0;
1134  }
1135 
1136  specWithNewOpcode = specifierForUID(instructionIDWithNewOpcode);
1137 
1138  /* Change back */
1139  insn->opcode = 0x90;
1140 
1141  insn->instructionID = instructionIDWithNewOpcode;
1142  insn->spec = specWithNewOpcode;
1143 
1144  return 0;
1145  }
1146 
1147  insn->instructionID = instructionID;
1148  insn->spec = specifierForUID(insn->instructionID);
1149 
1150  return 0;
1151 }
1152 
1153 /*
1154  * readSIB - Consumes the SIB byte to determine addressing information for an
1155  * instruction.
1156  *
1157  * @param insn - The instruction whose SIB byte is to be read.
1158  * @return - 0 if the SIB byte was successfully read; nonzero otherwise.
1159  */
1160 static int readSIB(struct InternalInstruction* insn) {
1161  SIBBase sibBaseBase = SIB_BASE_NONE;
1162  uint8_t index, base;
1163 
1164  dbgprintf(insn, "readSIB()");
1165 
1166  if (insn->consumedSIB)
1167  return 0;
1168 
1169  insn->consumedSIB = true;
1170 
1171  switch (insn->addressSize) {
1172  case 2:
1173  dbgprintf(insn, "SIB-based addressing doesn't work in 16-bit mode");
1174  return -1;
1175  case 4:
1176  insn->sibIndexBase = SIB_INDEX_EAX;
1177  sibBaseBase = SIB_BASE_EAX;
1178  break;
1179  case 8:
1180  insn->sibIndexBase = SIB_INDEX_RAX;
1181  sibBaseBase = SIB_BASE_RAX;
1182  break;
1183  }
1184 
1185  if (consumeByte(insn, &insn->sib))
1186  return -1;
1187 
1188  index = indexFromSIB(insn->sib) | (xFromREX(insn->rexPrefix) << 3);
1189 
1190  if (index == 0x4) {
1191  insn->sibIndex = SIB_INDEX_NONE;
1192  } else {
1193  insn->sibIndex = (SIBIndex)(insn->sibIndexBase + index);
1194  }
1195 
1196  insn->sibScale = 1 << scaleFromSIB(insn->sib);
1197 
1198  base = baseFromSIB(insn->sib) | (bFromREX(insn->rexPrefix) << 3);
1199 
1200  switch (base) {
1201  case 0x5:
1202  case 0xd:
1203  switch (modFromModRM(insn->modRM)) {
1204  case 0x0:
1205  insn->eaDisplacement = EA_DISP_32;
1206  insn->sibBase = SIB_BASE_NONE;
1207  break;
1208  case 0x1:
1209  insn->eaDisplacement = EA_DISP_8;
1210  insn->sibBase = (SIBBase)(sibBaseBase + base);
1211  break;
1212  case 0x2:
1213  insn->eaDisplacement = EA_DISP_32;
1214  insn->sibBase = (SIBBase)(sibBaseBase + base);
1215  break;
1216  case 0x3:
1217  debug("Cannot have Mod = 0b11 and a SIB byte");
1218  return -1;
1219  }
1220  break;
1221  default:
1222  insn->sibBase = (SIBBase)(sibBaseBase + base);
1223  break;
1224  }
1225 
1226  return 0;
1227 }
1228 
1229 /*
1230  * readDisplacement - Consumes the displacement of an instruction.
1231  *
1232  * @param insn - The instruction whose displacement is to be read.
1233  * @return - 0 if the displacement byte was successfully read; nonzero
1234  * otherwise.
1235  */
1236 static int readDisplacement(struct InternalInstruction* insn) {
1237  int8_t d8;
1238  int16_t d16;
1239  int32_t d32;
1240 
1241  dbgprintf(insn, "readDisplacement()");
1242 
1243  if (insn->consumedDisplacement)
1244  return 0;
1245 
1246  insn->consumedDisplacement = true;
1247  insn->displacementOffset = insn->readerCursor - insn->startLocation;
1248 
1249  switch (insn->eaDisplacement) {
1250  case EA_DISP_NONE:
1251  insn->consumedDisplacement = false;
1252  break;
1253  case EA_DISP_8:
1254  if (consumeInt8(insn, &d8))
1255  return -1;
1256  insn->displacement = d8;
1257  break;
1258  case EA_DISP_16:
1259  if (consumeInt16(insn, &d16))
1260  return -1;
1261  insn->displacement = d16;
1262  break;
1263  case EA_DISP_32:
1264  if (consumeInt32(insn, &d32))
1265  return -1;
1266  insn->displacement = d32;
1267  break;
1268  }
1269 
1270  insn->consumedDisplacement = true;
1271  return 0;
1272 }
1273 
1274 /*
1275  * readModRM - Consumes all addressing information (ModR/M byte, SIB byte, and
1276  * displacement) for an instruction and interprets it.
1277  *
1278  * @param insn - The instruction whose addressing information is to be read.
1279  * @return - 0 if the information was successfully read; nonzero otherwise.
1280  */
1281 static int readModRM(struct InternalInstruction* insn) {
1282  uint8_t mod, rm, reg, evexrm;
1283 
1284  dbgprintf(insn, "readModRM()");
1285 
1286  if (insn->consumedModRM)
1287  return 0;
1288 
1289  if (consumeByte(insn, &insn->modRM))
1290  return -1;
1291  insn->consumedModRM = true;
1292 
1293  mod = modFromModRM(insn->modRM);
1294  rm = rmFromModRM(insn->modRM);
1295  reg = regFromModRM(insn->modRM);
1296 
1297  /*
1298  * This goes by insn->registerSize to pick the correct register, which messes
1299  * up if we're using (say) XMM or 8-bit register operands. That gets fixed in
1300  * fixupReg().
1301  */
1302  switch (insn->registerSize) {
1303  case 2:
1304  insn->regBase = MODRM_REG_AX;
1305  insn->eaRegBase = EA_REG_AX;
1306  break;
1307  case 4:
1308  insn->regBase = MODRM_REG_EAX;
1309  insn->eaRegBase = EA_REG_EAX;
1310  break;
1311  case 8:
1312  insn->regBase = MODRM_REG_RAX;
1313  insn->eaRegBase = EA_REG_RAX;
1314  break;
1315  }
1316 
1317  reg |= rFromREX(insn->rexPrefix) << 3;
1318  rm |= bFromREX(insn->rexPrefix) << 3;
1319 
1320  evexrm = 0;
1321  if (insn->vectorExtensionType == TYPE_EVEX && insn->mode == MODE_64BIT) {
1322  reg |= r2FromEVEX2of4(insn->vectorExtensionPrefix[1]) << 4;
1323  evexrm = xFromEVEX2of4(insn->vectorExtensionPrefix[1]) << 4;
1324  }
1325 
1326  insn->reg = (Reg)(insn->regBase + reg);
1327 
1328  switch (insn->addressSize) {
1329  case 2: {
1330  EABase eaBaseBase = EA_BASE_BX_SI;
1331 
1332  switch (mod) {
1333  case 0x0:
1334  if (rm == 0x6) {
1335  insn->eaBase = EA_BASE_NONE;
1336  insn->eaDisplacement = EA_DISP_16;
1337  if (readDisplacement(insn))
1338  return -1;
1339  } else {
1340  insn->eaBase = (EABase)(eaBaseBase + rm);
1341  insn->eaDisplacement = EA_DISP_NONE;
1342  }
1343  break;
1344  case 0x1:
1345  insn->eaBase = (EABase)(eaBaseBase + rm);
1346  insn->eaDisplacement = EA_DISP_8;
1347  insn->displacementSize = 1;
1348  if (readDisplacement(insn))
1349  return -1;
1350  break;
1351  case 0x2:
1352  insn->eaBase = (EABase)(eaBaseBase + rm);
1353  insn->eaDisplacement = EA_DISP_16;
1354  if (readDisplacement(insn))
1355  return -1;
1356  break;
1357  case 0x3:
1358  insn->eaBase = (EABase)(insn->eaRegBase + rm);
1359  if (readDisplacement(insn))
1360  return -1;
1361  break;
1362  }
1363  break;
1364  }
1365  case 4:
1366  case 8: {
1367  EABase eaBaseBase = (insn->addressSize == 4 ? EA_BASE_EAX : EA_BASE_RAX);
1368 
1369  switch (mod) {
1370  case 0x0:
1371  insn->eaDisplacement = EA_DISP_NONE; /* readSIB may override this */
1372  // In determining whether RIP-relative mode is used (rm=5),
1373  // or whether a SIB byte is present (rm=4),
1374  // the extension bits (REX.b and EVEX.x) are ignored.
1375  switch (rm & 7) {
1376  case 0x4: // SIB byte is present
1377  insn->eaBase = (insn->addressSize == 4 ?
1378  EA_BASE_sib : EA_BASE_sib64);
1379  if (readSIB(insn) || readDisplacement(insn))
1380  return -1;
1381  break;
1382  case 0x5: // RIP-relative
1383  insn->eaBase = EA_BASE_NONE;
1384  insn->eaDisplacement = EA_DISP_32;
1385  if (readDisplacement(insn))
1386  return -1;
1387  break;
1388  default:
1389  insn->eaBase = (EABase)(eaBaseBase + rm);
1390  break;
1391  }
1392  break;
1393  case 0x1:
1394  insn->displacementSize = 1;
1396  case 0x2:
1397  insn->eaDisplacement = (mod == 0x1 ? EA_DISP_8 : EA_DISP_32);
1398  switch (rm & 7) {
1399  case 0x4: // SIB byte is present
1400  insn->eaBase = EA_BASE_sib;
1401  if (readSIB(insn) || readDisplacement(insn))
1402  return -1;
1403  break;
1404  default:
1405  insn->eaBase = (EABase)(eaBaseBase + rm);
1406  if (readDisplacement(insn))
1407  return -1;
1408  break;
1409  }
1410  break;
1411  case 0x3:
1412  insn->eaDisplacement = EA_DISP_NONE;
1413  insn->eaBase = (EABase)(insn->eaRegBase + rm + evexrm);
1414  break;
1415  }
1416  break;
1417  }
1418  } /* switch (insn->addressSize) */
1419 
1420  return 0;
1421 }
1422 
1423 #define GENERIC_FIXUP_FUNC(name, base, prefix, mask) \
1424  static uint16_t name(struct InternalInstruction *insn, \
1425  OperandType type, \
1426  uint8_t index, \
1427  uint8_t *valid) { \
1428  *valid = 1; \
1429  switch (type) { \
1430  default: \
1431  debug("Unhandled register type"); \
1432  *valid = 0; \
1433  return 0; \
1434  case TYPE_Rv: \
1435  return base + index; \
1436  case TYPE_R8: \
1437  index &= mask; \
1438  if (index > 0xf) \
1439  *valid = 0; \
1440  if (insn->rexPrefix && \
1441  index >= 4 && index <= 7) { \
1442  return prefix##_SPL + (index - 4); \
1443  } else { \
1444  return prefix##_AL + index; \
1445  } \
1446  case TYPE_R16: \
1447  index &= mask; \
1448  if (index > 0xf) \
1449  *valid = 0; \
1450  return prefix##_AX + index; \
1451  case TYPE_R32: \
1452  index &= mask; \
1453  if (index > 0xf) \
1454  *valid = 0; \
1455  return prefix##_EAX + index; \
1456  case TYPE_R64: \
1457  index &= mask; \
1458  if (index > 0xf) \
1459  *valid = 0; \
1460  return prefix##_RAX + index; \
1461  case TYPE_ZMM: \
1462  return prefix##_ZMM0 + index; \
1463  case TYPE_YMM: \
1464  return prefix##_YMM0 + index; \
1465  case TYPE_XMM: \
1466  return prefix##_XMM0 + index; \
1467  case TYPE_VK: \
1468  index &= 0xf; \
1469  if (index > 7) \
1470  *valid = 0; \
1471  return prefix##_K0 + index; \
1472  case TYPE_MM64: \
1473  return prefix##_MM0 + (index & 0x7); \
1474  case TYPE_SEGMENTREG: \
1475  if ((index & 7) > 5) \
1476  *valid = 0; \
1477  return prefix##_ES + (index & 7); \
1478  case TYPE_DEBUGREG: \
1479  return prefix##_DR0 + index; \
1480  case TYPE_CONTROLREG: \
1481  return prefix##_CR0 + index; \
1482  case TYPE_BNDR: \
1483  if (index > 3) \
1484  *valid = 0; \
1485  return prefix##_BND0 + index; \
1486  case TYPE_MVSIBX: \
1487  return prefix##_XMM0 + index; \
1488  case TYPE_MVSIBY: \
1489  return prefix##_YMM0 + index; \
1490  case TYPE_MVSIBZ: \
1491  return prefix##_ZMM0 + index; \
1492  } \
1493  }
1494 
1495 /*
1496  * fixup*Value - Consults an operand type to determine the meaning of the
1497  * reg or R/M field. If the operand is an XMM operand, for example, an
1498  * operand would be XMM0 instead of AX, which readModRM() would otherwise
1499  * misinterpret it as.
1500  *
1501  * @param insn - The instruction containing the operand.
1502  * @param type - The operand type.
1503  * @param index - The existing value of the field as reported by readModRM().
1504  * @param valid - The address of a uint8_t. The target is set to 1 if the
1505  * field is valid for the register class; 0 if not.
1506  * @return - The proper value.
1507  */
1508 GENERIC_FIXUP_FUNC(fixupRegValue, insn->regBase, MODRM_REG, 0x1f)
1509 GENERIC_FIXUP_FUNC(fixupRMValue, insn->eaRegBase, EA_REG, 0xf)
1510 
1511 /*
1512  * fixupReg - Consults an operand specifier to determine which of the
1513  * fixup*Value functions to use in correcting readModRM()'ss interpretation.
1514  *
1515  * @param insn - See fixup*Value().
1516  * @param op - The operand specifier.
1517  * @return - 0 if fixup was successful; -1 if the register returned was
1518  * invalid for its class.
1519  */
1520 static int fixupReg(struct InternalInstruction *insn,
1521  const struct OperandSpecifier *op) {
1522  uint8_t valid;
1523 
1524  dbgprintf(insn, "fixupReg()");
1525 
1526  switch ((OperandEncoding)op->encoding) {
1527  default:
1528  debug("Expected a REG or R/M encoding in fixupReg");
1529  return -1;
1530  case ENCODING_VVVV:
1531  insn->vvvv = (Reg)fixupRegValue(insn,
1532  (OperandType)op->type,
1533  insn->vvvv,
1534  &valid);
1535  if (!valid)
1536  return -1;
1537  break;
1538  case ENCODING_REG:
1539  insn->reg = (Reg)fixupRegValue(insn,
1540  (OperandType)op->type,
1541  insn->reg - insn->regBase,
1542  &valid);
1543  if (!valid)
1544  return -1;
1545  break;
1547  if (insn->eaBase >= insn->eaRegBase) {
1548  insn->eaBase = (EABase)fixupRMValue(insn,
1549  (OperandType)op->type,
1550  insn->eaBase - insn->eaRegBase,
1551  &valid);
1552  if (!valid)
1553  return -1;
1554  }
1555  break;
1556  }
1557 
1558  return 0;
1559 }
1560 
1561 /*
1562  * readOpcodeRegister - Reads an operand from the opcode field of an
1563  * instruction and interprets it appropriately given the operand width.
1564  * Handles AddRegFrm instructions.
1565  *
1566  * @param insn - the instruction whose opcode field is to be read.
1567  * @param size - The width (in bytes) of the register being specified.
1568  * 1 means AL and friends, 2 means AX, 4 means EAX, and 8 means
1569  * RAX.
1570  * @return - 0 on success; nonzero otherwise.
1571  */
1572 static int readOpcodeRegister(struct InternalInstruction* insn, uint8_t size) {
1573  dbgprintf(insn, "readOpcodeRegister()");
1574 
1575  if (size == 0)
1576  size = insn->registerSize;
1577 
1578  switch (size) {
1579  case 1:
1580  insn->opcodeRegister = (Reg)(MODRM_REG_AL + ((bFromREX(insn->rexPrefix) << 3)
1581  | (insn->opcode & 7)));
1582  if (insn->rexPrefix &&
1583  insn->opcodeRegister >= MODRM_REG_AL + 0x4 &&
1584  insn->opcodeRegister < MODRM_REG_AL + 0x8) {
1585  insn->opcodeRegister = (Reg)(MODRM_REG_SPL
1586  + (insn->opcodeRegister - MODRM_REG_AL - 4));
1587  }
1588 
1589  break;
1590  case 2:
1591  insn->opcodeRegister = (Reg)(MODRM_REG_AX
1592  + ((bFromREX(insn->rexPrefix) << 3)
1593  | (insn->opcode & 7)));
1594  break;
1595  case 4:
1596  insn->opcodeRegister = (Reg)(MODRM_REG_EAX
1597  + ((bFromREX(insn->rexPrefix) << 3)
1598  | (insn->opcode & 7)));
1599  break;
1600  case 8:
1601  insn->opcodeRegister = (Reg)(MODRM_REG_RAX
1602  + ((bFromREX(insn->rexPrefix) << 3)
1603  | (insn->opcode & 7)));
1604  break;
1605  }
1606 
1607  return 0;
1608 }
1609 
1610 /*
1611  * readImmediate - Consumes an immediate operand from an instruction, given the
1612  * desired operand size.
1613  *
1614  * @param insn - The instruction whose operand is to be read.
1615  * @param size - The width (in bytes) of the operand.
1616  * @return - 0 if the immediate was successfully consumed; nonzero
1617  * otherwise.
1618  */
1619 static int readImmediate(struct InternalInstruction* insn, uint8_t size) {
1620  uint8_t imm8;
1621  uint16_t imm16;
1622  uint32_t imm32;
1623  uint64_t imm64;
1624 
1625  dbgprintf(insn, "readImmediate()");
1626 
1627  if (insn->numImmediatesConsumed == 2) {
1628  debug("Already consumed two immediates");
1629  return -1;
1630  }
1631 
1632  if (size == 0)
1633  size = insn->immediateSize;
1634  else
1635  insn->immediateSize = size;
1636  insn->immediateOffset = insn->readerCursor - insn->startLocation;
1637 
1638  switch (size) {
1639  case 1:
1640  if (consumeByte(insn, &imm8))
1641  return -1;
1642  insn->immediates[insn->numImmediatesConsumed] = imm8;
1643  break;
1644  case 2:
1645  if (consumeUInt16(insn, &imm16))
1646  return -1;
1647  insn->immediates[insn->numImmediatesConsumed] = imm16;
1648  break;
1649  case 4:
1650  if (consumeUInt32(insn, &imm32))
1651  return -1;
1652  insn->immediates[insn->numImmediatesConsumed] = imm32;
1653  break;
1654  case 8:
1655  if (consumeUInt64(insn, &imm64))
1656  return -1;
1657  insn->immediates[insn->numImmediatesConsumed] = imm64;
1658  break;
1659  }
1660 
1661  insn->numImmediatesConsumed++;
1662 
1663  return 0;
1664 }
1665 
1666 /*
1667  * readVVVV - Consumes vvvv from an instruction if it has a VEX prefix.
1668  *
1669  * @param insn - The instruction whose operand is to be read.
1670  * @return - 0 if the vvvv was successfully consumed; nonzero
1671  * otherwise.
1672  */
1673 static int readVVVV(struct InternalInstruction* insn) {
1674  dbgprintf(insn, "readVVVV()");
1675 
1676  int vvvv;
1677  if (insn->vectorExtensionType == TYPE_EVEX)
1678  vvvv = (v2FromEVEX4of4(insn->vectorExtensionPrefix[3]) << 4 |
1680  else if (insn->vectorExtensionType == TYPE_VEX_3B)
1681  vvvv = vvvvFromVEX3of3(insn->vectorExtensionPrefix[2]);
1682  else if (insn->vectorExtensionType == TYPE_VEX_2B)
1683  vvvv = vvvvFromVEX2of2(insn->vectorExtensionPrefix[1]);
1684  else if (insn->vectorExtensionType == TYPE_XOP)
1685  vvvv = vvvvFromXOP3of3(insn->vectorExtensionPrefix[2]);
1686  else
1687  return -1;
1688 
1689  if (insn->mode != MODE_64BIT)
1690  vvvv &= 0xf; // Can only clear bit 4. Bit 3 must be cleared later.
1691 
1692  insn->vvvv = static_cast<Reg>(vvvv);
1693  return 0;
1694 }
1695 
1696 /*
1697  * readMaskRegister - Reads an mask register from the opcode field of an
1698  * instruction.
1699  *
1700  * @param insn - The instruction whose opcode field is to be read.
1701  * @return - 0 on success; nonzero otherwise.
1702  */
1703 static int readMaskRegister(struct InternalInstruction* insn) {
1704  dbgprintf(insn, "readMaskRegister()");
1705 
1706  if (insn->vectorExtensionType != TYPE_EVEX)
1707  return -1;
1708 
1709  insn->writemask =
1710  static_cast<Reg>(aaaFromEVEX4of4(insn->vectorExtensionPrefix[3]));
1711  return 0;
1712 }
1713 
1714 /*
1715  * readOperands - Consults the specifier for an instruction and consumes all
1716  * operands for that instruction, interpreting them as it goes.
1717  *
1718  * @param insn - The instruction whose operands are to be read and interpreted.
1719  * @return - 0 if all operands could be read; nonzero otherwise.
1720  */
1721 static int readOperands(struct InternalInstruction* insn) {
1722  int hasVVVV, needVVVV;
1723  int sawRegImm = 0;
1724 
1725  dbgprintf(insn, "readOperands()");
1726 
1727  /* If non-zero vvvv specified, need to make sure one of the operands
1728  uses it. */
1729  hasVVVV = !readVVVV(insn);
1730  needVVVV = hasVVVV && (insn->vvvv != 0);
1731 
1732  for (const auto &Op : x86OperandSets[insn->spec->operands]) {
1733  switch (Op.encoding) {
1734  case ENCODING_NONE:
1735  case ENCODING_SI:
1736  case ENCODING_DI:
1737  break;
1739  // VSIB can use the V2 bit so check only the other bits.
1740  if (needVVVV)
1741  needVVVV = hasVVVV & ((insn->vvvv & 0xf) != 0);
1742  if (readModRM(insn))
1743  return -1;
1744 
1745  // Reject if SIB wasn't used.
1746  if (insn->eaBase != EA_BASE_sib && insn->eaBase != EA_BASE_sib64)
1747  return -1;
1748 
1749  // If sibIndex was set to SIB_INDEX_NONE, index offset is 4.
1750  if (insn->sibIndex == SIB_INDEX_NONE)
1751  insn->sibIndex = (SIBIndex)(insn->sibIndexBase + 4);
1752 
1753  // If EVEX.v2 is set this is one of the 16-31 registers.
1754  if (insn->vectorExtensionType == TYPE_EVEX && insn->mode == MODE_64BIT &&
1756  insn->sibIndex = (SIBIndex)(insn->sibIndex + 16);
1757 
1758  // Adjust the index register to the correct size.
1759  switch ((OperandType)Op.type) {
1760  default:
1761  debug("Unhandled VSIB index type");
1762  return -1;
1763  case TYPE_MVSIBX:
1764  insn->sibIndex = (SIBIndex)(SIB_INDEX_XMM0 +
1765  (insn->sibIndex - insn->sibIndexBase));
1766  break;
1767  case TYPE_MVSIBY:
1768  insn->sibIndex = (SIBIndex)(SIB_INDEX_YMM0 +
1769  (insn->sibIndex - insn->sibIndexBase));
1770  break;
1771  case TYPE_MVSIBZ:
1772  insn->sibIndex = (SIBIndex)(SIB_INDEX_ZMM0 +
1773  (insn->sibIndex - insn->sibIndexBase));
1774  break;
1775  }
1776 
1777  // Apply the AVX512 compressed displacement scaling factor.
1778  if (Op.encoding != ENCODING_REG && insn->eaDisplacement == EA_DISP_8)
1779  insn->displacement *= 1 << (Op.encoding - ENCODING_VSIB);
1780  break;
1781  case ENCODING_REG:
1783  if (readModRM(insn))
1784  return -1;
1785  if (fixupReg(insn, &Op))
1786  return -1;
1787  // Apply the AVX512 compressed displacement scaling factor.
1788  if (Op.encoding != ENCODING_REG && insn->eaDisplacement == EA_DISP_8)
1789  insn->displacement *= 1 << (Op.encoding - ENCODING_RM);
1790  break;
1791  case ENCODING_IB:
1792  if (sawRegImm) {
1793  /* Saw a register immediate so don't read again and instead split the
1794  previous immediate. FIXME: This is a hack. */
1795  insn->immediates[insn->numImmediatesConsumed] =
1796  insn->immediates[insn->numImmediatesConsumed - 1] & 0xf;
1797  ++insn->numImmediatesConsumed;
1798  break;
1799  }
1800  if (readImmediate(insn, 1))
1801  return -1;
1802  if (Op.type == TYPE_XMM || Op.type == TYPE_YMM)
1803  sawRegImm = 1;
1804  break;
1805  case ENCODING_IW:
1806  if (readImmediate(insn, 2))
1807  return -1;
1808  break;
1809  case ENCODING_ID:
1810  if (readImmediate(insn, 4))
1811  return -1;
1812  break;
1813  case ENCODING_IO:
1814  if (readImmediate(insn, 8))
1815  return -1;
1816  break;
1817  case ENCODING_Iv:
1818  if (readImmediate(insn, insn->immediateSize))
1819  return -1;
1820  break;
1821  case ENCODING_Ia:
1822  if (readImmediate(insn, insn->addressSize))
1823  return -1;
1824  break;
1825  case ENCODING_IRC:
1826  insn->RC = (l2FromEVEX4of4(insn->vectorExtensionPrefix[3]) << 1) |
1828  break;
1829  case ENCODING_RB:
1830  if (readOpcodeRegister(insn, 1))
1831  return -1;
1832  break;
1833  case ENCODING_RW:
1834  if (readOpcodeRegister(insn, 2))
1835  return -1;
1836  break;
1837  case ENCODING_RD:
1838  if (readOpcodeRegister(insn, 4))
1839  return -1;
1840  break;
1841  case ENCODING_RO:
1842  if (readOpcodeRegister(insn, 8))
1843  return -1;
1844  break;
1845  case ENCODING_Rv:
1846  if (readOpcodeRegister(insn, 0))
1847  return -1;
1848  break;
1849  case ENCODING_CC:
1850  insn->immediates[1] = insn->opcode & 0xf;
1851  break;
1852  case ENCODING_FP:
1853  break;
1854  case ENCODING_VVVV:
1855  needVVVV = 0; /* Mark that we have found a VVVV operand. */
1856  if (!hasVVVV)
1857  return -1;
1858  if (insn->mode != MODE_64BIT)
1859  insn->vvvv = static_cast<Reg>(insn->vvvv & 0x7);
1860  if (fixupReg(insn, &Op))
1861  return -1;
1862  break;
1863  case ENCODING_WRITEMASK:
1864  if (readMaskRegister(insn))
1865  return -1;
1866  break;
1867  case ENCODING_DUP:
1868  break;
1869  default:
1870  dbgprintf(insn, "Encountered an operand with an unknown encoding.");
1871  return -1;
1872  }
1873  }
1874 
1875  /* If we didn't find ENCODING_VVVV operand, but non-zero vvvv present, fail */
1876  if (needVVVV) return -1;
1877 
1878  return 0;
1879 }
1880 
1881 /*
1882  * decodeInstruction - Reads and interprets a full instruction provided by the
1883  * user.
1884  *
1885  * @param insn - A pointer to the instruction to be populated. Must be
1886  * pre-allocated.
1887  * @param reader - The function to be used to read the instruction's bytes.
1888  * @param readerArg - A generic argument to be passed to the reader to store
1889  * any internal state.
1890  * @param logger - If non-NULL, the function to be used to write log messages
1891  * and warnings.
1892  * @param loggerArg - A generic argument to be passed to the logger to store
1893  * any internal state.
1894  * @param startLoc - The address (in the reader's address space) of the first
1895  * byte in the instruction.
1896  * @param mode - The mode (real mode, IA-32e, or IA-32e in 64-bit mode) to
1897  * decode the instruction in.
1898  * @return - 0 if the instruction's memory could be read; nonzero if
1899  * not.
1900  */
1902  struct InternalInstruction *insn, byteReader_t reader,
1903  const void *readerArg, dlog_t logger, void *loggerArg, const void *miiArg,
1904  uint64_t startLoc, DisassemblerMode mode) {
1905  memset(insn, 0, sizeof(struct InternalInstruction));
1906 
1907  insn->reader = reader;
1908  insn->readerArg = readerArg;
1909  insn->dlog = logger;
1910  insn->dlogArg = loggerArg;
1911  insn->startLocation = startLoc;
1912  insn->readerCursor = startLoc;
1913  insn->mode = mode;
1914  insn->numImmediatesConsumed = 0;
1915 
1916  if (readPrefixes(insn) ||
1917  readOpcode(insn) ||
1918  getID(insn, miiArg) ||
1919  insn->instructionID == 0 ||
1920  readOperands(insn))
1921  return -1;
1922 
1923  insn->operands = x86OperandSets[insn->spec->operands];
1924 
1925  insn->length = insn->readerCursor - insn->startLocation;
1926 
1927  dbgprintf(insn, "Read from 0x%llx to 0x%llx: length %zu",
1928  startLoc, insn->readerCursor, insn->length);
1929 
1930  if (insn->length > 15)
1931  dbgprintf(insn, "Instruction exceeds 15-byte limit");
1932 
1933  return 0;
1934 }
#define bFromEVEX4of4(evex)
static void setPrefixPresent(struct InternalInstruction *insn, uint8_t prefix)
void(* dlog_t)(void *arg, const char *log)
Type for the logging function that the consumer can provide to get debugging output from the decoder...
#define rFromREX(rex)
#define XOP9_MAP_SYM
#define wFromEVEX3of4(evex)
The specification for how to extract and interpret a full instruction and its operands.
#define bFromVEX2of3(vex)
static int consumeByte(struct InternalInstruction *insn, uint8_t *byte)
int(* byteReader_t)(const void *arg, uint8_t *byte, uint64_t address)
Type for the byte reader that the consumer must provide to the decoder.
#define rmFromModRM(modRM)
static int readSIB(struct InternalInstruction *insn)
#define zFromEVEX4of4(evex)
format_object< Ts... > format(const char *Fmt, const Ts &... Vals)
These are helper functions used to produce formatted output.
Definition: Format.h:123
static InstrUID decode(OpcodeType type, InstructionContext insnContext, uint8_t opcode, uint8_t modRM)
#define vvvvFromVEX2of2(vex)
#define vvvvFromEVEX3of4(evex)
static int readDisplacement(struct InternalInstruction *insn)
#define r2FromEVEX2of4(evex)
#define op(i)
#define aaaFromEVEX4of4(evex)
amode Optimize addressing mode
#define bFromEVEX2of4(evex)
SIBIndex
All possible values of the SIB index field.
static int readOpcode(struct InternalInstruction *insn)
#define rFromEVEX2of4(evex)
#define xFromEVEX2of4(evex)
#define rFromVEX2of2(vex)
Reg
All possible values of the reg field in the ModR/M byte.
#define lFromVEX2of2(vex)
static int readVVVV(struct InternalInstruction *insn)
static int lookAtByte(struct InternalInstruction *insn, uint8_t *byte)
#define xFromXOP2of3(xop)
#define v2FromEVEX4of4(evex)
static int getID(struct InternalInstruction *insn, const void *miiArg)
int decodeInstruction(InternalInstruction *insn, byteReader_t reader, const void *readerArg, dlog_t logger, void *loggerArg, const void *miiArg, uint64_t startLoc, DisassemblerMode mode)
Decode one instruction and store the decoding results in a buffer provided by the consumer...
#define THREEDNOW_MAP_SYM
ModRMDecision modRMDecisions[256]
static void unconsumeByte(struct InternalInstruction *insn)
static int nextByte(ArrayRef< uint8_t > Bytes, uint64_t &Size)
#define ppFromVEX3of3(vex)
#define GENERIC_FIXUP_FUNC(name, base, prefix, mask)
#define mmmmmFromXOP2of3(xop)
EABase
All possible values of the base field for effective-address computations, a.k.a.
#define vvvvFromVEX3of3(vex)
The specification for how to extract and interpret one operand.
static bool is64Bit(const char *name)
static int readOpcodeRegister(struct InternalInstruction *insn, uint8_t size)
#define CASE_ENCODING_RM
#define bFromXOP2of3(xop)
#define baseFromSIB(sib)
static int readImmediate(struct InternalInstruction *insn, uint8_t size)
#define wFromREX(rex)
static int fixupReg(struct InternalInstruction *insn, const struct OperandSpecifier *op)
#define lFromVEX3of3(vex)
#define ppFromVEX2of2(vex)
static int modRMRequired(OpcodeType type, InstructionContext insnContext, uint16_t opcode)
The x86 internal instruction, which is produced by the decoder.
static int readPrefixes(struct InternalInstruction *insn)
#define THREEBYTE3A_SYM
static void dbgprintf(struct InternalInstruction *insn, const char *format,...)
StringRef GetInstrName(unsigned Opcode, const void *mii)
#define ONEBYTE_SYM
#define lFromXOP3of3(xop)
#define bFromREX(rex)
#define mmmmmFromVEX2of3(vex)
#define ppFromEVEX3of4(evex)
#define scaleFromSIB(sib)
auto size(R &&Range, typename std::enable_if< std::is_same< typename std::iterator_traits< decltype(Range.begin())>::iterator_category, std::random_access_iterator_tag >::value, void >::type *=nullptr) -> decltype(std::distance(Range.begin(), Range.end()))
Get the size of a range.
Definition: STLExtras.h:1166
static int readOperands(struct InternalInstruction *insn)
#define xFromVEX2of3(vex)
static bool is16BitEquivalent(const char *orig, const char *equiv)
#define rFromXOP2of3(xop)
#define CONSUME_FUNC(name, type)
#define INSTRUCTIONS_SYM
#define THREEBYTE38_SYM
#define TWOBYTE_SYM
#define CASE_ENCODING_VSIB
static bool isPrefix(const IndicesVector &Prefix, const IndicesVector &Longer)
Returns true if Prefix is a prefix of Longer.
#define lFromEVEX4of4(evex)
static InstructionContext contextForAttrs(uint16_t attrMask)
#define l2FromEVEX4of4(evex)
#define wFromVEX3of3(vex)
#define wFromXOP3of3(xop)
#define debug(s)
#define xFromREX(rex)
#define indexFromSIB(sib)
SIBBase
All possible values of the SIB base field.
static int readModRM(struct InternalInstruction *insn)
#define mmFromEVEX2of4(evex)
static void logger(void *arg, const char *log)
logger - a callback function that wraps the operator<< method from raw_ostream.
#define XOP8_MAP_SYM
#define rFromVEX2of3(vex)
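Specifies which opcode->instruction tables to look at given a particular context (set of attributes). Since there are many possible contexts, the decoder first uses CONTEXTS_SYM to determine which context applies given a specific set of attributes. Hence there are only IC_max entries in this table, rather than 2^(ATTR_max).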
#define regFromModRM(modRM)
static const struct InstructionSpecifier * specifierForUID(InstrUID uid)
LLVM_NODISCARD const char * data() const
data - Get a pointer to the start of the string (which may not be null terminated).
Definition: StringRef.h:122
#define modFromModRM(modRM)
OpcodeDecision opcodeDecisions[IC_max]
static int readMaskRegister(struct InternalInstruction *insn)
aarch64 promote const
static const char * name
#define LLVM_FALLTHROUGH
LLVM_FALLTHROUGH - Mark fallthrough cases in switch statements.
Definition: Compiler.h:250
StringRef - Represent a constant reference to a string, i.e. a character array and a length, which need not be null terminated.
Definition: StringRef.h:48
#define XOPA_MAP_SYM
#define CONTEXTS_SYM
#define vvvvFromXOP3of3(vex)
static int getIDWithAttrMask(uint16_t *instructionID, struct InternalInstruction *insn, uint16_t attrMask)
#define ppFromXOP3of3(xop)
static bool isREX(struct InternalInstruction *insn, uint8_t prefix)
Specifies which set of ModR/M->instruction tables to look at given a particular opcode.
DisassemblerMode
Decoding mode for the Intel disassembler.
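Specifies whether a ModR/M byte is needed and (if so) which instruction each possible value of the ModR/M byte corresponds to. Once this information is known, we have narrowed down to a single instruction.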