LLVM  14.0.0git
X86Disassembler.cpp
Go to the documentation of this file.
1 //===-- X86Disassembler.cpp - Disassembler for x86 and x86_64 -------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file is part of the X86 Disassembler.
10 // It contains code to translate the data produced by the decoder into
11 // MCInsts.
12 //
13 //
14 // The X86 disassembler is a table-driven disassembler for the 16-, 32-, and
15 // 64-bit X86 instruction sets. The main decode sequence for an assembly
16 // instruction in this disassembler is:
17 //
18 // 1. Read the prefix bytes and determine the attributes of the instruction.
19 // These attributes, recorded in enum attributeBits
20 // (X86DisassemblerDecoderCommon.h), form a bitmask. The table CONTEXTS_SYM
21 // provides a mapping from bitmasks to contexts, which are represented by
22 // enum InstructionContext (ibid.).
23 //
24 // 2. Read the opcode, and determine what kind of opcode it is. The
25 // disassembler distinguishes four kinds of opcodes, which are enumerated in
26 // OpcodeType (X86DisassemblerDecoderCommon.h): one-byte (0xnn), two-byte
27 // (0x0f 0xnn), three-byte-38 (0x0f 0x38 0xnn), or three-byte-3a
28 // (0x0f 0x3a 0xnn). Mandatory prefixes are treated as part of the context.
29 //
30 // 3. Depending on the opcode type, look in one of four ClassDecision structures
31 // (X86DisassemblerDecoderCommon.h). Use the opcode class to determine which
// OpcodeDecision (ibid.) to look the opcode up in. Look up the opcode to get
33 // a ModRMDecision (ibid.).
34 //
35 // 4. Some instructions, such as escape opcodes or extended opcodes, or even
36 // instructions that have ModRM*Reg / ModRM*Mem forms in LLVM, need the
37 // ModR/M byte to complete decode. The ModRMDecision's type is an entry from
38 // ModRMDecisionType (X86DisassemblerDecoderCommon.h) that indicates if the
39 // ModR/M byte is required and how to interpret it.
40 //
41 // 5. After resolving the ModRMDecision, the disassembler has a unique ID
42 // of type InstrUID (X86DisassemblerDecoderCommon.h). Looking this ID up in
43 // INSTRUCTIONS_SYM yields the name of the instruction and the encodings and
44 // meanings of its operands.
45 //
46 // 6. For each operand, its encoding is an entry from OperandEncoding
47 // (X86DisassemblerDecoderCommon.h) and its type is an entry from
48 // OperandType (ibid.). The encoding indicates how to read it from the
49 // instruction; the type indicates how to interpret the value once it has
50 // been read. For example, a register operand could be stored in the R/M
51 // field of the ModR/M byte, the REG field of the ModR/M byte, or added to
// the main opcode. This is orthogonal to its meaning (a GPR or an XMM
53 // register, for instance). Given this information, the operands can be
54 // extracted and interpreted.
55 //
56 // 7. As the last step, the disassembler translates the instruction information
57 // and operands into a format understandable by the client - in this case, an
58 // MCInst for use by the MC infrastructure.
59 //
60 // The disassembler is broken broadly into two parts: the table emitter that
61 // emits the instruction decode tables discussed above during compilation, and
62 // the disassembler itself. The table emitter is documented in more detail in
63 // utils/TableGen/X86DisassemblerEmitter.h.
64 //
65 // X86Disassembler.cpp contains the code responsible for step 7, and for
66 // invoking the decoder to execute steps 1-6.
67 // X86DisassemblerDecoderCommon.h contains the definitions needed by both the
68 // table emitter and the disassembler.
69 // X86DisassemblerDecoder.h contains the public interface of the decoder,
70 // factored out into C for possible use by other projects.
71 // X86DisassemblerDecoder.c contains the source code of the decoder, which is
72 // responsible for steps 1-6.
73 //
74 //===----------------------------------------------------------------------===//
75 
79 #include "X86DisassemblerDecoder.h"
80 #include "llvm/MC/MCContext.h"
82 #include "llvm/MC/MCExpr.h"
83 #include "llvm/MC/MCInst.h"
84 #include "llvm/MC/MCInstrInfo.h"
86 #include "llvm/Support/Debug.h"
87 #include "llvm/Support/Format.h"
90 
91 using namespace llvm;
92 using namespace llvm::X86Disassembler;
93 
94 #define DEBUG_TYPE "x86-disassembler"
95 
96 #define debug(s) LLVM_DEBUG(dbgs() << __LINE__ << ": " << s);
97 
98 // Specifies whether a ModR/M byte is needed and (if so) which
99 // instruction each possible value of the ModR/M byte corresponds to. Once
100 // this information is known, we have narrowed down to a single instruction.
102  uint8_t modrm_type;
104 };
105 
106 // Specifies which set of ModR/M->instruction tables to look at
107 // given a particular opcode.
109  ModRMDecision modRMDecisions[256];
110 };
111 
112 // Specifies which opcode->instruction tables to look at given
113 // a particular context (set of attributes). Since there are many possible
114 // contexts, the decoder first uses CONTEXTS_SYM to determine which context
115 // applies given a specific set of attributes. Hence there are only IC_max
116 // entries in this table, rather than 2^(ATTR_max).
118  OpcodeDecision opcodeDecisions[IC_max];
119 };
120 
121 #include "X86GenDisassemblerTables.inc"
122 
124  uint8_t opcode, uint8_t modRM) {
125  const struct ModRMDecision *dec;
126 
127  switch (type) {
128  case ONEBYTE:
129  dec = &ONEBYTE_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode];
130  break;
131  case TWOBYTE:
132  dec = &TWOBYTE_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode];
133  break;
134  case THREEBYTE_38:
135  dec = &THREEBYTE38_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode];
136  break;
137  case THREEBYTE_3A:
138  dec = &THREEBYTE3A_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode];
139  break;
140  case XOP8_MAP:
141  dec = &XOP8_MAP_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode];
142  break;
143  case XOP9_MAP:
144  dec = &XOP9_MAP_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode];
145  break;
146  case XOPA_MAP:
147  dec = &XOPA_MAP_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode];
148  break;
149  case THREEDNOW_MAP:
150  dec =
151  &THREEDNOW_MAP_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode];
152  break;
153  case MAP5:
154  dec = &MAP5_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode];
155  break;
156  case MAP6:
157  dec = &MAP6_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode];
158  break;
159  }
160 
161  switch (dec->modrm_type) {
162  default:
163  llvm_unreachable("Corrupt table! Unknown modrm_type");
164  return 0;
165  case MODRM_ONEENTRY:
166  return modRMTable[dec->instructionIDs];
167  case MODRM_SPLITRM:
168  if (modFromModRM(modRM) == 0x3)
169  return modRMTable[dec->instructionIDs + 1];
170  return modRMTable[dec->instructionIDs];
171  case MODRM_SPLITREG:
172  if (modFromModRM(modRM) == 0x3)
173  return modRMTable[dec->instructionIDs + ((modRM & 0x38) >> 3) + 8];
174  return modRMTable[dec->instructionIDs + ((modRM & 0x38) >> 3)];
175  case MODRM_SPLITMISC:
176  if (modFromModRM(modRM) == 0x3)
177  return modRMTable[dec->instructionIDs + (modRM & 0x3f) + 8];
178  return modRMTable[dec->instructionIDs + ((modRM & 0x38) >> 3)];
179  case MODRM_FULL:
180  return modRMTable[dec->instructionIDs + modRM];
181  }
182 }
183 
184 static bool peek(struct InternalInstruction *insn, uint8_t &byte) {
185  uint64_t offset = insn->readerCursor - insn->startLocation;
186  if (offset >= insn->bytes.size())
187  return true;
188  byte = insn->bytes[offset];
189  return false;
190 }
191 
192 template <typename T> static bool consume(InternalInstruction *insn, T &ptr) {
193  auto r = insn->bytes;
194  uint64_t offset = insn->readerCursor - insn->startLocation;
195  if (offset + sizeof(T) > r.size())
196  return true;
197  T ret = 0;
198  for (unsigned i = 0; i < sizeof(T); ++i)
199  ret |= (uint64_t)r[offset + i] << (i * 8);
200  ptr = ret;
201  insn->readerCursor += sizeof(T);
202  return false;
203 }
204 
205 static bool isREX(struct InternalInstruction *insn, uint8_t prefix) {
206  return insn->mode == MODE_64BIT && prefix >= 0x40 && prefix <= 0x4f;
207 }
208 
209 // Consumes all of an instruction's prefix bytes, and marks the
210 // instruction as having them. Also sets the instruction's default operand,
211 // address, and other relevant data sizes to report operands correctly.
212 //
213 // insn must not be empty.
214 static int readPrefixes(struct InternalInstruction *insn) {
215  bool isPrefix = true;
216  uint8_t byte = 0;
217  uint8_t nextByte;
218 
219  LLVM_DEBUG(dbgs() << "readPrefixes()");
220 
221  while (isPrefix) {
222  // If we fail reading prefixes, just stop here and let the opcode reader
223  // deal with it.
224  if (consume(insn, byte))
225  break;
226 
227  // If the byte is a LOCK/REP/REPNE prefix and not a part of the opcode, then
228  // break and let it be disassembled as a normal "instruction".
229  if (insn->readerCursor - 1 == insn->startLocation && byte == 0xf0) // LOCK
230  break;
231 
232  if ((byte == 0xf2 || byte == 0xf3) && !peek(insn, nextByte)) {
233  // If the byte is 0xf2 or 0xf3, and any of the following conditions are
234  // met:
235  // - it is followed by a LOCK (0xf0) prefix
236  // - it is followed by an xchg instruction
237  // then it should be disassembled as a xacquire/xrelease not repne/rep.
238  if (((nextByte == 0xf0) ||
239  ((nextByte & 0xfe) == 0x86 || (nextByte & 0xf8) == 0x90))) {
240  insn->xAcquireRelease = true;
241  if (!(byte == 0xf3 && nextByte == 0x90)) // PAUSE instruction support
242  break;
243  }
244  // Also if the byte is 0xf3, and the following condition is met:
245  // - it is followed by a "mov mem, reg" (opcode 0x88/0x89) or
246  // "mov mem, imm" (opcode 0xc6/0xc7) instructions.
247  // then it should be disassembled as an xrelease not rep.
248  if (byte == 0xf3 && (nextByte == 0x88 || nextByte == 0x89 ||
249  nextByte == 0xc6 || nextByte == 0xc7)) {
250  insn->xAcquireRelease = true;
251  break;
252  }
253  if (isREX(insn, nextByte)) {
254  uint8_t nnextByte;
255  // Go to REX prefix after the current one
256  if (consume(insn, nnextByte))
257  return -1;
258  // We should be able to read next byte after REX prefix
259  if (peek(insn, nnextByte))
260  return -1;
261  --insn->readerCursor;
262  }
263  }
264 
265  switch (byte) {
266  case 0xf0: // LOCK
267  insn->hasLockPrefix = true;
268  break;
269  case 0xf2: // REPNE/REPNZ
270  case 0xf3: { // REP or REPE/REPZ
271  uint8_t nextByte;
272  if (peek(insn, nextByte))
273  break;
274  // TODO:
275  // 1. There could be several 0x66
276  // 2. if (nextByte == 0x66) and nextNextByte != 0x0f then
277  // it's not mandatory prefix
278  // 3. if (nextByte >= 0x40 && nextByte <= 0x4f) it's REX and we need
279  // 0x0f exactly after it to be mandatory prefix
280  if (isREX(insn, nextByte) || nextByte == 0x0f || nextByte == 0x66)
281  // The last of 0xf2 /0xf3 is mandatory prefix
282  insn->mandatoryPrefix = byte;
283  insn->repeatPrefix = byte;
284  break;
285  }
286  case 0x2e: // CS segment override -OR- Branch not taken
288  break;
289  case 0x36: // SS segment override -OR- Branch taken
291  break;
292  case 0x3e: // DS segment override
294  break;
295  case 0x26: // ES segment override
297  break;
298  case 0x64: // FS segment override
300  break;
301  case 0x65: // GS segment override
303  break;
304  case 0x66: { // Operand-size override {
305  uint8_t nextByte;
306  insn->hasOpSize = true;
307  if (peek(insn, nextByte))
308  break;
309  // 0x66 can't overwrite existing mandatory prefix and should be ignored
310  if (!insn->mandatoryPrefix && (nextByte == 0x0f || isREX(insn, nextByte)))
311  insn->mandatoryPrefix = byte;
312  break;
313  }
314  case 0x67: // Address-size override
315  insn->hasAdSize = true;
316  break;
317  default: // Not a prefix byte
318  isPrefix = false;
319  break;
320  }
321 
322  if (isPrefix)
323  LLVM_DEBUG(dbgs() << format("Found prefix 0x%hhx", byte));
324  }
325 
327 
328  if (byte == 0x62) {
329  uint8_t byte1, byte2;
330  if (consume(insn, byte1)) {
331  LLVM_DEBUG(dbgs() << "Couldn't read second byte of EVEX prefix");
332  return -1;
333  }
334 
335  if (peek(insn, byte2)) {
336  LLVM_DEBUG(dbgs() << "Couldn't read third byte of EVEX prefix");
337  return -1;
338  }
339 
340  if ((insn->mode == MODE_64BIT || (byte1 & 0xc0) == 0xc0) &&
341  ((~byte1 & 0x8) == 0x8) && ((byte2 & 0x4) == 0x4)) {
343  } else {
344  --insn->readerCursor; // unconsume byte1
345  --insn->readerCursor; // unconsume byte
346  }
347 
348  if (insn->vectorExtensionType == TYPE_EVEX) {
349  insn->vectorExtensionPrefix[0] = byte;
350  insn->vectorExtensionPrefix[1] = byte1;
351  if (consume(insn, insn->vectorExtensionPrefix[2])) {
352  LLVM_DEBUG(dbgs() << "Couldn't read third byte of EVEX prefix");
353  return -1;
354  }
355  if (consume(insn, insn->vectorExtensionPrefix[3])) {
356  LLVM_DEBUG(dbgs() << "Couldn't read fourth byte of EVEX prefix");
357  return -1;
358  }
359 
360  // We simulate the REX prefix for simplicity's sake
361  if (insn->mode == MODE_64BIT) {
362  insn->rexPrefix = 0x40 |
363  (wFromEVEX3of4(insn->vectorExtensionPrefix[2]) << 3) |
364  (rFromEVEX2of4(insn->vectorExtensionPrefix[1]) << 2) |
365  (xFromEVEX2of4(insn->vectorExtensionPrefix[1]) << 1) |
366  (bFromEVEX2of4(insn->vectorExtensionPrefix[1]) << 0);
367  }
368 
369  LLVM_DEBUG(
370  dbgs() << format(
371  "Found EVEX prefix 0x%hhx 0x%hhx 0x%hhx 0x%hhx",
372  insn->vectorExtensionPrefix[0], insn->vectorExtensionPrefix[1],
373  insn->vectorExtensionPrefix[2], insn->vectorExtensionPrefix[3]));
374  }
375  } else if (byte == 0xc4) {
376  uint8_t byte1;
377  if (peek(insn, byte1)) {
378  LLVM_DEBUG(dbgs() << "Couldn't read second byte of VEX");
379  return -1;
380  }
381 
382  if (insn->mode == MODE_64BIT || (byte1 & 0xc0) == 0xc0)
384  else
385  --insn->readerCursor;
386 
387  if (insn->vectorExtensionType == TYPE_VEX_3B) {
388  insn->vectorExtensionPrefix[0] = byte;
389  consume(insn, insn->vectorExtensionPrefix[1]);
390  consume(insn, insn->vectorExtensionPrefix[2]);
391 
392  // We simulate the REX prefix for simplicity's sake
393 
394  if (insn->mode == MODE_64BIT)
395  insn->rexPrefix = 0x40 |
396  (wFromVEX3of3(insn->vectorExtensionPrefix[2]) << 3) |
397  (rFromVEX2of3(insn->vectorExtensionPrefix[1]) << 2) |
398  (xFromVEX2of3(insn->vectorExtensionPrefix[1]) << 1) |
399  (bFromVEX2of3(insn->vectorExtensionPrefix[1]) << 0);
400 
401  LLVM_DEBUG(dbgs() << format("Found VEX prefix 0x%hhx 0x%hhx 0x%hhx",
402  insn->vectorExtensionPrefix[0],
403  insn->vectorExtensionPrefix[1],
404  insn->vectorExtensionPrefix[2]));
405  }
406  } else if (byte == 0xc5) {
407  uint8_t byte1;
408  if (peek(insn, byte1)) {
409  LLVM_DEBUG(dbgs() << "Couldn't read second byte of VEX");
410  return -1;
411  }
412 
413  if (insn->mode == MODE_64BIT || (byte1 & 0xc0) == 0xc0)
415  else
416  --insn->readerCursor;
417 
418  if (insn->vectorExtensionType == TYPE_VEX_2B) {
419  insn->vectorExtensionPrefix[0] = byte;
420  consume(insn, insn->vectorExtensionPrefix[1]);
421 
422  if (insn->mode == MODE_64BIT)
423  insn->rexPrefix =
424  0x40 | (rFromVEX2of2(insn->vectorExtensionPrefix[1]) << 2);
425 
426  switch (ppFromVEX2of2(insn->vectorExtensionPrefix[1])) {
427  default:
428  break;
429  case VEX_PREFIX_66:
430  insn->hasOpSize = true;
431  break;
432  }
433 
434  LLVM_DEBUG(dbgs() << format("Found VEX prefix 0x%hhx 0x%hhx",
435  insn->vectorExtensionPrefix[0],
436  insn->vectorExtensionPrefix[1]));
437  }
438  } else if (byte == 0x8f) {
439  uint8_t byte1;
440  if (peek(insn, byte1)) {
441  LLVM_DEBUG(dbgs() << "Couldn't read second byte of XOP");
442  return -1;
443  }
444 
445  if ((byte1 & 0x38) != 0x0) // 0 in these 3 bits is a POP instruction.
447  else
448  --insn->readerCursor;
449 
450  if (insn->vectorExtensionType == TYPE_XOP) {
451  insn->vectorExtensionPrefix[0] = byte;
452  consume(insn, insn->vectorExtensionPrefix[1]);
453  consume(insn, insn->vectorExtensionPrefix[2]);
454 
455  // We simulate the REX prefix for simplicity's sake
456 
457  if (insn->mode == MODE_64BIT)
458  insn->rexPrefix = 0x40 |
459  (wFromXOP3of3(insn->vectorExtensionPrefix[2]) << 3) |
460  (rFromXOP2of3(insn->vectorExtensionPrefix[1]) << 2) |
461  (xFromXOP2of3(insn->vectorExtensionPrefix[1]) << 1) |
462  (bFromXOP2of3(insn->vectorExtensionPrefix[1]) << 0);
463 
464  switch (ppFromXOP3of3(insn->vectorExtensionPrefix[2])) {
465  default:
466  break;
467  case VEX_PREFIX_66:
468  insn->hasOpSize = true;
469  break;
470  }
471 
472  LLVM_DEBUG(dbgs() << format("Found XOP prefix 0x%hhx 0x%hhx 0x%hhx",
473  insn->vectorExtensionPrefix[0],
474  insn->vectorExtensionPrefix[1],
475  insn->vectorExtensionPrefix[2]));
476  }
477  } else if (isREX(insn, byte)) {
478  if (peek(insn, nextByte))
479  return -1;
480  insn->rexPrefix = byte;
481  LLVM_DEBUG(dbgs() << format("Found REX prefix 0x%hhx", byte));
482  } else
483  --insn->readerCursor;
484 
485  if (insn->mode == MODE_16BIT) {
486  insn->registerSize = (insn->hasOpSize ? 4 : 2);
487  insn->addressSize = (insn->hasAdSize ? 4 : 2);
488  insn->displacementSize = (insn->hasAdSize ? 4 : 2);
489  insn->immediateSize = (insn->hasOpSize ? 4 : 2);
490  } else if (insn->mode == MODE_32BIT) {
491  insn->registerSize = (insn->hasOpSize ? 2 : 4);
492  insn->addressSize = (insn->hasAdSize ? 2 : 4);
493  insn->displacementSize = (insn->hasAdSize ? 2 : 4);
494  insn->immediateSize = (insn->hasOpSize ? 2 : 4);
495  } else if (insn->mode == MODE_64BIT) {
496  if (insn->rexPrefix && wFromREX(insn->rexPrefix)) {
497  insn->registerSize = 8;
498  insn->addressSize = (insn->hasAdSize ? 4 : 8);
499  insn->displacementSize = 4;
500  insn->immediateSize = 4;
501  insn->hasOpSize = false;
502  } else {
503  insn->registerSize = (insn->hasOpSize ? 2 : 4);
504  insn->addressSize = (insn->hasAdSize ? 4 : 8);
505  insn->displacementSize = (insn->hasOpSize ? 2 : 4);
506  insn->immediateSize = (insn->hasOpSize ? 2 : 4);
507  }
508  }
509 
510  return 0;
511 }
512 
513 // Consumes the SIB byte to determine addressing information.
514 static int readSIB(struct InternalInstruction *insn) {
515  SIBBase sibBaseBase = SIB_BASE_NONE;
516  uint8_t index, base;
517 
518  LLVM_DEBUG(dbgs() << "readSIB()");
519  switch (insn->addressSize) {
520  case 2:
521  default:
522  llvm_unreachable("SIB-based addressing doesn't work in 16-bit mode");
523  case 4:
524  insn->sibIndexBase = SIB_INDEX_EAX;
525  sibBaseBase = SIB_BASE_EAX;
526  break;
527  case 8:
528  insn->sibIndexBase = SIB_INDEX_RAX;
529  sibBaseBase = SIB_BASE_RAX;
530  break;
531  }
532 
533  if (consume(insn, insn->sib))
534  return -1;
535 
536  index = indexFromSIB(insn->sib) | (xFromREX(insn->rexPrefix) << 3);
537 
538  if (index == 0x4) {
539  insn->sibIndex = SIB_INDEX_NONE;
540  } else {
541  insn->sibIndex = (SIBIndex)(insn->sibIndexBase + index);
542  }
543 
544  insn->sibScale = 1 << scaleFromSIB(insn->sib);
545 
546  base = baseFromSIB(insn->sib) | (bFromREX(insn->rexPrefix) << 3);
547 
548  switch (base) {
549  case 0x5:
550  case 0xd:
551  switch (modFromModRM(insn->modRM)) {
552  case 0x0:
553  insn->eaDisplacement = EA_DISP_32;
554  insn->sibBase = SIB_BASE_NONE;
555  break;
556  case 0x1:
557  insn->eaDisplacement = EA_DISP_8;
558  insn->sibBase = (SIBBase)(sibBaseBase + base);
559  break;
560  case 0x2:
561  insn->eaDisplacement = EA_DISP_32;
562  insn->sibBase = (SIBBase)(sibBaseBase + base);
563  break;
564  default:
565  llvm_unreachable("Cannot have Mod = 0b11 and a SIB byte");
566  }
567  break;
568  default:
569  insn->sibBase = (SIBBase)(sibBaseBase + base);
570  break;
571  }
572 
573  return 0;
574 }
575 
576 static int readDisplacement(struct InternalInstruction *insn) {
577  int8_t d8;
578  int16_t d16;
579  int32_t d32;
580  LLVM_DEBUG(dbgs() << "readDisplacement()");
581 
582  insn->displacementOffset = insn->readerCursor - insn->startLocation;
583  switch (insn->eaDisplacement) {
584  case EA_DISP_NONE:
585  break;
586  case EA_DISP_8:
587  if (consume(insn, d8))
588  return -1;
589  insn->displacement = d8;
590  break;
591  case EA_DISP_16:
592  if (consume(insn, d16))
593  return -1;
594  insn->displacement = d16;
595  break;
596  case EA_DISP_32:
597  if (consume(insn, d32))
598  return -1;
599  insn->displacement = d32;
600  break;
601  }
602 
603  return 0;
604 }
605 
// Consumes all addressing information (ModR/M byte, SIB byte, and displacement).
607 static int readModRM(struct InternalInstruction *insn) {
608  uint8_t mod, rm, reg, evexrm;
609  LLVM_DEBUG(dbgs() << "readModRM()");
610 
611  if (insn->consumedModRM)
612  return 0;
613 
614  if (consume(insn, insn->modRM))
615  return -1;
616  insn->consumedModRM = true;
617 
618  mod = modFromModRM(insn->modRM);
619  rm = rmFromModRM(insn->modRM);
620  reg = regFromModRM(insn->modRM);
621 
622  // This goes by insn->registerSize to pick the correct register, which messes
623  // up if we're using (say) XMM or 8-bit register operands. That gets fixed in
624  // fixupReg().
625  switch (insn->registerSize) {
626  case 2:
627  insn->regBase = MODRM_REG_AX;
628  insn->eaRegBase = EA_REG_AX;
629  break;
630  case 4:
631  insn->regBase = MODRM_REG_EAX;
632  insn->eaRegBase = EA_REG_EAX;
633  break;
634  case 8:
635  insn->regBase = MODRM_REG_RAX;
636  insn->eaRegBase = EA_REG_RAX;
637  break;
638  }
639 
640  reg |= rFromREX(insn->rexPrefix) << 3;
641  rm |= bFromREX(insn->rexPrefix) << 3;
642 
643  evexrm = 0;
644  if (insn->vectorExtensionType == TYPE_EVEX && insn->mode == MODE_64BIT) {
645  reg |= r2FromEVEX2of4(insn->vectorExtensionPrefix[1]) << 4;
646  evexrm = xFromEVEX2of4(insn->vectorExtensionPrefix[1]) << 4;
647  }
648 
649  insn->reg = (Reg)(insn->regBase + reg);
650 
651  switch (insn->addressSize) {
652  case 2: {
653  EABase eaBaseBase = EA_BASE_BX_SI;
654 
655  switch (mod) {
656  case 0x0:
657  if (rm == 0x6) {
658  insn->eaBase = EA_BASE_NONE;
659  insn->eaDisplacement = EA_DISP_16;
660  if (readDisplacement(insn))
661  return -1;
662  } else {
663  insn->eaBase = (EABase)(eaBaseBase + rm);
665  }
666  break;
667  case 0x1:
668  insn->eaBase = (EABase)(eaBaseBase + rm);
669  insn->eaDisplacement = EA_DISP_8;
670  insn->displacementSize = 1;
671  if (readDisplacement(insn))
672  return -1;
673  break;
674  case 0x2:
675  insn->eaBase = (EABase)(eaBaseBase + rm);
676  insn->eaDisplacement = EA_DISP_16;
677  if (readDisplacement(insn))
678  return -1;
679  break;
680  case 0x3:
681  insn->eaBase = (EABase)(insn->eaRegBase + rm);
682  if (readDisplacement(insn))
683  return -1;
684  break;
685  }
686  break;
687  }
688  case 4:
689  case 8: {
690  EABase eaBaseBase = (insn->addressSize == 4 ? EA_BASE_EAX : EA_BASE_RAX);
691 
692  switch (mod) {
693  case 0x0:
694  insn->eaDisplacement = EA_DISP_NONE; // readSIB may override this
695  // In determining whether RIP-relative mode is used (rm=5),
696  // or whether a SIB byte is present (rm=4),
697  // the extension bits (REX.b and EVEX.x) are ignored.
698  switch (rm & 7) {
699  case 0x4: // SIB byte is present
700  insn->eaBase = (insn->addressSize == 4 ? EA_BASE_sib : EA_BASE_sib64);
701  if (readSIB(insn) || readDisplacement(insn))
702  return -1;
703  break;
704  case 0x5: // RIP-relative
705  insn->eaBase = EA_BASE_NONE;
706  insn->eaDisplacement = EA_DISP_32;
707  if (readDisplacement(insn))
708  return -1;
709  break;
710  default:
711  insn->eaBase = (EABase)(eaBaseBase + rm);
712  break;
713  }
714  break;
715  case 0x1:
716  insn->displacementSize = 1;
718  case 0x2:
719  insn->eaDisplacement = (mod == 0x1 ? EA_DISP_8 : EA_DISP_32);
720  switch (rm & 7) {
721  case 0x4: // SIB byte is present
722  insn->eaBase = EA_BASE_sib;
723  if (readSIB(insn) || readDisplacement(insn))
724  return -1;
725  break;
726  default:
727  insn->eaBase = (EABase)(eaBaseBase + rm);
728  if (readDisplacement(insn))
729  return -1;
730  break;
731  }
732  break;
733  case 0x3:
735  insn->eaBase = (EABase)(insn->eaRegBase + rm + evexrm);
736  break;
737  }
738  break;
739  }
740  } // switch (insn->addressSize)
741 
742  return 0;
743 }
744 
// Generates a static function `name` that maps a register field value to the
// enumerator of the register class selected by the operand type.
//
// Macro arguments:
//   name   - Name of the generated function.
//   base   - Expression added to the index for TYPE_Rv operands; it is
//            evaluated inside the generated function and may refer to insn.
//   prefix - Token pasted in front of the register suffixes (_AL, _XMM0, ...).
//   mask   - Applied to the index of TYPE_R8/R16/R32/R64 operands before the
//            range check.
//
// Generated function:
//   @param insn  - The instruction being decoded.
//   @param type  - The operand type.
//   @param index - The raw register number.
//   @param valid - Set to 1 on entry; cleared to 0 when the index is out of
//                  range for the class or the type is not handled.
//   @return      - The register enumerator (0 for an unhandled type).
#define GENERIC_FIXUP_FUNC(name, base, prefix, mask)                           \
  static uint16_t name(struct InternalInstruction *insn, OperandType type,     \
                       uint8_t index, uint8_t *valid) {                        \
    *valid = 1;                                                                \
    switch (type) {                                                            \
    case TYPE_Rv:                                                              \
      return base + index;                                                     \
    case TYPE_R8:                                                              \
      index &= mask;                                                           \
      if (index > 0xf) {                                                       \
        *valid = 0;                                                            \
      }                                                                        \
      /* With a REX prefix, indices 4-7 are numbered from _SPL. */             \
      return (insn->rexPrefix && index >= 4 && index <= 7)                     \
                 ? prefix##_SPL + (index - 4)                                  \
                 : prefix##_AL + index;                                        \
    case TYPE_R16:                                                             \
      index &= mask;                                                           \
      if (index > 0xf) {                                                       \
        *valid = 0;                                                            \
      }                                                                        \
      return prefix##_AX + index;                                              \
    case TYPE_R32:                                                             \
      index &= mask;                                                           \
      if (index > 0xf) {                                                       \
        *valid = 0;                                                            \
      }                                                                        \
      return prefix##_EAX + index;                                             \
    case TYPE_R64:                                                             \
      index &= mask;                                                           \
      if (index > 0xf) {                                                       \
        *valid = 0;                                                            \
      }                                                                        \
      return prefix##_RAX + index;                                             \
    case TYPE_ZMM:                                                             \
    case TYPE_MVSIBZ:                                                          \
      return prefix##_ZMM0 + index;                                            \
    case TYPE_YMM:                                                             \
    case TYPE_MVSIBY:                                                          \
      return prefix##_YMM0 + index;                                            \
    case TYPE_XMM:                                                             \
    case TYPE_MVSIBX:                                                          \
      return prefix##_XMM0 + index;                                            \
    case TYPE_TMM:                                                             \
      if (index > 7) {                                                         \
        *valid = 0;                                                            \
      }                                                                        \
      return prefix##_TMM0 + index;                                            \
    case TYPE_VK:                                                              \
      index &= 0xf;                                                            \
      if (index > 7) {                                                         \
        *valid = 0;                                                            \
      }                                                                        \
      return prefix##_K0 + index;                                              \
    case TYPE_VK_PAIR:                                                         \
      if (index > 7) {                                                         \
        *valid = 0;                                                            \
      }                                                                        \
      return prefix##_K0_K1 + (index / 2);                                     \
    case TYPE_MM64:                                                            \
      return prefix##_MM0 + (index & 0x7);                                     \
    case TYPE_SEGMENTREG:                                                      \
      if ((index & 7) > 5) {                                                   \
        *valid = 0;                                                            \
      }                                                                        \
      return prefix##_ES + (index & 7);                                        \
    case TYPE_DEBUGREG:                                                        \
      return prefix##_DR0 + index;                                             \
    case TYPE_CONTROLREG:                                                      \
      return prefix##_CR0 + index;                                             \
    case TYPE_BNDR:                                                            \
      if (index > 3) {                                                         \
        *valid = 0;                                                            \
      }                                                                        \
      return prefix##_BND0 + index;                                            \
    default:                                                                   \
      debug("Unhandled register type");                                        \
      *valid = 0;                                                              \
      return 0;                                                                \
    }                                                                          \
  }
821 
// Consult an operand type to determine the meaning of the reg or R/M field.
// For an XMM operand, for example, the field must name XMM0 rather than AX,
// which is how readModRM() would otherwise have interpreted it.
//
// @param insn  - The instruction containing the operand.
// @param type  - The operand type.
// @param index - The existing value of the field as reported by readModRM().
// @param valid - The address of a uint8_t. The target is set to 1 if the
//                field is valid for the register class; 0 if not.
// @return      - The proper value.
//
// fixupRegValue: TYPE_Rv results are offset from insn->regBase, register
// enumerators carry the MODRM_REG prefix, and general-purpose register
// indices are masked with 0x1f.
GENERIC_FIXUP_FUNC(fixupRegValue, insn->regBase, MODRM_REG, 0x1f)
// fixupRMValue: TYPE_Rv results are offset from insn->eaRegBase, register
// enumerators carry the EA_REG prefix, and general-purpose register indices
// are masked with 0xf.
GENERIC_FIXUP_FUNC(fixupRMValue, insn->eaRegBase, EA_REG, 0xf)
834 
835 // Consult an operand specifier to determine which of the fixup*Value functions
// to use in correcting readModRM()'s interpretation.
837 //
838 // @param insn - See fixup*Value().
839 // @param op - The operand specifier.
840 // @return - 0 if fixup was successful; -1 if the register returned was
841 // invalid for its class.
842 static int fixupReg(struct InternalInstruction *insn,
843  const struct OperandSpecifier *op) {
844  uint8_t valid;
845  LLVM_DEBUG(dbgs() << "fixupReg()");
846 
847  switch ((OperandEncoding)op->encoding) {
848  default:
849  debug("Expected a REG or R/M encoding in fixupReg");
850  return -1;
851  case ENCODING_VVVV:
852  insn->vvvv =
853  (Reg)fixupRegValue(insn, (OperandType)op->type, insn->vvvv, &valid);
854  if (!valid)
855  return -1;
856  break;
857  case ENCODING_REG:
858  insn->reg = (Reg)fixupRegValue(insn, (OperandType)op->type,
859  insn->reg - insn->regBase, &valid);
860  if (!valid)
861  return -1;
862  break;
863  case ENCODING_SIB:
865  if (insn->eaBase >= insn->eaRegBase) {
866  insn->eaBase = (EABase)fixupRMValue(
867  insn, (OperandType)op->type, insn->eaBase - insn->eaRegBase, &valid);
868  if (!valid)
869  return -1;
870  }
871  break;
872  }
873 
874  return 0;
875 }
876 
877 // Read the opcode (except the ModR/M byte in the case of extended or escape
878 // opcodes).
879 static bool readOpcode(struct InternalInstruction *insn) {
880  uint8_t current;
881  LLVM_DEBUG(dbgs() << "readOpcode()");
882 
883  insn->opcodeType = ONEBYTE;
884  if (insn->vectorExtensionType == TYPE_EVEX) {
885  switch (mmmFromEVEX2of4(insn->vectorExtensionPrefix[1])) {
886  default:
887  LLVM_DEBUG(
888  dbgs() << format("Unhandled mmm field for instruction (0x%hhx)",
890  return true;
891  case VEX_LOB_0F:
892  insn->opcodeType = TWOBYTE;
893  return consume(insn, insn->opcode);
894  case VEX_LOB_0F38:
895  insn->opcodeType = THREEBYTE_38;
896  return consume(insn, insn->opcode);
897  case VEX_LOB_0F3A:
898  insn->opcodeType = THREEBYTE_3A;
899  return consume(insn, insn->opcode);
900  case VEX_LOB_MAP5:
901  insn->opcodeType = MAP5;
902  return consume(insn, insn->opcode);
903  case VEX_LOB_MAP6:
904  insn->opcodeType = MAP6;
905  return consume(insn, insn->opcode);
906  }
907  } else if (insn->vectorExtensionType == TYPE_VEX_3B) {
908  switch (mmmmmFromVEX2of3(insn->vectorExtensionPrefix[1])) {
909  default:
910  LLVM_DEBUG(
911  dbgs() << format("Unhandled m-mmmm field for instruction (0x%hhx)",
913  return true;
914  case VEX_LOB_0F:
915  insn->opcodeType = TWOBYTE;
916  return consume(insn, insn->opcode);
917  case VEX_LOB_0F38:
918  insn->opcodeType = THREEBYTE_38;
919  return consume(insn, insn->opcode);
920  case VEX_LOB_0F3A:
921  insn->opcodeType = THREEBYTE_3A;
922  return consume(insn, insn->opcode);
923  case VEX_LOB_MAP5:
924  insn->opcodeType = MAP5;
925  return consume(insn, insn->opcode);
926  case VEX_LOB_MAP6:
927  insn->opcodeType = MAP6;
928  return consume(insn, insn->opcode);
929  }
930  } else if (insn->vectorExtensionType == TYPE_VEX_2B) {
931  insn->opcodeType = TWOBYTE;
932  return consume(insn, insn->opcode);
933  } else if (insn->vectorExtensionType == TYPE_XOP) {
934  switch (mmmmmFromXOP2of3(insn->vectorExtensionPrefix[1])) {
935  default:
936  LLVM_DEBUG(
937  dbgs() << format("Unhandled m-mmmm field for instruction (0x%hhx)",
939  return true;
940  case XOP_MAP_SELECT_8:
941  insn->opcodeType = XOP8_MAP;
942  return consume(insn, insn->opcode);
943  case XOP_MAP_SELECT_9:
944  insn->opcodeType = XOP9_MAP;
945  return consume(insn, insn->opcode);
946  case XOP_MAP_SELECT_A:
947  insn->opcodeType = XOPA_MAP;
948  return consume(insn, insn->opcode);
949  }
950  }
951 
952  if (consume(insn, current))
953  return true;
954 
955  if (current == 0x0f) {
956  LLVM_DEBUG(
957  dbgs() << format("Found a two-byte escape prefix (0x%hhx)", current));
958  if (consume(insn, current))
959  return true;
960 
961  if (current == 0x38) {
962  LLVM_DEBUG(dbgs() << format("Found a three-byte escape prefix (0x%hhx)",
963  current));
964  if (consume(insn, current))
965  return true;
966 
967  insn->opcodeType = THREEBYTE_38;
968  } else if (current == 0x3a) {
969  LLVM_DEBUG(dbgs() << format("Found a three-byte escape prefix (0x%hhx)",
970  current));
971  if (consume(insn, current))
972  return true;
973 
974  insn->opcodeType = THREEBYTE_3A;
975  } else if (current == 0x0f) {
976  LLVM_DEBUG(
977  dbgs() << format("Found a 3dnow escape prefix (0x%hhx)", current));
978 
979  // Consume operands before the opcode to comply with the 3DNow encoding
980  if (readModRM(insn))
981  return true;
982 
983  if (consume(insn, current))
984  return true;
985 
986  insn->opcodeType = THREEDNOW_MAP;
987  } else {
988  LLVM_DEBUG(dbgs() << "Didn't find a three-byte escape prefix");
989  insn->opcodeType = TWOBYTE;
990  }
991  } else if (insn->mandatoryPrefix)
992  // The opcode with mandatory prefix must start with opcode escape.
993  // If not it's legacy repeat prefix
994  insn->mandatoryPrefix = 0;
995 
996  // At this point we have consumed the full opcode.
997  // Anything we consume from here on must be unconsumed.
998  insn->opcode = current;
999 
1000  return false;
1001 }
1002 
// Decide whether the instruction name `equiv` is the 16-bit spelling of the
// 32-bit or 64-bit instruction name `orig`.
//
// The names must have the same length. At each position the characters must
// either match, or differ in one of the accepted ways:
//   - 'Q' or 'L' in orig against 'W' in equiv,
//   - '6' or '3' in orig against '1' in equiv,
//   - '4' or '2' in orig against '6' in equiv.
static bool is16BitEquivalent(const char *orig, const char *equiv) {
  const char *wide = orig;
  const char *narrow = equiv;

  while (*wide != '\0' && *narrow != '\0') {
    const char w = *wide++;
    const char n = *narrow++;
    if (w == n)
      continue;

    const bool suffixSwap = (w == 'Q' || w == 'L') && n == 'W';
    const bool leadDigitSwap = (w == '6' || w == '3') && n == '1';
    const bool tailDigitSwap = (w == '4' || w == '2') && n == '6';
    if (!(suffixSwap || leadDigitSwap || tailDigitSwap))
      return false;
  }

  // Equivalent only if both names ended at the same position.
  return *wide == '\0' && *narrow == '\0';
}
1021 
// Report whether an instruction name marks a 64-bit instruction, i.e. whether
// the characters "64" appear anywhere in `name`.
static bool is64Bit(const char *name) {
  for (const char *cursor = name; *cursor != '\0'; ++cursor) {
    // cursor[1] is at worst the terminating NUL, so this never reads past it.
    if (cursor[0] == '6' && cursor[1] == '4')
      return true;
  }
  return false;
}
1031 
1032 // Determine the ID of an instruction, consuming the ModR/M byte as appropriate
1033 // for extended and escape opcodes, and using a supplied attribute mask.
1034 static int getInstructionIDWithAttrMask(uint16_t *instructionID,
1035  struct InternalInstruction *insn,
1036  uint16_t attrMask) {
1037  auto insnCtx = InstructionContext(x86DisassemblerContexts[attrMask]);
1038  const ContextDecision *decision;
1039  switch (insn->opcodeType) {
1040  case ONEBYTE:
1041  decision = &ONEBYTE_SYM;
1042  break;
1043  case TWOBYTE:
1044  decision = &TWOBYTE_SYM;
1045  break;
1046  case THREEBYTE_38:
1047  decision = &THREEBYTE38_SYM;
1048  break;
1049  case THREEBYTE_3A:
1050  decision = &THREEBYTE3A_SYM;
1051  break;
1052  case XOP8_MAP:
1053  decision = &XOP8_MAP_SYM;
1054  break;
1055  case XOP9_MAP:
1056  decision = &XOP9_MAP_SYM;
1057  break;
1058  case XOPA_MAP:
1059  decision = &XOPA_MAP_SYM;
1060  break;
1061  case THREEDNOW_MAP:
1062  decision = &THREEDNOW_MAP_SYM;
1063  break;
1064  case MAP5:
1065  decision = &MAP5_SYM;
1066  break;
1067  case MAP6:
1068  decision = &MAP6_SYM;
1069  break;
1070  }
1071 
1072  if (decision->opcodeDecisions[insnCtx]
1073  .modRMDecisions[insn->opcode]
1074  .modrm_type != MODRM_ONEENTRY) {
1075  if (readModRM(insn))
1076  return -1;
1077  *instructionID =
1078  decode(insn->opcodeType, insnCtx, insn->opcode, insn->modRM);
1079  } else {
1080  *instructionID = decode(insn->opcodeType, insnCtx, insn->opcode, 0);
1081  }
1082 
1083  return 0;
1084 }
1085 
1086 // Determine the ID of an instruction, consuming the ModR/M byte as appropriate
1087 // for extended and escape opcodes. Determines the attributes and context for
1088 // the instruction before doing so.
1089 static int getInstructionID(struct InternalInstruction *insn,
1090  const MCInstrInfo *mii) {
1091  uint16_t attrMask;
1092  uint16_t instructionID;
1093 
1094  LLVM_DEBUG(dbgs() << "getID()");
1095 
1096  attrMask = ATTR_NONE;
1097 
1098  if (insn->mode == MODE_64BIT)
1099  attrMask |= ATTR_64BIT;
1100 
1101  if (insn->vectorExtensionType != TYPE_NO_VEX_XOP) {
1102  attrMask |= (insn->vectorExtensionType == TYPE_EVEX) ? ATTR_EVEX : ATTR_VEX;
1103 
1104  if (insn->vectorExtensionType == TYPE_EVEX) {
1105  switch (ppFromEVEX3of4(insn->vectorExtensionPrefix[2])) {
1106  case VEX_PREFIX_66:
1107  attrMask |= ATTR_OPSIZE;
1108  break;
1109  case VEX_PREFIX_F3:
1110  attrMask |= ATTR_XS;
1111  break;
1112  case VEX_PREFIX_F2:
1113  attrMask |= ATTR_XD;
1114  break;
1115  }
1116 
1117  if (zFromEVEX4of4(insn->vectorExtensionPrefix[3]))
1118  attrMask |= ATTR_EVEXKZ;
1119  if (bFromEVEX4of4(insn->vectorExtensionPrefix[3]))
1120  attrMask |= ATTR_EVEXB;
1121  if (aaaFromEVEX4of4(insn->vectorExtensionPrefix[3]))
1122  attrMask |= ATTR_EVEXK;
1123  if (lFromEVEX4of4(insn->vectorExtensionPrefix[3]))
1124  attrMask |= ATTR_VEXL;
1125  if (l2FromEVEX4of4(insn->vectorExtensionPrefix[3]))
1126  attrMask |= ATTR_EVEXL2;
1127  } else if (insn->vectorExtensionType == TYPE_VEX_3B) {
1128  switch (ppFromVEX3of3(insn->vectorExtensionPrefix[2])) {
1129  case VEX_PREFIX_66:
1130  attrMask |= ATTR_OPSIZE;
1131  break;
1132  case VEX_PREFIX_F3:
1133  attrMask |= ATTR_XS;
1134  break;
1135  case VEX_PREFIX_F2:
1136  attrMask |= ATTR_XD;
1137  break;
1138  }
1139 
1140  if (lFromVEX3of3(insn->vectorExtensionPrefix[2]))
1141  attrMask |= ATTR_VEXL;
1142  } else if (insn->vectorExtensionType == TYPE_VEX_2B) {
1143  switch (ppFromVEX2of2(insn->vectorExtensionPrefix[1])) {
1144  case VEX_PREFIX_66:
1145  attrMask |= ATTR_OPSIZE;
1146  if (insn->hasAdSize)
1147  attrMask |= ATTR_ADSIZE;
1148  break;
1149  case VEX_PREFIX_F3:
1150  attrMask |= ATTR_XS;
1151  break;
1152  case VEX_PREFIX_F2:
1153  attrMask |= ATTR_XD;
1154  break;
1155  }
1156 
1157  if (lFromVEX2of2(insn->vectorExtensionPrefix[1]))
1158  attrMask |= ATTR_VEXL;
1159  } else if (insn->vectorExtensionType == TYPE_XOP) {
1160  switch (ppFromXOP3of3(insn->vectorExtensionPrefix[2])) {
1161  case VEX_PREFIX_66:
1162  attrMask |= ATTR_OPSIZE;
1163  break;
1164  case VEX_PREFIX_F3:
1165  attrMask |= ATTR_XS;
1166  break;
1167  case VEX_PREFIX_F2:
1168  attrMask |= ATTR_XD;
1169  break;
1170  }
1171 
1172  if (lFromXOP3of3(insn->vectorExtensionPrefix[2]))
1173  attrMask |= ATTR_VEXL;
1174  } else {
1175  return -1;
1176  }
1177  } else if (!insn->mandatoryPrefix) {
1178  // If we don't have mandatory prefix we should use legacy prefixes here
1179  if (insn->hasOpSize && (insn->mode != MODE_16BIT))
1180  attrMask |= ATTR_OPSIZE;
1181  if (insn->hasAdSize)
1182  attrMask |= ATTR_ADSIZE;
1183  if (insn->opcodeType == ONEBYTE) {
1184  if (insn->repeatPrefix == 0xf3 && (insn->opcode == 0x90))
1185  // Special support for PAUSE
1186  attrMask |= ATTR_XS;
1187  } else {
1188  if (insn->repeatPrefix == 0xf2)
1189  attrMask |= ATTR_XD;
1190  else if (insn->repeatPrefix == 0xf3)
1191  attrMask |= ATTR_XS;
1192  }
1193  } else {
1194  switch (insn->mandatoryPrefix) {
1195  case 0xf2:
1196  attrMask |= ATTR_XD;
1197  break;
1198  case 0xf3:
1199  attrMask |= ATTR_XS;
1200  break;
1201  case 0x66:
1202  if (insn->mode != MODE_16BIT)
1203  attrMask |= ATTR_OPSIZE;
1204  if (insn->hasAdSize)
1205  attrMask |= ATTR_ADSIZE;
1206  break;
1207  case 0x67:
1208  attrMask |= ATTR_ADSIZE;
1209  break;
1210  }
1211  }
1212 
1213  if (insn->rexPrefix & 0x08) {
1214  attrMask |= ATTR_REXW;
1215  attrMask &= ~ATTR_ADSIZE;
1216  }
1217 
1218  if (insn->mode == MODE_16BIT) {
1219  // JCXZ/JECXZ need special handling for 16-bit mode because the meaning
1220  // of the AdSize prefix is inverted w.r.t. 32-bit mode.
1221  if (insn->opcodeType == ONEBYTE && insn->opcode == 0xE3)
1222  attrMask ^= ATTR_ADSIZE;
1223  // If we're in 16-bit mode and this is one of the relative jumps and opsize
1224  // prefix isn't present, we need to force the opsize attribute since the
1225  // prefix is inverted relative to 32-bit mode.
1226  if (!insn->hasOpSize && insn->opcodeType == ONEBYTE &&
1227  (insn->opcode == 0xE8 || insn->opcode == 0xE9))
1228  attrMask |= ATTR_OPSIZE;
1229 
1230  if (!insn->hasOpSize && insn->opcodeType == TWOBYTE &&
1231  insn->opcode >= 0x80 && insn->opcode <= 0x8F)
1232  attrMask |= ATTR_OPSIZE;
1233  }
1234 
1235 
1236  if (getInstructionIDWithAttrMask(&instructionID, insn, attrMask))
1237  return -1;
1238 
1239  // The following clauses compensate for limitations of the tables.
1240 
1241  if (insn->mode != MODE_64BIT &&
1243  // The tables can't distinquish between cases where the W-bit is used to
1244  // select register size and cases where its a required part of the opcode.
1245  if ((insn->vectorExtensionType == TYPE_EVEX &&
1246  wFromEVEX3of4(insn->vectorExtensionPrefix[2])) ||
1247  (insn->vectorExtensionType == TYPE_VEX_3B &&
1248  wFromVEX3of3(insn->vectorExtensionPrefix[2])) ||
1249  (insn->vectorExtensionType == TYPE_XOP &&
1250  wFromXOP3of3(insn->vectorExtensionPrefix[2]))) {
1251 
1252  uint16_t instructionIDWithREXW;
1253  if (getInstructionIDWithAttrMask(&instructionIDWithREXW, insn,
1254  attrMask | ATTR_REXW)) {
1255  insn->instructionID = instructionID;
1256  insn->spec = &INSTRUCTIONS_SYM[instructionID];
1257  return 0;
1258  }
1259 
1260  auto SpecName = mii->getName(instructionIDWithREXW);
1261  // If not a 64-bit instruction. Switch the opcode.
1262  if (!is64Bit(SpecName.data())) {
1263  insn->instructionID = instructionIDWithREXW;
1264  insn->spec = &INSTRUCTIONS_SYM[instructionIDWithREXW];
1265  return 0;
1266  }
1267  }
1268  }
1269 
1270  // Absolute moves, umonitor, and movdir64b need special handling.
1271  // -For 16-bit mode because the meaning of the AdSize and OpSize prefixes are
1272  // inverted w.r.t.
1273  // -For 32-bit mode we need to ensure the ADSIZE prefix is observed in
1274  // any position.
1275  if ((insn->opcodeType == ONEBYTE && ((insn->opcode & 0xFC) == 0xA0)) ||
1276  (insn->opcodeType == TWOBYTE && (insn->opcode == 0xAE)) ||
1277  (insn->opcodeType == THREEBYTE_38 && insn->opcode == 0xF8)) {
1278  // Make sure we observed the prefixes in any position.
1279  if (insn->hasAdSize)
1280  attrMask |= ATTR_ADSIZE;
1281  if (insn->hasOpSize)
1282  attrMask |= ATTR_OPSIZE;
1283 
1284  // In 16-bit, invert the attributes.
1285  if (insn->mode == MODE_16BIT) {
1286  attrMask ^= ATTR_ADSIZE;
1287 
1288  // The OpSize attribute is only valid with the absolute moves.
1289  if (insn->opcodeType == ONEBYTE && ((insn->opcode & 0xFC) == 0xA0))
1290  attrMask ^= ATTR_OPSIZE;
1291  }
1292 
1293  if (getInstructionIDWithAttrMask(&instructionID, insn, attrMask))
1294  return -1;
1295 
1296  insn->instructionID = instructionID;
1297  insn->spec = &INSTRUCTIONS_SYM[instructionID];
1298  return 0;
1299  }
1300 
1301  if ((insn->mode == MODE_16BIT || insn->hasOpSize) &&
1302  !(attrMask & ATTR_OPSIZE)) {
1303  // The instruction tables make no distinction between instructions that
1304  // allow OpSize anywhere (i.e., 16-bit operations) and that need it in a
1305  // particular spot (i.e., many MMX operations). In general we're
1306  // conservative, but in the specific case where OpSize is present but not in
1307  // the right place we check if there's a 16-bit operation.
1308  const struct InstructionSpecifier *spec;
1309  uint16_t instructionIDWithOpsize;
1310  llvm::StringRef specName, specWithOpSizeName;
1311 
1312  spec = &INSTRUCTIONS_SYM[instructionID];
1313 
1314  if (getInstructionIDWithAttrMask(&instructionIDWithOpsize, insn,
1315  attrMask | ATTR_OPSIZE)) {
1316  // ModRM required with OpSize but not present. Give up and return the
1317  // version without OpSize set.
1318  insn->instructionID = instructionID;
1319  insn->spec = spec;
1320  return 0;
1321  }
1322 
1323  specName = mii->getName(instructionID);
1324  specWithOpSizeName = mii->getName(instructionIDWithOpsize);
1325 
1326  if (is16BitEquivalent(specName.data(), specWithOpSizeName.data()) &&
1327  (insn->mode == MODE_16BIT) ^ insn->hasOpSize) {
1328  insn->instructionID = instructionIDWithOpsize;
1329  insn->spec = &INSTRUCTIONS_SYM[instructionIDWithOpsize];
1330  } else {
1331  insn->instructionID = instructionID;
1332  insn->spec = spec;
1333  }
1334  return 0;
1335  }
1336 
1337  if (insn->opcodeType == ONEBYTE && insn->opcode == 0x90 &&
1338  insn->rexPrefix & 0x01) {
1339  // NOOP shouldn't decode as NOOP if REX.b is set. Instead it should decode
1340  // as XCHG %r8, %eax.
1341  const struct InstructionSpecifier *spec;
1342  uint16_t instructionIDWithNewOpcode;
1343  const struct InstructionSpecifier *specWithNewOpcode;
1344 
1345  spec = &INSTRUCTIONS_SYM[instructionID];
1346 
1347  // Borrow opcode from one of the other XCHGar opcodes
1348  insn->opcode = 0x91;
1349 
1350  if (getInstructionIDWithAttrMask(&instructionIDWithNewOpcode, insn,
1351  attrMask)) {
1352  insn->opcode = 0x90;
1353 
1354  insn->instructionID = instructionID;
1355  insn->spec = spec;
1356  return 0;
1357  }
1358 
1359  specWithNewOpcode = &INSTRUCTIONS_SYM[instructionIDWithNewOpcode];
1360 
1361  // Change back
1362  insn->opcode = 0x90;
1363 
1364  insn->instructionID = instructionIDWithNewOpcode;
1365  insn->spec = specWithNewOpcode;
1366 
1367  return 0;
1368  }
1369 
1370  insn->instructionID = instructionID;
1371  insn->spec = &INSTRUCTIONS_SYM[insn->instructionID];
1372 
1373  return 0;
1374 }
1375 
1376 // Read an operand from the opcode field of an instruction and interprets it
1377 // appropriately given the operand width. Handles AddRegFrm instructions.
1378 //
1379 // @param insn - the instruction whose opcode field is to be read.
1380 // @param size - The width (in bytes) of the register being specified.
1381 // 1 means AL and friends, 2 means AX, 4 means EAX, and 8 means
1382 // RAX.
1383 // @return - 0 on success; nonzero otherwise.
1384 static int readOpcodeRegister(struct InternalInstruction *insn, uint8_t size) {
1385  LLVM_DEBUG(dbgs() << "readOpcodeRegister()");
1386 
1387  if (size == 0)
1388  size = insn->registerSize;
1389 
1390  switch (size) {
1391  case 1:
1392  insn->opcodeRegister = (Reg)(
1393  MODRM_REG_AL + ((bFromREX(insn->rexPrefix) << 3) | (insn->opcode & 7)));
1394  if (insn->rexPrefix && insn->opcodeRegister >= MODRM_REG_AL + 0x4 &&
1395  insn->opcodeRegister < MODRM_REG_AL + 0x8) {
1396  insn->opcodeRegister =
1397  (Reg)(MODRM_REG_SPL + (insn->opcodeRegister - MODRM_REG_AL - 4));
1398  }
1399 
1400  break;
1401  case 2:
1402  insn->opcodeRegister = (Reg)(
1403  MODRM_REG_AX + ((bFromREX(insn->rexPrefix) << 3) | (insn->opcode & 7)));
1404  break;
1405  case 4:
1406  insn->opcodeRegister =
1407  (Reg)(MODRM_REG_EAX +
1408  ((bFromREX(insn->rexPrefix) << 3) | (insn->opcode & 7)));
1409  break;
1410  case 8:
1411  insn->opcodeRegister =
1412  (Reg)(MODRM_REG_RAX +
1413  ((bFromREX(insn->rexPrefix) << 3) | (insn->opcode & 7)));
1414  break;
1415  }
1416 
1417  return 0;
1418 }
1419 
1420 // Consume an immediate operand from an instruction, given the desired operand
1421 // size.
1422 //
1423 // @param insn - The instruction whose operand is to be read.
1424 // @param size - The width (in bytes) of the operand.
1425 // @return - 0 if the immediate was successfully consumed; nonzero
1426 // otherwise.
1427 static int readImmediate(struct InternalInstruction *insn, uint8_t size) {
1428  uint8_t imm8;
1429  uint16_t imm16;
1430  uint32_t imm32;
1431  uint64_t imm64;
1432 
1433  LLVM_DEBUG(dbgs() << "readImmediate()");
1434 
1435  assert(insn->numImmediatesConsumed < 2 && "Already consumed two immediates");
1436 
1437  insn->immediateSize = size;
1438  insn->immediateOffset = insn->readerCursor - insn->startLocation;
1439 
1440  switch (size) {
1441  case 1:
1442  if (consume(insn, imm8))
1443  return -1;
1444  insn->immediates[insn->numImmediatesConsumed] = imm8;
1445  break;
1446  case 2:
1447  if (consume(insn, imm16))
1448  return -1;
1449  insn->immediates[insn->numImmediatesConsumed] = imm16;
1450  break;
1451  case 4:
1452  if (consume(insn, imm32))
1453  return -1;
1454  insn->immediates[insn->numImmediatesConsumed] = imm32;
1455  break;
1456  case 8:
1457  if (consume(insn, imm64))
1458  return -1;
1459  insn->immediates[insn->numImmediatesConsumed] = imm64;
1460  break;
1461  default:
1462  llvm_unreachable("invalid size");
1463  }
1464 
1465  insn->numImmediatesConsumed++;
1466 
1467  return 0;
1468 }
1469 
1470 // Consume vvvv from an instruction if it has a VEX prefix.
1471 static int readVVVV(struct InternalInstruction *insn) {
1472  LLVM_DEBUG(dbgs() << "readVVVV()");
1473 
1474  int vvvv;
1475  if (insn->vectorExtensionType == TYPE_EVEX)
1476  vvvv = (v2FromEVEX4of4(insn->vectorExtensionPrefix[3]) << 4 |
1478  else if (insn->vectorExtensionType == TYPE_VEX_3B)
1479  vvvv = vvvvFromVEX3of3(insn->vectorExtensionPrefix[2]);
1480  else if (insn->vectorExtensionType == TYPE_VEX_2B)
1481  vvvv = vvvvFromVEX2of2(insn->vectorExtensionPrefix[1]);
1482  else if (insn->vectorExtensionType == TYPE_XOP)
1483  vvvv = vvvvFromXOP3of3(insn->vectorExtensionPrefix[2]);
1484  else
1485  return -1;
1486 
1487  if (insn->mode != MODE_64BIT)
1488  vvvv &= 0xf; // Can only clear bit 4. Bit 3 must be cleared later.
1489 
1490  insn->vvvv = static_cast<Reg>(vvvv);
1491  return 0;
1492 }
1493 
1494 // Read an mask register from the opcode field of an instruction.
1495 //
1496 // @param insn - The instruction whose opcode field is to be read.
1497 // @return - 0 on success; nonzero otherwise.
1498 static int readMaskRegister(struct InternalInstruction *insn) {
1499  LLVM_DEBUG(dbgs() << "readMaskRegister()");
1500 
1501  if (insn->vectorExtensionType != TYPE_EVEX)
1502  return -1;
1503 
1504  insn->writemask =
1505  static_cast<Reg>(aaaFromEVEX4of4(insn->vectorExtensionPrefix[3]));
1506  return 0;
1507 }
1508 
1509 // Consults the specifier for an instruction and consumes all
1510 // operands for that instruction, interpreting them as it goes.
1511 static int readOperands(struct InternalInstruction *insn) {
1512  int hasVVVV, needVVVV;
1513  int sawRegImm = 0;
1514 
1515  LLVM_DEBUG(dbgs() << "readOperands()");
1516 
1517  // If non-zero vvvv specified, make sure one of the operands uses it.
1518  hasVVVV = !readVVVV(insn);
1519  needVVVV = hasVVVV && (insn->vvvv != 0);
1520 
1521  for (const auto &Op : x86OperandSets[insn->spec->operands]) {
1522  switch (Op.encoding) {
1523  case ENCODING_NONE:
1524  case ENCODING_SI:
1525  case ENCODING_DI:
1526  break;
1528  // VSIB can use the V2 bit so check only the other bits.
1529  if (needVVVV)
1530  needVVVV = hasVVVV & ((insn->vvvv & 0xf) != 0);
1531  if (readModRM(insn))
1532  return -1;
1533 
1534  // Reject if SIB wasn't used.
1535  if (insn->eaBase != EA_BASE_sib && insn->eaBase != EA_BASE_sib64)
1536  return -1;
1537 
1538  // If sibIndex was set to SIB_INDEX_NONE, index offset is 4.
1539  if (insn->sibIndex == SIB_INDEX_NONE)
1540  insn->sibIndex = (SIBIndex)(insn->sibIndexBase + 4);
1541 
1542  // If EVEX.v2 is set this is one of the 16-31 registers.
1543  if (insn->vectorExtensionType == TYPE_EVEX && insn->mode == MODE_64BIT &&
1545  insn->sibIndex = (SIBIndex)(insn->sibIndex + 16);
1546 
1547  // Adjust the index register to the correct size.
1548  switch ((OperandType)Op.type) {
1549  default:
1550  debug("Unhandled VSIB index type");
1551  return -1;
1552  case TYPE_MVSIBX:
1553  insn->sibIndex =
1554  (SIBIndex)(SIB_INDEX_XMM0 + (insn->sibIndex - insn->sibIndexBase));
1555  break;
1556  case TYPE_MVSIBY:
1557  insn->sibIndex =
1558  (SIBIndex)(SIB_INDEX_YMM0 + (insn->sibIndex - insn->sibIndexBase));
1559  break;
1560  case TYPE_MVSIBZ:
1561  insn->sibIndex =
1562  (SIBIndex)(SIB_INDEX_ZMM0 + (insn->sibIndex - insn->sibIndexBase));
1563  break;
1564  }
1565 
1566  // Apply the AVX512 compressed displacement scaling factor.
1567  if (Op.encoding != ENCODING_REG && insn->eaDisplacement == EA_DISP_8)
1568  insn->displacement *= 1 << (Op.encoding - ENCODING_VSIB);
1569  break;
1570  case ENCODING_SIB:
1571  // Reject if SIB wasn't used.
1572  if (insn->eaBase != EA_BASE_sib && insn->eaBase != EA_BASE_sib64)
1573  return -1;
1574  if (readModRM(insn))
1575  return -1;
1576  if (fixupReg(insn, &Op))
1577  return -1;
1578  break;
1579  case ENCODING_REG:
1581  if (readModRM(insn))
1582  return -1;
1583  if (fixupReg(insn, &Op))
1584  return -1;
1585  // Apply the AVX512 compressed displacement scaling factor.
1586  if (Op.encoding != ENCODING_REG && insn->eaDisplacement == EA_DISP_8)
1587  insn->displacement *= 1 << (Op.encoding - ENCODING_RM);
1588  break;
1589  case ENCODING_IB:
1590  if (sawRegImm) {
1591  // Saw a register immediate so don't read again and instead split the
1592  // previous immediate. FIXME: This is a hack.
1593  insn->immediates[insn->numImmediatesConsumed] =
1594  insn->immediates[insn->numImmediatesConsumed - 1] & 0xf;
1595  ++insn->numImmediatesConsumed;
1596  break;
1597  }
1598  if (readImmediate(insn, 1))
1599  return -1;
1600  if (Op.type == TYPE_XMM || Op.type == TYPE_YMM)
1601  sawRegImm = 1;
1602  break;
1603  case ENCODING_IW:
1604  if (readImmediate(insn, 2))
1605  return -1;
1606  break;
1607  case ENCODING_ID:
1608  if (readImmediate(insn, 4))
1609  return -1;
1610  break;
1611  case ENCODING_IO:
1612  if (readImmediate(insn, 8))
1613  return -1;
1614  break;
1615  case ENCODING_Iv:
1616  if (readImmediate(insn, insn->immediateSize))
1617  return -1;
1618  break;
1619  case ENCODING_Ia:
1620  if (readImmediate(insn, insn->addressSize))
1621  return -1;
1622  break;
1623  case ENCODING_IRC:
1624  insn->RC = (l2FromEVEX4of4(insn->vectorExtensionPrefix[3]) << 1) |
1626  break;
1627  case ENCODING_RB:
1628  if (readOpcodeRegister(insn, 1))
1629  return -1;
1630  break;
1631  case ENCODING_RW:
1632  if (readOpcodeRegister(insn, 2))
1633  return -1;
1634  break;
1635  case ENCODING_RD:
1636  if (readOpcodeRegister(insn, 4))
1637  return -1;
1638  break;
1639  case ENCODING_RO:
1640  if (readOpcodeRegister(insn, 8))
1641  return -1;
1642  break;
1643  case ENCODING_Rv:
1644  if (readOpcodeRegister(insn, 0))
1645  return -1;
1646  break;
1647  case ENCODING_CC:
1648  insn->immediates[1] = insn->opcode & 0xf;
1649  break;
1650  case ENCODING_FP:
1651  break;
1652  case ENCODING_VVVV:
1653  needVVVV = 0; // Mark that we have found a VVVV operand.
1654  if (!hasVVVV)
1655  return -1;
1656  if (insn->mode != MODE_64BIT)
1657  insn->vvvv = static_cast<Reg>(insn->vvvv & 0x7);
1658  if (fixupReg(insn, &Op))
1659  return -1;
1660  break;
1661  case ENCODING_WRITEMASK:
1662  if (readMaskRegister(insn))
1663  return -1;
1664  break;
1665  case ENCODING_DUP:
1666  break;
1667  default:
1668  LLVM_DEBUG(dbgs() << "Encountered an operand with an unknown encoding.");
1669  return -1;
1670  }
1671  }
1672 
1673  // If we didn't find ENCODING_VVVV operand, but non-zero vvvv present, fail
1674  if (needVVVV)
1675  return -1;
1676 
1677  return 0;
1678 }
1679 
namespace llvm {

// Placeholder register numbers. They are never assigned to anything; they
// exist only so that an automatically-generated switch statement which names
// them will compile.
namespace X86 {
  enum {
    BX_SI = 500, // the remaining enumerators count up from here
    BX_DI,       // 501
    BP_SI,       // 502
    BP_DI,       // 503
    sib,         // 504
    sib64        // 505
  };
} // namespace X86

} // namespace llvm
1697 
1698 static bool translateInstruction(MCInst &target,
1699  InternalInstruction &source,
1700  const MCDisassembler *Dis);
1701 
1702 namespace {
1703 
1704 /// Generic disassembler for all X86 platforms. All each platform class should
1705 /// have to do is subclass the constructor, and provide a different
1706 /// disassemblerMode value.
1707 class X86GenericDisassembler : public MCDisassembler {
1708  std::unique_ptr<const MCInstrInfo> MII;
1709 public:
1710  X86GenericDisassembler(const MCSubtargetInfo &STI, MCContext &Ctx,
1711  std::unique_ptr<const MCInstrInfo> MII);
1712 public:
1713  DecodeStatus getInstruction(MCInst &instr, uint64_t &size,
1714  ArrayRef<uint8_t> Bytes, uint64_t Address,
1715  raw_ostream &cStream) const override;
1716 
1717 private:
1718  DisassemblerMode fMode;
1719 };
1720 
1721 } // namespace
1722 
1723 X86GenericDisassembler::X86GenericDisassembler(
1724  const MCSubtargetInfo &STI,
1725  MCContext &Ctx,
1726  std::unique_ptr<const MCInstrInfo> MII)
1727  : MCDisassembler(STI, Ctx), MII(std::move(MII)) {
1728  const FeatureBitset &FB = STI.getFeatureBits();
1729  if (FB[X86::Mode16Bit]) {
1730  fMode = MODE_16BIT;
1731  return;
1732  } else if (FB[X86::Mode32Bit]) {
1733  fMode = MODE_32BIT;
1734  return;
1735  } else if (FB[X86::Mode64Bit]) {
1736  fMode = MODE_64BIT;
1737  return;
1738  }
1739 
1740  llvm_unreachable("Invalid CPU mode");
1741 }
1742 
1743 MCDisassembler::DecodeStatus X86GenericDisassembler::getInstruction(
1744  MCInst &Instr, uint64_t &Size, ArrayRef<uint8_t> Bytes, uint64_t Address,
1745  raw_ostream &CStream) const {
1746  CommentStream = &CStream;
1747 
1748  InternalInstruction Insn;
1749  memset(&Insn, 0, sizeof(InternalInstruction));
1750  Insn.bytes = Bytes;
1751  Insn.startLocation = Address;
1752  Insn.readerCursor = Address;
1753  Insn.mode = fMode;
1754 
1755  if (Bytes.empty() || readPrefixes(&Insn) || readOpcode(&Insn) ||
1756  getInstructionID(&Insn, MII.get()) || Insn.instructionID == 0 ||
1757  readOperands(&Insn)) {
1758  Size = Insn.readerCursor - Address;
1759  return Fail;
1760  }
1761 
1762  Insn.operands = x86OperandSets[Insn.spec->operands];
1763  Insn.length = Insn.readerCursor - Insn.startLocation;
1764  Size = Insn.length;
1765  if (Size > 15)
1766  LLVM_DEBUG(dbgs() << "Instruction exceeds 15-byte limit");
1767 
1768  bool Ret = translateInstruction(Instr, Insn, this);
1769  if (!Ret) {
1770  unsigned Flags = X86::IP_NO_PREFIX;
1771  if (Insn.hasAdSize)
1772  Flags |= X86::IP_HAS_AD_SIZE;
1773  if (!Insn.mandatoryPrefix) {
1774  if (Insn.hasOpSize)
1775  Flags |= X86::IP_HAS_OP_SIZE;
1776  if (Insn.repeatPrefix == 0xf2)
1777  Flags |= X86::IP_HAS_REPEAT_NE;
1778  else if (Insn.repeatPrefix == 0xf3 &&
1779  // It should not be 'pause' f3 90
1780  Insn.opcode != 0x90)
1781  Flags |= X86::IP_HAS_REPEAT;
1782  if (Insn.hasLockPrefix)
1783  Flags |= X86::IP_HAS_LOCK;
1784  }
1785  Instr.setFlags(Flags);
1786  }
1787  return (!Ret) ? Success : Fail;
1788 }
1789 
1790 //
1791 // Private code that translates from struct InternalInstructions to MCInsts.
1792 //
1793 
1794 /// translateRegister - Translates an internal register to the appropriate LLVM
1795 /// register, and appends it as an operand to an MCInst.
1796 ///
1797 /// @param mcInst - The MCInst to append to.
1798 /// @param reg - The Reg to append.
1799 static void translateRegister(MCInst &mcInst, Reg reg) {
1800 #define ENTRY(x) X86::x,
1801  static constexpr MCPhysReg llvmRegnums[] = {ALL_REGS};
1802 #undef ENTRY
1803 
1804  MCPhysReg llvmRegnum = llvmRegnums[reg];
1805  mcInst.addOperand(MCOperand::createReg(llvmRegnum));
1806 }
1807 
1808 /// tryAddingSymbolicOperand - trys to add a symbolic operand in place of the
1809 /// immediate Value in the MCInst.
1810 ///
1811 /// @param Value - The immediate Value, has had any PC adjustment made by
1812 /// the caller.
1813 /// @param isBranch - If the instruction is a branch instruction
1814 /// @param Address - The starting address of the instruction
1815 /// @param Offset - The byte offset to this immediate in the instruction
1816 /// @param Width - The byte width of this immediate in the instruction
1817 ///
1818 /// If the getOpInfo() function was set when setupForSymbolicDisassembly() was
1819 /// called then that function is called to get any symbolic information for the
1820 /// immediate in the instruction using the Address, Offset and Width. If that
1821 /// returns non-zero then the symbolic information it returns is used to create
1822 /// an MCExpr and that is added as an operand to the MCInst. If getOpInfo()
1823 /// returns zero and isBranch is true then a symbol look up for immediate Value
1824 /// is done and if a symbol is found an MCExpr is created with that, else
1825 /// an MCExpr with the immediate Value is created. This function returns true
1826 /// if it adds an operand to the MCInst and false otherwise.
1827 static bool tryAddingSymbolicOperand(int64_t Value, bool isBranch,
1828  uint64_t Address, uint64_t Offset,
1829  uint64_t Width, MCInst &MI,
1830  const MCDisassembler *Dis) {
1832  Offset, Width);
1833 }
1834 
1835 /// tryAddingPcLoadReferenceComment - trys to add a comment as to what is being
1836 /// referenced by a load instruction with the base register that is the rip.
1837 /// These can often be addresses in a literal pool. The Address of the
1838 /// instruction and its immediate Value are used to determine the address
1839 /// being referenced in the literal pool entry. The SymbolLookUp call back will
1840 /// return a pointer to a literal 'C' string if the referenced address is an
1841 /// address into a section with 'C' string literals.
1843  const void *Decoder) {
1844  const MCDisassembler *Dis = static_cast<const MCDisassembler*>(Decoder);
1846 }
1847 
1848 static const uint8_t segmentRegnums[SEG_OVERRIDE_max] = {
1849  0, // SEG_OVERRIDE_NONE
1850  X86::CS,
1851  X86::SS,
1852  X86::DS,
1853  X86::ES,
1854  X86::FS,
1855  X86::GS
1856 };
1857 
1858 /// translateSrcIndex - Appends a source index operand to an MCInst.
1859 ///
1860 /// @param mcInst - The MCInst to append to.
1861 /// @param insn - The internal instruction.
1862 static bool translateSrcIndex(MCInst &mcInst, InternalInstruction &insn) {
1863  unsigned baseRegNo;
1864 
1865  if (insn.mode == MODE_64BIT)
1866  baseRegNo = insn.hasAdSize ? X86::ESI : X86::RSI;
1867  else if (insn.mode == MODE_32BIT)
1868  baseRegNo = insn.hasAdSize ? X86::SI : X86::ESI;
1869  else {
1870  assert(insn.mode == MODE_16BIT);
1871  baseRegNo = insn.hasAdSize ? X86::ESI : X86::SI;
1872  }
1873  MCOperand baseReg = MCOperand::createReg(baseRegNo);
1874  mcInst.addOperand(baseReg);
1875 
1876  MCOperand segmentReg;
1878  mcInst.addOperand(segmentReg);
1879  return false;
1880 }
1881 
1882 /// translateDstIndex - Appends a destination index operand to an MCInst.
1883 ///
1884 /// @param mcInst - The MCInst to append to.
1885 /// @param insn - The internal instruction.
1886 
1887 static bool translateDstIndex(MCInst &mcInst, InternalInstruction &insn) {
1888  unsigned baseRegNo;
1889 
1890  if (insn.mode == MODE_64BIT)
1891  baseRegNo = insn.hasAdSize ? X86::EDI : X86::RDI;
1892  else if (insn.mode == MODE_32BIT)
1893  baseRegNo = insn.hasAdSize ? X86::DI : X86::EDI;
1894  else {
1895  assert(insn.mode == MODE_16BIT);
1896  baseRegNo = insn.hasAdSize ? X86::EDI : X86::DI;
1897  }
1898  MCOperand baseReg = MCOperand::createReg(baseRegNo);
1899  mcInst.addOperand(baseReg);
1900  return false;
1901 }
1902 
1903 /// translateImmediate - Appends an immediate operand to an MCInst.
1904 ///
/// The raw immediate is first sign-extended according to the operand's type
/// and encoding (see the two switches below). Operands typed XMM/YMM/ZMM are
/// then emitted as a register selected by (immediate >> 4) and the function
/// returns. Every other type is emitted through tryAddingSymbolicOperand(),
/// falling back to a plain immediate operand when that call returns false.
///
1905 /// @param mcInst - The MCInst to append to.
1906 /// @param immediate - The immediate value to append.
1907 /// @param operand - The operand, as stored in the descriptor table.
1908 /// @param insn - The internal instruction.
/// @param Dis - The disassembler, forwarded to tryAddingSymbolicOperand().
1909 static void translateImmediate(MCInst &mcInst, uint64_t immediate,
1910  const OperandSpecifier &operand,
1911  InternalInstruction &insn,
1912  const MCDisassembler *Dis) {
1913  // Sign-extend the immediate if necessary.
1914 
1915  OperandType type = (OperandType)operand.type;
1916 
1917  bool isBranch = false;
1918  uint64_t pcrel = 0;
1919  if (type == TYPE_REL) {
 // Relative operand: mark it as a branch and bias the value handed to
 // tryAddingSymbolicOperand() by startLocation + immediateOffset +
 // immediateSize. Note that pcrel is NOT added to the plain immediate
 // operand emitted on the fallback path.
1920  isBranch = true;
1921  pcrel = insn.startLocation +
1922  insn.immediateOffset + insn.immediateSize;
1923  switch (operand.encoding) {
1924  default:
1925  break;
1926  case ENCODING_Iv:
 // NOTE(review): this width switch is keyed on insn.displacementSize, not
 // insn.immediateSize -- confirm that is intended for ENCODING_Iv.
1927  switch (insn.displacementSize) {
1928  default:
1929  break;
1930  case 1:
1931  if(immediate & 0x80)
1932  immediate |= ~(0xffull);
1933  break;
1934  case 2:
1935  if(immediate & 0x8000)
1936  immediate |= ~(0xffffull);
1937  break;
1938  case 4:
1939  if(immediate & 0x80000000)
1940  immediate |= ~(0xffffffffull);
1941  break;
1942  case 8:
1943  break;
1944  }
1945  break;
1946  case ENCODING_IB:
1947  if(immediate & 0x80)
1948  immediate |= ~(0xffull);
1949  break;
1950  case ENCODING_IW:
1951  if(immediate & 0x8000)
1952  immediate |= ~(0xffffull);
1953  break;
1954  case ENCODING_ID:
1955  if(immediate & 0x80000000)
1956  immediate |= ~(0xffffffffull);
1957  break;
1958  }
1959  }
1960  // By default sign-extend all X86 immediates based on their encoding.
1961  else if (type == TYPE_IMM) {
1962  switch (operand.encoding) {
1963  default:
1964  break;
1965  case ENCODING_IB:
1966  if(immediate & 0x80)
1967  immediate |= ~(0xffull);
1968  break;
1969  case ENCODING_IW:
1970  if(immediate & 0x8000)
1971  immediate |= ~(0xffffull);
1972  break;
1973  case ENCODING_ID:
1974  if(immediate & 0x80000000)
1975  immediate |= ~(0xffffffffull);
1976  break;
1977  case ENCODING_IO:
1978  break;
1979  }
1980  }
1981 
 // Vector-register operands carried in an immediate: the register index is
 // taken from the immediate shifted right by four bits, and nothing else is
 // appended for these types.
1982  switch (type) {
1983  case TYPE_XMM:
1984  mcInst.addOperand(MCOperand::createReg(X86::XMM0 + (immediate >> 4)));
1985  return;
1986  case TYPE_YMM:
1987  mcInst.addOperand(MCOperand::createReg(X86::YMM0 + (immediate >> 4)));
1988  return;
1989  case TYPE_ZMM:
1990  mcInst.addOperand(MCOperand::createReg(X86::ZMM0 + (immediate >> 4)));
1991  return;
1992  default:
1993  // operand is 64 bits wide. Do nothing.
1994  break;
1995  }
1996 
 // Prefer a symbolic operand; only when the helper returns false is the
 // (possibly sign-extended) immediate appended as a plain value.
1997  if(!tryAddingSymbolicOperand(immediate + pcrel, isBranch, insn.startLocation,
1998  insn.immediateOffset, insn.immediateSize,
1999  mcInst, Dis))
2000  mcInst.addOperand(MCOperand::createImm(immediate));
2001 
 // TYPE_MOFFS operands get one additional (segment) operand.
 // NOTE(review): in this copy segmentReg is appended without ever being
 // assigned; the embedded listing numbers jump from 2003 to 2005, so the
 // assignment line appears to have been lost in extraction -- confirm against
 // the upstream file before relying on this text.
2002  if (type == TYPE_MOFFS) {
2003  MCOperand segmentReg;
2005  mcInst.addOperand(segmentReg);
2006  }
2007 }
2008 
2009 /// translateRMRegister - Translates a register stored in the R/M field of the
2010 /// ModR/M byte to its LLVM equivalent and appends it to an MCInst.
2011 /// @param mcInst - The MCInst to append to.
2012 /// @param insn - The internal instruction to extract the R/M field
2013 /// from.
2014 /// @return - 0 on success; -1 otherwise
2015 static bool translateRMRegister(MCInst &mcInst,
2016  InternalInstruction &insn) {
2017  if (insn.eaBase == EA_BASE_sib || insn.eaBase == EA_BASE_sib64) {
2018  debug("A R/M register operand may not have a SIB byte");
2019  return true;
2020  }
2021 
2022  switch (insn.eaBase) {
2023  default:
2024  debug("Unexpected EA base register");
2025  return true;
2026  case EA_BASE_NONE:
2027  debug("EA_BASE_NONE for ModR/M base");
2028  return true;
2029 #define ENTRY(x) case EA_BASE_##x:
2030  ALL_EA_BASES
2031 #undef ENTRY
2032  debug("A R/M register operand may not have a base; "
2033  "the operand must be a register.");
2034  return true;
2035 #define ENTRY(x) \
2036  case EA_REG_##x: \
2037  mcInst.addOperand(MCOperand::createReg(X86::x)); break;
2038  ALL_REGS
2039 #undef ENTRY
2040  }
2041 
2042  return false;
2043 }
2044 
2045 /// translateRMMemory - Translates a memory operand stored in the Mod and R/M
2046 /// fields of an internal instruction (and possibly its SIB byte) to a memory
2047 /// operand in LLVM's format, and appends it to an MCInst.
2048 ///
2049 /// @param mcInst - The MCInst to append to.
2050 /// @param insn - The instruction to extract Mod, R/M, and SIB fields
2051 /// from.
/// @param Dis - The disassembler, forwarded to tryAddingSymbolicOperand().
2052 /// @param ForceSIB - The instruction must use SIB.
2053 /// @return - false on success; true otherwise
2054 static bool translateRMMemory(MCInst &mcInst, InternalInstruction &insn,
2055  const MCDisassembler *Dis,
2056  bool ForceSIB = false) {
2057  // Addresses in an MCInst are represented as five operands:
2058  // 1. basereg (register) The R/M base, or (if there is a SIB) the
2059  // SIB base
2060  // 2. scaleamount (immediate) 1, or (if there is a SIB) the specified
2061  // scale amount
2062  // 3. indexreg (register) x86_registerNONE, or (if there is a SIB)
2063  // the index (which is multiplied by the
2064  // scale amount)
2065  // 4. displacement (immediate) 0, or the displacement if there is one
2066  // 5. segmentreg (register) x86_registerNONE for now, but could be set
2067  // if we have segment overrides
2068 
2069  MCOperand baseReg;
2070  MCOperand scaleAmount;
2071  MCOperand indexReg;
2072  MCOperand displacement;
2073  MCOperand segmentReg;
 // Bias added to the displacement for the symbolic lookup at the end of the
 // function; it stays 0 unless the 64-bit base-less path below sets it.
2074  uint64_t pcrel = 0;
2075 
 // SIB-addressed form: base, index and scale all come from the SIB fields.
2076  if (insn.eaBase == EA_BASE_sib || insn.eaBase == EA_BASE_sib64) {
2077  if (insn.sibBase != SIB_BASE_NONE) {
2078  switch (insn.sibBase) {
2079  default:
2080  debug("Unexpected sibBase");
2081  return true;
2082 #define ENTRY(x) \
2083  case SIB_BASE_##x: \
2084  baseReg = MCOperand::createReg(X86::x); break;
2086 #undef ENTRY
 // NOTE(review): the embedded listing numbers skip 2085, between the ENTRY
 // definition and its #undef, so the macro-list line that would expand the
 // SIB_BASE_* case labels appears to be missing from this copy -- confirm
 // against the upstream file.
2087  }
2088  } else {
2089  baseReg = MCOperand::createReg(X86::NoRegister);
2090  }
2091 
2092  if (insn.sibIndex != SIB_INDEX_NONE) {
2093  switch (insn.sibIndex) {
2094  default:
2095  debug("Unexpected sibIndex");
2096  return true;
 // NOTE(review): listing numbers 2100-2101 are absent below, so the first
 // two register-list lines ahead of REGS_XMM appear to be missing from this
 // copy -- confirm against the upstream file.
2097 #define ENTRY(x) \
2098  case SIB_INDEX_##x: \
2099  indexReg = MCOperand::createReg(X86::x); break;
2102  REGS_XMM
2103  REGS_YMM
2104  REGS_ZMM
2105 #undef ENTRY
2106  }
2107  } else {
2108  // Use EIZ/RIZ for a few ambiguous cases where the SIB byte is present,
2109  // but no index is used and modrm alone should have been enough.
2110  // -No base register in 32-bit mode. In 64-bit mode this is used to
2111  // avoid rip-relative addressing.
2112  // -Any base register used other than ESP/RSP/R12D/R12. Using these as a
2113  // base always requires a SIB byte.
2114  // -A scale other than 1 is used.
 // When ForceSIB is set the pseudo index is never used and the index operand
 // is NoRegister.
2115  if (!ForceSIB &&
2116  (insn.sibScale != 1 ||
2117  (insn.sibBase == SIB_BASE_NONE && insn.mode != MODE_64BIT) ||
2118  (insn.sibBase != SIB_BASE_NONE &&
2119  insn.sibBase != SIB_BASE_ESP && insn.sibBase != SIB_BASE_RSP &&
2120  insn.sibBase != SIB_BASE_R12D && insn.sibBase != SIB_BASE_R12))) {
2121  indexReg = MCOperand::createReg(insn.addressSize == 4 ? X86::EIZ :
2122  X86::RIZ);
2123  } else
2124  indexReg = MCOperand::createReg(X86::NoRegister);
2125  }
2126 
2127  scaleAmount = MCOperand::createImm(insn.sibScale);
2128  } else {
 // No SIB byte: the base (and, for the 16-bit pair forms, the index) come
 // directly from eaBase, and the scale is fixed at 1.
2129  switch (insn.eaBase) {
2130  case EA_BASE_NONE:
2131  if (insn.eaDisplacement == EA_DISP_NONE) {
2132  debug("EA_BASE_NONE and EA_DISP_NONE for ModR/M base");
2133  return true;
2134  }
 // In 64-bit mode a base-less form is emitted with EIP/RIP (chosen by
 // addressSize) as the base register; other modes use NoRegister.
 // NOTE(review): listing numbers 2137-2138 are absent, so the statement
 // starting at "pcrel = insn.startLocation +" is truncated and the call whose
 // trailing arguments follow has lost its head in this copy -- confirm the
 // exact text against the upstream file.
2135  if (insn.mode == MODE_64BIT){
2136  pcrel = insn.startLocation +
2139  insn.displacementOffset,
2140  insn.displacement + pcrel, Dis);
2141  // Section 2.2.1.6
2142  baseReg = MCOperand::createReg(insn.addressSize == 4 ? X86::EIP :
2143  X86::RIP);
2144  }
2145  else
2146  baseReg = MCOperand::createReg(X86::NoRegister);
2147 
2148  indexReg = MCOperand::createReg(X86::NoRegister);
2149  break;
 // Two-register base forms: the first register becomes the base operand and
 // the second the index operand.
2150  case EA_BASE_BX_SI:
2151  baseReg = MCOperand::createReg(X86::BX);
2152  indexReg = MCOperand::createReg(X86::SI);
2153  break;
2154  case EA_BASE_BX_DI:
2155  baseReg = MCOperand::createReg(X86::BX);
2156  indexReg = MCOperand::createReg(X86::DI);
2157  break;
2158  case EA_BASE_BP_SI:
2159  baseReg = MCOperand::createReg(X86::BP);
2160  indexReg = MCOperand::createReg(X86::SI);
2161  break;
2162  case EA_BASE_BP_DI:
2163  baseReg = MCOperand::createReg(X86::BP);
2164  indexReg = MCOperand::createReg(X86::DI);
2165  break;
2166  default:
2167  indexReg = MCOperand::createReg(X86::NoRegister);
2168  switch (insn.eaBase) {
2169  default:
2170  debug("Unexpected eaBase");
2171  return true;
2172  // Here, we will use the fill-ins defined above. However,
2173  // BX_SI, BX_DI, BP_SI, and BP_DI are all handled above and
2174  // sib and sib64 were handled in the top-level if, so they're only
2175  // placeholders to keep the compiler happy.
2176 #define ENTRY(x) \
2177  case EA_BASE_##x: \
2178  baseReg = MCOperand::createReg(X86::x); break;
2179  ALL_EA_BASES
2180 #undef ENTRY
 // A register value in eaBase is rejected: this function only handles memory
 // forms.
2181 #define ENTRY(x) case EA_REG_##x:
2182  ALL_REGS
2183 #undef ENTRY
2184  debug("A R/M memory operand may not be a register; "
2185  "the base field must be a base.");
2186  return true;
2187  }
2188  }
2189 
2190  scaleAmount = MCOperand::createImm(1);
2191  }
2192 
2193  displacement = MCOperand::createImm(insn.displacement);
2194 
 // NOTE(review): listing number 2195 is absent and segmentReg is never
 // assigned in the visible text before being appended below -- its
 // initialisation appears to have been lost in extraction; confirm against the
 // upstream file.
2196 
 // Append the five address operands in the order documented at the top of the
 // function. The displacement slot is filled by tryAddingSymbolicOperand()
 // when it returns true, otherwise by the plain displacement immediate.
2197  mcInst.addOperand(baseReg);
2198  mcInst.addOperand(scaleAmount);
2199  mcInst.addOperand(indexReg);
2200  if(!tryAddingSymbolicOperand(insn.displacement + pcrel, false,
2201  insn.startLocation, insn.displacementOffset,
2202  insn.displacementSize, mcInst, Dis))
2203  mcInst.addOperand(displacement);
2204  mcInst.addOperand(segmentReg);
2205  return false;
2206 }
2207 
2208 /// translateRM - Translates an operand stored in the R/M (and possibly SIB)
2209 /// byte of an instruction to LLVM form, and appends it to an MCInst.
2210 ///
2211 /// @param mcInst - The MCInst to append to.
2212 /// @param operand - The operand, as stored in the descriptor table.
2213 /// @param insn - The instruction to extract Mod, R/M, and SIB fields
2214 /// from.
2215 /// @return - 0 on success; nonzero otherwise
2216 static bool translateRM(MCInst &mcInst, const OperandSpecifier &operand,
2217  InternalInstruction &insn, const MCDisassembler *Dis) {
2218  switch (operand.type) {
2219  default:
2220  debug("Unexpected type for a R/M operand");
2221  return true;
2222  case TYPE_R8:
2223  case TYPE_R16:
2224  case TYPE_R32:
2225  case TYPE_R64:
2226  case TYPE_Rv:
2227  case TYPE_MM64:
2228  case TYPE_XMM:
2229  case TYPE_YMM:
2230  case TYPE_ZMM:
2231  case TYPE_TMM:
2232  case TYPE_VK_PAIR:
2233  case TYPE_VK:
2234  case TYPE_DEBUGREG:
2235  case TYPE_CONTROLREG:
2236  case TYPE_BNDR:
2237  return translateRMRegister(mcInst, insn);
2238  case TYPE_M:
2239  case TYPE_MVSIBX:
2240  case TYPE_MVSIBY:
2241  case TYPE_MVSIBZ:
2242  return translateRMMemory(mcInst, insn, Dis);
2243  case TYPE_MSIB:
2244  return translateRMMemory(mcInst, insn, Dis, true);
2245  }
2246 }
2247 
2248 /// translateFPRegister - Translates a stack position on the FPU stack to its
2249 /// LLVM form, and appends it to an MCInst.
2250 ///
2251 /// @param mcInst - The MCInst to append to.
2252 /// @param stackPos - The stack position to translate.
2253 static void translateFPRegister(MCInst &mcInst,
2254  uint8_t stackPos) {
2255  mcInst.addOperand(MCOperand::createReg(X86::ST0 + stackPos));
2256 }
2257 
2258 /// translateMaskRegister - Translates a 3-bit mask register number to
2259 /// LLVM form, and appends it to an MCInst.
2260 ///
2261 /// @param mcInst - The MCInst to append to.
2262 /// @param maskRegNum - Number of mask register from 0 to 7.
2263 /// @return - false on success; true otherwise.
2264 static bool translateMaskRegister(MCInst &mcInst,
2265  uint8_t maskRegNum) {
2266  if (maskRegNum >= 8) {
2267  debug("Invalid mask register number");
2268  return true;
2269  }
2270 
2271  mcInst.addOperand(MCOperand::createReg(X86::K0 + maskRegNum));
2272  return false;
2273 }
2274 
2275 /// translateOperand - Translates an operand stored in an internal instruction
2276 /// to LLVM's format and appends it to an MCInst.
2277 ///
/// Dispatches on operand.encoding to the translate* helper for that encoding.
///
2278 /// @param mcInst - The MCInst to append to.
2279 /// @param operand - The operand, as stored in the descriptor table.
2280 /// @param insn - The internal instruction.
/// @param Dis - The disassembler, forwarded to the helpers that take it.
2281 /// @return - false on success; true otherwise.
2282 static bool translateOperand(MCInst &mcInst, const OperandSpecifier &operand,
2283  InternalInstruction &insn,
2284  const MCDisassembler *Dis) {
2285  switch (operand.encoding) {
2286  default:
2287  debug("Unhandled operand encoding during translation");
2288  return true;
2289  case ENCODING_REG:
2290  translateRegister(mcInst, insn.reg);
2291  return false;
2292  case ENCODING_WRITEMASK:
2293  return translateMaskRegister(mcInst, insn.writemask);
2294  case ENCODING_SIB:
 // NOTE(review): listing numbers 2295-2296 are absent here, so further case
 // labels sharing this return appear to be missing from this copy -- confirm
 // the full label set against the upstream file.
2297  return translateRM(mcInst, operand, insn, Dis);
 // Immediate encodings: each one consumes the next entry of insn.immediates
 // and advances insn.numImmediatesTranslated.
2298  case ENCODING_IB:
2299  case ENCODING_IW:
2300  case ENCODING_ID:
2301  case ENCODING_IO:
2302  case ENCODING_Iv:
2303  case ENCODING_Ia:
2304  translateImmediate(mcInst,
2305  insn.immediates[insn.numImmediatesTranslated++],
2306  operand,
2307  insn,
2308  Dis);
2309  return false;
2310  case ENCODING_IRC:
2311  mcInst.addOperand(MCOperand::createImm(insn.RC));
2312  return false;
2313  case ENCODING_SI:
2314  return translateSrcIndex(mcInst, insn);
2315  case ENCODING_DI:
2316  return translateDstIndex(mcInst, insn);
 // Register encoded in the opcode: translated from insn.opcodeRegister.
2317  case ENCODING_RB:
2318  case ENCODING_RW:
2319  case ENCODING_RD:
2320  case ENCODING_RO:
2321  case ENCODING_Rv:
2322  translateRegister(mcInst, insn.opcodeRegister);
2323  return false;
 // The condition-code operand is read from the fixed slot immediates[1]; it
 // does not advance numImmediatesTranslated.
2324  case ENCODING_CC:
2325  mcInst.addOperand(MCOperand::createImm(insn.immediates[1]));
2326  return false;
 // The FP stack position is the low three bits of the ModR/M byte.
2327  case ENCODING_FP:
2328  translateFPRegister(mcInst, insn.modRM & 7);
2329  return false;
2330  case ENCODING_VVVV:
2331  translateRegister(mcInst, insn.vvvv);
2332  return false;
 // Duplicate operand: re-translate the operand at index
 // (operand.type - TYPE_DUP0) of insn.operands via a recursive call.
2333  case ENCODING_DUP:
2334  return translateOperand(mcInst, insn.operands[operand.type - TYPE_DUP0],
2335  insn, Dis);
2336  }
2337 }
2338 
2339 /// translateInstruction - Translates an internal instruction and all its
2340 /// operands to an MCInst.
2341 ///
2342 /// @param mcInst - The MCInst to populate with the instruction's data.
2343 /// @param insn - The internal instruction.
2344 /// @return - false on success; true otherwise.
2345 static bool translateInstruction(MCInst &mcInst,
2346  InternalInstruction &insn,
2347  const MCDisassembler *Dis) {
2348  if (!insn.spec) {
2349  debug("Instruction has no specification");
2350  return true;
2351  }
2352 
2353  mcInst.clear();
2354  mcInst.setOpcode(insn.instructionID);
2355  // If when reading the prefix bytes we determined the overlapping 0xf2 or 0xf3
2356  // prefix bytes should be disassembled as xrelease and xacquire then set the
2357  // opcode to those instead of the rep and repne opcodes.
2358  if (insn.xAcquireRelease) {
2359  if(mcInst.getOpcode() == X86::REP_PREFIX)
2360  mcInst.setOpcode(X86::XRELEASE_PREFIX);
2361  else if(mcInst.getOpcode() == X86::REPNE_PREFIX)
2362  mcInst.setOpcode(X86::XACQUIRE_PREFIX);
2363  }
2364 
2365  insn.numImmediatesTranslated = 0;
2366 
2367  for (const auto &Op : insn.operands) {
2368  if (Op.encoding != ENCODING_NONE) {
2369  if (translateOperand(mcInst, Op, insn, Dis)) {
2370  return true;
2371  }
2372  }
2373  }
2374 
2375  return false;
2376 }
2377 
2379  const MCSubtargetInfo &STI,
2380  MCContext &Ctx) {
2381  std::unique_ptr<const MCInstrInfo> MII(T.createMCInstrInfo());
2382  return new X86GenericDisassembler(STI, Ctx, std::move(MII));
2383 }
2384 
2386  // Register the disassembler.
2391 }
llvm::Check::Size
@ Size
Definition: FileCheck.h:73
llvm::X86Disassembler::MODE_16BIT
@ MODE_16BIT
Definition: X86DisassemblerDecoderCommon.h:470
i
i
Definition: README.txt:29
byte
SSE Variable shift can be custom lowered to something like which uses a small table unaligned load shuffle instead of going through memory byte
Definition: README-SSE.txt:11
llvm::X86Disassembler::InternalInstruction::xAcquireRelease
bool xAcquireRelease
Definition: X86DisassemblerDecoder.h:554
vvvvFromVEX3of3
#define vvvvFromVEX3of3(vex)
Definition: X86DisassemblerDecoder.h:56
llvm::X86Disassembler::InternalInstruction::vectorExtensionType
VectorExtensionType vectorExtensionType
Definition: X86DisassemblerDecoder.h:548
bFromXOP2of3
#define bFromXOP2of3(xop)
Definition: X86DisassemblerDecoder.h:67
baseFromSIB
#define baseFromSIB(sib)
Definition: X86DisassemblerDecoder.h:30
llvm::X86Disassembler::MODE_64BIT
@ MODE_64BIT
Definition: X86DisassemblerDecoderCommon.h:472
llvm::X86Disassembler::InternalInstruction::hasLockPrefix
bool hasLockPrefix
Definition: X86DisassemblerDecoder.h:561
llvm::X86Disassembler::InternalInstruction::displacement
int32_t displacement
Definition: X86DisassemblerDecoder.h:610
xFromXOP2of3
#define xFromXOP2of3(xop)
Definition: X86DisassemblerDecoder.h:66
MI
IRTranslator LLVM IR MI
Definition: IRTranslator.cpp:103
llvm
---------------------— PointerInfo ------------------------------------—
Definition: AllocatorList.h:23
THREEDNOW_MAP_SYM
#define THREEDNOW_MAP_SYM
Definition: X86DisassemblerDecoderCommon.h:33
lFromVEX2of2
#define lFromVEX2of2(vex)
Definition: X86DisassemblerDecoder.h:62
llvm::X86Disassembler::TYPE_NO_VEX_XOP
@ TYPE_NO_VEX_XOP
Definition: X86DisassemblerDecoder.h:512
wFromVEX3of3
#define wFromVEX3of3(vex)
Definition: X86DisassemblerDecoder.h:55
rFromEVEX2of4
#define rFromEVEX2of4(evex)
Definition: X86DisassemblerDecoder.h:36
type
llvm::X86Disassembler::VEX_LOB_0F3A
@ VEX_LOB_0F3A
Definition: X86DisassemblerDecoder.h:492
llvm::X86Disassembler::SEG_OVERRIDE_GS
@ SEG_OVERRIDE_GS
Definition: X86DisassemblerDecoder.h:484
OpcodeDecision
Definition: X86Disassembler.cpp:108
OpcodeDecision::modRMDecisions
ModRMDecision modRMDecisions[256]
Definition: X86Disassembler.cpp:109
ppFromXOP3of3
#define ppFromXOP3of3(xop)
Definition: X86DisassemblerDecoder.h:72
MCDisassembler.h
debug
#define debug(s)
Definition: X86Disassembler.cpp:96
T
llvm::MCOperand::createImm
static MCOperand createImm(int64_t Val)
Definition: MCInst.h:141
llvm::MCContext
Context object for machine code objects.
Definition: MCContext.h:72
llvm::X86Disassembler::ATTR_REXW
@ ATTR_REXW
Definition: X86DisassemblerDecoderCommon.h:58
llvm::X86::IP_HAS_OP_SIZE
@ IP_HAS_OP_SIZE
Definition: X86BaseInfo.h:58
llvm::getTheX86_64Target
Target & getTheX86_64Target()
Definition: X86TargetInfo.cpp:17
is64Bit
static bool is64Bit(const char *name)
Definition: X86Disassembler.cpp:1023
CASE_ENCODING_RM
#define CASE_ENCODING_RM
Definition: X86DisassemblerDecoderCommon.h:343
llvm::X86Disassembler::TYPE_EVEX
@ TYPE_EVEX
Definition: X86DisassemblerDecoder.h:515
llvm::X86Disassembler::InstructionSpecifier::operands
uint16_t operands
Definition: X86DisassemblerDecoder.h:522
llvm::X86Disassembler::SEG_OVERRIDE_SS
@ SEG_OVERRIDE_SS
Definition: X86DisassemblerDecoder.h:480
llvm::Target
Target - Wrapper for Target specific information.
Definition: TargetRegistry.h:137
op
#define op(i)
translateImmediate
static void translateImmediate(MCInst &mcInst, uint64_t immediate, const OperandSpecifier &operand, InternalInstruction &insn, const MCDisassembler *Dis)
translateImmediate - Appends an immediate operand to an MCInst.
Definition: X86Disassembler.cpp:1909
isREX
static bool isREX(struct InternalInstruction *insn, uint8_t prefix)
Definition: X86Disassembler.cpp:205
mmmmmFromVEX2of3
#define mmmmmFromVEX2of3(vex)
Definition: X86DisassemblerDecoder.h:54
ModRMDecision::instructionIDs
uint16_t instructionIDs
Definition: X86Disassembler.cpp:103
llvm::X86Disassembler::Reg
Reg
All possible values of the reg field in the ModR/M byte.
Definition: X86DisassemblerDecoder.h:469
ppFromVEX2of2
#define ppFromVEX2of2(vex)
Definition: X86DisassemblerDecoder.h:63
llvm::X86Disassembler::VEX_LOB_0F38
@ VEX_LOB_0F38
Definition: X86DisassemblerDecoder.h:491
Fail
#define Fail
Definition: AArch64Disassembler.cpp:261
llvm::X86Disassembler::InternalInstruction::length
size_t length
Definition: X86DisassemblerDecoder.h:539
llvm::X86Disassembler::InternalInstruction::consumedModRM
bool consumedModRM
Definition: X86DisassemblerDecoder.h:603
llvm::X86Disassembler::InternalInstruction::segmentOverride
SegmentOverride segmentOverride
Definition: X86DisassemblerDecoder.h:552
llvm::X86Disassembler::SEG_OVERRIDE_FS
@ SEG_OVERRIDE_FS
Definition: X86DisassemblerDecoder.h:483
modFromModRM
#define modFromModRM(modRM)
Definition: X86DisassemblerDecoder.h:25
scaleFromSIB
#define scaleFromSIB(sib)
Definition: X86DisassemblerDecoder.h:28
llvm::X86::IP_HAS_LOCK
@ IP_HAS_LOCK
Definition: X86BaseInfo.h:62
llvm::X86Disassembler::InternalInstruction::opcodeType
OpcodeType opcodeType
Definition: X86DisassemblerDecoder.h:584
readModRM
static int readModRM(struct InternalInstruction *insn)
Definition: X86Disassembler.cpp:607
translateFPRegister
static void translateFPRegister(MCInst &mcInst, uint8_t stackPos)
translateFPRegister - Translates a stack position on the FPU stack to its LLVM form,...
Definition: X86Disassembler.cpp:2253
nextByte
static int nextByte(ArrayRef< uint8_t > Bytes, uint64_t &Size)
Definition: WebAssemblyDisassembler.cpp:76
createX86Disassembler
static MCDisassembler * createX86Disassembler(const Target &T, const MCSubtargetInfo &STI, MCContext &Ctx)
Definition: X86Disassembler.cpp:2378
llvm::X86Disassembler::EA_BASE_NONE
@ EA_BASE_NONE
Definition: X86DisassemblerDecoder.h:426
llvm::X86Disassembler::TYPE_XOP
@ TYPE_XOP
Definition: X86DisassemblerDecoder.h:516
llvm::X86Disassembler::InternalInstruction::sibIndex
SIBIndex sibIndex
Definition: X86DisassemblerDecoder.h:636
llvm::X86Disassembler::XOP_MAP_SELECT_8
@ XOP_MAP_SELECT_8
Definition: X86DisassemblerDecoder.h:498
llvm::TargetRegistry::RegisterMCDisassembler
static void RegisterMCDisassembler(Target &T, Target::MCDisassemblerCtorTy Fn)
RegisterMCDisassembler - Register a MCDisassembler implementation for the given target.
Definition: TargetRegistry.h:916
rFromXOP2of3
#define rFromXOP2of3(xop)
Definition: X86DisassemblerDecoder.h:65
T
#define T
Definition: Mips16ISelLowering.cpp:341
ret
to esp esp setne al movzbw ax esp setg cl movzbw cx cmove cx cl jne LBB1_2 esp ret(also really horrible code on ppc). This is due to the expand code for 64-bit compares. GCC produces multiple branches
llvm::X86::BP_SI
@ BP_SI
Definition: X86Disassembler.cpp:1689
llvm::FeatureBitset
Container class for subtarget features.
Definition: SubtargetFeature.h:40
llvm::X86Disassembler::MODE_32BIT
@ MODE_32BIT
Definition: X86DisassemblerDecoderCommon.h:471
X86DisassemblerDecoder.h
Offset
uint64_t Offset
Definition: ELFObjHandler.cpp:81
rFromVEX2of3
#define rFromVEX2of3(vex)
Definition: X86DisassemblerDecoder.h:51
MAP6_SYM
#define MAP6_SYM
Definition: X86DisassemblerDecoderCommon.h:35
ModRMDecision
Definition: X86Disassembler.cpp:101
translateSrcIndex
static bool translateSrcIndex(MCInst &mcInst, InternalInstruction &insn)
translateSrcIndex - Appends a source index operand to an MCInst.
Definition: X86Disassembler.cpp:1862
vvvvFromVEX2of2
#define vvvvFromVEX2of2(vex)
Definition: X86DisassemblerDecoder.h:61
mmmFromEVEX2of4
#define mmmFromEVEX2of4(evex)
Definition: X86DisassemblerDecoder.h:40
llvm::X86Disassembler::ATTR_EVEX
@ ATTR_EVEX
Definition: X86DisassemblerDecoderCommon.h:63
llvm::MipsISD::Ret
@ Ret
Definition: MipsISelLowering.h:116
llvm::MCInst
Instances of this class represent a single low-level machine instruction.
Definition: MCInst.h:184
llvm::X86Disassembler::ATTR_EVEXK
@ ATTR_EVEXK
Definition: X86DisassemblerDecoderCommon.h:65
MAP5_SYM
#define MAP5_SYM
Definition: X86DisassemblerDecoderCommon.h:34
fixupReg
static int fixupReg(struct InternalInstruction *insn, const struct OperandSpecifier *op)
Definition: X86Disassembler.cpp:842
Format.h
ALL_SIB_BASES
#define ALL_SIB_BASES
Definition: X86DisassemblerDecoder.h:399
x86
Note that only the low bits of effective_addr2 are used On bit we don t eliminate the computation of the top half of effective_addr2 because we don t have whole function selection dags On x86
Definition: README.txt:318
rFromVEX2of2
#define rFromVEX2of2(vex)
Definition: X86DisassemblerDecoder.h:60
llvm::X86Disassembler::InternalInstruction::opcode
uint8_t opcode
Definition: X86DisassemblerDecoder.h:579
llvm::X86Disassembler::InternalInstruction::numImmediatesTranslated
uint8_t numImmediatesTranslated
Definition: X86DisassemblerDecoder.h:614
llvm::ArrayRef::empty
bool empty() const
empty - Check if the array is empty.
Definition: ArrayRef.h:160
LLVM_DEBUG
#define LLVM_DEBUG(X)
Definition: Debug.h:101
translateRMRegister
static bool translateRMRegister(MCInst &mcInst, InternalInstruction &insn)
translateRMRegister - Translates a register stored in the R/M field of the ModR/M byte to its LLVM eq...
Definition: X86Disassembler.cpp:2015
peek
static bool peek(struct InternalInstruction *insn, uint8_t &byte)
Definition: X86Disassembler.cpp:184
llvm::X86Disassembler::ATTR_XS
@ ATTR_XS
Definition: X86DisassemblerDecoderCommon.h:56
bFromREX
#define bFromREX(rex)
Definition: X86DisassemblerDecoder.h:34
getInstructionIDWithAttrMask
static int getInstructionIDWithAttrMask(uint16_t *instructionID, struct InternalInstruction *insn, uint16_t attrMask)
Definition: X86Disassembler.cpp:1034
llvm::MCInst::setOpcode
void setOpcode(unsigned Op)
Definition: MCInst.h:197
llvm::X86Disassembler::InternalInstruction::opcodeRegister
Reg opcodeRegister
Definition: X86DisassemblerDecoder.h:618
llvm::X86Disassembler::InstructionContext
InstructionContext
Definition: X86DisassemblerDecoderCommon.h:283
llvm::X86Disassembler::EA_DISP_32
@ EA_DISP_32
Definition: X86DisassemblerDecoder.h:465
llvm::dbgs
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:163
xFromREX
#define xFromREX(rex)
Definition: X86DisassemblerDecoder.h:33
x3
In x86 we generate this spiffy xmm0 xmm0 ret in x86 we generate this which could be xmm1 movss xmm1 xmm0 ret In sse4 we could use insertps to make both better Here s another testcase that could use x3
Definition: README-SSE.txt:547
EA_BASES_64BIT
#define EA_BASES_64BIT
Definition: X86DisassemblerDecoder.h:169
llvm::X86Disassembler::SEG_OVERRIDE_CS
@ SEG_OVERRIDE_CS
Definition: X86DisassemblerDecoder.h:479
r2FromEVEX2of4
#define r2FromEVEX2of4(evex)
Definition: X86DisassemblerDecoder.h:39
llvm::X86Disassembler::ATTR_EVEXL2
@ ATTR_EVEXL2
Definition: X86DisassemblerDecoderCommon.h:64
readSIB
static int readSIB(struct InternalInstruction *insn)
Definition: X86Disassembler.cpp:514
llvm::X86Disassembler::SIB_BASE_NONE
@ SIB_BASE_NONE
Definition: X86DisassemblerDecoder.h:453
llvm::X86Disassembler::MAP5
@ MAP5
Definition: X86DisassemblerDecoderCommon.h:300
llvm::X86Disassembler::XOP_MAP_SELECT_A
@ XOP_MAP_SELECT_A
Definition: X86DisassemblerDecoder.h:500
lFromVEX3of3
#define lFromVEX3of3(vex)
Definition: X86DisassemblerDecoder.h:57
llvm::X86Disassembler::THREEBYTE_38
@ THREEBYTE_38
Definition: X86DisassemblerDecoderCommon.h:294
llvm::X86::BP_DI
@ BP_DI
Definition: X86Disassembler.cpp:1690
llvm::X86Disassembler::VEX_LOB_0F
@ VEX_LOB_0F
Definition: X86DisassemblerDecoder.h:490
wFromEVEX3of4
#define wFromEVEX3of4(evex)
Definition: X86DisassemblerDecoder.h:41
llvm::X86Disassembler::InternalInstruction
The x86 internal instruction, which is produced by the decoder.
Definition: X86DisassemblerDecoder.h:526
llvm::X86Disassembler::SEG_OVERRIDE_ES
@ SEG_OVERRIDE_ES
Definition: X86DisassemblerDecoder.h:482
llvm::X86::IP_HAS_AD_SIZE
@ IP_HAS_AD_SIZE
Definition: X86BaseInfo.h:59
llvm::X86Disassembler::InternalInstruction::rexPrefix
uint8_t rexPrefix
Definition: X86DisassemblerDecoder.h:550
llvm::X86Disassembler::EA_DISP_16
@ EA_DISP_16
Definition: X86DisassemblerDecoder.h:464
llvm::X86Disassembler::SIB_INDEX_NONE
@ SIB_INDEX_NONE
Definition: X86DisassemblerDecoder.h:441
MCContext.h
MCInstrInfo.h
llvm::X86Disassembler::InternalInstruction::hasAdSize
bool hasAdSize
Definition: X86DisassemblerDecoder.h:557
vvvvFromEVEX3of4
#define vvvvFromEVEX3of4(evex)
Definition: X86DisassemblerDecoder.h:42
XOP8_MAP_SYM
#define XOP8_MAP_SYM
Definition: X86DisassemblerDecoderCommon.h:30
llvm::X86Disassembler::InternalInstruction::vvvv
Reg vvvv
Definition: X86DisassemblerDecoder.h:596
MCInst.h
readImmediate
static int readImmediate(struct InternalInstruction *insn, uint8_t size)
Definition: X86Disassembler.cpp:1427
EA_BASES_32BIT
#define EA_BASES_32BIT
Definition: X86DisassemblerDecoder.h:133
MCSubtargetInfo.h
llvm::SIInstrFlags::DS
@ DS
Definition: SIDefines.h:59
llvm::MCSubtargetInfo::getFeatureBits
const FeatureBitset & getFeatureBits() const
Definition: MCSubtargetInfo.h:111
readOperands
static int readOperands(struct InternalInstruction *insn)
Definition: X86Disassembler.cpp:1511
llvm::X86Disassembler::VEX_PREFIX_F2
@ VEX_PREFIX_F2
Definition: X86DisassemblerDecoder.h:508
llvm::X86Disassembler::InternalInstruction::startLocation
uint64_t startLocation
Definition: X86DisassemblerDecoder.h:537
ppFromVEX3of3
#define ppFromVEX3of3(vex)
Definition: X86DisassemblerDecoder.h:58
llvm::X86::sib64
@ sib64
Definition: X86Disassembler.cpp:1692
llvm::raw_ostream
This class implements an extremely fast bulk output stream that can only output to a stream.
Definition: raw_ostream.h:53
llvm::MCInst::setFlags
void setFlags(unsigned F)
Definition: MCInst.h:200
llvm::X86::BX_SI
@ BX_SI
Definition: X86Disassembler.cpp:1687
llvm::X86Disassembler::XOPA_MAP
@ XOPA_MAP
Definition: X86DisassemblerDecoderCommon.h:298
llvm::X86Disassembler::SIBIndex
SIBIndex
All possible values of the SIB index field.
Definition: X86DisassemblerDecoder.h:440
llvm::X86Disassembler::OperandEncoding
OperandEncoding
Definition: X86DisassemblerDecoderCommon.h:406
llvm::X86Disassembler::EA_DISP_NONE
@ EA_DISP_NONE
Definition: X86DisassemblerDecoder.h:462
translateOperand
static bool translateOperand(MCInst &mcInst, const OperandSpecifier &operand, InternalInstruction &insn, const MCDisassembler *Dis)
translateOperand - Translates an operand stored in an internal instruction to LLVM's format and appen...
Definition: X86Disassembler.cpp:2282
bFromVEX2of3
#define bFromVEX2of3(vex)
Definition: X86DisassemblerDecoder.h:53
wFromREX
#define wFromREX(rex)
Definition: X86DisassemblerDecoder.h:31
llvm::X86Disassembler::ATTR_VEX
@ ATTR_VEX
Definition: X86DisassemblerDecoderCommon.h:61
llvm::MCDisassembler::DecodeStatus
DecodeStatus
Ternary decode status.
Definition: MCDisassembler.h:100
llvm::MCInst::addOperand
void addOperand(const MCOperand Op)
Definition: MCInst.h:210
ONEBYTE_SYM
#define ONEBYTE_SYM
Definition: X86DisassemblerDecoderCommon.h:26
llvm::X86Disassembler::InternalInstruction::displacementSize
uint8_t displacementSize
Definition: X86DisassemblerDecoder.h:568
llvm::X86Disassembler::IC_max
@ IC_max
Definition: X86DisassemblerDecoderCommon.h:285
llvm::X86Disassembler::SIBBase
SIBBase
All possible values of the SIB base field.
Definition: X86DisassemblerDecoder.h:452
GENERIC_FIXUP_FUNC
#define GENERIC_FIXUP_FUNC(name, base, prefix, mask)
Definition: X86Disassembler.cpp:745
readMaskRegister
static int readMaskRegister(struct InternalInstruction *insn)
Definition: X86Disassembler.cpp:1498
getInstructionID
static int getInstructionID(struct InternalInstruction *insn, const MCInstrInfo *mii)
Definition: X86Disassembler.cpp:1089
llvm::X86Disassembler::InternalInstruction::modRM
uint8_t modRM
Definition: X86DisassemblerDecoder.h:604
llvm::X86Disassembler::InternalInstruction::eaRegBase
EABase eaRegBase
Definition: X86DisassemblerDecoder.h:624
llvm::X86Disassembler::TYPE_VEX_2B
@ TYPE_VEX_2B
Definition: X86DisassemblerDecoder.h:513
llvm::getTheX86_32Target
Target & getTheX86_32Target()
Definition: X86TargetInfo.cpp:13
lFromXOP3of3
#define lFromXOP3of3(xop)
Definition: X86DisassemblerDecoder.h:71
regFromModRM
#define regFromModRM(modRM)
Definition: X86DisassemblerDecoder.h:26
LLVMInitializeX86Disassembler
LLVM_EXTERNAL_VISIBILITY void LLVMInitializeX86Disassembler()
Definition: X86Disassembler.cpp:2385
X86MCTargetDesc.h
llvm::X86Disassembler::ATTR_OPSIZE
@ ATTR_OPSIZE
Definition: X86DisassemblerDecoderCommon.h:59
llvm::X86Disassembler::InternalInstruction::RC
uint8_t RC
Definition: X86DisassemblerDecoder.h:641
llvm::X86Disassembler::VEX_PREFIX_F3
@ VEX_PREFIX_F3
Definition: X86DisassemblerDecoder.h:507
llvm::X86Disassembler::InternalInstruction::bytes
llvm::ArrayRef< uint8_t > bytes
Definition: X86DisassemblerDecoder.h:528
llvm::X86::IP_HAS_REPEAT
@ IP_HAS_REPEAT
Definition: X86BaseInfo.h:61
index
splat index
Definition: README_ALTIVEC.txt:181
uint64_t
llvm::X86Disassembler::ATTR_EVEXB
@ ATTR_EVEXB
Definition: X86DisassemblerDecoderCommon.h:67
llvm::X86Disassembler::VEX_LOB_MAP6
@ VEX_LOB_MAP6
Definition: X86DisassemblerDecoder.h:494
llvm::X86Disassembler::ATTR_VEXL
@ ATTR_VEXL
Definition: X86DisassemblerDecoderCommon.h:62
tryAddingSymbolicOperand
static bool tryAddingSymbolicOperand(int64_t Value, bool isBranch, uint64_t Address, uint64_t Offset, uint64_t Width, MCInst &MI, const MCDisassembler *Dis)
tryAddingSymbolicOperand - tries to add a symbolic operand in place of the immediate Value in the MCIn...
Definition: X86Disassembler.cpp:1827
const
aarch64 promote const
Definition: AArch64PromoteConstant.cpp:232
LLVM_EXTERNAL_VISIBILITY
#define LLVM_EXTERNAL_VISIBILITY
Definition: Compiler.h:132
llvm::X86AS::GS
@ GS
Definition: X86.h:187
llvm::X86Disassembler::XOP8_MAP
@ XOP8_MAP
Definition: X86DisassemblerDecoderCommon.h:296
llvm::MCDisassembler::tryAddingPcLoadReferenceComment
void tryAddingPcLoadReferenceComment(int64_t Value, uint64_t Address) const
Definition: MCDisassembler.cpp:36
move
compiles ldr LCPI1_0 ldr ldr mov lsr tst moveq r1 ldr LCPI1_1 and r0 bx lr It would be better to do something like to fold the shift into the conditional move
Definition: README.txt:546
llvm::MCDisassembler
Superclass for all disassemblers.
Definition: MCDisassembler.h:76
ContextDecision
Definition: X86Disassembler.cpp:117
llvm::X86Disassembler::InternalInstruction::writemask
Reg writemask
Definition: X86DisassemblerDecoder.h:599
vvvvFromXOP3of3
#define vvvvFromXOP3of3(vex)
Definition: X86DisassemblerDecoder.h:70
decode
static InstrUID decode(OpcodeType type, InstructionContext insnContext, uint8_t opcode, uint8_t modRM)
Definition: X86Disassembler.cpp:123
llvm::X86Disassembler::XOP_MAP_SELECT_9
@ XOP_MAP_SELECT_9
Definition: X86DisassemblerDecoder.h:499
llvm::X86Disassembler::InternalInstruction::immediates
uint64_t immediates[2]
Definition: X86DisassemblerDecoder.h:615
llvm::MCPhysReg
uint16_t MCPhysReg
An unsigned integer type large enough to represent all physical registers, but not necessarily virtua...
Definition: MCRegister.h:20
llvm::X86Disassembler::InternalInstruction::eaBase
EABase eaBase
Definition: X86DisassemblerDecoder.h:629
mmmmmFromXOP2of3
#define mmmmmFromXOP2of3(xop)
Definition: X86DisassemblerDecoder.h:68
llvm::X86Disassembler::EABase
EABase
All possible values of the base field for effective-address computations, a.k.a.
Definition: X86DisassemblerDecoder.h:425
llvm::MCOI::OperandType
OperandType
Operands are tagged with one of the values of this enum.
Definition: MCInstrDesc.h:56
llvm::X86Disassembler::InternalInstruction::reg
Reg reg
Definition: X86DisassemblerDecoder.h:632
llvm::X86Disassembler::InternalInstruction::sib
uint8_t sib
Definition: X86DisassemblerDecoder.h:607
llvm::X86Disassembler::InternalInstruction::immediateOffset
uint8_t immediateOffset
Definition: X86DisassemblerDecoder.h:574
llvm::X86Disassembler::ONEBYTE
@ ONEBYTE
Definition: X86DisassemblerDecoderCommon.h:292
llvm::X86::IP_NO_PREFIX
@ IP_NO_PREFIX
Definition: X86BaseInfo.h:57
llvm::HighlightColor::Address
@ Address
ppFromEVEX3of4
#define ppFromEVEX3of4(evex)
Definition: X86DisassemblerDecoder.h:43
rmFromModRM
#define rmFromModRM(modRM)
Definition: X86DisassemblerDecoder.h:27
llvm::X86Disassembler::InternalInstruction::spec
const InstructionSpecifier * spec
Definition: X86DisassemblerDecoder.h:588
llvm::X86Disassembler::ATTR_ADSIZE
@ ATTR_ADSIZE
Definition: X86DisassemblerDecoderCommon.h:60
assert
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
translateInstruction
static bool translateInstruction(MCInst &target, InternalInstruction &source, const MCDisassembler *Dis)
translateInstruction - Translates an internal instruction and all its operands to an MCInst.
Definition: X86Disassembler.cpp:2345
llvm::move
OutputIt move(R &&Range, OutputIt Out)
Provide wrappers to std::move which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1609
SI
StandardInstrumentations SI(Debug, VerifyEach)
base
therefore end up llgh r3 lr r0 br r14 but truncating the load would lh r3 br r14 Functions ret i64 and ought to be implemented ngr r0 br r14 but two address optimizations reverse the order of the AND and ngr r2 lgr r0 br r14 CodeGen SystemZ and ll has several examples of this Out of range displacements are usually handled by loading the full address into a register In many cases it would be better to create an anchor point instead E g i64 base
Definition: README.txt:125
llvm::X86Disassembler::InternalInstruction::instructionID
uint16_t instructionID
Definition: X86DisassemblerDecoder.h:586
llvm::X86Disassembler::ATTR_EVEXKZ
@ ATTR_EVEXKZ
Definition: X86DisassemblerDecoderCommon.h:66
isBranch
static bool isBranch(unsigned Opcode)
Definition: R600InstrInfo.cpp:647
llvm::X86Disassembler::OperandSpecifier::encoding
uint8_t encoding
Definition: X86DisassemblerDecoderCommon.h:460
llvm::MCDisassembler::tryAddingSymbolicOperand
bool tryAddingSymbolicOperand(MCInst &Inst, int64_t Value, uint64_t Address, bool IsBranch, uint64_t Offset, uint64_t InstSize) const
Definition: MCDisassembler.cpp:26
llvm::MCInstrInfo::getName
StringRef getName(unsigned Opcode) const
Returns the name for the instructions with the given opcode.
Definition: MCInstrInfo.h:68
llvm::X86Disassembler::InternalInstruction::addressSize
uint8_t addressSize
Definition: X86DisassemblerDecoder.h:567
llvm::X86Disassembler::InternalInstruction::numImmediatesConsumed
uint8_t numImmediatesConsumed
Definition: X86DisassemblerDecoder.h:613
zFromEVEX4of4
#define zFromEVEX4of4(evex)
Definition: X86DisassemblerDecoder.h:44
llvm::X86Disassembler::InternalInstruction::operands
ArrayRef< OperandSpecifier > operands
Definition: X86DisassemblerDecoder.h:643
llvm::X86Disassembler::XOP9_MAP
@ XOP9_MAP
Definition: X86DisassemblerDecoderCommon.h:297
llvm::X86::BX_DI
@ BX_DI
Definition: X86Disassembler.cpp:1688
llvm::X86Disassembler::InternalInstruction::displacementOffset
uint8_t displacementOffset
Definition: X86DisassemblerDecoder.h:573
llvm::X86Disassembler::OpcodeType
OpcodeType
Definition: X86DisassemblerDecoderCommon.h:291
llvm::size
auto size(R &&Range, std::enable_if_t< std::is_base_of< std::random_access_iterator_tag, typename std::iterator_traits< decltype(Range.begin())>::iterator_category >::value, void > *=nullptr)
Get the size of a range.
Definition: STLExtras.h:1532
llvm::HexagonMCInstrInfo::isPrefix
bool isPrefix(MCInstrInfo const &MCII, MCInst const &MCI)
Definition: HexagonMCInstrInfo.cpp:724
llvm::ArrayRef< uint8_t >
llvm::StringRef
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:58
bFromEVEX2of4
#define bFromEVEX2of4(evex)
Definition: X86DisassemblerDecoder.h:38
llvm::X86Disassembler::InternalInstruction::regBase
Reg regBase
Definition: X86DisassemblerDecoder.h:625
readVVVV
static int readVVVV(struct InternalInstruction *insn)
Definition: X86Disassembler.cpp:1471
llvm_unreachable
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
Definition: ErrorHandling.h:136
llvm::MCOperand::createReg
static MCOperand createReg(unsigned Reg)
Definition: MCInst.h:134
if
if(llvm_vc STREQUAL "") set(fake_version_inc "$
Definition: CMakeLists.txt:14
llvm::X86Disassembler::MAP6
@ MAP6
Definition: X86DisassemblerDecoderCommon.h:301
uint32_t
translateDstIndex
static bool translateDstIndex(MCInst &mcInst, InternalInstruction &insn)
translateDstIndex - Appends a destination index operand to an MCInst.
Definition: X86Disassembler.cpp:1887
llvm::X86Disassembler::OperandSpecifier
The specification for how to extract and interpret one operand.
Definition: X86DisassemblerDecoderCommon.h:459
readOpcodeRegister
static int readOpcodeRegister(struct InternalInstruction *insn, uint8_t size)
Definition: X86Disassembler.cpp:1384
llvm::X86Disassembler::InternalInstruction::repeatPrefix
uint8_t repeatPrefix
Definition: X86DisassemblerDecoder.h:563
translateRM
static bool translateRM(MCInst &mcInst, const OperandSpecifier &operand, InternalInstruction &insn, const MCDisassembler *Dis)
translateRM - Translates an operand stored in the R/M (and possibly SIB) byte of an instruction to LL...
Definition: X86Disassembler.cpp:2216
llvm::X86Disassembler::InternalInstruction::eaDisplacement
EADisplacement eaDisplacement
Definition: X86DisassemblerDecoder.h:630
rFromREX
#define rFromREX(rex)
Definition: X86DisassemblerDecoder.h:32
llvm::format
format_object< Ts... > format(const char *Fmt, const Ts &... Vals)
These are helper functions used to produce formatted output.
Definition: Format.h:124
llvm::X86Disassembler::VEX_PREFIX_66
@ VEX_PREFIX_66
Definition: X86DisassemblerDecoder.h:506
tryAddingPcLoadReferenceComment
static void tryAddingPcLoadReferenceComment(uint64_t Address, uint64_t Value, const void *Decoder)
tryAddingPcLoadReferenceComment - tries to add a comment as to what is being referenced by a load inst...
Definition: X86Disassembler.cpp:1842
ContextDecision::opcodeDecisions
OpcodeDecision opcodeDecisions[IC_max]
Definition: X86Disassembler.cpp:118
LLVM_FALLTHROUGH
#define LLVM_FALLTHROUGH
LLVM_FALLTHROUGH - Mark fallthrough cases in switch statements.
Definition: Compiler.h:273
readOpcode
static bool readOpcode(struct InternalInstruction *insn)
Definition: X86Disassembler.cpp:879
llvm::X86Disassembler::InternalInstruction::readerCursor
uint64_t readerCursor
Definition: X86DisassemblerDecoder.h:530
TWOBYTE_SYM
#define TWOBYTE_SYM
Definition: X86DisassemblerDecoderCommon.h:27
llvm::X86Disassembler::InternalInstruction::mode
DisassemblerMode mode
Definition: X86DisassemblerDecoder.h:535
llvm::X86Disassembler
Definition: X86DisassemblerDecoderCommon.h:22
v2FromEVEX4of4
#define v2FromEVEX4of4(evex)
Definition: X86DisassemblerDecoder.h:48
aaaFromEVEX4of4
#define aaaFromEVEX4of4(evex)
Definition: X86DisassemblerDecoder.h:49
REGS_ZMM
#define REGS_ZMM
Definition: X86DisassemblerDecoder.h:283
name
static const char * name
Definition: SVEIntrinsicOpts.cpp:78
readDisplacement
static int readDisplacement(struct InternalInstruction *insn)
Definition: X86Disassembler.cpp:576
llvm::N86::EDI
@ EDI
Definition: X86MCTargetDesc.h:51
ALL_REGS
#define ALL_REGS
Definition: X86DisassemblerDecoder.h:403
llvm::X86Disassembler::InternalInstruction::hasOpSize
bool hasOpSize
Definition: X86DisassemblerDecoder.h:559
llvm::X86Disassembler::ATTR_XD
@ ATTR_XD
Definition: X86DisassemblerDecoderCommon.h:57
llvm::MCInstrInfo
Interface to description of machine instruction set.
Definition: MCInstrInfo.h:25
llvm::X86::sib
@ sib
Definition: X86Disassembler.cpp:1691
llvm::X86Disassembler::SEG_OVERRIDE_max
@ SEG_OVERRIDE_max
Definition: X86DisassemblerDecoder.h:485
REGS_YMM
#define REGS_YMM
Definition: X86DisassemblerDecoder.h:249
std
Definition: BitVector.h:838
XOPA_MAP_SYM
#define XOPA_MAP_SYM
Definition: X86DisassemblerDecoderCommon.h:32
translateRegister
static void translateRegister(MCInst &mcInst, Reg reg)
translateRegister - Translates an internal register to the appropriate LLVM register,...
Definition: X86Disassembler.cpp:1799
ALL_EA_BASES
#define ALL_EA_BASES
Definition: X86DisassemblerDecoder.h:394
uint16_t
translateRMMemory
static bool translateRMMemory(MCInst &mcInst, InternalInstruction &insn, const MCDisassembler *Dis, bool ForceSIB=false)
translateRMMemory - Translates a memory operand stored in the Mod and R/M fields of an internal instr...
Definition: X86Disassembler.cpp:2054
THREEBYTE38_SYM
#define THREEBYTE38_SYM
Definition: X86DisassemblerDecoderCommon.h:28
llvm::AMDGPU::SendMsg::Op
Op
Definition: SIDefines.h:321
XOP9_MAP_SYM
#define XOP9_MAP_SYM
Definition: X86DisassemblerDecoderCommon.h:31
translateMaskRegister
static bool translateMaskRegister(MCInst &mcInst, uint8_t maskRegNum)
translateMaskRegister - Translates a 3-bit mask register number to LLVM form, and appends it to an MC...
Definition: X86Disassembler.cpp:2264
llvm::X86AS::FS
@ FS
Definition: X86.h:188
Success
#define Success
Definition: AArch64Disassembler.cpp:260
llvm::X86Disassembler::OperandSpecifier::type
uint8_t type
Definition: X86DisassemblerDecoderCommon.h:461
lFromEVEX4of4
#define lFromEVEX4of4(evex)
Definition: X86DisassemblerDecoder.h:46
llvm::X86::IP_HAS_REPEAT_NE
@ IP_HAS_REPEAT_NE
Definition: X86BaseInfo.h:60
llvm::MCInst::getOpcode
unsigned getOpcode() const
Definition: MCInst.h:198
llvm::X86Disassembler::TWOBYTE
@ TWOBYTE
Definition: X86DisassemblerDecoderCommon.h:293
llvm::X86Disassembler::TYPE_VEX_3B
@ TYPE_VEX_3B
Definition: X86DisassemblerDecoder.h:514
llvm::AMDGPU::Hwreg::Width
Width
Definition: SIDefines.h:410
wFromXOP3of3
#define wFromXOP3of3(xop)
Definition: X86DisassemblerDecoder.h:69
REGS_XMM
#define REGS_XMM
Definition: X86DisassemblerDecoder.h:215
THREEBYTE3A_SYM
#define THREEBYTE3A_SYM
Definition: X86DisassemblerDecoderCommon.h:29
llvm::X86Disassembler::EA_DISP_8
@ EA_DISP_8
Definition: X86DisassemblerDecoder.h:463
X86BaseInfo.h
llvm::X86AS::SS
@ SS
Definition: X86.h:189
llvm::X86Disassembler::DisassemblerMode
DisassemblerMode
Decoding mode for the Intel disassembler.
Definition: X86DisassemblerDecoderCommon.h:469
llvm::X86Disassembler::InternalInstruction::mandatoryPrefix
uint8_t mandatoryPrefix
Definition: X86DisassemblerDecoder.h:544
l2FromEVEX4of4
#define l2FromEVEX4of4(evex)
Definition: X86DisassemblerDecoder.h:45
llvm::N86::ESI
@ ESI
Definition: X86MCTargetDesc.h:51
llvm::X86Disassembler::InternalInstruction::sibIndexBase
SIBIndex sibIndexBase
Definition: X86DisassemblerDecoder.h:635
llvm::ArrayRef::size
size_t size() const
size - Get the array size.
Definition: ArrayRef.h:165
INSTRUCTIONS_SYM
#define INSTRUCTIONS_SYM
Definition: X86DisassemblerDecoderCommon.h:24
llvm::X86Disassembler::InternalInstruction::sibScale
uint8_t sibScale
Definition: X86DisassemblerDecoder.h:637
llvm::StringRef::data
const LLVM_NODISCARD char * data() const
data - Get a pointer to the start of the string (which may not be null terminated).
Definition: StringRef.h:149
is16BitEquivalent
static bool is16BitEquivalent(const char *orig, const char *equiv)
Definition: X86Disassembler.cpp:1004
consume
static bool consume(InternalInstruction *insn, T &ptr)
Definition: X86Disassembler.cpp:192
bFromEVEX4of4
#define bFromEVEX4of4(evex)
Definition: X86DisassemblerDecoder.h:47
llvm::MCOperand
Instances of this class represent operands of the MCInst class.
Definition: MCInst.h:36
CASE_ENCODING_VSIB
#define CASE_ENCODING_VSIB
Definition: X86DisassemblerDecoderCommon.h:352
xFromEVEX2of4
#define xFromEVEX2of4(evex)
Definition: X86DisassemblerDecoder.h:37
llvm::X86Disassembler::OperandType
OperandType
Definition: X86DisassemblerDecoderCommon.h:452
llvm::X86Disassembler::VEX_LOB_MAP5
@ VEX_LOB_MAP5
Definition: X86DisassemblerDecoder.h:493
xFromVEX2of3
#define xFromVEX2of3(vex)
Definition: X86DisassemblerDecoder.h:52
llvm::X86Disassembler::THREEDNOW_MAP
@ THREEDNOW_MAP
Definition: X86DisassemblerDecoderCommon.h:299
llvm::X86Disassembler::InstrUID
uint16_t InstrUID
Definition: X86DisassemblerDecoderCommon.h:311
llvm::X86Disassembler::SEG_OVERRIDE_DS
@ SEG_OVERRIDE_DS
Definition: X86DisassemblerDecoder.h:481
llvm::X86Disassembler::InstructionSpecifier
The specification for how to extract and interpret a full instruction and its operands.
Definition: X86DisassemblerDecoder.h:521
readPrefixes
static int readPrefixes(struct InternalInstruction *insn)
Definition: X86Disassembler.cpp:214
raw_ostream.h
llvm::X86Disassembler::InternalInstruction::registerSize
uint8_t registerSize
Definition: X86DisassemblerDecoder.h:566
indexFromSIB
#define indexFromSIB(sib)
Definition: X86DisassemblerDecoder.h:29
segmentRegnums
static const uint8_t segmentRegnums[SEG_OVERRIDE_max]
Definition: X86Disassembler.cpp:1848
llvm::X86Disassembler::ATTR_64BIT
@ ATTR_64BIT
Definition: X86DisassemblerDecoderCommon.h:55
X86
Unrolling by would eliminate the &in both leading to a net reduction in code size The resultant code would then also be suitable for exit value computation We miss a bunch of rotate opportunities on various including etc On X86
Definition: README.txt:568
X86TargetInfo.h
TargetRegistry.h
ModRMDecision::modrm_type
uint8_t modrm_type
Definition: X86Disassembler.cpp:102
MCExpr.h
llvm::MCSubtargetInfo
Generic base class for all target subtargets.
Definition: MCSubtargetInfo.h:75
llvm::X86Disassembler::THREEBYTE_3A
@ THREEBYTE_3A
Definition: X86DisassemblerDecoderCommon.h:295
llvm::X86Disassembler::ATTR_NONE
@ ATTR_NONE
Definition: X86DisassemblerDecoderCommon.h:54
llvm::Value
LLVM Value Representation.
Definition: Value.h:75
Debug.h
llvm::MCInst::clear
void clear()
Definition: MCInst.h:215
llvm::X86Disassembler::InternalInstruction::immediateSize
uint8_t immediateSize
Definition: X86DisassemblerDecoder.h:569
llvm::X86Disassembler::InternalInstruction::vectorExtensionPrefix
uint8_t vectorExtensionPrefix[4]
Definition: X86DisassemblerDecoder.h:546
llvm::X86Disassembler::InternalInstruction::sibBase
SIBBase sibBase
Definition: X86DisassemblerDecoder.h:638