LLVM  3.7.0
X86DisassemblerDecoder.h
Go to the documentation of this file.
1 //===-- X86DisassemblerDecoder.h - Disassembler decoder ---------*- C++ -*-===//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This file is part of the X86 Disassembler.
11 // It contains the public interface of the instruction decoder.
12 // Documentation for the disassembler can be found in X86Disassembler.h.
13 //
14 //===----------------------------------------------------------------------===//
15 
16 #ifndef LLVM_LIB_TARGET_X86_DISASSEMBLER_X86DISASSEMBLERDECODER_H
17 #define LLVM_LIB_TARGET_X86_DISASSEMBLER_X86DISASSEMBLERDECODER_H
18 
20 #include "llvm/ADT/ArrayRef.h"
21 
22 namespace llvm {
23 namespace X86Disassembler {
24 
25 // Accessor functions for various fields of an Intel instruction
// Each macro masks one bit field out of a raw byte and shifts it down to bit 0.
// ModR/M byte: mod = bits 7:6, reg = bits 5:3, r/m = bits 2:0.
26 #define modFromModRM(modRM) (((modRM) & 0xc0) >> 6)
27 #define regFromModRM(modRM) (((modRM) & 0x38) >> 3)
28 #define rmFromModRM(modRM) ((modRM) & 0x7)
// SIB byte: scale = bits 7:6, index = bits 5:3, base = bits 2:0.
29 #define scaleFromSIB(sib) (((sib) & 0xc0) >> 6)
30 #define indexFromSIB(sib) (((sib) & 0x38) >> 3)
31 #define baseFromSIB(sib) ((sib) & 0x7)
// REX prefix flags: W = bit 3, R = bit 2, X = bit 1, B = bit 0.
32 #define wFromREX(rex) (((rex) & 0x8) >> 3)
33 #define rFromREX(rex) (((rex) & 0x4) >> 2)
34 #define xFromREX(rex) (((rex) & 0x2) >> 1)
35 #define bFromREX(rex) ((rex) & 0x1)
36 
// Accessors for the fields of an EVEX prefix. Each macro takes the raw value
// of one prefix byte and yields the field shifted down to bit 0.
// The r, x, b, r2, vvvv and v2 accessors complement the byte before masking,
// so they return the logical (un-inverted) value of those fields.
// Every argument is fully parenthesized so that compound expressions such as
// `a | b` are complemented/masked as a whole.
#define rFromEVEX2of4(evex) (((~(evex)) & 0x80) >> 7)
#define xFromEVEX2of4(evex) (((~(evex)) & 0x40) >> 6)
#define bFromEVEX2of4(evex) (((~(evex)) & 0x20) >> 5)
#define r2FromEVEX2of4(evex) (((~(evex)) & 0x10) >> 4)
#define mmFromEVEX2of4(evex) ((evex) & 0x3)
#define wFromEVEX3of4(evex) (((evex) & 0x80) >> 7)
#define vvvvFromEVEX3of4(evex) (((~(evex)) & 0x78) >> 3)
#define ppFromEVEX3of4(evex) ((evex) & 0x3)
#define zFromEVEX4of4(evex) (((evex) & 0x80) >> 7)
#define l2FromEVEX4of4(evex) (((evex) & 0x40) >> 6)
#define lFromEVEX4of4(evex) (((evex) & 0x20) >> 5)
#define bFromEVEX4of4(evex) (((evex) & 0x10) >> 4)
#define v2FromEVEX4of4(evex) (((~(evex)) & 0x8) >> 3)
#define aaaFromEVEX4of4(evex) ((evex) & 0x7)
51 
// Accessors for the fields of a three-byte VEX prefix (the NofM suffix
// presumably names byte N of the M-byte prefix -- confirm against the decoder).
// The r, x, b and vvvv accessors complement the byte before masking, so they
// return the logical (un-inverted) value of those fields.
52 #define rFromVEX2of3(vex) (((~(vex)) & 0x80) >> 7)
53 #define xFromVEX2of3(vex) (((~(vex)) & 0x40) >> 6)
54 #define bFromVEX2of3(vex) (((~(vex)) & 0x20) >> 5)
55 #define mmmmmFromVEX2of3(vex) ((vex) & 0x1f)
56 #define wFromVEX3of3(vex) (((vex) & 0x80) >> 7)
57 #define vvvvFromVEX3of3(vex) (((~(vex)) & 0x78) >> 3)
58 #define lFromVEX3of3(vex) (((vex) & 0x4) >> 2)
59 #define ppFromVEX3of3(vex) ((vex) & 0x3)
60 
// Accessors for the fields of a two-byte VEX prefix; all four read the same
// byte. The r and vvvv accessors complement the byte before masking, so they
// return the logical (un-inverted) value of those fields.
61 #define rFromVEX2of2(vex) (((~(vex)) & 0x80) >> 7)
62 #define vvvvFromVEX2of2(vex) (((~(vex)) & 0x78) >> 3)
63 #define lFromVEX2of2(vex) (((vex) & 0x4) >> 2)
64 #define ppFromVEX2of2(vex) ((vex) & 0x3)
65 
// Accessors for the fields of a three-byte XOP prefix. The bit positions are
// the same as those used by the three-byte VEX accessors.
// The r, x, b and vvvv accessors complement the byte before masking, so they
// return the logical (un-inverted) value of those fields.
#define rFromXOP2of3(xop) (((~(xop)) & 0x80) >> 7)
#define xFromXOP2of3(xop) (((~(xop)) & 0x40) >> 6)
#define bFromXOP2of3(xop) (((~(xop)) & 0x20) >> 5)
#define mmmmmFromXOP2of3(xop) ((xop) & 0x1f)
#define wFromXOP3of3(xop) (((xop) & 0x80) >> 7)
#define vvvvFromXOP3of3(xop) (((~(xop)) & 0x78) >> 3)
#define lFromXOP3of3(xop) (((xop) & 0x4) >> 2)
#define ppFromXOP3of3(xop) ((xop) & 0x3)
74 
75 // These enums represent Intel registers for use by the decoder.
76 #define REGS_8BIT \
77  ENTRY(AL) \
78  ENTRY(CL) \
79  ENTRY(DL) \
80  ENTRY(BL) \
81  ENTRY(AH) \
82  ENTRY(CH) \
83  ENTRY(DH) \
84  ENTRY(BH) \
85  ENTRY(R8B) \
86  ENTRY(R9B) \
87  ENTRY(R10B) \
88  ENTRY(R11B) \
89  ENTRY(R12B) \
90  ENTRY(R13B) \
91  ENTRY(R14B) \
92  ENTRY(R15B) \
93  ENTRY(SPL) \
94  ENTRY(BPL) \
95  ENTRY(SIL) \
96  ENTRY(DIL)
97 
// X-macro list of 16-bit effective-address bases. Each name is wrapped in
// ENTRY(), which must be #defined before this list is expanded.
// Unlike REGS_16BIT, the first eight entries are BX_SI, BX_DI, BP_SI, BP_DI,
// SI, DI, BP, BX (four register pairs followed by four single registers);
// the R8W-R15W tail is the same in both lists.
98 #define EA_BASES_16BIT \
99  ENTRY(BX_SI) \
100  ENTRY(BX_DI) \
101  ENTRY(BP_SI) \
102  ENTRY(BP_DI) \
103  ENTRY(SI) \
104  ENTRY(DI) \
105  ENTRY(BP) \
106  ENTRY(BX) \
107  ENTRY(R8W) \
108  ENTRY(R9W) \
109  ENTRY(R10W) \
110  ENTRY(R11W) \
111  ENTRY(R12W) \
112  ENTRY(R13W) \
113  ENTRY(R14W) \
114  ENTRY(R15W)
115 
116 #define REGS_16BIT \
117  ENTRY(AX) \
118  ENTRY(CX) \
119  ENTRY(DX) \
120  ENTRY(BX) \
121  ENTRY(SP) \
122  ENTRY(BP) \
123  ENTRY(SI) \
124  ENTRY(DI) \
125  ENTRY(R8W) \
126  ENTRY(R9W) \
127  ENTRY(R10W) \
128  ENTRY(R11W) \
129  ENTRY(R12W) \
130  ENTRY(R13W) \
131  ENTRY(R14W) \
132  ENTRY(R15W)
133 
// X-macro list of 32-bit effective-address bases. Each name is wrapped in
// ENTRY(), which must be #defined before this list is expanded.
// Same order as REGS_32BIT, except that the slot holding ESP there is
// occupied by the `sib` placeholder here.
134 #define EA_BASES_32BIT \
135  ENTRY(EAX) \
136  ENTRY(ECX) \
137  ENTRY(EDX) \
138  ENTRY(EBX) \
139  ENTRY(sib) \
140  ENTRY(EBP) \
141  ENTRY(ESI) \
142  ENTRY(EDI) \
143  ENTRY(R8D) \
144  ENTRY(R9D) \
145  ENTRY(R10D) \
146  ENTRY(R11D) \
147  ENTRY(R12D) \
148  ENTRY(R13D) \
149  ENTRY(R14D) \
150  ENTRY(R15D)
151 
152 #define REGS_32BIT \
153  ENTRY(EAX) \
154  ENTRY(ECX) \
155  ENTRY(EDX) \
156  ENTRY(EBX) \
157  ENTRY(ESP) \
158  ENTRY(EBP) \
159  ENTRY(ESI) \
160  ENTRY(EDI) \
161  ENTRY(R8D) \
162  ENTRY(R9D) \
163  ENTRY(R10D) \
164  ENTRY(R11D) \
165  ENTRY(R12D) \
166  ENTRY(R13D) \
167  ENTRY(R14D) \
168  ENTRY(R15D)
169 
// X-macro list of 64-bit effective-address bases. Each name is wrapped in
// ENTRY(), which must be #defined before this list is expanded.
// Same order as REGS_64BIT, except that the slot holding RSP there is
// occupied by the `sib64` placeholder here.
170 #define EA_BASES_64BIT \
171  ENTRY(RAX) \
172  ENTRY(RCX) \
173  ENTRY(RDX) \
174  ENTRY(RBX) \
175  ENTRY(sib64) \
176  ENTRY(RBP) \
177  ENTRY(RSI) \
178  ENTRY(RDI) \
179  ENTRY(R8) \
180  ENTRY(R9) \
181  ENTRY(R10) \
182  ENTRY(R11) \
183  ENTRY(R12) \
184  ENTRY(R13) \
185  ENTRY(R14) \
186  ENTRY(R15)
187 
188 #define REGS_64BIT \
189  ENTRY(RAX) \
190  ENTRY(RCX) \
191  ENTRY(RDX) \
192  ENTRY(RBX) \
193  ENTRY(RSP) \
194  ENTRY(RBP) \
195  ENTRY(RSI) \
196  ENTRY(RDI) \
197  ENTRY(R8) \
198  ENTRY(R9) \
199  ENTRY(R10) \
200  ENTRY(R11) \
201  ENTRY(R12) \
202  ENTRY(R13) \
203  ENTRY(R14) \
204  ENTRY(R15)
205 
206 #define REGS_MMX \
207  ENTRY(MM0) \
208  ENTRY(MM1) \
209  ENTRY(MM2) \
210  ENTRY(MM3) \
211  ENTRY(MM4) \
212  ENTRY(MM5) \
213  ENTRY(MM6) \
214  ENTRY(MM7)
215 
216 #define REGS_XMM \
217  ENTRY(XMM0) \
218  ENTRY(XMM1) \
219  ENTRY(XMM2) \
220  ENTRY(XMM3) \
221  ENTRY(XMM4) \
222  ENTRY(XMM5) \
223  ENTRY(XMM6) \
224  ENTRY(XMM7) \
225  ENTRY(XMM8) \
226  ENTRY(XMM9) \
227  ENTRY(XMM10) \
228  ENTRY(XMM11) \
229  ENTRY(XMM12) \
230  ENTRY(XMM13) \
231  ENTRY(XMM14) \
232  ENTRY(XMM15) \
233  ENTRY(XMM16) \
234  ENTRY(XMM17) \
235  ENTRY(XMM18) \
236  ENTRY(XMM19) \
237  ENTRY(XMM20) \
238  ENTRY(XMM21) \
239  ENTRY(XMM22) \
240  ENTRY(XMM23) \
241  ENTRY(XMM24) \
242  ENTRY(XMM25) \
243  ENTRY(XMM26) \
244  ENTRY(XMM27) \
245  ENTRY(XMM28) \
246  ENTRY(XMM29) \
247  ENTRY(XMM30) \
248  ENTRY(XMM31)
249 
250 #define REGS_YMM \
251  ENTRY(YMM0) \
252  ENTRY(YMM1) \
253  ENTRY(YMM2) \
254  ENTRY(YMM3) \
255  ENTRY(YMM4) \
256  ENTRY(YMM5) \
257  ENTRY(YMM6) \
258  ENTRY(YMM7) \
259  ENTRY(YMM8) \
260  ENTRY(YMM9) \
261  ENTRY(YMM10) \
262  ENTRY(YMM11) \
263  ENTRY(YMM12) \
264  ENTRY(YMM13) \
265  ENTRY(YMM14) \
266  ENTRY(YMM15) \
267  ENTRY(YMM16) \
268  ENTRY(YMM17) \
269  ENTRY(YMM18) \
270  ENTRY(YMM19) \
271  ENTRY(YMM20) \
272  ENTRY(YMM21) \
273  ENTRY(YMM22) \
274  ENTRY(YMM23) \
275  ENTRY(YMM24) \
276  ENTRY(YMM25) \
277  ENTRY(YMM26) \
278  ENTRY(YMM27) \
279  ENTRY(YMM28) \
280  ENTRY(YMM29) \
281  ENTRY(YMM30) \
282  ENTRY(YMM31)
283 
284 #define REGS_ZMM \
285  ENTRY(ZMM0) \
286  ENTRY(ZMM1) \
287  ENTRY(ZMM2) \
288  ENTRY(ZMM3) \
289  ENTRY(ZMM4) \
290  ENTRY(ZMM5) \
291  ENTRY(ZMM6) \
292  ENTRY(ZMM7) \
293  ENTRY(ZMM8) \
294  ENTRY(ZMM9) \
295  ENTRY(ZMM10) \
296  ENTRY(ZMM11) \
297  ENTRY(ZMM12) \
298  ENTRY(ZMM13) \
299  ENTRY(ZMM14) \
300  ENTRY(ZMM15) \
301  ENTRY(ZMM16) \
302  ENTRY(ZMM17) \
303  ENTRY(ZMM18) \
304  ENTRY(ZMM19) \
305  ENTRY(ZMM20) \
306  ENTRY(ZMM21) \
307  ENTRY(ZMM22) \
308  ENTRY(ZMM23) \
309  ENTRY(ZMM24) \
310  ENTRY(ZMM25) \
311  ENTRY(ZMM26) \
312  ENTRY(ZMM27) \
313  ENTRY(ZMM28) \
314  ENTRY(ZMM29) \
315  ENTRY(ZMM30) \
316  ENTRY(ZMM31)
317 
318 #define REGS_MASKS \
319  ENTRY(K0) \
320  ENTRY(K1) \
321  ENTRY(K2) \
322  ENTRY(K3) \
323  ENTRY(K4) \
324  ENTRY(K5) \
325  ENTRY(K6) \
326  ENTRY(K7)
327 
328 #define REGS_SEGMENT \
329  ENTRY(ES) \
330  ENTRY(CS) \
331  ENTRY(SS) \
332  ENTRY(DS) \
333  ENTRY(FS) \
334  ENTRY(GS)
335 
336 #define REGS_DEBUG \
337  ENTRY(DR0) \
338  ENTRY(DR1) \
339  ENTRY(DR2) \
340  ENTRY(DR3) \
341  ENTRY(DR4) \
342  ENTRY(DR5) \
343  ENTRY(DR6) \
344  ENTRY(DR7) \
345  ENTRY(DR8) \
346  ENTRY(DR9) \
347  ENTRY(DR10) \
348  ENTRY(DR11) \
349  ENTRY(DR12) \
350  ENTRY(DR13) \
351  ENTRY(DR14) \
352  ENTRY(DR15)
353 
354 #define REGS_CONTROL \
355  ENTRY(CR0) \
356  ENTRY(CR1) \
357  ENTRY(CR2) \
358  ENTRY(CR3) \
359  ENTRY(CR4) \
360  ENTRY(CR5) \
361  ENTRY(CR6) \
362  ENTRY(CR7) \
363  ENTRY(CR8) \
364  ENTRY(CR9) \
365  ENTRY(CR10) \
366  ENTRY(CR11) \
367  ENTRY(CR12) \
368  ENTRY(CR13) \
369  ENTRY(CR14) \
370  ENTRY(CR15)
371 
// Concatenation of the 16-, 32- and 64-bit effective-address base lists, in
// that order. ENTRY must be #defined before this is expanded.
372 #define ALL_EA_BASES \
373  EA_BASES_16BIT \
374  EA_BASES_32BIT \
375  EA_BASES_64BIT
376 
// Concatenation of the 32-bit and 64-bit general-purpose register lists, in
// that order. ENTRY must be #defined before this is expanded.
377 #define ALL_SIB_BASES \
378  REGS_32BIT \
379  REGS_64BIT
380 
// Concatenation of every register list above, followed by a single extra
// RIP entry. ENTRY must be #defined before this is expanded; the enums in
// this file expand it with ENTRY(x) defined as EA_REG_##x and as
// MODRM_REG_##x.
381 #define ALL_REGS \
382  REGS_8BIT \
383  REGS_16BIT \
384  REGS_32BIT \
385  REGS_64BIT \
386  REGS_MMX \
387  REGS_XMM \
388  REGS_YMM \
389  REGS_ZMM \
390  REGS_MASKS \
391  REGS_SEGMENT \
392  REGS_DEBUG \
393  REGS_CONTROL \
394  ENTRY(RIP)
395 
396 /// \brief All possible values of the base field for effective-address
397 /// computations, a.k.a. the Mod and R/M fields of the ModR/M byte.
398 /// We distinguish between bases (EA_BASE_*) and registers that just happen
399 /// to be referred to when Mod == 0b11 (EA_REG_*).
400 enum EABase {
402 #define ENTRY(x) EA_BASE_##x,
404 #undef ENTRY
405 #define ENTRY(x) EA_REG_##x,
406  ALL_REGS
407 #undef ENTRY
409 };
410 
411 /// \brief All possible values of the SIB index field.
412 /// Borrows entries from ALL_EA_BASES, with the special case that
413 /// sib is synonymous with NONE.
414 /// Vector SIB: the index can be an XMM, YMM, or ZMM register.
415 enum SIBIndex {
417 #define ENTRY(x) SIB_INDEX_##x,
419  REGS_XMM
420  REGS_YMM
421  REGS_ZMM
422 #undef ENTRY
424 };
425 
426 /// \brief All possible values of the SIB base field.
427 enum SIBBase {
429 #define ENTRY(x) SIB_BASE_##x,
431 #undef ENTRY
433 };
434 
435 /// \brief Possible displacement types for effective-address computations.
436 typedef enum {
442 
443 /// \brief All possible values of the reg field in the ModR/M byte.
444 enum Reg {
445 #define ENTRY(x) MODRM_REG_##x,
446  ALL_REGS
447 #undef ENTRY
449 };
450 
451 /// \brief All possible segment overrides.
461 };
462 
463 /// \brief Possible values for the VEX.m-mmmm field
465  VEX_LOB_0F = 0x1,
468 };
469 
474 };
475 
476 /// \brief Possible values for the VEX.pp/EVEX.pp field
482 };
483 
486  TYPE_VEX_2B = 0x1,
487  TYPE_VEX_3B = 0x2,
488  TYPE_EVEX = 0x3,
489  TYPE_XOP = 0x4
490 };
491 
492 /// \brief Type for the byte reader that the consumer must provide to
493 /// the decoder. Reads a single byte from the instruction's address space.
494 /// \param arg A baton that the consumer can associate with any internal
495 /// state that it needs.
496 /// \param byte A pointer to a single byte in memory that should be set to
497 /// contain the value at address.
498 /// \param address The address in the instruction's address space that should
499 /// be read from.
500 /// \return -1 if the byte cannot be read for any reason; 0 otherwise.
501 typedef int (*byteReader_t)(const void *arg, uint8_t *byte, uint64_t address);
502 
503 /// \brief Type for the logging function that the consumer can provide to
504 /// get debugging output from the decoder.
505 /// \param arg A baton that the consumer can associate with any internal
506 /// state that it needs.
507 /// \param log A string that contains the message. Will be reused after
508 /// the logger returns.
509 typedef void (*dlog_t)(void *arg, const char *log);
510 
511 /// The specification for how to extract and interpret a full instruction and
512 /// its operands.
514  uint16_t operands;
515 };
516 
517 /// The x86 internal instruction, which is produced by the decoder.
519  // Reader interface (C)
521  // Opaque value passed to the reader
522  const void* readerArg;
523  // The address of the next byte to read via the reader
524  uint64_t readerCursor;
525 
526  // Logger interface (C)
528  // Opaque value passed to the logger
529  void* dlogArg;
530 
531  // General instruction information
532 
533  // The mode to disassemble for (64-bit, protected, real)
535  // The start of the instruction, usable with the reader
536  uint64_t startLocation;
537  // The length of the instruction, in bytes
538  size_t length;
539 
540  // Prefix state
541 
542  // 1 if the prefix byte corresponding to the entry is present; 0 if not
543  uint8_t prefixPresent[0x100];
544  // contains the location (for use with the reader) of the prefix byte
545  uint64_t prefixLocations[0x100];
546  // The value of the vector extension prefix(EVEX/VEX/XOP), if present
548  // The type of the vector extension prefix
550  // The value of the REX prefix, if present
551  uint8_t rexPrefix;
552  // The location where a mandatory prefix would have to be (i.e., right before
553  // the opcode, or right before the REX prefix if one is present).
555  // The segment override type
557  // 1 if the prefix byte, 0xf2 or 0xf3 is xacquire or xrelease
559 
560  // Sizes of various critical pieces of data, in bytes
561  uint8_t registerSize;
562  uint8_t addressSize;
564  uint8_t immediateSize;
565 
566  // Offsets from the start of the instruction to the pieces of data, which is
567  // needed to find relocation entries for adding symbolic operands.
570 
571  // opcode state
572 
573  // The last byte of the opcode, not counting any ModR/M extension
574  uint8_t opcode;
575  // The ModR/M byte of the instruction, if it is an opcode extension
576  uint8_t modRMExtension;
577 
578  // decode state
579 
580  // The type of opcode, used for indexing into the array of decode tables
582  // The instruction ID, extracted from the decode table
583  uint16_t instructionID;
584  // The specifier for the instruction, from the instruction info table
586 
587  // state for additional bytes, consumed during operand decode. Pattern:
588  // consumed___ indicates that the byte was already consumed and does not
589  // need to be consumed again.
590 
591  // The VEX.vvvv field, which contains a third register operand for some AVX
592  // instructions.
594 
595  // The writemask for AVX-512 instructions which is contained in EVEX.aaa
597 
598  // The ModR/M byte, which contains most register operands and some portion of
599  // all memory operands.
601  uint8_t modRM;
602 
603  // The SIB byte, used for more complex 32- or 64-bit memory operands
605  uint8_t sib;
606 
607  // The displacement, used for memory operands
609  int32_t displacement;
610 
611  // Immediates. There can be two in some cases
614  uint64_t immediates[2];
615 
616  // A register or immediate operand encoded into the opcode
618 
619  // Portions of the ModR/M byte
620 
621  // These fields determine the allowable values for the ModR/M fields, which
622  // depend on operand and address widths.
626 
627  // The Mod and R/M fields can encode a base for an effective address, or a
628  // register. These are separated into two fields here.
631  // The reg field always encodes a register
633 
634  // SIB state
636  uint8_t sibScale;
638 
640 };
641 
642 /// \brief Decode one instruction and store the decoding results in
643 /// a buffer provided by the consumer.
644 /// \param insn The buffer to store the instruction in. Allocated by the
645 /// consumer.
646 /// \param reader The byteReader_t for the bytes to be read.
647 /// \param readerArg An argument to pass to the reader for storing context
648 /// specific to the consumer. May be NULL.
649 /// \param logger The dlog_t to be used in printing status messages from the
650 /// disassembler. May be NULL.
651 /// \param loggerArg An argument to pass to the logger for storing context
652 /// specific to the logger. May be NULL.
653 /// \param startLoc The address (in the reader's address space) of the first
654 /// byte in the instruction.
655 /// \param mode The mode (16-bit, 32-bit, 64-bit) to decode in.
656 /// \return Nonzero if there was an error during decode, 0 otherwise.
658  byteReader_t reader,
659  const void *readerArg,
660  dlog_t logger,
661  void *loggerArg,
662  const void *miiArg,
663  uint64_t startLoc,
664  DisassemblerMode mode);
665 
666 /// \brief Print a message to debugs()
667 /// \param file The name of the file printing the debug message.
668 /// \param line The line number that printed the debug message.
669 /// \param s The message to print.
670 void Debug(const char *file, unsigned line, const char *s);
671 
/// NOTE(review): undocumented in this header. Presumably returns the name of
/// the instruction identified by \p Opcode, using \p mii as an opaque
/// instruction-info context -- confirm against the definition.
672 const char *GetInstrName(unsigned Opcode, const void *mii);
673 
674 } // namespace X86Disassembler
675 } // namespace llvm
676 
677 #endif
The specification for how to extract and interpret a full instruction and its operands.
#define ALL_REGS
EADisplacement
Possible displacement types for effective-address computations.
SIBIndex
All possible values of the SIB index field.
Reg
All possible values of the reg field in the ModR/M byte.
Number of individual test Apply this number of consecutive mutations to each input exit after the first new interesting input is found the minimized corpus is saved into the first input directory Number of jobs to run If Reload the main corpus periodically to get new units discovered by other processes Read the given input file
#define ALL_EA_BASES
VEXLeadingOpcodeByte
Possible values for the VEX.m-mmmm field.
int decodeInstruction(InternalInstruction *insn, byteReader_t reader, const void *readerArg, dlog_t logger, void *loggerArg, const void *miiArg, uint64_t startLoc, DisassemblerMode mode)
Decode one instruction and store the decoding results in a buffer provided by the consumer...
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory)...
Definition: ArrayRef.h:31
EABase
All possible values of the base field for effective-address computations, a.k.a. the Mod and R/M fields of the ModR/M byte.
#define ALL_SIB_BASES
const char * GetInstrName(unsigned Opcode, const void *mii)
void Debug(const char *file, unsigned line, const char *s)
Print a message to debugs()
The x86 internal instruction, which is produced by the decoder.
VEXPrefixCode
Possible values for the VEX.pp/EVEX.pp field.
void(* dlog_t)(void *arg, const char *log)
Type for the logging function that the consumer can provide to get debugging output from the decoder...
int(* byteReader_t)(const void *arg, uint8_t *byte, uint64_t address)
Type for the byte reader that the consumer must provide to the decoder.
SegmentOverride
All possible segment overrides.
#define REGS_ZMM
SIBBase
All possible values of the SIB base field.
#define REGS_XMM
static void logger(void *arg, const char *log)
logger - a callback function that wraps the operator<< method from raw_ostream.
#define REGS_YMM
DisassemblerMode
Decoding mode for the Intel disassembler.