LLVM API Documentation

X86Disassembler.cpp
Go to the documentation of this file.
00001 //===-- X86Disassembler.cpp - Disassembler for x86 and x86_64 -------------===//
00002 //
00003 //                     The LLVM Compiler Infrastructure
00004 //
00005 // This file is distributed under the University of Illinois Open Source
00006 // License. See LICENSE.TXT for details.
00007 //
00008 //===----------------------------------------------------------------------===//
00009 //
00010 // This file is part of the X86 Disassembler.
00011 // It contains code to translate the data produced by the decoder into
00012 //  MCInsts.
00013 // Documentation for the disassembler can be found in X86Disassembler.h.
00014 //
00015 //===----------------------------------------------------------------------===//
00016 
00017 #include "X86Disassembler.h"
00018 #include "X86DisassemblerDecoder.h"
00019 #include "llvm/MC/MCContext.h"
00020 #include "llvm/MC/MCDisassembler.h"
00021 #include "llvm/MC/MCExpr.h"
00022 #include "llvm/MC/MCInst.h"
00023 #include "llvm/MC/MCInstrInfo.h"
00024 #include "llvm/MC/MCSubtargetInfo.h"
00025 #include "llvm/Support/Debug.h"
00026 #include "llvm/Support/MemoryObject.h"
00027 #include "llvm/Support/TargetRegistry.h"
00028 #include "llvm/Support/raw_ostream.h"
00029 
00030 #define GET_REGINFO_ENUM
00031 #include "X86GenRegisterInfo.inc"
00032 #define GET_INSTRINFO_ENUM
00033 #include "X86GenInstrInfo.inc"
00034 
00035 using namespace llvm;
00036 using namespace llvm::X86Disassembler;
00037 
00038 void x86DisassemblerDebug(const char *file,
00039                           unsigned line,
00040                           const char *s) {
00041   dbgs() << file << ":" << line << ": " << s;
00042 }
00043 
00044 const char *x86DisassemblerGetInstrName(unsigned Opcode, const void *mii) {
00045   const MCInstrInfo *MII = static_cast<const MCInstrInfo *>(mii);
00046   return MII->getName(Opcode);
00047 }
00048 
// debug - Reports a message through x86DisassemblerDebug(), tagged with the
//   current file and line, inside LLVM's DEBUG() wrapper.  The expansion
//   deliberately does not end in a semicolon: every call site supplies its
//   own, so debug(...) behaves like a single statement (and stays safe inside
//   an unbraced if/else).
#define debug(s) DEBUG(x86DisassemblerDebug(__FILE__, __LINE__, s))
00050 
00051 namespace llvm {  
00052   
00053 // Fill-ins to make the compiler happy.  These constants are never actually
00054 //   assigned; they are just filler to make an automatically-generated switch
00055 //   statement work.
00056 namespace X86 {
00057   enum {
00058     BX_SI = 500,
00059     BX_DI = 501,
00060     BP_SI = 502,
00061     BP_DI = 503,
00062     sib   = 504,
00063     sib64 = 505
00064   };
00065 }
00066 
00067 extern Target TheX86_32Target, TheX86_64Target;
00068 
00069 }
00070 
00071 static bool translateInstruction(MCInst &target,
00072                                 InternalInstruction &source,
00073                                 const MCDisassembler *Dis);
00074 
00075 X86GenericDisassembler::X86GenericDisassembler(const MCSubtargetInfo &STI,
00076                                                DisassemblerMode mode,
00077                                                const MCInstrInfo *MII)
00078   : MCDisassembler(STI), MII(MII), fMode(mode) {}
00079 
00080 X86GenericDisassembler::~X86GenericDisassembler() {
00081   delete MII;
00082 }
00083 
00084 /// regionReader - a callback function that wraps the readByte method from
00085 ///   MemoryObject.
00086 ///
00087 /// @param arg      - The generic callback parameter.  In this case, this should
00088 ///                   be a pointer to a MemoryObject.
00089 /// @param byte     - A pointer to the byte to be read.
00090 /// @param address  - The address to be read.
00091 static int regionReader(const void* arg, uint8_t* byte, uint64_t address) {
00092   const MemoryObject* region = static_cast<const MemoryObject*>(arg);
00093   return region->readByte(address, byte);
00094 }
00095 
00096 /// logger - a callback function that wraps the operator<< method from
00097 ///   raw_ostream.
00098 ///
00099 /// @param arg      - The generic callback parameter.  This should be a pointe
00100 ///                   to a raw_ostream.
00101 /// @param log      - A string to be logged.  logger() adds a newline.
00102 static void logger(void* arg, const char* log) {
00103   if (!arg)
00104     return;
00105   
00106   raw_ostream &vStream = *(static_cast<raw_ostream*>(arg));
00107   vStream << log << "\n";
00108 }  
00109   
00110 //
00111 // Public interface for the disassembler
00112 //
00113 
00114 MCDisassembler::DecodeStatus
00115 X86GenericDisassembler::getInstruction(MCInst &instr,
00116                                        uint64_t &size,
00117                                        const MemoryObject &region,
00118                                        uint64_t address,
00119                                        raw_ostream &vStream,
00120                                        raw_ostream &cStream) const {
00121   CommentStream = &cStream;
00122 
00123   InternalInstruction internalInstr;
00124 
00125   dlog_t loggerFn = logger;
00126   if (&vStream == &nulls())
00127     loggerFn = 0; // Disable logging completely if it's going to nulls().
00128   
00129   int ret = decodeInstruction(&internalInstr,
00130                               regionReader,
00131                               (const void*)&region,
00132                               loggerFn,
00133                               (void*)&vStream,
00134                               (const void*)MII,
00135                               address,
00136                               fMode);
00137 
00138   if (ret) {
00139     size = internalInstr.readerCursor - address;
00140     return Fail;
00141   }
00142   else {
00143     size = internalInstr.length;
00144     return (!translateInstruction(instr, internalInstr, this)) ?
00145             Success : Fail;
00146   }
00147 }
00148 
00149 //
00150 // Private code that translates from struct InternalInstructions to MCInsts.
00151 //
00152 
00153 /// translateRegister - Translates an internal register to the appropriate LLVM
00154 ///   register, and appends it as an operand to an MCInst.
00155 ///
00156 /// @param mcInst     - The MCInst to append to.
00157 /// @param reg        - The Reg to append.
00158 static void translateRegister(MCInst &mcInst, Reg reg) {
00159 #define ENTRY(x) X86::x,
00160   uint8_t llvmRegnums[] = {
00161     ALL_REGS
00162     0
00163   };
00164 #undef ENTRY
00165 
00166   uint8_t llvmRegnum = llvmRegnums[reg];
00167   mcInst.addOperand(MCOperand::CreateReg(llvmRegnum));
00168 }
00169 
00170 /// tryAddingSymbolicOperand - trys to add a symbolic operand in place of the
00171 /// immediate Value in the MCInst. 
00172 ///
00173 /// @param Value      - The immediate Value, has had any PC adjustment made by
00174 ///                     the caller.
00175 /// @param isBranch   - If the instruction is a branch instruction
00176 /// @param Address    - The starting address of the instruction
00177 /// @param Offset     - The byte offset to this immediate in the instruction
00178 /// @param Width      - The byte width of this immediate in the instruction
00179 ///
00180 /// If the getOpInfo() function was set when setupForSymbolicDisassembly() was
00181 /// called then that function is called to get any symbolic information for the
00182 /// immediate in the instruction using the Address, Offset and Width.  If that
00183 /// returns non-zero then the symbolic information it returns is used to create 
00184 /// an MCExpr and that is added as an operand to the MCInst.  If getOpInfo()
00185 /// returns zero and isBranch is true then a symbol look up for immediate Value
00186 /// is done and if a symbol is found an MCExpr is created with that, else
00187 /// an MCExpr with the immediate Value is created.  This function returns true
00188 /// if it adds an operand to the MCInst and false otherwise.
00189 static bool tryAddingSymbolicOperand(int64_t Value, bool isBranch,
00190                                      uint64_t Address, uint64_t Offset,
00191                                      uint64_t Width, MCInst &MI, 
00192                                      const MCDisassembler *Dis) {  
00193   LLVMOpInfoCallback getOpInfo = Dis->getLLVMOpInfoCallback();
00194   struct LLVMOpInfo1 SymbolicOp;
00195   memset(&SymbolicOp, '\0', sizeof(struct LLVMOpInfo1));
00196   SymbolicOp.Value = Value;
00197   void *DisInfo = Dis->getDisInfoBlock();
00198 
00199   if (!getOpInfo ||
00200       !getOpInfo(DisInfo, Address, Offset, Width, 1, &SymbolicOp)) {
00201     // Clear SymbolicOp.Value from above and also all other fields.
00202     memset(&SymbolicOp, '\0', sizeof(struct LLVMOpInfo1));
00203     LLVMSymbolLookupCallback SymbolLookUp = Dis->getLLVMSymbolLookupCallback();
00204     if (!SymbolLookUp)
00205       return false;
00206     uint64_t ReferenceType;
00207     if (isBranch)
00208        ReferenceType = LLVMDisassembler_ReferenceType_In_Branch;
00209     else
00210        ReferenceType = LLVMDisassembler_ReferenceType_InOut_None;
00211     const char *ReferenceName;
00212     const char *Name = SymbolLookUp(DisInfo, Value, &ReferenceType, Address,
00213                                     &ReferenceName);
00214     if (Name) {
00215       SymbolicOp.AddSymbol.Name = Name;
00216       SymbolicOp.AddSymbol.Present = true;
00217     }
00218     // For branches always create an MCExpr so it gets printed as hex address.
00219     else if (isBranch) {
00220       SymbolicOp.Value = Value;
00221     }
00222     if(ReferenceType == LLVMDisassembler_ReferenceType_Out_SymbolStub)
00223       (*Dis->CommentStream) << "symbol stub for: " << ReferenceName;
00224     if (!Name && !isBranch)
00225       return false;
00226   }
00227 
00228   MCContext *Ctx = Dis->getMCContext();
00229   const MCExpr *Add = NULL;
00230   if (SymbolicOp.AddSymbol.Present) {
00231     if (SymbolicOp.AddSymbol.Name) {
00232       StringRef Name(SymbolicOp.AddSymbol.Name);
00233       MCSymbol *Sym = Ctx->GetOrCreateSymbol(Name);
00234       Add = MCSymbolRefExpr::Create(Sym, *Ctx);
00235     } else {
00236       Add = MCConstantExpr::Create((int)SymbolicOp.AddSymbol.Value, *Ctx);
00237     }
00238   }
00239 
00240   const MCExpr *Sub = NULL;
00241   if (SymbolicOp.SubtractSymbol.Present) {
00242       if (SymbolicOp.SubtractSymbol.Name) {
00243       StringRef Name(SymbolicOp.SubtractSymbol.Name);
00244       MCSymbol *Sym = Ctx->GetOrCreateSymbol(Name);
00245       Sub = MCSymbolRefExpr::Create(Sym, *Ctx);
00246     } else {
00247       Sub = MCConstantExpr::Create((int)SymbolicOp.SubtractSymbol.Value, *Ctx);
00248     }
00249   }
00250 
00251   const MCExpr *Off = NULL;
00252   if (SymbolicOp.Value != 0)
00253     Off = MCConstantExpr::Create(SymbolicOp.Value, *Ctx);
00254 
00255   const MCExpr *Expr;
00256   if (Sub) {
00257     const MCExpr *LHS;
00258     if (Add)
00259       LHS = MCBinaryExpr::CreateSub(Add, Sub, *Ctx);
00260     else
00261       LHS = MCUnaryExpr::CreateMinus(Sub, *Ctx);
00262     if (Off != 0)
00263       Expr = MCBinaryExpr::CreateAdd(LHS, Off, *Ctx);
00264     else
00265       Expr = LHS;
00266   } else if (Add) {
00267     if (Off != 0)
00268       Expr = MCBinaryExpr::CreateAdd(Add, Off, *Ctx);
00269     else
00270       Expr = Add;
00271   } else {
00272     if (Off != 0)
00273       Expr = Off;
00274     else
00275       Expr = MCConstantExpr::Create(0, *Ctx);
00276   }
00277 
00278   MI.addOperand(MCOperand::CreateExpr(Expr));
00279 
00280   return true;
00281 }
00282 
00283 /// tryAddingPcLoadReferenceComment - trys to add a comment as to what is being
00284 /// referenced by a load instruction with the base register that is the rip.
00285 /// These can often be addresses in a literal pool.  The Address of the
00286 /// instruction and its immediate Value are used to determine the address
00287 /// being referenced in the literal pool entry.  The SymbolLookUp call back will
00288 /// return a pointer to a literal 'C' string if the referenced address is an 
00289 /// address into a section with 'C' string literals.
00290 static void tryAddingPcLoadReferenceComment(uint64_t Address, uint64_t Value,
00291                                             const void *Decoder) {
00292   const MCDisassembler *Dis = static_cast<const MCDisassembler*>(Decoder);
00293   LLVMSymbolLookupCallback SymbolLookUp = Dis->getLLVMSymbolLookupCallback();
00294   if (SymbolLookUp) {
00295     void *DisInfo = Dis->getDisInfoBlock();
00296     uint64_t ReferenceType = LLVMDisassembler_ReferenceType_In_PCrel_Load;
00297     const char *ReferenceName;
00298     (void)SymbolLookUp(DisInfo, Value, &ReferenceType, Address, &ReferenceName);
00299     if(ReferenceType == LLVMDisassembler_ReferenceType_Out_LitPool_CstrAddr)
00300       (*Dis->CommentStream) << "literal pool for: " << ReferenceName;
00301   }
00302 }
00303 
00304 /// translateImmediate  - Appends an immediate operand to an MCInst.
00305 ///
00306 /// @param mcInst       - The MCInst to append to.
00307 /// @param immediate    - The immediate value to append.
00308 /// @param operand      - The operand, as stored in the descriptor table.
00309 /// @param insn         - The internal instruction.
00310 static void translateImmediate(MCInst &mcInst, uint64_t immediate,
00311                                const OperandSpecifier &operand,
00312                                InternalInstruction &insn,
00313                                const MCDisassembler *Dis) {  
00314   // Sign-extend the immediate if necessary.
00315 
00316   OperandType type = (OperandType)operand.type;
00317 
00318   bool isBranch = false;
00319   uint64_t pcrel = 0;
00320   if (type == TYPE_RELv) {
00321     isBranch = true;
00322     pcrel = insn.startLocation +
00323             insn.immediateOffset + insn.immediateSize;
00324     switch (insn.displacementSize) {
00325     default:
00326       break;
00327     case 1:
00328       type = TYPE_MOFFS8;
00329       break;
00330     case 2:
00331       type = TYPE_MOFFS16;
00332       break;
00333     case 4:
00334       type = TYPE_MOFFS32;
00335       break;
00336     case 8:
00337       type = TYPE_MOFFS64;
00338       break;
00339     }
00340   }
00341   // By default sign-extend all X86 immediates based on their encoding.
00342   else if (type == TYPE_IMM8 || type == TYPE_IMM16 || type == TYPE_IMM32 ||
00343            type == TYPE_IMM64) {
00344     uint32_t Opcode = mcInst.getOpcode();
00345     switch (operand.encoding) {
00346     default:
00347       break;
00348     case ENCODING_IB:
00349       // Special case those X86 instructions that use the imm8 as a set of
00350       // bits, bit count, etc. and are not sign-extend.
00351       if (Opcode != X86::BLENDPSrri && Opcode != X86::BLENDPDrri &&
00352           Opcode != X86::PBLENDWrri && Opcode != X86::MPSADBWrri &&
00353           Opcode != X86::DPPSrri && Opcode != X86::DPPDrri &&
00354           Opcode != X86::INSERTPSrr && Opcode != X86::VBLENDPSYrri &&
00355           Opcode != X86::VBLENDPSYrmi && Opcode != X86::VBLENDPDYrri &&
00356           Opcode != X86::VBLENDPDYrmi && Opcode != X86::VPBLENDWrri &&
00357           Opcode != X86::VMPSADBWrri && Opcode != X86::VDPPSYrri &&
00358           Opcode != X86::VDPPSYrmi && Opcode != X86::VDPPDrri &&
00359           Opcode != X86::VINSERTPSrr)
00360         type = TYPE_MOFFS8;
00361       break;
00362     case ENCODING_IW:
00363       type = TYPE_MOFFS16;
00364       break;
00365     case ENCODING_ID:
00366       type = TYPE_MOFFS32;
00367       break;
00368     case ENCODING_IO:
00369       type = TYPE_MOFFS64;
00370       break;
00371     }
00372   }
00373 
00374   switch (type) {
00375   case TYPE_XMM32:
00376   case TYPE_XMM64:
00377   case TYPE_XMM128:
00378     mcInst.addOperand(MCOperand::CreateReg(X86::XMM0 + (immediate >> 4)));
00379     return;
00380   case TYPE_XMM256:
00381     mcInst.addOperand(MCOperand::CreateReg(X86::YMM0 + (immediate >> 4)));
00382     return;
00383   case TYPE_REL8:
00384     isBranch = true;
00385     pcrel = insn.startLocation + insn.immediateOffset + insn.immediateSize;
00386     // fall through to sign extend the immediate if needed.
00387   case TYPE_MOFFS8:
00388     if(immediate & 0x80)
00389       immediate |= ~(0xffull);
00390     break;
00391   case TYPE_MOFFS16:
00392     if(immediate & 0x8000)
00393       immediate |= ~(0xffffull);
00394     break;
00395   case TYPE_REL32:
00396   case TYPE_REL64:
00397     isBranch = true;
00398     pcrel = insn.startLocation + insn.immediateOffset + insn.immediateSize;
00399     // fall through to sign extend the immediate if needed.
00400   case TYPE_MOFFS32:
00401     if(immediate & 0x80000000)
00402       immediate |= ~(0xffffffffull);
00403     break;
00404   case TYPE_MOFFS64:
00405   default:
00406     // operand is 64 bits wide.  Do nothing.
00407     break;
00408   }
00409     
00410   if(!tryAddingSymbolicOperand(immediate + pcrel, isBranch, insn.startLocation,
00411                                insn.immediateOffset, insn.immediateSize,
00412                                mcInst, Dis))
00413     mcInst.addOperand(MCOperand::CreateImm(immediate));
00414 }
00415 
00416 /// translateRMRegister - Translates a register stored in the R/M field of the
00417 ///   ModR/M byte to its LLVM equivalent and appends it to an MCInst.
00418 /// @param mcInst       - The MCInst to append to.
00419 /// @param insn         - The internal instruction to extract the R/M field
00420 ///                       from.
00421 /// @return             - 0 on success; -1 otherwise
00422 static bool translateRMRegister(MCInst &mcInst,
00423                                 InternalInstruction &insn) {
00424   if (insn.eaBase == EA_BASE_sib || insn.eaBase == EA_BASE_sib64) {
00425     debug("A R/M register operand may not have a SIB byte");
00426     return true;
00427   }
00428   
00429   switch (insn.eaBase) {
00430   default:
00431     debug("Unexpected EA base register");
00432     return true;
00433   case EA_BASE_NONE:
00434     debug("EA_BASE_NONE for ModR/M base");
00435     return true;
00436 #define ENTRY(x) case EA_BASE_##x:
00437   ALL_EA_BASES
00438 #undef ENTRY
00439     debug("A R/M register operand may not have a base; "
00440           "the operand must be a register.");
00441     return true;
00442 #define ENTRY(x)                                                      \
00443   case EA_REG_##x:                                                    \
00444     mcInst.addOperand(MCOperand::CreateReg(X86::x)); break;
00445   ALL_REGS
00446 #undef ENTRY
00447   }
00448   
00449   return false;
00450 }
00451 
00452 /// translateRMMemory - Translates a memory operand stored in the Mod and R/M
00453 ///   fields of an internal instruction (and possibly its SIB byte) to a memory
00454 ///   operand in LLVM's format, and appends it to an MCInst.
00455 ///
00456 /// @param mcInst       - The MCInst to append to.
00457 /// @param insn         - The instruction to extract Mod, R/M, and SIB fields
00458 ///                       from.
00459 /// @return             - 0 on success; nonzero otherwise
00460 static bool translateRMMemory(MCInst &mcInst, InternalInstruction &insn,
00461                               const MCDisassembler *Dis) {  
00462   // Addresses in an MCInst are represented as five operands:
00463   //   1. basereg       (register)  The R/M base, or (if there is a SIB) the 
00464   //                                SIB base
00465   //   2. scaleamount   (immediate) 1, or (if there is a SIB) the specified 
00466   //                                scale amount
00467   //   3. indexreg      (register)  x86_registerNONE, or (if there is a SIB)
00468   //                                the index (which is multiplied by the 
00469   //                                scale amount)
00470   //   4. displacement  (immediate) 0, or the displacement if there is one
00471   //   5. segmentreg    (register)  x86_registerNONE for now, but could be set
00472   //                                if we have segment overrides
00473   
00474   MCOperand baseReg;
00475   MCOperand scaleAmount;
00476   MCOperand indexReg;
00477   MCOperand displacement;
00478   MCOperand segmentReg;
00479   uint64_t pcrel = 0;
00480   
00481   if (insn.eaBase == EA_BASE_sib || insn.eaBase == EA_BASE_sib64) {
00482     if (insn.sibBase != SIB_BASE_NONE) {
00483       switch (insn.sibBase) {
00484       default:
00485         debug("Unexpected sibBase");
00486         return true;
00487 #define ENTRY(x)                                          \
00488       case SIB_BASE_##x:                                  \
00489         baseReg = MCOperand::CreateReg(X86::x); break;
00490       ALL_SIB_BASES
00491 #undef ENTRY
00492       }
00493     } else {
00494       baseReg = MCOperand::CreateReg(0);
00495     }
00496 
00497     // Check whether we are handling VSIB addressing mode for GATHER.
00498     // If sibIndex was set to SIB_INDEX_NONE, index offset is 4 and
00499     // we should use SIB_INDEX_XMM4|YMM4 for VSIB.
00500     // I don't see a way to get the correct IndexReg in readSIB:
00501     //   We can tell whether it is VSIB or SIB after instruction ID is decoded,
00502     //   but instruction ID may not be decoded yet when calling readSIB.
00503     uint32_t Opcode = mcInst.getOpcode();
00504     bool IndexIs128 = (Opcode == X86::VGATHERDPDrm ||
00505                        Opcode == X86::VGATHERDPDYrm ||
00506                        Opcode == X86::VGATHERQPDrm ||
00507                        Opcode == X86::VGATHERDPSrm ||
00508                        Opcode == X86::VGATHERQPSrm ||
00509                        Opcode == X86::VPGATHERDQrm ||
00510                        Opcode == X86::VPGATHERDQYrm ||
00511                        Opcode == X86::VPGATHERQQrm ||
00512                        Opcode == X86::VPGATHERDDrm ||
00513                        Opcode == X86::VPGATHERQDrm);
00514     bool IndexIs256 = (Opcode == X86::VGATHERQPDYrm ||
00515                        Opcode == X86::VGATHERDPSYrm ||
00516                        Opcode == X86::VGATHERQPSYrm ||
00517                        Opcode == X86::VPGATHERQQYrm ||
00518                        Opcode == X86::VPGATHERDDYrm ||
00519                        Opcode == X86::VPGATHERQDYrm);
00520     if (IndexIs128 || IndexIs256) {
00521       unsigned IndexOffset = insn.sibIndex -
00522                          (insn.addressSize == 8 ? SIB_INDEX_RAX:SIB_INDEX_EAX);
00523       SIBIndex IndexBase = IndexIs256 ? SIB_INDEX_YMM0 : SIB_INDEX_XMM0;
00524       insn.sibIndex = (SIBIndex)(IndexBase + 
00525                            (insn.sibIndex == SIB_INDEX_NONE ? 4 : IndexOffset));
00526     }
00527 
00528     if (insn.sibIndex != SIB_INDEX_NONE) {
00529       switch (insn.sibIndex) {
00530       default:
00531         debug("Unexpected sibIndex");
00532         return true;
00533 #define ENTRY(x)                                          \
00534       case SIB_INDEX_##x:                                 \
00535         indexReg = MCOperand::CreateReg(X86::x); break;
00536       EA_BASES_32BIT
00537       EA_BASES_64BIT
00538       REGS_XMM
00539       REGS_YMM
00540 #undef ENTRY
00541       }
00542     } else {
00543       indexReg = MCOperand::CreateReg(0);
00544     }
00545     
00546     scaleAmount = MCOperand::CreateImm(insn.sibScale);
00547   } else {
00548     switch (insn.eaBase) {
00549     case EA_BASE_NONE:
00550       if (insn.eaDisplacement == EA_DISP_NONE) {
00551         debug("EA_BASE_NONE and EA_DISP_NONE for ModR/M base");
00552         return true;
00553       }
00554       if (insn.mode == MODE_64BIT){
00555         pcrel = insn.startLocation +
00556                 insn.displacementOffset + insn.displacementSize;
00557         tryAddingPcLoadReferenceComment(insn.startLocation +
00558                                         insn.displacementOffset,
00559                                         insn.displacement + pcrel, Dis);
00560         baseReg = MCOperand::CreateReg(X86::RIP); // Section 2.2.1.6
00561       }
00562       else
00563         baseReg = MCOperand::CreateReg(0);
00564       
00565       indexReg = MCOperand::CreateReg(0);
00566       break;
00567     case EA_BASE_BX_SI:
00568       baseReg = MCOperand::CreateReg(X86::BX);
00569       indexReg = MCOperand::CreateReg(X86::SI);
00570       break;
00571     case EA_BASE_BX_DI:
00572       baseReg = MCOperand::CreateReg(X86::BX);
00573       indexReg = MCOperand::CreateReg(X86::DI);
00574       break;
00575     case EA_BASE_BP_SI:
00576       baseReg = MCOperand::CreateReg(X86::BP);
00577       indexReg = MCOperand::CreateReg(X86::SI);
00578       break;
00579     case EA_BASE_BP_DI:
00580       baseReg = MCOperand::CreateReg(X86::BP);
00581       indexReg = MCOperand::CreateReg(X86::DI);
00582       break;
00583     default:
00584       indexReg = MCOperand::CreateReg(0);
00585       switch (insn.eaBase) {
00586       default:
00587         debug("Unexpected eaBase");
00588         return true;
00589         // Here, we will use the fill-ins defined above.  However,
00590         //   BX_SI, BX_DI, BP_SI, and BP_DI are all handled above and
00591         //   sib and sib64 were handled in the top-level if, so they're only
00592         //   placeholders to keep the compiler happy.
00593 #define ENTRY(x)                                        \
00594       case EA_BASE_##x:                                 \
00595         baseReg = MCOperand::CreateReg(X86::x); break; 
00596       ALL_EA_BASES
00597 #undef ENTRY
00598 #define ENTRY(x) case EA_REG_##x:
00599       ALL_REGS
00600 #undef ENTRY
00601         debug("A R/M memory operand may not be a register; "
00602               "the base field must be a base.");
00603         return true;
00604       }
00605     }
00606     
00607     scaleAmount = MCOperand::CreateImm(1);
00608   }
00609   
00610   displacement = MCOperand::CreateImm(insn.displacement);
00611   
00612   static const uint8_t segmentRegnums[SEG_OVERRIDE_max] = {
00613     0,        // SEG_OVERRIDE_NONE
00614     X86::CS,
00615     X86::SS,
00616     X86::DS,
00617     X86::ES,
00618     X86::FS,
00619     X86::GS
00620   };
00621   
00622   segmentReg = MCOperand::CreateReg(segmentRegnums[insn.segmentOverride]);
00623   
00624   mcInst.addOperand(baseReg);
00625   mcInst.addOperand(scaleAmount);
00626   mcInst.addOperand(indexReg);
00627   if(!tryAddingSymbolicOperand(insn.displacement + pcrel, false,
00628                                insn.startLocation, insn.displacementOffset,
00629                                insn.displacementSize, mcInst, Dis))
00630     mcInst.addOperand(displacement);
00631   mcInst.addOperand(segmentReg);
00632   return false;
00633 }
00634 
00635 /// translateRM - Translates an operand stored in the R/M (and possibly SIB)
00636 ///   byte of an instruction to LLVM form, and appends it to an MCInst.
00637 ///
00638 /// @param mcInst       - The MCInst to append to.
00639 /// @param operand      - The operand, as stored in the descriptor table.
00640 /// @param insn         - The instruction to extract Mod, R/M, and SIB fields
00641 ///                       from.
00642 /// @return             - 0 on success; nonzero otherwise
00643 static bool translateRM(MCInst &mcInst, const OperandSpecifier &operand,
00644                         InternalInstruction &insn, const MCDisassembler *Dis) {  
00645   switch (operand.type) {
00646   default:
00647     debug("Unexpected type for a R/M operand");
00648     return true;
00649   case TYPE_R8:
00650   case TYPE_R16:
00651   case TYPE_R32:
00652   case TYPE_R64:
00653   case TYPE_Rv:
00654   case TYPE_MM:
00655   case TYPE_MM32:
00656   case TYPE_MM64:
00657   case TYPE_XMM:
00658   case TYPE_XMM32:
00659   case TYPE_XMM64:
00660   case TYPE_XMM128:
00661   case TYPE_XMM256:
00662   case TYPE_DEBUGREG:
00663   case TYPE_CONTROLREG:
00664     return translateRMRegister(mcInst, insn);
00665   case TYPE_M:
00666   case TYPE_M8:
00667   case TYPE_M16:
00668   case TYPE_M32:
00669   case TYPE_M64:
00670   case TYPE_M128:
00671   case TYPE_M256:
00672   case TYPE_M512:
00673   case TYPE_Mv:
00674   case TYPE_M32FP:
00675   case TYPE_M64FP:
00676   case TYPE_M80FP:
00677   case TYPE_M16INT:
00678   case TYPE_M32INT:
00679   case TYPE_M64INT:
00680   case TYPE_M1616:
00681   case TYPE_M1632:
00682   case TYPE_M1664:
00683   case TYPE_LEA:
00684     return translateRMMemory(mcInst, insn, Dis);
00685   }
00686 }
00687   
00688 /// translateFPRegister - Translates a stack position on the FPU stack to its
00689 ///   LLVM form, and appends it to an MCInst.
00690 ///
00691 /// @param mcInst       - The MCInst to append to.
00692 /// @param stackPos     - The stack position to translate.
00693 /// @return             - 0 on success; nonzero otherwise.
00694 static bool translateFPRegister(MCInst &mcInst,
00695                                uint8_t stackPos) {
00696   if (stackPos >= 8) {
00697     debug("Invalid FP stack position");
00698     return true;
00699   }
00700   
00701   mcInst.addOperand(MCOperand::CreateReg(X86::ST0 + stackPos));
00702 
00703   return false;
00704 }
00705 
00706 /// translateOperand - Translates an operand stored in an internal instruction 
00707 ///   to LLVM's format and appends it to an MCInst.
00708 ///
00709 /// @param mcInst       - The MCInst to append to.
00710 /// @param operand      - The operand, as stored in the descriptor table.
00711 /// @param insn         - The internal instruction.
00712 /// @return             - false on success; true otherwise.
00713 static bool translateOperand(MCInst &mcInst, const OperandSpecifier &operand,
00714                              InternalInstruction &insn,
00715                              const MCDisassembler *Dis) {  
00716   switch (operand.encoding) {
00717   default:
00718     debug("Unhandled operand encoding during translation");
00719     return true;
00720   case ENCODING_REG:
00721     translateRegister(mcInst, insn.reg);
00722     return false;
00723   case ENCODING_RM:
00724     return translateRM(mcInst, operand, insn, Dis);
00725   case ENCODING_CB:
00726   case ENCODING_CW:
00727   case ENCODING_CD:
00728   case ENCODING_CP:
00729   case ENCODING_CO:
00730   case ENCODING_CT:
00731     debug("Translation of code offsets isn't supported.");
00732     return true;
00733   case ENCODING_IB:
00734   case ENCODING_IW:
00735   case ENCODING_ID:
00736   case ENCODING_IO:
00737   case ENCODING_Iv:
00738   case ENCODING_Ia:
00739     translateImmediate(mcInst,
00740                        insn.immediates[insn.numImmediatesTranslated++],
00741                        operand,
00742                        insn,
00743                        Dis);
00744     return false;
00745   case ENCODING_RB:
00746   case ENCODING_RW:
00747   case ENCODING_RD:
00748   case ENCODING_RO:
00749     translateRegister(mcInst, insn.opcodeRegister);
00750     return false;
00751   case ENCODING_I:
00752     return translateFPRegister(mcInst, insn.opcodeModifier);
00753   case ENCODING_Rv:
00754     translateRegister(mcInst, insn.opcodeRegister);
00755     return false;
00756   case ENCODING_VVVV:
00757     translateRegister(mcInst, insn.vvvv);
00758     return false;
00759   case ENCODING_DUP:
00760     return translateOperand(mcInst, insn.operands[operand.type - TYPE_DUP0],
00761                             insn, Dis);
00762   }
00763 }
00764   
00765 /// translateInstruction - Translates an internal instruction and all its
00766 ///   operands to an MCInst.
00767 ///
00768 /// @param mcInst       - The MCInst to populate with the instruction's data.
00769 /// @param insn         - The internal instruction.
00770 /// @return             - false on success; true otherwise.
00771 static bool translateInstruction(MCInst &mcInst,
00772                                 InternalInstruction &insn,
00773                                 const MCDisassembler *Dis) {  
00774   if (!insn.spec) {
00775     debug("Instruction has no specification");
00776     return true;
00777   }
00778   
00779   mcInst.setOpcode(insn.instructionID);
00780   
00781   int index;
00782   
00783   insn.numImmediatesTranslated = 0;
00784   
00785   for (index = 0; index < X86_MAX_OPERANDS; ++index) {
00786     if (insn.operands[index].encoding != ENCODING_NONE) {
00787       if (translateOperand(mcInst, insn.operands[index], insn, Dis)) {
00788         return true;
00789       }
00790     }
00791   }
00792   
00793   return false;
00794 }
00795 
00796 static MCDisassembler *createX86_32Disassembler(const Target &T,
00797                                                 const MCSubtargetInfo &STI) {
00798   return new X86Disassembler::X86GenericDisassembler(STI, MODE_32BIT,
00799                                                      T.createMCInstrInfo());
00800 }
00801 
00802 static MCDisassembler *createX86_64Disassembler(const Target &T,
00803                                                 const MCSubtargetInfo &STI) {
00804   return new X86Disassembler::X86GenericDisassembler(STI, MODE_64BIT,
00805                                                      T.createMCInstrInfo());
00806 }
00807 
00808 extern "C" void LLVMInitializeX86Disassembler() { 
00809   // Register the disassembler.
00810   TargetRegistry::RegisterMCDisassembler(TheX86_32Target, 
00811                                          createX86_32Disassembler);
00812   TargetRegistry::RegisterMCDisassembler(TheX86_64Target,
00813                                          createX86_64Disassembler);
00814 }