// LLVM API Documentation
00001 //===-- X86Disassembler.cpp - Disassembler for x86 and x86_64 -------------===// 00002 // 00003 // The LLVM Compiler Infrastructure 00004 // 00005 // This file is distributed under the University of Illinois Open Source 00006 // License. See LICENSE.TXT for details. 00007 // 00008 //===----------------------------------------------------------------------===// 00009 // 00010 // This file is part of the X86 Disassembler. 00011 // It contains code to translate the data produced by the decoder into 00012 // MCInsts. 00013 // Documentation for the disassembler can be found in X86Disassembler.h. 00014 // 00015 //===----------------------------------------------------------------------===// 00016 00017 #include "X86Disassembler.h" 00018 #include "X86DisassemblerDecoder.h" 00019 #include "llvm/MC/MCContext.h" 00020 #include "llvm/MC/MCDisassembler.h" 00021 #include "llvm/MC/MCExpr.h" 00022 #include "llvm/MC/MCInst.h" 00023 #include "llvm/MC/MCInstrInfo.h" 00024 #include "llvm/MC/MCSubtargetInfo.h" 00025 #include "llvm/Support/Debug.h" 00026 #include "llvm/Support/MemoryObject.h" 00027 #include "llvm/Support/TargetRegistry.h" 00028 #include "llvm/Support/raw_ostream.h" 00029 00030 #define GET_REGINFO_ENUM 00031 #include "X86GenRegisterInfo.inc" 00032 #define GET_INSTRINFO_ENUM 00033 #include "X86GenInstrInfo.inc" 00034 00035 using namespace llvm; 00036 using namespace llvm::X86Disassembler; 00037 00038 void x86DisassemblerDebug(const char *file, 00039 unsigned line, 00040 const char *s) { 00041 dbgs() << file << ":" << line << ": " << s; 00042 } 00043 00044 const char *x86DisassemblerGetInstrName(unsigned Opcode, const void *mii) { 00045 const MCInstrInfo *MII = static_cast<const MCInstrInfo *>(mii); 00046 return MII->getName(Opcode); 00047 } 00048 00049 #define debug(s) DEBUG(x86DisassemblerDebug(__FILE__, __LINE__, s)); 00050 00051 namespace llvm { 00052 00053 // Fill-ins to make the compiler happy. 
These constants are never actually 00054 // assigned; they are just filler to make an automatically-generated switch 00055 // statement work. 00056 namespace X86 { 00057 enum { 00058 BX_SI = 500, 00059 BX_DI = 501, 00060 BP_SI = 502, 00061 BP_DI = 503, 00062 sib = 504, 00063 sib64 = 505 00064 }; 00065 } 00066 00067 extern Target TheX86_32Target, TheX86_64Target; 00068 00069 } 00070 00071 static bool translateInstruction(MCInst &target, 00072 InternalInstruction &source, 00073 const MCDisassembler *Dis); 00074 00075 X86GenericDisassembler::X86GenericDisassembler(const MCSubtargetInfo &STI, 00076 DisassemblerMode mode, 00077 const MCInstrInfo *MII) 00078 : MCDisassembler(STI), MII(MII), fMode(mode) {} 00079 00080 X86GenericDisassembler::~X86GenericDisassembler() { 00081 delete MII; 00082 } 00083 00084 /// regionReader - a callback function that wraps the readByte method from 00085 /// MemoryObject. 00086 /// 00087 /// @param arg - The generic callback parameter. In this case, this should 00088 /// be a pointer to a MemoryObject. 00089 /// @param byte - A pointer to the byte to be read. 00090 /// @param address - The address to be read. 00091 static int regionReader(const void* arg, uint8_t* byte, uint64_t address) { 00092 const MemoryObject* region = static_cast<const MemoryObject*>(arg); 00093 return region->readByte(address, byte); 00094 } 00095 00096 /// logger - a callback function that wraps the operator<< method from 00097 /// raw_ostream. 00098 /// 00099 /// @param arg - The generic callback parameter. This should be a pointe 00100 /// to a raw_ostream. 00101 /// @param log - A string to be logged. logger() adds a newline. 
00102 static void logger(void* arg, const char* log) { 00103 if (!arg) 00104 return; 00105 00106 raw_ostream &vStream = *(static_cast<raw_ostream*>(arg)); 00107 vStream << log << "\n"; 00108 } 00109 00110 // 00111 // Public interface for the disassembler 00112 // 00113 00114 MCDisassembler::DecodeStatus 00115 X86GenericDisassembler::getInstruction(MCInst &instr, 00116 uint64_t &size, 00117 const MemoryObject ®ion, 00118 uint64_t address, 00119 raw_ostream &vStream, 00120 raw_ostream &cStream) const { 00121 CommentStream = &cStream; 00122 00123 InternalInstruction internalInstr; 00124 00125 dlog_t loggerFn = logger; 00126 if (&vStream == &nulls()) 00127 loggerFn = 0; // Disable logging completely if it's going to nulls(). 00128 00129 int ret = decodeInstruction(&internalInstr, 00130 regionReader, 00131 (const void*)®ion, 00132 loggerFn, 00133 (void*)&vStream, 00134 (const void*)MII, 00135 address, 00136 fMode); 00137 00138 if (ret) { 00139 size = internalInstr.readerCursor - address; 00140 return Fail; 00141 } 00142 else { 00143 size = internalInstr.length; 00144 return (!translateInstruction(instr, internalInstr, this)) ? 00145 Success : Fail; 00146 } 00147 } 00148 00149 // 00150 // Private code that translates from struct InternalInstructions to MCInsts. 00151 // 00152 00153 /// translateRegister - Translates an internal register to the appropriate LLVM 00154 /// register, and appends it as an operand to an MCInst. 00155 /// 00156 /// @param mcInst - The MCInst to append to. 00157 /// @param reg - The Reg to append. 00158 static void translateRegister(MCInst &mcInst, Reg reg) { 00159 #define ENTRY(x) X86::x, 00160 uint8_t llvmRegnums[] = { 00161 ALL_REGS 00162 0 00163 }; 00164 #undef ENTRY 00165 00166 uint8_t llvmRegnum = llvmRegnums[reg]; 00167 mcInst.addOperand(MCOperand::CreateReg(llvmRegnum)); 00168 } 00169 00170 /// tryAddingSymbolicOperand - trys to add a symbolic operand in place of the 00171 /// immediate Value in the MCInst. 
00172 /// 00173 /// @param Value - The immediate Value, has had any PC adjustment made by 00174 /// the caller. 00175 /// @param isBranch - If the instruction is a branch instruction 00176 /// @param Address - The starting address of the instruction 00177 /// @param Offset - The byte offset to this immediate in the instruction 00178 /// @param Width - The byte width of this immediate in the instruction 00179 /// 00180 /// If the getOpInfo() function was set when setupForSymbolicDisassembly() was 00181 /// called then that function is called to get any symbolic information for the 00182 /// immediate in the instruction using the Address, Offset and Width. If that 00183 /// returns non-zero then the symbolic information it returns is used to create 00184 /// an MCExpr and that is added as an operand to the MCInst. If getOpInfo() 00185 /// returns zero and isBranch is true then a symbol look up for immediate Value 00186 /// is done and if a symbol is found an MCExpr is created with that, else 00187 /// an MCExpr with the immediate Value is created. This function returns true 00188 /// if it adds an operand to the MCInst and false otherwise. 00189 static bool tryAddingSymbolicOperand(int64_t Value, bool isBranch, 00190 uint64_t Address, uint64_t Offset, 00191 uint64_t Width, MCInst &MI, 00192 const MCDisassembler *Dis) { 00193 LLVMOpInfoCallback getOpInfo = Dis->getLLVMOpInfoCallback(); 00194 struct LLVMOpInfo1 SymbolicOp; 00195 memset(&SymbolicOp, '\0', sizeof(struct LLVMOpInfo1)); 00196 SymbolicOp.Value = Value; 00197 void *DisInfo = Dis->getDisInfoBlock(); 00198 00199 if (!getOpInfo || 00200 !getOpInfo(DisInfo, Address, Offset, Width, 1, &SymbolicOp)) { 00201 // Clear SymbolicOp.Value from above and also all other fields. 
00202 memset(&SymbolicOp, '\0', sizeof(struct LLVMOpInfo1)); 00203 LLVMSymbolLookupCallback SymbolLookUp = Dis->getLLVMSymbolLookupCallback(); 00204 if (!SymbolLookUp) 00205 return false; 00206 uint64_t ReferenceType; 00207 if (isBranch) 00208 ReferenceType = LLVMDisassembler_ReferenceType_In_Branch; 00209 else 00210 ReferenceType = LLVMDisassembler_ReferenceType_InOut_None; 00211 const char *ReferenceName; 00212 const char *Name = SymbolLookUp(DisInfo, Value, &ReferenceType, Address, 00213 &ReferenceName); 00214 if (Name) { 00215 SymbolicOp.AddSymbol.Name = Name; 00216 SymbolicOp.AddSymbol.Present = true; 00217 } 00218 // For branches always create an MCExpr so it gets printed as hex address. 00219 else if (isBranch) { 00220 SymbolicOp.Value = Value; 00221 } 00222 if(ReferenceType == LLVMDisassembler_ReferenceType_Out_SymbolStub) 00223 (*Dis->CommentStream) << "symbol stub for: " << ReferenceName; 00224 if (!Name && !isBranch) 00225 return false; 00226 } 00227 00228 MCContext *Ctx = Dis->getMCContext(); 00229 const MCExpr *Add = NULL; 00230 if (SymbolicOp.AddSymbol.Present) { 00231 if (SymbolicOp.AddSymbol.Name) { 00232 StringRef Name(SymbolicOp.AddSymbol.Name); 00233 MCSymbol *Sym = Ctx->GetOrCreateSymbol(Name); 00234 Add = MCSymbolRefExpr::Create(Sym, *Ctx); 00235 } else { 00236 Add = MCConstantExpr::Create((int)SymbolicOp.AddSymbol.Value, *Ctx); 00237 } 00238 } 00239 00240 const MCExpr *Sub = NULL; 00241 if (SymbolicOp.SubtractSymbol.Present) { 00242 if (SymbolicOp.SubtractSymbol.Name) { 00243 StringRef Name(SymbolicOp.SubtractSymbol.Name); 00244 MCSymbol *Sym = Ctx->GetOrCreateSymbol(Name); 00245 Sub = MCSymbolRefExpr::Create(Sym, *Ctx); 00246 } else { 00247 Sub = MCConstantExpr::Create((int)SymbolicOp.SubtractSymbol.Value, *Ctx); 00248 } 00249 } 00250 00251 const MCExpr *Off = NULL; 00252 if (SymbolicOp.Value != 0) 00253 Off = MCConstantExpr::Create(SymbolicOp.Value, *Ctx); 00254 00255 const MCExpr *Expr; 00256 if (Sub) { 00257 const MCExpr *LHS; 00258 if 
(Add) 00259 LHS = MCBinaryExpr::CreateSub(Add, Sub, *Ctx); 00260 else 00261 LHS = MCUnaryExpr::CreateMinus(Sub, *Ctx); 00262 if (Off != 0) 00263 Expr = MCBinaryExpr::CreateAdd(LHS, Off, *Ctx); 00264 else 00265 Expr = LHS; 00266 } else if (Add) { 00267 if (Off != 0) 00268 Expr = MCBinaryExpr::CreateAdd(Add, Off, *Ctx); 00269 else 00270 Expr = Add; 00271 } else { 00272 if (Off != 0) 00273 Expr = Off; 00274 else 00275 Expr = MCConstantExpr::Create(0, *Ctx); 00276 } 00277 00278 MI.addOperand(MCOperand::CreateExpr(Expr)); 00279 00280 return true; 00281 } 00282 00283 /// tryAddingPcLoadReferenceComment - trys to add a comment as to what is being 00284 /// referenced by a load instruction with the base register that is the rip. 00285 /// These can often be addresses in a literal pool. The Address of the 00286 /// instruction and its immediate Value are used to determine the address 00287 /// being referenced in the literal pool entry. The SymbolLookUp call back will 00288 /// return a pointer to a literal 'C' string if the referenced address is an 00289 /// address into a section with 'C' string literals. 00290 static void tryAddingPcLoadReferenceComment(uint64_t Address, uint64_t Value, 00291 const void *Decoder) { 00292 const MCDisassembler *Dis = static_cast<const MCDisassembler*>(Decoder); 00293 LLVMSymbolLookupCallback SymbolLookUp = Dis->getLLVMSymbolLookupCallback(); 00294 if (SymbolLookUp) { 00295 void *DisInfo = Dis->getDisInfoBlock(); 00296 uint64_t ReferenceType = LLVMDisassembler_ReferenceType_In_PCrel_Load; 00297 const char *ReferenceName; 00298 (void)SymbolLookUp(DisInfo, Value, &ReferenceType, Address, &ReferenceName); 00299 if(ReferenceType == LLVMDisassembler_ReferenceType_Out_LitPool_CstrAddr) 00300 (*Dis->CommentStream) << "literal pool for: " << ReferenceName; 00301 } 00302 } 00303 00304 /// translateImmediate - Appends an immediate operand to an MCInst. 00305 /// 00306 /// @param mcInst - The MCInst to append to. 
00307 /// @param immediate - The immediate value to append. 00308 /// @param operand - The operand, as stored in the descriptor table. 00309 /// @param insn - The internal instruction. 00310 static void translateImmediate(MCInst &mcInst, uint64_t immediate, 00311 const OperandSpecifier &operand, 00312 InternalInstruction &insn, 00313 const MCDisassembler *Dis) { 00314 // Sign-extend the immediate if necessary. 00315 00316 OperandType type = (OperandType)operand.type; 00317 00318 bool isBranch = false; 00319 uint64_t pcrel = 0; 00320 if (type == TYPE_RELv) { 00321 isBranch = true; 00322 pcrel = insn.startLocation + 00323 insn.immediateOffset + insn.immediateSize; 00324 switch (insn.displacementSize) { 00325 default: 00326 break; 00327 case 1: 00328 type = TYPE_MOFFS8; 00329 break; 00330 case 2: 00331 type = TYPE_MOFFS16; 00332 break; 00333 case 4: 00334 type = TYPE_MOFFS32; 00335 break; 00336 case 8: 00337 type = TYPE_MOFFS64; 00338 break; 00339 } 00340 } 00341 // By default sign-extend all X86 immediates based on their encoding. 00342 else if (type == TYPE_IMM8 || type == TYPE_IMM16 || type == TYPE_IMM32 || 00343 type == TYPE_IMM64) { 00344 uint32_t Opcode = mcInst.getOpcode(); 00345 switch (operand.encoding) { 00346 default: 00347 break; 00348 case ENCODING_IB: 00349 // Special case those X86 instructions that use the imm8 as a set of 00350 // bits, bit count, etc. and are not sign-extend. 
00351 if (Opcode != X86::BLENDPSrri && Opcode != X86::BLENDPDrri && 00352 Opcode != X86::PBLENDWrri && Opcode != X86::MPSADBWrri && 00353 Opcode != X86::DPPSrri && Opcode != X86::DPPDrri && 00354 Opcode != X86::INSERTPSrr && Opcode != X86::VBLENDPSYrri && 00355 Opcode != X86::VBLENDPSYrmi && Opcode != X86::VBLENDPDYrri && 00356 Opcode != X86::VBLENDPDYrmi && Opcode != X86::VPBLENDWrri && 00357 Opcode != X86::VMPSADBWrri && Opcode != X86::VDPPSYrri && 00358 Opcode != X86::VDPPSYrmi && Opcode != X86::VDPPDrri && 00359 Opcode != X86::VINSERTPSrr) 00360 type = TYPE_MOFFS8; 00361 break; 00362 case ENCODING_IW: 00363 type = TYPE_MOFFS16; 00364 break; 00365 case ENCODING_ID: 00366 type = TYPE_MOFFS32; 00367 break; 00368 case ENCODING_IO: 00369 type = TYPE_MOFFS64; 00370 break; 00371 } 00372 } 00373 00374 switch (type) { 00375 case TYPE_XMM32: 00376 case TYPE_XMM64: 00377 case TYPE_XMM128: 00378 mcInst.addOperand(MCOperand::CreateReg(X86::XMM0 + (immediate >> 4))); 00379 return; 00380 case TYPE_XMM256: 00381 mcInst.addOperand(MCOperand::CreateReg(X86::YMM0 + (immediate >> 4))); 00382 return; 00383 case TYPE_REL8: 00384 isBranch = true; 00385 pcrel = insn.startLocation + insn.immediateOffset + insn.immediateSize; 00386 // fall through to sign extend the immediate if needed. 00387 case TYPE_MOFFS8: 00388 if(immediate & 0x80) 00389 immediate |= ~(0xffull); 00390 break; 00391 case TYPE_MOFFS16: 00392 if(immediate & 0x8000) 00393 immediate |= ~(0xffffull); 00394 break; 00395 case TYPE_REL32: 00396 case TYPE_REL64: 00397 isBranch = true; 00398 pcrel = insn.startLocation + insn.immediateOffset + insn.immediateSize; 00399 // fall through to sign extend the immediate if needed. 00400 case TYPE_MOFFS32: 00401 if(immediate & 0x80000000) 00402 immediate |= ~(0xffffffffull); 00403 break; 00404 case TYPE_MOFFS64: 00405 default: 00406 // operand is 64 bits wide. Do nothing. 
00407 break; 00408 } 00409 00410 if(!tryAddingSymbolicOperand(immediate + pcrel, isBranch, insn.startLocation, 00411 insn.immediateOffset, insn.immediateSize, 00412 mcInst, Dis)) 00413 mcInst.addOperand(MCOperand::CreateImm(immediate)); 00414 } 00415 00416 /// translateRMRegister - Translates a register stored in the R/M field of the 00417 /// ModR/M byte to its LLVM equivalent and appends it to an MCInst. 00418 /// @param mcInst - The MCInst to append to. 00419 /// @param insn - The internal instruction to extract the R/M field 00420 /// from. 00421 /// @return - 0 on success; -1 otherwise 00422 static bool translateRMRegister(MCInst &mcInst, 00423 InternalInstruction &insn) { 00424 if (insn.eaBase == EA_BASE_sib || insn.eaBase == EA_BASE_sib64) { 00425 debug("A R/M register operand may not have a SIB byte"); 00426 return true; 00427 } 00428 00429 switch (insn.eaBase) { 00430 default: 00431 debug("Unexpected EA base register"); 00432 return true; 00433 case EA_BASE_NONE: 00434 debug("EA_BASE_NONE for ModR/M base"); 00435 return true; 00436 #define ENTRY(x) case EA_BASE_##x: 00437 ALL_EA_BASES 00438 #undef ENTRY 00439 debug("A R/M register operand may not have a base; " 00440 "the operand must be a register."); 00441 return true; 00442 #define ENTRY(x) \ 00443 case EA_REG_##x: \ 00444 mcInst.addOperand(MCOperand::CreateReg(X86::x)); break; 00445 ALL_REGS 00446 #undef ENTRY 00447 } 00448 00449 return false; 00450 } 00451 00452 /// translateRMMemory - Translates a memory operand stored in the Mod and R/M 00453 /// fields of an internal instruction (and possibly its SIB byte) to a memory 00454 /// operand in LLVM's format, and appends it to an MCInst. 00455 /// 00456 /// @param mcInst - The MCInst to append to. 00457 /// @param insn - The instruction to extract Mod, R/M, and SIB fields 00458 /// from. 
00459 /// @return - 0 on success; nonzero otherwise 00460 static bool translateRMMemory(MCInst &mcInst, InternalInstruction &insn, 00461 const MCDisassembler *Dis) { 00462 // Addresses in an MCInst are represented as five operands: 00463 // 1. basereg (register) The R/M base, or (if there is a SIB) the 00464 // SIB base 00465 // 2. scaleamount (immediate) 1, or (if there is a SIB) the specified 00466 // scale amount 00467 // 3. indexreg (register) x86_registerNONE, or (if there is a SIB) 00468 // the index (which is multiplied by the 00469 // scale amount) 00470 // 4. displacement (immediate) 0, or the displacement if there is one 00471 // 5. segmentreg (register) x86_registerNONE for now, but could be set 00472 // if we have segment overrides 00473 00474 MCOperand baseReg; 00475 MCOperand scaleAmount; 00476 MCOperand indexReg; 00477 MCOperand displacement; 00478 MCOperand segmentReg; 00479 uint64_t pcrel = 0; 00480 00481 if (insn.eaBase == EA_BASE_sib || insn.eaBase == EA_BASE_sib64) { 00482 if (insn.sibBase != SIB_BASE_NONE) { 00483 switch (insn.sibBase) { 00484 default: 00485 debug("Unexpected sibBase"); 00486 return true; 00487 #define ENTRY(x) \ 00488 case SIB_BASE_##x: \ 00489 baseReg = MCOperand::CreateReg(X86::x); break; 00490 ALL_SIB_BASES 00491 #undef ENTRY 00492 } 00493 } else { 00494 baseReg = MCOperand::CreateReg(0); 00495 } 00496 00497 // Check whether we are handling VSIB addressing mode for GATHER. 00498 // If sibIndex was set to SIB_INDEX_NONE, index offset is 4 and 00499 // we should use SIB_INDEX_XMM4|YMM4 for VSIB. 00500 // I don't see a way to get the correct IndexReg in readSIB: 00501 // We can tell whether it is VSIB or SIB after instruction ID is decoded, 00502 // but instruction ID may not be decoded yet when calling readSIB. 
00503 uint32_t Opcode = mcInst.getOpcode(); 00504 bool IndexIs128 = (Opcode == X86::VGATHERDPDrm || 00505 Opcode == X86::VGATHERDPDYrm || 00506 Opcode == X86::VGATHERQPDrm || 00507 Opcode == X86::VGATHERDPSrm || 00508 Opcode == X86::VGATHERQPSrm || 00509 Opcode == X86::VPGATHERDQrm || 00510 Opcode == X86::VPGATHERDQYrm || 00511 Opcode == X86::VPGATHERQQrm || 00512 Opcode == X86::VPGATHERDDrm || 00513 Opcode == X86::VPGATHERQDrm); 00514 bool IndexIs256 = (Opcode == X86::VGATHERQPDYrm || 00515 Opcode == X86::VGATHERDPSYrm || 00516 Opcode == X86::VGATHERQPSYrm || 00517 Opcode == X86::VPGATHERQQYrm || 00518 Opcode == X86::VPGATHERDDYrm || 00519 Opcode == X86::VPGATHERQDYrm); 00520 if (IndexIs128 || IndexIs256) { 00521 unsigned IndexOffset = insn.sibIndex - 00522 (insn.addressSize == 8 ? SIB_INDEX_RAX:SIB_INDEX_EAX); 00523 SIBIndex IndexBase = IndexIs256 ? SIB_INDEX_YMM0 : SIB_INDEX_XMM0; 00524 insn.sibIndex = (SIBIndex)(IndexBase + 00525 (insn.sibIndex == SIB_INDEX_NONE ? 4 : IndexOffset)); 00526 } 00527 00528 if (insn.sibIndex != SIB_INDEX_NONE) { 00529 switch (insn.sibIndex) { 00530 default: 00531 debug("Unexpected sibIndex"); 00532 return true; 00533 #define ENTRY(x) \ 00534 case SIB_INDEX_##x: \ 00535 indexReg = MCOperand::CreateReg(X86::x); break; 00536 EA_BASES_32BIT 00537 EA_BASES_64BIT 00538 REGS_XMM 00539 REGS_YMM 00540 #undef ENTRY 00541 } 00542 } else { 00543 indexReg = MCOperand::CreateReg(0); 00544 } 00545 00546 scaleAmount = MCOperand::CreateImm(insn.sibScale); 00547 } else { 00548 switch (insn.eaBase) { 00549 case EA_BASE_NONE: 00550 if (insn.eaDisplacement == EA_DISP_NONE) { 00551 debug("EA_BASE_NONE and EA_DISP_NONE for ModR/M base"); 00552 return true; 00553 } 00554 if (insn.mode == MODE_64BIT){ 00555 pcrel = insn.startLocation + 00556 insn.displacementOffset + insn.displacementSize; 00557 tryAddingPcLoadReferenceComment(insn.startLocation + 00558 insn.displacementOffset, 00559 insn.displacement + pcrel, Dis); 00560 baseReg = 
MCOperand::CreateReg(X86::RIP); // Section 2.2.1.6 00561 } 00562 else 00563 baseReg = MCOperand::CreateReg(0); 00564 00565 indexReg = MCOperand::CreateReg(0); 00566 break; 00567 case EA_BASE_BX_SI: 00568 baseReg = MCOperand::CreateReg(X86::BX); 00569 indexReg = MCOperand::CreateReg(X86::SI); 00570 break; 00571 case EA_BASE_BX_DI: 00572 baseReg = MCOperand::CreateReg(X86::BX); 00573 indexReg = MCOperand::CreateReg(X86::DI); 00574 break; 00575 case EA_BASE_BP_SI: 00576 baseReg = MCOperand::CreateReg(X86::BP); 00577 indexReg = MCOperand::CreateReg(X86::SI); 00578 break; 00579 case EA_BASE_BP_DI: 00580 baseReg = MCOperand::CreateReg(X86::BP); 00581 indexReg = MCOperand::CreateReg(X86::DI); 00582 break; 00583 default: 00584 indexReg = MCOperand::CreateReg(0); 00585 switch (insn.eaBase) { 00586 default: 00587 debug("Unexpected eaBase"); 00588 return true; 00589 // Here, we will use the fill-ins defined above. However, 00590 // BX_SI, BX_DI, BP_SI, and BP_DI are all handled above and 00591 // sib and sib64 were handled in the top-level if, so they're only 00592 // placeholders to keep the compiler happy. 
00593 #define ENTRY(x) \ 00594 case EA_BASE_##x: \ 00595 baseReg = MCOperand::CreateReg(X86::x); break; 00596 ALL_EA_BASES 00597 #undef ENTRY 00598 #define ENTRY(x) case EA_REG_##x: 00599 ALL_REGS 00600 #undef ENTRY 00601 debug("A R/M memory operand may not be a register; " 00602 "the base field must be a base."); 00603 return true; 00604 } 00605 } 00606 00607 scaleAmount = MCOperand::CreateImm(1); 00608 } 00609 00610 displacement = MCOperand::CreateImm(insn.displacement); 00611 00612 static const uint8_t segmentRegnums[SEG_OVERRIDE_max] = { 00613 0, // SEG_OVERRIDE_NONE 00614 X86::CS, 00615 X86::SS, 00616 X86::DS, 00617 X86::ES, 00618 X86::FS, 00619 X86::GS 00620 }; 00621 00622 segmentReg = MCOperand::CreateReg(segmentRegnums[insn.segmentOverride]); 00623 00624 mcInst.addOperand(baseReg); 00625 mcInst.addOperand(scaleAmount); 00626 mcInst.addOperand(indexReg); 00627 if(!tryAddingSymbolicOperand(insn.displacement + pcrel, false, 00628 insn.startLocation, insn.displacementOffset, 00629 insn.displacementSize, mcInst, Dis)) 00630 mcInst.addOperand(displacement); 00631 mcInst.addOperand(segmentReg); 00632 return false; 00633 } 00634 00635 /// translateRM - Translates an operand stored in the R/M (and possibly SIB) 00636 /// byte of an instruction to LLVM form, and appends it to an MCInst. 00637 /// 00638 /// @param mcInst - The MCInst to append to. 00639 /// @param operand - The operand, as stored in the descriptor table. 00640 /// @param insn - The instruction to extract Mod, R/M, and SIB fields 00641 /// from. 
00642 /// @return - 0 on success; nonzero otherwise 00643 static bool translateRM(MCInst &mcInst, const OperandSpecifier &operand, 00644 InternalInstruction &insn, const MCDisassembler *Dis) { 00645 switch (operand.type) { 00646 default: 00647 debug("Unexpected type for a R/M operand"); 00648 return true; 00649 case TYPE_R8: 00650 case TYPE_R16: 00651 case TYPE_R32: 00652 case TYPE_R64: 00653 case TYPE_Rv: 00654 case TYPE_MM: 00655 case TYPE_MM32: 00656 case TYPE_MM64: 00657 case TYPE_XMM: 00658 case TYPE_XMM32: 00659 case TYPE_XMM64: 00660 case TYPE_XMM128: 00661 case TYPE_XMM256: 00662 case TYPE_DEBUGREG: 00663 case TYPE_CONTROLREG: 00664 return translateRMRegister(mcInst, insn); 00665 case TYPE_M: 00666 case TYPE_M8: 00667 case TYPE_M16: 00668 case TYPE_M32: 00669 case TYPE_M64: 00670 case TYPE_M128: 00671 case TYPE_M256: 00672 case TYPE_M512: 00673 case TYPE_Mv: 00674 case TYPE_M32FP: 00675 case TYPE_M64FP: 00676 case TYPE_M80FP: 00677 case TYPE_M16INT: 00678 case TYPE_M32INT: 00679 case TYPE_M64INT: 00680 case TYPE_M1616: 00681 case TYPE_M1632: 00682 case TYPE_M1664: 00683 case TYPE_LEA: 00684 return translateRMMemory(mcInst, insn, Dis); 00685 } 00686 } 00687 00688 /// translateFPRegister - Translates a stack position on the FPU stack to its 00689 /// LLVM form, and appends it to an MCInst. 00690 /// 00691 /// @param mcInst - The MCInst to append to. 00692 /// @param stackPos - The stack position to translate. 00693 /// @return - 0 on success; nonzero otherwise. 00694 static bool translateFPRegister(MCInst &mcInst, 00695 uint8_t stackPos) { 00696 if (stackPos >= 8) { 00697 debug("Invalid FP stack position"); 00698 return true; 00699 } 00700 00701 mcInst.addOperand(MCOperand::CreateReg(X86::ST0 + stackPos)); 00702 00703 return false; 00704 } 00705 00706 /// translateOperand - Translates an operand stored in an internal instruction 00707 /// to LLVM's format and appends it to an MCInst. 00708 /// 00709 /// @param mcInst - The MCInst to append to. 
00710 /// @param operand - The operand, as stored in the descriptor table. 00711 /// @param insn - The internal instruction. 00712 /// @return - false on success; true otherwise. 00713 static bool translateOperand(MCInst &mcInst, const OperandSpecifier &operand, 00714 InternalInstruction &insn, 00715 const MCDisassembler *Dis) { 00716 switch (operand.encoding) { 00717 default: 00718 debug("Unhandled operand encoding during translation"); 00719 return true; 00720 case ENCODING_REG: 00721 translateRegister(mcInst, insn.reg); 00722 return false; 00723 case ENCODING_RM: 00724 return translateRM(mcInst, operand, insn, Dis); 00725 case ENCODING_CB: 00726 case ENCODING_CW: 00727 case ENCODING_CD: 00728 case ENCODING_CP: 00729 case ENCODING_CO: 00730 case ENCODING_CT: 00731 debug("Translation of code offsets isn't supported."); 00732 return true; 00733 case ENCODING_IB: 00734 case ENCODING_IW: 00735 case ENCODING_ID: 00736 case ENCODING_IO: 00737 case ENCODING_Iv: 00738 case ENCODING_Ia: 00739 translateImmediate(mcInst, 00740 insn.immediates[insn.numImmediatesTranslated++], 00741 operand, 00742 insn, 00743 Dis); 00744 return false; 00745 case ENCODING_RB: 00746 case ENCODING_RW: 00747 case ENCODING_RD: 00748 case ENCODING_RO: 00749 translateRegister(mcInst, insn.opcodeRegister); 00750 return false; 00751 case ENCODING_I: 00752 return translateFPRegister(mcInst, insn.opcodeModifier); 00753 case ENCODING_Rv: 00754 translateRegister(mcInst, insn.opcodeRegister); 00755 return false; 00756 case ENCODING_VVVV: 00757 translateRegister(mcInst, insn.vvvv); 00758 return false; 00759 case ENCODING_DUP: 00760 return translateOperand(mcInst, insn.operands[operand.type - TYPE_DUP0], 00761 insn, Dis); 00762 } 00763 } 00764 00765 /// translateInstruction - Translates an internal instruction and all its 00766 /// operands to an MCInst. 00767 /// 00768 /// @param mcInst - The MCInst to populate with the instruction's data. 00769 /// @param insn - The internal instruction. 
00770 /// @return - false on success; true otherwise. 00771 static bool translateInstruction(MCInst &mcInst, 00772 InternalInstruction &insn, 00773 const MCDisassembler *Dis) { 00774 if (!insn.spec) { 00775 debug("Instruction has no specification"); 00776 return true; 00777 } 00778 00779 mcInst.setOpcode(insn.instructionID); 00780 00781 int index; 00782 00783 insn.numImmediatesTranslated = 0; 00784 00785 for (index = 0; index < X86_MAX_OPERANDS; ++index) { 00786 if (insn.operands[index].encoding != ENCODING_NONE) { 00787 if (translateOperand(mcInst, insn.operands[index], insn, Dis)) { 00788 return true; 00789 } 00790 } 00791 } 00792 00793 return false; 00794 } 00795 00796 static MCDisassembler *createX86_32Disassembler(const Target &T, 00797 const MCSubtargetInfo &STI) { 00798 return new X86Disassembler::X86GenericDisassembler(STI, MODE_32BIT, 00799 T.createMCInstrInfo()); 00800 } 00801 00802 static MCDisassembler *createX86_64Disassembler(const Target &T, 00803 const MCSubtargetInfo &STI) { 00804 return new X86Disassembler::X86GenericDisassembler(STI, MODE_64BIT, 00805 T.createMCInstrInfo()); 00806 } 00807 00808 extern "C" void LLVMInitializeX86Disassembler() { 00809 // Register the disassembler. 00810 TargetRegistry::RegisterMCDisassembler(TheX86_32Target, 00811 createX86_32Disassembler); 00812 TargetRegistry::RegisterMCDisassembler(TheX86_64Target, 00813 createX86_64Disassembler); 00814 }