Line data Source code
1 : //===-- MCExternalSymbolizer.cpp - External symbolizer --------------------===//
2 : //
3 : // The LLVM Compiler Infrastructure
4 : //
5 : // This file is distributed under the University of Illinois Open Source
6 : // License. See LICENSE.TXT for details.
7 : //
8 : //===----------------------------------------------------------------------===//
9 :
10 : #include "llvm/MC/MCDisassembler/MCExternalSymbolizer.h"
11 : #include "llvm/MC/MCContext.h"
12 : #include "llvm/MC/MCExpr.h"
13 : #include "llvm/MC/MCInst.h"
14 : #include "llvm/Support/raw_ostream.h"
15 : #include <cstring>
16 :
17 : using namespace llvm;
18 :
19 : namespace llvm {
20 : class Triple; // Forward declaration: the visible code only names Triple in a reference parameter.
21 : }
22 :
23 : // Tries to append a symbolic expression operand to MI in place of the immediate
24 : // Value (any PC adjustment has already been made by the caller). IsBranch is
25 : // true when the instruction is a branch. If a GetOpInfo callback is set, it is
26 : // asked for symbolic information about the operand at Address/Offset; when it
27 : // returns non-zero, that information is turned into an MCExpr and added to MI.
28 : // Otherwise, if a SymbolLookUp callback is set, it is asked whether Value is
29 : // the address of a symbol: a symbol reference is used when one is found, and
30 : // for branches an MCExpr holding Value is created when none is found.
31 : // Non-branch operands are never looked up when InstSize is 1. Text describing
32 : // the reference (demangled name, symbol stub, Objc message) may be written to
33 : // cStream. This function returns true if it adds an operand to the MCInst and
34 : // false otherwise.
35 921 : bool MCExternalSymbolizer::tryAddingSymbolicOperand(MCInst &MI,
36 : raw_ostream &cStream,
37 : int64_t Value,
38 : uint64_t Address,
39 : bool IsBranch,
40 : uint64_t Offset,
41 : uint64_t InstSize) {
42 : struct LLVMOpInfo1 SymbolicOp; // Filled in by GetOpInfo() or by the lookup fallback below.
43 921 : std::memset(&SymbolicOp, '\0', sizeof(struct LLVMOpInfo1)); // Zero every field before the callback sees it.
44 921 : SymbolicOp.Value = Value;
45 :
46 1837 : if (!GetOpInfo ||
47 916 : !GetOpInfo(DisInfo, Address, Offset, InstSize, 1, &SymbolicOp)) { // NOTE(review): the literal 1 is presumably the tag selecting LLVMOpInfo1 - confirm.
48 : // Discard SymbolicOp.Value set above along with every other field.
49 900 : std::memset(&SymbolicOp, '\0', sizeof(struct LLVMOpInfo1));
50 :
51 : // At this point there is no GetOpInfo() callback, or it found no
52 : // relocation information for this operand, so we are left to use the
53 : // SymbolLookUp() callback to guess whether Value is the address of a
54 : // symbol. For a branch that guess always makes sense. For an immediate it
55 : // is more questionable whether Value is a symbol address or some other
56 : // reference, so when InstSize is 1 byte and the instruction is not a
57 : // branch we do not guess at all: in object files assembled starting at
58 : // address 0 such guesses usually lead to incorrect symbolication.
59 : // Without a SymbolLookUp() callback there is nothing to consult either.
60 900 : if (!SymbolLookUp || (InstSize == 1 && !IsBranch))
61 761 : return false;
62 :
63 : uint64_t ReferenceType; // Passed by address to SymbolLookUp() and re-checked afterwards.
64 687 : if (IsBranch)
65 116 : ReferenceType = LLVMDisassembler_ReferenceType_In_Branch;
66 : else
67 571 : ReferenceType = LLVMDisassembler_ReferenceType_InOut_None;
68 : const char *ReferenceName; // Not initialized here; only read under ReferenceType checks after the lookup.
69 687 : const char *Name = SymbolLookUp(DisInfo, Value, &ReferenceType, Address,
70 : &ReferenceName);
71 687 : if (Name) {
72 36 : SymbolicOp.AddSymbol.Name = Name;
73 36 : SymbolicOp.AddSymbol.Present = true;
74 : // If the lookup reports a demangled name, write that readable name to cStream.
75 36 : if(ReferenceType == LLVMDisassembler_ReferenceType_DeMangled_Name)
76 1 : cStream << ReferenceName;
77 : }
78 : // For branches always create an MCExpr so it gets printed as a hex address.
79 651 : else if (IsBranch) {
80 103 : SymbolicOp.Value = Value;
81 : }
82 687 : if(ReferenceType == LLVMDisassembler_ReferenceType_Out_SymbolStub)
83 32 : cStream << "symbol stub for: " << ReferenceName;
84 655 : else if(ReferenceType == LLVMDisassembler_ReferenceType_Out_Objc_Message)
85 2 : cStream << "Objc message: " << ReferenceName;
86 687 : if (!Name && !IsBranch) // No symbol found and not a branch: no operand is added.
87 : return false;
88 : }
89 :
90 : const MCExpr *Add = nullptr; // Term that is added; stays null when AddSymbol is absent.
91 160 : if (SymbolicOp.AddSymbol.Present) {
92 57 : if (SymbolicOp.AddSymbol.Name) {
93 : StringRef Name(SymbolicOp.AddSymbol.Name);
94 108 : MCSymbol *Sym = Ctx.getOrCreateSymbol(Name);
95 54 : Add = MCSymbolRefExpr::create(Sym, Ctx);
96 : } else {
97 3 : Add = MCConstantExpr::create((int)SymbolicOp.AddSymbol.Value, Ctx); // NOTE(review): the (int) cast converts the value before the constant is created - confirm the narrowing is intended.
98 : }
99 : }
100 :
101 : const MCExpr *Sub = nullptr; // Term that is subtracted; stays null when SubtractSymbol is absent.
102 160 : if (SymbolicOp.SubtractSymbol.Present) {
103 9 : if (SymbolicOp.SubtractSymbol.Name) {
104 : StringRef Name(SymbolicOp.SubtractSymbol.Name);
105 8 : MCSymbol *Sym = Ctx.getOrCreateSymbol(Name);
106 4 : Sub = MCSymbolRefExpr::create(Sym, Ctx);
107 : } else {
108 5 : Sub = MCConstantExpr::create((int)SymbolicOp.SubtractSymbol.Value, Ctx); // NOTE(review): same (int) conversion as for AddSymbol above - confirm.
109 : }
110 : }
111 :
112 : const MCExpr *Off = nullptr; // Constant term; a zero Value contributes no term.
113 160 : if (SymbolicOp.Value != 0)
114 109 : Off = MCConstantExpr::create(SymbolicOp.Value, Ctx);
115 :
116 : const MCExpr *Expr; // Built as Add - Sub + Off, leaving out whichever terms are null.
117 160 : if (Sub) {
118 : const MCExpr *LHS;
119 9 : if (Add)
120 9 : LHS = MCBinaryExpr::createSub(Add, Sub, Ctx);
121 : else
122 0 : LHS = MCUnaryExpr::createMinus(Sub, Ctx); // Only a subtracted term: -Sub.
123 9 : if (Off)
124 6 : Expr = MCBinaryExpr::createAdd(LHS, Off, Ctx);
125 : else
126 : Expr = LHS;
127 151 : } else if (Add) {
128 48 : if (Off)
129 0 : Expr = MCBinaryExpr::createAdd(Add, Off, Ctx);
130 : else
131 : Expr = Add;
132 : } else {
133 103 : if (Off)
134 : Expr = Off;
135 : else
136 0 : Expr = MCConstantExpr::create(0, Ctx); // No terms at all: use the constant 0.
137 : }
138 :
139 160 : Expr = RelInfo->createExprForCAPIVariantKind(Expr, SymbolicOp.VariantKind); // May return null, in which case no operand is added.
140 160 : if (!Expr)
141 : return false;
142 :
143 160 : MI.addOperand(MCOperand::createExpr(Expr));
144 160 : return true;
145 : }
146 :
147 : // Tries to add a comment describing what is referenced by a load instruction
148 : // whose base register is the PC. Such loads often read values from a literal
149 : // pool near the Address of the instruction, so the Address and the immediate
150 : // Value are passed to the SymbolLookUp callback as a possible literal pool
151 : // entry. Based on the reference type the callback reports, the comment names
152 : // the symbol whose address the entry holds, quotes (escaped) the C string
153 : // literal the entry points at, or labels an Objective-C reference (cfstring,
154 : // message, message ref, selector ref or class ref) with the returned string.
155 : // Nothing is written when no callback is set or when the reported reference
156 : // type is none of the above.
157 21 : void MCExternalSymbolizer::tryAddingPcLoadReferenceComment(raw_ostream &cStream,
158 : int64_t Value,
159 : uint64_t Address) {
160 21 : if (SymbolLookUp) { // Without a lookup callback there is nothing to report.
161 20 : uint64_t ReferenceType = LLVMDisassembler_ReferenceType_In_PCrel_Load;
162 : const char *ReferenceName; // Not initialized here; only read under the ReferenceType checks below.
163 20 : (void)SymbolLookUp(DisInfo, Value, &ReferenceType, Address, &ReferenceName); // Returned name is ignored; only the two out-parameters are used.
164 20 : if(ReferenceType == LLVMDisassembler_ReferenceType_Out_LitPool_SymAddr)
165 2 : cStream << "literal pool symbol address: " << ReferenceName;
166 18 : else if(ReferenceType ==
167 : LLVMDisassembler_ReferenceType_Out_LitPool_CstrAddr) {
168 5 : cStream << "literal pool for: \"";
169 10 : cStream.write_escaped(ReferenceName); // The string contents go through write_escaped() rather than operator<<.
170 5 : cStream << "\"";
171 : }
172 13 : else if(ReferenceType ==
173 : LLVMDisassembler_ReferenceType_Out_Objc_CFString_Ref)
174 2 : cStream << "Objc cfstring ref: @\"" << ReferenceName << "\"";
175 11 : else if(ReferenceType ==
176 : LLVMDisassembler_ReferenceType_Out_Objc_Message)
177 0 : cStream << "Objc message: " << ReferenceName;
178 11 : else if(ReferenceType ==
179 : LLVMDisassembler_ReferenceType_Out_Objc_Message_Ref)
180 0 : cStream << "Objc message ref: " << ReferenceName;
181 11 : else if(ReferenceType ==
182 : LLVMDisassembler_ReferenceType_Out_Objc_Selector_Ref)
183 4 : cStream << "Objc selector ref: " << ReferenceName;
184 7 : else if(ReferenceType ==
185 : LLVMDisassembler_ReferenceType_Out_Objc_Class_Ref)
186 4 : cStream << "Objc class ref: " << ReferenceName;
187 : }
188 21 : }
189 :
190 : namespace llvm { // Holds the factory that builds an MCExternalSymbolizer from the two callbacks.
191 70 : MCSymbolizer *createMCSymbolizer(const Triple &TT, LLVMOpInfoCallback GetOpInfo, // TT is not used in this body.
192 : LLVMSymbolLookupCallback SymbolLookUp,
193 : void *DisInfo, MCContext *Ctx,
194 : std::unique_ptr<MCRelocationInfo> &&RelInfo) {
195 : assert(Ctx && "No MCContext given for symbolic disassembly"); // Ctx is dereferenced below, so it must not be null.
196 :
197 : return new MCExternalSymbolizer(*Ctx, std::move(RelInfo), GetOpInfo, // Allocated with new and returned as a raw pointer; RelInfo is handed on via std::move.
198 70 : SymbolLookUp, DisInfo);
199 : }
200 : } // end namespace llvm
|