LLVM  16.0.0git
AArch64ExternalSymbolizer.cpp
Go to the documentation of this file.
1 //===- AArch64ExternalSymbolizer.cpp - Symbolizer for AArch64 ---*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
11 #include "Utils/AArch64BaseInfo.h"
12 #include "llvm/MC/MCContext.h"
13 #include "llvm/MC/MCExpr.h"
14 #include "llvm/MC/MCInst.h"
15 #include "llvm/MC/MCRegisterInfo.h"
16 #include "llvm/Support/Format.h"
18 
19 using namespace llvm;
20 
21 #define DEBUG_TYPE "aarch64-disassembler"
22 
// getVariant - Dispatches on an LLVMDisassembler_VariantKind value with a
// switch. The cross-reference index at the end of this listing gives the full
// signature as
//   static MCSymbolRefExpr::VariantKind getVariant(uint64_t)
// NOTE(review): the return-type line (23) and the case arms (26-39) are not
// present in this listing; only the default arm is visible. Confirm the
// mapping against the real source file before relying on it.
24 getVariant(uint64_t LLVMDisassembler_VariantKind) {
25  switch (LLVMDisassembler_VariantKind) {
40  default:
    // Any kind not handled by a case arm is treated as a programming error.
41  llvm_unreachable("bad LLVMDisassembler_VariantKind");
42  }
43 }
44 
45 /// tryAddingSymbolicOperand - tryAddingSymbolicOperand tries to add a symbolic
46 /// operand in place of the immediate Value in the MCInst. The immediate
47 /// Value has not had any PC adjustment made by the caller. If the instruction
48 /// is a branch that adds the PC to the immediate Value then IsBranch is
49 /// true, else false. If GetOpInfo is non-null, then it is called to get any
50 /// symbolic information at the Address for this instruction. If that returns
51 /// non-zero then the symbolic information it returns is used to create an
52 /// MCExpr and that is added as an operand to the MCInst. If GetOpInfo()
53 /// returns zero and IsBranch is true then a symbol look up for
54 /// Address + Value is done and if a symbol is found an MCExpr is created with
55 /// that, else an MCExpr with Address + Value is created. If GetOpInfo()
56 /// returns zero and IsBranch is false then the Opcode of the MCInst is
57 /// tested and for ADRP and other instructions that help to load pointers
58 /// a symbol look up is done to see if it returns a specific reference type
59 /// to add to the comment stream. This function returns true if it adds
60 /// an operand to the MCInst and false otherwise.
// NOTE(review): the line that opens this definition (original line 61) is
// missing from this listing. The cross-reference index at the end gives it as
//   bool AArch64ExternalSymbolizer::tryAddingSymbolicOperand(...) override
// Other gaps in the numbering below (74, 79, 89, 92, ...) are likewise lines
// dropped by the listing, not lines absent from the real file.
62  MCInst &MI, raw_ostream &CommentStream, int64_t Value, uint64_t Address,
63  bool IsBranch, uint64_t Offset, uint64_t OpSize, uint64_t InstSize) {
    // Nothing can be symbolized without a symbol lookup callback.
64  if (!SymbolLookUp)
65  return false;
66  // FIXME: This method shares a lot of code with
67  // MCExternalSymbolizer::tryAddingSymbolicOperand. It may be possible
68  // to refactor the MCExternalSymbolizer interface to allow more of this
69  // implementation to be shared.
70  //
    // Start from a zeroed operand description whose constant part is the raw
    // immediate.
71  struct LLVMOpInfo1 SymbolicOp;
72  memset(&SymbolicOp, '\0', sizeof(struct LLVMOpInfo1));
73  SymbolicOp.Value = Value;
    // NOTE(review): original line 74 is not visible; ReferenceType, which is
    // used below, is presumably declared there - confirm.
75  const char *ReferenceName;
    // Take the SymbolLookUp-based path when there is no GetOpInfo callback or
    // when it returns zero. The call passes a literal 0 as the offset argument
    // rather than the Offset parameter.
76  if (!GetOpInfo || !GetOpInfo(DisInfo, Address, /*Offset=*/0, OpSize, InstSize,
77  1, &SymbolicOp)) {
78  if (IsBranch) {
    // Branch: look up a symbol at the target Address + Value.
    // NOTE(review): original line 79 (presumably setting ReferenceType before
    // the lookup) is not visible.
80  const char *Name = SymbolLookUp(DisInfo, Address + Value, &ReferenceType,
81  Address, &ReferenceName);
82  if (Name) {
    // Symbol found: refer to it by name and clear the constant part.
83  SymbolicOp.AddSymbol.Name = Name;
84  SymbolicOp.AddSymbol.Present = true;
85  SymbolicOp.Value = 0;
86  } else {
    // No symbol: use Address + Value as a plain constant.
87  SymbolicOp.Value = Address + Value;
88  }
    // NOTE(review): the conditions on original lines 89 and 92, which select
    // between the two comment strings below, are not visible.
90  CommentStream << "symbol stub for: " << ReferenceName;
91  else if (ReferenceType ==
93  CommentStream << "Objc message: " << ReferenceName;
94  } else if (MI.getOpcode() == AArch64::ADRP) {
    // NOTE(review): original line 95 is not visible.
96  // otool expects the fully encoded ADRP instruction to be passed in as
97  // the value here, so reconstruct it:
98  const MCRegisterInfo &MCRI = *Ctx.getRegisterInfo();
99  uint32_t EncodedInst = 0x90000000;
100  EncodedInst |= (Value & 0x3) << 29; // immlo
101  EncodedInst |= ((Value >> 2) & 0x7FFFF) << 5; // immhi
102  EncodedInst |= MCRI.getEncodingValue(MI.getOperand(0).getReg()); // reg
103  SymbolLookUp(DisInfo, EncodedInst, &ReferenceType, Address,
104  &ReferenceName);
    // Write Address with its low 12 bits cleared, plus Value * 0x1000, to the
    // comment stream in hex. This arm has no return, so control continues to
    // the expression-building code after the outer if.
105  CommentStream << format("0x%llx", (0xfffffffffffff000LL & Address) +
106  Value * 0x1000);
107  } else if (MI.getOpcode() == AArch64::ADDXri ||
108  MI.getOpcode() == AArch64::LDRXui ||
109  MI.getOpcode() == AArch64::LDRXl ||
110  MI.getOpcode() == AArch64::ADR) {
    // NOTE(review): the statements guarded by the next two conditions
    // (original lines 112 and 114) are not visible, nor are lines 116 and 120
    // inside the LDRXl and ADR arms.
111  if (MI.getOpcode() == AArch64::ADDXri)
113  else if (MI.getOpcode() == AArch64::LDRXui)
115  if (MI.getOpcode() == AArch64::LDRXl) {
    // LDRXl and ADR pass Address + Value to the lookup; the remaining opcodes
    // pass a re-encoded instruction word instead (see the else arm).
117  SymbolLookUp(DisInfo, Address + Value, &ReferenceType, Address,
118  &ReferenceName);
119  } else if (MI.getOpcode() == AArch64::ADR) {
121  SymbolLookUp(DisInfo, Address + Value, &ReferenceType, Address,
122  &ReferenceName);
123  } else {
124  const MCRegisterInfo &MCRI = *Ctx.getRegisterInfo();
125  // otool expects the fully encoded ADD/LDR instruction to be passed in
126  // as the value here, so reconstruct it:
127  unsigned EncodedInst =
128  MI.getOpcode() == AArch64::ADDXri ? 0x91000000: 0xF9400000;
129  EncodedInst |= Value << 10; // imm12 [+ shift:2 for ADD]
130  EncodedInst |=
131  MCRI.getEncodingValue(MI.getOperand(1).getReg()) << 5; // Rn
132  EncodedInst |= MCRI.getEncodingValue(MI.getOperand(0).getReg()); // Rd
133 
134  SymbolLookUp(DisInfo, EncodedInst, &ReferenceType, Address,
135  &ReferenceName);
136  }
    // Emit a comment chosen by the ReferenceType the lookup reported.
    // NOTE(review): the first condition (original line 137) and the right-hand
    // sides of the comparisons below (lines 140, 145, 148, 151, 154, 157) are
    // not visible.
138  CommentStream << "literal pool symbol address: " << ReferenceName;
139  else if (ReferenceType ==
141  CommentStream << "literal pool for: \"";
142  CommentStream.write_escaped(ReferenceName);
143  CommentStream << "\"";
144  } else if (ReferenceType ==
146  CommentStream << "Objc cfstring ref: @\"" << ReferenceName << "\"";
147  else if (ReferenceType ==
149  CommentStream << "Objc message: " << ReferenceName;
150  else if (ReferenceType ==
152  CommentStream << "Objc message ref: " << ReferenceName;
153  else if (ReferenceType ==
155  CommentStream << "Objc selector ref: " << ReferenceName;
156  else if (ReferenceType ==
158  CommentStream << "Objc class ref: " << ReferenceName;
159  // For these instructions, the SymbolLookUp() above is just to get the
160  // ReferenceType and ReferenceName. We want to make sure not to
161  // fall through so we don't build an MCExpr to leave the disassembly
162  // of the immediate values of these instructions to the InstPrinter.
163  return false;
164  } else {
    // Any other non-branch opcode: no operand is added.
165  return false;
166  }
167  }
168 
    // Build the optional "add" term from SymbolicOp.AddSymbol.
    // NOTE(review): original lines 173-174 (where Sym and Variant are
    // presumably defined), 178 and 180 are not visible.
169  const MCExpr *Add = nullptr;
170  if (SymbolicOp.AddSymbol.Present) {
171  if (SymbolicOp.AddSymbol.Name) {
172  StringRef Name(SymbolicOp.AddSymbol.Name);
175  if (Variant != MCSymbolRefExpr::VK_None)
176  Add = MCSymbolRefExpr::create(Sym, Variant, Ctx);
177  else
179  } else {
181  }
182  }
183 
    // Build the optional "subtract" term: a symbol reference when a name is
    // given, otherwise a constant taken from SubtractSymbol.Value.
    // NOTE(review): original line 188 (presumably defining Sym) is not visible.
184  const MCExpr *Sub = nullptr;
185  if (SymbolicOp.SubtractSymbol.Present) {
186  if (SymbolicOp.SubtractSymbol.Name) {
187  StringRef Name(SymbolicOp.SubtractSymbol.Name);
189  Sub = MCSymbolRefExpr::create(Sym, Ctx);
190  } else {
191  Sub = MCConstantExpr::create(SymbolicOp.SubtractSymbol.Value, Ctx);
192  }
193  }
194 
    // The constant offset term is only created when it is non-zero.
195  const MCExpr *Off = nullptr;
196  if (SymbolicOp.Value != 0)
197  Off = MCConstantExpr::create(SymbolicOp.Value, Ctx);
198 
    // Combine whichever of Add, Sub and Off exist into one expression; when
    // none exist the operand is the constant 0.
199  const MCExpr *Expr;
200  if (Sub) {
    // NOTE(review): the assignments to LHS (original lines 203 and 205) are
    // not visible; presumably Add - Sub when Add exists and -Sub otherwise -
    // confirm against the real source.
201  const MCExpr *LHS;
202  if (Add)
204  else
206  if (Off)
207  Expr = MCBinaryExpr::createAdd(LHS, Off, Ctx);
208  else
209  Expr = LHS;
210  } else if (Add) {
211  if (Off)
212  Expr = MCBinaryExpr::createAdd(Add, Off, Ctx);
213  else
214  Expr = Add;
215  } else {
216  if (Off)
217  Expr = Off;
218  else
219  Expr = MCConstantExpr::create(0, Ctx);
220  }
221 
    // Append the expression to MI as an operand and report success.
222  MI.addOperand(MCOperand::createExpr(Expr));
223 
224  return true;
225 }
MI
IRTranslator LLVM IR MI
Definition: IRTranslator.cpp:108
llvm
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
llvm::MCSymbolizer::Ctx
MCContext & Ctx
Definition: MCSymbolizer.h:41
llvm::MCSymbol
MCSymbol - Instances of this class represent a symbol name in the MC file, and MCSymbols are created ...
Definition: MCSymbol.h:41
llvm::MCSymbolRefExpr::VK_PAGEOFF
@ VK_PAGEOFF
Definition: MCExpr.h:220
llvm::MCOperand::createExpr
static MCOperand createExpr(const MCExpr *Val)
Definition: MCInst.h:162
ReferenceType
Definition: ItaniumDemangle.h:638
llvm::MCContext::getRegisterInfo
const MCRegisterInfo * getRegisterInfo() const
Definition: MCContext.h:449
llvm::MCConstantExpr::create
static const MCConstantExpr * create(int64_t Value, MCContext &Ctx, bool PrintInHex=false, unsigned SizeInBytes=0)
Definition: MCExpr.cpp:194
LLVMOpInfo1::AddSymbol
struct LLVMOpInfoSymbol1 AddSymbol
Definition: DisassemblerTypes.h:79
LLVMDisassembler_ReferenceType_In_ARM64_ADDXri
#define LLVMDisassembler_ReferenceType_In_ARM64_ADDXri
Definition: DisassemblerTypes.h:137
llvm::MCUnaryExpr::createMinus
static const MCUnaryExpr * createMinus(const MCExpr *Expr, MCContext &Ctx, SMLoc Loc=SMLoc())
Definition: MCExpr.h:451
llvm::raw_ostream::write_escaped
raw_ostream & write_escaped(StringRef Str, bool UseHexEscapes=false)
Output Str, turning '\', '\t', '\n', '"', and anything that doesn't satisfy llvm::isPrint into an esca...
Definition: raw_ostream.cpp:161
AArch64BaseInfo.h
llvm::MCRegisterInfo::getEncodingValue
uint16_t getEncodingValue(MCRegister RegNo) const
Returns the encoding for RegNo.
Definition: MCRegisterInfo.h:553
llvm::MCExternalSymbolizer::GetOpInfo
LLVMOpInfoCallback GetOpInfo
Definition: MCExternalSymbolizer.h:32
AArch64ExternalSymbolizer.h
llvm::MCInst
Instances of this class represent a single low-level machine instruction.
Definition: MCInst.h:184
Format.h
LHS
Value * LHS
Definition: X86PartialReduction.cpp:75
llvm::MCContext::getOrCreateSymbol
MCSymbol * getOrCreateSymbol(const Twine &Name)
Lookup the symbol inside with the specified Name.
Definition: MCContext.cpp:201
LLVMDisassembler_ReferenceType_Out_Objc_Message_Ref
#define LLVMDisassembler_ReferenceType_Out_Objc_Message_Ref
Definition: DisassemblerTypes.h:157
LLVMDisassembler_ReferenceType_Out_LitPool_CstrAddr
#define LLVMDisassembler_ReferenceType_Out_LitPool_CstrAddr
Definition: DisassemblerTypes.h:150
llvm::MCSymbolRefExpr::VK_GOTPAGEOFF
@ VK_GOTPAGEOFF
Definition: MCExpr.h:222
LLVMDisassembler_ReferenceType_In_Branch
#define LLVMDisassembler_ReferenceType_In_Branch
Definition: DisassemblerTypes.h:130
MCContext.h
LLVMOpInfoSymbol1::Name
const char * Name
Definition: DisassemblerTypes.h:74
LLVMOpInfoSymbol1::Value
uint64_t Value
Definition: DisassemblerTypes.h:75
LLVMOpInfo1
Definition: DisassemblerTypes.h:78
MCInst.h
llvm::raw_ostream
This class implements an extremely fast bulk output stream that can only output to a stream.
Definition: raw_ostream.h:52
LLVMDisassembler_ReferenceType_Out_LitPool_SymAddr
#define LLVMDisassembler_ReferenceType_Out_LitPool_SymAddr
Definition: DisassemblerTypes.h:148
getVariant
static MCSymbolRefExpr::VariantKind getVariant(uint64_t LLVMDisassembler_VariantKind)
Definition: AArch64ExternalSymbolizer.cpp:24
LLVMDisassembler_ReferenceType_Out_Objc_Selector_Ref
#define LLVMDisassembler_ReferenceType_Out_Objc_Selector_Ref
Definition: DisassemblerTypes.h:159
LLVMDisassembler_VariantKind_ARM64_TLVOFF
#define LLVMDisassembler_VariantKind_ARM64_TLVOFF
Definition: DisassemblerTypes.h:104
AArch64AddressingModes.h
LLVMDisassembler_ReferenceType_In_ARM64_LDRXl
#define LLVMDisassembler_ReferenceType_In_ARM64_LDRXl
Definition: DisassemblerTypes.h:141
LLVMDisassembler_VariantKind_ARM64_GOTPAGEOFF
#define LLVMDisassembler_VariantKind_ARM64_GOTPAGEOFF
Definition: DisassemblerTypes.h:102
llvm::MCSymbolRefExpr::VariantKind
VariantKind
Definition: MCExpr.h:194
uint64_t
llvm::MCExternalSymbolizer::DisInfo
void * DisInfo
The pointer to the block of symbolic information for above call back.
Definition: MCExternalSymbolizer.h:36
LLVMDisassembler_VariantKind_ARM64_TLVP
#define LLVMDisassembler_VariantKind_ARM64_TLVP
Definition: DisassemblerTypes.h:103
LLVMOpInfo1::Value
uint64_t Value
Definition: DisassemblerTypes.h:81
MCRegisterInfo.h
llvm::MCExternalSymbolizer::SymbolLookUp
LLVMSymbolLookupCallback SymbolLookUp
The function to lookup a symbol name.
Definition: MCExternalSymbolizer.h:34
LLVMDisassembler_ReferenceType_In_ARM64_ADR
#define LLVMDisassembler_ReferenceType_In_ARM64_ADR
Definition: DisassemblerTypes.h:143
llvm::MCBinaryExpr::createSub
static const MCBinaryExpr * createSub(const MCExpr *LHS, const MCExpr *RHS, MCContext &Ctx)
Definition: MCExpr.h:610
llvm::MCBinaryExpr::createAdd
static const MCBinaryExpr * createAdd(const MCExpr *LHS, const MCExpr *RHS, MCContext &Ctx)
Definition: MCExpr.h:525
LLVMDisassembler_VariantKind_ARM64_PAGE
#define LLVMDisassembler_VariantKind_ARM64_PAGE
The ARM64 target VariantKinds.
Definition: DisassemblerTypes.h:99
LLVMDisassembler_VariantKind_None
#define LLVMDisassembler_VariantKind_None
The operand VariantKinds for symbolic disassembly.
Definition: DisassemblerTypes.h:88
llvm::StringRef
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:50
llvm_unreachable
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
Definition: ErrorHandling.h:143
uint32_t
llvm::AArch64ISD::ADR
@ ADR
Definition: AArch64ISelLowering.h:76
llvm::format
format_object< Ts... > format(const char *Fmt, const Ts &... Vals)
These are helper functions used to produce formatted output.
Definition: Format.h:124
llvm::MCRegisterInfo
MCRegisterInfo base class - We assume that the target defines a static array of MCRegisterDesc object...
Definition: MCRegisterInfo.h:135
LLVMDisassembler_ReferenceType_In_ARM64_ADRP
#define LLVMDisassembler_ReferenceType_In_ARM64_ADRP
Definition: DisassemblerTypes.h:135
LLVMDisassembler_ReferenceType_In_ARM64_LDRXui
#define LLVMDisassembler_ReferenceType_In_ARM64_LDRXui
Definition: DisassemblerTypes.h:139
llvm::GraphProgram::Name
Name
Definition: GraphWriter.h:50
llvm::MCSymbolRefExpr::VK_TLVPPAGE
@ VK_TLVPPAGE
Definition: MCExpr.h:217
LLVMOpInfo1::VariantKind
uint64_t VariantKind
Definition: DisassemblerTypes.h:82
llvm::TargetStackID::Value
Value
Definition: TargetFrameLowering.h:27
LLVMDisassembler_VariantKind_ARM64_PAGEOFF
#define LLVMDisassembler_VariantKind_ARM64_PAGEOFF
Definition: DisassemblerTypes.h:100
llvm::MCSymbolRefExpr::create
static const MCSymbolRefExpr * create(const MCSymbol *Symbol, MCContext &Ctx)
Definition: MCExpr.h:386
LLVMDisassembler_VariantKind_ARM64_GOTPAGE
#define LLVMDisassembler_VariantKind_ARM64_GOTPAGE
Definition: DisassemblerTypes.h:101
llvm::MCID::Add
@ Add
Definition: MCInstrDesc.h:185
llvm::AArch64ISD::ADRP
@ ADRP
Definition: AArch64ISelLowering.h:75
llvm::AArch64ExternalSymbolizer::tryAddingSymbolicOperand
bool tryAddingSymbolicOperand(MCInst &MI, raw_ostream &CommentStream, int64_t Value, uint64_t Address, bool IsBranch, uint64_t Offset, uint64_t OpSize, uint64_t InstSize) override
tryAddingSymbolicOperand - tryAddingSymbolicOperand trys to add a symbolic operand in place of the im...
Definition: AArch64ExternalSymbolizer.cpp:61
LLVMDisassembler_ReferenceType_Out_Objc_Class_Ref
#define LLVMDisassembler_ReferenceType_Out_Objc_Class_Ref
Definition: DisassemblerTypes.h:161
llvm::MCSymbolRefExpr::VK_None
@ VK_None
Definition: MCExpr.h:195
LLVMOpInfoSymbol1::Present
uint64_t Present
Definition: DisassemblerTypes.h:73
llvm::MCSymbolRefExpr::VK_GOTPAGE
@ VK_GOTPAGE
Definition: MCExpr.h:221
LLVMDisassembler_ReferenceType_Out_Objc_Message
#define LLVMDisassembler_ReferenceType_Out_Objc_Message
Definition: DisassemblerTypes.h:155
raw_ostream.h
llvm::MCSymbolRefExpr::VK_PAGE
@ VK_PAGE
Definition: MCExpr.h:219
LLVMDisassembler_ReferenceType_Out_SymbolStub
#define LLVMDisassembler_ReferenceType_Out_SymbolStub
Definition: DisassemblerTypes.h:146
MCExpr.h
LLVMOpInfo1::SubtractSymbol
struct LLVMOpInfoSymbol1 SubtractSymbol
Definition: DisassemblerTypes.h:80
llvm::MCSymbolRefExpr::VK_TLVPPAGEOFF
@ VK_TLVPPAGEOFF
Definition: MCExpr.h:218
llvm::Value
LLVM Value Representation.
Definition: Value.h:74
llvm::MCExpr
Base class for the full range of assembler expressions which are needed for parsing.
Definition: MCExpr.h:35
LLVMDisassembler_ReferenceType_Out_Objc_CFString_Ref
#define LLVMDisassembler_ReferenceType_Out_Objc_CFString_Ref
Definition: DisassemblerTypes.h:153