File: | build/source/lldb/source/Plugins/Disassembler/LLVMC/DisassemblerLLVMC.cpp |
Warning: | line 1456, column 15 Value stored to 'cpu' during its initialization is never read |
Press '?' to see keyboard shortcuts
Keyboard shortcuts:
1 | //===-- DisassemblerLLVMC.cpp ---------------------------------------------===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | |
9 | #include "DisassemblerLLVMC.h" |
10 | |
11 | #include "llvm-c/Disassembler.h" |
12 | #include "llvm/ADT/SmallString.h" |
13 | #include "llvm/ADT/StringExtras.h" |
14 | #include "llvm/MC/MCAsmInfo.h" |
15 | #include "llvm/MC/MCContext.h" |
16 | #include "llvm/MC/MCDisassembler/MCDisassembler.h" |
17 | #include "llvm/MC/MCDisassembler/MCExternalSymbolizer.h" |
18 | #include "llvm/MC/MCDisassembler/MCRelocationInfo.h" |
19 | #include "llvm/MC/MCInst.h" |
20 | #include "llvm/MC/MCInstPrinter.h" |
21 | #include "llvm/MC/MCInstrInfo.h" |
22 | #include "llvm/MC/MCRegisterInfo.h" |
23 | #include "llvm/MC/MCSubtargetInfo.h" |
24 | #include "llvm/MC/MCTargetOptions.h" |
25 | #include "llvm/MC/TargetRegistry.h" |
26 | #include "llvm/Support/ErrorHandling.h" |
27 | #include "llvm/Support/ScopedPrinter.h" |
28 | #include "llvm/Support/TargetSelect.h" |
29 | #include "llvm/TargetParser/AArch64TargetParser.h" |
30 | |
31 | #include "lldb/Core/Address.h" |
32 | #include "lldb/Core/Module.h" |
33 | #include "lldb/Symbol/SymbolContext.h" |
34 | #include "lldb/Target/ExecutionContext.h" |
35 | #include "lldb/Target/Process.h" |
36 | #include "lldb/Target/RegisterContext.h" |
37 | #include "lldb/Target/SectionLoadList.h" |
38 | #include "lldb/Target/StackFrame.h" |
39 | #include "lldb/Target/Target.h" |
40 | #include "lldb/Utility/DataExtractor.h" |
41 | #include "lldb/Utility/LLDBLog.h" |
42 | #include "lldb/Utility/Log.h" |
43 | #include "lldb/Utility/RegularExpression.h" |
44 | #include "lldb/Utility/Stream.h" |
45 | #include <optional> |
46 | |
47 | using namespace lldb; |
48 | using namespace lldb_private; |
49 | |
50 | LLDB_PLUGIN_DEFINE(DisassemblerLLVMC)namespace lldb_private { void lldb_initialize_DisassemblerLLVMC () { DisassemblerLLVMC::Initialize(); } void lldb_terminate_DisassemblerLLVMC () { DisassemblerLLVMC::Terminate(); } } |
51 | |
52 | class DisassemblerLLVMC::MCDisasmInstance { |
53 | public: |
54 | static std::unique_ptr<MCDisasmInstance> |
55 | Create(const char *triple, const char *cpu, const char *features_str, |
56 | unsigned flavor, DisassemblerLLVMC &owner); |
57 | |
58 | ~MCDisasmInstance() = default; |
59 | |
60 | uint64_t GetMCInst(const uint8_t *opcode_data, size_t opcode_data_len, |
61 | lldb::addr_t pc, llvm::MCInst &mc_inst) const; |
62 | void PrintMCInst(llvm::MCInst &mc_inst, std::string &inst_string, |
63 | std::string &comments_string); |
64 | void SetStyle(bool use_hex_immed, HexImmediateStyle hex_style); |
65 | bool CanBranch(llvm::MCInst &mc_inst) const; |
66 | bool HasDelaySlot(llvm::MCInst &mc_inst) const; |
67 | bool IsCall(llvm::MCInst &mc_inst) const; |
68 | bool IsLoad(llvm::MCInst &mc_inst) const; |
69 | bool IsAuthenticated(llvm::MCInst &mc_inst) const; |
70 | |
71 | private: |
72 | MCDisasmInstance(std::unique_ptr<llvm::MCInstrInfo> &&instr_info_up, |
73 | std::unique_ptr<llvm::MCRegisterInfo> &®_info_up, |
74 | std::unique_ptr<llvm::MCSubtargetInfo> &&subtarget_info_up, |
75 | std::unique_ptr<llvm::MCAsmInfo> &&asm_info_up, |
76 | std::unique_ptr<llvm::MCContext> &&context_up, |
77 | std::unique_ptr<llvm::MCDisassembler> &&disasm_up, |
78 | std::unique_ptr<llvm::MCInstPrinter> &&instr_printer_up); |
79 | |
80 | std::unique_ptr<llvm::MCInstrInfo> m_instr_info_up; |
81 | std::unique_ptr<llvm::MCRegisterInfo> m_reg_info_up; |
82 | std::unique_ptr<llvm::MCSubtargetInfo> m_subtarget_info_up; |
83 | std::unique_ptr<llvm::MCAsmInfo> m_asm_info_up; |
84 | std::unique_ptr<llvm::MCContext> m_context_up; |
85 | std::unique_ptr<llvm::MCDisassembler> m_disasm_up; |
86 | std::unique_ptr<llvm::MCInstPrinter> m_instr_printer_up; |
87 | }; |
88 | |
89 | namespace x86 { |
90 | |
/// Result of InstructionLengthDecode(): the three values from which the
/// control flow kind of an x86 instruction is derived.
///
/// Members are kept in this order because the struct is aggregate-initialized
/// positionally.
struct InstructionOpcodeAndModrm {
  /// Primary opcode byte. For a one-byte opcode this is the first byte after
  /// any prefixes; for an escaped (multi-byte) opcode it is the byte the
  /// decoder selects after the escape byte(s).
  uint8_t primary_opcode;

  /// Opcode length in bytes. Valid opcode lengths are 1, 2, or 3.
  uint8_t opcode_len;

  /// ModR/M byte. Bits[7:6] are MOD, bits[5:3] specify a register, and the R/M
  /// bits[2:0] hold a register or an addressing mode depending on MOD.
  uint8_t modrm;
};
111 | |
112 | /// Determine the InstructionControlFlowKind based on opcode and modrm bytes. |
113 | /// Refer to http://ref.x86asm.net/coder.html for the full list of opcode and |
114 | /// instruction set. |
115 | /// |
116 | /// \param[in] opcode_and_modrm |
117 | /// Contains primary_opcode byte, its length, and ModR/M byte. |
118 | /// Refer to the struct InstructionOpcodeAndModrm for details. |
119 | /// |
120 | /// \return |
121 | /// The control flow kind of the instruction or |
122 | /// eInstructionControlFlowKindOther if the instruction doesn't affect |
123 | /// the control flow of the program. |
124 | lldb::InstructionControlFlowKind |
125 | MapOpcodeIntoControlFlowKind(InstructionOpcodeAndModrm opcode_and_modrm) { |
126 | uint8_t opcode = opcode_and_modrm.primary_opcode; |
127 | uint8_t opcode_len = opcode_and_modrm.opcode_len; |
128 | uint8_t modrm = opcode_and_modrm.modrm; |
129 | |
130 | if (opcode_len > 2) |
131 | return lldb::eInstructionControlFlowKindOther; |
132 | |
133 | if (opcode >= 0x70 && opcode <= 0x7F) { |
134 | if (opcode_len == 1) |
135 | return lldb::eInstructionControlFlowKindCondJump; |
136 | else |
137 | return lldb::eInstructionControlFlowKindOther; |
138 | } |
139 | |
140 | if (opcode >= 0x80 && opcode <= 0x8F) { |
141 | if (opcode_len == 2) |
142 | return lldb::eInstructionControlFlowKindCondJump; |
143 | else |
144 | return lldb::eInstructionControlFlowKindOther; |
145 | } |
146 | |
147 | switch (opcode) { |
148 | case 0x9A: |
149 | if (opcode_len == 1) |
150 | return lldb::eInstructionControlFlowKindFarCall; |
151 | break; |
152 | case 0xFF: |
153 | if (opcode_len == 1) { |
154 | uint8_t modrm_reg = (modrm >> 3) & 7; |
155 | if (modrm_reg == 2) |
156 | return lldb::eInstructionControlFlowKindCall; |
157 | else if (modrm_reg == 3) |
158 | return lldb::eInstructionControlFlowKindFarCall; |
159 | else if (modrm_reg == 4) |
160 | return lldb::eInstructionControlFlowKindJump; |
161 | else if (modrm_reg == 5) |
162 | return lldb::eInstructionControlFlowKindFarJump; |
163 | } |
164 | break; |
165 | case 0xE8: |
166 | if (opcode_len == 1) |
167 | return lldb::eInstructionControlFlowKindCall; |
168 | break; |
169 | case 0xCD: |
170 | case 0xCC: |
171 | case 0xCE: |
172 | case 0xF1: |
173 | if (opcode_len == 1) |
174 | return lldb::eInstructionControlFlowKindFarCall; |
175 | break; |
176 | case 0xCF: |
177 | if (opcode_len == 1) |
178 | return lldb::eInstructionControlFlowKindFarReturn; |
179 | break; |
180 | case 0xE9: |
181 | case 0xEB: |
182 | if (opcode_len == 1) |
183 | return lldb::eInstructionControlFlowKindJump; |
184 | break; |
185 | case 0xEA: |
186 | if (opcode_len == 1) |
187 | return lldb::eInstructionControlFlowKindFarJump; |
188 | break; |
189 | case 0xE3: |
190 | case 0xE0: |
191 | case 0xE1: |
192 | case 0xE2: |
193 | if (opcode_len == 1) |
194 | return lldb::eInstructionControlFlowKindCondJump; |
195 | break; |
196 | case 0xC3: |
197 | case 0xC2: |
198 | if (opcode_len == 1) |
199 | return lldb::eInstructionControlFlowKindReturn; |
200 | break; |
201 | case 0xCB: |
202 | case 0xCA: |
203 | if (opcode_len == 1) |
204 | return lldb::eInstructionControlFlowKindFarReturn; |
205 | break; |
206 | case 0x05: |
207 | case 0x34: |
208 | if (opcode_len == 2) |
209 | return lldb::eInstructionControlFlowKindFarCall; |
210 | break; |
211 | case 0x35: |
212 | case 0x07: |
213 | if (opcode_len == 2) |
214 | return lldb::eInstructionControlFlowKindFarReturn; |
215 | break; |
216 | case 0x01: |
217 | if (opcode_len == 2) { |
218 | switch (modrm) { |
219 | case 0xc1: |
220 | return lldb::eInstructionControlFlowKindFarCall; |
221 | case 0xc2: |
222 | case 0xc3: |
223 | return lldb::eInstructionControlFlowKindFarReturn; |
224 | default: |
225 | break; |
226 | } |
227 | } |
228 | break; |
229 | default: |
230 | break; |
231 | } |
232 | |
233 | return lldb::eInstructionControlFlowKindOther; |
234 | } |
235 | |
236 | /// Decode an instruction into opcode, modrm and opcode_len. |
237 | /// Refer to http://ref.x86asm.net/coder.html for the instruction bytes layout. |
238 | /// Opcodes in x86 are generally the first byte of instruction, though two-byte |
239 | /// instructions and prefixes exist. ModR/M is the byte following the opcode |
240 | /// and adds additional information for how the instruction is executed. |
241 | /// |
242 | /// \param[in] inst_bytes |
243 | /// Raw bytes of the instruction |
244 | /// |
245 | /// |
246 | /// \param[in] bytes_len |
247 | /// The length of the inst_bytes array. |
248 | /// |
249 | /// \param[in] is_exec_mode_64b |
250 | /// If true, the execution mode is 64 bit. |
251 | /// |
252 | /// \return |
253 | /// Returns decoded instruction as struct InstructionOpcodeAndModrm, holding |
254 | /// primary_opcode, opcode_len and modrm byte. Refer to the struct definition |
255 | /// for more details. |
256 | /// Otherwise if the given instruction is invalid, returns std::nullopt. |
257 | std::optional<InstructionOpcodeAndModrm> |
258 | InstructionLengthDecode(const uint8_t *inst_bytes, int bytes_len, |
259 | bool is_exec_mode_64b) { |
260 | int op_idx = 0; |
261 | bool prefix_done = false; |
262 | InstructionOpcodeAndModrm ret = {0, 0, 0}; |
263 | |
264 | // In most cases, the primary_opcode is the first byte of the instruction |
265 | // but some instructions have a prefix to be skipped for these calculations. |
266 | // The following mapping is inspired from libipt's instruction decoding logic |
267 | // in `src/pt_ild.c` |
268 | while (!prefix_done) { |
269 | if (op_idx >= bytes_len) |
270 | return std::nullopt; |
271 | |
272 | ret.primary_opcode = inst_bytes[op_idx]; |
273 | switch (ret.primary_opcode) { |
274 | // prefix_ignore |
275 | case 0x26: |
276 | case 0x2e: |
277 | case 0x36: |
278 | case 0x3e: |
279 | case 0x64: |
280 | case 0x65: |
281 | // prefix_osz, prefix_asz |
282 | case 0x66: |
283 | case 0x67: |
284 | // prefix_lock, prefix_f2, prefix_f3 |
285 | case 0xf0: |
286 | case 0xf2: |
287 | case 0xf3: |
288 | op_idx++; |
289 | break; |
290 | |
291 | // prefix_rex |
292 | case 0x40: |
293 | case 0x41: |
294 | case 0x42: |
295 | case 0x43: |
296 | case 0x44: |
297 | case 0x45: |
298 | case 0x46: |
299 | case 0x47: |
300 | case 0x48: |
301 | case 0x49: |
302 | case 0x4a: |
303 | case 0x4b: |
304 | case 0x4c: |
305 | case 0x4d: |
306 | case 0x4e: |
307 | case 0x4f: |
308 | if (is_exec_mode_64b) |
309 | op_idx++; |
310 | else |
311 | prefix_done = true; |
312 | break; |
313 | |
314 | // prefix_vex_c4, c5 |
315 | case 0xc5: |
316 | if (!is_exec_mode_64b && (inst_bytes[op_idx + 1] & 0xc0) != 0xc0) { |
317 | prefix_done = true; |
318 | break; |
319 | } |
320 | |
321 | ret.opcode_len = 2; |
322 | ret.primary_opcode = inst_bytes[op_idx + 2]; |
323 | ret.modrm = inst_bytes[op_idx + 3]; |
324 | return ret; |
325 | |
326 | case 0xc4: |
327 | if (!is_exec_mode_64b && (inst_bytes[op_idx + 1] & 0xc0) != 0xc0) { |
328 | prefix_done = true; |
329 | break; |
330 | } |
331 | ret.opcode_len = inst_bytes[op_idx + 1] & 0x1f; |
332 | ret.primary_opcode = inst_bytes[op_idx + 3]; |
333 | ret.modrm = inst_bytes[op_idx + 4]; |
334 | return ret; |
335 | |
336 | // prefix_evex |
337 | case 0x62: |
338 | if (!is_exec_mode_64b && (inst_bytes[op_idx + 1] & 0xc0) != 0xc0) { |
339 | prefix_done = true; |
340 | break; |
341 | } |
342 | ret.opcode_len = inst_bytes[op_idx + 1] & 0x03; |
343 | ret.primary_opcode = inst_bytes[op_idx + 4]; |
344 | ret.modrm = inst_bytes[op_idx + 5]; |
345 | return ret; |
346 | |
347 | default: |
348 | prefix_done = true; |
349 | break; |
350 | } |
351 | } // prefix done |
352 | |
353 | ret.primary_opcode = inst_bytes[op_idx]; |
354 | ret.modrm = inst_bytes[op_idx + 1]; |
355 | ret.opcode_len = 1; |
356 | |
357 | // If the first opcode is 0F, it's two- or three- byte opcodes. |
358 | if (ret.primary_opcode == 0x0F) { |
359 | ret.primary_opcode = inst_bytes[++op_idx]; // get the next byte |
360 | |
361 | if (ret.primary_opcode == 0x38) { |
362 | ret.opcode_len = 3; |
363 | ret.primary_opcode = inst_bytes[++op_idx]; // get the next byte |
364 | ret.modrm = inst_bytes[op_idx + 1]; |
365 | } else if (ret.primary_opcode == 0x3A) { |
366 | ret.opcode_len = 3; |
367 | ret.primary_opcode = inst_bytes[++op_idx]; |
368 | ret.modrm = inst_bytes[op_idx + 1]; |
369 | } else if ((ret.primary_opcode & 0xf8) == 0x38) { |
370 | ret.opcode_len = 0; |
371 | ret.primary_opcode = inst_bytes[++op_idx]; |
372 | ret.modrm = inst_bytes[op_idx + 1]; |
373 | } else if (ret.primary_opcode == 0x0F) { |
374 | ret.opcode_len = 3; |
375 | // opcode is 0x0F, no needs to update |
376 | ret.modrm = inst_bytes[op_idx + 1]; |
377 | } else { |
378 | ret.opcode_len = 2; |
379 | ret.modrm = inst_bytes[op_idx + 1]; |
380 | } |
381 | } |
382 | |
383 | return ret; |
384 | } |
385 | |
386 | lldb::InstructionControlFlowKind GetControlFlowKind(bool is_exec_mode_64b, |
387 | Opcode m_opcode) { |
388 | std::optional<InstructionOpcodeAndModrm> ret; |
389 | |
390 | if (m_opcode.GetOpcodeBytes() == nullptr || m_opcode.GetByteSize() <= 0) { |
391 | // x86_64 and i386 instructions are categorized as Opcode::Type::eTypeBytes |
392 | return lldb::eInstructionControlFlowKindUnknown; |
393 | } |
394 | |
395 | // Opcode bytes will be decoded into primary_opcode, modrm and opcode length. |
396 | // These are the three values deciding instruction control flow kind. |
397 | ret = InstructionLengthDecode((const uint8_t *)m_opcode.GetOpcodeBytes(), |
398 | m_opcode.GetByteSize(), is_exec_mode_64b); |
399 | if (!ret) |
400 | return lldb::eInstructionControlFlowKindUnknown; |
401 | else |
402 | return MapOpcodeIntoControlFlowKind(*ret); |
403 | } |
404 | |
405 | } // namespace x86 |
406 | |
407 | class InstructionLLVMC : public lldb_private::Instruction { |
408 | public: |
409 | InstructionLLVMC(DisassemblerLLVMC &disasm, |
410 | const lldb_private::Address &address, |
411 | AddressClass addr_class) |
412 | : Instruction(address, addr_class), |
413 | m_disasm_wp(std::static_pointer_cast<DisassemblerLLVMC>( |
414 | disasm.shared_from_this())) {} |
415 | |
416 | ~InstructionLLVMC() override = default; |
417 | |
418 | bool DoesBranch() override { |
419 | VisitInstruction(); |
420 | return m_does_branch; |
421 | } |
422 | |
423 | bool HasDelaySlot() override { |
424 | VisitInstruction(); |
425 | return m_has_delay_slot; |
426 | } |
427 | |
428 | bool IsLoad() override { |
429 | VisitInstruction(); |
430 | return m_is_load; |
431 | } |
432 | |
433 | bool IsAuthenticated() override { |
434 | VisitInstruction(); |
435 | return m_is_authenticated; |
436 | } |
437 | |
438 | DisassemblerLLVMC::MCDisasmInstance *GetDisasmToUse(bool &is_alternate_isa) { |
439 | DisassemblerScope disasm(*this); |
440 | return GetDisasmToUse(is_alternate_isa, disasm); |
441 | } |
442 | |
443 | size_t Decode(const lldb_private::Disassembler &disassembler, |
444 | const lldb_private::DataExtractor &data, |
445 | lldb::offset_t data_offset) override { |
446 | // All we have to do is read the opcode which can be easy for some |
447 | // architectures |
448 | bool got_op = false; |
449 | DisassemblerScope disasm(*this); |
450 | if (disasm) { |
451 | const ArchSpec &arch = disasm->GetArchitecture(); |
452 | const lldb::ByteOrder byte_order = data.GetByteOrder(); |
453 | |
454 | const uint32_t min_op_byte_size = arch.GetMinimumOpcodeByteSize(); |
455 | const uint32_t max_op_byte_size = arch.GetMaximumOpcodeByteSize(); |
456 | if (min_op_byte_size == max_op_byte_size) { |
457 | // Fixed size instructions, just read that amount of data. |
458 | if (!data.ValidOffsetForDataOfSize(data_offset, min_op_byte_size)) |
459 | return false; |
460 | |
461 | switch (min_op_byte_size) { |
462 | case 1: |
463 | m_opcode.SetOpcode8(data.GetU8(&data_offset), byte_order); |
464 | got_op = true; |
465 | break; |
466 | |
467 | case 2: |
468 | m_opcode.SetOpcode16(data.GetU16(&data_offset), byte_order); |
469 | got_op = true; |
470 | break; |
471 | |
472 | case 4: |
473 | m_opcode.SetOpcode32(data.GetU32(&data_offset), byte_order); |
474 | got_op = true; |
475 | break; |
476 | |
477 | case 8: |
478 | m_opcode.SetOpcode64(data.GetU64(&data_offset), byte_order); |
479 | got_op = true; |
480 | break; |
481 | |
482 | default: |
483 | m_opcode.SetOpcodeBytes(data.PeekData(data_offset, min_op_byte_size), |
484 | min_op_byte_size); |
485 | got_op = true; |
486 | break; |
487 | } |
488 | } |
489 | if (!got_op) { |
490 | bool is_alternate_isa = false; |
491 | DisassemblerLLVMC::MCDisasmInstance *mc_disasm_ptr = |
492 | GetDisasmToUse(is_alternate_isa, disasm); |
493 | |
494 | const llvm::Triple::ArchType machine = arch.GetMachine(); |
495 | if (machine == llvm::Triple::arm || machine == llvm::Triple::thumb) { |
496 | if (machine == llvm::Triple::thumb || is_alternate_isa) { |
497 | uint32_t thumb_opcode = data.GetU16(&data_offset); |
498 | if ((thumb_opcode & 0xe000) != 0xe000 || |
499 | ((thumb_opcode & 0x1800u) == 0)) { |
500 | m_opcode.SetOpcode16(thumb_opcode, byte_order); |
501 | m_is_valid = true; |
502 | } else { |
503 | thumb_opcode <<= 16; |
504 | thumb_opcode |= data.GetU16(&data_offset); |
505 | m_opcode.SetOpcode16_2(thumb_opcode, byte_order); |
506 | m_is_valid = true; |
507 | } |
508 | } else { |
509 | m_opcode.SetOpcode32(data.GetU32(&data_offset), byte_order); |
510 | m_is_valid = true; |
511 | } |
512 | } else { |
513 | // The opcode isn't evenly sized, so we need to actually use the llvm |
514 | // disassembler to parse it and get the size. |
515 | uint8_t *opcode_data = |
516 | const_cast<uint8_t *>(data.PeekData(data_offset, 1)); |
517 | const size_t opcode_data_len = data.BytesLeft(data_offset); |
518 | const addr_t pc = m_address.GetFileAddress(); |
519 | llvm::MCInst inst; |
520 | |
521 | const size_t inst_size = |
522 | mc_disasm_ptr->GetMCInst(opcode_data, opcode_data_len, pc, inst); |
523 | if (inst_size == 0) |
524 | m_opcode.Clear(); |
525 | else { |
526 | m_opcode.SetOpcodeBytes(opcode_data, inst_size); |
527 | m_is_valid = true; |
528 | } |
529 | } |
530 | } |
531 | return m_opcode.GetByteSize(); |
532 | } |
533 | return 0; |
534 | } |
535 | |
536 | void AppendComment(std::string &description) { |
537 | if (m_comment.empty()) |
538 | m_comment.swap(description); |
539 | else { |
540 | m_comment.append(", "); |
541 | m_comment.append(description); |
542 | } |
543 | } |
544 | |
545 | lldb::InstructionControlFlowKind |
546 | GetControlFlowKind(const lldb_private::ExecutionContext *exe_ctx) override { |
547 | DisassemblerScope disasm(*this, exe_ctx); |
548 | if (disasm){ |
549 | if (disasm->GetArchitecture().GetMachine() == llvm::Triple::x86) |
550 | return x86::GetControlFlowKind(/*is_64b=*/false, m_opcode); |
551 | else if (disasm->GetArchitecture().GetMachine() == llvm::Triple::x86_64) |
552 | return x86::GetControlFlowKind(/*is_64b=*/true, m_opcode); |
553 | } |
554 | |
555 | return eInstructionControlFlowKindUnknown; |
556 | } |
557 | |
558 | void CalculateMnemonicOperandsAndComment( |
559 | const lldb_private::ExecutionContext *exe_ctx) override { |
560 | DataExtractor data; |
561 | const AddressClass address_class = GetAddressClass(); |
562 | |
563 | if (m_opcode.GetData(data)) { |
564 | std::string out_string; |
565 | std::string comment_string; |
566 | |
567 | DisassemblerScope disasm(*this, exe_ctx); |
568 | if (disasm) { |
569 | DisassemblerLLVMC::MCDisasmInstance *mc_disasm_ptr; |
570 | |
571 | if (address_class == AddressClass::eCodeAlternateISA) |
572 | mc_disasm_ptr = disasm->m_alternate_disasm_up.get(); |
573 | else |
574 | mc_disasm_ptr = disasm->m_disasm_up.get(); |
575 | |
576 | lldb::addr_t pc = m_address.GetFileAddress(); |
577 | m_using_file_addr = true; |
578 | |
579 | const bool data_from_file = disasm->m_data_from_file; |
580 | bool use_hex_immediates = true; |
581 | Disassembler::HexImmediateStyle hex_style = Disassembler::eHexStyleC; |
582 | |
583 | if (exe_ctx) { |
584 | Target *target = exe_ctx->GetTargetPtr(); |
585 | if (target) { |
586 | use_hex_immediates = target->GetUseHexImmediates(); |
587 | hex_style = target->GetHexImmediateStyle(); |
588 | |
589 | if (!data_from_file) { |
590 | const lldb::addr_t load_addr = m_address.GetLoadAddress(target); |
591 | if (load_addr != LLDB_INVALID_ADDRESS(18446744073709551615UL)) { |
592 | pc = load_addr; |
593 | m_using_file_addr = false; |
594 | } |
595 | } |
596 | } |
597 | } |
598 | |
599 | const uint8_t *opcode_data = data.GetDataStart(); |
600 | const size_t opcode_data_len = data.GetByteSize(); |
601 | llvm::MCInst inst; |
602 | size_t inst_size = |
603 | mc_disasm_ptr->GetMCInst(opcode_data, opcode_data_len, pc, inst); |
604 | |
605 | if (inst_size > 0) { |
606 | mc_disasm_ptr->SetStyle(use_hex_immediates, hex_style); |
607 | mc_disasm_ptr->PrintMCInst(inst, out_string, comment_string); |
608 | |
609 | if (!comment_string.empty()) { |
610 | AppendComment(comment_string); |
611 | } |
612 | } |
613 | |
614 | if (inst_size == 0) { |
615 | m_comment.assign("unknown opcode"); |
616 | inst_size = m_opcode.GetByteSize(); |
617 | StreamString mnemonic_strm; |
618 | lldb::offset_t offset = 0; |
619 | lldb::ByteOrder byte_order = data.GetByteOrder(); |
620 | switch (inst_size) { |
621 | case 1: { |
622 | const uint8_t uval8 = data.GetU8(&offset); |
623 | m_opcode.SetOpcode8(uval8, byte_order); |
624 | m_opcode_name.assign(".byte"); |
625 | mnemonic_strm.Printf("0x%2.2x", uval8); |
626 | } break; |
627 | case 2: { |
628 | const uint16_t uval16 = data.GetU16(&offset); |
629 | m_opcode.SetOpcode16(uval16, byte_order); |
630 | m_opcode_name.assign(".short"); |
631 | mnemonic_strm.Printf("0x%4.4x", uval16); |
632 | } break; |
633 | case 4: { |
634 | const uint32_t uval32 = data.GetU32(&offset); |
635 | m_opcode.SetOpcode32(uval32, byte_order); |
636 | m_opcode_name.assign(".long"); |
637 | mnemonic_strm.Printf("0x%8.8x", uval32); |
638 | } break; |
639 | case 8: { |
640 | const uint64_t uval64 = data.GetU64(&offset); |
641 | m_opcode.SetOpcode64(uval64, byte_order); |
642 | m_opcode_name.assign(".quad"); |
643 | mnemonic_strm.Printf("0x%16.16" PRIx64"l" "x", uval64); |
644 | } break; |
645 | default: |
646 | if (inst_size == 0) |
647 | return; |
648 | else { |
649 | const uint8_t *bytes = data.PeekData(offset, inst_size); |
650 | if (bytes == nullptr) |
651 | return; |
652 | m_opcode_name.assign(".byte"); |
653 | m_opcode.SetOpcodeBytes(bytes, inst_size); |
654 | mnemonic_strm.Printf("0x%2.2x", bytes[0]); |
655 | for (uint32_t i = 1; i < inst_size; ++i) |
656 | mnemonic_strm.Printf(" 0x%2.2x", bytes[i]); |
657 | } |
658 | break; |
659 | } |
660 | m_mnemonics = std::string(mnemonic_strm.GetString()); |
661 | return; |
662 | } |
663 | |
664 | static RegularExpression s_regex( |
665 | llvm::StringRef("[ \t]*([^ ^\t]+)[ \t]*([^ ^\t].*)?")); |
666 | |
667 | llvm::SmallVector<llvm::StringRef, 4> matches; |
668 | if (s_regex.Execute(out_string, &matches)) { |
669 | m_opcode_name = matches[1].str(); |
670 | m_mnemonics = matches[2].str(); |
671 | } |
672 | } |
673 | } |
674 | } |
675 | |
676 | bool IsValid() const { return m_is_valid; } |
677 | |
678 | bool UsingFileAddress() const { return m_using_file_addr; } |
679 | size_t GetByteSize() const { return m_opcode.GetByteSize(); } |
680 | |
681 | /// Grants exclusive access to the disassembler and initializes it with the |
682 | /// given InstructionLLVMC and an optional ExecutionContext. |
683 | class DisassemblerScope { |
684 | std::shared_ptr<DisassemblerLLVMC> m_disasm; |
685 | |
686 | public: |
687 | explicit DisassemblerScope( |
688 | InstructionLLVMC &i, |
689 | const lldb_private::ExecutionContext *exe_ctx = nullptr) |
690 | : m_disasm(i.m_disasm_wp.lock()) { |
691 | m_disasm->m_mutex.lock(); |
692 | m_disasm->m_inst = &i; |
693 | m_disasm->m_exe_ctx = exe_ctx; |
694 | } |
695 | ~DisassemblerScope() { m_disasm->m_mutex.unlock(); } |
696 | |
697 | /// Evaluates to true if this scope contains a valid disassembler. |
698 | operator bool() const { return static_cast<bool>(m_disasm); } |
699 | |
700 | std::shared_ptr<DisassemblerLLVMC> operator->() { return m_disasm; } |
701 | }; |
702 | |
703 | static llvm::StringRef::const_iterator |
704 | ConsumeWhitespace(llvm::StringRef::const_iterator osi, |
705 | llvm::StringRef::const_iterator ose) { |
706 | while (osi != ose) { |
707 | switch (*osi) { |
708 | default: |
709 | return osi; |
710 | case ' ': |
711 | case '\t': |
712 | break; |
713 | } |
714 | ++osi; |
715 | } |
716 | |
717 | return osi; |
718 | } |
719 | |
720 | static std::pair<bool, llvm::StringRef::const_iterator> |
721 | ConsumeChar(llvm::StringRef::const_iterator osi, const char c, |
722 | llvm::StringRef::const_iterator ose) { |
723 | bool found = false; |
724 | |
725 | osi = ConsumeWhitespace(osi, ose); |
726 | if (osi != ose && *osi == c) { |
727 | found = true; |
728 | ++osi; |
729 | } |
730 | |
731 | return std::make_pair(found, osi); |
732 | } |
733 | |
734 | static std::pair<Operand, llvm::StringRef::const_iterator> |
735 | ParseRegisterName(llvm::StringRef::const_iterator osi, |
736 | llvm::StringRef::const_iterator ose) { |
737 | Operand ret; |
738 | ret.m_type = Operand::Type::Register; |
739 | std::string str; |
740 | |
741 | osi = ConsumeWhitespace(osi, ose); |
742 | |
743 | while (osi != ose) { |
744 | if (*osi >= '0' && *osi <= '9') { |
745 | if (str.empty()) { |
746 | return std::make_pair(Operand(), osi); |
747 | } else { |
748 | str.push_back(*osi); |
749 | } |
750 | } else if (*osi >= 'a' && *osi <= 'z') { |
751 | str.push_back(*osi); |
752 | } else { |
753 | switch (*osi) { |
754 | default: |
755 | if (str.empty()) { |
756 | return std::make_pair(Operand(), osi); |
757 | } else { |
758 | ret.m_register = ConstString(str); |
759 | return std::make_pair(ret, osi); |
760 | } |
761 | case '%': |
762 | if (!str.empty()) { |
763 | return std::make_pair(Operand(), osi); |
764 | } |
765 | break; |
766 | } |
767 | } |
768 | ++osi; |
769 | } |
770 | |
771 | ret.m_register = ConstString(str); |
772 | return std::make_pair(ret, osi); |
773 | } |
774 | |
775 | static std::pair<Operand, llvm::StringRef::const_iterator> |
776 | ParseImmediate(llvm::StringRef::const_iterator osi, |
777 | llvm::StringRef::const_iterator ose) { |
778 | Operand ret; |
779 | ret.m_type = Operand::Type::Immediate; |
780 | std::string str; |
781 | bool is_hex = false; |
782 | |
783 | osi = ConsumeWhitespace(osi, ose); |
784 | |
785 | while (osi != ose) { |
786 | if (*osi >= '0' && *osi <= '9') { |
787 | str.push_back(*osi); |
788 | } else if (*osi >= 'a' && *osi <= 'f') { |
789 | if (is_hex) { |
790 | str.push_back(*osi); |
791 | } else { |
792 | return std::make_pair(Operand(), osi); |
793 | } |
794 | } else { |
795 | switch (*osi) { |
796 | default: |
797 | if (str.empty()) { |
798 | return std::make_pair(Operand(), osi); |
799 | } else { |
800 | ret.m_immediate = strtoull(str.c_str(), nullptr, 0); |
801 | return std::make_pair(ret, osi); |
802 | } |
803 | case 'x': |
804 | if (!str.compare("0")) { |
805 | is_hex = true; |
806 | str.push_back(*osi); |
807 | } else { |
808 | return std::make_pair(Operand(), osi); |
809 | } |
810 | break; |
811 | case '#': |
812 | case '$': |
813 | if (!str.empty()) { |
814 | return std::make_pair(Operand(), osi); |
815 | } |
816 | break; |
817 | case '-': |
818 | if (str.empty()) { |
819 | ret.m_negative = true; |
820 | } else { |
821 | return std::make_pair(Operand(), osi); |
822 | } |
823 | } |
824 | } |
825 | ++osi; |
826 | } |
827 | |
828 | ret.m_immediate = strtoull(str.c_str(), nullptr, 0); |
829 | return std::make_pair(ret, osi); |
830 | } |
831 | |
832 | // -0x5(%rax,%rax,2) |
833 | static std::pair<Operand, llvm::StringRef::const_iterator> |
834 | ParseIntelIndexedAccess(llvm::StringRef::const_iterator osi, |
835 | llvm::StringRef::const_iterator ose) { |
836 | std::pair<Operand, llvm::StringRef::const_iterator> offset_and_iterator = |
837 | ParseImmediate(osi, ose); |
838 | if (offset_and_iterator.first.IsValid()) { |
839 | osi = offset_and_iterator.second; |
840 | } |
841 | |
842 | bool found = false; |
843 | std::tie(found, osi) = ConsumeChar(osi, '(', ose); |
844 | if (!found) { |
845 | return std::make_pair(Operand(), osi); |
846 | } |
847 | |
848 | std::pair<Operand, llvm::StringRef::const_iterator> base_and_iterator = |
849 | ParseRegisterName(osi, ose); |
850 | if (base_and_iterator.first.IsValid()) { |
851 | osi = base_and_iterator.second; |
852 | } else { |
853 | return std::make_pair(Operand(), osi); |
854 | } |
855 | |
856 | std::tie(found, osi) = ConsumeChar(osi, ',', ose); |
857 | if (!found) { |
858 | return std::make_pair(Operand(), osi); |
859 | } |
860 | |
861 | std::pair<Operand, llvm::StringRef::const_iterator> index_and_iterator = |
862 | ParseRegisterName(osi, ose); |
863 | if (index_and_iterator.first.IsValid()) { |
864 | osi = index_and_iterator.second; |
865 | } else { |
866 | return std::make_pair(Operand(), osi); |
867 | } |
868 | |
869 | std::tie(found, osi) = ConsumeChar(osi, ',', ose); |
870 | if (!found) { |
871 | return std::make_pair(Operand(), osi); |
872 | } |
873 | |
874 | std::pair<Operand, llvm::StringRef::const_iterator> |
875 | multiplier_and_iterator = ParseImmediate(osi, ose); |
876 | if (index_and_iterator.first.IsValid()) { |
877 | osi = index_and_iterator.second; |
878 | } else { |
879 | return std::make_pair(Operand(), osi); |
880 | } |
881 | |
882 | std::tie(found, osi) = ConsumeChar(osi, ')', ose); |
883 | if (!found) { |
884 | return std::make_pair(Operand(), osi); |
885 | } |
886 | |
887 | Operand product; |
888 | product.m_type = Operand::Type::Product; |
889 | product.m_children.push_back(index_and_iterator.first); |
890 | product.m_children.push_back(multiplier_and_iterator.first); |
891 | |
892 | Operand index; |
893 | index.m_type = Operand::Type::Sum; |
894 | index.m_children.push_back(base_and_iterator.first); |
895 | index.m_children.push_back(product); |
896 | |
897 | if (offset_and_iterator.first.IsValid()) { |
898 | Operand offset; |
899 | offset.m_type = Operand::Type::Sum; |
900 | offset.m_children.push_back(offset_and_iterator.first); |
901 | offset.m_children.push_back(index); |
902 | |
903 | Operand deref; |
904 | deref.m_type = Operand::Type::Dereference; |
905 | deref.m_children.push_back(offset); |
906 | return std::make_pair(deref, osi); |
907 | } else { |
908 | Operand deref; |
909 | deref.m_type = Operand::Type::Dereference; |
910 | deref.m_children.push_back(index); |
911 | return std::make_pair(deref, osi); |
912 | } |
913 | } |
914 | |
915 | // -0x10(%rbp) |
916 | static std::pair<Operand, llvm::StringRef::const_iterator> |
917 | ParseIntelDerefAccess(llvm::StringRef::const_iterator osi, |
918 | llvm::StringRef::const_iterator ose) { |
919 | std::pair<Operand, llvm::StringRef::const_iterator> offset_and_iterator = |
920 | ParseImmediate(osi, ose); |
921 | if (offset_and_iterator.first.IsValid()) { |
922 | osi = offset_and_iterator.second; |
923 | } |
924 | |
925 | bool found = false; |
926 | std::tie(found, osi) = ConsumeChar(osi, '(', ose); |
927 | if (!found) { |
928 | return std::make_pair(Operand(), osi); |
929 | } |
930 | |
931 | std::pair<Operand, llvm::StringRef::const_iterator> base_and_iterator = |
932 | ParseRegisterName(osi, ose); |
933 | if (base_and_iterator.first.IsValid()) { |
934 | osi = base_and_iterator.second; |
935 | } else { |
936 | return std::make_pair(Operand(), osi); |
937 | } |
938 | |
939 | std::tie(found, osi) = ConsumeChar(osi, ')', ose); |
940 | if (!found) { |
941 | return std::make_pair(Operand(), osi); |
942 | } |
943 | |
944 | if (offset_and_iterator.first.IsValid()) { |
945 | Operand offset; |
946 | offset.m_type = Operand::Type::Sum; |
947 | offset.m_children.push_back(offset_and_iterator.first); |
948 | offset.m_children.push_back(base_and_iterator.first); |
949 | |
950 | Operand deref; |
951 | deref.m_type = Operand::Type::Dereference; |
952 | deref.m_children.push_back(offset); |
953 | return std::make_pair(deref, osi); |
954 | } else { |
955 | Operand deref; |
956 | deref.m_type = Operand::Type::Dereference; |
957 | deref.m_children.push_back(base_and_iterator.first); |
958 | return std::make_pair(deref, osi); |
959 | } |
960 | } |
961 | |
962 | // [sp, #8]! |
963 | static std::pair<Operand, llvm::StringRef::const_iterator> |
964 | ParseARMOffsetAccess(llvm::StringRef::const_iterator osi, |
965 | llvm::StringRef::const_iterator ose) { |
966 | bool found = false; |
967 | std::tie(found, osi) = ConsumeChar(osi, '[', ose); |
968 | if (!found) { |
969 | return std::make_pair(Operand(), osi); |
970 | } |
971 | |
972 | std::pair<Operand, llvm::StringRef::const_iterator> base_and_iterator = |
973 | ParseRegisterName(osi, ose); |
974 | if (base_and_iterator.first.IsValid()) { |
975 | osi = base_and_iterator.second; |
976 | } else { |
977 | return std::make_pair(Operand(), osi); |
978 | } |
979 | |
980 | std::tie(found, osi) = ConsumeChar(osi, ',', ose); |
981 | if (!found) { |
982 | return std::make_pair(Operand(), osi); |
983 | } |
984 | |
985 | std::pair<Operand, llvm::StringRef::const_iterator> offset_and_iterator = |
986 | ParseImmediate(osi, ose); |
987 | if (offset_and_iterator.first.IsValid()) { |
988 | osi = offset_and_iterator.second; |
989 | } |
990 | |
991 | std::tie(found, osi) = ConsumeChar(osi, ']', ose); |
992 | if (!found) { |
993 | return std::make_pair(Operand(), osi); |
994 | } |
995 | |
996 | Operand offset; |
997 | offset.m_type = Operand::Type::Sum; |
998 | offset.m_children.push_back(offset_and_iterator.first); |
999 | offset.m_children.push_back(base_and_iterator.first); |
1000 | |
1001 | Operand deref; |
1002 | deref.m_type = Operand::Type::Dereference; |
1003 | deref.m_children.push_back(offset); |
1004 | return std::make_pair(deref, osi); |
1005 | } |
1006 | |
1007 | // [sp] |
1008 | static std::pair<Operand, llvm::StringRef::const_iterator> |
1009 | ParseARMDerefAccess(llvm::StringRef::const_iterator osi, |
1010 | llvm::StringRef::const_iterator ose) { |
1011 | bool found = false; |
1012 | std::tie(found, osi) = ConsumeChar(osi, '[', ose); |
1013 | if (!found) { |
1014 | return std::make_pair(Operand(), osi); |
1015 | } |
1016 | |
1017 | std::pair<Operand, llvm::StringRef::const_iterator> base_and_iterator = |
1018 | ParseRegisterName(osi, ose); |
1019 | if (base_and_iterator.first.IsValid()) { |
1020 | osi = base_and_iterator.second; |
1021 | } else { |
1022 | return std::make_pair(Operand(), osi); |
1023 | } |
1024 | |
1025 | std::tie(found, osi) = ConsumeChar(osi, ']', ose); |
1026 | if (!found) { |
1027 | return std::make_pair(Operand(), osi); |
1028 | } |
1029 | |
1030 | Operand deref; |
1031 | deref.m_type = Operand::Type::Dereference; |
1032 | deref.m_children.push_back(base_and_iterator.first); |
1033 | return std::make_pair(deref, osi); |
1034 | } |
1035 | |
1036 | static void DumpOperand(const Operand &op, Stream &s) { |
1037 | switch (op.m_type) { |
1038 | case Operand::Type::Dereference: |
1039 | s.PutCString("*"); |
1040 | DumpOperand(op.m_children[0], s); |
1041 | break; |
1042 | case Operand::Type::Immediate: |
1043 | if (op.m_negative) { |
1044 | s.PutCString("-"); |
1045 | } |
1046 | s.PutCString(llvm::to_string(op.m_immediate)); |
1047 | break; |
1048 | case Operand::Type::Invalid: |
1049 | s.PutCString("Invalid"); |
1050 | break; |
1051 | case Operand::Type::Product: |
1052 | s.PutCString("("); |
1053 | DumpOperand(op.m_children[0], s); |
1054 | s.PutCString("*"); |
1055 | DumpOperand(op.m_children[1], s); |
1056 | s.PutCString(")"); |
1057 | break; |
1058 | case Operand::Type::Register: |
1059 | s.PutCString(op.m_register.GetStringRef()); |
1060 | break; |
1061 | case Operand::Type::Sum: |
1062 | s.PutCString("("); |
1063 | DumpOperand(op.m_children[0], s); |
1064 | s.PutCString("+"); |
1065 | DumpOperand(op.m_children[1], s); |
1066 | s.PutCString(")"); |
1067 | break; |
1068 | } |
1069 | } |
1070 | |
1071 | bool ParseOperands( |
1072 | llvm::SmallVectorImpl<Instruction::Operand> &operands) override { |
1073 | const char *operands_string = GetOperands(nullptr); |
1074 | |
1075 | if (!operands_string) { |
1076 | return false; |
1077 | } |
1078 | |
1079 | llvm::StringRef operands_ref(operands_string); |
1080 | |
1081 | llvm::StringRef::const_iterator osi = operands_ref.begin(); |
1082 | llvm::StringRef::const_iterator ose = operands_ref.end(); |
1083 | |
1084 | while (osi != ose) { |
1085 | Operand operand; |
1086 | llvm::StringRef::const_iterator iter; |
1087 | |
1088 | if ((std::tie(operand, iter) = ParseIntelIndexedAccess(osi, ose), |
1089 | operand.IsValid()) || |
1090 | (std::tie(operand, iter) = ParseIntelDerefAccess(osi, ose), |
1091 | operand.IsValid()) || |
1092 | (std::tie(operand, iter) = ParseARMOffsetAccess(osi, ose), |
1093 | operand.IsValid()) || |
1094 | (std::tie(operand, iter) = ParseARMDerefAccess(osi, ose), |
1095 | operand.IsValid()) || |
1096 | (std::tie(operand, iter) = ParseRegisterName(osi, ose), |
1097 | operand.IsValid()) || |
1098 | (std::tie(operand, iter) = ParseImmediate(osi, ose), |
1099 | operand.IsValid())) { |
1100 | osi = iter; |
1101 | operands.push_back(operand); |
1102 | } else { |
1103 | return false; |
1104 | } |
1105 | |
1106 | std::pair<bool, llvm::StringRef::const_iterator> found_and_iter = |
1107 | ConsumeChar(osi, ',', ose); |
1108 | if (found_and_iter.first) { |
1109 | osi = found_and_iter.second; |
1110 | } |
1111 | |
1112 | osi = ConsumeWhitespace(osi, ose); |
1113 | } |
1114 | |
1115 | DisassemblerSP disasm_sp = m_disasm_wp.lock(); |
1116 | |
1117 | if (disasm_sp && operands.size() > 1) { |
1118 | // TODO tie this into the MC Disassembler's notion of clobbers. |
1119 | switch (disasm_sp->GetArchitecture().GetMachine()) { |
1120 | default: |
1121 | break; |
1122 | case llvm::Triple::x86: |
1123 | case llvm::Triple::x86_64: |
1124 | operands[operands.size() - 1].m_clobbered = true; |
1125 | break; |
1126 | case llvm::Triple::arm: |
1127 | operands[0].m_clobbered = true; |
1128 | break; |
1129 | } |
1130 | } |
1131 | |
1132 | if (Log *log = GetLog(LLDBLog::Process)) { |
1133 | StreamString ss; |
1134 | |
1135 | ss.Printf("[%s] expands to %zu operands:\n", operands_string, |
1136 | operands.size()); |
1137 | for (const Operand &operand : operands) { |
1138 | ss.PutCString(" "); |
1139 | DumpOperand(operand, ss); |
1140 | ss.PutCString("\n"); |
1141 | } |
1142 | |
1143 | log->PutString(ss.GetString()); |
1144 | } |
1145 | |
1146 | return true; |
1147 | } |
1148 | |
// Reports whether this instruction is a call. The answer comes from the
// m_is_call flag, which VisitInstruction() fills in on first use; if the
// opcode could not be decoded the flag keeps its default value.
bool IsCall() override {
  VisitInstruction();
  return m_is_call;
}
1153 | |
1154 | protected: |
// Weak handle to the disassembler; ParseOperands() locks it to query the
// target architecture.
std::weak_ptr<DisassemblerLLVMC> m_disasm_wp;

// NOTE(review): m_is_valid and m_using_file_addr are not written in this part
// of the file; confirm their semantics against the code that sets them.
bool m_is_valid = false;
bool m_using_file_addr = false;
// Set by VisitInstruction() once the opcode has been decoded successfully, so
// the classification flags below are computed at most once.
bool m_has_visited_instruction = false;

// Be conservative. If we didn't understand the instruction, say it:
// - Might branch
// - Does not have a delay slot
// - Is not a call
// - Is not a load
// - Is not an authenticated instruction
bool m_does_branch = true;
bool m_has_delay_slot = false;
bool m_is_call = false;
bool m_is_load = false;
bool m_is_authenticated = false;
1172 | |
1173 | void VisitInstruction() { |
1174 | if (m_has_visited_instruction) |
1175 | return; |
1176 | |
1177 | DisassemblerScope disasm(*this); |
1178 | if (!disasm) |
1179 | return; |
1180 | |
1181 | DataExtractor data; |
1182 | if (!m_opcode.GetData(data)) |
1183 | return; |
1184 | |
1185 | bool is_alternate_isa; |
1186 | lldb::addr_t pc = m_address.GetFileAddress(); |
1187 | DisassemblerLLVMC::MCDisasmInstance *mc_disasm_ptr = |
1188 | GetDisasmToUse(is_alternate_isa, disasm); |
1189 | const uint8_t *opcode_data = data.GetDataStart(); |
1190 | const size_t opcode_data_len = data.GetByteSize(); |
1191 | llvm::MCInst inst; |
1192 | const size_t inst_size = |
1193 | mc_disasm_ptr->GetMCInst(opcode_data, opcode_data_len, pc, inst); |
1194 | if (inst_size == 0) |
1195 | return; |
1196 | |
1197 | m_has_visited_instruction = true; |
1198 | m_does_branch = mc_disasm_ptr->CanBranch(inst); |
1199 | m_has_delay_slot = mc_disasm_ptr->HasDelaySlot(inst); |
1200 | m_is_call = mc_disasm_ptr->IsCall(inst); |
1201 | m_is_load = mc_disasm_ptr->IsLoad(inst); |
1202 | m_is_authenticated = mc_disasm_ptr->IsAuthenticated(inst); |
1203 | } |
1204 | |
1205 | private: |
1206 | DisassemblerLLVMC::MCDisasmInstance * |
1207 | GetDisasmToUse(bool &is_alternate_isa, DisassemblerScope &disasm) { |
1208 | is_alternate_isa = false; |
1209 | if (disasm) { |
1210 | if (disasm->m_alternate_disasm_up) { |
1211 | const AddressClass address_class = GetAddressClass(); |
1212 | |
1213 | if (address_class == AddressClass::eCodeAlternateISA) { |
1214 | is_alternate_isa = true; |
1215 | return disasm->m_alternate_disasm_up.get(); |
1216 | } |
1217 | } |
1218 | return disasm->m_disasm_up.get(); |
1219 | } |
1220 | return nullptr; |
1221 | } |
1222 | }; |
1223 | |
1224 | std::unique_ptr<DisassemblerLLVMC::MCDisasmInstance> |
1225 | DisassemblerLLVMC::MCDisasmInstance::Create(const char *triple, const char *cpu, |
1226 | const char *features_str, |
1227 | unsigned flavor, |
1228 | DisassemblerLLVMC &owner) { |
1229 | using Instance = std::unique_ptr<DisassemblerLLVMC::MCDisasmInstance>; |
1230 | |
1231 | std::string Status; |
1232 | const llvm::Target *curr_target = |
1233 | llvm::TargetRegistry::lookupTarget(triple, Status); |
1234 | if (!curr_target) |
1235 | return Instance(); |
1236 | |
1237 | std::unique_ptr<llvm::MCInstrInfo> instr_info_up( |
1238 | curr_target->createMCInstrInfo()); |
1239 | if (!instr_info_up) |
1240 | return Instance(); |
1241 | |
1242 | std::unique_ptr<llvm::MCRegisterInfo> reg_info_up( |
1243 | curr_target->createMCRegInfo(triple)); |
1244 | if (!reg_info_up) |
1245 | return Instance(); |
1246 | |
1247 | std::unique_ptr<llvm::MCSubtargetInfo> subtarget_info_up( |
1248 | curr_target->createMCSubtargetInfo(triple, cpu, features_str)); |
1249 | if (!subtarget_info_up) |
1250 | return Instance(); |
1251 | |
1252 | llvm::MCTargetOptions MCOptions; |
1253 | std::unique_ptr<llvm::MCAsmInfo> asm_info_up( |
1254 | curr_target->createMCAsmInfo(*reg_info_up, triple, MCOptions)); |
1255 | if (!asm_info_up) |
1256 | return Instance(); |
1257 | |
1258 | std::unique_ptr<llvm::MCContext> context_up( |
1259 | new llvm::MCContext(llvm::Triple(triple), asm_info_up.get(), |
1260 | reg_info_up.get(), subtarget_info_up.get())); |
1261 | if (!context_up) |
1262 | return Instance(); |
1263 | |
1264 | std::unique_ptr<llvm::MCDisassembler> disasm_up( |
1265 | curr_target->createMCDisassembler(*subtarget_info_up, *context_up)); |
1266 | if (!disasm_up) |
1267 | return Instance(); |
1268 | |
1269 | std::unique_ptr<llvm::MCRelocationInfo> rel_info_up( |
1270 | curr_target->createMCRelocationInfo(triple, *context_up)); |
1271 | if (!rel_info_up) |
1272 | return Instance(); |
1273 | |
1274 | std::unique_ptr<llvm::MCSymbolizer> symbolizer_up( |
1275 | curr_target->createMCSymbolizer( |
1276 | triple, nullptr, DisassemblerLLVMC::SymbolLookupCallback, &owner, |
1277 | context_up.get(), std::move(rel_info_up))); |
1278 | disasm_up->setSymbolizer(std::move(symbolizer_up)); |
1279 | |
1280 | unsigned asm_printer_variant = |
1281 | flavor == ~0U ? asm_info_up->getAssemblerDialect() : flavor; |
1282 | |
1283 | std::unique_ptr<llvm::MCInstPrinter> instr_printer_up( |
1284 | curr_target->createMCInstPrinter(llvm::Triple{triple}, |
1285 | asm_printer_variant, *asm_info_up, |
1286 | *instr_info_up, *reg_info_up)); |
1287 | if (!instr_printer_up) |
1288 | return Instance(); |
1289 | |
1290 | return Instance( |
1291 | new MCDisasmInstance(std::move(instr_info_up), std::move(reg_info_up), |
1292 | std::move(subtarget_info_up), std::move(asm_info_up), |
1293 | std::move(context_up), std::move(disasm_up), |
1294 | std::move(instr_printer_up))); |
1295 | } |
1296 | |
1297 | DisassemblerLLVMC::MCDisasmInstance::MCDisasmInstance( |
1298 | std::unique_ptr<llvm::MCInstrInfo> &&instr_info_up, |
1299 | std::unique_ptr<llvm::MCRegisterInfo> &®_info_up, |
1300 | std::unique_ptr<llvm::MCSubtargetInfo> &&subtarget_info_up, |
1301 | std::unique_ptr<llvm::MCAsmInfo> &&asm_info_up, |
1302 | std::unique_ptr<llvm::MCContext> &&context_up, |
1303 | std::unique_ptr<llvm::MCDisassembler> &&disasm_up, |
1304 | std::unique_ptr<llvm::MCInstPrinter> &&instr_printer_up) |
1305 | : m_instr_info_up(std::move(instr_info_up)), |
1306 | m_reg_info_up(std::move(reg_info_up)), |
1307 | m_subtarget_info_up(std::move(subtarget_info_up)), |
1308 | m_asm_info_up(std::move(asm_info_up)), |
1309 | m_context_up(std::move(context_up)), m_disasm_up(std::move(disasm_up)), |
1310 | m_instr_printer_up(std::move(instr_printer_up)) { |
1311 | assert(m_instr_info_up && m_reg_info_up && m_subtarget_info_up &&(static_cast <bool> (m_instr_info_up && m_reg_info_up && m_subtarget_info_up && m_asm_info_up && m_context_up && m_disasm_up && m_instr_printer_up ) ? void (0) : __assert_fail ("m_instr_info_up && m_reg_info_up && m_subtarget_info_up && m_asm_info_up && m_context_up && m_disasm_up && m_instr_printer_up" , "lldb/source/Plugins/Disassembler/LLVMC/DisassemblerLLVMC.cpp" , 1312, __extension__ __PRETTY_FUNCTION__)) |
1312 | m_asm_info_up && m_context_up && m_disasm_up && m_instr_printer_up)(static_cast <bool> (m_instr_info_up && m_reg_info_up && m_subtarget_info_up && m_asm_info_up && m_context_up && m_disasm_up && m_instr_printer_up ) ? void (0) : __assert_fail ("m_instr_info_up && m_reg_info_up && m_subtarget_info_up && m_asm_info_up && m_context_up && m_disasm_up && m_instr_printer_up" , "lldb/source/Plugins/Disassembler/LLVMC/DisassemblerLLVMC.cpp" , 1312, __extension__ __PRETTY_FUNCTION__)); |
1313 | } |
1314 | |
1315 | uint64_t DisassemblerLLVMC::MCDisasmInstance::GetMCInst( |
1316 | const uint8_t *opcode_data, size_t opcode_data_len, lldb::addr_t pc, |
1317 | llvm::MCInst &mc_inst) const { |
1318 | llvm::ArrayRef<uint8_t> data(opcode_data, opcode_data_len); |
1319 | llvm::MCDisassembler::DecodeStatus status; |
1320 | |
1321 | uint64_t new_inst_size; |
1322 | status = m_disasm_up->getInstruction(mc_inst, new_inst_size, data, pc, |
1323 | llvm::nulls()); |
1324 | if (status == llvm::MCDisassembler::Success) |
1325 | return new_inst_size; |
1326 | else |
1327 | return 0; |
1328 | } |
1329 | |
1330 | void DisassemblerLLVMC::MCDisasmInstance::PrintMCInst( |
1331 | llvm::MCInst &mc_inst, std::string &inst_string, |
1332 | std::string &comments_string) { |
1333 | llvm::raw_string_ostream inst_stream(inst_string); |
1334 | llvm::raw_string_ostream comments_stream(comments_string); |
1335 | |
1336 | m_instr_printer_up->setCommentStream(comments_stream); |
1337 | m_instr_printer_up->printInst(&mc_inst, 0, llvm::StringRef(), |
1338 | *m_subtarget_info_up, inst_stream); |
1339 | m_instr_printer_up->setCommentStream(llvm::nulls()); |
1340 | comments_stream.flush(); |
1341 | |
1342 | static std::string g_newlines("\r\n"); |
1343 | |
1344 | for (size_t newline_pos = 0; |
1345 | (newline_pos = comments_string.find_first_of(g_newlines, newline_pos)) != |
1346 | comments_string.npos; |
1347 | /**/) { |
1348 | comments_string.replace(comments_string.begin() + newline_pos, |
1349 | comments_string.begin() + newline_pos + 1, 1, ' '); |
1350 | } |
1351 | } |
1352 | |
1353 | void DisassemblerLLVMC::MCDisasmInstance::SetStyle( |
1354 | bool use_hex_immed, HexImmediateStyle hex_style) { |
1355 | m_instr_printer_up->setPrintImmHex(use_hex_immed); |
1356 | switch (hex_style) { |
1357 | case eHexStyleC: |
1358 | m_instr_printer_up->setPrintHexStyle(llvm::HexStyle::C); |
1359 | break; |
1360 | case eHexStyleAsm: |
1361 | m_instr_printer_up->setPrintHexStyle(llvm::HexStyle::Asm); |
1362 | break; |
1363 | } |
1364 | } |
1365 | |
1366 | bool DisassemblerLLVMC::MCDisasmInstance::CanBranch( |
1367 | llvm::MCInst &mc_inst) const { |
1368 | return m_instr_info_up->get(mc_inst.getOpcode()) |
1369 | .mayAffectControlFlow(mc_inst, *m_reg_info_up); |
1370 | } |
1371 | |
1372 | bool DisassemblerLLVMC::MCDisasmInstance::HasDelaySlot( |
1373 | llvm::MCInst &mc_inst) const { |
1374 | return m_instr_info_up->get(mc_inst.getOpcode()).hasDelaySlot(); |
1375 | } |
1376 | |
1377 | bool DisassemblerLLVMC::MCDisasmInstance::IsCall(llvm::MCInst &mc_inst) const { |
1378 | return m_instr_info_up->get(mc_inst.getOpcode()).isCall(); |
1379 | } |
1380 | |
1381 | bool DisassemblerLLVMC::MCDisasmInstance::IsLoad(llvm::MCInst &mc_inst) const { |
1382 | return m_instr_info_up->get(mc_inst.getOpcode()).mayLoad(); |
1383 | } |
1384 | |
1385 | bool DisassemblerLLVMC::MCDisasmInstance::IsAuthenticated( |
1386 | llvm::MCInst &mc_inst) const { |
1387 | const auto &InstrDesc = m_instr_info_up->get(mc_inst.getOpcode()); |
1388 | |
1389 | // Treat software auth traps (brk 0xc470 + aut key, where 0x70 == 'p', 0xc4 |
1390 | // == 'a' + 'c') as authenticated instructions for reporting purposes, in |
1391 | // addition to the standard authenticated instructions specified in ARMv8.3. |
1392 | bool IsBrkC47x = false; |
1393 | if (InstrDesc.isTrap() && mc_inst.getNumOperands() == 1) { |
1394 | const llvm::MCOperand &Op0 = mc_inst.getOperand(0); |
1395 | if (Op0.isImm() && Op0.getImm() >= 0xc470 && Op0.getImm() <= 0xc474) |
1396 | IsBrkC47x = true; |
1397 | } |
1398 | |
1399 | return InstrDesc.isAuthenticated() || IsBrkC47x; |
1400 | } |
1401 | |
1402 | DisassemblerLLVMC::DisassemblerLLVMC(const ArchSpec &arch, |
1403 | const char *flavor_string) |
1404 | : Disassembler(arch, flavor_string), m_exe_ctx(nullptr), m_inst(nullptr), |
1405 | m_data_from_file(false), m_adrp_address(LLDB_INVALID_ADDRESS(18446744073709551615UL)), |
1406 | m_adrp_insn() { |
1407 | if (!FlavorValidForArchSpec(arch, m_flavor.c_str())) { |
1408 | m_flavor.assign("default"); |
1409 | } |
1410 | |
1411 | unsigned flavor = ~0U; |
1412 | llvm::Triple triple = arch.GetTriple(); |
1413 | |
1414 | // So far the only supported flavor is "intel" on x86. The base class will |
1415 | // set this correctly coming in. |
1416 | if (triple.getArch() == llvm::Triple::x86 || |
1417 | triple.getArch() == llvm::Triple::x86_64) { |
1418 | if (m_flavor == "intel") { |
1419 | flavor = 1; |
1420 | } else if (m_flavor == "att") { |
1421 | flavor = 0; |
1422 | } |
1423 | } |
1424 | |
1425 | ArchSpec thumb_arch(arch); |
1426 | if (triple.getArch() == llvm::Triple::arm) { |
1427 | std::string thumb_arch_name(thumb_arch.GetTriple().getArchName().str()); |
1428 | // Replace "arm" with "thumb" so we get all thumb variants correct |
1429 | if (thumb_arch_name.size() > 3) { |
1430 | thumb_arch_name.erase(0, 3); |
1431 | thumb_arch_name.insert(0, "thumb"); |
1432 | } else { |
1433 | thumb_arch_name = "thumbv9.3a"; |
1434 | } |
1435 | thumb_arch.GetTriple().setArchName(llvm::StringRef(thumb_arch_name)); |
1436 | } |
1437 | |
1438 | // If no sub architecture specified then use the most recent arm architecture |
1439 | // so the disassembler will return all instructions. Without it we will see a |
1440 | // lot of unknown opcodes if the code uses instructions which are not |
1441 | // available in the oldest arm version (which is used when no sub architecture |
1442 | // is specified). |
1443 | if (triple.getArch() == llvm::Triple::arm && |
1444 | triple.getSubArch() == llvm::Triple::NoSubArch) |
1445 | triple.setArchName("armv9.3a"); |
1446 | |
1447 | std::string features_str; |
1448 | const char *triple_str = triple.getTriple().c_str(); |
1449 | |
1450 | // ARM Cortex M0-M7 devices only execute thumb instructions |
1451 | if (arch.IsAlwaysThumbInstructions()) { |
1452 | triple_str = thumb_arch.GetTriple().getTriple().c_str(); |
1453 | features_str += "+fp-armv8,"; |
1454 | } |
1455 | |
1456 | const char *cpu = ""; |
Value stored to 'cpu' during its initialization is never read | |
1457 | |
1458 | switch (arch.GetCore()) { |
1459 | case ArchSpec::eCore_mips32: |
1460 | case ArchSpec::eCore_mips32el: |
1461 | cpu = "mips32"; |
1462 | break; |
1463 | case ArchSpec::eCore_mips32r2: |
1464 | case ArchSpec::eCore_mips32r2el: |
1465 | cpu = "mips32r2"; |
1466 | break; |
1467 | case ArchSpec::eCore_mips32r3: |
1468 | case ArchSpec::eCore_mips32r3el: |
1469 | cpu = "mips32r3"; |
1470 | break; |
1471 | case ArchSpec::eCore_mips32r5: |
1472 | case ArchSpec::eCore_mips32r5el: |
1473 | cpu = "mips32r5"; |
1474 | break; |
1475 | case ArchSpec::eCore_mips32r6: |
1476 | case ArchSpec::eCore_mips32r6el: |
1477 | cpu = "mips32r6"; |
1478 | break; |
1479 | case ArchSpec::eCore_mips64: |
1480 | case ArchSpec::eCore_mips64el: |
1481 | cpu = "mips64"; |
1482 | break; |
1483 | case ArchSpec::eCore_mips64r2: |
1484 | case ArchSpec::eCore_mips64r2el: |
1485 | cpu = "mips64r2"; |
1486 | break; |
1487 | case ArchSpec::eCore_mips64r3: |
1488 | case ArchSpec::eCore_mips64r3el: |
1489 | cpu = "mips64r3"; |
1490 | break; |
1491 | case ArchSpec::eCore_mips64r5: |
1492 | case ArchSpec::eCore_mips64r5el: |
1493 | cpu = "mips64r5"; |
1494 | break; |
1495 | case ArchSpec::eCore_mips64r6: |
1496 | case ArchSpec::eCore_mips64r6el: |
1497 | cpu = "mips64r6"; |
1498 | break; |
1499 | default: |
1500 | cpu = ""; |
1501 | break; |
1502 | } |
1503 | |
1504 | if (arch.IsMIPS()) { |
1505 | uint32_t arch_flags = arch.GetFlags(); |
1506 | if (arch_flags & ArchSpec::eMIPSAse_msa) |
1507 | features_str += "+msa,"; |
1508 | if (arch_flags & ArchSpec::eMIPSAse_dsp) |
1509 | features_str += "+dsp,"; |
1510 | if (arch_flags & ArchSpec::eMIPSAse_dspr2) |
1511 | features_str += "+dspr2,"; |
1512 | } |
1513 | |
1514 | // If any AArch64 variant, enable latest ISA with all extensions. |
1515 | if (triple.isAArch64()) { |
1516 | features_str += "+all,"; |
1517 | |
1518 | if (triple.getVendor() == llvm::Triple::Apple) |
1519 | cpu = "apple-latest"; |
1520 | } |
1521 | |
1522 | if (triple.isRISCV()) { |
1523 | uint32_t arch_flags = arch.GetFlags(); |
1524 | if (arch_flags & ArchSpec::eRISCV_rvc) |
1525 | features_str += "+c,"; |
1526 | if (arch_flags & ArchSpec::eRISCV_rve) |
1527 | features_str += "+e,"; |
1528 | if ((arch_flags & ArchSpec::eRISCV_float_abi_single) == |
1529 | ArchSpec::eRISCV_float_abi_single) |
1530 | features_str += "+f,"; |
1531 | if ((arch_flags & ArchSpec::eRISCV_float_abi_double) == |
1532 | ArchSpec::eRISCV_float_abi_double) |
1533 | features_str += "+f,+d,"; |
1534 | if ((arch_flags & ArchSpec::eRISCV_float_abi_quad) == |
1535 | ArchSpec::eRISCV_float_abi_quad) |
1536 | features_str += "+f,+d,+q,"; |
1537 | // FIXME: how do we detect features such as `+a`, `+m`? |
1538 | } |
1539 | |
1540 | // We use m_disasm_up.get() to tell whether we are valid or not, so if this |
1541 | // isn't good for some reason, we won't be valid and FindPlugin will fail and |
1542 | // we won't get used. |
1543 | m_disasm_up = MCDisasmInstance::Create(triple_str, cpu, features_str.c_str(), |
1544 | flavor, *this); |
1545 | |
1546 | llvm::Triple::ArchType llvm_arch = triple.getArch(); |
1547 | |
1548 | // For arm CPUs that can execute arm or thumb instructions, also create a |
1549 | // thumb instruction disassembler. |
1550 | if (llvm_arch == llvm::Triple::arm) { |
1551 | std::string thumb_triple(thumb_arch.GetTriple().getTriple()); |
1552 | m_alternate_disasm_up = |
1553 | MCDisasmInstance::Create(thumb_triple.c_str(), "", features_str.c_str(), |
1554 | flavor, *this); |
1555 | if (!m_alternate_disasm_up) |
1556 | m_disasm_up.reset(); |
1557 | |
1558 | } else if (arch.IsMIPS()) { |
1559 | /* Create alternate disassembler for MIPS16 and microMIPS */ |
1560 | uint32_t arch_flags = arch.GetFlags(); |
1561 | if (arch_flags & ArchSpec::eMIPSAse_mips16) |
1562 | features_str += "+mips16,"; |
1563 | else if (arch_flags & ArchSpec::eMIPSAse_micromips) |
1564 | features_str += "+micromips,"; |
1565 | |
1566 | m_alternate_disasm_up = MCDisasmInstance::Create( |
1567 | triple_str, cpu, features_str.c_str(), flavor, *this); |
1568 | if (!m_alternate_disasm_up) |
1569 | m_disasm_up.reset(); |
1570 | } |
1571 | } |
1572 | |
1573 | DisassemblerLLVMC::~DisassemblerLLVMC() = default; |
1574 | |
1575 | Disassembler *DisassemblerLLVMC::CreateInstance(const ArchSpec &arch, |
1576 | const char *flavor) { |
1577 | if (arch.GetTriple().getArch() != llvm::Triple::UnknownArch) { |
1578 | std::unique_ptr<DisassemblerLLVMC> disasm_up( |
1579 | new DisassemblerLLVMC(arch, flavor)); |
1580 | |
1581 | if (disasm_up.get() && disasm_up->IsValid()) |
1582 | return disasm_up.release(); |
1583 | } |
1584 | return nullptr; |
1585 | } |
1586 | |
1587 | size_t DisassemblerLLVMC::DecodeInstructions(const Address &base_addr, |
1588 | const DataExtractor &data, |
1589 | lldb::offset_t data_offset, |
1590 | size_t num_instructions, |
1591 | bool append, bool data_from_file) { |
1592 | if (!append) |
1593 | m_instruction_list.Clear(); |
1594 | |
1595 | if (!IsValid()) |
1596 | return 0; |
1597 | |
1598 | m_data_from_file = data_from_file; |
1599 | uint32_t data_cursor = data_offset; |
1600 | const size_t data_byte_size = data.GetByteSize(); |
1601 | uint32_t instructions_parsed = 0; |
1602 | Address inst_addr(base_addr); |
1603 | |
1604 | while (data_cursor < data_byte_size && |
1605 | instructions_parsed < num_instructions) { |
1606 | |
1607 | AddressClass address_class = AddressClass::eCode; |
1608 | |
1609 | if (m_alternate_disasm_up) |
1610 | address_class = inst_addr.GetAddressClass(); |
1611 | |
1612 | InstructionSP inst_sp( |
1613 | new InstructionLLVMC(*this, inst_addr, address_class)); |
1614 | |
1615 | if (!inst_sp) |
1616 | break; |
1617 | |
1618 | uint32_t inst_size = inst_sp->Decode(*this, data, data_cursor); |
1619 | |
1620 | if (inst_size == 0) |
1621 | break; |
1622 | |
1623 | m_instruction_list.Append(inst_sp); |
1624 | data_cursor += inst_size; |
1625 | inst_addr.Slide(inst_size); |
1626 | instructions_parsed++; |
1627 | } |
1628 | |
1629 | return data_cursor - data_offset; |
1630 | } |
1631 | |
// Registers this plugin with the PluginManager, using CreateInstance as the
// factory, and initializes LLVM's target infos, target MCs, asm parsers and
// disassemblers for all targets.
void DisassemblerLLVMC::Initialize() {
  PluginManager::RegisterPlugin(GetPluginNameStatic(),
                                "Disassembler that uses LLVM MC to disassemble "
                                "i386, x86_64, ARM, and ARM64.",
                                CreateInstance);

  llvm::InitializeAllTargetInfos();
  llvm::InitializeAllTargetMCs();
  llvm::InitializeAllAsmParsers();
  llvm::InitializeAllDisassemblers();
}
1643 | |
// Unregisters the plugin that Initialize() registered; the plugin is
// identified by its CreateInstance factory.
void DisassemblerLLVMC::Terminate() {
  PluginManager::UnregisterPlugin(CreateInstance);
}
1647 | |
1648 | int DisassemblerLLVMC::OpInfoCallback(void *disassembler, uint64_t pc, |
1649 | uint64_t offset, uint64_t size, |
1650 | int tag_type, void *tag_bug) { |
1651 | return static_cast<DisassemblerLLVMC *>(disassembler) |
1652 | ->OpInfo(pc, offset, size, tag_type, tag_bug); |
1653 | } |
1654 | |
1655 | const char *DisassemblerLLVMC::SymbolLookupCallback(void *disassembler, |
1656 | uint64_t value, |
1657 | uint64_t *type, uint64_t pc, |
1658 | const char **name) { |
1659 | return static_cast<DisassemblerLLVMC *>(disassembler) |
1660 | ->SymbolLookup(value, type, pc, name); |
1661 | } |
1662 | |
1663 | bool DisassemblerLLVMC::FlavorValidForArchSpec( |
1664 | const lldb_private::ArchSpec &arch, const char *flavor) { |
1665 | llvm::Triple triple = arch.GetTriple(); |
1666 | if (flavor == nullptr || strcmp(flavor, "default") == 0) |
1667 | return true; |
1668 | |
1669 | if (triple.getArch() == llvm::Triple::x86 || |
1670 | triple.getArch() == llvm::Triple::x86_64) { |
1671 | return strcmp(flavor, "intel") == 0 || strcmp(flavor, "att") == 0; |
1672 | } else |
1673 | return false; |
1674 | } |
1675 | |
1676 | bool DisassemblerLLVMC::IsValid() const { return m_disasm_up.operator bool(); } |
1677 | |
1678 | int DisassemblerLLVMC::OpInfo(uint64_t PC, uint64_t Offset, uint64_t Size, |
1679 | int tag_type, void *tag_bug) { |
1680 | switch (tag_type) { |
1681 | default: |
1682 | break; |
1683 | case 1: |
1684 | memset(tag_bug, 0, sizeof(::LLVMOpInfo1)); |
1685 | break; |
1686 | } |
1687 | return 0; |
1688 | } |
1689 | |
1690 | const char *DisassemblerLLVMC::SymbolLookup(uint64_t value, uint64_t *type_ptr, |
1691 | uint64_t pc, const char **name) { |
1692 | if (*type_ptr) { |
1693 | if (m_exe_ctx && m_inst) { |
1694 | // std::string remove_this_prior_to_checkin; |
1695 | Target *target = m_exe_ctx ? m_exe_ctx->GetTargetPtr() : nullptr; |
1696 | Address value_so_addr; |
1697 | Address pc_so_addr; |
1698 | if (target->GetArchitecture().GetMachine() == llvm::Triple::aarch64 || |
1699 | target->GetArchitecture().GetMachine() == llvm::Triple::aarch64_be || |
1700 | target->GetArchitecture().GetMachine() == llvm::Triple::aarch64_32) { |
1701 | if (*type_ptr == LLVMDisassembler_ReferenceType_In_ARM64_ADRP0x100000001) { |
1702 | m_adrp_address = pc; |
1703 | m_adrp_insn = value; |
1704 | *name = nullptr; |
1705 | *type_ptr = LLVMDisassembler_ReferenceType_InOut_None0; |
1706 | return nullptr; |
1707 | } |
1708 | // If this instruction is an ADD and |
1709 | // the previous instruction was an ADRP and |
1710 | // the ADRP's register and this ADD's register are the same, |
1711 | // then this is a pc-relative address calculation. |
1712 | if (*type_ptr == LLVMDisassembler_ReferenceType_In_ARM64_ADDXri0x100000002 && |
1713 | m_adrp_insn && m_adrp_address == pc - 4 && |
1714 | (*m_adrp_insn & 0x1f) == ((value >> 5) & 0x1f)) { |
1715 | uint32_t addxri_inst; |
1716 | uint64_t adrp_imm, addxri_imm; |
1717 | // Get immlo and immhi bits, OR them together to get the ADRP imm |
1718 | // value. |
1719 | adrp_imm = |
1720 | ((*m_adrp_insn & 0x00ffffe0) >> 3) | ((*m_adrp_insn >> 29) & 0x3); |
1721 | // if high bit of immhi after right-shifting set, sign extend |
1722 | if (adrp_imm & (1ULL << 20)) |
1723 | adrp_imm |= ~((1ULL << 21) - 1); |
1724 | |
1725 | addxri_inst = value; |
1726 | addxri_imm = (addxri_inst >> 10) & 0xfff; |
1727 | // check if 'sh' bit is set, shift imm value up if so |
1728 | // (this would make no sense, ADRP already gave us this part) |
1729 | if ((addxri_inst >> (12 + 5 + 5)) & 1) |
1730 | addxri_imm <<= 12; |
1731 | value = (m_adrp_address & 0xfffffffffffff000LL) + (adrp_imm << 12) + |
1732 | addxri_imm; |
1733 | } |
1734 | m_adrp_address = LLDB_INVALID_ADDRESS(18446744073709551615UL); |
1735 | m_adrp_insn.reset(); |
1736 | } |
1737 | |
1738 | if (m_inst->UsingFileAddress()) { |
1739 | ModuleSP module_sp(m_inst->GetAddress().GetModule()); |
1740 | if (module_sp) { |
1741 | module_sp->ResolveFileAddress(value, value_so_addr); |
1742 | module_sp->ResolveFileAddress(pc, pc_so_addr); |
1743 | } |
1744 | } else if (target && !target->GetSectionLoadList().IsEmpty()) { |
1745 | target->GetSectionLoadList().ResolveLoadAddress(value, value_so_addr); |
1746 | target->GetSectionLoadList().ResolveLoadAddress(pc, pc_so_addr); |
1747 | } |
1748 | |
1749 | SymbolContext sym_ctx; |
1750 | const SymbolContextItem resolve_scope = |
1751 | eSymbolContextFunction | eSymbolContextSymbol; |
1752 | if (pc_so_addr.IsValid() && pc_so_addr.GetModule()) { |
1753 | pc_so_addr.GetModule()->ResolveSymbolContextForAddress( |
1754 | pc_so_addr, resolve_scope, sym_ctx); |
1755 | } |
1756 | |
1757 | if (value_so_addr.IsValid() && value_so_addr.GetSection()) { |
1758 | StreamString ss; |
1759 | |
1760 | bool format_omitting_current_func_name = false; |
1761 | if (sym_ctx.symbol || sym_ctx.function) { |
1762 | AddressRange range; |
1763 | if (sym_ctx.GetAddressRange(resolve_scope, 0, false, range) && |
1764 | range.GetBaseAddress().IsValid() && |
1765 | range.ContainsLoadAddress(value_so_addr, target)) { |
1766 | format_omitting_current_func_name = true; |
1767 | } |
1768 | } |
1769 | |
1770 | // If the "value" address (the target address we're symbolicating) is |
1771 | // inside the same SymbolContext as the current instruction pc |
1772 | // (pc_so_addr), don't print the full function name - just print it |
1773 | // with DumpStyleNoFunctionName style, e.g. "<+36>". |
1774 | if (format_omitting_current_func_name) { |
1775 | value_so_addr.Dump(&ss, target, Address::DumpStyleNoFunctionName, |
1776 | Address::DumpStyleSectionNameOffset); |
1777 | } else { |
1778 | value_so_addr.Dump( |
1779 | &ss, target, |
1780 | Address::DumpStyleResolvedDescriptionNoFunctionArguments, |
1781 | Address::DumpStyleSectionNameOffset); |
1782 | } |
1783 | |
1784 | if (!ss.GetString().empty()) { |
1785 | // If Address::Dump returned a multi-line description, most commonly |
1786 | // seen when we have multiple levels of inlined functions at an |
1787 | // address, only show the first line. |
1788 | std::string str = std::string(ss.GetString()); |
1789 | size_t first_eol_char = str.find_first_of("\r\n"); |
1790 | if (first_eol_char != std::string::npos) { |
1791 | str.erase(first_eol_char); |
1792 | } |
1793 | m_inst->AppendComment(str); |
1794 | } |
1795 | } |
1796 | } |
1797 | } |
1798 | |
1799 | // TODO: llvm-objdump sets the type_ptr to the |
1800 | // LLVMDisassembler_ReferenceType_Out_* values |
1801 | // based on where value_so_addr is pointing, with |
1802 | // Mach-O specific augmentations in MachODump.cpp. e.g. |
1803 | // see what AArch64ExternalSymbolizer::tryAddingSymbolicOperand |
1804 | // handles. |
1805 | *type_ptr = LLVMDisassembler_ReferenceType_InOut_None0; |
1806 | *name = nullptr; |
1807 | return nullptr; |
1808 | } |