LLVM 19.0.0git
LVBinaryReader.cpp
Go to the documentation of this file.
1//===-- LVBinaryReader.cpp ------------------------------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This implements the LVBinaryReader class.
10//
11//===----------------------------------------------------------------------===//
12
14#include "llvm/Support/Errc.h"
17
18using namespace llvm;
19using namespace llvm::logicalview;
20
21#define DEBUG_TYPE "BinaryReader"
22
23// Function names extracted from the object symbol table.
25 LVSectionIndex SectionIndex) {
26 std::string SymbolName(Name);
27 if (SymbolNames.find(SymbolName) == SymbolNames.end()) {
28 SymbolNames.emplace(
29 std::piecewise_construct, std::forward_as_tuple(SymbolName),
30 std::forward_as_tuple(Function, 0, SectionIndex, false));
31 } else {
32 // Update a recorded entry with its logical scope and section index.
33 SymbolNames[SymbolName].Scope = Function;
34 if (SectionIndex)
35 SymbolNames[SymbolName].SectionIndex = SectionIndex;
36 }
37
38 if (Function && SymbolNames[SymbolName].IsComdat)
39 Function->setIsComdat();
40
41 LLVM_DEBUG({ print(dbgs()); });
42}
43
45 LVSectionIndex SectionIndex, bool IsComdat) {
46 std::string SymbolName(Name);
47 if (SymbolNames.find(SymbolName) == SymbolNames.end())
48 SymbolNames.emplace(
49 std::piecewise_construct, std::forward_as_tuple(SymbolName),
50 std::forward_as_tuple(nullptr, Address, SectionIndex, IsComdat));
51 else
52 // Update a recorded symbol name with its logical scope.
53 SymbolNames[SymbolName].Address = Address;
54
55 LVScope *Function = SymbolNames[SymbolName].Scope;
56 if (Function && IsComdat)
57 Function->setIsComdat();
58 LLVM_DEBUG({ print(dbgs()); });
59}
60
63 StringRef Name = Function->getLinkageName();
64 if (Name.empty())
66 std::string SymbolName(Name);
67
68 if (SymbolName.empty() || (SymbolNames.find(SymbolName) == SymbolNames.end()))
69 return SectionIndex;
70
71 // Update a recorded entry with its logical scope, only if the scope has
72 // ranges. That is the case when in DWARF there are 2 DIEs connected via
73 // the DW_AT_specification.
74 if (Function->getHasRanges()) {
75 SymbolNames[SymbolName].Scope = Function;
76 SectionIndex = SymbolNames[SymbolName].SectionIndex;
77 } else {
78 SectionIndex = UndefinedSectionIndex;
79 }
80
81 if (SymbolNames[SymbolName].IsComdat)
82 Function->setIsComdat();
83
84 LLVM_DEBUG({ print(dbgs()); });
85 return SectionIndex;
86}
87
90 LVSymbolNames::iterator Iter = SymbolNames.find(std::string(Name));
91 return Iter != SymbolNames.end() ? Iter->second : Empty;
92}
94 LVSymbolNames::iterator Iter = SymbolNames.find(std::string(Name));
95 return Iter != SymbolNames.end() ? Iter->second.Address : 0;
96}
98 LVSymbolNames::iterator Iter = SymbolNames.find(std::string(Name));
99 return Iter != SymbolNames.end() ? Iter->second.SectionIndex
101}
103 LVSymbolNames::iterator Iter = SymbolNames.find(std::string(Name));
104 return Iter != SymbolNames.end() ? Iter->second.IsComdat : false;
105}
106
108 OS << "Symbol Table\n";
109 for (LVSymbolNames::reference Entry : SymbolNames) {
110 LVSymbolTableEntry &SymbolName = Entry.second;
111 LVScope *Scope = SymbolName.Scope;
112 LVOffset Offset = Scope ? Scope->getOffset() : 0;
113 OS << "Index: " << hexValue(SymbolName.SectionIndex, 5)
114 << " Comdat: " << (SymbolName.IsComdat ? "Y" : "N")
115 << " Scope: " << hexValue(Offset)
116 << " Address: " << hexValue(SymbolName.Address)
117 << " Name: " << Entry.first << "\n";
118 }
119}
120
122 LVSectionIndex SectionIndex) {
123 SymbolTable.add(Name, Function, SectionIndex);
124}
126 LVSectionIndex SectionIndex,
127 bool IsComdat) {
128 SymbolTable.add(Name, Address, SectionIndex, IsComdat);
129}
131 return SymbolTable.update(Function);
132}
133
135 return SymbolTable.getEntry(Name);
136}
138 return SymbolTable.getAddress(Name);
139}
141 return SymbolTable.getIndex(Name);
142}
144 return SymbolTable.getIsComdat(Name);
145}
146
148 for (const object::SectionRef &Section : Obj.sections()) {
149 LLVM_DEBUG({
150 Expected<StringRef> SectionNameOrErr = Section.getName();
152 if (!SectionNameOrErr)
153 consumeError(SectionNameOrErr.takeError());
154 else
155 Name = *SectionNameOrErr;
156 dbgs() << "Index: " << format_decimal(Section.getIndex(), 3) << ", "
157 << "Address: " << hexValue(Section.getAddress()) << ", "
158 << "Size: " << hexValue(Section.getSize()) << ", "
159 << "Name: " << Name << "\n";
160 dbgs() << "isCompressed: " << Section.isCompressed() << ", "
161 << "isText: " << Section.isText() << ", "
162 << "isData: " << Section.isData() << ", "
163 << "isBSS: " << Section.isBSS() << ", "
164 << "isVirtual: " << Section.isVirtual() << "\n";
165 dbgs() << "isBitcode: " << Section.isBitcode() << ", "
166 << "isStripped: " << Section.isStripped() << ", "
167 << "isBerkeleyText: " << Section.isBerkeleyText() << ", "
168 << "isBerkeleyData: " << Section.isBerkeleyData() << ", "
169 << "isDebugSection: " << Section.isDebugSection() << "\n";
170 dbgs() << "\n";
171 });
172
173 if (!Section.isText() || Section.isVirtual() || !Section.getSize())
174 continue;
175
176 // Record section information required for symbol resolution.
177 // Note: The section index returned by 'getIndex()' is one based.
178 Sections.emplace(Section.getIndex(), Section);
179 addSectionAddress(Section);
180
181 // Identify the ".text" section.
182 Expected<StringRef> SectionNameOrErr = Section.getName();
183 if (!SectionNameOrErr) {
184 consumeError(SectionNameOrErr.takeError());
185 continue;
186 }
187 if ((*SectionNameOrErr).equals(".text") ||
188 (*SectionNameOrErr).equals("CODE") ||
189 (*SectionNameOrErr).equals(".code")) {
190 DotTextSectionIndex = Section.getIndex();
191 // If the object is WebAssembly, update the address offset that
192 // will be added to DWARF DW_AT_* attributes.
193 if (Obj.isWasm())
194 WasmCodeSectionOffset = Section.getAddress();
195 }
196 }
197
198 // Process the symbol table.
199 mapRangeAddress(Obj);
200
201 LLVM_DEBUG({
202 dbgs() << "\nSections Information:\n";
203 for (LVSections::reference Entry : Sections) {
204 LVSectionIndex SectionIndex = Entry.first;
205 const object::SectionRef Section = Entry.second;
206 Expected<StringRef> SectionNameOrErr = Section.getName();
207 if (!SectionNameOrErr)
208 consumeError(SectionNameOrErr.takeError());
209 dbgs() << "\nIndex: " << format_decimal(SectionIndex, 3)
210 << " Name: " << *SectionNameOrErr << "\n"
211 << "Size: " << hexValue(Section.getSize()) << "\n"
212 << "VirtualAddress: " << hexValue(VirtualAddress) << "\n"
213 << "SectionAddress: " << hexValue(Section.getAddress()) << "\n";
214 }
215 dbgs() << "\nObject Section Information:\n";
216 for (LVSectionAddresses::const_reference Entry : SectionAddresses)
217 dbgs() << "[" << hexValue(Entry.first) << ":"
218 << hexValue(Entry.first + Entry.second.getSize())
219 << "] Size: " << hexValue(Entry.second.getSize()) << "\n";
220 });
221}
222
224 ErrorOr<uint64_t> ImageBase = COFFObj.getImageBase();
225 if (ImageBase)
226 ImageBaseAddress = ImageBase.get();
227
228 LLVM_DEBUG({
229 dbgs() << "ImageBaseAddress: " << hexValue(ImageBaseAddress) << "\n";
230 });
231
233
234 for (const object::SectionRef &Section : COFFObj.sections()) {
235 if (!Section.isText() || Section.isVirtual() || !Section.getSize())
236 continue;
237
238 const object::coff_section *COFFSection = COFFObj.getCOFFSection(Section);
239 VirtualAddress = COFFSection->VirtualAddress;
240 bool IsComdat = (COFFSection->Characteristics & Flags) == Flags;
241
242 // Record section information required for symbol resolution.
243 // Note: The section index returned by 'getIndex()' is zero based.
244 Sections.emplace(Section.getIndex() + 1, Section);
245 addSectionAddress(Section);
246
247 // Additional initialization on the specific object format.
248 mapRangeAddress(COFFObj, Section, IsComdat);
249 }
250
251 LLVM_DEBUG({
252 dbgs() << "\nSections Information:\n";
253 for (LVSections::reference Entry : Sections) {
254 LVSectionIndex SectionIndex = Entry.first;
255 const object::SectionRef Section = Entry.second;
256 const object::coff_section *COFFSection = COFFObj.getCOFFSection(Section);
257 Expected<StringRef> SectionNameOrErr = Section.getName();
258 if (!SectionNameOrErr)
259 consumeError(SectionNameOrErr.takeError());
260 dbgs() << "\nIndex: " << format_decimal(SectionIndex, 3)
261 << " Name: " << *SectionNameOrErr << "\n"
262 << "Size: " << hexValue(Section.getSize()) << "\n"
263 << "VirtualAddress: " << hexValue(VirtualAddress) << "\n"
264 << "SectionAddress: " << hexValue(Section.getAddress()) << "\n"
265 << "PointerToRawData: " << hexValue(COFFSection->PointerToRawData)
266 << "\n"
267 << "SizeOfRawData: " << hexValue(COFFSection->SizeOfRawData)
268 << "\n";
269 }
270 dbgs() << "\nObject Section Information:\n";
271 for (LVSectionAddresses::const_reference Entry : SectionAddresses)
272 dbgs() << "[" << hexValue(Entry.first) << ":"
273 << hexValue(Entry.first + Entry.second.getSize())
274 << "] Size: " << hexValue(Entry.second.getSize()) << "\n";
275 });
276}
277
279 StringRef TheFeatures) {
280 std::string TargetLookupError;
281 const Target *TheTarget =
282 TargetRegistry::lookupTarget(std::string(TheTriple), TargetLookupError);
283 if (!TheTarget)
284 return createStringError(errc::invalid_argument, TargetLookupError.c_str());
285
286 // Register information.
287 MCRegisterInfo *RegisterInfo = TheTarget->createMCRegInfo(TheTriple);
288 if (!RegisterInfo)
290 "no register info for target " + TheTriple);
291 MRI.reset(RegisterInfo);
292
293 // Assembler properties and features.
294 MCTargetOptions MCOptions;
295 MCAsmInfo *AsmInfo(TheTarget->createMCAsmInfo(*MRI, TheTriple, MCOptions));
296 if (!AsmInfo)
298 "no assembly info for target " + TheTriple);
299 MAI.reset(AsmInfo);
300
301 // Target subtargets.
302 StringRef CPU;
303 MCSubtargetInfo *SubtargetInfo(
304 TheTarget->createMCSubtargetInfo(TheTriple, CPU, TheFeatures));
305 if (!SubtargetInfo)
307 "no subtarget info for target " + TheTriple);
308 STI.reset(SubtargetInfo);
309
310 // Instructions Info.
311 MCInstrInfo *InstructionInfo(TheTarget->createMCInstrInfo());
312 if (!InstructionInfo)
314 "no instruction info for target " + TheTriple);
315 MII.reset(InstructionInfo);
316
317 MC = std::make_unique<MCContext>(Triple(TheTriple), MAI.get(), MRI.get(),
318 STI.get());
319
320 // Assembler.
321 MCDisassembler *DisAsm(TheTarget->createMCDisassembler(*STI, *MC));
322 if (!DisAsm)
324 "no disassembler for target " + TheTriple);
325 MD.reset(DisAsm);
326
327 MCInstPrinter *InstructionPrinter(TheTarget->createMCInstPrinter(
328 Triple(TheTriple), AsmInfo->getAssemblerDialect(), *MAI, *MII, *MRI));
329 if (!InstructionPrinter)
331 "no target assembly language printer for target " +
332 TheTriple);
333 MIP.reset(InstructionPrinter);
334 InstructionPrinter->setPrintImmHex(true);
335
336 return Error::success();
337}
338
341 LVSectionIndex SectionIndex) {
342 // Return the 'text' section with the code for this logical scope.
343 // COFF: SectionIndex is zero. Use 'SectionAddresses' data.
344 // ELF: SectionIndex is the section index in the file.
345 if (SectionIndex) {
346 LVSections::iterator Iter = Sections.find(SectionIndex);
347 if (Iter == Sections.end()) {
349 "invalid section index for: '%s'",
350 Scope->getName().str().c_str());
351 }
352 const object::SectionRef Section = Iter->second;
353 return std::make_pair(Section.getAddress(), Section);
354 }
355
356 // Ensure a valid starting address for the public names.
357 LVSectionAddresses::const_iterator Iter =
358 SectionAddresses.upper_bound(Address);
359 if (Iter == SectionAddresses.begin())
361 "invalid section address for: '%s'",
362 Scope->getName().str().c_str());
363
364 // Get section that contains the code for this function.
365 Iter = SectionAddresses.lower_bound(Address);
366 if (Iter != SectionAddresses.begin())
367 --Iter;
368 return std::make_pair(Iter->first, Iter->second);
369}
370
372 LVScope *Scope) {
373 LVRange *ScopesWithRanges = getSectionRanges(SectionIndex);
374 ScopesWithRanges->addEntry(Scope);
375}
376
378 LVScope *Scope, LVAddress LowerAddress,
379 LVAddress UpperAddress) {
380 LVRange *ScopesWithRanges = getSectionRanges(SectionIndex);
381 ScopesWithRanges->addEntry(Scope, LowerAddress, UpperAddress);
382}
383
385 // Check if we already have a mapping for this section index.
386 LVSectionRanges::iterator IterSection = SectionRanges.find(SectionIndex);
387 if (IterSection == SectionRanges.end())
388 IterSection =
389 SectionRanges.emplace(SectionIndex, std::make_unique<LVRange>()).first;
390 LVRange *Range = IterSection->second.get();
391 assert(Range && "Range is null.");
392 return Range;
393}
394
396 LVSectionIndex SectionIndex,
397 const LVNameInfo &NameInfo) {
398 assert(Scope && "Scope is null.");
399
400 // Skip stripped functions.
401 if (Scope->getIsDiscarded())
402 return Error::success();
403
404 // Find associated address and size for the given function entry point.
405 LVAddress Address = NameInfo.first;
406 uint64_t Size = NameInfo.second;
407
408 LLVM_DEBUG({
409 dbgs() << "\nPublic Name instructions: '" << Scope->getName() << "' / '"
410 << Scope->getLinkageName() << "'\n"
411 << "DIE Offset: " << hexValue(Scope->getOffset()) << " Range: ["
412 << hexValue(Address) << ":" << hexValue(Address + Size) << "]\n";
413 });
414
416 getSection(Scope, Address, SectionIndex);
417 if (!SectionOrErr)
418 return SectionOrErr.takeError();
419 const object::SectionRef Section = (*SectionOrErr).second;
420 uint64_t SectionAddress = (*SectionOrErr).first;
421
422 Expected<StringRef> SectionContentsOrErr = Section.getContents();
423 if (!SectionContentsOrErr)
424 return SectionOrErr.takeError();
425
426 // There are cases where the section size is smaller than the [LowPC,HighPC]
427 // range; it causes us to decode invalid addresses. The recorded size in the
428 // logical scope is one less than the real size.
429 LLVM_DEBUG({
430 dbgs() << " Size: " << hexValue(Size)
431 << ", Section Size: " << hexValue(Section.getSize()) << "\n";
432 });
433 Size = std::min(Size + 1, Section.getSize());
434
435 ArrayRef<uint8_t> Bytes = arrayRefFromStringRef(*SectionContentsOrErr);
436 uint64_t Offset = Address - SectionAddress;
437 uint8_t const *Begin = Bytes.data() + Offset;
438 uint8_t const *End = Bytes.data() + Offset + Size;
439
440 LLVM_DEBUG({
441 Expected<StringRef> SectionNameOrErr = Section.getName();
442 if (!SectionNameOrErr)
443 consumeError(SectionNameOrErr.takeError());
444 else
445 dbgs() << "Section Index: " << hexValue(Section.getIndex()) << " ["
446 << hexValue((uint64_t)Section.getAddress()) << ":"
447 << hexValue((uint64_t)Section.getAddress() + Section.getSize(), 10)
448 << "] Name: '" << *SectionNameOrErr << "'\n"
449 << "Begin: " << hexValue((uint64_t)Begin)
450 << ", End: " << hexValue((uint64_t)End) << "\n";
451 });
452
453 // Address for first instruction line.
454 LVAddress FirstAddress = Address;
455 auto InstructionsSP = std::make_unique<LVLines>();
456 LVLines &Instructions = *InstructionsSP;
457 DiscoveredLines.emplace_back(std::move(InstructionsSP));
458
459 while (Begin < End) {
461 uint64_t BytesConsumed = 0;
462 SmallVector<char, 64> InsnStr;
465 MD->getInstruction(Instruction, BytesConsumed,
466 ArrayRef<uint8_t>(Begin, End), Address, outs());
467 switch (S) {
469 LLVM_DEBUG({ dbgs() << "Invalid instruction\n"; });
470 if (BytesConsumed == 0)
471 // Skip invalid bytes
472 BytesConsumed = 1;
473 break;
475 LLVM_DEBUG({ dbgs() << "Potentially undefined instruction:"; });
476 [[fallthrough]];
478 std::string Buffer;
479 raw_string_ostream Stream(Buffer);
480 StringRef AnnotationsStr = Annotations.str();
481 MIP->printInst(&Instruction, Address, AnnotationsStr, *STI, Stream);
482 LLVM_DEBUG({
483 std::string BufferCodes;
484 raw_string_ostream StreamCodes(BufferCodes);
485 StreamCodes << format_bytes(
486 ArrayRef<uint8_t>(Begin, Begin + BytesConsumed), std::nullopt, 16,
487 16);
488 dbgs() << "[" << hexValue((uint64_t)Begin) << "] "
489 << "Size: " << format_decimal(BytesConsumed, 2) << " ("
490 << formatv("{0}",
491 fmt_align(StreamCodes.str(), AlignStyle::Left, 32))
492 << ") " << hexValue((uint64_t)Address) << ": " << Stream.str()
493 << "\n";
494 });
495 // Here we add logical lines to the Instructions. Later on,
496 // the 'processLines()' function will move each created logical line
497 // to its enclosing logical scope, using the debug ranges information
498 // and they will be released when its scope parent is deleted.
499 LVLineAssembler *Line = createLineAssembler();
500 Line->setAddress(Address);
501 Line->setName(StringRef(Stream.str()).trim());
502 Instructions.push_back(Line);
503 break;
504 }
505 }
506 Address += BytesConsumed;
507 Begin += BytesConsumed;
508 }
509
510 LLVM_DEBUG({
511 size_t Index = 0;
512 dbgs() << "\nSectionIndex: " << format_decimal(SectionIndex, 3)
513 << " Scope DIE: " << hexValue(Scope->getOffset()) << "\n"
514 << "Address: " << hexValue(FirstAddress)
515 << format(" - Collected instructions lines: %d\n",
516 Instructions.size());
517 for (const LVLine *Line : Instructions)
518 dbgs() << format_decimal(++Index, 5) << ": "
519 << hexValue(Line->getOffset()) << ", (" << Line->getName()
520 << ")\n";
521 });
522
523 // The scope in the assembler names is linked to its own instructions.
524 ScopeInstructions.add(SectionIndex, Scope, &Instructions);
525 AssemblerMappings.add(SectionIndex, FirstAddress, Scope);
526
527 return Error::success();
528}
529
531 LVSectionIndex SectionIndex) {
532 if (!options().getPrintInstructions())
533 return Error::success();
534
535 LVNameInfo Name = CompileUnit->findPublicName(Function);
536 if (Name.first != LVAddress(UINT64_MAX))
537 return createInstructions(Function, SectionIndex, Name);
538
539 return Error::success();
540}
541
543 if (!options().getPrintInstructions())
544 return Error::success();
545
546 LLVM_DEBUG({
547 size_t Index = 1;
548 dbgs() << "\nPublic Names (Scope):\n";
549 for (LVPublicNames::const_reference Name : CompileUnit->getPublicNames()) {
550 LVScope *Scope = Name.first;
551 const LVNameInfo &NameInfo = Name.second;
552 LVAddress Address = NameInfo.first;
553 uint64_t Size = NameInfo.second;
554 dbgs() << format_decimal(Index++, 5) << ": "
555 << "DIE Offset: " << hexValue(Scope->getOffset()) << " Range: ["
556 << hexValue(Address) << ":" << hexValue(Address + Size) << "] "
557 << "Name: '" << Scope->getName() << "' / '"
558 << Scope->getLinkageName() << "'\n";
559 }
560 });
561
562 // For each public name in the current compile unit, create the line
563 // records that represent the executable instructions.
564 for (LVPublicNames::const_reference Name : CompileUnit->getPublicNames()) {
565 LVScope *Scope = Name.first;
566 // The symbol table extracted from the object file always contains a
567 // non-empty name (linkage name). However, the logical scope does not
568 // guarantee to have a name for the linkage name (main is one case).
569 // For those cases, set the linkage name the same as the name.
570 if (!Scope->getLinkageNameIndex())
571 Scope->setLinkageName(Scope->getName());
572 LVSectionIndex SectionIndex = getSymbolTableIndex(Scope->getLinkageName());
573 if (Error Err = createInstructions(Scope, SectionIndex, Name.second))
574 return Err;
575 }
576
577 return Error::success();
578}
579
580// During the traversal of the debug information sections, we created the
581// logical lines representing the disassembled instructions from the text
582// section and the logical lines representing the line records from the
583// debug line section. Using the ranges associated with the logical scopes,
584// we will allocate those logical lines to their logical scopes.
586 LVSectionIndex SectionIndex,
587 LVScope *Function) {
588 assert(DebugLines && "DebugLines is null.");
589
590 // Just return if this compilation unit does not have any line records
591 // and no instruction lines were created.
592 if (DebugLines->empty() && !options().getPrintInstructions())
593 return;
594
595 // Merge the debug lines and instruction lines using their text address;
596 // the logical line representing the debug line record is followed by the
597 // line(s) representing the disassembled instructions, whose addresses are
598 // equal to or greater than the line address and less than the address of the
599 // next debug line record.
600 LLVM_DEBUG({
601 size_t Index = 1;
602 size_t PerLine = 4;
603 dbgs() << format("\nProcess debug lines: %d\n", DebugLines->size());
604 for (const LVLine *Line : *DebugLines) {
605 dbgs() << format_decimal(Index, 5) << ": " << hexValue(Line->getOffset())
606 << ", (" << Line->getLineNumber() << ")"
607 << ((Index % PerLine) ? " " : "\n");
608 ++Index;
609 }
610 dbgs() << ((Index % PerLine) ? "\n" : "");
611 });
612
613 bool TraverseLines = true;
614 LVLines::iterator Iter = DebugLines->begin();
615 while (TraverseLines && Iter != DebugLines->end()) {
616 uint64_t DebugAddress = (*Iter)->getAddress();
617
618 // Get the function with an entry point that matches this line and
619 // its associated assembler entries. In the case of COMDAT, the input
620 // 'Function' is not null. Use it to find its address ranges.
621 LVScope *Scope = Function;
622 if (!Function) {
623 Scope = AssemblerMappings.find(SectionIndex, DebugAddress);
624 if (!Scope) {
625 ++Iter;
626 continue;
627 }
628 }
629
630 // Get the associated instructions for the found 'Scope'.
631 LVLines InstructionLines;
632 LVLines *Lines = ScopeInstructions.find(SectionIndex, Scope);
633 if (Lines)
634 InstructionLines = std::move(*Lines);
635
636 LLVM_DEBUG({
637 size_t Index = 0;
638 dbgs() << "\nSectionIndex: " << format_decimal(SectionIndex, 3)
639 << " Scope DIE: " << hexValue(Scope->getOffset()) << "\n"
640 << format("Process instruction lines: %d\n",
641 InstructionLines.size());
642 for (const LVLine *Line : InstructionLines)
643 dbgs() << format_decimal(++Index, 5) << ": "
644 << hexValue(Line->getOffset()) << ", (" << Line->getName()
645 << ")\n";
646 });
647
648 // Continue with next debug line if there are not instructions lines.
649 if (InstructionLines.empty()) {
650 ++Iter;
651 continue;
652 }
653
654 for (LVLine *InstructionLine : InstructionLines) {
655 uint64_t InstructionAddress = InstructionLine->getAddress();
656 LLVM_DEBUG({
657 dbgs() << "Instruction address: " << hexValue(InstructionAddress)
658 << "\n";
659 });
660 if (TraverseLines) {
661 while (Iter != DebugLines->end()) {
662 DebugAddress = (*Iter)->getAddress();
663 LLVM_DEBUG({
664 bool IsDebug = (*Iter)->getIsLineDebug();
665 dbgs() << "Line " << (IsDebug ? "dbg:" : "ins:") << " ["
666 << hexValue(DebugAddress) << "]";
667 if (IsDebug)
668 dbgs() << format(" %d", (*Iter)->getLineNumber());
669 dbgs() << "\n";
670 });
671 // Instruction address before debug line.
672 if (InstructionAddress < DebugAddress) {
673 LLVM_DEBUG({
674 dbgs() << "Inserted instruction address: "
675 << hexValue(InstructionAddress) << " before line: "
676 << format("%d", (*Iter)->getLineNumber()) << " ["
677 << hexValue(DebugAddress) << "]\n";
678 });
679 Iter = DebugLines->insert(Iter, InstructionLine);
680 // The returned iterator points to the inserted instruction.
681 // Skip it and point to the line acting as reference.
682 ++Iter;
683 break;
684 }
685 ++Iter;
686 }
687 if (Iter == DebugLines->end()) {
688 // We have reached the end of the source lines and the current
689 // instruction line address is greater than the last source line.
690 TraverseLines = false;
691 DebugLines->push_back(InstructionLine);
692 }
693 } else {
694 DebugLines->push_back(InstructionLine);
695 }
696 }
697 }
698
699 LLVM_DEBUG({
700 dbgs() << format("Lines after merge: %d\n", DebugLines->size());
701 size_t Index = 0;
702 for (const LVLine *Line : *DebugLines) {
703 dbgs() << format_decimal(++Index, 5) << ": "
704 << hexValue(Line->getOffset()) << ", ("
705 << ((Line->getIsLineDebug())
706 ? Line->lineNumberAsStringStripped(/*ShowZero=*/true)
707 : Line->getName())
708 << ")\n";
709 }
710 });
711
712 // If this compilation unit does not have line records, traverse its scopes
713 // and take any collected instruction lines as the working set in order
714 // to move them to their associated scope.
715 if (DebugLines->empty()) {
716 if (const LVScopes *Scopes = CompileUnit->getScopes())
717 for (LVScope *Scope : *Scopes) {
718 LVLines *Lines = ScopeInstructions.find(Scope);
719 if (Lines) {
720
721 LLVM_DEBUG({
722 size_t Index = 0;
723 dbgs() << "\nSectionIndex: " << format_decimal(SectionIndex, 3)
724 << " Scope DIE: " << hexValue(Scope->getOffset()) << "\n"
725 << format("Instruction lines: %d\n", Lines->size());
726 for (const LVLine *Line : *Lines)
727 dbgs() << format_decimal(++Index, 5) << ": "
728 << hexValue(Line->getOffset()) << ", (" << Line->getName()
729 << ")\n";
730 });
731
732 if (Scope->getIsArtificial()) {
733 // Add the instruction lines to their artificial scope.
734 for (LVLine *Line : *Lines)
735 Scope->addElement(Line);
736 } else {
737 DebugLines->append(*Lines);
738 }
739 Lines->clear();
740 }
741 }
742 }
743
744 LVRange *ScopesWithRanges = getSectionRanges(SectionIndex);
745 ScopesWithRanges->startSearch();
746
747 // Process collected lines.
748 LVScope *Scope;
749 for (LVLine *Line : *DebugLines) {
750 // Using the current line address, get its associated lexical scope and
751 // add the line information to it.
752 Scope = ScopesWithRanges->getEntry(Line->getAddress());
753 if (!Scope) {
754 // If missing scope, use the compile unit.
755 Scope = CompileUnit;
756 LLVM_DEBUG({
757 dbgs() << "Adding line to CU: " << hexValue(Line->getOffset()) << ", ("
758 << ((Line->getIsLineDebug())
759 ? Line->lineNumberAsStringStripped(/*ShowZero=*/true)
760 : Line->getName())
761 << ")\n";
762 });
763 }
764
765 // Add line object to scope.
766 Scope->addElement(Line);
767
768 // Report any line zero.
769 if (options().getWarningLines() && Line->getIsLineDebug() &&
770 !Line->getLineNumber())
771 CompileUnit->addLineZero(Line);
772
773 // Some compilers generate ranges in the compile unit; other compilers
774 // only DW_AT_low_pc/DW_AT_high_pc. In order to correctly map global
775 // variables, we need to generate the map ranges for the compile unit.
776 // If we use the ranges stored at the scope level, there are cases where
777 // the address referenced by a symbol location, is not in the enclosing
778 // scope, but in an outer one. By using the ranges stored in the compile
779 // unit, we can catch all those addresses.
780 if (Line->getIsLineDebug())
781 CompileUnit->addMapping(Line, SectionIndex);
782
783 // Resolve any given pattern.
785 }
786
787 ScopesWithRanges->endSearch();
788}
789
791 LVSectionIndex SectionIndex) {
792 assert(DebugLines && "DebugLines is null.");
793 if (DebugLines->empty() && !ScopeInstructions.findMap(SectionIndex))
794 return;
795
796 // If the Compile Unit does not contain comdat functions, use the whole
797 // set of debug lines, as the addresses don't have conflicts.
798 if (!CompileUnit->getHasComdatScopes()) {
799 processLines(DebugLines, SectionIndex, nullptr);
800 return;
801 }
802
803 // Find the indexes for the lines whose address is zero.
804 std::vector<size_t> AddressZero;
806 std::find_if(std::begin(*DebugLines), std::end(*DebugLines),
807 [](LVLine *Line) { return !Line->getAddress(); });
808 while (It != std::end(*DebugLines)) {
809 AddressZero.emplace_back(std::distance(std::begin(*DebugLines), It));
810 It = std::find_if(std::next(It), std::end(*DebugLines),
811 [](LVLine *Line) { return !Line->getAddress(); });
812 }
813
814 // If the set of debug lines does not contain any line with address zero,
815 // use the whole set. It means we are dealing with an initialization
816 // section from a fully linked binary.
817 if (AddressZero.empty()) {
818 processLines(DebugLines, SectionIndex, nullptr);
819 return;
820 }
821
822 // The Compile unit contains comdat functions. Traverse the collected
823 // debug lines and identify logical groups based on their start and
824 // address. Each group starts with a zero address.
825 // Begin, End, Address, IsDone.
826 using LVBucket = std::tuple<size_t, size_t, LVAddress, bool>;
827 std::vector<LVBucket> Buckets;
828
830 size_t Begin = 0;
831 size_t End = 0;
832 size_t Index = 0;
833 for (Index = 0; Index < AddressZero.size() - 1; ++Index) {
834 Begin = AddressZero[Index];
835 End = AddressZero[Index + 1] - 1;
836 Address = (*DebugLines)[End]->getAddress();
837 Buckets.emplace_back(Begin, End, Address, false);
838 }
839
840 // Add the last bucket.
841 if (Index) {
842 Begin = AddressZero[Index];
843 End = DebugLines->size() - 1;
844 Address = (*DebugLines)[End]->getAddress();
845 Buckets.emplace_back(Begin, End, Address, false);
846 }
847
848 LLVM_DEBUG({
849 dbgs() << "\nDebug Lines buckets: " << Buckets.size() << "\n";
850 for (LVBucket &Bucket : Buckets) {
851 dbgs() << "Begin: " << format_decimal(std::get<0>(Bucket), 5) << ", "
852 << "End: " << format_decimal(std::get<1>(Bucket), 5) << ", "
853 << "Address: " << hexValue(std::get<2>(Bucket)) << "\n";
854 }
855 });
856
857 // Traverse the sections and buckets looking for matches on the section
858 // sizes. In the unlikely event of different buckets with the same size
859 // process them in order and mark them as done.
860 LVLines Group;
861 for (LVSections::reference Entry : Sections) {
862 LVSectionIndex SectionIndex = Entry.first;
863 const object::SectionRef Section = Entry.second;
864 uint64_t Size = Section.getSize();
865 LLVM_DEBUG({
866 dbgs() << "\nSection Index: " << format_decimal(SectionIndex, 3)
867 << " , Section Size: " << hexValue(Section.getSize())
868 << " , Section Address: " << hexValue(Section.getAddress())
869 << "\n";
870 });
871
872 for (LVBucket &Bucket : Buckets) {
873 if (std::get<3>(Bucket))
874 // Already done for previous section.
875 continue;
876 if (Size == std::get<2>(Bucket)) {
877 // We have a match on the section size.
878 Group.clear();
879 LVLines::iterator IterStart = DebugLines->begin() + std::get<0>(Bucket);
880 LVLines::iterator IterEnd =
881 DebugLines->begin() + std::get<1>(Bucket) + 1;
882 for (LVLines::iterator Iter = IterStart; Iter < IterEnd; ++Iter)
883 Group.push_back(*Iter);
884 processLines(&Group, SectionIndex, /*Function=*/nullptr);
885 std::get<3>(Bucket) = true;
886 break;
887 }
888 }
889 }
890}
891
892// Traverse the scopes for the given 'Function' looking for any inlined
893// scopes with inlined lines, which are found in 'CUInlineeLines'.
895 LVScope *Function) {
897 std::function<void(LVScope * Parent)> FindInlinedScopes =
898 [&](LVScope *Parent) {
899 if (const LVScopes *Scopes = Parent->getScopes())
900 for (LVScope *Scope : *Scopes) {
901 LVInlineeLine::iterator Iter = CUInlineeLines.find(Scope);
902 if (Iter != CUInlineeLines.end())
903 InlineeIters.push_back(Iter);
904 FindInlinedScopes(Scope);
905 }
906 };
907
908 // Find all inlined scopes for the given 'Function'.
909 FindInlinedScopes(Function);
910 for (LVInlineeLine::iterator InlineeIter : InlineeIters) {
911 LVScope *Scope = InlineeIter->first;
912 addToSymbolTable(Scope->getLinkageName(), Scope, SectionIndex);
913
914 // TODO: Convert this into a reference.
915 LVLines *InlineeLines = InlineeIter->second.get();
916 LLVM_DEBUG({
917 dbgs() << "Inlined lines for: " << Scope->getName() << "\n";
918 for (const LVLine *Line : *InlineeLines)
919 dbgs() << "[" << hexValue(Line->getAddress()) << "] "
920 << Line->getLineNumber() << "\n";
921 dbgs() << format("Debug lines: %d\n", CULines.size());
922 for (const LVLine *Line : CULines)
923 dbgs() << "Line address: " << hexValue(Line->getOffset()) << ", ("
924 << Line->getLineNumber() << ")\n";
925 ;
926 });
927
928 // The inlined lines must be merged using its address, in order to keep
929 // the real order of the instructions. The inlined lines are mixed with
930 // the other non-inlined lines.
931 if (InlineeLines->size()) {
932 // First address of inlinee code.
933 uint64_t InlineeStart = (InlineeLines->front())->getAddress();
934 LVLines::iterator Iter = std::find_if(
935 CULines.begin(), CULines.end(), [&](LVLine *Item) -> bool {
936 return Item->getAddress() == InlineeStart;
937 });
938 if (Iter != CULines.end()) {
939 // 'Iter' points to the line where the inlined function is called.
940 // Emulate the DW_AT_call_line attribute.
941 Scope->setCallLineNumber((*Iter)->getLineNumber());
942 // Mark the referenced line as the start of the inlined function.
943 // Skip the first line during the insertion, as the address and
944 // line number are the same. Otherwise we have to erase and insert.
945 (*Iter)->setLineNumber((*InlineeLines->begin())->getLineNumber());
946 ++Iter;
947 CULines.insert(Iter, InlineeLines->begin() + 1, InlineeLines->end());
948 }
949 }
950
951 // Remove this set of lines from the container; each inlined function
952 // creates an unique set of lines. Remove only the created container.
953 CUInlineeLines.erase(InlineeIter);
954 InlineeLines->clear();
955 }
956 LLVM_DEBUG({
957 dbgs() << "Merged Inlined lines for: " << Function->getName() << "\n";
958 dbgs() << format("Debug lines: %d\n", CULines.size());
959 for (const LVLine *Line : CULines)
960 dbgs() << "Line address: " << hexValue(Line->getOffset()) << ", ("
961 << Line->getLineNumber() << ")\n";
962 ;
963 });
964}
965
967 OS << "LVBinaryReader\n";
968 LLVM_DEBUG(dbgs() << "PrintReader\n");
969}
#define LLVM_DEBUG(X)
Definition: Debug.h:101
std::string Name
uint64_t Size
bool End
Definition: ELF_riscv.cpp:480
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
raw_pwrite_stream & OS
Annotations lets you mark points and ranges inside source code, for tests:
Definition: Annotations.h:53
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition: ArrayRef.h:41
const T * data() const
Definition: ArrayRef.h:162
Represents either an error or a value T.
Definition: ErrorOr.h:56
reference get()
Definition: ErrorOr.h:149
Lightweight error class with error context and mandatory checking.
Definition: Error.h:160
static ErrorSuccess success()
Create a success value.
Definition: Error.h:334
Tagged union holding either a T or a Error.
Definition: Error.h:474
Error takeError()
Take ownership of the stored error.
Definition: Error.h:601
This class is intended to be used as a base class for asm properties and features specific to the tar...
Definition: MCAsmInfo.h:56
unsigned getAssemblerDialect() const
Definition: MCAsmInfo.h:682
Superclass for all disassemblers.
DecodeStatus
Ternary decode status.
This is an instance of a target assembly language printer that converts an MCInst to valid target ass...
Definition: MCInstPrinter.h:45
void setPrintImmHex(bool Value)
Instances of this class represent a single low-level machine instruction.
Definition: MCInst.h:184
Interface to description of machine instruction set.
Definition: MCInstrInfo.h:26
MCRegisterInfo base class - We assume that the target defines a static array of MCRegisterDesc object...
Generic base class for all target subtargets.
bool empty() const
Definition: SmallVector.h:94
size_t size() const
Definition: SmallVector.h:91
void append(ItTy in_start, ItTy in_end)
Add the specified range to the end of the SmallVector.
Definition: SmallVector.h:696
iterator insert(iterator I, T &&Elt)
Definition: SmallVector.h:818
void push_back(const T &Elt)
Definition: SmallVector.h:426
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:50
StringRef trim(char Char) const
Return string with consecutive Char characters starting from the left and right removed.
Definition: StringRef.h:811
Target - Wrapper for Target specific information.
MCSubtargetInfo * createMCSubtargetInfo(StringRef TheTriple, StringRef CPU, StringRef Features) const
createMCSubtargetInfo - Create a MCSubtargetInfo implementation.
MCRegisterInfo * createMCRegInfo(StringRef TT) const
createMCRegInfo - Create a MCRegisterInfo implementation.
MCDisassembler * createMCDisassembler(const MCSubtargetInfo &STI, MCContext &Ctx) const
MCAsmInfo * createMCAsmInfo(const MCRegisterInfo &MRI, StringRef TheTriple, const MCTargetOptions &Options) const
createMCAsmInfo - Create a MCAsmInfo implementation for the specified target triple.
MCInstPrinter * createMCInstPrinter(const Triple &T, unsigned SyntaxVariant, const MCAsmInfo &MAI, const MCInstrInfo &MII, const MCRegisterInfo &MRI) const
MCInstrInfo * createMCInstrInfo() const
createMCInstrInfo - Create a MCInstrInfo implementation.
Triple - Helper class for working with autoconf configuration names.
Definition: Triple.h:44
StringRef getName() const
Return a constant reference to the value's name.
Definition: Value.cpp:309
Stores all information relating to a compile unit, be it in its original instance in the object file ...
const LVSymbolTableEntry & getSymbolTableEntry(StringRef Name)
LVSectionIndex updateSymbolTable(LVScope *Function)
Expected< std::pair< LVSectionIndex, object::SectionRef > > getSection(LVScope *Scope, LVAddress Address, LVSectionIndex SectionIndex)
std::unique_ptr< MCContext > MC
void includeInlineeLines(LVSectionIndex SectionIndex, LVScope *Function)
std::unique_ptr< const MCInstrInfo > MII
LVAddress getSymbolTableAddress(StringRef Name)
void print(raw_ostream &OS) const
std::unique_ptr< const MCSubtargetInfo > STI
void addToSymbolTable(StringRef Name, LVScope *Function, LVSectionIndex SectionIndex=0)
virtual void mapRangeAddress(const object::ObjectFile &Obj)
void processLines(LVLines *DebugLines, LVSectionIndex SectionIndex)
void mapVirtualAddress(const object::ObjectFile &Obj)
std::unique_ptr< const MCAsmInfo > MAI
LVSectionIndex getSymbolTableIndex(StringRef Name)
bool getSymbolTableIsComdat(StringRef Name)
std::unique_ptr< const MCRegisterInfo > MRI
std::unique_ptr< const MCDisassembler > MD
LVRange * getSectionRanges(LVSectionIndex SectionIndex)
Error loadGenericTargetInfo(StringRef TheTriple, StringRef TheFeatures)
void addSectionRange(LVSectionIndex SectionIndex, LVScope *Scope)
std::unique_ptr< MCInstPrinter > MIP
void resolvePatternMatch(LVLine *Line)
Definition: LVOptions.h:606
LVScope * getEntry(LVAddress Address) const
Definition: LVRange.cpp:83
void addEntry(LVScope *Scope, LVAddress LowerAddress, LVAddress UpperAddress)
Definition: LVRange.cpp:52
LVSectionIndex getDotTextSectionIndex() const
Definition: LVReader.h:270
LVScopeCompileUnit * CompileUnit
Definition: LVReader.h:130
LVSectionIndex DotTextSectionIndex
Definition: LVReader.h:133
const LVScopes * getScopes() const
Definition: LVScope.h:207
LVSectionIndex getIndex(StringRef Name)
LVAddress getAddress(StringRef Name)
void add(StringRef Name, LVScope *Function, LVSectionIndex SectionIndex=0)
LVSectionIndex update(LVScope *Function)
const LVSymbolTableEntry & getEntry(StringRef Name)
bool isWasm() const
Definition: Binary.h:137
const coff_section * getCOFFSection(const SectionRef &Section) const
This class is the base class for all object file types.
Definition: ObjectFile.h:229
section_iterator_range sections() const
Definition: ObjectFile.h:328
This is a value type class that represents a single section in the list of sections in the object fil...
Definition: ObjectFile.h:81
This class implements an extremely fast bulk output stream that can only output to a stream.
Definition: raw_ostream.h:52
A raw_ostream that writes to an std::string.
Definition: raw_ostream.h:660
std::string & str()
Returns the string's reference.
Definition: raw_ostream.h:678
A raw_ostream that writes to an SmallVector or SmallString.
Definition: raw_ostream.h:690
#define UINT64_MAX
Definition: DataTypes.h:77
@ IMAGE_SCN_CNT_CODE
Definition: COFF.h:302
@ IMAGE_SCN_LNK_COMDAT
Definition: COFF.h:308
FormattedNumber hexValue(uint64_t N, unsigned Width=HEX_WIDTH, bool Upper=false)
Definition: LVSupport.h:103
LVReader & getReader()
Definition: LVReader.h:333
std::pair< LVAddress, uint64_t > LVNameInfo
Definition: LVScope.h:29
LVPatterns & patterns()
Definition: LVOptions.h:642
constexpr LVSectionIndex UndefinedSectionIndex
Definition: LVReader.h:28
LVOptions & options()
Definition: LVOptions.h:445
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
auto formatv(const char *Fmt, Ts &&... Vals) -> formatv_object< decltype(std::make_tuple(detail::build_format_adapter(std::forward< Ts >(Vals))...))>
FormattedNumber format_decimal(int64_t N, unsigned Width)
format_decimal - Output N as a right justified, fixed-width decimal.
Definition: Format.h:212
raw_fd_ostream & outs()
This returns a reference to a raw_fd_ostream for standard output.
detail::AlignAdapter< T > fmt_align(T &&Item, AlignStyle Where, size_t Amount, char Fill=' ')
Error createStringError(std::error_code EC, char const *Fmt, const Ts &... Vals)
Create formatted StringError object.
Definition: Error.h:1258
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:163
format_object< Ts... > format(const char *Fmt, const Ts &... Vals)
These are helper functions used to produce formatted output.
Definition: Format.h:125
void consumeError(Error Err)
Consume a Error without doing anything.
Definition: Error.h:1041
FormattedBytes format_bytes(ArrayRef< uint8_t > Bytes, std::optional< uint64_t > FirstByteOffset=std::nullopt, uint32_t NumPerLine=16, uint8_t ByteGroupSize=4, uint32_t IndentLevel=0, bool Upper=false)
Definition: Format.h:241
static const Target * lookupTarget(StringRef Triple, std::string &Error)
lookupTarget - Lookup a target based on a target triple.
support::ulittle32_t PointerToRawData
Definition: COFF.h:445
support::ulittle32_t Characteristics
Definition: COFF.h:450
support::ulittle32_t SizeOfRawData
Definition: COFF.h:444
support::ulittle32_t VirtualAddress
Definition: COFF.h:443