LLVM 22.0.0git
DwarfTransformer.cpp
Go to the documentation of this file.
1//===- DwarfTransformer.cpp -----------------------------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
12#include "llvm/Support/Error.h"
15
22
23#include <optional>
24
25using namespace llvm;
26using namespace gsym;
27
30 const char *CompDir;
31 std::vector<uint32_t> FileCache;
34
// Cache the per-compile-unit values that the helpers below rely on.
37 CompDir = CU->getCompilationDir();
38 FileCache.clear();
// Every cache slot starts out as UINT32_MAX, which DWARFToGSYMFileIndex()
// treats as "not converted yet". One slot more than the number of file names
// is allocated (presumably so the highest DWARF file index stays addressable
// -- TODO confirm).
39 if (LineTable)
40 FileCache.assign(LineTable->Prologue.FileNames.size() + 1, UINT32_MAX);
41 DWARFDie Die = CU->getUnitDIE();
// Language falls back to 0 when the unit DIE has no DW_AT_language.
42 Language = dwarf::toUnsigned(Die.find(dwarf::DW_AT_language), 0);
43 AddrSize = CU->getAddressByteSize();
44 }
45
46 /// Return true if Addr is the highest address for a given compile unit. The
47 /// highest address is encoded as -1, or all ones in the address. These high
48 /// addresses are used by some linkers to indicate that a function has been
49 /// dead stripped or didn't end up in the linked executable.
///
/// Only address sizes of 4 and 8 bytes are recognized; for any other AddrSize
/// this returns false.
50 bool isHighestAddress(uint64_t Addr) const {
51 if (AddrSize == 4)
52 return Addr == UINT32_MAX;
53 else if (AddrSize == 8)
54 return Addr == UINT64_MAX;
55 return false;
56 }
57
58 /// Convert a DWARF compile unit file index into a GSYM global file index.
59 ///
60 /// Each compile unit in DWARF has its own file table in the line table
61 /// prologue. GSYM has a single large file table that applies to all files
62 /// from all of the info in a GSYM file. This function converts between the
63 /// two and caches any DWARF CU file index that has already been converted so
64 /// the first client that asks for a compile unit file index will end up
65 /// doing the conversion, and subsequent clients will get the cached GSYM
66 /// index.
///
/// Returns std::nullopt when there is no line table or when DwarfFileIdx is
/// outside of the cache. When the file name cannot be resolved the index 0 is
/// cached and returned.
67 std::optional<uint32_t> DWARFToGSYMFileIndex(GsymCreator &Gsym,
68 uint32_t DwarfFileIdx) {
69 if (!LineTable || DwarfFileIdx >= FileCache.size())
70 return std::nullopt;
// Reference into the cache so the assignments below update the cached slot.
71 uint32_t &GsymFileIdx = FileCache[DwarfFileIdx];
// UINT32_MAX marks a slot that has not been converted yet.
72 if (GsymFileIdx != UINT32_MAX)
73 return GsymFileIdx;
74 std::string File;
75 if (LineTable->getFileNameByIndex(
76 DwarfFileIdx, CompDir,
78 GsymFileIdx = Gsym.insertFile(File);
79 else
80 GsymFileIdx = 0;
81 return GsymFileIdx;
82 }
83};
84
85
// Find the DIE that provides the declaration context for Die. References
// through DW_AT_specification and DW_AT_abstract_origin are tried first, so a
// context found via the referenced DIE takes precedence over Die's own parent.
// An invalid (default constructed) DWARFDie is returned when no context is
// found.
87 if (DWARFDie SpecDie =
88 Die.getAttributeValueAsReferencedDie(dwarf::DW_AT_specification)) {
89 if (DWARFDie SpecParent = GetParentDeclContextDIE(SpecDie))
90 return SpecParent;
91 }
92 if (DWARFDie AbstDie =
93 Die.getAttributeValueAsReferencedDie(dwarf::DW_AT_abstract_origin)) {
94 if (DWARFDie AbstParent = GetParentDeclContextDIE(AbstDie))
95 return AbstParent;
96 }
97
98 // We never want to follow the parent of an inlined subroutine - that would
99 // give us information about where the function is inlined, not what
100 // function is inlined.
101 if (Die.getTag() == dwarf::DW_TAG_inlined_subroutine)
102 return DWARFDie();
103
104 DWARFDie ParentDie = Die.getParent();
105 if (!ParentDie)
106 return DWARFDie();
107
108 switch (ParentDie.getTag()) {
109 case dwarf::DW_TAG_namespace:
110 case dwarf::DW_TAG_structure_type:
111 case dwarf::DW_TAG_union_type:
112 case dwarf::DW_TAG_class_type:
113 case dwarf::DW_TAG_subprogram:
114 return ParentDie; // Found parent decl context DIE
115 case dwarf::DW_TAG_lexical_block:
// A lexical block is skipped; keep looking from the block itself.
116 return GetParentDeclContextDIE(ParentDie);
117 default:
118 break;
119 }
120
121 return DWARFDie();
122}
123
124/// Get the GsymCreator string table offset for the qualified name for the
125/// DIE passed in. This function will avoid making copies of any strings in
126/// the GsymCreator when possible. We don't need to copy a string when the
127/// string comes from our .debug_str section or is an inlined string in the
128/// .debug_info. If we create a qualified name string in this function by
129/// combining multiple strings in the DWARF string table or info, we will make
130/// a copy of the string when we add it to the string table.
///
/// Returns std::nullopt when there is no non-empty linkage name and the short
/// name is empty.
131static std::optional<uint32_t>
133 // If the DWARF has a mangled (linkage) name, use it.
134 if (auto LinkageName = Die.getLinkageName()) {
135 // We have seen cases where the linkage name is actually empty.
136 if (strlen(LinkageName) > 0)
137 return Gsym.insertString(LinkageName, /* Copy */ false);
138 }
139
141 if (ShortName.empty())
142 return std::nullopt;
143
144 // For C++ and ObjC++ (and C, see below), prepend names of all parent
// declaration contexts. Every other language uses the short name as is.
145 if (!(Language == dwarf::DW_LANG_C_plus_plus ||
146 Language == dwarf::DW_LANG_C_plus_plus_03 ||
147 Language == dwarf::DW_LANG_C_plus_plus_11 ||
148 Language == dwarf::DW_LANG_C_plus_plus_14 ||
149 Language == dwarf::DW_LANG_ObjC_plus_plus ||
150 // This should not be needed for C, but we see C++ code marked as C
151 // in some binaries. This should not hurt, so let's do it for C as well
152 Language == dwarf::DW_LANG_C))
153 return Gsym.insertString(ShortName, /* Copy */ false);
154
155 // Some GCC optimizations create functions with names ending with .isra.<num>
156 // or .part.<num> and those names are just DW_AT_name, not DW_AT_linkage_name
157 // If it looks like it could be the case, don't add any prefix
158 if (ShortName.starts_with("_Z") &&
159 (ShortName.contains(".isra.") || ShortName.contains(".part.")))
160 return Gsym.insertString(ShortName, /* Copy */ false);
161
162 DWARFDie ParentDeclCtxDie = GetParentDeclContextDIE(Die);
163 if (ParentDeclCtxDie) {
164 std::string Name = ShortName.str();
// Walk outwards, prepending "<parent>::" for each named parent context.
// Parents with an empty name are skipped.
165 while (ParentDeclCtxDie) {
166 StringRef ParentName(ParentDeclCtxDie.getName(DINameKind::ShortName));
167 if (!ParentName.empty()) {
168 // "lambda" names are wrapped in < >. Replace with { }
169 // to be consistent with demangled names and not to confuse with
170 // templates
171 if (ParentName.front() == '<' && ParentName.back() == '>')
172 Name = "{" + ParentName.substr(1, ParentName.size() - 2).str() + "}" +
173 "::" + Name;
174 else
175 Name = ParentName.str() + "::" + Name;
176 }
177 ParentDeclCtxDie = GetParentDeclContextDIE(ParentDeclCtxDie);
178 }
179 // Copy the name since we created a new name in a std::string.
180 return Gsym.insertString(Name, /* Copy */ true);
181 }
182 // Don't copy the name since it exists in the DWARF object file.
183 return Gsym.insertString(ShortName, /* Copy */ false);
184}
185
// Return true if Die is a DW_TAG_inlined_subroutine or has one among its
// descendants. Depth is 0 for the DIE the search started from and grows by one
// per level of children.
187 bool CheckChildren = true;
188 switch (Die.getTag()) {
189 case dwarf::DW_TAG_subprogram:
190 // Don't look into functions within functions.
191 CheckChildren = Depth == 0;
192 break;
193 case dwarf::DW_TAG_inlined_subroutine:
194 return true;
195 default:
196 break;
197 }
198 if (!CheckChildren)
199 return false;
200 for (DWARFDie ChildDie : Die.children()) {
201 if (hasInlineInfo(ChildDie, Depth + 1))
202 return true;
203 }
204 return false;
205}
206
// Build an AddressRanges collection from the DWARF ranges, keeping only the
// ranges whose LowPC is strictly below their HighPC.
207static AddressRanges
209 AddressRanges Ranges;
210 for (const DWARFAddressRange &DwarfRange : DwarfRanges) {
// Empty or inverted ranges (LowPC >= HighPC) are dropped.
211 if (DwarfRange.LowPC < DwarfRange.HighPC)
212 Ranges.insert({DwarfRange.LowPC, DwarfRange.HighPC});
213 }
214 return Ranges;
215}
216
218 CUInfo &CUI, DWARFDie Die, uint32_t Depth,
219 FunctionInfo &FI, InlineInfo &Parent,
220 const AddressRanges &AllParentRanges,
221 bool &WarnIfEmpty) {
// Recursively collect inline information below Die and append it to
// Parent.Children. Parent carries the ranges currently being filled in, while
// AllParentRanges holds every range of the enclosing DIE and is only used to
// decide whether an uncontained inline range deserves an error. WarnIfEmpty is
// cleared when an empty result is expected and should not be warned about.
222 if (!hasInlineInfo(Die, Depth))
223 return;
224
225 dwarf::Tag Tag = Die.getTag();
226 if (Tag == dwarf::DW_TAG_inlined_subroutine) {
227 // create new InlineInfo and append to parent.children
229 AddressRanges AllInlineRanges;
231 if (RangesOrError) {
232 AllInlineRanges = ConvertDWARFRanges(RangesOrError.get());
233 uint32_t EmptyCount = 0;
234 for (const AddressRange &InlineRange : AllInlineRanges) {
235 // Check for empty inline range in case inline function was outlined
236 // or has no code
237 if (InlineRange.empty()) {
238 ++EmptyCount;
239 } else {
240 if (Parent.Ranges.contains(InlineRange)) {
241 II.Ranges.insert(InlineRange);
242 } else {
243 // Only warn if the current inline range is not within any of
244 // the parent ranges. If we have a DW_TAG_subprogram with multiple
245 // ranges we will emit a FunctionInfo for each range of that
246 // function that only emits information within the current range,
247 // so we only want to emit an error if the DWARF has issues, not
248 // when a range currently just isn't in the range we are currently
249 // parsing for.
250 if (AllParentRanges.contains(InlineRange)) {
251 WarnIfEmpty = false;
252 } else
253 Out.Report("Function DIE has uncontained address range",
254 [&](raw_ostream &OS) {
255 OS << "error: inlined function DIE at "
256 << HEX32(Die.getOffset()) << " has a range ["
257 << HEX64(InlineRange.start()) << " - "
258 << HEX64(InlineRange.end())
259 << ") that isn't contained in "
260 << "any parent address ranges, this inline range "
261 "will be "
262 "removed.\n";
263 });
264 }
265 }
266 }
267 // If we have all empty ranges for the inlines, then don't warn if we
268 // have an empty InlineInfo at the top level as all inline functions
269 // were elided.
270 if (EmptyCount == AllInlineRanges.size())
271 WarnIfEmpty = false;
272 }
// Nothing usable was found for this inlined subroutine; drop it along with
// all of its children.
273 if (II.Ranges.empty())
274 return;
275
276 if (auto NameIndex = getQualifiedNameIndex(Die, CUI.Language, Gsym))
277 II.Name = *NameIndex;
// NOTE(review): DwarfFileIdx is a uint64_t but DWARFToGSYMFileIndex() takes a
// uint32_t, so a value above UINT32_MAX would be narrowed at the call below --
// confirm whether that can occur.
278 const uint64_t DwarfFileIdx = dwarf::toUnsigned(
279 Die.findRecursively(dwarf::DW_AT_call_file), UINT32_MAX);
280 std::optional<uint32_t> OptGSymFileIdx =
281 CUI.DWARFToGSYMFileIndex(Gsym, DwarfFileIdx);
282 if (OptGSymFileIdx) {
283 II.CallFile = OptGSymFileIdx.value();
284 II.CallLine = dwarf::toUnsigned(Die.find(dwarf::DW_AT_call_line), 0);
285 // parse all children and append to parent
286 for (DWARFDie ChildDie : Die.children())
287 parseInlineInfo(Gsym, Out, CUI, ChildDie, Depth + 1, FI, II,
288 AllInlineRanges, WarnIfEmpty);
289 Parent.Children.emplace_back(std::move(II));
290 } else
// NOTE(review): the category string below spells "invalid" as "invlaid". It is
// emitted at runtime, so it is left untouched here.
291 Out.Report(
292 "Inlined function die has invlaid file index in DW_AT_call_file",
293 [&](raw_ostream &OS) {
294 OS << "error: inlined function DIE at " << HEX32(Die.getOffset())
295 << " has an invalid file index " << DwarfFileIdx
296 << " in its DW_AT_call_file attribute, this inline entry and "
297 "all "
298 << "children will be removed.\n";
299 });
300 return;
301 }
302 if (Tag == dwarf::DW_TAG_subprogram || Tag == dwarf::DW_TAG_lexical_block) {
303 // skip this Die and just recurse down
304 for (DWARFDie ChildDie : Die.children())
305 parseInlineInfo(Gsym, Out, CUI, ChildDie, Depth + 1, FI, Parent,
306 AllParentRanges, WarnIfEmpty);
307 }
308}
309
311 DWARFDie Die, GsymCreator &Gsym,
312 FunctionInfo &FI) {
// Fill FI.OptLineTable with the line entries that cover FI's address range.
// When the DWARF line table has no rows for the range, a single entry built
// from DW_AT_decl_file/DW_AT_decl_line is used instead if available.
// FI.OptLineTable is reset to std::nullopt when no rows end up being added.
313 std::vector<uint32_t> RowVector;
314 const uint64_t StartAddress = FI.startAddress();
315 const uint64_t EndAddress = FI.endAddress();
316 const uint64_t RangeSize = EndAddress - StartAddress;
317 const object::SectionedAddress SecAddress{
319
320 // Attempt to retrieve DW_AT_LLVM_stmt_sequence if present.
321 std::optional<uint64_t> StmtSeqOffset;
322 if (auto StmtSeqAttr = Die.find(llvm::dwarf::DW_AT_LLVM_stmt_sequence)) {
323 // The `DW_AT_LLVM_stmt_sequence` attribute might be set to an invalid
324 // sentinel value when it refers to an empty line sequence. In such cases,
325 // the DWARF linker will exclude the empty sequence from the final output
326 // and assign the sentinel value to the `DW_AT_LLVM_stmt_sequence`
327 // attribute. The sentinel value is UINT32_MAX for DWARF32 and UINT64_MAX
328 // for DWARF64.
329 const uint64_t InvalidOffset =
331 uint64_t StmtSeqVal = dwarf::toSectionOffset(StmtSeqAttr, InvalidOffset);
332 if (StmtSeqVal != InvalidOffset)
333 StmtSeqOffset = StmtSeqVal;
334 }
335
336 if (!CUI.LineTable->lookupAddressRange(SecAddress, RangeSize, RowVector,
337 StmtSeqOffset)) {
338 // If we have a DW_TAG_subprogram but no line entries, fall back to using
339 // the DW_AT_decl_file and DW_AT_decl_line if we have both attributes.
340 std::string FilePath = Die.getDeclFile(
342 if (FilePath.empty()) {
343 // If we had a DW_AT_decl_file, but got no file then we need to emit a
344 // warning.
345 const uint64_t DwarfFileIdx = dwarf::toUnsigned(
346 Die.findRecursively(dwarf::DW_AT_decl_file), UINT32_MAX);
347 // Check if there is no DW_AT_decl_file attribute, and don't report an
348 // error if it isn't there.
349 if (DwarfFileIdx == UINT32_MAX)
350 return;
351 Out.Report("Invalid file index in DW_AT_decl_file", [&](raw_ostream &OS) {
352 OS << "error: function DIE at " << HEX32(Die.getOffset())
353 << " has an invalid file index " << DwarfFileIdx
354 << " in its DW_AT_decl_file attribute, unable to create a single "
355 << "line entry from the DW_AT_decl_file/DW_AT_decl_line "
356 << "attributes.\n";
357 });
358 return;
359 }
// Without a DW_AT_decl_line no line table is created at all.
360 if (auto Line =
361 dwarf::toUnsigned(Die.findRecursively({dwarf::DW_AT_decl_line}))) {
362 LineEntry LE(StartAddress, Gsym.insertFile(FilePath), *Line);
363 FI.OptLineTable = LineTable();
364 FI.OptLineTable->push(LE);
365 }
366 return;
367 }
368
369 FI.OptLineTable = LineTable();
370 DWARFDebugLine::Row PrevRow;
371 for (uint32_t RowIndex : RowVector) {
372 // Take file number and line/column from the row.
373 const DWARFDebugLine::Row &Row = CUI.LineTable->Rows[RowIndex];
374 std::optional<uint32_t> OptFileIdx =
375 CUI.DWARFToGSYMFileIndex(Gsym, Row.File);
376 if (!OptFileIdx) {
377 Out.Report(
378 "Invalid file index in DWARF line table", [&](raw_ostream &OS) {
379 OS << "error: function DIE at " << HEX32(Die.getOffset()) << " has "
380 << "a line entry with invalid DWARF file index, this entry will "
381 << "be removed:\n";
382 Row.dumpTableHeader(OS, /*Indent=*/0);
383 Row.dump(OS);
384 OS << "\n";
385 });
386 continue;
387 }
388 const uint32_t FileIdx = OptFileIdx.value();
389 uint64_t RowAddress = Row.Address.Address;
390 // Watch out for a RowAddress that is in the middle of a line table entry
391 // in the DWARF. If we pass an address in between two line table entries
392 // we will get a RowIndex for the previous valid line table row which won't
393 // be contained in our function. This is usually a bug in the DWARF due to
394 // linker problems or LTO or other DWARF re-linking so it is worth emitting
395 // an error, but not worth stopping the creation of the GSYM.
396 if (!FI.Range.contains(RowAddress)) {
397 if (RowAddress < FI.Range.start()) {
398 Out.Report("Start address lies between valid Row table entries",
399 [&](raw_ostream &OS) {
400 OS << "error: DIE has a start address whose LowPC is "
401 "between the "
402 "line table Row["
403 << RowIndex << "] with address " << HEX64(RowAddress)
404 << " and the next one.\n";
406 });
// Clamp the row to the start of the function so the entry is still used.
407 RowAddress = FI.Range.start();
408 } else {
// Rows that are outside of the range but not below its start are skipped
// without a report.
409 continue;
410 }
411 }
412
413 LineEntry LE(RowAddress, FileIdx, Row.Line);
414 if (RowIndex != RowVector[0] && Row.Address < PrevRow.Address) {
415 // We have seen full duplicate line tables for functions in some
416 // DWARF files. Watch for those here by checking the last
417 // row was the function's end address (HighPC) and that the
418 // current line table entry's address is the same as the first
419 // line entry we already have in our "function_info.Lines". If
420 // so break out after printing a warning.
421 auto FirstLE = FI.OptLineTable->first();
422 if (FirstLE && *FirstLE == LE)
423 // if (Log && !Gsym.isQuiet()) { TODO <-- This looks weird
424 Out.Report("Duplicate line table detected", [&](raw_ostream &OS) {
425 OS << "warning: duplicate line table detected for DIE:\n";
427 });
428 else
429 Out.Report("Non-monotonically increasing addresses",
430 [&](raw_ostream &OS) {
431 OS << "error: line table has addresses that do not "
432 << "monotonically increase:\n";
433 for (uint32_t RowIndex2 : RowVector)
434 CUI.LineTable->Rows[RowIndex2].dump(OS);
436 });
// Either way, stop processing the remaining rows for this function.
437 break;
438 }
439
440 // Skip multiple line entries for the same file and line.
441 auto LastLE = FI.OptLineTable->last();
442 if (LastLE && LastLE->File == FileIdx && LastLE->Line == Row.Line)
443 continue;
444 // Only push a row if it isn't an end sequence. End sequence markers are
445 // included for the last address in a function or the last contiguous
446 // address in a sequence.
447 if (Row.EndSequence) {
448 // End sequence means that the next line entry could have a lower address
449 // than the previous entries. So we clear the previous row so we don't
450 // trigger the line table error about addresses that do not monotonically
451 // increase.
452 PrevRow = DWARFDebugLine::Row();
453 } else {
454 FI.OptLineTable->push(LE);
455 PrevRow = Row;
456 }
457 }
458 // If no line table rows were added, clear the line table so we don't encode
459 // one in the GSYM file.
460 if (FI.OptLineTable->empty())
461 FI.OptLineTable = std::nullopt;
462}
463
/// Convert Die and, recursively, all of its children into GSYM function
/// information.
///
/// For a DW_TAG_subprogram with address ranges and a name, one FunctionInfo is
/// created per address range and handed to Gsym. Line table, inline and
/// (when LoadDwarfCallSites is set) call site information are attached to each
/// FunctionInfo. Problems are reported through Out.
464void DwarfTransformer::handleDie(OutputAggregator &Out, CUInfo &CUI,
465 DWARFDie Die) {
466 switch (Die.getTag()) {
467 case dwarf::DW_TAG_subprogram: {
468 Expected<DWARFAddressRangesVector> RangesOrError = Die.getAddressRanges();
469 if (!RangesOrError) {
// A failure to get the ranges is deliberately dropped; the DIE is skipped.
470 consumeError(RangesOrError.takeError());
471 break;
472 }
473 const DWARFAddressRangesVector &Ranges = RangesOrError.get();
474 if (Ranges.empty())
475 break;
476 auto NameIndex = getQualifiedNameIndex(Die, CUI.Language, Gsym);
477 if (!NameIndex) {
478 Out.Report("Function has no name", [&](raw_ostream &OS) {
479 OS << "error: function at " << HEX64(Die.getOffset())
480 << " has no name\n ";
482 });
483 break;
484 }
485 // All ranges for the subprogram DIE in case it has multiple. We need to
486 // pass this down into parseInlineInfo so we don't warn about inline
487 // ranges that are not in the current subrange of a function when they
488 // actually are in another subrange. We do this because when a function
489 // has discontiguous ranges, we create multiple function entries with only
490 // the info for that range contained inside of it.
491 AddressRanges AllSubprogramRanges = ConvertDWARFRanges(Ranges);
492
493 // Create a function_info for each range
494 for (const DWARFAddressRange &Range : Ranges) {
495 // The low PC must be less than the high PC. Many linkers don't remove
496 // DWARF for functions that don't get linked into the final executable.
497 // If both the high and low pc have relocations, linkers will often set
498 // the address values for both to the same value to indicate the function
499 // has been removed. Other linkers have been known to set one or both
500 // PC values to a UINT32_MAX for 4 byte addresses and UINT64_MAX for 8
501 // byte addresses to indicate the function isn't valid. The check below
502 // tries to watch for these cases and abort if it runs into them.
503 if (Range.LowPC >= Range.HighPC || CUI.isHighestAddress(Range.LowPC))
504 break;
505
506 // Many linkers can't remove DWARF and might set the LowPC to zero. Since
507 // high PC can be an offset from the low PC in more recent DWARF versions
508 // we need to watch for a zero'ed low pc which we do using ValidTextRanges
509 // below.
510 if (!Gsym.IsValidTextAddress(Range.LowPC)) {
511 // We expect zero and -1 to be invalid addresses in DWARF depending
512 // on the linker of the DWARF. This indicates a function was stripped
513 // and the debug info wasn't able to be stripped from the DWARF. If
514 // the LowPC isn't zero or -1, then we should emit an error. The -1
// case has already been filtered out by isHighestAddress() above, so only
// zero needs to be checked here.
515 if (Range.LowPC != 0) {
516 if (!Gsym.isQuiet()) {
517 // Unexpected invalid address, emit a warning
518 Out.Report("Address range starts outside executable section",
519 [&](raw_ostream &OS) {
520 OS << "warning: DIE has an address range whose "
521 "start address "
522 "is not in any executable sections ("
523 << *Gsym.GetValidTextRanges()
524 << ") and will not be processed:\n";
526 });
527 }
528 }
529 break;
530 }
531
532 FunctionInfo FI;
533 FI.Range = {Range.LowPC, Range.HighPC};
534 FI.Name = *NameIndex;
535 if (CUI.LineTable)
536 convertFunctionLineTable(Out, CUI, Die, Gsym, FI);
537
538 if (hasInlineInfo(Die, 0)) {
// The top level InlineInfo describes the function itself; inlined callees are
// added to it as children.
539 FI.Inline = InlineInfo();
540 FI.Inline->Name = *NameIndex;
541 FI.Inline->Ranges.insert(FI.Range);
542 bool WarnIfEmpty = true;
543 parseInlineInfo(Gsym, Out, CUI, Die, 0, FI, *FI.Inline,
544 AllSubprogramRanges, WarnIfEmpty);
545 // Make sure we at least got some valid inline info other than just
546 // the top level function. If we didn't then remove the inline info
547 // from the function info. We have seen cases where LTO tries to modify
548 // the DWARF for functions and it messes up the address ranges for
549 // the inline functions so it is no longer valid.
550 //
551 // By checking if there are any valid children on the top level inline
552 // information object, we will know if we got anything valid from the
553 // debug info.
554 if (FI.Inline->Children.empty()) {
555 if (WarnIfEmpty && !Gsym.isQuiet())
556 Out.Report("DIE contains inline functions with no valid ranges",
557 [&](raw_ostream &OS) {
558 OS << "warning: DIE contains inline function "
559 "information that has no valid ranges, removing "
560 "inline information:\n";
562 });
563 FI.Inline = std::nullopt;
564 }
565 }
566
567 // If dwarf-callsites flag is set, parse DW_TAG_call_site DIEs.
568 if (LoadDwarfCallSites)
569 parseCallSiteInfoFromDwarf(CUI, Die, FI);
570
571 Gsym.addFunctionInfo(std::move(FI));
572 }
573 } break;
574 default:
575 break;
576 }
// Children are visited for every DIE, whatever its tag.
577 for (DWARFDie ChildDie : Die.children())
578 handleDie(Out, CUI, ChildDie);
579}
580
/// Collect call site information from the DW_TAG_call_site children of Die
/// and append it to FI.CallSites.
///
/// Only direct children of Die are examined. Call sites without a
/// DW_AT_call_return_pc, or whose return PC is outside of FI.Range, are
/// ignored. FI.CallSites is only created when at least one call site was
/// found.
581void DwarfTransformer::parseCallSiteInfoFromDwarf(CUInfo &CUI, DWARFDie Die,
582 FunctionInfo &FI) {
583 // Parse all DW_TAG_call_site DIEs that are children of this subprogram DIE.
584 // DWARF specification:
585 // - DW_TAG_call_site can have DW_AT_call_return_pc for return address offset.
586 // - DW_AT_call_origin might point to a DIE of the function being called.
587 // For simplicity, we will just extract return_offset and possibly target name
588 // if available.
589
590 CallSiteInfoCollection CSIC;
591
592 for (DWARFDie Child : Die.children()) {
593 if (Child.getTag() != dwarf::DW_TAG_call_site)
594 continue;
595
596 CallSiteInfo CSI;
597 // DW_AT_call_return_pc: the return PC (address). We'll convert it to
598 // offset relative to FI's start.
599 auto ReturnPC =
600 dwarf::toAddress(Child.findRecursively(dwarf::DW_AT_call_return_pc));
601 if (!ReturnPC || !FI.Range.contains(*ReturnPC))
602 continue;
603
604 CSI.ReturnOffset = *ReturnPC - FI.startAddress();
605
606 // Attempt to get function name from DW_AT_call_origin. If present, we can
607 // insert it as a match regex.
608 if (DWARFDie OriginDie =
609 Child.getAttributeValueAsReferencedDie(dwarf::DW_AT_call_origin)) {
610
611 // Prefer the linkage name if available, otherwise use the short name.
// NOTE(review): getQualifiedNameIndex() skips empty linkage names, while an
// empty linkage name would be inserted here as is -- confirm this is intended.
612 if (const char *LinkName = OriginDie.getLinkageName()) {
613 uint32_t LinkNameOff = Gsym.insertString(LinkName, /*Copy=*/false);
614 CSI.MatchRegex.push_back(LinkNameOff);
615 } else if (const char *ShortName = OriginDie.getShortName()) {
616 uint32_t ShortNameOff = Gsym.insertString(ShortName, /*Copy=*/false);
617 CSI.MatchRegex.push_back(ShortNameOff);
618 }
619 }
620
621 // For now, we won't attempt to deduce InternalCall/ExternalCall flags
622 // from DWARF.
624
625 CSIC.CallSites.push_back(CSI);
626 }
627
628 if (!CSIC.CallSites.empty()) {
629 if (!FI.CallSites)
630 FI.CallSites = CallSiteInfoCollection();
631 // Append parsed DWARF callsites:
632 llvm::append_range(FI.CallSites->CallSites, CSIC.CallSites);
633 }
634}
635
// Convert every compile unit of DICtx into GSYM function information, either
// on the calling thread (NumThreads == 1) or in a thread pool. The number of
// functions added is written to Out and Error::success() is always returned.
637 size_t NumBefore = Gsym.getNumFunctionInfos();
// Pick the DIE to convert for a unit: the unit DIE itself, or the unit DIE of
// the matching DWO unit when the unit has a DWO id and the DWO could be found.
638 auto getDie = [&](DWARFUnit &DwarfUnit) -> DWARFDie {
639 DWARFDie ReturnDie = DwarfUnit.getUnitDIE(false);
640 // Apple uses DW_AT_GNU_dwo_id for things other than split DWARF.
641 if (IsMachO)
642 return ReturnDie;
643
644 if (DwarfUnit.getDWOId()) {
645 DWARFUnit *DWOCU = DwarfUnit.getNonSkeletonUnitDIE(false).getDwarfUnit();
// If the unit we got back is not a DWO unit, report it and fall back to the
// original unit DIE.
646 if (!DWOCU->isDWOUnit())
647 Out.Report(
648 "warning: Unable to retrieve DWO .debug_info section for some "
649 "object files. (Remove the --quiet flag for full output)",
650 [&](raw_ostream &OS) {
651 std::string DWOName = dwarf::toString(
652 DwarfUnit.getUnitDIE().find(
653 {dwarf::DW_AT_dwo_name, dwarf::DW_AT_GNU_dwo_name}),
654 "");
655 OS << "warning: Unable to retrieve DWO .debug_info section for "
656 << DWOName << "\n";
657 });
658 else {
659 ReturnDie = DWOCU->getUnitDIE(false);
660 }
661 }
662 return ReturnDie;
663 };
664 if (NumThreads == 1) {
665 // Parse all DWARF data from this thread, use the same string/file table
666 // for everything
667 for (const auto &CU : DICtx.compile_units()) {
668 DWARFDie Die = getDie(*CU);
669 CUInfo CUI(DICtx, dyn_cast<DWARFCompileUnit>(CU.get()));
670 handleDie(Out, CUI, Die);
671 }
672 } else {
673 // LLVM Dwarf parser is not thread-safe and we need to parse all DWARF up
674 // front before we start accessing any DIEs since there might be
675 // cross compile unit references in the DWARF. If we don't do this we can
676 // end up crashing.
677
678 // We need to call getAbbreviations sequentially first so that getUnitDIE()
679 // only works with its local data.
680 for (const auto &CU : DICtx.compile_units())
681 CU->getAbbreviations();
682
683 // Now parse all DIEs in case we have cross compile unit references in a
684 // thread pool.
685 DefaultThreadPool pool(hardware_concurrency(NumThreads));
686 for (const auto &CU : DICtx.compile_units())
687 pool.async([&CU]() { CU->getUnitDIE(false /*CUDieOnly*/); });
688 pool.wait();
689
690 // Now convert all DWARF to GSYM in a thread pool.
691 std::mutex LogMutex;
692 for (const auto &CU : DICtx.compile_units()) {
693 DWARFDie Die = getDie(*CU);
694 if (Die) {
695 CUInfo CUI(DICtx, dyn_cast<DWARFCompileUnit>(CU.get()));
// Each task gets its own copy of CUI and Die and logs into a task-local
// string. Out and LogMutex are captured by reference and are only touched
// while holding the lock.
696 pool.async([this, CUI, &LogMutex, &Out, Die]() mutable {
697 std::string storage;
698 raw_string_ostream StrStream(storage);
699 OutputAggregator ThreadOut(Out.GetOS() ? &StrStream : nullptr);
700 handleDie(ThreadOut, CUI, Die);
701 // Print the lines collected in storage into the actual stream under a lock
702 std::lock_guard<std::mutex> guard(LogMutex);
703 if (Out.GetOS()) {
704 Out << storage;
705 }
706 Out.Merge(ThreadOut);
707 });
708 }
709 }
// All tasks must finish before LogMutex and the pool go out of scope.
710 pool.wait();
711 }
712 size_t FunctionsAddedCount = Gsym.getNumFunctionInfos() - NumBefore;
713 Out << "Loaded " << FunctionsAddedCount << " functions from DWARF.\n";
714 return Error::success();
715}
716
718 OutputAggregator &Out) {
// Open the GSYM file at GsymPath and, for every address of every function in
// it, compare the GSYM lookup result against what the DWARF in DICtx reports
// for the same address. Mismatches are written to Out; an Error is returned
// only when the GSYM file cannot be opened or read.
719 Out << "Verifying GSYM file \"" << GsymPath << "\":\n";
720
721 auto Gsym = GsymReader::openFile(GsymPath);
722 if (!Gsym)
723 return Gsym.takeError();
724
725 auto NumAddrs = Gsym->getNumAddresses();
728 DILineInfoSpecifier::FunctionNameKind::LinkageName);
729 std::string gsymFilename;
730 for (uint32_t I = 0; I < NumAddrs; ++I) {
731 auto FuncAddr = Gsym->getAddress(I);
// NOTE(review): I is a uint32_t but is formatted with "%i" below; "%u" looks
// like the matching conversion.
732 if (!FuncAddr)
733 return createStringError(std::errc::invalid_argument,
734 "failed to extract address[%i]", I);
735
736 auto FI = Gsym->getFunctionInfo(*FuncAddr);
// NOTE(review): the message below prints a "0x" prefix followed by a decimal
// value (PRIu64); PRIx64 looks like what was intended.
737 if (!FI)
738 return createStringError(
739 std::errc::invalid_argument,
740 "failed to extract function info for address 0x%" PRIu64, *FuncAddr);
741
// Every single address inside of the function is looked up.
742 for (auto Addr = *FuncAddr; Addr < *FuncAddr + FI->size(); ++Addr) {
743 const object::SectionedAddress SectAddr{
745 auto LR = Gsym->lookup(Addr);
746 if (!LR)
747 return LR.takeError();
748
749 auto DwarfInlineInfos =
750 DICtx.getInliningInfoForAddress(SectAddr, DLIS);
751 uint32_t NumDwarfInlineInfos = DwarfInlineInfos.getNumberOfFrames();
// NOTE(review): NumDwarfInlineInfos is not refreshed after addFrame(), so the
// frame added here is never compared by the checks below -- confirm that this
// is intended.
752 if (NumDwarfInlineInfos == 0) {
753 DwarfInlineInfos.addFrame(
754 DICtx.getLineInfoForAddress(SectAddr, DLIS).value_or(DILineInfo()));
755 }
756
757 // Check for 1 entry that has no file and line info
758 if (NumDwarfInlineInfos == 1 &&
759 DwarfInlineInfos.getFrame(0).FileName == "<invalid>") {
760 DwarfInlineInfos = DIInliningInfo();
761 NumDwarfInlineInfos = 0;
762 }
// A differing frame count is logged in detail (when there is an output
// stream) and the per-frame comparison is skipped for this address.
763 if (NumDwarfInlineInfos > 0 &&
764 NumDwarfInlineInfos != LR->Locations.size()) {
765 if (Out.GetOS()) {
766 raw_ostream &Log = *Out.GetOS();
767 Log << "error: address " << HEX64(Addr) << " has "
768 << NumDwarfInlineInfos << " DWARF inline frames and GSYM has "
769 << LR->Locations.size() << "\n";
770 Log << " " << NumDwarfInlineInfos << " DWARF frames:\n";
771 for (size_t Idx = 0; Idx < NumDwarfInlineInfos; ++Idx) {
772 const auto &dii = DwarfInlineInfos.getFrame(Idx);
773 Log << " [" << Idx << "]: " << dii.FunctionName << " @ "
774 << dii.FileName << ':' << dii.Line << '\n';
775 }
776 Log << " " << LR->Locations.size() << " GSYM frames:\n";
777 for (size_t Idx = 0, count = LR->Locations.size(); Idx < count;
778 ++Idx) {
779 const auto &gii = LR->Locations[Idx];
780 Log << " [" << Idx << "]: " << gii.Name << " @ " << gii.Dir
781 << '/' << gii.Base << ':' << gii.Line << '\n';
782 }
783 Gsym->dump(Log, *FI);
784 }
785 continue;
786 }
787
// Compare the GSYM and DWARF frames pairwise: name, source path and line.
788 for (size_t Idx = 0, count = LR->Locations.size(); Idx < count;
789 ++Idx) {
790 const auto &gii = LR->Locations[Idx];
791 if (Idx < NumDwarfInlineInfos) {
792 const auto &dii = DwarfInlineInfos.getFrame(Idx);
793 gsymFilename = LR->getSourceFile(Idx);
794 // Verify function name
// Only a prefix match is required: the DWARF name has to start with the GSYM
// name.
795 if (!StringRef(dii.FunctionName).starts_with(gii.Name))
796 Out << "error: address " << HEX64(Addr) << " DWARF function \""
797 << dii.FunctionName.c_str()
798 << "\" doesn't match GSYM function \"" << gii.Name << "\"\n";
799
800 // Verify source file path
801 if (dii.FileName != gsymFilename)
802 Out << "error: address " << HEX64(Addr) << " DWARF path \""
803 << dii.FileName.c_str() << "\" doesn't match GSYM path \""
804 << gsymFilename.c_str() << "\"\n";
805 // Verify source file line
806 if (dii.Line != gii.Line)
807 Out << "error: address " << HEX64(Addr) << " DWARF line "
808 << dii.Line << " != GSYM line " << gii.Line << "\n";
809 }
810 }
811 }
812 }
813 return Error::success();
814}
static void parseInlineInfo(GsymCreator &Gsym, OutputAggregator &Out, CUInfo &CUI, DWARFDie Die, uint32_t Depth, FunctionInfo &FI, InlineInfo &Parent, const AddressRanges &AllParentRanges, bool &WarnIfEmpty)
static bool hasInlineInfo(DWARFDie Die, uint32_t Depth)
static AddressRanges ConvertDWARFRanges(const DWARFAddressRangesVector &DwarfRanges)
static std::optional< uint32_t > getQualifiedNameIndex(DWARFDie &Die, uint64_t Language, GsymCreator &Gsym)
Get the GsymCreator string table offset for the qualified name for the DIE passed in.
static DWARFDie GetParentDeclContextDIE(DWARFDie &Die)
static void convertFunctionLineTable(OutputAggregator &Out, CUInfo &CUI, DWARFDie Die, GsymCreator &Gsym, FunctionInfo &FI)
#define HEX64(v)
#define HEX32(v)
@ CallSiteInfo
@ InlineInfo
#define I(x, y, z)
Definition MD5.cpp:58
ConstantRange Range(APInt(BitWidth, Low), APInt(BitWidth, High))
uint64_t IntrinsicInst * II
A class that represents an address range.
uint64_t start() const
bool contains(uint64_t Addr) const
bool contains(uint64_t Addr) const
The AddressRanges class helps normalize address range collections.
A format-neutral container for inlined code description.
Definition DIContext.h:94
DWARFContext This data structure is the top level entity that deals with dwarf debug information pars...
const DWARFDebugLine::LineTable * getLineTableForUnit(DWARFUnit *U)
Get a pointer to a parsed line table corresponding to a compile unit.
Utility class that carries the DWARF compile/type unit and the debug info entry in an object.
Definition DWARFDie.h:43
uint64_t getOffset() const
Get the absolute offset into the debug info or types section.
Definition DWARFDie.h:68
LLVM_ABI Expected< DWARFAddressRangesVector > getAddressRanges() const
Get the address ranges for this DIE.
Definition DWARFDie.cpp:427
iterator_range< iterator > children() const
Definition DWARFDie.h:406
LLVM_ABI DWARFDie getAttributeValueAsReferencedDie(dwarf::Attribute Attr) const
Extract the specified attribute from this DIE as the referenced DIE.
Definition DWARFDie.cpp:346
LLVM_ABI DWARFDie getParent() const
Get the parent of this DIE object.
Definition DWARFDie.cpp:695
LLVM_ABI std::optional< DWARFFormValue > find(dwarf::Attribute Attr) const
Extract the specified attribute from this DIE.
Definition DWARFDie.cpp:290
DWARFUnit * getDwarfUnit() const
Definition DWARFDie.h:55
LLVM_ABI std::optional< DWARFFormValue > findRecursively(ArrayRef< dwarf::Attribute > Attrs) const
Extract the first value of any attribute in Attrs from this DIE and recurse into any DW_AT_specificat...
Definition DWARFDie.cpp:314
LLVM_ABI const char * getName(DINameKind Kind) const
Return the DIE name resolving DW_AT_specification or DW_AT_abstract_origin references if necessary.
Definition DWARFDie.cpp:503
LLVM_ABI std::string getDeclFile(DILineInfoSpecifier::FileLineInfoKind Kind) const
Definition DWARFDie.cpp:535
dwarf::Tag getTag() const
Definition DWARFDie.h:73
LLVM_ABI const char * getLinkageName() const
Return the DIE linkage name resolving DW_AT_specification or DW_AT_abstract_origin references if nece...
Definition DWARFDie.cpp:521
LLVM_ABI void dump(raw_ostream &OS, unsigned indent=0, DIDumpOptions DumpOpts=DIDumpOptions()) const
Dump the DIE and all of its attributes to the supplied stream.
Definition DWARFDie.cpp:635
const dwarf::FormParams & getFormParams() const
Definition DWARFUnit.h:326
DWARFDie getUnitDIE(bool ExtractUnitDIEOnly=true)
Definition DWARFUnit.h:447
bool isDWOUnit() const
Definition DWARFUnit.h:322
This dwarf writer support class manages information associated with a source file.
Definition DwarfUnit.h:36
Lightweight error class with error context and mandatory checking.
Definition Error.h:159
static ErrorSuccess success()
Create a success value.
Definition Error.h:336
Tagged union holding either a T or an Error.
Definition Error.h:485
Error takeError()
Take ownership of the stored error.
Definition Error.h:612
reference get()
Returns a reference to the stored T value.
Definition Error.h:582
void wait() override
Blocking wait for all the tasks to execute first.
StringRef - Represent a constant reference to a string, i.e. a character array and a length, which need not be null terminated.
Definition StringRef.h:55
std::string str() const
str - Get the contents as an std::string.
Definition StringRef.h:225
constexpr StringRef substr(size_t Start, size_t N=npos) const
Return a reference to the substring from [Start, Start + N).
Definition StringRef.h:573
bool starts_with(StringRef Prefix) const
Check if this string starts with the given Prefix.
Definition StringRef.h:261
constexpr bool empty() const
empty - Check if the string is empty.
Definition StringRef.h:143
char back() const
back - Get the last character in the string.
Definition StringRef.h:155
constexpr size_t size() const
size - Get the string size.
Definition StringRef.h:146
char front() const
front - Get the first character in the string.
Definition StringRef.h:149
auto async(Function &&F, Args &&...ArgList)
Asynchronous submission of a task to the pool.
Definition ThreadPool.h:80
LLVM_ABI llvm::Error convert(uint32_t NumThreads, OutputAggregator &OS)
Extract the DWARF from the supplied object file and convert it into the Gsym format in the GsymCreator object that is passed in.
LLVM_ABI llvm::Error verify(StringRef GsymPath, OutputAggregator &OS)
GsymCreator is used to emit GSYM data to a stand alone file or section within a file.
LLVM_ABI uint32_t insertString(StringRef S, bool Copy=true)
Insert a string into the GSYM string table.
LLVM_ABI uint32_t insertFile(StringRef Path, sys::path::Style Style=sys::path::Style::native)
Insert a file into this GSYM creator.
static LLVM_ABI llvm::Expected< GsymReader > openFile(StringRef Path)
Construct a GsymReader from a file on disk.
LineTable class contains deserialized versions of line tables for each function's address ranges.
Definition LineTable.h:119
void Report(StringRef s, std::function< void(raw_ostream &o)> detailCallback)
raw_ostream * GetOS() const
void Merge(const OutputAggregator &other)
This class implements an extremely fast bulk output stream that can only output to a stream.
Definition raw_ostream.h:53
A raw_ostream that writes to an std::string.
#define UINT64_MAX
Definition DataTypes.h:77
std::optional< uint64_t > toAddress(const std::optional< DWARFFormValue > &V)
Take an optional DWARFFormValue and try to extract an address.
std::optional< const char * > toString(const std::optional< DWARFFormValue > &V)
Take an optional DWARFFormValue and try to extract a string value from it.
std::optional< uint64_t > toSectionOffset(const std::optional< DWARFFormValue > &V)
Take an optional DWARFFormValue and try to extract a section offset.
std::optional< uint64_t > toUnsigned(const std::optional< DWARFFormValue > &V)
Take an optional DWARFFormValue and try to extract an unsigned constant.
This is an optimization pass for GlobalISel generic memory operations.
ThreadPoolStrategy hardware_concurrency(unsigned ThreadCount=0)
Returns a default thread strategy where all available hardware resources are to be used, except for those initially excluded by an affinity mask.
Definition Threading.h:190
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:643
void append_range(Container &C, Range &&R)
Wrapper function to append range R to container C.
Definition STLExtras.h:2136
Error createStringError(std::error_code EC, char const *Fmt, const Ts &... Vals)
Create formatted StringError object.
Definition Error.h:1305
auto count(R &&Range, const E &Element)
Wrapper function around std::count to count the number of times an element Element occurs in the given range Range.
Definition STLExtras.h:1954
SingleThreadExecutor DefaultThreadPool
Definition ThreadPool.h:254
void consumeError(Error Err)
Consume an Error without doing anything.
Definition Error.h:1083
std::vector< DWARFAddressRange > DWARFAddressRangesVector
DWARFAddressRangesVector - represents a set of absolute address ranges.
static DIDumpOptions getForSingleDIE()
Return default option set for printing a single DIE without children.
Definition DIContext.h:218
Controls which fields of DILineInfo container should be filled with data.
Definition DIContext.h:146
A format-neutral container for source line information.
Definition DIContext.h:32
LLVM_ABI bool lookupAddressRange(object::SectionedAddress Address, uint64_t Size, std::vector< uint32_t > &Result, std::optional< uint64_t > StmtSequenceOffset=std::nullopt) const
Fills the Result argument with the indices of the rows that correspond to the address range specified by Address and Size.
Standard .debug_line state machine structure.
object::SectionedAddress Address
The program-counter value corresponding to a machine instruction generated by the compiler and section index pointing to the section containing this PC.
uint64_t getDwarfMaxOffset() const
Definition Dwarf.h:1131
const DWARFDebugLine::LineTable * LineTable
std::optional< uint32_t > DWARFToGSYMFileIndex(GsymCreator &Gsym, uint32_t DwarfFileIdx)
Convert a DWARF compile unit file index into a GSYM global file index.
CUInfo(DWARFContext &DICtx, DWARFCompileUnit *CU)
bool isHighestAddress(uint64_t Addr) const
Return true if Addr is the highest address for a given compile unit.
std::vector< uint32_t > FileCache
std::vector< CallSiteInfo > CallSites
std::vector< uint32_t > MatchRegex
Offsets into the string table for function names regex patterns.
uint64_t ReturnOffset
The return offset of the call site - relative to the function start.
Function information in GSYM files encodes information for one contiguous address range.
std::optional< InlineInfo > Inline
uint64_t startAddress() const
uint64_t endAddress() const
std::optional< CallSiteInfoCollection > CallSites
uint64_t size() const
uint32_t Name
String table offset in the string table.
std::optional< LineTable > OptLineTable
Inline information stores the name of the inline function along with an array of address ranges.
Definition InlineInfo.h:60
std::vector< InlineInfo > Children
Definition InlineInfo.h:66
AddressRanges Ranges
Definition InlineInfo.h:65
Line entries are used to encode the line tables in FunctionInfo objects.
Definition LineEntry.h:22
static const uint64_t UndefSection
Definition ObjectFile.h:148