LLVM  12.0.0git
GsymCreator.cpp
Go to the documentation of this file.
1 //===- GsymCreator.cpp ----------------------------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //===----------------------------------------------------------------------===//
7 
14 
15 #include <algorithm>
16 #include <cassert>
17 #include <functional>
18 #include <vector>
19 
20 using namespace llvm;
21 using namespace gsym;
22 
23 
26 }
27 
30  llvm::StringRef directory = llvm::sys::path::parent_path(Path, Style);
32  // We must insert the strings first, then call the FileEntry constructor.
33  // If we inline the insertString() function call into the constructor, the
34  // call order is undefined due to parameter lists not having any ordering
35  // requirements.
36  const uint32_t Dir = insertString(directory);
37  const uint32_t Base = insertString(filename);
38  FileEntry FE(Dir, Base);
39 
40  std::lock_guard<std::recursive_mutex> Guard(Mutex);
41  const auto NextIndex = Files.size();
42  // Find FE in hash map and insert if not present.
43  auto R = FileEntryToIndex.insert(std::make_pair(FE, NextIndex));
44  if (R.second)
45  Files.emplace_back(FE);
46  return R.first->second;
47 }
48 
50  llvm::support::endianness ByteOrder) const {
51  std::error_code EC;
52  raw_fd_ostream OutStrm(Path, EC);
53  if (EC)
54  return llvm::errorCodeToError(EC);
55  FileWriter O(OutStrm, ByteOrder);
56  return encode(O);
57 }
58 
60  std::lock_guard<std::recursive_mutex> Guard(Mutex);
61  if (Funcs.empty())
62  return createStringError(std::errc::invalid_argument,
63  "no functions to encode");
64  if (!Finalized)
65  return createStringError(std::errc::invalid_argument,
66  "GsymCreator wasn't finalized prior to encoding");
67 
68  if (Funcs.size() > UINT32_MAX)
69  return createStringError(std::errc::invalid_argument,
70  "too many FunctionInfos");
71 
72  const uint64_t MinAddr = BaseAddress ? *BaseAddress : Funcs.front().startAddress();
73  const uint64_t MaxAddr = Funcs.back().startAddress();
74  const uint64_t AddrDelta = MaxAddr - MinAddr;
75  Header Hdr;
76  Hdr.Magic = GSYM_MAGIC;
77  Hdr.Version = GSYM_VERSION;
78  Hdr.AddrOffSize = 0;
79  Hdr.UUIDSize = static_cast<uint8_t>(UUID.size());
80  Hdr.BaseAddress = MinAddr;
81  Hdr.NumAddresses = static_cast<uint32_t>(Funcs.size());
82  Hdr.StrtabOffset = 0; // We will fix this up later.
83  Hdr.StrtabSize = 0; // We will fix this up later.
84  memset(Hdr.UUID, 0, sizeof(Hdr.UUID));
85  if (UUID.size() > sizeof(Hdr.UUID))
86  return createStringError(std::errc::invalid_argument,
87  "invalid UUID size %u", (uint32_t)UUID.size());
88  // Set the address offset size correctly in the GSYM header.
89  if (AddrDelta <= UINT8_MAX)
90  Hdr.AddrOffSize = 1;
91  else if (AddrDelta <= UINT16_MAX)
92  Hdr.AddrOffSize = 2;
93  else if (AddrDelta <= UINT32_MAX)
94  Hdr.AddrOffSize = 4;
95  else
96  Hdr.AddrOffSize = 8;
97  // Copy the UUID value if we have one.
98  if (UUID.size() > 0)
99  memcpy(Hdr.UUID, UUID.data(), UUID.size());
100  // Write out the header.
101  llvm::Error Err = Hdr.encode(O);
102  if (Err)
103  return Err;
104 
105  // Write out the address offsets.
106  O.alignTo(Hdr.AddrOffSize);
107  for (const auto &FuncInfo : Funcs) {
108  uint64_t AddrOffset = FuncInfo.startAddress() - Hdr.BaseAddress;
109  switch(Hdr.AddrOffSize) {
110  case 1: O.writeU8(static_cast<uint8_t>(AddrOffset)); break;
111  case 2: O.writeU16(static_cast<uint16_t>(AddrOffset)); break;
112  case 4: O.writeU32(static_cast<uint32_t>(AddrOffset)); break;
113  case 8: O.writeU64(AddrOffset); break;
114  }
115  }
116 
117  // Write out all zeros for the AddrInfoOffsets.
118  O.alignTo(4);
119  const off_t AddrInfoOffsetsOffset = O.tell();
120  for (size_t i = 0, n = Funcs.size(); i < n; ++i)
121  O.writeU32(0);
122 
123  // Write out the file table
124  O.alignTo(4);
125  assert(!Files.empty());
126  assert(Files[0].Dir == 0);
127  assert(Files[0].Base == 0);
128  size_t NumFiles = Files.size();
129  if (NumFiles > UINT32_MAX)
130  return createStringError(std::errc::invalid_argument,
131  "too many files");
132  O.writeU32(static_cast<uint32_t>(NumFiles));
133  for (auto File: Files) {
134  O.writeU32(File.Dir);
135  O.writeU32(File.Base);
136  }
137 
138  // Write out the string table.
139  const off_t StrtabOffset = O.tell();
140  StrTab.write(O.get_stream());
141  const off_t StrtabSize = O.tell() - StrtabOffset;
142  std::vector<uint32_t> AddrInfoOffsets;
143 
144  // Write out the address infos for each function info.
145  for (const auto &FuncInfo : Funcs) {
146  if (Expected<uint64_t> OffsetOrErr = FuncInfo.encode(O))
147  AddrInfoOffsets.push_back(OffsetOrErr.get());
148  else
149  return OffsetOrErr.takeError();
150  }
151  // Fixup the string table offset and size in the header
152  O.fixup32((uint32_t)StrtabOffset, offsetof(Header, StrtabOffset));
153  O.fixup32((uint32_t)StrtabSize, offsetof(Header, StrtabSize));
154 
155  // Fixup all address info offsets
156  uint64_t Offset = 0;
157  for (auto AddrInfoOffset: AddrInfoOffsets) {
158  O.fixup32(AddrInfoOffset, AddrInfoOffsetsOffset + Offset);
159  Offset += 4;
160  }
161  return ErrorSuccess();
162 }
163 
165  std::lock_guard<std::recursive_mutex> Guard(Mutex);
166  if (Finalized)
167  return createStringError(std::errc::invalid_argument,
168  "already finalized");
169  Finalized = true;
170 
171  // Sort function infos so we can emit sorted functions.
172  llvm::sort(Funcs.begin(), Funcs.end());
173 
174  // Don't let the string table indexes change by finalizing in order.
175  StrTab.finalizeInOrder();
176 
177  // Remove duplicate function infos that have both entries from debug info
178  // (DWARF or Breakpad) and entries from the SymbolTable.
179  //
180  // Also handle overlapping functions. Usually there shouldn't be any, but they
181  // can and do happen in some rare cases.
182  //
183  //   (a)          (b)          (c)
184  //   ^  ^         ^            ^
185  //   |X |Y        |X ^         |X
186  //   |  |         |  |Y        |  ^
187  //   |  |         |  v         v  |Y
188  //   v  v         v               v
189  //
190  // In (a) and (b), Y is ignored and X will be reported for the full range.
191  // In (c), both functions will be included in the result and lookups for an
192  // address in the intersection will return Y because of binary search.
193  //
194  // Note that in case of (b), we cannot include Y in the result because then
195  // we wouldn't find any function for range (end of Y, end of X)
196  // with binary search
197  auto NumBefore = Funcs.size();
198  auto Curr = Funcs.begin();
199  auto Prev = Funcs.end();
200  while (Curr != Funcs.end()) {
201  // Can't check for overlaps or same address ranges if we don't have a
202  // previous entry
203  if (Prev != Funcs.end()) {
204  if (Prev->Range.intersects(Curr->Range)) {
205  // Overlapping address ranges.
206  if (Prev->Range == Curr->Range) {
207  // Same address range. Check if one is from debug info and the other
208  // is from a symbol table. If so, then keep the one with debug info.
209  // Our sorting guarantees that entries with matching address ranges
210  // that have debug info are last in the sort.
211  if (*Prev == *Curr) {
212  // FunctionInfo entries match exactly (range, lines, inlines)
213  OS << "warning: duplicate function info entries for range: "
214  << Curr->Range << '\n';
215  Curr = Funcs.erase(Prev);
216  } else {
217  if (!Prev->hasRichInfo() && Curr->hasRichInfo()) {
218  // Same address range, one with no debug info (symbol) and the
219  // next with debug info. Keep the latter.
220  Curr = Funcs.erase(Prev);
221  } else {
222  OS << "warning: same address range contains different debug "
223  << "info. Removing:\n"
224  << *Prev << "\nIn favor of this one:\n"
225  << *Curr << "\n";
226  Curr = Funcs.erase(Prev);
227  }
228  }
229  } else {
230  // print warnings about overlaps
231  OS << "warning: function ranges overlap:\n"
232  << *Prev << "\n"
233  << *Curr << "\n";
234  }
235  } else if (Prev->Range.size() == 0 &&
236  Curr->Range.contains(Prev->Range.Start)) {
237  OS << "warning: removing symbol:\n"
238  << *Prev << "\nKeeping:\n"
239  << *Curr << "\n";
240  Curr = Funcs.erase(Prev);
241  }
242  }
243  if (Curr == Funcs.end())
244  break;
245  Prev = Curr++;
246  }
247 
248  // If our last function info entry doesn't have a size and if we have valid
249  // text ranges, we should set the size of the last entry since any search for
250  // a high address might match our last entry. By fixing up this size, we can
251  // help ensure we don't cause lookups to always return the last symbol that
252  // has no size when doing lookups.
253  if (!Funcs.empty() && Funcs.back().Range.size() == 0 && ValidTextRanges) {
254  if (auto Range = ValidTextRanges->getRangeThatContains(
255  Funcs.back().Range.Start)) {
256  Funcs.back().Range.End = Range->End;
257  }
258  }
259  OS << "Pruned " << NumBefore - Funcs.size() << " functions, ended with "
260  << Funcs.size() << " total\n";
261  return Error::success();
262 }
263 
265  if (S.empty())
266  return 0;
267  std::lock_guard<std::recursive_mutex> Guard(Mutex);
268  if (Copy) {
269  // We need to provide backing storage for the string if requested
270  // since StringTableBuilder stores references to strings. Any string
271  // that comes from a section in an object file doesn't need to be
272  // copied, but any string created by code will need to be copied.
273  // This allows GsymCreator to be really fast when parsing DWARF and
274  // other object files as most strings don't need to be copied.
275  CachedHashStringRef CHStr(S);
276  if (!StrTab.contains(CHStr))
277  S = StringStorage.insert(S).first->getKey();
278  }
279  return StrTab.add(S);
280 }
281 
283  std::lock_guard<std::recursive_mutex> Guard(Mutex);
284  Ranges.insert(FI.Range);
285  Funcs.emplace_back(FI);
286 }
287 
289  std::function<bool(FunctionInfo &)> const &Callback) {
290  std::lock_guard<std::recursive_mutex> Guard(Mutex);
291  for (auto &FI : Funcs) {
292  if (!Callback(FI))
293  break;
294  }
295 }
296 
298  std::function<bool(const FunctionInfo &)> const &Callback) const {
299  std::lock_guard<std::recursive_mutex> Guard(Mutex);
300  for (const auto &FI : Funcs) {
301  if (!Callback(FI))
302  break;
303  }
304 }
305 
307  std::lock_guard<std::recursive_mutex> Guard(Mutex);
308  return Funcs.size();
309 }
310 
311 bool GsymCreator::IsValidTextAddress(uint64_t Addr) const {
312  // When no valid text ranges have been set, every address is accepted;
313  // otherwise Addr must be contained in the configured ranges.
314  return !ValidTextRanges || ValidTextRanges->contains(Addr);
315 }
316 
317 bool GsymCreator::hasFunctionInfoForAddress(uint64_t Addr) const {
318  // Hold the creator's mutex for the duration of the Ranges lookup.
319  std::lock_guard<std::recursive_mutex> Lock(Mutex);
320  const bool Known = Ranges.contains(Addr);
321  return Known;
322 }
uint16_t Version
The version number determines how the header is decoded and how each InfoType in FunctionInfo is ...
Definition: Header.h:54
A container which contains a StringRef plus a precomputed hash.
void forEachFunctionInfo(std::function< bool(FunctionInfo &)> const &Callback)
Thread safe iteration over all function infos.
This class represents lattice values for constants.
Definition: AllocatorList.h:23
void writeU8(uint8_t Value)
Write a single uint8_t value into the stream at the current file position.
Definition: FileWriter.cpp:34
uint32_t insertString(StringRef S, bool Copy=true)
Insert a string into the GSYM string table.
Subclass of Error for the sole purpose of identifying the success path in the type system...
Definition: Error.h:330
bool contains(uint64_t Addr) const
Definition: Range.cpp:38
uint32_t Magic
The magic bytes should be set to GSYM_MAGIC.
Definition: Header.h:49
void alignTo(size_t Align)
Pad with zeroes at the current file position until the current file position matches the specified al...
Definition: FileWriter.cpp:71
Function information in GSYM files encodes information for one contiguous address range...
Definition: FunctionInfo.h:88
void fixup32(uint32_t Value, uint64_t Offset)
Fixup a uint32_t value at the specified offset in the stream.
Definition: FileWriter.cpp:53
llvm::Error encode(FileWriter &O) const
Encode this object into FileWriter stream.
Definition: Header.cpp:85
uint8_t UUIDSize
The size in bytes of the UUID encoded in the "UUID" member.
Definition: Header.h:58
void writeU32(uint32_t Value)
Write a single uint32_t value into the stream at the current file position.
Definition: FileWriter.cpp:43
std::pair< iterator, bool > insert(const std::pair< KeyT, ValueT > &KV)
Definition: DenseMap.h:207
size_t getNumFunctionInfos() const
Get the current number of FunctionInfo objects contained in this object.
void writeU64(uint64_t Value)
Write a single uint64_t value into the stream at the current file position.
Definition: FileWriter.cpp:48
uint64_t BaseAddress
The 64 bit base address that all address offsets in the address offsets table are relative to...
Definition: Header.h:62
Tagged union holding either a T or a Error.
Definition: APFloat.h:42
LLVM_NODISCARD bool empty() const
empty - Check if the string is empty.
Definition: StringRef.h:156
uint8_t UUID[GSYM_MAX_UUID_SIZE]
The UUID of the original executable file.
Definition: Header.h:86
void write(raw_ostream &OS) const
Utility for building string tables with deduplicated suffixes.
size_t add(CachedHashStringRef S)
Add a string to the builder.
bool hasFunctionInfoForAddress(uint64_t Addr) const
Check if an address has already been added as a function info.
void finalizeInOrder()
Finalize the string table without reordering it.
Instrumentation for Order File
Error errorCodeToError(std::error_code EC)
Helper for converting an std::error_code to a Error.
Definition: Error.cpp:87
llvm::Error finalize(llvm::raw_ostream &OS)
Finalize the data in the GSYM creator prior to saving the data out.
#define offsetof(TYPE, MEMBER)
A simplified binary data writer class that doesn't require targets, target definitions, architectures, or require any other optional compile time libraries to be enabled via the build process.
Definition: FileWriter.h:29
llvm::Error encode(FileWriter &O) const
Encode a GSYM into the file writer stream at the current position.
Definition: GsymCreator.cpp:59
llvm::Error save(StringRef Path, llvm::support::endianness ByteOrder) const
Save a GSYM file to a stand alone file.
Definition: GsymCreator.cpp:49
uint32_t StrtabSize
The size in bytes of the string table.
Definition: Header.h:80
void sort(IteratorTy Start, IteratorTy End)
Definition: STLExtras.h:1433
StringRef parent_path(StringRef path, Style style=Style::native)
Get parent path.
Definition: Path.cpp:466
void insert(AddressRange Range)
Definition: Range.cpp:20
std::pair< typename Base::iterator, bool > insert(StringRef key)
Definition: StringSet.h:33
bool IsValidTextAddress(uint64_t Addr) const
Check if an address is a valid code address.
static ErrorSuccess success()
Create a success value.
Definition: Error.h:332
uint32_t StrtabOffset
The file relative offset of the start of the string table for strings contained in the GSYM file...
Definition: Header.h:72
bool contains(StringRef S) const
Check if a string is contained in the string table.
std::pair< llvm::MachO::Target, std::string > UUID
The GSYM header.
Definition: Header.h:45
void addFunctionInfo(FunctionInfo &&FI)
Add a function info to this GSYM creator.
Files in GSYM are contained in FileEntry structs where we split the directory and basename into two d...
Definition: FileEntry.h:25
A raw_ostream that writes to a file descriptor.
Definition: raw_ostream.h:408
StringRef filename(StringRef path, Style style=Style::native)
Get filename.
Definition: Path.cpp:577
uint64_t tell()
Return the current offset within the file.
Definition: FileWriter.cpp:67
uint8_t AddrOffSize
The size in bytes of each address offset in the address offsets table.
Definition: Header.h:56
llvm::raw_pwrite_stream & get_stream()
Definition: FileWriter.h:112
void writeU16(uint16_t Value)
Write a single uint16_t value into the stream at the current file position.
Definition: FileWriter.cpp:38
uint32_t insertFile(StringRef Path, sys::path::Style Style=sys::path::Style::native)
Insert a file into this GSYM creator.
Definition: GsymCreator.cpp:28
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
Lightweight error class with error context and mandatory checking.
Definition: Error.h:157
This class implements an extremely fast bulk output stream that can only output to a stream...
Definition: raw_ostream.h:46
constexpr uint32_t GSYM_MAGIC
Definition: Header.h:24
print Print MemDeps of function
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:57
constexpr uint32_t GSYM_VERSION
Definition: Header.h:26
Error createStringError(std::error_code EC, char const *Fmt, const Ts &... Vals)
Create formatted StringError object.
Definition: Error.h:1202
uint32_t NumAddresses
The number of addresses stored in the address offsets table.
Definition: Header.h:64