LLVM  14.0.0git
GsymCreator.cpp
Go to the documentation of this file.
1 //===- GsymCreator.cpp ----------------------------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //===----------------------------------------------------------------------===//
7 
14 
15 #include <algorithm>
16 #include <cassert>
17 #include <functional>
18 #include <vector>
19 
20 using namespace llvm;
21 using namespace gsym;
22 
24  : StrTab(StringTableBuilder::ELF), Quiet(Quiet) {
26 }
27 
31  // We must insert the strings first, then call the FileEntry constructor.
32  // If we inline the insertString() function call into the constructor, the
33  // call order is undefined due to parameter lists not having any ordering
34  // requirements.
35  const uint32_t Dir = insertString(directory);
37  FileEntry FE(Dir, Base);
38 
39  std::lock_guard<std::mutex> Guard(Mutex);
40  const auto NextIndex = Files.size();
41  // Find FE in hash map and insert if not present.
42  auto R = FileEntryToIndex.insert(std::make_pair(FE, NextIndex));
43  if (R.second)
44  Files.emplace_back(FE);
45  return R.first->second;
46 }
47 
49  llvm::support::endianness ByteOrder) const {
50  std::error_code EC;
51  raw_fd_ostream OutStrm(Path, EC);
52  if (EC)
53  return llvm::errorCodeToError(EC);
54  FileWriter O(OutStrm, ByteOrder);
55  return encode(O);
56 }
57 
59  std::lock_guard<std::mutex> Guard(Mutex);
60  if (Funcs.empty())
61  return createStringError(std::errc::invalid_argument,
62  "no functions to encode");
63  if (!Finalized)
64  return createStringError(std::errc::invalid_argument,
65  "GsymCreator wasn't finalized prior to encoding");
66 
67  if (Funcs.size() > UINT32_MAX)
68  return createStringError(std::errc::invalid_argument,
69  "too many FunctionInfos");
70 
71  const uint64_t MinAddr =
72  BaseAddress ? *BaseAddress : Funcs.front().startAddress();
73  const uint64_t MaxAddr = Funcs.back().startAddress();
74  const uint64_t AddrDelta = MaxAddr - MinAddr;
75  Header Hdr;
76  Hdr.Magic = GSYM_MAGIC;
77  Hdr.Version = GSYM_VERSION;
78  Hdr.AddrOffSize = 0;
79  Hdr.UUIDSize = static_cast<uint8_t>(UUID.size());
80  Hdr.BaseAddress = MinAddr;
81  Hdr.NumAddresses = static_cast<uint32_t>(Funcs.size());
82  Hdr.StrtabOffset = 0; // We will fix this up later.
83  Hdr.StrtabSize = 0; // We will fix this up later.
84  memset(Hdr.UUID, 0, sizeof(Hdr.UUID));
85  if (UUID.size() > sizeof(Hdr.UUID))
86  return createStringError(std::errc::invalid_argument,
87  "invalid UUID size %u", (uint32_t)UUID.size());
88  // Set the address offset size correctly in the GSYM header.
89  if (AddrDelta <= UINT8_MAX)
90  Hdr.AddrOffSize = 1;
91  else if (AddrDelta <= UINT16_MAX)
92  Hdr.AddrOffSize = 2;
93  else if (AddrDelta <= UINT32_MAX)
94  Hdr.AddrOffSize = 4;
95  else
96  Hdr.AddrOffSize = 8;
97  // Copy the UUID value if we have one.
98  if (UUID.size() > 0)
99  memcpy(Hdr.UUID, UUID.data(), UUID.size());
100  // Write out the header.
101  llvm::Error Err = Hdr.encode(O);
102  if (Err)
103  return Err;
104 
105  // Write out the address offsets.
106  O.alignTo(Hdr.AddrOffSize);
107  for (const auto &FuncInfo : Funcs) {
108  uint64_t AddrOffset = FuncInfo.startAddress() - Hdr.BaseAddress;
109  switch (Hdr.AddrOffSize) {
110  case 1:
111  O.writeU8(static_cast<uint8_t>(AddrOffset));
112  break;
113  case 2:
114  O.writeU16(static_cast<uint16_t>(AddrOffset));
115  break;
116  case 4:
117  O.writeU32(static_cast<uint32_t>(AddrOffset));
118  break;
119  case 8:
120  O.writeU64(AddrOffset);
121  break;
122  }
123  }
124 
125  // Write out all zeros for the AddrInfoOffsets.
126  O.alignTo(4);
127  const off_t AddrInfoOffsetsOffset = O.tell();
128  for (size_t i = 0, n = Funcs.size(); i < n; ++i)
129  O.writeU32(0);
130 
131  // Write out the file table
132  O.alignTo(4);
133  assert(!Files.empty());
134  assert(Files[0].Dir == 0);
135  assert(Files[0].Base == 0);
136  size_t NumFiles = Files.size();
137  if (NumFiles > UINT32_MAX)
138  return createStringError(std::errc::invalid_argument, "too many files");
139  O.writeU32(static_cast<uint32_t>(NumFiles));
140  for (auto File : Files) {
141  O.writeU32(File.Dir);
142  O.writeU32(File.Base);
143  }
144 
145  // Write out the string table.
146  const off_t StrtabOffset = O.tell();
147  StrTab.write(O.get_stream());
148  const off_t StrtabSize = O.tell() - StrtabOffset;
149  std::vector<uint32_t> AddrInfoOffsets;
150 
151  // Write out the address infos for each function info.
152  for (const auto &FuncInfo : Funcs) {
153  if (Expected<uint64_t> OffsetOrErr = FuncInfo.encode(O))
154  AddrInfoOffsets.push_back(OffsetOrErr.get());
155  else
156  return OffsetOrErr.takeError();
157  }
158  // Fixup the string table offset and size in the header
159  O.fixup32((uint32_t)StrtabOffset, offsetof(Header, StrtabOffset));
160  O.fixup32((uint32_t)StrtabSize, offsetof(Header, StrtabSize));
161 
162  // Fixup all address info offsets
163  uint64_t Offset = 0;
164  for (auto AddrInfoOffset : AddrInfoOffsets) {
165  O.fixup32(AddrInfoOffset, AddrInfoOffsetsOffset + Offset);
166  Offset += 4;
167  }
168  return ErrorSuccess();
169 }
170 
// Collapses adjacent elements of [FirstIt, LastIt) in a single pass using a
// binary predicate, in the spirit of std::remove_if / std::unique.
//
// Pred(Prev, Curr) is invoked once for every element after the first, where
// Prev is always the most recently *kept* element and Curr is the element
// being examined:
//   - If Pred returns true, Prev is discarded and Curr takes over its slot
//     (i.e. the later element of the pair survives).
//   - If Pred returns false, Curr is kept as an additional element.
//
// This is the contract the predicate in GsymCreator::finalize() is written
// against ("Removing: Prev ... In favor of this one: Curr").
//
// Returns the new logical end of the range. Elements from the returned
// iterator up to LastIt are left in a moved-from state and are expected to be
// erased by the caller. An empty range is returned unchanged.
template <class ForwardIt, class BinaryPredicate>
static ForwardIt removeIfBinary(ForwardIt FirstIt, ForwardIt LastIt,
                                BinaryPredicate Pred) {
  if (FirstIt == LastIt)
    return LastIt;

  // KeptIt always points at the last element that survived so far.
  ForwardIt KeptIt = FirstIt;
  ForwardIt CurrIt = FirstIt;
  for (++CurrIt; CurrIt != LastIt; ++CurrIt) {
    if (Pred(*KeptIt, *CurrIt)) {
      // Prev loses: overwrite it with the current element.
      *KeptIt = std::move(*CurrIt);
    } else {
      // Both survive: append the current element after the kept ones. Skip
      // the assignment when nothing has been dropped yet to avoid a
      // self-move.
      ++KeptIt;
      if (KeptIt != CurrIt)
        *KeptIt = std::move(*CurrIt);
    }
  }
  return ++KeptIt;
}
190 
192  std::lock_guard<std::mutex> Guard(Mutex);
193  if (Finalized)
194  return createStringError(std::errc::invalid_argument, "already finalized");
195  Finalized = true;
196 
197  // Sort function infos so we can emit sorted functions.
198  llvm::sort(Funcs);
199 
200  // Don't let the string table indexes change by finalizing in order.
201  StrTab.finalizeInOrder();
202 
203  // Remove duplicate function infos that have both entries from debug info
204  // (DWARF or Breakpad) and entries from the SymbolTable.
205  //
206  // Also handle overlapping functions. Usually there shouldn't be any, but they
207  // can and do happen in some rare cases.
208  //
209  // (a) (b) (c)
210  // ^ ^ ^ ^
211  // |X |Y |X ^ |X
212  // | | | |Y | ^
213  // | | | v v |Y
214  // v v v v
215  //
216  // In (a) and (b), Y is ignored and X will be reported for the full range.
217  // In (c), both functions will be included in the result and lookups for an
218  // address in the intersection will return Y because of binary search.
219  //
220  // Note that in case of (b), we cannot include Y in the result because then
221  // we wouldn't find any function for range (end of Y, end of X)
222  // with binary search
223  auto NumBefore = Funcs.size();
224  Funcs.erase(
225  removeIfBinary(Funcs.begin(), Funcs.end(),
226  [&](const auto &Prev, const auto &Curr) {
227  // Empty ranges won't intersect, but we still need to
228  // catch the case where we have multiple symbols at the
229  // same address and coalesce them.
230  const bool ranges_equal = Prev.Range == Curr.Range;
231  if (ranges_equal || Prev.Range.intersects(Curr.Range)) {
232  // Overlapping ranges or empty identical ranges.
233  if (ranges_equal) {
234  // Same address range. Check if one is from debug
235  // info and the other is from a symbol table. If
236  // so, then keep the one with debug info. Our
237  // sorting guarantees that entries with matching
238  // address ranges that have debug info are last in
239  // the sort.
240  if (Prev == Curr) {
241  // FunctionInfo entries match exactly (range,
242  // lines, inlines)
243 
244  // We used to output a warning here, but this was
245  // so frequent on some binaries, in particular
246  // when those were built with GCC, that it slowed
247  // down processing extremely.
248  return true;
249  } else {
250  if (!Prev.hasRichInfo() && Curr.hasRichInfo()) {
251  // Same address range, one with no debug info
252  // (symbol) and the next with debug info. Keep
253  // the latter.
254  return true;
255  } else {
256  if (!Quiet) {
257  OS << "warning: same address range contains "
258  "different debug "
259  << "info. Removing:\n"
260  << Prev << "\nIn favor of this one:\n"
261  << Curr << "\n";
262  }
263  return true;
264  }
265  }
266  } else {
267  if (!Quiet) { // print warnings about overlaps
268  OS << "warning: function ranges overlap:\n"
269  << Prev << "\n"
270  << Curr << "\n";
271  }
272  }
273  } else if (Prev.Range.size() == 0 &&
274  Curr.Range.contains(Prev.Range.Start)) {
275  if (!Quiet) {
276  OS << "warning: removing symbol:\n"
277  << Prev << "\nKeeping:\n"
278  << Curr << "\n";
279  }
280  return true;
281  }
282 
283  return false;
284  }),
285  Funcs.end());
286 
287  // If our last function info entry doesn't have a size and if we have valid
288  // text ranges, we should set the size of the last entry since any search for
289  // a high address might match our last entry. By fixing up this size, we can
290  // help ensure we don't cause lookups to always return the last symbol that
291  // has no size when doing lookups.
292  if (!Funcs.empty() && Funcs.back().Range.size() == 0 && ValidTextRanges) {
293  if (auto Range =
294  ValidTextRanges->getRangeThatContains(Funcs.back().Range.Start)) {
295  Funcs.back().Range.End = Range->End;
296  }
297  }
298  OS << "Pruned " << NumBefore - Funcs.size() << " functions, ended with "
299  << Funcs.size() << " total\n";
300  return Error::success();
301 }
302 
303 uint32_t GsymCreator::insertString(StringRef S, bool Copy) {
304  if (S.empty())
305  return 0;
306 
307  // The hash can be calculated outside the lock.
308  CachedHashStringRef CHStr(S);
309  std::lock_guard<std::mutex> Guard(Mutex);
310  if (Copy) {
311  // We need to provide backing storage for the string if requested
312  // since StringTableBuilder stores references to strings. Any string
313  // that comes from a section in an object file doesn't need to be
314  // copied, but any string created by code will need to be copied.
315  // This allows GsymCreator to be really fast when parsing DWARF and
316  // other object files as most strings don't need to be copied.
317  if (!StrTab.contains(CHStr))
318  CHStr = CachedHashStringRef{StringStorage.insert(S).first->getKey(),
319  CHStr.hash()};
320  }
321  return StrTab.add(CHStr);
322 }
323 
325  std::lock_guard<std::mutex> Guard(Mutex);
326  Ranges.insert(FI.Range);
327  Funcs.emplace_back(std::move(FI));
328 }
329 
331  std::function<bool(FunctionInfo &)> const &Callback) {
332  std::lock_guard<std::mutex> Guard(Mutex);
333  for (auto &FI : Funcs) {
334  if (!Callback(FI))
335  break;
336  }
337 }
338 
340  std::function<bool(const FunctionInfo &)> const &Callback) const {
341  std::lock_guard<std::mutex> Guard(Mutex);
342  for (const auto &FI : Funcs) {
343  if (!Callback(FI))
344  break;
345  }
346 }
347 
349  std::lock_guard<std::mutex> Guard(Mutex);
350  return Funcs.size();
351 }
352 
354  if (ValidTextRanges)
355  return ValidTextRanges->contains(Addr);
356  return true; // No valid text ranges have been set, so accept all ranges.
357 }
358 
360  std::lock_guard<std::mutex> Guard(Mutex);
361  return Ranges.contains(Addr);
362 }
i
i
Definition: README.txt:29
llvm::gsym::Header::UUIDSize
uint8_t UUIDSize
The size in bytes of the UUID encoded in the "UUID" member.
Definition: Header.h:58
llvm
This file implements support for optimizing divisions by a constant.
Definition: AllocatorList.h:23
llvm::CachedHashStringRef::hash
uint32_t hash() const
Definition: CachedHashString.h:46
llvm::gsym::Header::BaseAddress
uint64_t BaseAddress
The 64 bit base address that all address offsets in the address offsets table are relative to.
Definition: Header.h:62
llvm::StringTableBuilder::contains
bool contains(StringRef S) const
Check if a string is contained in the string table.
Definition: StringTableBuilder.h:75
llvm::gsym::AddressRanges::insert
void insert(AddressRange Range)
Definition: Range.cpp:19
offsetof
#define offsetof(TYPE, MEMBER)
Definition: AMDHSAKernelDescriptor.h:23
llvm::gsym::Header
The GSYM header.
Definition: Header.h:45
llvm::Error::success
static ErrorSuccess success()
Create a success value.
Definition: Error.h:331
llvm::StringTableBuilder::finalizeInOrder
void finalizeInOrder()
Finalize the string table without reordering it.
Definition: StringTableBuilder.cpp:133
llvm::gsym::GsymCreator::encode
llvm::Error encode(FileWriter &O) const
Encode a GSYM into the file writer stream at the current position.
Definition: GsymCreator.cpp:58
llvm::gsym::GsymCreator::IsValidTextAddress
bool IsValidTextAddress(uint64_t Addr) const
Check if an address is a valid code address.
Definition: GsymCreator.cpp:353
Offset
uint64_t Offset
Definition: ELFObjHandler.cpp:81
llvm::StringSet::insert
std::pair< typename Base::iterator, bool > insert(StringRef key)
Definition: StringSet.h:33
llvm::Expected
Tagged union holding either a T or a Error.
Definition: APFloat.h:42
llvm::gsym::GSYM_MAGIC
constexpr uint32_t GSYM_MAGIC
Definition: Header.h:24
StringTableBuilder.h
llvm::gsym::Header::encode
llvm::Error encode(FileWriter &O) const
Encode this object into FileWriter stream.
Definition: Header.cpp:85
llvm::gsym::GSYM_VERSION
constexpr uint32_t GSYM_VERSION
Definition: Header.h:26
llvm::errorCodeToError
Error errorCodeToError(std::error_code EC)
Helper for converting an std::error_code to a Error.
Definition: Error.cpp:87
llvm::gsym::AddressRanges::contains
bool contains(uint64_t Addr) const
Definition: Range.cpp:37
llvm::gsym::GsymCreator::hasFunctionInfoForAddress
bool hasFunctionInfoForAddress(uint64_t Addr) const
Check if an address has already been added as a function info.
Definition: GsymCreator.cpp:359
llvm::gsym::Header::Magic
uint32_t Magic
The magic bytes should be set to GSYM_MAGIC.
Definition: Header.h:49
llvm::raw_ostream
This class implements an extremely fast bulk output stream that can only output to a stream.
Definition: raw_ostream.h:53
llvm::gsym::Header::StrtabSize
uint32_t StrtabSize
The size in bytes of the string table.
Definition: Header.h:80
llvm::sys::path::Style
Style
Definition: Path.h:28
llvm::sys::SmartMutex< false >
llvm::gsym::FunctionInfo
Function information in GSYM files encodes information for one contiguous address range.
Definition: FunctionInfo.h:89
llvm::CachedHashStringRef
A container which contains a StringRef plus a precomputed hash.
Definition: CachedHashString.h:28
llvm::RISCVFenceField::O
@ O
Definition: RISCVBaseInfo.h:197
uint64_t
Addr
uint64_t Addr
Definition: ELFObjHandler.cpp:80
move
compiles ldr LCPI1_0 ldr ldr mov lsr tst moveq r1 ldr LCPI1_1 and r0 bx lr It would be better to do something like to fold the shift into the conditional move
Definition: README.txt:546
llvm::ErrorSuccess
Subclass of Error for the sole purpose of identifying the success path in the type system.
Definition: Error.h:329
llvm::gsym::Header::UUID
uint8_t UUID[GSYM_MAX_UUID_SIZE]
The UUID of the original executable file.
Definition: Header.h:86
llvm::HexStyle::Style
Style
Definition: MCInstPrinter.h:32
FileWriter.h
llvm::gsym::FileWriter
A simplified binary data writer class that doesn't require targets, target definitions,...
Definition: FileWriter.h:29
assert
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
llvm::sys::path::parent_path
StringRef parent_path(StringRef path, Style style=Style::native)
Get parent path.
Definition: Path.cpp:465
memcpy
<%struct.s * > cast struct s *S to sbyte *< sbyte * > sbyte uint cast struct s *agg result to sbyte *< sbyte * > sbyte uint cast struct s *memtmp to sbyte *< sbyte * > sbyte uint ret void llc ends up issuing two memcpy or custom lower memcpy(of small size) to be ldmia/stmia. I think option 2 is better but the current register allocator cannot allocate a chunk of registers at a time. A feasible temporary solution is to use specific physical registers at the lowering time for small(<
function
print Print MemDeps of function
Definition: MemDepPrinter.cpp:83
UUID
std::pair< llvm::MachO::Target, std::string > UUID
Definition: TextStubCommon.h:23
llvm::gsym::GsymCreator::forEachFunctionInfo
void forEachFunctionInfo(std::function< bool(FunctionInfo &)> const &Callback)
Thread safe iteration over all function infos.
Definition: GsymCreator.cpp:330
llvm::StringTableBuilder::write
void write(raw_ostream &OS) const
Definition: StringTableBuilder.cpp:60
llvm::gsym::GsymCreator::getNumFunctionInfos
size_t getNumFunctionInfos() const
Get the current number of FunctionInfo objects contained in this object.
Definition: GsymCreator.cpp:348
llvm::StringRef
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:58
llvm::gsym::GsymCreator::addFunctionInfo
void addFunctionInfo(FunctionInfo &&FI)
Add a function info to this GSYM creator.
Definition: GsymCreator.cpp:324
if
if(llvm_vc STREQUAL "") set(fake_version_inc "$
Definition: CMakeLists.txt:14
uint32_t
S
add sub stmia L5 ldr r0 bl L_printf $stub Instead of a and a wouldn t it be better to do three moves *Return an aggregate type is even return S
Definition: README.txt:210
llvm::gsym::GsymCreator::insertString
uint32_t insertString(StringRef S, bool Copy=true)
Insert a string into the GSYM string table.
Definition: GsymCreator.cpp:303
llvm::DenseMapBase::insert
std::pair< iterator, bool > insert(const std::pair< KeyT, ValueT > &KV)
Definition: DenseMap.h:207
llvm::raw_fd_ostream
A raw_ostream that writes to a file descriptor.
Definition: raw_ostream.h:443
llvm::gsym::FileEntry
Files in GSYM are contained in FileEntry structs where we split the directory and basename into two d...
Definition: FileEntry.h:24
llvm::gsym::Header::NumAddresses
uint32_t NumAddresses
The number of addresses stored in the address offsets table.
Definition: Header.h:64
llvm::find_if
auto find_if(R &&Range, UnaryPredicate P)
Provide wrappers to std::find_if which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1578
LineTable.h
llvm::createStringError
Error createStringError(std::error_code EC, char const *Fmt, const Ts &... Vals)
Create formatted StringError object.
Definition: Error.h:1231
uint16_t
llvm::Error
Lightweight error class with error context and mandatory checking.
Definition: Error.h:157
llvm::StringTableBuilder::add
size_t add(CachedHashStringRef S)
Add a string to the builder.
Definition: StringTableBuilder.cpp:201
llvm::sort
void sort(IteratorTy Start, IteratorTy End)
Definition: STLExtras.h:1492
llvm::StringTableBuilder
Utility for building string tables with deduplicated suffixes.
Definition: StringTableBuilder.h:23
llvm::gsym::Header::StrtabOffset
uint32_t StrtabOffset
The file relative offset of the start of the string table for strings contained in the GSYM file.
Definition: Header.h:72
llvm::gsym::GsymCreator::insertFile
uint32_t insertFile(StringRef Path, sys::path::Style Style=sys::path::Style::native)
Insert a file into this GSYM creator.
Definition: GsymCreator.cpp:28
llvm::sys::path::filename
StringRef filename(StringRef path, Style style=Style::native)
Get filename.
Definition: Path.cpp:573
llvm::gsym::Header::Version
uint16_t Version
The version number determines how the header is decoded and how each InfoType in FunctionInfo is ...
Definition: Header.h:54
llvm::gsym::GsymCreator::save
llvm::Error save(StringRef Path, llvm::support::endianness ByteOrder) const
Save a GSYM file to a stand alone file.
Definition: GsymCreator.cpp:48
llvm::support::endianness
endianness
Definition: Endian.h:27
Header.h
raw_ostream.h
n
The same transformation can work with an even modulo with the addition of a and shrink the compare RHS by the same amount Unless the target supports that transformation probably isn t worthwhile The transformation can also easily be made to work with non zero equality for n
Definition: README.txt:685
llvm::gsym::GsymCreator::finalize
llvm::Error finalize(llvm::raw_ostream &OS)
Finalize the data in the GSYM creator prior to saving the data out.
Definition: GsymCreator.cpp:191
GsymCreator.h
llvm::gsym::GsymCreator::GsymCreator
GsymCreator(bool Quiet=false)
Definition: GsymCreator.cpp:23
File
Instrumentation for Order File
Definition: InstrOrderFile.cpp:205
llvm::gsym::Header::AddrOffSize
uint8_t AddrOffSize
The size in bytes of each address offset in the address offsets table.
Definition: Header.h:56
removeIfBinary
static ForwardIt removeIfBinary(ForwardIt FirstIt, ForwardIt LastIt, BinaryPredicate Pred)
Definition: GsymCreator.cpp:174
llvm::sampleprof::Base
@ Base
Definition: Discriminator.h:58