LLVM 22.0.0git
GsymReader.h
Go to the documentation of this file.
1//===- GsymReader.h ---------------------------------------------*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
9#ifndef LLVM_DEBUGINFO_GSYM_GSYMREADER_H
10#define LLVM_DEBUGINFO_GSYM_GSYMREADER_H
11
12#include "llvm/ADT/ArrayRef.h"
20#include "llvm/Support/Endian.h"
22#include <inttypes.h>
23#include <memory>
24#include <stdint.h>
25#include <vector>
26
27namespace llvm {
28class MemoryBuffer;
29class raw_ostream;
30
31namespace gsym {
32
33/// GsymReader is used to read GSYM data from a file or buffer.
34///
35/// This class is optimized for very quick lookups when the endianness matches
36/// the host system. The Header, address table, address info offsets, and file
37/// table are designed to be mmap'ed as read only into memory and used without
38/// any parsing needed. If the endianness doesn't match, we swap these objects
39/// and tables into GsymReader::SwappedData and then point our header and
40/// ArrayRefs to this swapped internal data.
41///
42/// GsymReader objects must use one of the static functions to create an
43/// instance: GsymReader::openFile(...) and GsymReader::copyBuffer(...).
44
46 GsymReader(std::unique_ptr<MemoryBuffer> Buffer);
48
49 std::unique_ptr<MemoryBuffer> MemBuffer;
50 StringRef GsymBytes;
51 llvm::endianness Endian;
52 const Header *Hdr = nullptr;
53 ArrayRef<uint8_t> AddrOffsets;
54 ArrayRef<uint32_t> AddrInfoOffsets;
56 StringTable StrTab;
57 /// When the GSYM file's endianness doesn't match the host system then
58 /// we must decode all data structures that need to be swapped into
59 /// local storage and then point the ArrayRef objects above to these swapped
60 /// copies.
61 struct SwappedData {
62 Header Hdr; ///< Swapped copy of the GSYM header.
63 std::vector<uint8_t> AddrOffsets; ///< Swapped copy of the address offsets table.
64 std::vector<uint32_t> AddrInfoOffsets; ///< Swapped copy of the address info offsets.
65 std::vector<FileEntry> Files; ///< Swapped copy of the file table entries.
66 };
67 std::unique_ptr<SwappedData> Swap;
68
69public:
72
73 /// Construct a GsymReader from a file on disk.
74 ///
75 /// \param Path The path of the GSYM file to read.
76 /// \returns An expected GsymReader that contains the object or an error
77 /// object that indicates reason for failing to read the GSYM.
79
80 /// Construct a GsymReader from a buffer.
81 ///
82 /// \param Bytes A set of bytes that will be copied and owned by the
83 /// returned object on success.
84 /// \returns An expected GsymReader that contains the object or an error
85 /// object that indicates reason for failing to read the GSYM.
87
88 /// Access the GSYM header.
89 /// \returns A native endian version of the GSYM header.
90 LLVM_ABI const Header &getHeader() const;
91
92 /// Get the full function info for an address.
93 ///
94 /// This should be called when a client will store a copy of the complete
95 /// FunctionInfo for a given address. For one off lookups, use the lookup()
96 /// function below.
97 ///
98 /// Symbolication server processes might want to parse the entire function
99 /// info for a given address and cache it if the process stays around to
100 /// service many symbolication addresses, like for parsing profiling
101 /// information.
102 ///
103 /// \param Addr A virtual address from the original object file to look up.
104 ///
105 /// \returns An expected FunctionInfo that contains the function info object
106 /// or an error object that indicates reason for failing to lookup the
107 /// address.
109
110 /// Get the full function info given an address index.
111 ///
112 /// \param AddrIdx An address index for an address in the address table.
113 ///
114 /// \returns An expected FunctionInfo that contains the function info object
115 /// or an error object that indicates reason for failing to get the function
116 /// info object.
118 getFunctionInfoAtIndex(uint64_t AddrIdx) const;
119
120 /// Lookup an address in a GSYM.
121 ///
122 /// Lookup just the information needed for a specific address \a Addr. This
123 /// function is faster than calling getFunctionInfo() as it will only return
124 /// information that pertains to \a Addr and allows the parsing to skip any
125 /// extra information encoded for other addresses. For example the line table
126 /// parsing can stop when a matching LineEntry has been found, and the
127 /// InlineInfo can stop parsing early once a match has been found and also
128 /// skip information that doesn't match. This avoids memory allocations and
129 /// is much faster for lookups.
130 ///
131 /// \param Addr A virtual address from the original object file to look up.
132 ///
133 /// \param MergedFuncsData A pointer to an optional DataExtractor that, if
134 /// non-null, will be set to the raw data of the MergedFunctionInfo, if
135 /// present.
136 ///
137 /// \returns An expected LookupResult that contains only the information
138 /// needed for the current address, or an error object that indicates reason
139 /// for failing to lookup the address.
142 std::optional<DataExtractor> *MergedFuncsData = nullptr) const;
143
144 /// Lookup all merged functions for a given address.
145 ///
146 /// This function performs a lookup for the specified address and then
147 /// retrieves additional LookupResults from any merged functions associated
148 /// with the primary LookupResult.
149 ///
150 /// \param Addr The address to lookup.
151 ///
152 /// \returns A vector of LookupResult objects, where the first element is the
153 /// primary result, followed by results for any merged functions
155 lookupAll(uint64_t Addr) const;
156
157 /// Get a string from the string table.
158 ///
159 /// \param Offset The string table offset for the string to retrieve.
160 /// \returns The string from the string table.
161 StringRef getString(uint32_t Offset) const { return StrTab[Offset]; }
162
163 /// Get a file entry for the supplied file index.
164 ///
165 /// Used to convert any file indexes in the FunctionInfo data back into
166 /// files. This function can be used for iteration, but is more commonly used
167 /// for random access when doing lookups.
168 ///
169 /// \param Index An index into the file table.
170 /// \returns An optional FileEntry that will be valid if the file index is
171 /// valid, or std::nullopt if the file index is out of bounds.
172 std::optional<FileEntry> getFile(uint32_t Index) const {
173 if (Index < Files.size())
174 return Files[Index];
175 return std::nullopt;
176 }
177
178 /// Dump the entire Gsym data contained in this object.
179 ///
180 /// \param OS The output stream to dump to.
182
183 /// Dump a FunctionInfo object.
184 ///
185 /// This function will convert any string table indexes and file indexes
186 /// into human readable format.
187 ///
188 /// \param OS The output stream to dump to.
189 ///
190 /// \param FI The object to dump.
191 ///
192 /// \param Indent The indentation as number of spaces. Used when dumping as an
193 /// item within MergedFunctionsInfo.
194 LLVM_ABI void dump(raw_ostream &OS, const FunctionInfo &FI,
195 uint32_t Indent = 0);
196
197 /// Dump a MergedFunctionsInfo object.
198 ///
199 /// This function will dump a MergedFunctionsInfo object - basically by
200 /// dumping the contained FunctionInfo objects with indentation.
201 ///
202 /// \param OS The output stream to dump to.
203 ///
204 /// \param MFI The object to dump.
206
207 /// Dump a CallSiteInfo object.
208 ///
209 /// This function will output the details of a CallSiteInfo object in a
210 /// human-readable format.
211 ///
212 /// \param OS The output stream to dump to.
213 ///
214 /// \param CSI The CallSiteInfo object to dump.
215 LLVM_ABI void dump(raw_ostream &OS, const CallSiteInfo &CSI);
216
217 /// Dump a CallSiteInfoCollection object.
218 ///
219 /// This function will iterate over a collection of CallSiteInfo objects and
220 /// dump each one.
221 ///
222 /// \param OS The output stream to dump to.
223 ///
224 /// \param CSIC The CallSiteInfoCollection object to dump.
225 ///
226 /// \param Indent The indentation as number of spaces. Used when dumping as an
227 /// item from within MergedFunctionsInfo.
229 uint32_t Indent = 0);
230
231 /// Dump a LineTable object.
232 ///
233 /// This function will convert any string table indexes and file indexes
234 /// into human readable format.
235 ///
236 ///
237 /// \param OS The output stream to dump to.
238 ///
239 /// \param LT The object to dump.
240 ///
241 /// \param Indent The indentation as number of spaces. Used when dumping as an
242 /// item from within MergedFunctionsInfo.
243 LLVM_ABI void dump(raw_ostream &OS, const LineTable &LT, uint32_t Indent = 0);
244
245 /// Dump an InlineInfo object.
246 ///
247 /// This function will convert any string table indexes and file indexes
248 /// into human readable format.
249 ///
250 /// \param OS The output stream to dump to.
251 ///
252 /// \param II The object to dump.
253 ///
254 /// \param Indent The indentation as number of spaces. Used for recursive
255 /// dumping.
256 LLVM_ABI void dump(raw_ostream &OS, const InlineInfo &II,
257 uint32_t Indent = 0);
258
259 /// Dump a FileEntry object.
260 ///
261 /// This function will convert any string table indexes into human readable
262 /// format.
263 ///
264 /// \param OS The output stream to dump to.
265 ///
266 /// \param FE The object to dump.
267 LLVM_ABI void dump(raw_ostream &OS, std::optional<FileEntry> FE);
268
269 /// Get the number of addresses in this Gsym file.
271 return Hdr->NumAddresses;
272 }
273
274 /// Gets an address from the address table.
275 ///
276 /// Addresses are stored as offsets from the gsym::Header::BaseAddress.
277 ///
278 /// \param Index An index into the address table.
279 /// \returns A resolved virtual address for the address in the address table
280 /// or std::nullopt if Index is out of bounds.
281 LLVM_ABI std::optional<uint64_t> getAddress(size_t Index) const;
282
283protected:
284
285 /// Get an appropriate address info offsets array.
286 ///
287 /// The address table in the GSYM file is stored as an array of 1, 2, 4 or 8
288 /// byte offsets from the gsym::Header::BaseAddress. The table is stored
289 /// internally as an array of bytes that are in the correct endianness. When
290 /// we access this table we must get an array that matches those sizes. This
291 /// templatized helper function is used when accessing address offsets in the
292 /// AddrOffsets member variable.
293 ///
294 /// \returns An ArrayRef of an appropriate address offset size.
295 template <class T> ArrayRef<T>
297 return ArrayRef<T>(reinterpret_cast<const T *>(AddrOffsets.data()),
298 AddrOffsets.size()/sizeof(T));
299 }
300
301 /// Get an appropriate address from the address table.
302 ///
303 /// The address table in the GSYM file is stored as an array of 1, 2, 4 or 8
304 /// byte address offsets from the gsym::Header::BaseAddress. The table is
305 /// stored internally as an array of bytes that are in the correct endianness.
306 /// In order to extract an address from the address table we must access the
307 /// address offset using the correct size and then add it to the BaseAddress
308 /// in the header.
309 ///
310 /// \param Index An index into the AddrOffsets array.
311 /// \returns A virtual address that matches the original object file for the
312 /// address at the specified index, or std::nullopt if Index is out of bounds.
313 template <class T>
314 std::optional<uint64_t> addressForIndex(size_t Index) const {
315 ArrayRef<T> AIO = getAddrOffsets<T>();
316 if (Index < AIO.size())
317 return AIO[Index] + Hdr->BaseAddress;
318 return std::nullopt;
319 }
320 /// Lookup an address offset in the AddrOffsets table.
321 ///
322 /// Given an address offset, look it up using a binary search of the
323 /// AddrOffsets table.
324 ///
325 /// \param AddrOffset An address offset, that has already been computed by
326 /// subtracting the gsym::Header::BaseAddress.
327 /// \returns The matching address offset index. This index will be used to
328 /// extract the FunctionInfo data's offset from the AddrInfoOffsets array.
329 template <class T>
330 std::optional<uint64_t>
331 getAddressOffsetIndex(const uint64_t AddrOffset) const {
332 ArrayRef<T> AIO = getAddrOffsets<T>();
333 const auto Begin = AIO.begin();
334 const auto End = AIO.end();
335 auto Iter = std::lower_bound(Begin, End, AddrOffset);
336 // Watch for addresses that fall between the gsym::Header::BaseAddress and
337 // the first address offset.
338 if (Iter == Begin && AddrOffset < *Begin)
339 return std::nullopt;
340 if (Iter == End || AddrOffset < *Iter)
341 --Iter;
342
343 // GSYM files have sorted function infos with the most information (line
344 // table and/or inline info) first in the array of function infos, so
345 // always backup as much as possible as long as the address offset is the
346 // same as the previous entry.
347 while (Iter != Begin) {
348 auto Prev = Iter - 1;
349 if (*Prev == *Iter)
350 Iter = Prev;
351 else
352 break;
353 }
354
355 return std::distance(Begin, Iter);
356 }
357
358 /// Create a GSYM from a memory buffer.
359 ///
360 /// Called by both openFile() and copyBuffer(), this function does all of the
361 /// work of parsing the GSYM file and returning an error.
362 ///
363 /// \param MemBuffer A memory buffer that will transfer ownership into the
364 /// GsymReader.
365 /// \returns An expected GsymReader that contains the object or an error
366 /// object that indicates reason for failing to read the GSYM.
368 create(std::unique_ptr<MemoryBuffer> &MemBuffer);
369
370 /// Given an address, find the address index.
371 ///
372 /// Binary search the address table and find the matching address index.
373 ///
374 /// \param Addr A virtual address that matches the original object file
375 /// to lookup.
376 /// \returns An index into the address table. This index can be used to
377 /// extract the FunctionInfo data's offset from the AddrInfoOffsets array.
378 /// Returns an error if the address isn't in the GSYM with details of why.
380
381 /// Given an address index, get the offset for the FunctionInfo.
382 ///
383 /// Looking up an address is done by finding the corresponding address
384 /// index for the address. This index is then used to get the offset of the
385 /// FunctionInfo data that we will decode using this function.
386 ///
387 /// \param Index An index into the address table.
388 /// \returns An optional GSYM data offset for the offset of the FunctionInfo
389 /// that needs to be decoded.
390 LLVM_ABI std::optional<uint64_t> getAddressInfoOffset(size_t Index) const;
391
392 /// Given an address, find the correct function info data and function
393 /// address.
394 ///
395 /// Binary search the address table and find the matching address info
396 /// and make sure that the function info contains the address. GSYM allows
397 /// functions to overlap, and the most debug info is contained in the first
398 /// entries due to the sorting when GSYM files are created. We can have
399 /// multiple function info that start at the same address only if their
400 /// address range doesn't match. So find the first entry that matches \a Addr
401 /// and iterate forward until we find one that contains the address.
402 ///
403 /// \param[in] Addr A virtual address that matches the original object file
404 /// to lookup.
405 ///
406 /// \param[out] FuncStartAddr A virtual address that is the base address of
407 /// the function that is used for decoding the FunctionInfo.
408 ///
409 /// \returns A valid data extractor on success, or an error if we fail to
410 /// find the address in a function info or correctly decode the data
413
414 /// Get the function data and address given an address index.
415 ///
416 /// \param AddrIdx An address index from the address table.
417 ///
418 /// \returns An expected FunctionInfo that contains the function info object
419 /// or an error object that indicates reason for failing to lookup the
420 /// address.
422 getFunctionInfoDataAtIndex(uint64_t AddrIdx, uint64_t &FuncStartAddr) const;
423};
424
425} // namespace gsym
426} // namespace llvm
427
428#endif // LLVM_DEBUGINFO_GSYM_GSYMREADER_H
#define LLVM_ABI
Definition: Compiler.h:213
uint64_t Addr
uint32_t Index
bool End
Definition: ELF_riscv.cpp:480
Provides ErrorOr<T> smart pointer.
uint64_t IntrinsicInst * II
raw_pwrite_stream & OS
Value * RHS
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition: ArrayRef.h:41
iterator end() const
Definition: ArrayRef.h:136
size_t size() const
size - Get the array size.
Definition: ArrayRef.h:147
iterator begin() const
Definition: ArrayRef.h:135
const T * data() const
Definition: ArrayRef.h:144
Lightweight error class with error context and mandatory checking.
Definition: Error.h:159
Tagged union holding either a T or a Error.
Definition: Error.h:485
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:55
GsymReader is used to read GSYM data from a file or buffer.
Definition: GsymReader.h:45
std::optional< FileEntry > getFile(uint32_t Index) const
Get the a file entry for the suppplied file index.
Definition: GsymReader.h:172
LLVM_ABI void dump(raw_ostream &OS)
Dump the entire Gsym data contained in this object.
Definition: GsymReader.cpp:383
uint32_t getNumAddresses() const
Get the number of addresses in this Gsym file.
Definition: GsymReader.h:270
static LLVM_ABI llvm::Expected< GsymReader > openFile(StringRef Path)
Construct a GsymReader from a file on disk.
Definition: GsymReader.cpp:32
LLVM_ABI std::optional< uint64_t > getAddress(size_t Index) const
Gets an address from the address table.
Definition: GsymReader.cpp:207
LLVM_ABI std::optional< uint64_t > getAddressInfoOffset(size_t Index) const
Given an address index, get the offset for the FunctionInfo.
Definition: GsymReader.cpp:217
ArrayRef< T > getAddrOffsets() const
Get an appropriate address info offsets array.
Definition: GsymReader.h:296
StringRef getString(uint32_t Offset) const
Get a string from the string table.
Definition: GsymReader.h:161
LLVM_ABI llvm::Expected< FunctionInfo > getFunctionInfo(uint64_t Addr) const
Get the full function info for an address.
Definition: GsymReader.cpp:320
LLVM_ABI const Header & getHeader() const
Access the GSYM header.
Definition: GsymReader.cpp:199
std::optional< uint64_t > addressForIndex(size_t Index) const
Get an appropriate address from the address table.
Definition: GsymReader.h:314
LLVM_ABI llvm::Expected< llvm::DataExtractor > getFunctionInfoDataAtIndex(uint64_t AddrIdx, uint64_t &FuncStartAddr) const
Get the function data and address given an address index.
Definition: GsymReader.cpp:299
LLVM_ABI Expected< uint64_t > getAddressIndex(const uint64_t Addr) const
Given an address, find the address index.
Definition: GsymReader.cpp:225
LLVM_ABI GsymReader(GsymReader &&RHS)
static LLVM_ABI llvm::Expected< GsymReader > copyBuffer(StringRef Bytes)
Construct a GsymReader from a buffer.
Definition: GsymReader.cpp:42
LLVM_ABI llvm::Expected< LookupResult > lookup(uint64_t Addr, std::optional< DataExtractor > *MergedFuncsData=nullptr) const
Lookup an address in the a GSYM.
Definition: GsymReader.cpp:338
static LLVM_ABI llvm::Expected< llvm::gsym::GsymReader > create(std::unique_ptr< MemoryBuffer > &MemBuffer)
Create a GSYM from a memory buffer.
Definition: GsymReader.cpp:48
LLVM_ABI llvm::Expected< FunctionInfo > getFunctionInfoAtIndex(uint64_t AddrIdx) const
Get the full function info given an address index.
Definition: GsymReader.cpp:329
LLVM_ABI llvm::Expected< llvm::DataExtractor > getFunctionInfoDataForAddress(uint64_t Addr, uint64_t &FuncStartAddr) const
Given an address, find the correct function info data and function address.
Definition: GsymReader.cpp:256
LLVM_ABI llvm::Expected< std::vector< LookupResult > > lookupAll(uint64_t Addr) const
Lookup all merged functions for a given address.
Definition: GsymReader.cpp:349
std::optional< uint64_t > getAddressOffsetIndex(const uint64_t AddrOffset) const
Lookup an address offset in the AddrOffsets table.
Definition: GsymReader.h:331
LineTable class contains deserialized versions of line tables for each function's address ranges.
Definition: LineTable.h:119
This class implements an extremely fast bulk output stream that can only output to a stream.
Definition: raw_ostream.h:53
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
@ Offset
Definition: DWP.cpp:477
endianness
Definition: bit.h:71
Function information in GSYM files encodes information for one contiguous address range.
Definition: FunctionInfo.h:93
The GSYM header.
Definition: Header.h:46
uint32_t NumAddresses
The number of addresses stored in the address offsets table.
Definition: Header.h:65
uint64_t BaseAddress
The 64 bit base address that all address offsets in the address offsets table are relative to.
Definition: Header.h:63
Inline information stores the name of the inline function along with an array of address ranges.
Definition: InlineInfo.h:60
String tables in GSYM files are required to start with an empty string at offset zero.
Definition: StringTable.h:21
Definition: regcomp.c:186