LLVM 20.0.0git
GsymReader.h
Go to the documentation of this file.
1//===- GsymReader.h ---------------------------------------------*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
9#ifndef LLVM_DEBUGINFO_GSYM_GSYMREADER_H
10#define LLVM_DEBUGINFO_GSYM_GSYMREADER_H
11
12#include "llvm/ADT/ArrayRef.h"
19#include "llvm/Support/Endian.h"
21#include <inttypes.h>
22#include <memory>
23#include <stdint.h>
24#include <vector>
25
26namespace llvm {
27class MemoryBuffer;
28class raw_ostream;
29
30namespace gsym {
31
32/// GsymReader is used to read GSYM data from a file or buffer.
33///
34/// This class is optimized for very quick lookups when the endianness matches
35/// the host system. The Header, address table, address info offsets, and file
36/// table are designed to be mmap'ed as read only into memory and used without
37/// any parsing needed. If the endianness doesn't match, we swap these objects
38/// and tables into GsymReader::SwappedData and then point our header and
39/// ArrayRefs to this swapped internal data.
40///
41/// GsymReader objects must use one of the static functions to create an
42/// instance: GsymReader::openFile(...) and GsymReader::copyBuffer(...).
43
45 GsymReader(std::unique_ptr<MemoryBuffer> Buffer);
47
48 std::unique_ptr<MemoryBuffer> MemBuffer;
49 StringRef GsymBytes;
50 llvm::endianness Endian;
51 const Header *Hdr = nullptr;
52 ArrayRef<uint8_t> AddrOffsets;
53 ArrayRef<uint32_t> AddrInfoOffsets;
55 StringTable StrTab;
56 /// When the GSYM file's endianness doesn't match the host system then
57 /// we must decode all data structures that need to be swapped into
58 /// local storage and point the ArrayRef objects above to these swapped
59 /// copies.
60 struct SwappedData {
61 Header Hdr; ///< Locally stored, swapped copy of the header.
62 std::vector<uint8_t> AddrOffsets; ///< Locally stored, swapped copy of the address offsets table.
63 std::vector<uint32_t> AddrInfoOffsets; ///< Locally stored, swapped copy of the address info offsets.
64 std::vector<FileEntry> Files; ///< Locally stored, swapped copy of the file table entries.
65 };
66 std::unique_ptr<SwappedData> Swap;
67
68public:
71
72 /// Construct a GsymReader from a file on disk.
73 ///
74 /// \param Path The file path of the GSYM file to read.
75 /// \returns An expected GsymReader that contains the object or an error
76 /// object that indicates reason for failing to read the GSYM.
78
79 /// Construct a GsymReader from a buffer.
80 ///
81 /// \param Bytes A set of bytes that will be copied and owned by the
82 /// returned object on success.
83 /// \returns An expected GsymReader that contains the object or an error
84 /// object that indicates reason for failing to read the GSYM.
86
87 /// Access the GSYM header.
88 /// \returns A native endian version of the GSYM header.
89 const Header &getHeader() const;
90
91 /// Get the full function info for an address.
92 ///
93 /// This should be called when a client will store a copy of the complete
94 /// FunctionInfo for a given address. For one off lookups, use the lookup()
95 /// function below.
96 ///
97 /// Symbolication server processes might want to parse the entire function
98 /// info for a given address and cache it if the process stays around to
99 /// service many symbolication addresses, like for parsing profiling
100 /// information.
101 ///
102 /// \param Addr A virtual address from the original object file to lookup.
103 ///
104 /// \returns An expected FunctionInfo that contains the function info object
105 /// or an error object that indicates reason for failing to lookup the
106 /// address.
108
109 /// Get the full function info given an address index.
110 ///
111 /// \param AddrIdx An address index for an address in the address table.
112 ///
113 /// \returns An expected FunctionInfo that contains the function info object
114 /// or an error object that indicates reason for failing to get the function
115 /// info object.
117
118 /// Lookup an address in a GSYM.
119 ///
120 /// Lookup just the information needed for a specific address \a Addr. This
121 /// function is faster than calling getFunctionInfo() as it will only return
122 /// information that pertains to \a Addr and allows the parsing to skip any
123 /// extra information encoded for other addresses. For example the line table
124 /// parsing can stop when a matching LineEntry has been found, and the
125 /// InlineInfo can stop parsing early once a match has been found and also
126 /// skip information that doesn't match. This avoids memory allocations and
127 /// is much faster for lookups.
128 ///
129 /// \param Addr A virtual address from the original object file to lookup.
130 /// \returns An expected LookupResult that contains only the information
131 /// needed for the current address, or an error object that indicates reason
132 /// for failing to lookup the address.
134
135 /// Get a string from the string table.
136 ///
137 /// \param Offset The string table offset for the string to retrieve.
138 /// \returns The string from the strin table.
139 StringRef getString(uint32_t Offset) const { return StrTab[Offset]; }
140
141 /// Get the a file entry for the suppplied file index.
142 ///
143 /// Used to convert any file indexes in the FunctionInfo data back into
144 /// files. This function can be used for iteration, but is more commonly used
145 /// for random access when doing lookups.
146 ///
147 /// \param Index An index into the file table.
148 /// \returns An optional FileInfo that will be valid if the file index is
149 /// valid, or std::nullopt if the file index is out of bounds,
150 std::optional<FileEntry> getFile(uint32_t Index) const {
151 if (Index < Files.size())
152 return Files[Index];
153 return std::nullopt;
154 }
155
156 /// Dump the entire Gsym data contained in this object.
157 ///
158 /// \param OS The output stream to dump to.
159 void dump(raw_ostream &OS);
160
161 /// Dump a FunctionInfo object.
162 ///
163 /// This function will convert any string table indexes and file indexes
164 /// into human readable format.
165 ///
166 /// \param OS The output stream to dump to.
167 ///
168 /// \param FI The object to dump.
169 ///
170 /// \param Indent The indentation as number of spaces. Used when dumping as an
171 /// item within MergedFunctionsInfo.
172 void dump(raw_ostream &OS, const FunctionInfo &FI, uint32_t Indent = 0);
173
174 /// Dump a MergedFunctionsInfo object.
175 ///
176 /// This function will dump a MergedFunctionsInfo object - basically by
177 /// dumping the contained FunctionInfo objects with indentation.
178 ///
179 /// \param OS The output stream to dump to.
180 ///
181 /// \param MFI The object to dump.
182 void dump(raw_ostream &OS, const MergedFunctionsInfo &MFI);
183
184 /// Dump a CallSiteInfo object.
185 ///
186 /// This function will output the details of a CallSiteInfo object in a
187 /// human-readable format.
188 ///
189 /// \param OS The output stream to dump to.
190 ///
191 /// \param CSI The CallSiteInfo object to dump.
192 void dump(raw_ostream &OS, const CallSiteInfo &CSI);
193
194 /// Dump a CallSiteInfoCollection object.
195 ///
196 /// This function will iterate over a collection of CallSiteInfo objects and
197 /// dump each one.
198 ///
199 /// \param OS The output stream to dump to.
200 ///
201 /// \param CSIC The CallSiteInfoCollection object to dump.
202 ///
203 /// \param Indent The indentation as number of spaces. Used when dumping as an
204 /// item from within MergedFunctionsInfo.
205 void dump(raw_ostream &OS, const CallSiteInfoCollection &CSIC,
206 uint32_t Indent = 0);
207
208 /// Dump a LineTable object.
209 ///
210 /// This function will convert any string table indexes and file indexes
211 /// into human readable format.
212 ///
213 ///
214 /// \param OS The output stream to dump to.
215 ///
216 /// \param LT The object to dump.
217 ///
218 /// \param Indent The indentation as number of spaces. Used when dumping as an
219 /// item from within MergedFunctionsInfo.
220 void dump(raw_ostream &OS, const LineTable &LT, uint32_t Indent = 0);
221
222 /// Dump an InlineInfo object.
223 ///
224 /// This function will convert any string table indexes and file indexes
225 /// into human readable format.
226 ///
227 /// \param OS The output stream to dump to.
228 ///
229 /// \param II The object to dump.
230 ///
231 /// \param Indent The indentation as number of spaces. Used for recursive
232 /// dumping.
233 void dump(raw_ostream &OS, const InlineInfo &II, uint32_t Indent = 0);
234
235 /// Dump a FileEntry object.
236 ///
237 /// This function will convert any string table indexes into human readable
238 /// format.
239 ///
240 /// \param OS The output stream to dump to.
241 ///
242 /// \param FE The object to dump.
243 void dump(raw_ostream &OS, std::optional<FileEntry> FE);
244
245 /// Get the number of addresses in this Gsym file.
247 return Hdr->NumAddresses;
248 }
249
250 /// Gets an address from the address table.
251 ///
252 /// Addresses are stored as offsets from the gsym::Header::BaseAddress.
253 ///
254 /// \param Index An index into the address table.
255 /// \returns A resolved virtual address for the address in the address table
256 /// or std::nullopt if Index is out of bounds.
257 std::optional<uint64_t> getAddress(size_t Index) const;
258
259protected:
260
261 /// Get an appropriate address info offsets array.
262 ///
263 /// The address table in the GSYM file is stored as an array of 1, 2, 4 or 8
264 /// byte offsets from the gsym::Header::BaseAddress. The table is stored
265 /// internally as an array of bytes that are in the correct endianness. When
266 /// we access this table we must get an array that matches those sizes. This
267 /// templatized helper function is used when accessing address offsets in the
268 /// AddrOffsets member variable.
269 ///
270 /// \returns An ArrayRef of an appropriate address offset size.
271 template <class T> ArrayRef<T>
273 return ArrayRef<T>(reinterpret_cast<const T *>(AddrOffsets.data()),
274 AddrOffsets.size()/sizeof(T));
275 }
276
277 /// Get an appropriate address from the address table.
278 ///
279 /// The address table in the GSYM file is stored as array of 1, 2, 4 or 8
280 /// byte address offsets from the The gsym::Header::BaseAddress. The table is
281 /// stored internally as a array of bytes that are in the correct endianness.
282 /// In order to extract an address from the address table we must access the
283 /// address offset using the correct size and then add it to the BaseAddress
284 /// in the header.
285 ///
286 /// \param Index An index into the AddrOffsets array.
287 /// \returns An virtual address that matches the original object file for the
288 /// address as the specified index, or std::nullopt if Index is out of bounds.
289 template <class T>
290 std::optional<uint64_t> addressForIndex(size_t Index) const {
291 ArrayRef<T> AIO = getAddrOffsets<T>();
292 if (Index < AIO.size())
293 return AIO[Index] + Hdr->BaseAddress;
294 return std::nullopt;
295 }
296 /// Lookup an address offset in the AddrOffsets table.
297 ///
298 /// Given an address offset, look it up using a binary search of the
299 /// AddrOffsets table.
300 ///
301 /// \param AddrOffset An address offset, that has already been computed by
302 /// subtracting the gsym::Header::BaseAddress.
303 /// \returns The matching address offset index. This index will be used to
304 /// extract the FunctionInfo data's offset from the AddrInfoOffsets array.
305 template <class T>
306 std::optional<uint64_t>
307 getAddressOffsetIndex(const uint64_t AddrOffset) const {
308 ArrayRef<T> AIO = getAddrOffsets<T>();
309 const auto Begin = AIO.begin();
310 const auto End = AIO.end();
311 auto Iter = std::lower_bound(Begin, End, AddrOffset);
312 // Watch for addresses that fall between the gsym::Header::BaseAddress and
313 // the first address offset.
314 if (Iter == Begin && AddrOffset < *Begin)
315 return std::nullopt;
316 if (Iter == End || AddrOffset < *Iter)
317 --Iter;
318
319 // GSYM files have sorted function infos with the most information (line
320 // table and/or inline info) first in the array of function infos, so
321 // always backup as much as possible as long as the address offset is the
322 // same as the previous entry.
323 while (Iter != Begin) {
324 auto Prev = Iter - 1;
325 if (*Prev == *Iter)
326 Iter = Prev;
327 else
328 break;
329 }
330
331 return std::distance(Begin, Iter);
332 }
333
334 /// Create a GSYM from a memory buffer.
335 ///
336 /// Called by both openFile() and copyBuffer(), this function does all of the
337 /// work of parsing the GSYM file and returning an error.
338 ///
339 /// \param MemBuffer A memory buffer that will transfer ownership into the
340 /// GsymReader.
341 /// \returns An expected GsymReader that contains the object or an error
342 /// object that indicates reason for failing to read the GSYM.
344 create(std::unique_ptr<MemoryBuffer> &MemBuffer);
345
346
347 /// Given an address, find the address index.
348 ///
349 /// Binary search the address table and find the matching address index.
350 ///
351 /// \param Addr A virtual address that matches the original object file
352 /// to lookup.
353 /// \returns An index into the address table. This index can be used to
354 /// extract the FunctionInfo data's offset from the AddrInfoOffsets array.
355 /// Returns an error if the address isn't in the GSYM with details of why.
357
358 /// Given an address index, get the offset for the FunctionInfo.
359 ///
360 /// Looking up an address is done by finding the corresponding address
361 /// index for the address. This index is then used to get the offset of the
362 /// FunctionInfo data that we will decode using this function.
363 ///
364 /// \param Index An index into the address table.
365 /// \returns An optional GSYM data offset for the offset of the FunctionInfo
366 /// that needs to be decoded.
367 std::optional<uint64_t> getAddressInfoOffset(size_t Index) const;
368
369 /// Given an address, find the correct function info data and function
370 /// address.
371 ///
372 /// Binary search the address table and find the matching address info
373 /// and make sure that the function info contains the address. GSYM allows
374 /// functions to overlap, and the most debug info is contained in the first
375 /// entries due to the sorting when GSYM files are created. We can have
376 /// multiple function info that start at the same address only if their
377 /// address range doesn't match. So find the first entry that matches \a Addr
378 /// and iterate forward until we find one that contains the address.
379 ///
380 /// \param[in] Addr A virtual address that matches the original object file
381 /// to lookup.
382 ///
383 /// \param[out] FuncStartAddr A virtual address that is the base address of
384 /// the function that is used for decoding the FunctionInfo.
385 ///
386 /// \returns A valid data extractor on success, or an error if we fail to
387 /// find the address in a function info or correctly decode the data
390
391 /// Get the function data and address given an address index.
392 ///
393 /// \param AddrIdx An address index from the address table.
394 ///
395 /// \returns An expected FunctionInfo that contains the function info object
396 /// or an error object that indicates reason for failing to lookup the
397 /// address.
399 getFunctionInfoDataAtIndex(uint64_t AddrIdx, uint64_t &FuncStartAddr) const;
400};
401
402} // namespace gsym
403} // namespace llvm
404
405#endif // LLVM_DEBUGINFO_GSYM_GSYMREADER_H
uint64_t Addr
uint32_t Index
bool End
Definition: ELF_riscv.cpp:480
Provides ErrorOr<T> smart pointer.
uint64_t IntrinsicInst * II
raw_pwrite_stream & OS
Value * RHS
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition: ArrayRef.h:41
iterator end() const
Definition: ArrayRef.h:157
size_t size() const
size - Get the array size.
Definition: ArrayRef.h:168
iterator begin() const
Definition: ArrayRef.h:156
const T * data() const
Definition: ArrayRef.h:165
Lightweight error class with error context and mandatory checking.
Definition: Error.h:160
Tagged union holding either a T or a Error.
Definition: Error.h:481
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:51
GsymReader is used to read GSYM data from a file or buffer.
Definition: GsymReader.h:44
std::optional< FileEntry > getFile(uint32_t Index) const
Get a file entry for the supplied file index.
Definition: GsymReader.h:150
void dump(raw_ostream &OS)
Dump the entire Gsym data contained in this object.
Definition: GsymReader.cpp:345
uint32_t getNumAddresses() const
Get the number of addresses in this Gsym file.
Definition: GsymReader.h:246
static llvm::Expected< GsymReader > openFile(StringRef Path)
Construct a GsymReader from a file on disk.
Definition: GsymReader.cpp:32
std::optional< uint64_t > getAddress(size_t Index) const
Gets an address from the address table.
Definition: GsymReader.cpp:207
std::optional< uint64_t > getAddressInfoOffset(size_t Index) const
Given an address index, get the offset for the FunctionInfo.
Definition: GsymReader.cpp:217
ArrayRef< T > getAddrOffsets() const
Get an appropriate address info offsets array.
Definition: GsymReader.h:272
StringRef getString(uint32_t Offset) const
Get a string from the string table.
Definition: GsymReader.h:139
llvm::Expected< FunctionInfo > getFunctionInfo(uint64_t Addr) const
Get the full function info for an address.
Definition: GsymReader.cpp:320
const Header & getHeader() const
Access the GSYM header.
Definition: GsymReader.cpp:199
std::optional< uint64_t > addressForIndex(size_t Index) const
Get an appropriate address from the address table.
Definition: GsymReader.h:290
llvm::Expected< llvm::DataExtractor > getFunctionInfoDataAtIndex(uint64_t AddrIdx, uint64_t &FuncStartAddr) const
Get the function data and address given an address index.
Definition: GsymReader.cpp:299
Expected< uint64_t > getAddressIndex(const uint64_t Addr) const
Given an address, find the address index.
Definition: GsymReader.cpp:225
llvm::Expected< LookupResult > lookup(uint64_t Addr) const
Lookup an address in a GSYM.
Definition: GsymReader.cpp:337
GsymReader(GsymReader &&RHS)
static llvm::Expected< GsymReader > copyBuffer(StringRef Bytes)
Construct a GsymReader from a buffer.
Definition: GsymReader.cpp:42
static llvm::Expected< llvm::gsym::GsymReader > create(std::unique_ptr< MemoryBuffer > &MemBuffer)
Create a GSYM from a memory buffer.
Definition: GsymReader.cpp:48
llvm::Expected< FunctionInfo > getFunctionInfoAtIndex(uint64_t AddrIdx) const
Get the full function info given an address index.
Definition: GsymReader.cpp:329
llvm::Expected< llvm::DataExtractor > getFunctionInfoDataForAddress(uint64_t Addr, uint64_t &FuncStartAddr) const
Given an address, find the correct function info data and function address.
Definition: GsymReader.cpp:256
std::optional< uint64_t > getAddressOffsetIndex(const uint64_t AddrOffset) const
Lookup an address offset in the AddrOffsets table.
Definition: GsymReader.h:307
LineTable class contains deserialized versions of line tables for each function's address ranges.
Definition: LineTable.h:118
This class implements an extremely fast bulk output stream that can only output to a stream.
Definition: raw_ostream.h:52
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
@ Offset
Definition: DWP.cpp:480
endianness
Definition: bit.h:70
Function information in GSYM files encodes information for one contiguous address range.
Definition: FunctionInfo.h:92
The GSYM header.
Definition: Header.h:45
uint32_t NumAddresses
The number of addresses stored in the address offsets table.
Definition: Header.h:64
uint64_t BaseAddress
The 64 bit base address that all address offsets in the address offsets table are relative to.
Definition: Header.h:62
Inline information stores the name of the inline function along with an array of address ranges.
Definition: InlineInfo.h:59
String tables in GSYM files are required to start with an empty string at offset zero.
Definition: StringTable.h:21
Definition: regcomp.c:192