LLVM  14.0.0git
GsymReader.h
Go to the documentation of this file.
1 //===- GsymReader.h ---------------------------------------------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #ifndef LLVM_DEBUGINFO_GSYM_GSYMREADER_H
10 #define LLVM_DEBUGINFO_GSYM_GSYMREADER_H
11 
12 #include "llvm/ADT/ArrayRef.h"
19 #include "llvm/Support/Endian.h"
20 #include "llvm/Support/ErrorOr.h"
21 #include <inttypes.h>
22 #include <memory>
23 #include <stdint.h>
24 #include <vector>
25 
26 namespace llvm {
27 class MemoryBuffer;
28 class raw_ostream;
29 
30 namespace gsym {
31 
32 /// GsymReader is used to read GSYM data from a file or buffer.
33 ///
34 /// This class is optimized for very quick lookups when the endianness matches
35 /// the host system. The Header, address table, address info offsets, and file
36 /// table is designed to be mmap'ed as read only into memory and used without
37 /// any parsing needed. If the endianness doesn't match, we swap these objects
38 /// and tables into GsymReader::SwappedData and then point our header and
39 /// ArrayRefs to this swapped internal data.
40 ///
41 /// GsymReader objects must use one of the static functions to create an
42 /// instance: GsymReader::openFile(...) and GsymReader::copyBuffer(...).
43 
44 class GsymReader {
45  GsymReader(std::unique_ptr<MemoryBuffer> Buffer);
47 
48  std::unique_ptr<MemoryBuffer> MemBuffer;
49  StringRef GsymBytes;
51  const Header *Hdr = nullptr;
52  ArrayRef<uint8_t> AddrOffsets;
53  ArrayRef<uint32_t> AddrInfoOffsets;
54  ArrayRef<FileEntry> Files;
55  StringTable StrTab;
56  /// When the GSYM file's endianness doesn't match the host system then
57  /// we must decode all data structures that need to be swapped into
58  /// local storage and then point the ArrayRef objects above to these swapped
59  /// copies.
60  struct SwappedData {
61  Header Hdr;
62  std::vector<uint8_t> AddrOffsets;
63  std::vector<uint32_t> AddrInfoOffsets;
64  std::vector<FileEntry> Files;
65  };
66  std::unique_ptr<SwappedData> Swap;
67 
68 public:
69  GsymReader(GsymReader &&RHS);
70  ~GsymReader();
71 
72  /// Construct a GsymReader from a file on disk.
73  ///
74  /// \param Path The file path of the GSYM file to read.
75  /// \returns An expected GsymReader that contains the object or an error
76  /// object that indicates reason for failing to read the GSYM.
78 
79  /// Construct a GsymReader from a buffer.
80  ///
81  /// \param Bytes A set of bytes that will be copied and owned by the
82  /// returned object on success.
83  /// \returns An expected GsymReader that contains the object or an error
84  /// object that indicates reason for failing to read the GSYM.
86 
87  /// Access the GSYM header.
88  /// \returns A native endian version of the GSYM header.
89  const Header &getHeader() const;
90 
91  /// Get the full function info for an address.
92  ///
93  /// This should be called when a client will store a copy of the complete
94  /// FunctionInfo for a given address. For one off lookups, use the lookup()
95  /// function below.
96  ///
97  /// Symbolication server processes might want to parse the entire function
98  /// info for a given address and cache it if the process stays around to
99  /// service many symbolication addresses, like for parsing profiling
100  /// information.
101  ///
102  /// \param Addr A virtual address from the original object file to look up.
103  ///
104  /// \returns An expected FunctionInfo that contains the function info object
105  /// or an error object that indicates reason for failing to lookup the
106  /// address.
108 
109  /// Look up an address in a GSYM.
110  ///
111  /// Lookup just the information needed for a specific address \a Addr. This
112  /// function is faster than calling getFunctionInfo() as it will only return
113  /// information that pertains to \a Addr and allows the parsing to skip any
114  /// extra information encoded for other addresses. For example the line table
115  /// parsing can stop when a matching LineEntry has been found, and the
116  /// InlineInfo can stop parsing early once a match has been found and also
117  /// skip information that doesn't match. This avoids memory allocations and
118  /// is much faster for lookups.
119  ///
120  /// \param Addr A virtual address from the original object file to look up.
121  /// \returns An expected LookupResult that contains only the information
122  /// needed for the current address, or an error object that indicates reason
123  /// for failing to lookup the address.
125 
126  /// Get a string from the string table.
127  ///
128  /// \param Offset The string table offset for the string to retrieve.
129  /// \returns The string from the string table, i.e. the result of StrTab[Offset].
130  StringRef getString(uint32_t Offset) const { return StrTab[Offset]; }
131 
132  /// Get a file entry for the supplied file index.
133  ///
134  /// Used to convert any file indexes in the FunctionInfo data back into
135  /// files. This function can be used for iteration, but is more commonly used
136  /// for random access when doing lookups.
137  ///
138  /// \param Index An index into the file table.
139  /// \returns An optional FileEntry that will be valid if the file index is
140  /// valid, or llvm::None if the file index is out of bounds.
142  if (Index < Files.size())
143  return Files[Index];
144  return llvm::None;
145  }
146 
147  /// Dump the entire Gsym data contained in this object.
148  ///
149  /// \param OS The output stream to dump to.
150  void dump(raw_ostream &OS);
151 
152  /// Dump a FunctionInfo object.
153  ///
154  /// This function will convert any string table indexes and file indexes
155  /// into human readable format.
156  ///
157  /// \param OS The output stream to dump to.
158  ///
159  /// \param FI The object to dump.
160  void dump(raw_ostream &OS, const FunctionInfo &FI);
161 
162  /// Dump a LineTable object.
163  ///
164  /// This function will convert any string table indexes and file indexes
165  /// into human readable format.
166  ///
167  ///
168  /// \param OS The output stream to dump to.
169  ///
170  /// \param LT The object to dump.
171  void dump(raw_ostream &OS, const LineTable &LT);
172 
173  /// Dump an InlineInfo object.
174  ///
175  /// This function will convert any string table indexes and file indexes
176  /// into human readable format.
177  ///
178  /// \param OS The output stream to dump to.
179  ///
180  /// \param II The object to dump.
181  ///
182  /// \param Indent The indentation as number of spaces. Used for recursive
183  /// dumping.
184  void dump(raw_ostream &OS, const InlineInfo &II, uint32_t Indent = 0);
185 
186  /// Dump a FileEntry object.
187  ///
188  /// This function will convert any string table indexes into human readable
189  /// format.
190  ///
191  /// \param OS The output stream to dump to.
192  ///
193  /// \param FE The object to dump.
194  void dump(raw_ostream &OS, Optional<FileEntry> FE);
195 
196  /// Get the number of addresses in this Gsym file.
198  return Hdr->NumAddresses;
199  }
200 
201  /// Gets an address from the address table.
202  ///
203  /// Addresses are stored as offsets from the gsym::Header::BaseAddress.
204  ///
205  /// \param Index An index into the address table.
206  /// \returns A resolved virtual address for the address in the address table
207  /// or llvm::None if Index is out of bounds.
208  Optional<uint64_t> getAddress(size_t Index) const;
209 
210 protected:
211 
212  /// Get an appropriate address offsets array.
213  ///
214  /// The address table in the GSYM file is stored as an array of 1, 2, 4 or 8
215  /// byte offsets from the gsym::Header::BaseAddress. The table is stored
216  /// internally as an array of bytes that are in the correct endianness. When
217  /// we access this table we must get an array that matches those sizes. This
218  /// templatized helper function is used when accessing address offsets in the
219  /// AddrOffsets member variable.
220  ///
221  /// \returns An ArrayRef<T> viewing the AddrOffsets bytes as size()/sizeof(T) elements.
222  template <class T> ArrayRef<T>
223  getAddrOffsets() const {
224  return ArrayRef<T>(reinterpret_cast<const T *>(AddrOffsets.data()),
225  AddrOffsets.size()/sizeof(T));
226  }
227 
228  /// Get an appropriate address from the address table.
229  ///
230  /// The address table in the GSYM file is stored as an array of 1, 2, 4 or 8
231  /// byte address offsets from the gsym::Header::BaseAddress. The table is
232  /// stored internally as an array of bytes that are in the correct endianness.
233  /// In order to extract an address from the address table we must access the
234  /// address offset using the correct size and then add it to the BaseAddress
235  /// in the header.
236  ///
237  /// \param Index An index into the AddrOffsets array.
238  /// \returns A virtual address that matches the original object file for the
239  /// address at the specified index, or llvm::None if Index is out of bounds.
240  template <class T> Optional<uint64_t>
241  addressForIndex(size_t Index) const {
242  ArrayRef<T> AIO = getAddrOffsets<T>();
243  if (Index < AIO.size())
244  return AIO[Index] + Hdr->BaseAddress;
245  return llvm::None;
246  }
247  /// Lookup an address offset in the AddrOffsets table.
248  ///
249  /// Given an address offset, look it up using a binary search of the
250  /// AddrOffsets table.
251  ///
252  /// \param AddrOffset An address offset, that has already been computed by
253  /// subtracting the gsym::Header::BaseAddress.
254  /// \returns The matching address offset index. This index will be used to
255  /// extract the FunctionInfo data's offset from the AddrInfoOffsets array.
256  template <class T>
258  ArrayRef<T> AIO = getAddrOffsets<T>();
259  const auto Begin = AIO.begin();
260  const auto End = AIO.end();
261  auto Iter = std::lower_bound(Begin, End, AddrOffset);
262  // Watch for addresses that fall between the gsym::Header::BaseAddress and
263  // the first address offset.
264  if (Iter == Begin && AddrOffset < *Begin)
265  return llvm::None;
266  if (Iter == End || AddrOffset < *Iter)
267  --Iter;
268  return std::distance(Begin, Iter);
269  }
270 
271  /// Create a GSYM from a memory buffer.
272  ///
273  /// Called by both openFile() and copyBuffer(), this function does all of the
274  /// work of parsing the GSYM file and returning an error.
275  ///
276  /// \param MemBuffer A memory buffer that will transfer ownership into the
277  /// GsymReader.
278  /// \returns An expected GsymReader that contains the object or an error
279  /// object that indicates reason for failing to read the GSYM.
281  create(std::unique_ptr<MemoryBuffer> &MemBuffer);
282 
283 
284  /// Given an address, find the address index.
285  ///
286  /// Binary search the address table and find the matching address index.
287  ///
288  /// \param Addr A virtual address that matches the original object file
289  /// to lookup.
290  /// \returns An index into the address table. This index can be used to
291  /// extract the FunctionInfo data's offset from the AddrInfoOffsets array.
292  /// Returns an error if the address isn't in the GSYM with details of why.
294 
295  /// Given an address index, get the offset for the FunctionInfo.
296  ///
297  /// Looking up an address is done by finding the corresponding address
298  /// index for the address. This index is then used to get the offset of the
299  /// FunctionInfo data that we will decode using this function.
300  ///
301  /// \param Index An index into the address table.
302  /// \returns An optional GSYM data offset for the offset of the FunctionInfo
303  /// that needs to be decoded.
305 };
306 
307 } // namespace gsym
308 } // namespace llvm
309 
310 #endif // LLVM_DEBUGINFO_GSYM_GSYMREADER_H
llvm
This file implements support for optimizing divisions by a constant.
Definition: AllocatorList.h:23
llvm::gsym::Header::BaseAddress
uint64_t BaseAddress
The 64 bit base address that all address offsets in the address offsets table are relative to.
Definition: Header.h:62
llvm::gsym::GsymReader::getAddressIndex
Expected< uint64_t > getAddressIndex(const uint64_t Addr) const
Given an address, find the address index.
Definition: GsymReader.cpp:227
llvm::gsym::GsymReader::~GsymReader
~GsymReader()
llvm::gsym::GsymReader::openFile
static llvm::Expected< GsymReader > openFile(StringRef Path)
Construct a GsymReader from a file on disk.
Definition: GsymReader.cpp:34
llvm::lower_bound
auto lower_bound(R &&Range, T &&Value)
Provide wrappers to std::lower_bound which take ranges instead of having to pass begin/end explicitly...
Definition: STLExtras.h:1661
llvm::gsym::Header
The GSYM header.
Definition: Header.h:45
llvm::Optional
Definition: APInt.h:33
T
#define T
Definition: Mips16ISelLowering.cpp:341
Offset
uint64_t Offset
Definition: ELFObjHandler.cpp:81
llvm::Expected
Tagged union holding either a T or a Error.
Definition: APFloat.h:42
FunctionInfo.h
llvm::gsym::GsymReader::create
static llvm::Expected< llvm::gsym::GsymReader > create(std::unique_ptr< MemoryBuffer > &MemBuffer)
Create a GSYM from a memory buffer.
Definition: GsymReader.cpp:50
llvm::gsym::InlineInfo
Inline information stores the name of the inline function along with an array of address ranges.
Definition: InlineInfo.h:61
llvm::ArrayRef::data
const T * data() const
Definition: ArrayRef.h:162
llvm::AArch64CC::LT
@ LT
Definition: AArch64BaseInfo.h:266
llvm::gsym::GsymReader::getAddress
Optional< uint64_t > getAddress(size_t Index) const
Gets an address from the address table.
Definition: GsymReader.cpp:209
FileEntry.h
llvm::gsym::GsymReader
GsymReader is used to read GSYM data from a file or buffer.
Definition: GsymReader.h:44
llvm::gsym::GsymReader::addressForIndex
Optional< uint64_t > addressForIndex(size_t Index) const
Get an appropriate address from the address table.
Definition: GsymReader.h:241
llvm::gsym::GsymReader::getFile
Optional< FileEntry > getFile(uint32_t Index) const
Get a file entry for the supplied file index.
Definition: GsymReader.h:141
llvm::gsym::GsymReader::getString
StringRef getString(uint32_t Offset) const
Get a string from the string table.
Definition: GsymReader.h:130
parse
Definition: regcomp.c:192
llvm::gsym::GsymReader::copyBuffer
static llvm::Expected< GsymReader > copyBuffer(StringRef Bytes)
Construct a GsymReader from a buffer.
Definition: GsymReader.cpp:44
llvm::raw_ostream
This class implements an extremely fast bulk output stream that can only output to a stream.
Definition: raw_ostream.h:53
llvm::gsym::GsymReader::getHeader
const Header & getHeader() const
Access the GSYM header.
Definition: GsymReader.cpp:201
LineEntry.h
llvm::None
const NoneType None
Definition: None.h:23
llvm::gsym::LineTable
LineTable class contains deserialized versions of line tables for each function's address ranges.
Definition: LineTable.h:118
llvm::gsym::FunctionInfo
Function information in GSYM files encodes information for one contiguous address range.
Definition: FunctionInfo.h:89
Index
uint32_t Index
Definition: ELFObjHandler.cpp:84
uint64_t
StringTable.h
Addr
uint64_t Addr
Definition: ELFObjHandler.cpp:80
ErrorOr.h
llvm::gsym::GsymReader::getNumAddresses
uint32_t getNumAddresses() const
Get the number of addresses in this Gsym file.
Definition: GsymReader.h:197
ArrayRef.h
llvm::ArrayRef< uint8_t >
llvm::StringRef
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:58
llvm::gsym::GsymReader::lookup
llvm::Expected< LookupResult > lookup(uint64_t Addr) const
Look up an address in a GSYM.
Definition: GsymReader.cpp:279
uint32_t
llvm::gsym::Header::NumAddresses
uint32_t NumAddresses
The number of addresses stored in the address offsets table.
Definition: Header.h:64
llvm::ArrayRef::begin
iterator begin() const
Definition: ArrayRef.h:153
llvm::Error
Lightweight error class with error context and mandatory checking.
Definition: Error.h:157
DataExtractor.h
llvm::gsym::StringTable
String tables in GSYM files are required to start with an empty string at offset zero.
Definition: StringTable.h:21
llvm::gsym::GsymReader::getAddressInfoOffset
Optional< uint64_t > getAddressInfoOffset(size_t Index) const
Given an address index, get the offset for the FunctionInfo.
Definition: GsymReader.cpp:219
llvm::gsym::GsymReader::dump
void dump(raw_ostream &OS)
Dump the entire Gsym data contained in this object.
Definition: GsymReader.cpp:294
llvm::gsym::GsymReader::getAddrOffsets
ArrayRef< T > getAddrOffsets() const
Get an appropriate address info offsets array.
Definition: GsymReader.h:223
llvm::ArrayRef::size
size_t size() const
size - Get the array size.
Definition: ArrayRef.h:165
llvm::support::endianness
endianness
Definition: Endian.h:27
llvm::gsym::GsymReader::getAddressOffsetIndex
llvm::Optional< uint64_t > getAddressOffsetIndex(const uint64_t AddrOffset) const
Lookup an address offset in the AddrOffsets table.
Definition: GsymReader.h:257
Header.h
Endian.h
llvm::gsym::GsymReader::getFunctionInfo
llvm::Expected< FunctionInfo > getFunctionInfo(uint64_t Addr) const
Get the full function info for an address.
Definition: GsymReader.cpp:257
llvm::ArrayRef::end
iterator end() const
Definition: ArrayRef.h:154