LLVM 20.0.0git
FunctionInfo.h
Go to the documentation of this file.
1//===- FunctionInfo.h -------------------------------------------*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
9#ifndef LLVM_DEBUGINFO_GSYM_FUNCTIONINFO_H
10#define LLVM_DEBUGINFO_GSYM_FUNCTIONINFO_H
11
20#include <cstdint>
21
22namespace llvm {
23class raw_ostream;
24
25namespace gsym {
26
27class GsymReader;
28/// Function information in GSYM files encodes information for one contiguous
29/// address range. If a function has discontiguous address ranges, they will
30/// need to be encoded using multiple FunctionInfo objects.
31///
32/// ENCODING
33///
34/// The function information gets the function start address as an argument
35/// to the FunctionInfo::decode(...) function. This information is calculated
36/// from the GSYM header and an address offset from the GSYM address offsets
37/// table. The encoded FunctionInfo information must be aligned to a 4 byte
38/// boundary.
39///
40/// The encoded data for a FunctionInfo starts with fixed data that all
41/// function info objects have:
42///
43/// ENCODING NAME DESCRIPTION
44/// ========= =========== ====================================================
45/// uint32_t Size The size in bytes of this function.
46/// uint32_t Name The string table offset of the function name.
47///
48/// The optional data in a FunctionInfo object follows this fixed information
49/// and consists of a stream of tuples that consist of:
50///
51/// ENCODING NAME DESCRIPTION
52/// ========= =========== ====================================================
53/// uint32_t InfoType An "InfoType" enumeration that describes the type
54/// of optional data that is encoded.
55/// uint32_t InfoLength The size in bytes of the encoded data that
56/// immediately follows this length if this value is
57/// greater than zero.
58/// uint8_t[] InfoData Encoded bytes that represent the data for the
59/// "InfoType". These bytes are only present if
60/// "InfoLength" is greater than zero.
61///
62/// The "InfoType" is an enumeration:
63///
64/// enum InfoType {
65/// EndOfList = 0u,
66/// LineTableInfo = 1u,
67/// InlineInfo = 2u,
68/// MergedFunctionsInfo = 3u,
69/// CallSiteInfo = 4u
70/// };
71///
72/// This stream of tuples is terminated by an "InfoType" whose value is
73/// InfoType::EndOfList and a zero for "InfoLength". This signifies the end of
74/// the optional information list. This format allows us to add new optional
75/// information data to a FunctionInfo object over time and allows older
76/// clients to still parse the format and skip over any data that they don't
77/// understand or want to parse.
78///
79/// So the function information encoding essentially looks like:
80///
81/// struct {
82/// uint32_t Size;
83/// uint32_t Name;
84/// struct {
85/// uint32_t InfoType;
86/// uint32_t InfoLength;
87/// uint8_t InfoData[InfoLength];
88/// }[N];
89/// }
90///
91/// Where "N" is the number of tuples.
94 uint32_t Name; ///< String table offset in the string table.
95 std::optional<LineTable> OptLineTable;
96 std::optional<InlineInfo> Inline;
97 std::optional<MergedFunctionsInfo> MergedFunctions;
98 std::optional<CallSiteInfoCollection> CallSites;
99 /// If we encode a FunctionInfo during segmenting so we know its size, we can
100 /// cache that encoding here so we don't need to re-encode it when saving the
101 /// GSYM file.
103
105 : Range(Addr, Addr + Size), Name(N) {}
106
107 /// Query if a FunctionInfo has rich debug info.
108 ///
109 /// \returns A bool that indicates if this object has something else than
110 /// range and name. When converting information from a symbol table and from
111 /// debug info, we might end up with multiple FunctionInfo objects for the
112 /// same range and we need to be able to tell which one is the better object
113 /// to use.
114 bool hasRichInfo() const { return OptLineTable || Inline || CallSites; }
115
116 /// Query if a FunctionInfo object is valid.
117 ///
118 /// Address and size can be zero and there can be no line entries for a
119 /// symbol so the only indication this entry is valid is if the name is
120 /// not zero. This can happen when extracting information from symbol
121 /// tables that do not encode symbol sizes. In that case only the
122 /// address and name will be filled in.
123 ///
124 /// \returns A boolean indicating if this FunctionInfo is valid.
125 bool isValid() const {
126 return Name != 0;
127 }
128
129 /// Decode an object from a binary data stream.
130 ///
131 /// \param Data The binary stream to read the data from. This object must
132 /// have the data for the object starting at offset zero. The data
133 /// can contain more data than needed.
134 ///
135 /// \param BaseAddr The FunctionInfo's start address and will be used as the
136 /// base address when decoding any contained information like the line table
137 /// and the inline info.
138 ///
139/// \returns A FunctionInfo or an error describing the issue that was
140 /// encountered during decoding.
142 uint64_t BaseAddr);
143
144 /// Encode this object into FileWriter stream.
145 ///
146 /// \param O The binary stream to write the data to at the current file
147 /// position.
148 ///
149/// \param NoPadding Directly write the FunctionInfo data, without any padding.
150 /// By default, FunctionInfo will be 4-byte aligned by padding with
151 /// 0's at the start. This is OK since the function will return the offset of
152 /// actual data in the stream. However when writing FunctionInfo's as a
153 /// stream, the padding will break the decoding of the data - since the offset
154 /// where the FunctionInfo starts is not kept in this scenario.
155 ///
156 /// \returns An error object that indicates failure or the offset of the
157 /// function info that was successfully written into the stream.
158 llvm::Expected<uint64_t> encode(FileWriter &O, bool NoPadding = false) const;
159
160 /// Encode this function info into the internal byte cache and return the size
161 /// in bytes.
162 ///
163 /// When segmenting GSYM files we need to know how big each FunctionInfo will
164 /// encode into so we can generate segments of the right size. We don't want
165 /// to have to encode a FunctionInfo twice, so we can cache the encoded bytes
166/// and re-use them when calling FunctionInfo::encode(...).
167 ///
168 /// \returns The size in bytes of the FunctionInfo if it were to be encoded
169 /// into a byte stream.
171
172 /// Lookup an address within a FunctionInfo object's data stream.
173 ///
174 /// Instead of decoding an entire FunctionInfo object when doing lookups,
175 /// we can decode only the information we need from the FunctionInfo's data
176 /// for the specific address. The lookup result information is returned as
177 /// a LookupResult.
178 ///
179 /// \param Data The binary stream to read the data from. This object must
180 /// have the data for the object starting at offset zero. The data
181 /// can contain more data than needed.
182 ///
183 /// \param GR The GSYM reader that contains the string and file table that
184 /// will be used to fill in information in the returned result.
185 ///
186 /// \param FuncAddr The function start address decoded from the GsymReader.
187 ///
188 /// \param Addr The address to lookup.
189 ///
190/// \returns A LookupResult or an error describing the issue that was
191 /// encountered during decoding. An error should only be returned if the
192 /// address is not contained in the FunctionInfo or if the data is corrupted.
194 const GsymReader &GR,
195 uint64_t FuncAddr,
196 uint64_t Addr);
197
198 uint64_t startAddress() const { return Range.start(); }
199 uint64_t endAddress() const { return Range.end(); }
200 uint64_t size() const { return Range.size(); }
201
202 void clear() {
203 Range = {0, 0};
204 Name = 0;
205 OptLineTable = std::nullopt;
206 Inline = std::nullopt;
207 }
208};
209
210inline bool operator==(const FunctionInfo &LHS, const FunctionInfo &RHS) {
211 return LHS.Range == RHS.Range && LHS.Name == RHS.Name &&
212 LHS.OptLineTable == RHS.OptLineTable && LHS.Inline == RHS.Inline;
213}
214inline bool operator!=(const FunctionInfo &LHS, const FunctionInfo &RHS) {
215 return !(LHS == RHS);
216}
217/// This sorting will order things consistently by address range first, but
218/// then followed by increasing levels of debug info like inline information
219/// and line tables. We might end up with a FunctionInfo from debug info that
220/// will have the same range as one from the symbol table, but we want to
221/// quickly be able to sort and use the best version when creating the final
222/// GSYM file. This function compares the inline information as we have seen
223/// cases where LTO can generate a wide array of differing inline information,
224/// mostly due to messing up the address ranges for inlined functions, so the
225/// inline information with the most entries will appear last. If the inline
226/// information match, either by both function infos not having any or both
227/// being exactly the same, we will then compare line tables. Comparing line
228/// tables allows the entry with the most line entries to appear last. This
229/// ensures we are able to save the FunctionInfo with the most debug info into
230/// the GSYM file.
231inline bool operator<(const FunctionInfo &LHS, const FunctionInfo &RHS) {
232 // First sort by address range
233 if (LHS.Range != RHS.Range)
234 return LHS.Range < RHS.Range;
235 if (LHS.Inline == RHS.Inline)
236 return LHS.OptLineTable < RHS.OptLineTable;
237 return LHS.Inline < RHS.Inline;
238}
239
240raw_ostream &operator<<(raw_ostream &OS, const FunctionInfo &R);
241
242} // namespace gsym
243} // namespace llvm
244
245#endif // LLVM_DEBUGINFO_GSYM_FUNCTIONINFO_H
uint64_t Addr
uint64_t Size
raw_pwrite_stream & OS
This file defines the SmallString class.
Value * RHS
Value * LHS
A class that represents an address range.
Definition: AddressRanges.h:22
uint64_t start() const
Definition: AddressRanges.h:28
uint64_t end() const
Definition: AddressRanges.h:29
uint64_t size() const
Definition: AddressRanges.h:30
Tagged union holding either a T or a Error.
Definition: Error.h:481
SmallString - A SmallString is just a SmallVector with methods and accessors that make it work better...
Definition: SmallString.h:26
A simplified binary data writer class that doesn't require targets, target definitions,...
Definition: FileWriter.h:29
GsymReader is used to read GSYM data from a file or buffer.
Definition: GsymReader.h:44
This class implements an extremely fast bulk output stream that can only output to a stream.
Definition: raw_ostream.h:52
raw_ostream & operator<<(raw_ostream &OS, const CallSiteInfo &CSI)
bool operator<(const FunctionInfo &LHS, const FunctionInfo &RHS)
This sorting will order things consistently by address range first, but then followed by increasing l...
Definition: FunctionInfo.h:231
bool operator!=(const FunctionInfo &LHS, const FunctionInfo &RHS)
Definition: FunctionInfo.h:214
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
bool operator==(const AddressRangeValuePair &LHS, const AddressRangeValuePair &RHS)
#define N
Function information in GSYM files encodes information for one contiguous address range.
Definition: FunctionInfo.h:92
std::optional< InlineInfo > Inline
Definition: FunctionInfo.h:96
std::optional< MergedFunctionsInfo > MergedFunctions
Definition: FunctionInfo.h:97
uint64_t startAddress() const
Definition: FunctionInfo.h:198
uint64_t endAddress() const
Definition: FunctionInfo.h:199
bool isValid() const
Query if a FunctionInfo object is valid.
Definition: FunctionInfo.h:125
std::optional< CallSiteInfoCollection > CallSites
Definition: FunctionInfo.h:98
bool hasRichInfo() const
Query if a FunctionInfo has rich debug info.
Definition: FunctionInfo.h:114
uint64_t size() const
Definition: FunctionInfo.h:200
static llvm::Expected< LookupResult > lookup(DataExtractor &Data, const GsymReader &GR, uint64_t FuncAddr, uint64_t Addr)
Lookup an address within a FunctionInfo object's data stream.
uint64_t cacheEncoding()
Encode this function info into the internal byte cache and return the size in bytes.
uint32_t Name
String table offset in the string table.
Definition: FunctionInfo.h:94
llvm::Expected< uint64_t > encode(FileWriter &O, bool NoPadding=false) const
Encode this object into FileWriter stream.
SmallString< 32 > EncodingCache
If we encode a FunctionInfo during segmenting so we know its size, we can cache that encoding here so...
Definition: FunctionInfo.h:102
std::optional< LineTable > OptLineTable
Definition: FunctionInfo.h:95
FunctionInfo(uint64_t Addr=0, uint64_t Size=0, uint32_t N=0)
Definition: FunctionInfo.h:104
static llvm::Expected< FunctionInfo > decode(DataExtractor &Data, uint64_t BaseAddr)
Decode an object from a binary data stream.