LLVM 23.0.0git
FunctionInfo.h
Go to the documentation of this file.
1//===- FunctionInfo.h -------------------------------------------*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
9#ifndef LLVM_DEBUGINFO_GSYM_FUNCTIONINFO_H
10#define LLVM_DEBUGINFO_GSYM_FUNCTIONINFO_H
11
22#include <cstdint>
23
24namespace llvm {
25class raw_ostream;
26
27namespace gsym {
28
29class GsymCreator;
30class GsymReader;
31/// Function information in GSYM files encodes information for one contiguous
32/// address range. If a function has discontiguous address ranges, they will
33/// need to be encoded using multiple FunctionInfo objects.
34///
35/// ENCODING
36///
37/// The function information gets the function start address as an argument
38/// to the FunctionInfo::decode(...) function. This information is calculated
39/// from the GSYM header and an address offset from the GSYM address offsets
40/// table. The encoded FunctionInfo information must be aligned to a 4 byte
41/// boundary.
42///
43/// The encoded data for a FunctionInfo starts with fixed data that all
44/// function info objects have:
45///
46/// ENCODING NAME DESCRIPTION
47/// ========= =========== ====================================================
48/// uint32_t Size The size in bytes of this function.
49/// uint32_t Name The string table offset of the function name.
50///
51/// The optional data in a FunctionInfo object follows this fixed information
52/// and consists of a stream of tuples that consist of:
53///
54/// ENCODING NAME DESCRIPTION
55/// ========= =========== ====================================================
56/// uint32_t InfoType An "InfoType" enumeration that describes the type
57/// of optional data that is encoded.
58/// uint32_t InfoLength The size in bytes of the encoded data that
59/// immediately follows this length if this value is
60/// greater than zero.
61/// uint8_t[] InfoData Encoded bytes that represent the data for the
62/// "InfoType". These bytes are only present if
63/// "InfoLength" is greater than zero.
64///
65/// The "InfoType" is an enumeration:
66///
67/// enum InfoType {
68/// EndOfList = 0u,
69/// LineTableInfo = 1u,
70/// InlineInfo = 2u,
71/// MergedFunctionsInfo = 3u,
72/// CallSiteInfo = 4u
73/// };
74///
75/// This stream of tuples is terminated by an "InfoType" whose value is
76/// InfoType::EndOfList and a zero for "InfoLength". This signifies the end of
77/// the optional information list. This format allows us to add new optional
78/// information data to a FunctionInfo object over time and allows older
79/// clients to still parse the format and skip over any data that they don't
80/// understand or want to parse.
81///
82/// So the function information encoding essentially looks like:
83///
84/// struct {
85/// uint32_t Size;
86/// uint32_t Name;
87/// struct {
88/// uint32_t InfoType;
89/// uint32_t InfoLength;
90/// uint8_t InfoData[InfoLength];
91/// }[N];
92/// }
93///
94/// Where "N" is the number of tuples.
97 gsym_strp_t Name; ///< String table offset in the string table.
98 std::optional<LineTable> OptLineTable;
99 std::optional<InlineInfo> Inline;
100 std::optional<MergedFunctionsInfo> MergedFunctions;
101 std::optional<CallSiteInfoCollection> CallSites;
102 /// If we encode a FunctionInfo during segmenting so we know its size, we can
103 /// cache that encoding here so we don't need to re-encode it when saving the
104 /// GSYM file.
106
108 : Range(Addr, Addr + Size), Name(Name) {}
109
110 /// Query if a FunctionInfo has rich debug info.
111 ///
112 /// \returns A bool that indicates if this object has something else than
113 /// range and name. When converting information from a symbol table and from
114 /// debug info, we might end up with multiple FunctionInfo objects for the
115 /// same range and we need to be able to tell which one is the better object
116 /// to use.
117 bool hasRichInfo() const { return OptLineTable || Inline || CallSites; }
118
119 /// Query if a FunctionInfo object is valid.
120 ///
121 /// Address and size can be zero and there can be no line entries for a
122 /// symbol so the only indication this entry is valid is if the name is
123 /// not zero. This can happen when extracting information from symbol
124 /// tables that do not encode symbol sizes. In that case only the
125 /// address and name will be filled in.
126 ///
127 /// \returns A boolean indicating if this FunctionInfo is valid.
128 bool isValid() const {
129 return Name != 0;
130 }
131
132 /// Decode an object from a binary data stream.
133 ///
134 /// \param Data The binary stream to read the data from. This object must
135 /// have the data for the object starting at offset zero. The data
136 /// can contain more data than needed.
137 ///
138 /// \param BaseAddr The FunctionInfo's start address and will be used as the
139 /// base address when decoding any contained information like the line table
140 /// and the inline info.
141 ///
142 /// \returns A FunctionInfo or an error describing the issue that was
143 /// encountered during decoding.
145 uint64_t BaseAddr);
146
147 /// Encode this object into FileWriter stream.
148 ///
149 /// \param O The binary stream to write the data to at the current file
150 /// position.
151 ///
152 /// \param NoPadding Directly write the FunctionInfo data, without any padding.
153 /// By default, FunctionInfo will be 4-byte aligned by padding with
154 /// 0's at the start. This is OK since the function will return the offset of
155 /// actual data in the stream. However when writing FunctionInfo's as a
156 /// stream, the padding will break the decoding of the data - since the offset
157 /// where the FunctionInfo starts is not kept in this scenario.
158 ///
159 /// \returns An error object that indicates failure or the offset of the
160 /// function info that was successfully written into the stream.
162 bool NoPadding = false) const;
163
164 /// Encode this function info into the internal byte cache and return the size
165 /// in bytes.
166 ///
167 /// When segmenting GSYM files we need to know how big each FunctionInfo will
168 /// encode into so we can generate segments of the right size. We don't want
169 /// to have to encode a FunctionInfo twice, so we can cache the encoded bytes
170 /// and re-use them when calling FunctionInfo::encode(...).
171 ///
172 /// \returns The size in bytes of the FunctionInfo if it were to be encoded
173 /// into a byte stream.
175
176 /// Lookup an address within a FunctionInfo object's data stream.
177 ///
178 /// Instead of decoding an entire FunctionInfo object when doing lookups,
179 /// we can decode only the information we need from the FunctionInfo's data
180 /// for the specific address. The lookup result information is returned as
181 /// a LookupResult.
182 ///
183 /// \param Data The binary stream to read the data from. This object must
184 /// have the data for the object starting at offset zero. The data
185 /// can contain more data than needed.
186 ///
187 /// \param GR The GSYM reader that contains the string and file table that
188 /// will be used to fill in information in the returned result.
189 ///
190 /// \param FuncAddr The function start address decoded from the GsymReader.
191 ///
192 /// \param Addr The address to lookup.
193 ///
194 /// \param MergedFuncsData A pointer to an optional GsymDataExtractor that, if
195 /// non-null, will be set to the raw data of the MergedFunctionsInfo, if
196 /// present.
197 ///
198 /// \returns A LookupResult or an error describing the issue that was
199 /// encountered during decoding. An error should only be returned if the
200 /// address is not contained in the FunctionInfo or if the data is corrupted.
202 lookup(GsymDataExtractor &Data, const GsymReader &GR, uint64_t FuncAddr,
203 uint64_t Addr,
204 std::optional<GsymDataExtractor> *MergedFuncsData = nullptr);
205
206 uint64_t startAddress() const { return Range.start(); }
207 uint64_t endAddress() const { return Range.end(); }
208 uint64_t size() const { return Range.size(); }
209
210 void clear() {
211 Range = {0, 0};
212 Name = 0;
213 OptLineTable = std::nullopt;
214 Inline = std::nullopt;
215 }
216};
217
218inline bool operator==(const FunctionInfo &LHS, const FunctionInfo &RHS) {
219 return LHS.Range == RHS.Range && LHS.Name == RHS.Name &&
220 LHS.OptLineTable == RHS.OptLineTable && LHS.Inline == RHS.Inline;
221}
222inline bool operator!=(const FunctionInfo &LHS, const FunctionInfo &RHS) {
223 return !(LHS == RHS);
224}
225/// This sorting will order things consistently by address range first, but
226/// then followed by increasing levels of debug info like inline information
227/// and line tables. We might end up with a FunctionInfo from debug info that
228/// will have the same range as one from the symbol table, but we want to
229/// quickly be able to sort and use the best version when creating the final
230/// GSYM file. This function compares the inline information as we have seen
231/// cases where LTO can generate a wide array of differing inline information,
232/// mostly due to messing up the address ranges for inlined functions, so the
233/// inline information with the most entries will appear last. If the inline
234/// information match, either by both function infos not having any or both
235/// being exactly the same, we will then compare line tables. Comparing line
236/// tables allows the entry with the most line entries to appear last. This
237/// ensures we are able to save the FunctionInfo with the most debug info into
238/// the GSYM file.
239inline bool operator<(const FunctionInfo &LHS, const FunctionInfo &RHS) {
240 // First sort by address range
241 return std::tie(LHS.Range, LHS.Inline, LHS.OptLineTable) <
242 std::tie(RHS.Range, RHS.Inline, RHS.OptLineTable);
243}
244
245LLVM_ABI raw_ostream &operator<<(raw_ostream &OS, const FunctionInfo &R);
246
247} // namespace gsym
248} // namespace llvm
249
250#endif // LLVM_DEBUGINFO_GSYM_FUNCTIONINFO_H
#define LLVM_ABI
Definition Compiler.h:213
This file defines the SmallString class.
Value * RHS
Value * LHS
A class that represents an address range.
Tagged union holding either a T or a Error.
Definition Error.h:485
SmallString - A SmallString is just a SmallVector with methods and accessors that make it work better...
Definition SmallString.h:26
A simplified binary data writer class that doesn't require targets, target definitions,...
Definition FileWriter.h:30
GsymCreator is used to emit GSYM data to a stand alone file or section within a file.
A DataExtractor subclass that adds GSYM-specific string offset support.
GsymReader is used to read GSYM data from a file or buffer.
Definition GsymReader.h:48
This class implements an extremely fast bulk output stream that can only output to a stream.
Definition raw_ostream.h:53
uint64_t gsym_strp_t
The type of string offset used in the code.
Definition GsymTypes.h:21
LLVM_ABI raw_ostream & operator<<(raw_ostream &OS, const CallSiteInfo &CSI)
bool operator<(const FunctionInfo &LHS, const FunctionInfo &RHS)
This sorting will order things consistently by address range first, but then followed by increasing l...
bool operator==(const FunctionInfo &LHS, const FunctionInfo &RHS)
bool operator!=(const FunctionInfo &LHS, const FunctionInfo &RHS)
This is an optimization pass for GlobalISel generic memory operations.
FunctionAddr VTableAddr uintptr_t uintptr_t Data
Definition InstrProf.h:221
Function information in GSYM files encodes information for one contiguous address range.
std::optional< InlineInfo > Inline
std::optional< MergedFunctionsInfo > MergedFunctions
uint64_t startAddress() const
uint64_t endAddress() const
static LLVM_ABI llvm::Expected< FunctionInfo > decode(GsymDataExtractor &Data, uint64_t BaseAddr)
Decode an object from a binary data stream.
bool isValid() const
Query if a FunctionInfo object is valid.
std::optional< CallSiteInfoCollection > CallSites
bool hasRichInfo() const
Query if a FunctionInfo has rich debug info.
uint64_t size() const
FunctionInfo(uint64_t Addr=0, uint64_t Size=0, gsym_strp_t Name=0)
gsym_strp_t Name
String table offset in the string table.
LLVM_ABI llvm::Expected< uint64_t > encode(FileWriter &O, bool NoPadding=false) const
Encode this object into FileWriter stream.
SmallString< 32 > EncodingCache
If we encode a FunctionInfo during segmenting so we know its size, we can cache that encoding here so...
std::optional< LineTable > OptLineTable
LLVM_ABI uint64_t cacheEncoding(GsymCreator &GC)
Encode this function info into the internal byte cache and return the size in bytes.
static LLVM_ABI llvm::Expected< LookupResult > lookup(GsymDataExtractor &Data, const GsymReader &GR, uint64_t FuncAddr, uint64_t Addr, std::optional< GsymDataExtractor > *MergedFuncsData=nullptr)
Lookup an address within a FunctionInfo object's data stream.