LLVM 22.0.0git
MemProfRadixTree.h
Go to the documentation of this file.
1//===- MemProfRadixTree.h - MemProf format support ------------*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// A custom Radix Tree builder for memprof data to optimize for space.
10//
11//===----------------------------------------------------------------------===//
12
13#ifndef LLVM_PROFILEDATA_MEMPROFRADIXTREE_H
14#define LLVM_PROFILEDATA_MEMPROFRADIXTREE_H
15
16#include "llvm/ADT/BitVector.h"
20
21#include <optional>
22
23namespace llvm {
24namespace memprof {
25namespace detail {
26// "Dereference" the iterator from DenseMap or OnDiskChainedHashTable. We have
27// to do so in one of two different ways depending on the type of the hash
28// table.
29template <typename value_type, typename IterTy>
30value_type DerefIterator(IterTy Iter) {
31 using deref_type = llvm::remove_cvref_t<decltype(*Iter)>;
32 if constexpr (std::is_same_v<deref_type, value_type>)
33 return *Iter;
34 else
35 return Iter->second;
36}
37} // namespace detail
38
39// A function object that returns a frame for a given FrameId.
40template <typename MapTy> struct FrameIdConverter {
41 std::optional<FrameId> LastUnmappedId;
42 MapTy &Map;
43
44 FrameIdConverter() = delete;
46
47 // Delete the copy constructor and copy assignment operator to avoid a
48 // situation where a copy of FrameIdConverter gets an error in LastUnmappedId
49 // while the original instance doesn't.
52
54 auto Iter = Map.find(Id);
55 if (Iter == Map.end()) {
56 LastUnmappedId = Id;
57 return Frame();
58 }
59 return detail::DerefIterator<Frame>(Iter);
60 }
61};
62
63// A function object that returns a call stack for a given CallStackId.
64template <typename MapTy> struct CallStackIdConverter {
65 std::optional<CallStackId> LastUnmappedId;
66 MapTy &Map;
68
73
74 // Delete the copy constructor and copy assignment operator to avoid a
75 // situation where a copy of CallStackIdConverter gets an error in
76 // LastUnmappedId while the original instance doesn't.
79
80 std::vector<Frame> operator()(CallStackId CSId) {
81 std::vector<Frame> Frames;
82 auto CSIter = Map.find(CSId);
83 if (CSIter == Map.end()) {
84 LastUnmappedId = CSId;
85 } else {
87 detail::DerefIterator<llvm::SmallVector<FrameId>>(CSIter);
88 Frames.reserve(CS.size());
89 for (FrameId Id : CS)
90 Frames.push_back(FrameIdToFrame(Id));
91 }
92 return Frames;
93 }
94};
95
96// A function object that returns a Frame stored at a given index into the Frame
97// array in the profile.
99 const unsigned char *FrameBase;
100
102 LinearFrameIdConverter(const unsigned char *FrameBase)
103 : FrameBase(FrameBase) {}
104
106 uint64_t Offset = static_cast<uint64_t>(LinearId) * Frame::serializedSize();
108 }
109};
110
111// A function object that returns a call stack stored at a given index into the
112// call stack array in the profile.
114 const unsigned char *CallStackBase;
116
119 const unsigned char *CallStackBase,
122
123 std::vector<Frame> operator()(LinearCallStackId LinearCSId) {
124 std::vector<Frame> Frames;
125
126 const unsigned char *Ptr =
128 static_cast<uint64_t>(LinearCSId) * sizeof(LinearFrameId);
129 uint32_t NumFrames =
130 support::endian::readNext<uint32_t, llvm::endianness::little>(Ptr);
131 Frames.reserve(NumFrames);
132 for (; NumFrames; --NumFrames) {
133 LinearFrameId Elem =
134 support::endian::read<LinearFrameId, llvm::endianness::little>(Ptr);
135 // Follow a pointer to the parent, if any. See comments below on
136 // CallStackRadixTreeBuilder for the description of the radix tree format.
137 if (static_cast<std::make_signed_t<LinearFrameId>>(Elem) < 0) {
138 Ptr += (-Elem) * sizeof(LinearFrameId);
139 Elem =
140 support::endian::read<LinearFrameId, llvm::endianness::little>(Ptr);
141 }
142 // We shouldn't encounter another pointer.
143 assert(static_cast<std::make_signed_t<LinearFrameId>>(Elem) >= 0);
144 Frames.push_back(FrameIdToFrame(Elem));
145 Ptr += sizeof(LinearFrameId);
146 }
147
148 return Frames;
149 }
150};
151
152// Used to extract caller-callee pairs from the call stack array. The leaf
153// frame is assumed to call a heap allocation function with GUID 0. The
154// resulting pairs are accumulated in CallerCalleePairs. Users can take it
155// with:
156//
157// auto Pairs = std::move(Extractor.CallerCalleePairs);
159 // The base address of the radix tree array.
160 const unsigned char *CallStackBase;
161 // A functor to convert a linear FrameId to a Frame.
163 // A map from caller GUIDs to lists of call sites in respective callers.
165
166 // The set of linear call stack IDs that we've visited.
168
171 const unsigned char *CallStackBase,
173 unsigned RadixTreeSize)
175 Visited(RadixTreeSize) {}
176
177 void operator()(LinearCallStackId LinearCSId) {
178 const unsigned char *Ptr =
180 static_cast<uint64_t>(LinearCSId) * sizeof(LinearFrameId);
181 uint32_t NumFrames =
182 support::endian::readNext<uint32_t, llvm::endianness::little>(Ptr);
183 // The leaf frame calls a function with GUID 0.
184 uint64_t CalleeGUID = 0;
185 for (; NumFrames; --NumFrames) {
186 LinearFrameId Elem =
187 support::endian::read<LinearFrameId, llvm::endianness::little>(Ptr);
188 // Follow a pointer to the parent, if any. See comments below on
189 // CallStackRadixTreeBuilder for the description of the radix tree format.
190 if (static_cast<std::make_signed_t<LinearFrameId>>(Elem) < 0) {
191 Ptr += (-Elem) * sizeof(LinearFrameId);
192 Elem =
193 support::endian::read<LinearFrameId, llvm::endianness::little>(Ptr);
194 }
195 // We shouldn't encounter another pointer.
196 assert(static_cast<std::make_signed_t<LinearFrameId>>(Elem) >= 0);
197
198 // Add a new caller-callee pair.
199 Frame F = FrameIdToFrame(Elem);
200 uint64_t CallerGUID = F.Function;
201 LineLocation Loc(F.LineOffset, F.Column);
202 CallerCalleePairs[CallerGUID].emplace_back(Loc, CalleeGUID);
203
204 // Keep track of the indices we've visited. If we've already visited the
205 // current one, terminate the traversal. We will not discover any new
206 // caller-callee pair by continuing the traversal.
207 unsigned Offset =
208 std::distance(CallStackBase, Ptr) / sizeof(LinearFrameId);
209 if (Visited.test(Offset))
210 break;
212
213 Ptr += sizeof(LinearFrameId);
214 CalleeGUID = CallerGUID;
215 }
216 }
217};
218
219// A convenience wrapper around FrameIdConverter and CallStackIdConverter for
220// tests.
224 : FrameIdConv(MemProfData.Frames),
225 CSIdConv(MemProfData.CallStacks, FrameIdConv) {}
226
227 // Delete the copy constructor and copy assignment operator to avoid a
228 // situation where a copy of IndexedCallstackIdConverter gets an error in
229 // LastUnmappedId while the original instance doesn't.
233
234 std::vector<Frame> operator()(CallStackId CSId) { return CSIdConv(CSId); }
235
238};
239
240struct FrameStat {
241 // The number of occurrences of a given FrameId.
243 // The sum of indexes where a given FrameId shows up.
245};
246
247// Compute a histogram of Frames in call stacks.
248template <typename FrameIdTy>
251 &MemProfCallStackData);
252
253// Construct a radix tree of call stacks.
254//
255// A set of call stacks might look like:
256//
257// CallStackId 1: f1 -> f2 -> f3
258// CallStackId 2: f1 -> f2 -> f4 -> f5
259// CallStackId 3: f1 -> f2 -> f4 -> f6
260// CallStackId 4: f7 -> f8 -> f9
261//
262// where each fn refers to a stack frame.
263//
264// Since we expect a lot of common prefixes, we can compress the call stacks
265// into a radix tree like:
266//
267// CallStackId 1: f1 -> f2 -> f3
268// |
269// CallStackId 2: +---> f4 -> f5
270// |
271// CallStackId 3: +---> f6
272//
273// CallStackId 4: f7 -> f8 -> f9
274//
275// Now, we are interested in retrieving call stacks for a given CallStackId, so
276// we just need a pointer from a given call stack to its parent. For example,
277// CallStackId 2 would point to CallStackId 1 as a parent.
278//
279// We serialize the radix tree above into a single array along with the length
280// of each call stack and pointers to the parent call stacks.
281//
282// Index: 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14
283// Array: L3 f9 f8 f7 L4 f6 J3 L4 f5 f4 J3 L3 f3 f2 f1
284// ^ ^ ^ ^
285// | | | |
286// CallStackId 4: 0 --+ | | |
287// CallStackId 3: 4 --------------+ | |
288// CallStackId 2: 7 -----------------------+ |
289// CallStackId 1: 11 -----------------------------------+
290//
291// - LN indicates the length of a call stack, encoded as ordinary integer N.
292//
293// - JN indicates a pointer to the parent, encoded as -N.
294//
295// The radix tree allows us to reconstruct call stacks in the leaf-to-root
296// order as we scan the array from left ro right while following pointers to
297// parents along the way.
298//
299// For example, if we are decoding CallStackId 2, we start a forward traversal
300// at Index 7, noting the call stack length of 4 and obtaining f5 and f4. When
301// we see J3 at Index 10, we resume a forward traversal at Index 13 = 10 + 3,
302// picking up f2 and f1. We are done after collecting 4 frames as indicated at
303// the beginning of the traversal.
304//
305// On-disk IndexedMemProfRecord will refer to call stacks by their indexes into
306// the radix tree array, so we do not explicitly encode mappings like:
307// "CallStackId 1 -> 11".
308template <typename FrameIdTy> class CallStackRadixTreeBuilder {
309 // The radix tree array.
310 std::vector<LinearFrameId> RadixArray;
311
312 // Mapping from CallStackIds to indexes into RadixArray.
314
315 // In build, we partition a given call stack into two parts -- the prefix
316 // that's common with the previously encoded call stack and the frames beyond
317 // the common prefix -- the unique portion. Then we want to find out where
318 // the common prefix is stored in RadixArray so that we can link the unique
319 // portion to the common prefix. Indexes, declared below, helps with our
320 // needs. Intuitively, Indexes tells us where each of the previously encoded
321 // call stack is stored in RadixArray. More formally, Indexes satisfies:
322 //
323 // RadixArray[Indexes[I]] == Prev[I]
324 //
325 // for every I, where Prev is the the call stack in the root-to-leaf order
326 // previously encoded by build. (Note that Prev, as passed to
327 // encodeCallStack, is in the leaf-to-root order.)
328 //
329 // For example, if the call stack being encoded shares 5 frames at the root of
330 // the call stack with the previously encoded call stack,
331 // RadixArray[Indexes[0]] is the root frame of the common prefix.
332 // RadixArray[Indexes[5 - 1]] is the last frame of the common prefix.
333 std::vector<LinearCallStackId> Indexes;
334
335 using CSIdPair = std::pair<CallStackId, llvm::SmallVector<FrameIdTy>>;
336
337 // Encode a call stack into RadixArray. Return the starting index within
338 // RadixArray.
339 LinearCallStackId encodeCallStack(
342 const llvm::DenseMap<FrameIdTy, LinearFrameId> *MemProfFrameIndexes);
343
344public:
346
347 // Build a radix tree array.
348 void
350 &&MemProfCallStackData,
351 const llvm::DenseMap<FrameIdTy, LinearFrameId> *MemProfFrameIndexes,
353
354 ArrayRef<LinearFrameId> getRadixArray() const { return RadixArray; }
355
357 return std::move(CallStackPos);
358 }
359};
360
361// Defined in MemProfRadixTree.cpp
363extern template class LLVM_TEMPLATE_ABI
365
366} // namespace memprof
367} // namespace llvm
368#endif // LLVM_PROFILEDATA_MEMPROFRADIXTREE_H
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
This file implements the BitVector class.
#define LLVM_TEMPLATE_ABI
Definition: Compiler.h:214
#define F(x, y, z)
Definition: MD5.cpp:55
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition: ArrayRef.h:41
bool test(unsigned Idx) const
Definition: BitVector.h:461
BitVector & set()
Definition: BitVector.h:351
This class implements a map that also provides access to all stored values in a deterministic order.
Definition: MapVector.h:36
size_t size() const
Definition: SmallVector.h:79
void reserve(size_type N)
Definition: SmallVector.h:664
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1197
An efficient, type-erasing, non-owning reference to a callable.
void build(llvm::MapVector< CallStackId, llvm::SmallVector< FrameIdTy > > &&MemProfCallStackData, const llvm::DenseMap< FrameIdTy, LinearFrameId > *MemProfFrameIndexes, llvm::DenseMap< FrameIdTy, FrameStat > &FrameHistogram)
ArrayRef< LinearFrameId > getRadixArray() const
llvm::DenseMap< CallStackId, LinearCallStackId > takeCallStackPos()
Helper class to iterate through stack ids in both metadata (memprof MIB and callsite) and the corresp...
value_type DerefIterator(IterTy Iter)
template class LLVM_TEMPLATE_ABI CallStackRadixTreeBuilder< FrameId >
uint64_t CallStackId
Definition: MemProf.h:352
uint32_t LinearFrameId
Definition: MemProf.h:238
template class LLVM_TEMPLATE_ABI CallStackRadixTreeBuilder< LinearFrameId >
llvm::DenseMap< FrameIdTy, FrameStat > computeFrameHistogram(llvm::MapVector< CallStackId, llvm::SmallVector< FrameIdTy > > &MemProfCallStackData)
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
@ Offset
Definition: DWP.cpp:477
typename llvm::remove_cvref< T >::type remove_cvref_t
CallStackIdConverter & operator=(const CallStackIdConverter &)=delete
llvm::function_ref< Frame(FrameId)> FrameIdToFrame
std::optional< CallStackId > LastUnmappedId
CallStackIdConverter(MapTy &Map, llvm::function_ref< Frame(FrameId)> FrameIdToFrame)
std::vector< Frame > operator()(CallStackId CSId)
CallStackIdConverter(const CallStackIdConverter &)=delete
void operator()(LinearCallStackId LinearCSId)
llvm::function_ref< Frame(LinearFrameId)> FrameIdToFrame
DenseMap< uint64_t, SmallVector< CallEdgeTy, 0 > > CallerCalleePairs
CallerCalleePairExtractor(const unsigned char *CallStackBase, llvm::function_ref< Frame(LinearFrameId)> FrameIdToFrame, unsigned RadixTreeSize)
FrameIdConverter(const FrameIdConverter &)=delete
std::optional< FrameId > LastUnmappedId
FrameIdConverter & operator=(const FrameIdConverter &)=delete
static constexpr size_t serializedSize()
Definition: MemProf.h:335
static Frame deserialize(const unsigned char *Ptr)
Definition: MemProf.h:320
CallStackIdConverter< decltype(IndexedMemProfData::CallStacks)> CSIdConv
std::vector< Frame > operator()(CallStackId CSId)
FrameIdConverter< decltype(IndexedMemProfData::Frames)> FrameIdConv
IndexedCallstackIdConverter(const IndexedCallstackIdConverter &)=delete
IndexedCallstackIdConverter & operator=(const IndexedCallstackIdConverter &)=delete
IndexedCallstackIdConverter(IndexedMemProfData &MemProfData)
llvm::MapVector< CallStackId, llvm::SmallVector< FrameId > > CallStacks
llvm::MapVector< FrameId, Frame > Frames
llvm::function_ref< Frame(LinearFrameId)> FrameIdToFrame
std::vector< Frame > operator()(LinearCallStackId LinearCSId)
LinearCallStackIdConverter(const unsigned char *CallStackBase, llvm::function_ref< Frame(LinearFrameId)> FrameIdToFrame)
LinearFrameIdConverter(const unsigned char *FrameBase)
Frame operator()(LinearFrameId LinearId)