MemProf.cpp
#include "llvm/ProfileData/MemProf.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/IR/Function.h"
#include "llvm/ProfileData/InstrProf.h"
#include "llvm/ProfileData/SampleProf.h"
#include "llvm/Support/BLAKE3.h"
#include "llvm/Support/Endian.h"
#include "llvm/Support/EndianStream.h"
#include "llvm/Support/HashBuilder.h"

namespace llvm {
namespace memprof {
MemProfSchema getFullSchema() {
  MemProfSchema List;
#define MIBEntryDef(NameTag, Name, Type) List.push_back(Meta::Name);
#include "llvm/ProfileData/MIBEntryDef.inc"
#undef MIBEntryDef
  return List;
}
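
// The #define/#include/#undef sequence above is an X-macro: every
// MIBEntryDef(NameTag, Name, Type) entry in MIBEntryDef.inc expands to one
// List.push_back(Meta::Name). As an illustration only (the actual entries
// live in llvm/ProfileData/MIBEntryDef.inc), an entry along the lines of
//   MIBEntryDef(AllocCount = 1, AllocCount, uint32_t)
// would contribute List.push_back(Meta::AllocCount), so getFullSchema()
// returns every PortableMemInfoBlock field in the order listed in that file.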

MemProfSchema getHotColdSchema() {
  return {Meta::AllocCount, Meta::TotalSize, Meta::TotalLifetime,
          Meta::TotalLifetimeAccessDensity};
}

static size_t serializedSizeV0(const IndexedAllocationInfo &IAI,
                               const MemProfSchema &Schema) {
  size_t Size = 0;
  // The number of frames to serialize.
  Size += sizeof(uint64_t);
  // The callstack frame ids.
  Size += sizeof(FrameId) * IAI.CallStack.size();
  // The size of the payload.
  Size += PortableMemInfoBlock::serializedSize(Schema);
  return Size;
}

static size_t serializedSizeV2(const IndexedAllocationInfo &IAI,
                               const MemProfSchema &Schema) {
  size_t Size = 0;
  // The CallStackId
  Size += sizeof(CallStackId);
  // The size of the payload.
  Size += PortableMemInfoBlock::serializedSize(Schema);
  return Size;
}

static size_t serializedSizeV3(const IndexedAllocationInfo &IAI,
                               const MemProfSchema &Schema) {
  size_t Size = 0;
  // The linear call stack ID.
  Size += sizeof(LinearCallStackId);
  // The size of the payload.
  Size += PortableMemInfoBlock::serializedSize(Schema);
  return Size;
}

size_t IndexedAllocationInfo::serializedSize(const MemProfSchema &Schema,
                                             IndexedVersion Version) const {
  switch (Version) {
  case Version0:
  case Version1:
    return serializedSizeV0(*this, Schema);
  case Version2:
    return serializedSizeV2(*this, Schema);
  case Version3:
    return serializedSizeV3(*this, Schema);
  }
  llvm_unreachable("unsupported MemProf version");
}

static size_t serializedSizeV0(const IndexedMemProfRecord &Record,
                               const MemProfSchema &Schema) {
  // The number of alloc sites to serialize.
  size_t Result = sizeof(uint64_t);
  for (const IndexedAllocationInfo &N : Record.AllocSites)
    Result += N.serializedSize(Schema, Version0);

  // The number of callsites we have information for.
  Result += sizeof(uint64_t);
  for (const auto &Frames : Record.CallSites) {
    // The number of frame ids to serialize.
    Result += sizeof(uint64_t);
    Result += Frames.size() * sizeof(FrameId);
  }
  return Result;
}

static size_t serializedSizeV2(const IndexedMemProfRecord &Record,
                               const MemProfSchema &Schema) {
  // The number of alloc sites to serialize.
  size_t Result = sizeof(uint64_t);
  for (const IndexedAllocationInfo &N : Record.AllocSites)
    Result += N.serializedSize(Schema, Version2);

  // The number of callsites we have information for.
  Result += sizeof(uint64_t);
  // The CallStackId
  Result += Record.CallSiteIds.size() * sizeof(CallStackId);
  return Result;
}

static size_t serializedSizeV3(const IndexedMemProfRecord &Record,
                               const MemProfSchema &Schema) {
  // The number of alloc sites to serialize.
  size_t Result = sizeof(uint64_t);
  for (const IndexedAllocationInfo &N : Record.AllocSites)
    Result += N.serializedSize(Schema, Version3);

  // The number of callsites we have information for.
  Result += sizeof(uint64_t);
  // The linear call stack ID.
  Result += Record.CallSiteIds.size() * sizeof(LinearCallStackId);
  return Result;
}

size_t IndexedMemProfRecord::serializedSize(const MemProfSchema &Schema,
                                            IndexedVersion Version) const {
  switch (Version) {
  case Version0:
  case Version1:
    return serializedSizeV0(*this, Schema);
  case Version2:
    return serializedSizeV2(*this, Schema);
  case Version3:
    return serializedSizeV3(*this, Schema);
  }
  llvm_unreachable("unsupported MemProf version");
}

static void serializeV0(const IndexedMemProfRecord &Record,
                        const MemProfSchema &Schema, raw_ostream &OS) {
  using namespace support;

  endian::Writer LE(OS, llvm::endianness::little);

  LE.write<uint64_t>(Record.AllocSites.size());
  for (const IndexedAllocationInfo &N : Record.AllocSites) {
    LE.write<uint64_t>(N.CallStack.size());
    for (const FrameId &Id : N.CallStack)
      LE.write<FrameId>(Id);
    N.Info.serialize(Schema, OS);
  }

  // Related contexts.
  LE.write<uint64_t>(Record.CallSites.size());
  for (const auto &Frames : Record.CallSites) {
    LE.write<uint64_t>(Frames.size());
    for (const FrameId &Id : Frames)
      LE.write<FrameId>(Id);
  }
}

static void serializeV2(const IndexedMemProfRecord &Record,
                        const MemProfSchema &Schema, raw_ostream &OS) {
  using namespace support;

  endian::Writer LE(OS, llvm::endianness::little);

  LE.write<uint64_t>(Record.AllocSites.size());
  for (const IndexedAllocationInfo &N : Record.AllocSites) {
    LE.write<CallStackId>(N.CSId);
    N.Info.serialize(Schema, OS);
  }

  // Related contexts.
  LE.write<uint64_t>(Record.CallSiteIds.size());
  for (const auto &CSId : Record.CallSiteIds)
    LE.write<CallStackId>(CSId);
}
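
// For orientation, the V2 wire format emitted above is, in order (all fields
// little-endian): a uint64_t count of alloc sites; for each alloc site, one
// CallStackId followed by the PortableMemInfoBlock fields selected by Schema;
// then a uint64_t count of call sites followed by one CallStackId per call
// site. This is a restatement of serializeV2 for readers, not a separate
// format specification.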

static void serializeV3(
    const IndexedMemProfRecord &Record, const MemProfSchema &Schema,
    raw_ostream &OS,
    llvm::DenseMap<CallStackId, LinearCallStackId> &MemProfCallStackIndexes) {
  using namespace support;

  endian::Writer LE(OS, llvm::endianness::little);

  LE.write<uint64_t>(Record.AllocSites.size());
  for (const IndexedAllocationInfo &N : Record.AllocSites) {
    assert(MemProfCallStackIndexes.contains(N.CSId));
    LE.write<LinearCallStackId>(MemProfCallStackIndexes[N.CSId]);
    N.Info.serialize(Schema, OS);
  }

  // Related contexts.
  LE.write<uint64_t>(Record.CallSiteIds.size());
  for (const auto &CSId : Record.CallSiteIds) {
    assert(MemProfCallStackIndexes.contains(CSId));
    LE.write<LinearCallStackId>(MemProfCallStackIndexes[CSId]);
  }
}

void IndexedMemProfRecord::serialize(
    const MemProfSchema &Schema, raw_ostream &OS, IndexedVersion Version,
    llvm::DenseMap<CallStackId, LinearCallStackId> *MemProfCallStackIndexes)
    const {
  switch (Version) {
  case Version0:
  case Version1:
    serializeV0(*this, Schema, OS);
    return;
  case Version2:
    serializeV2(*this, Schema, OS);
    return;
  case Version3:
    serializeV3(*this, Schema, OS, *MemProfCallStackIndexes);
    return;
  }
  llvm_unreachable("unsupported MemProf version");
}
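
// A minimal usage sketch (hypothetical caller, not part of this file): an
// indexed-profile writer targeting V3 would pass the radix-tree mapping, e.g.
//   Record.serialize(Schema, OS, Version3, &MemProfCallStackIndexes);
// while V0-V2 callers can omit the map and rely on the declaration's default
// of nullptr, e.g.
//   Record.serialize(Schema, OS, Version2);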

static IndexedMemProfRecord deserializeV0(const MemProfSchema &Schema,
                                          const unsigned char *Ptr) {
  using namespace support;

  IndexedMemProfRecord Record;

  // Read the meminfo nodes.
  const uint64_t NumNodes =
      endian::readNext<uint64_t, llvm::endianness::little>(Ptr);
  for (uint64_t I = 0; I < NumNodes; I++) {
    IndexedAllocationInfo Node;
    const uint64_t NumFrames =
        endian::readNext<uint64_t, llvm::endianness::little>(Ptr);
    for (uint64_t J = 0; J < NumFrames; J++) {
      const FrameId Id =
          endian::readNext<FrameId, llvm::endianness::little>(Ptr);
      Node.CallStack.push_back(Id);
    }
    Node.CSId = hashCallStack(Node.CallStack);
    Node.Info.deserialize(Schema, Ptr);
    Ptr += PortableMemInfoBlock::serializedSize(Schema);
    Record.AllocSites.push_back(Node);
  }

  // Read the callsite information.
  const uint64_t NumCtxs =
      endian::readNext<uint64_t, llvm::endianness::little>(Ptr);
  for (uint64_t J = 0; J < NumCtxs; J++) {
    const uint64_t NumFrames =
        endian::readNext<uint64_t, llvm::endianness::little>(Ptr);
    llvm::SmallVector<FrameId> Frames;
    Frames.reserve(NumFrames);
    for (uint64_t K = 0; K < NumFrames; K++) {
      const FrameId Id =
          endian::readNext<FrameId, llvm::endianness::little>(Ptr);
      Frames.push_back(Id);
    }
    Record.CallSites.push_back(Frames);
    Record.CallSiteIds.push_back(hashCallStack(Frames));
  }

  return Record;
}

static IndexedMemProfRecord deserializeV2(const MemProfSchema &Schema,
                                          const unsigned char *Ptr) {
  using namespace support;

  IndexedMemProfRecord Record;

  // Read the meminfo nodes.
  const uint64_t NumNodes =
      endian::readNext<uint64_t, llvm::endianness::little>(Ptr);
  Record.AllocSites.reserve(NumNodes);
  for (uint64_t I = 0; I < NumNodes; I++) {
    IndexedAllocationInfo Node;
    Node.CSId = endian::readNext<CallStackId, llvm::endianness::little>(Ptr);
    Node.Info.deserialize(Schema, Ptr);
    Ptr += PortableMemInfoBlock::serializedSize(Schema);
    Record.AllocSites.push_back(Node);
  }

  // Read the callsite information.
  const uint64_t NumCtxs =
      endian::readNext<uint64_t, llvm::endianness::little>(Ptr);
  Record.CallSiteIds.reserve(NumCtxs);
  for (uint64_t J = 0; J < NumCtxs; J++) {
    CallStackId CSId =
        endian::readNext<CallStackId, llvm::endianness::little>(Ptr);
    Record.CallSiteIds.push_back(CSId);
  }

  return Record;
}

static IndexedMemProfRecord deserializeV3(const MemProfSchema &Schema,
                                          const unsigned char *Ptr) {
  using namespace support;

  IndexedMemProfRecord Record;

  // Read the meminfo nodes.
  const uint64_t NumNodes =
      endian::readNext<uint64_t, llvm::endianness::little>(Ptr);
  Record.AllocSites.reserve(NumNodes);
  for (uint64_t I = 0; I < NumNodes; I++) {
    IndexedAllocationInfo Node;
    Node.CSId =
        endian::readNext<LinearCallStackId, llvm::endianness::little>(Ptr);
    Node.Info.deserialize(Schema, Ptr);
    Ptr += PortableMemInfoBlock::serializedSize(Schema);
    Record.AllocSites.push_back(Node);
  }

  // Read the callsite information.
  const uint64_t NumCtxs =
      endian::readNext<uint64_t, llvm::endianness::little>(Ptr);
  Record.CallSiteIds.reserve(NumCtxs);
  for (uint64_t J = 0; J < NumCtxs; J++) {
    // We are storing LinearCallStackId in CallSiteIds, which is a vector of
    // CallStackId. Assert that CallStackId is no smaller than
    // LinearCallStackId.
    static_assert(sizeof(LinearCallStackId) <= sizeof(CallStackId));
    LinearCallStackId CSId =
        endian::readNext<LinearCallStackId, llvm::endianness::little>(Ptr);
    Record.CallSiteIds.push_back(CSId);
  }

  return Record;
}

IndexedMemProfRecord
IndexedMemProfRecord::deserialize(const MemProfSchema &Schema,
                                  const unsigned char *Ptr,
                                  IndexedVersion Version) {
  switch (Version) {
  case Version0:
  case Version1:
    return deserializeV0(Schema, Ptr);
  case Version2:
    return deserializeV2(Schema, Ptr);
  case Version3:
    return deserializeV3(Schema, Ptr);
  }
  llvm_unreachable("unsupported MemProf version");
}

MemProfRecord IndexedMemProfRecord::toMemProfRecord(
    llvm::function_ref<std::vector<Frame>(const CallStackId)> Callback) const {
  MemProfRecord Record;

  Record.AllocSites.reserve(AllocSites.size());
  for (const IndexedAllocationInfo &IndexedAI : AllocSites) {
    AllocationInfo AI;
    AI.Info = IndexedAI.Info;
    AI.CallStack = Callback(IndexedAI.CSId);
    Record.AllocSites.push_back(std::move(AI));
  }

  Record.CallSites.reserve(CallSiteIds.size());
  for (CallStackId CSId : CallSiteIds)
    Record.CallSites.push_back(Callback(CSId));

  return Record;
}

GlobalValue::GUID IndexedMemProfRecord::getGUID(const StringRef FunctionName) {
  // Canonicalize the function name to drop suffixes such as ".llvm.". Note
  // we do not drop any ".__uniq." suffixes, as getCanonicalFnName does not drop
  // those by default. This is by design to differentiate internal linkage
  // functions during matching. By dropping the other suffixes we can then match
  // functions in the profile use phase prior to their addition. Note that this
  // applies to both instrumented and sampled function names.
  StringRef CanonicalName =
      sampleprof::FunctionSamples::getCanonicalFnName(FunctionName);

  // We use the function guid which we expect to be a uint64_t. At
  // this time, it is the lower 64 bits of the md5 of the canonical
  // function name.
  return Function::getGUID(CanonicalName);
}
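
// For illustration (hypothetical names, not from this file): a function whose
// symbol was emitted as "foo.llvm.123456" canonicalizes back to "foo", so
// getGUID of either spelling yields the same value, whereas an
// internal-linkage clone named "foo.__uniq.987" keeps its suffix and therefore
// hashes to a different GUID.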

Expected<MemProfSchema> readMemProfSchema(const unsigned char *&Buffer) {
  using namespace support;

  const unsigned char *Ptr = Buffer;
  const uint64_t NumSchemaIds =
      endian::readNext<uint64_t, llvm::endianness::little>(Ptr);
  if (NumSchemaIds > static_cast<uint64_t>(Meta::Size)) {
    return make_error<InstrProfError>(instrprof_error::malformed,
                                      "memprof schema invalid");
  }

  MemProfSchema Result;
  for (size_t I = 0; I < NumSchemaIds; I++) {
    const uint64_t Tag =
        endian::readNext<uint64_t, llvm::endianness::little>(Ptr);
    if (Tag >= static_cast<uint64_t>(Meta::Size)) {
      return make_error<InstrProfError>(instrprof_error::malformed,
                                        "memprof schema invalid");
    }
    Result.push_back(static_cast<Meta>(Tag));
  }
  // Advance the buffer to one past the schema if we succeeded.
  Buffer = Ptr;
  return Result;
}
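
// On disk, the schema decoded above is just a little-endian uint64_t count
// followed by that many uint64_t tags, each tag being the numeric value of a
// Meta enumerator. For example, a buffer holding the uint64_t values 2, 0, 1
// reads back as a two-entry schema {Meta(0), Meta(1)}; a count or tag that is
// not below Meta::Size is rejected as malformed.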

CallStackId hashCallStack(ArrayRef<FrameId> CS) {
  llvm::HashBuilder<llvm::TruncatedBLAKE3<8>, llvm::endianness::little>
      HashBuilder;
  for (FrameId F : CS)
    HashBuilder.add(F);
  llvm::BLAKE3Result<8> Hash = HashBuilder.final();
  CallStackId CSId;
  std::memcpy(&CSId, Hash.data(), sizeof(Hash));
  return CSId;
}

// Encode a call stack into RadixArray. Return the starting index within
// RadixArray. For each call stack we encode, we emit two or three components
// into RadixArray. If a given call stack doesn't have a common prefix relative
// to the previous one, we emit:
//
// - the frames in the given call stack in the root-to-leaf order
//
// - the length of the given call stack
//
// If a given call stack has a non-empty common prefix relative to the previous
// one, we emit:
//
// - the relative location of the common prefix, encoded as a negative number.
//
// - a portion of the given call stack that's beyond the common prefix
//
// - the length of the given call stack, including the length of the common
//   prefix.
//
// The resulting RadixArray requires a somewhat unintuitive backward traversal
// to reconstruct a call stack -- read the call stack length and scan backward
// while collecting frames in the leaf to root order. build, the caller of this
// function, reverses RadixArray in place so that we can reconstruct a call
// stack as if we were deserializing an array in a typical way -- the call stack
// length followed by the frames in the leaf-to-root order except that we need
// to handle pointers to parents along the way.
//
// To quickly determine the location of the common prefix within RadixArray,
// Indexes caches the indexes of the previous call stack's frames within
// RadixArray.
LinearCallStackId CallStackRadixTreeBuilder::encodeCallStack(
    const llvm::SmallVector<FrameId> *CallStack,
    const llvm::SmallVector<FrameId> *Prev,
    const llvm::DenseMap<FrameId, LinearFrameId> &MemProfFrameIndexes) {
  // Compute the length of the common root prefix between Prev and CallStack.
  uint32_t CommonLen = 0;
  if (Prev) {
    auto Pos = std::mismatch(Prev->rbegin(), Prev->rend(), CallStack->rbegin(),
                             CallStack->rend());
    CommonLen = std::distance(CallStack->rbegin(), Pos.second);
  }

  // Drop the portion beyond CommonLen.
  assert(CommonLen <= Indexes.size());
  Indexes.resize(CommonLen);

  // Append a pointer to the parent.
  if (CommonLen) {
    uint32_t CurrentIndex = RadixArray.size();
    uint32_t ParentIndex = Indexes.back();
    // The offset to the parent must be negative because we are pointing to an
    // element we've already added to RadixArray.
    assert(ParentIndex < CurrentIndex);
    RadixArray.push_back(ParentIndex - CurrentIndex);
  }

  // Copy the part of the call stack beyond the common prefix to RadixArray.
  assert(CommonLen <= CallStack->size());
  for (FrameId F : llvm::drop_begin(llvm::reverse(*CallStack), CommonLen)) {
    // Remember the index of F in RadixArray.
    Indexes.push_back(RadixArray.size());
    RadixArray.push_back(MemProfFrameIndexes.find(F)->second);
  }
  assert(CallStack->size() == Indexes.size());

  // End with the call stack length.
  RadixArray.push_back(CallStack->size());

  // Return the index within RadixArray where we can start reconstructing a
  // given call stack from.
  return RadixArray.size() - 1;
}
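
// A small worked example of encodeCallStack (hypothetical frames, not taken
// from a real profile): let call stack A be {F3, F2, F1} and B be {F4, F1},
// both stored leaf-to-root, with MemProfFrameIndexes mapping Fi to fi.
// Encoding A first and then B yields:
//
//   after A:  RadixArray = [f1, f2, f3, 3]                 Pos(A) = 3
//   after B:  the common root prefix is {F1}, so we emit a parent pointer
//             (0 - 4 = -4), the remaining frame, and the total length:
//             RadixArray = [f1, f2, f3, 3, -4, f4, 2]      Pos(B) = 6
//
// After build() reverses the array, RadixArray = [2, f4, -4, 3, f3, f2, f1]
// with Pos(B) = 0 and Pos(A) = 3: each position points at a length, frames
// follow in leaf-to-root order, and a negative entry is a jump to the parent's
// remaining frames.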

void CallStackRadixTreeBuilder::build(
    llvm::MapVector<CallStackId, llvm::SmallVector<FrameId>>
        &&MemProfCallStackData,
    const llvm::DenseMap<FrameId, LinearFrameId> &MemProfFrameIndexes,
    llvm::DenseMap<FrameId, FrameStat> &FrameHistogram) {
  // Take the vector portion of MemProfCallStackData. The vector is exactly
  // what we need to sort. Also, we no longer need its lookup capability.
  llvm::SmallVector<CSIdPair, 0> CallStacks = MemProfCallStackData.takeVector();

  // Return early if we have no work to do.
  if (CallStacks.empty()) {
    RadixArray.clear();
    CallStackPos.clear();
    return;
  }

  // Sorting the list of call stacks in the dictionary order is sufficient to
  // maximize the length of the common prefix between two adjacent call stacks
  // and thus minimize the length of RadixArray. However, we go one step
  // further and try to reduce the number of times we follow pointers to parents
  // during deserialization. Consider a poorly encoded radix tree:
  //
  // CallStackId 1:  f1 -> f2 -> f3
  //                  |
  // CallStackId 2:   +--- f4 -> f5
  //                         |
  // CallStackId 3:          +--> f6
  //
  // Here, f2 and f4 appear once and twice, respectively, in the call stacks.
  // Once we encode CallStackId 1 into RadixArray, every other call stack with
  // common prefix f1 ends up pointing to CallStackId 1. Since CallStackId 3
  // shares "f1 f4" with CallStackId 2, CallStackId 3 needs to follow pointers
  // to parents twice.
  //
  // We try to alleviate the situation by sorting the list of call stacks by
  // comparing the popularity of frames rather than the integer values of
  // FrameIds. In the example above, f4 is more popular than f2, so we sort the
  // call stacks and encode them as:
  //
  // CallStackId 2:  f1 -- f4 -> f5
  //                  |     |
  // CallStackId 3:   |     +--> f6
  //                  |
  // CallStackId 1:   +--> f2 -> f3
  //
  // Notice that CallStackId 3 follows a pointer to a parent only once.
  //
  // All this is a quick-n-dirty trick to reduce the number of jumps. The
  // proper way would be to compute the weight of each radix tree node -- how
  // many call stacks use a given radix tree node, and encode a radix tree from
  // the heaviest node first. We do not do so because that's a lot of work.
  llvm::sort(CallStacks, [&](const CSIdPair &L, const CSIdPair &R) {
    // Call stacks are stored from leaf to root. Perform comparisons from the
    // root.
    return std::lexicographical_compare(
        L.second.rbegin(), L.second.rend(), R.second.rbegin(), R.second.rend(),
        [&](FrameId F1, FrameId F2) {
          uint64_t H1 = FrameHistogram[F1].Count;
          uint64_t H2 = FrameHistogram[F2].Count;
          // Popular frames should come later because we encode call stacks
          // from the last one in the list.
          if (H1 != H2)
            return H1 < H2;
          // For sort stability.
          return F1 < F2;
        });
  });

  // Reserve some reasonable amount of storage.
  RadixArray.clear();
  RadixArray.reserve(CallStacks.size() * 8);

  // Indexes will grow as long as the longest call stack.
  Indexes.clear();
  Indexes.reserve(512);

  // CallStackPos will grow to exactly CallStacks.size() entries.
  CallStackPos.clear();
  CallStackPos.reserve(CallStacks.size());

  // Compute the radix array. We encode one call stack at a time, computing the
  // longest prefix that's shared with the previous call stack we encode. For
  // each call stack we encode, we remember a mapping from CallStackId to its
  // position within RadixArray.
  //
  // As an optimization, we encode from the last call stack in CallStacks to
  // reduce the number of times we follow pointers to the parents. Consider the
  // list of call stacks that has been sorted in the dictionary order:
  //
  // Call Stack 1: F1
  // Call Stack 2: F1 -> F2
  // Call Stack 3: F1 -> F2 -> F3
  //
  // If we traversed CallStacks in the forward order, we would end up with a
  // radix tree like:
  //
  // Call Stack 1: F1
  //               |
  // Call Stack 2: +---> F2
  //                     |
  // Call Stack 3:       +---> F3
  //
  // Notice that each call stack jumps to the previous one. However, if we
  // traverse CallStacks in the reverse order, then Call Stack 3 has the
  // complete call stack encoded without any pointers. Call Stack 1 and 2 point
  // to appropriate prefixes of Call Stack 3.
  const llvm::SmallVector<FrameId> *Prev = nullptr;
  for (const auto &[CSId, CallStack] : llvm::reverse(CallStacks)) {
    LinearCallStackId Pos =
        encodeCallStack(&CallStack, Prev, MemProfFrameIndexes);
    CallStackPos.insert({CSId, Pos});
    Prev = &CallStack;
  }

  // "RadixArray.size() - 1" below is problematic if RadixArray is empty.
  assert(!RadixArray.empty());

  // Reverse the radix array in place. We do so mostly for intuitive
  // deserialization where we would read the length field and then the call
  // stack frames proper just like any other array deserialization, except
  // that we have occasional jumps to take advantage of prefixes.
  for (size_t I = 0, J = RadixArray.size() - 1; I < J; ++I, --J)
    std::swap(RadixArray[I], RadixArray[J]);

  // "Reverse" the indexes stored in CallStackPos.
  for (auto &[K, V] : CallStackPos)
    V = RadixArray.size() - 1 - V;
}

llvm::DenseMap<FrameId, FrameStat>
computeFrameHistogram(llvm::MapVector<CallStackId, llvm::SmallVector<FrameId>>
                          &MemProfCallStackData) {
  llvm::DenseMap<FrameId, FrameStat> Histogram;

  for (const auto &KV : MemProfCallStackData) {
    const auto &CS = KV.second;
    for (unsigned I = 0, E = CS.size(); I != E; ++I) {
      auto &S = Histogram[CS[I]];
      ++S.Count;
      S.PositionSum += I;
    }
  }
  return Histogram;
}
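
// A small worked example of computeFrameHistogram (hypothetical FrameIds):
// given the leaf-to-root call stacks {F1, F2} and {F1, F3}, the loop above
// produces Count(F1) = 2 with PositionSum(F1) = 0, and Count(F2) =
// Count(F3) = 1 with PositionSum 1 each, since F2 and F3 each appear one
// position away from the leaf.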

void verifyIndexedMemProfRecord(const IndexedMemProfRecord &Record) {
  for (const auto &AS : Record.AllocSites) {
    assert(AS.CSId == hashCallStack(AS.CallStack));
    (void)AS;
  }
}

void verifyFunctionProfileData(
    const llvm::MapVector<GlobalValue::GUID, IndexedMemProfRecord>
        &FunctionProfileData) {
  for (const auto &[GUID, Record] : FunctionProfileData) {
    (void)GUID;
    verifyIndexedMemProfRecord(Record);
  }
}

} // namespace memprof
} // namespace llvm