LLVM 19.0.0git
MemProf.h
Go to the documentation of this file.
1#ifndef LLVM_PROFILEDATA_MEMPROF_H_
2#define LLVM_PROFILEDATA_MEMPROF_H_
3
10#include "llvm/Support/Endian.h"
13
14#include <bitset>
15#include <cstdint>
16#include <optional>
17
18namespace llvm {
19namespace memprof {
20
21struct MemProfRecord;
22
23// The versions of the indexed MemProf format
25 // Version 0: This version didn't have a version field.
27 // Version 1: Added a version field to the header.
29 // Version 2: Added a call stack table.
31 // Version 3: Added a radix tree for call stacks. Switched to linear IDs for
32 // frames and call stacks.
34};
35
38
39// Verify that the minimum and maximum satisfy the obvious constraint.
41
42enum class Meta : uint64_t {
43 Start = 0,
44#define MIBEntryDef(NameTag, Name, Type) NameTag,
46#undef MIBEntryDef
47 Size
48};
49
51
52// Returns the full schema currently in use.
54
55// Returns the schema consisting of the fields used for hot cold memory hinting.
57
58// Holds the actual MemInfoBlock data with all fields. Contents may be read or
59// written partially by providing an appropriate schema to the serialize and
60// deserialize methods.
63 explicit PortableMemInfoBlock(const MemInfoBlock &Block,
64 const MemProfSchema &IncomingSchema) {
65 for (const Meta Id : IncomingSchema)
66 Schema.set(llvm::to_underlying(Id));
67#define MIBEntryDef(NameTag, Name, Type) Name = Block.Name;
69#undef MIBEntryDef
70 }
71
72 PortableMemInfoBlock(const MemProfSchema &Schema, const unsigned char *Ptr) {
73 deserialize(Schema, Ptr);
74 }
75
76 // Read the contents of \p Ptr based on the \p Schema to populate the
77 // MemInfoBlock member.
78 void deserialize(const MemProfSchema &IncomingSchema,
79 const unsigned char *Ptr) {
80 using namespace support;
81
82 Schema.reset();
83 for (const Meta Id : IncomingSchema) {
84 switch (Id) {
85#define MIBEntryDef(NameTag, Name, Type) \
86 case Meta::Name: { \
87 Name = endian::readNext<Type, llvm::endianness::little>(Ptr); \
88 } break;
90#undef MIBEntryDef
91 default:
92 llvm_unreachable("Unknown meta type id, is the profile collected from "
93 "a newer version of the runtime?");
94 }
95
96 Schema.set(llvm::to_underlying(Id));
97 }
98 }
99
100 // Write the contents of the MemInfoBlock based on the \p Schema provided to
101 // the raw_ostream \p OS.
102 void serialize(const MemProfSchema &Schema, raw_ostream &OS) const {
103 using namespace support;
104
106 for (const Meta Id : Schema) {
107 switch (Id) {
108#define MIBEntryDef(NameTag, Name, Type) \
109 case Meta::Name: { \
110 LE.write<Type>(Name); \
111 } break;
113#undef MIBEntryDef
114 default:
115 llvm_unreachable("Unknown meta type id, invalid input?");
116 }
117 }
118 }
119
120 // Print out the contents of the MemInfoBlock in YAML format.
121 void printYAML(raw_ostream &OS) const {
122 OS << " MemInfoBlock:\n";
123#define MIBEntryDef(NameTag, Name, Type) \
124 OS << " " << #Name << ": " << Name << "\n";
126#undef MIBEntryDef
127 }
128
129 // Return the schema, only for unit tests.
131 return Schema;
132 }
133
134 // Define getters for each type which can be called by analyses.
135#define MIBEntryDef(NameTag, Name, Type) \
136 Type get##Name() const { \
137 assert(Schema[llvm::to_underlying(Meta::Name)]); \
138 return Name; \
139 }
141#undef MIBEntryDef
142
143 void clear() { *this = PortableMemInfoBlock(); }
144
146 if (Other.Schema != Schema)
147 return false;
148
149#define MIBEntryDef(NameTag, Name, Type) \
150 if (Schema[llvm::to_underlying(Meta::Name)] && \
151 Other.get##Name() != get##Name()) \
152 return false;
154#undef MIBEntryDef
155 return true;
156 }
157
159 return !operator==(Other);
160 }
161
162 static size_t serializedSize(const MemProfSchema &Schema) {
163 size_t Result = 0;
164
165 for (const Meta Id : Schema) {
166 switch (Id) {
167#define MIBEntryDef(NameTag, Name, Type) \
168 case Meta::Name: { \
169 Result += sizeof(Type); \
170 } break;
172#undef MIBEntryDef
173 default:
174 llvm_unreachable("Unknown meta type id, invalid input?");
175 }
176 }
177
178 return Result;
179 }
180
181private:
182 // The set of available fields, indexed by Meta::Name.
183 std::bitset<llvm::to_underlying(Meta::Size)> Schema;
184
185#define MIBEntryDef(NameTag, Name, Type) Type Name = Type();
187#undef MIBEntryDef
188};
189
190// A type representing the id generated by hashing the contents of the Frame.
192// A type representing the id to index into the frame array.
194// Describes a call frame for a dynamic allocation context. The contents of
195// the frame are populated by symbolizing the stack depot call frame from the
196// compiler runtime.
197struct Frame {
198 // A uuid (uint64_t) identifying the function. It is obtained by
199 // llvm::md5(FunctionName) which returns the lower 64 bits.
201 // The symbol name for the function. Only populated in the Frame by the reader
202 // if requested during initialization. This field should not be serialized.
203 std::unique_ptr<std::string> SymbolName;
204 // The source line offset of the call from the beginning of parent function.
206 // The source column number of the call to help distinguish multiple calls
207 // on the same line.
209 // Whether the current frame is inlined.
211
212 Frame(const Frame &Other) {
213 Function = Other.Function;
214 SymbolName = Other.SymbolName
215 ? std::make_unique<std::string>(*Other.SymbolName)
216 : nullptr;
217 LineOffset = Other.LineOffset;
218 Column = Other.Column;
219 IsInlineFrame = Other.IsInlineFrame;
220 }
221
222 Frame(GlobalValue::GUID Hash, uint32_t Off, uint32_t Col, bool Inline)
223 : Function(Hash), LineOffset(Off), Column(Col), IsInlineFrame(Inline) {}
224
225 bool operator==(const Frame &Other) const {
226 // Ignore the SymbolName field to avoid a string compare. Comparing the
227 // function hash serves the same purpose.
228 return Other.Function == Function && Other.LineOffset == LineOffset &&
229 Other.Column == Column && Other.IsInlineFrame == IsInlineFrame;
230 }
231
233 Function = Other.Function;
234 SymbolName = Other.SymbolName
235 ? std::make_unique<std::string>(*Other.SymbolName)
236 : nullptr;
237 LineOffset = Other.LineOffset;
238 Column = Other.Column;
239 IsInlineFrame = Other.IsInlineFrame;
240 return *this;
241 }
242
243 bool operator!=(const Frame &Other) const { return !operator==(Other); }
244
245 bool hasSymbolName() const { return !!SymbolName; }
246
249 return *SymbolName;
250 }
251
252 std::string getSymbolNameOr(StringRef Alt) const {
253 return std::string(hasSymbolName() ? getSymbolName() : Alt);
254 }
255
256 // Write the contents of the frame to the ostream \p OS.
257 void serialize(raw_ostream &OS) const {
258 using namespace support;
259
261
262 // If the type of the GlobalValue::GUID changes, then we need to update
263 // the reader and the writer.
264 static_assert(std::is_same<GlobalValue::GUID, uint64_t>::value,
265 "Expect GUID to be uint64_t.");
266 LE.write<uint64_t>(Function);
267
268 LE.write<uint32_t>(LineOffset);
269 LE.write<uint32_t>(Column);
270 LE.write<bool>(IsInlineFrame);
271 }
272
273 // Read a frame from char data which has been serialized as little endian.
274 static Frame deserialize(const unsigned char *Ptr) {
275 using namespace support;
276
277 const uint64_t F =
278 endian::readNext<uint64_t, llvm::endianness::little>(Ptr);
279 const uint32_t L =
280 endian::readNext<uint32_t, llvm::endianness::little>(Ptr);
281 const uint32_t C =
282 endian::readNext<uint32_t, llvm::endianness::little>(Ptr);
283 const bool I = endian::readNext<bool, llvm::endianness::little>(Ptr);
284 return Frame(/*Function=*/F, /*LineOffset=*/L, /*Column=*/C,
285 /*IsInlineFrame=*/I);
286 }
287
288 // Returns the size of the frame information.
289 static constexpr size_t serializedSize() {
290 return sizeof(Frame::Function) + sizeof(Frame::LineOffset) +
291 sizeof(Frame::Column) + sizeof(Frame::IsInlineFrame);
292 }
293
294 // Print the frame information in YAML format.
295 void printYAML(raw_ostream &OS) const {
296 OS << " -\n"
297 << " Function: " << Function << "\n"
298 << " SymbolName: " << getSymbolNameOr("<None>") << "\n"
299 << " LineOffset: " << LineOffset << "\n"
300 << " Column: " << Column << "\n"
301 << " Inline: " << IsInlineFrame << "\n";
302 }
303
304 // Return a hash value based on the contents of the frame. Here we don't use
305 // hashing from llvm ADT since we are going to persist the hash id, the hash
306 // combine algorithm in ADT uses a new randomized seed each time.
307 inline FrameId hash() const {
308 auto HashCombine = [](auto Value, size_t Seed) {
309 std::hash<decltype(Value)> Hasher;
310 // The constant used below is the 64 bit representation of the fractional
311 // part of the golden ratio. Used here for the randomness in their bit
312 // pattern.
313 return Hasher(Value) + 0x9e3779b97f4a7c15 + (Seed << 6) + (Seed >> 2);
314 };
315
316 size_t Result = 0;
317 Result ^= HashCombine(Function, Result);
318 Result ^= HashCombine(LineOffset, Result);
319 Result ^= HashCombine(Column, Result);
320 Result ^= HashCombine(IsInlineFrame, Result);
321 return static_cast<FrameId>(Result);
322 }
323};
324
325// A type representing the index into the table of call stacks.
327
328// A type representing the index into the call stack array.
330
331// Holds allocation information in a space efficient format where frames are
332// represented using unique identifiers.
334 // The dynamic calling context for the allocation in bottom-up (leaf-to-root)
335 // order. Frame contents are stored out-of-line.
336 // TODO: Remove once we fully transition to CSId.
338 // Conceptually the same as above. We are going to keep both CallStack and
339 // CallStackId while we are transitioning from CallStack to CallStackId.
341 // The statistics obtained from the runtime for the allocation.
343
346 const MemInfoBlock &MB,
347 const MemProfSchema &Schema = getFullSchema())
348 : CallStack(CS.begin(), CS.end()), CSId(CSId), Info(MB, Schema) {}
349
350 // Returns the size in bytes when this allocation info struct is serialized.
351 size_t serializedSize(const MemProfSchema &Schema,
352 IndexedVersion Version) const;
353
355 if (Other.Info != Info)
356 return false;
357
358 if (Other.CSId != CSId)
359 return false;
360 return true;
361 }
362
364 return !operator==(Other);
365 }
366};
367
368// Holds allocation information with frame contents inline. The type should
369// be used for temporary in-memory instances.
371 // Same as IndexedAllocationInfo::CallStack with the frame contents inline.
372 std::vector<Frame> CallStack;
373 // Same as IndexedAllocationInfo::Info;
375
376 AllocationInfo() = default;
378 const IndexedAllocationInfo &IndexedAI,
379 llvm::function_ref<const Frame(const FrameId)> IdToFrameCallback) {
380 for (const FrameId &Id : IndexedAI.CallStack) {
381 CallStack.push_back(IdToFrameCallback(Id));
382 }
383 Info = IndexedAI.Info;
384 }
385
386 void printYAML(raw_ostream &OS) const {
387 OS << " -\n";
388 OS << " Callstack:\n";
389 // TODO: Print out the frame on one line with to make it easier for deep
390 // callstacks once we have a test to check valid YAML is generated.
391 for (const Frame &F : CallStack) {
392 F.printYAML(OS);
393 }
395 }
396};
397
398// Holds the memprof profile information for a function. The internal
399// representation stores frame ids for efficiency. This representation should
400// be used in the profile conversion and manipulation tools.
402 // Memory allocation sites in this function for which we have memory
403 // profiling data.
405 // Holds call sites in this function which are part of some memory
406 // allocation context. We store this as a list of locations, each with its
407 // list of inline locations in bottom-up order i.e. from leaf to root. The
408 // inline location list may include additional entries, users should pick
409 // the last entry in the list with the same function GUID.
411 // Conceptually the same as above. We are going to keep both CallSites and
412 // CallSiteIds while we are transitioning from CallSites to CallSiteIds.
414
415 void clear() {
416 AllocSites.clear();
417 CallSites.clear();
418 }
419
421 // TODO: Filter out duplicates which may occur if multiple memprof
422 // profiles are merged together using llvm-profdata.
423 AllocSites.append(Other.AllocSites);
424 CallSites.append(Other.CallSites);
425 }
426
427 size_t serializedSize(const MemProfSchema &Schema,
428 IndexedVersion Version) const;
429
431 if (Other.AllocSites != AllocSites)
432 return false;
433
434 if (Other.CallSiteIds != CallSiteIds)
435 return false;
436 return true;
437 }
438
439 // Serializes the memprof records in \p Records to the ostream \p OS based
440 // on the schema provided in \p Schema.
441 void serialize(const MemProfSchema &Schema, raw_ostream &OS,
444 *MemProfCallStackIndexes = nullptr) const;
445
446 // Deserializes memprof records from the Buffer.
448 const unsigned char *Buffer,
450
451 // Convert IndexedMemProfRecord to MemProfRecord. Callback is used to
452 // translate CallStackId to call stacks with frames inline.
454 llvm::function_ref<std::vector<Frame>(const CallStackId)> Callback) const;
455
456 // Returns the GUID for the function name after canonicalization. For
457 // memprof, we remove any .llvm suffix added by LTO. MemProfRecords are
458 // mapped to functions using this GUID.
459 static GlobalValue::GUID getGUID(const StringRef FunctionName);
460};
461
462// Holds the memprof profile information for a function. The internal
463// representation stores frame contents inline. This representation should
464// be used for small amount of temporary, in memory instances.
466 // Same as IndexedMemProfRecord::AllocSites with frame contents inline.
468 // Same as IndexedMemProfRecord::CallSites with frame contents inline.
470
471 MemProfRecord() = default;
474 llvm::function_ref<const Frame(const FrameId Id)> IdToFrameCallback) {
475 for (const IndexedAllocationInfo &IndexedAI : Record.AllocSites) {
476 AllocSites.emplace_back(IndexedAI, IdToFrameCallback);
477 }
478 for (const ArrayRef<FrameId> Site : Record.CallSites) {
479 std::vector<Frame> Frames;
480 for (const FrameId Id : Site) {
481 Frames.push_back(IdToFrameCallback(Id));
482 }
483 CallSites.push_back(Frames);
484 }
485 }
486
487 // Prints out the contents of the memprof record in YAML.
489 if (!AllocSites.empty()) {
490 OS << " AllocSites:\n";
491 for (const AllocationInfo &N : AllocSites)
492 N.printYAML(OS);
493 }
494
495 if (!CallSites.empty()) {
496 OS << " CallSites:\n";
497 for (const std::vector<Frame> &Frames : CallSites) {
498 for (const Frame &F : Frames) {
499 OS << " -\n";
500 F.printYAML(OS);
501 }
502 }
503 }
504 }
505};
506
507// Reads a memprof schema from a buffer. All entries in the buffer are
508// interpreted as uint64_t. The first entry in the buffer denotes the number of
509// ids in the schema. Subsequent entries are integers which map to memprof::Meta
510// enum class entries. After successfully reading the schema, the pointer is one
511// byte past the schema contents.
512Expected<MemProfSchema> readMemProfSchema(const unsigned char *&Buffer);
513
514// Trait for reading IndexedMemProfRecord data from the on-disk hash table.
516public:
522
525 : Version(V), Schema(S) {}
526
527 static bool EqualKey(uint64_t A, uint64_t B) { return A == B; }
528 static uint64_t GetInternalKey(uint64_t K) { return K; }
529 static uint64_t GetExternalKey(uint64_t K) { return K; }
530
532
533 static std::pair<offset_type, offset_type>
534 ReadKeyDataLength(const unsigned char *&D) {
535 using namespace support;
536
537 offset_type KeyLen =
538 endian::readNext<offset_type, llvm::endianness::little>(D);
539 offset_type DataLen =
540 endian::readNext<offset_type, llvm::endianness::little>(D);
541 return std::make_pair(KeyLen, DataLen);
542 }
543
544 uint64_t ReadKey(const unsigned char *D, offset_type /*Unused*/) {
545 using namespace support;
546 return endian::readNext<external_key_type, llvm::endianness::little>(D);
547 }
548
549 data_type ReadData(uint64_t K, const unsigned char *D,
550 offset_type /*Unused*/) {
551 Record = IndexedMemProfRecord::deserialize(Schema, D, Version);
552 return Record;
553 }
554
555private:
556 // Holds the MemProf version.
557 IndexedVersion Version;
558 // Holds the memprof schema used to deserialize records.
559 MemProfSchema Schema;
560 // Holds the records from one function deserialized from the indexed format.
562};
563
564// Trait for writing IndexedMemProfRecord data to the on-disk hash table.
566public:
569
572
575
576private:
577 // Pointer to the memprof schema to use for the generator.
578 const MemProfSchema *Schema;
579 // The MemProf version to use for the serialization.
580 IndexedVersion Version;
581
582 // Mappings from CallStackId to the indexes into the call stack array.
583 llvm::DenseMap<CallStackId, LinearCallStackId> *MemProfCallStackIndexes;
584
585public:
586 // We do not support the default constructor, which does not set Version.
589 const MemProfSchema *Schema, IndexedVersion V,
590 llvm::DenseMap<CallStackId, LinearCallStackId> *MemProfCallStackIndexes)
591 : Schema(Schema), Version(V),
592 MemProfCallStackIndexes(MemProfCallStackIndexes) {}
593
595
596 std::pair<offset_type, offset_type>
598 using namespace support;
599
601 offset_type N = sizeof(K);
602 LE.write<offset_type>(N);
603 offset_type M = V.serializedSize(*Schema, Version);
604 LE.write<offset_type>(M);
605 return std::make_pair(N, M);
606 }
607
608 void EmitKey(raw_ostream &Out, key_type_ref K, offset_type /*Unused*/) {
609 using namespace support;
611 LE.write<uint64_t>(K);
612 }
613
615 offset_type /*Unused*/) {
616 assert(Schema != nullptr && "MemProf schema is not initialized!");
617 V.serialize(*Schema, Out, Version, MemProfCallStackIndexes);
618 // Clear the IndexedMemProfRecord which results in clearing/freeing its
619 // vectors of allocs and callsites. This is owned by the associated on-disk
620 // hash table, but unused after this point. See also the comment added to
621 // the client which constructs the on-disk hash table for this trait.
622 V.clear();
623 }
624};
625
626// Trait for writing frame mappings to the on-disk hash table.
628public:
631
634
637
639
640 static std::pair<offset_type, offset_type>
642 using namespace support;
644 offset_type N = sizeof(K);
645 LE.write<offset_type>(N);
646 offset_type M = V.serializedSize();
647 LE.write<offset_type>(M);
648 return std::make_pair(N, M);
649 }
650
651 void EmitKey(raw_ostream &Out, key_type_ref K, offset_type /*Unused*/) {
652 using namespace support;
654 LE.write<key_type>(K);
655 }
656
658 offset_type /*Unused*/) {
659 V.serialize(Out);
660 }
661};
662
663// Trait for reading frame mappings from the on-disk hash table.
665public:
666 using data_type = const Frame;
671
673 return A == B;
674 }
677
679
680 static std::pair<offset_type, offset_type>
681 ReadKeyDataLength(const unsigned char *&D) {
682 using namespace support;
683
684 offset_type KeyLen =
685 endian::readNext<offset_type, llvm::endianness::little>(D);
686 offset_type DataLen =
687 endian::readNext<offset_type, llvm::endianness::little>(D);
688 return std::make_pair(KeyLen, DataLen);
689 }
690
691 uint64_t ReadKey(const unsigned char *D, offset_type /*Unused*/) {
692 using namespace support;
693 return endian::readNext<external_key_type, llvm::endianness::little>(D);
694 }
695
696 data_type ReadData(uint64_t K, const unsigned char *D,
697 offset_type /*Unused*/) {
698 return Frame::deserialize(D);
699 }
700};
701
702// Trait for writing call stacks to the on-disk hash table.
704public:
707
710
713
715
716 static std::pair<offset_type, offset_type>
718 using namespace support;
720 // We do not explicitly emit the key length because it is a constant.
721 offset_type N = sizeof(K);
722 offset_type M = sizeof(FrameId) * V.size();
723 LE.write<offset_type>(M);
724 return std::make_pair(N, M);
725 }
726
727 void EmitKey(raw_ostream &Out, key_type_ref K, offset_type /*Unused*/) {
728 using namespace support;
730 LE.write<key_type>(K);
731 }
732
734 offset_type /*Unused*/) {
735 using namespace support;
737 // Emit the frames. We do not explicitly emit the length of the vector
738 // because it can be inferred from the data length.
739 for (FrameId F : V)
740 LE.write<FrameId>(F);
741 }
742};
743
744// Trait for reading call stack mappings from the on-disk hash table.
746public:
752
754 return A == B;
755 }
758
760
761 static std::pair<offset_type, offset_type>
762 ReadKeyDataLength(const unsigned char *&D) {
763 using namespace support;
764
765 // We do not explicitly read the key length because it is a constant.
766 offset_type KeyLen = sizeof(external_key_type);
767 offset_type DataLen =
768 endian::readNext<offset_type, llvm::endianness::little>(D);
769 return std::make_pair(KeyLen, DataLen);
770 }
771
772 uint64_t ReadKey(const unsigned char *D, offset_type /*Unused*/) {
773 using namespace support;
774 return endian::readNext<external_key_type, llvm::endianness::little>(D);
775 }
776
777 data_type ReadData(uint64_t K, const unsigned char *D, offset_type Length) {
778 using namespace support;
780 // Derive the number of frames from the data length.
781 uint64_t NumFrames = Length / sizeof(FrameId);
782 assert(Length % sizeof(FrameId) == 0);
783 CS.reserve(NumFrames);
784 for (size_t I = 0; I != NumFrames; ++I) {
785 FrameId F = endian::readNext<FrameId, llvm::endianness::little>(D);
786 CS.push_back(F);
787 }
788 return CS;
789 }
790};
791
792// Compute a CallStackId for a given call stack.
794
795namespace detail {
796// "Dereference" the iterator from DenseMap or OnDiskChainedHashTable. We have
797// to do so in one of two different ways depending on the type of the hash
798// table.
799template <typename value_type, typename IterTy>
800value_type DerefIterator(IterTy Iter) {
801 using deref_type = llvm::remove_cvref_t<decltype(*Iter)>;
802 if constexpr (std::is_same_v<deref_type, value_type>)
803 return *Iter;
804 else
805 return Iter->second;
806}
807} // namespace detail
808
809// A function object that returns a frame for a given FrameId.
810template <typename MapTy> struct FrameIdConverter {
811 std::optional<FrameId> LastUnmappedId;
812 MapTy &Map;
813
816
817 // Delete the copy constructor and copy assignment operator to avoid a
818 // situation where a copy of FrameIdConverter gets an error in LastUnmappedId
819 // while the original instance doesn't.
822
824 auto Iter = Map.find(Id);
825 if (Iter == Map.end()) {
826 LastUnmappedId = Id;
827 return Frame(0, 0, 0, false);
828 }
829 return detail::DerefIterator<Frame>(Iter);
830 }
831};
832
833// A function object that returns a call stack for a given CallStackId.
834template <typename MapTy> struct CallStackIdConverter {
835 std::optional<CallStackId> LastUnmappedId;
836 MapTy &Map;
838
843
844 // Delete the copy constructor and copy assignment operator to avoid a
845 // situation where a copy of CallStackIdConverter gets an error in
846 // LastUnmappedId while the original instance doesn't.
849
850 std::vector<Frame> operator()(CallStackId CSId) {
851 std::vector<Frame> Frames;
852 auto CSIter = Map.find(CSId);
853 if (CSIter == Map.end()) {
854 LastUnmappedId = CSId;
855 } else {
857 detail::DerefIterator<llvm::SmallVector<FrameId>>(CSIter);
858 Frames.reserve(CS.size());
859 for (FrameId Id : CS)
860 Frames.push_back(FrameIdToFrame(Id));
861 }
862 return Frames;
863 }
864};
865
866// A function object that returns a Frame stored at a given index into the Frame
867// array in the profile.
869 const unsigned char *FrameBase;
870
872 LinearFrameIdConverter(const unsigned char *FrameBase)
873 : FrameBase(FrameBase) {}
874
876 uint64_t Offset = static_cast<uint64_t>(LinearId) * Frame::serializedSize();
878 }
879};
880
881// A function object that returns a call stack stored at a given index into the
882// call stack array in the profile.
884 const unsigned char *CallStackBase;
886
889 std::function<Frame(LinearFrameId)> FrameIdToFrame)
891
892 std::vector<Frame> operator()(LinearCallStackId LinearCSId) {
893 std::vector<Frame> Frames;
894
895 const unsigned char *Ptr =
897 static_cast<uint64_t>(LinearCSId) * sizeof(LinearFrameId);
898 uint32_t NumFrames =
899 support::endian::readNext<uint32_t, llvm::endianness::little>(Ptr);
900 Frames.reserve(NumFrames);
901 for (; NumFrames; --NumFrames) {
902 LinearFrameId Elem =
903 support::endian::read<LinearFrameId, llvm::endianness::little>(Ptr);
904 // Follow a pointer to the parent, if any. See comments below on
905 // CallStackRadixTreeBuilder for the description of the radix tree format.
906 if (static_cast<std::make_signed_t<LinearFrameId>>(Elem) < 0) {
907 Ptr += (-Elem) * sizeof(LinearFrameId);
908 Elem =
909 support::endian::read<LinearFrameId, llvm::endianness::little>(Ptr);
910 }
911 // We shouldn't encounter another pointer.
912 assert(static_cast<std::make_signed_t<LinearFrameId>>(Elem) >= 0);
913 Frames.push_back(FrameIdToFrame(Elem));
914 Ptr += sizeof(LinearFrameId);
915 }
916
917 return Frames;
918 }
919};
920
922 // A map to hold memprof data per function. The lower 64 bits obtained from
923 // the md5 hash of the function name is used to index into the map.
925
926 // A map to hold frame id to frame mappings. The mappings are used to
927 // convert IndexedMemProfRecord to MemProfRecords with frame information
928 // inline.
930
931 // A map to hold call stack id to call stacks.
933};
934
935struct FrameStat {
936 // The number of occurrences of a given FrameId.
938 // The sum of indexes where a given FrameId shows up.
940};
941
942// Compute a histogram of Frames in call stacks.
945 &MemProfCallStackData);
946
947// Construct a radix tree of call stacks.
948//
949// A set of call stacks might look like:
950//
951// CallStackId 1: f1 -> f2 -> f3
952// CallStackId 2: f1 -> f2 -> f4 -> f5
953// CallStackId 3: f1 -> f2 -> f4 -> f6
954// CallStackId 4: f7 -> f8 -> f9
955//
956// where each fn refers to a stack frame.
957//
958// Since we expect a lot of common prefixes, we can compress the call stacks
959// into a radix tree like:
960//
961// CallStackId 1: f1 -> f2 -> f3
962// |
963// CallStackId 2: +---> f4 -> f5
964// |
965// CallStackId 3: +---> f6
966//
967// CallStackId 4: f7 -> f8 -> f9
968//
969// Now, we are interested in retrieving call stacks for a given CallStackId, so
970// we just need a pointer from a given call stack to its parent. For example,
971// CallStackId 2 would point to CallStackId 1 as a parent.
972//
973// We serialize the radix tree above into a single array along with the length
974// of each call stack and pointers to the parent call stacks.
975//
976// Index: 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14
977// Array: L3 f9 f8 f7 L4 f6 J3 L4 f5 f4 J3 L3 f3 f2 f1
978// ^ ^ ^ ^
979// | | | |
980// CallStackId 4: 0 --+ | | |
981// CallStackId 3: 4 --------------+ | |
982// CallStackId 2: 7 -----------------------+ |
983// CallStackId 1: 11 -----------------------------------+
984//
985// - LN indicates the length of a call stack, encoded as ordinary integer N.
986//
987// - JN indicates a pointer to the parent, encoded as -N.
988//
989// The radix tree allows us to reconstruct call stacks in the leaf-to-root
990// order as we scan the array from left ro right while following pointers to
991// parents along the way.
992//
993// For example, if we are decoding CallStackId 2, we start a forward traversal
994// at Index 7, noting the call stack length of 4 and obtaining f5 and f4. When
995// we see J3 at Index 10, we resume a forward traversal at Index 13 = 10 + 3,
996// picking up f2 and f1. We are done after collecting 4 frames as indicated at
997// the beginning of the traversal.
998//
999// On-disk IndexedMemProfRecord will refer to call stacks by their indexes into
1000// the radix tree array, so we do not explicitly encode mappings like:
1001// "CallStackId 1 -> 11".
1003 // The radix tree array.
1004 std::vector<LinearFrameId> RadixArray;
1005
1006 // Mapping from CallStackIds to indexes into RadixArray.
1008
1009 // In build, we partition a given call stack into two parts -- the prefix
1010 // that's common with the previously encoded call stack and the frames beyond
1011 // the common prefix -- the unique portion. Then we want to find out where
1012 // the common prefix is stored in RadixArray so that we can link the unique
1013 // portion to the common prefix. Indexes, declared below, helps with our
1014 // needs. Intuitively, Indexes tells us where each of the previously encoded
1015 // call stack is stored in RadixArray. More formally, Indexes satisfies:
1016 //
1017 // RadixArray[Indexes[I]] == Prev[I]
1018 //
1019 // for every I, where Prev is the the call stack in the root-to-leaf order
1020 // previously encoded by build. (Note that Prev, as passed to
1021 // encodeCallStack, is in the leaf-to-root order.)
1022 //
1023 // For example, if the call stack being encoded shares 5 frames at the root of
1024 // the call stack with the previously encoded call stack,
1025 // RadixArray[Indexes[0]] is the root frame of the common prefix.
1026 // RadixArray[Indexes[5 - 1]] is the last frame of the common prefix.
1027 std::vector<LinearCallStackId> Indexes;
1028
1029 using CSIdPair = std::pair<CallStackId, llvm::SmallVector<FrameId>>;
1030
1031 // Encode a call stack into RadixArray. Return the starting index within
1032 // RadixArray.
1033 LinearCallStackId encodeCallStack(
1035 const llvm::SmallVector<FrameId> *Prev,
1036 const llvm::DenseMap<FrameId, LinearFrameId> &MemProfFrameIndexes);
1037
1038public:
1040
1041 // Build a radix tree array.
1043 &&MemProfCallStackData,
1044 const llvm::DenseMap<FrameId, LinearFrameId> &MemProfFrameIndexes,
1045 llvm::DenseMap<FrameId, FrameStat> &FrameHistogram);
1046
1047 const std::vector<LinearFrameId> &getRadixArray() const { return RadixArray; }
1048
1050 return std::move(CallStackPos);
1051 }
1052};
1053
1054// Verify that each CallStackId is computed with hashCallStack. This function
1055// is intended to help transition from CallStack to CSId in
1056// IndexedAllocationInfo.
1057void verifyIndexedMemProfRecord(const IndexedMemProfRecord &Record);
1058
1059// Verify that each CallStackId is computed with hashCallStack. This function
1060// is intended to help transition from CallStack to CSId in
1061// IndexedAllocationInfo.
1064 &FunctionProfileData);
1065} // namespace memprof
1066} // namespace llvm
1067
1068#endif // LLVM_PROFILEDATA_MEMPROF_H_
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< StatepointGC > D("statepoint-example", "an example strategy for statepoint")
#define F(x, y, z)
Definition: MD5.cpp:55
#define I(x, y, z)
Definition: MD5.cpp:58
This file implements a map that provides insertion order iteration.
static ManagedStatic< cl::opt< uint64_t >, CreateSeed > Seed
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
This file contains library features backported from future STL versions.
raw_pwrite_stream & OS
This file defines the SmallVector class.
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition: ArrayRef.h:41
Tagged union holding either a T or a Error.
Definition: Error.h:474
uint64_t GUID
Declare a type to represent a global unique identifier for a global value.
Definition: GlobalValue.h:586
This class implements a map that also provides access to all stored values in a deterministic order.
Definition: MapVector.h:36
size_t size() const
Definition: SmallVector.h:91
void reserve(size_type N)
Definition: SmallVector.h:676
void push_back(const T &Elt)
Definition: SmallVector.h:426
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:50
LLVM Value Representation.
Definition: Value.h:74
An efficient, type-erasing, non-owning reference to a callable.
static uint64_t GetInternalKey(internal_key_type K)
Definition: MemProf.h:756
hash_value_type ComputeHash(internal_key_type K)
Definition: MemProf.h:759
static bool EqualKey(internal_key_type A, internal_key_type B)
Definition: MemProf.h:753
static std::pair< offset_type, offset_type > ReadKeyDataLength(const unsigned char *&D)
Definition: MemProf.h:762
uint64_t ReadKey(const unsigned char *D, offset_type)
Definition: MemProf.h:772
data_type ReadData(uint64_t K, const unsigned char *D, offset_type Length)
Definition: MemProf.h:777
static uint64_t GetExternalKey(external_key_type K)
Definition: MemProf.h:757
void build(llvm::MapVector< CallStackId, llvm::SmallVector< FrameId > > &&MemProfCallStackData, const llvm::DenseMap< FrameId, LinearFrameId > &MemProfFrameIndexes, llvm::DenseMap< FrameId, FrameStat > &FrameHistogram)
Definition: MemProf.cpp:486
llvm::DenseMap< CallStackId, LinearCallStackId > takeCallStackPos()
Definition: MemProf.h:1049
const std::vector< LinearFrameId > & getRadixArray() const
Definition: MemProf.h:1047
static hash_value_type ComputeHash(key_type_ref K)
Definition: MemProf.h:714
static std::pair< offset_type, offset_type > EmitKeyDataLength(raw_ostream &Out, key_type_ref K, data_type_ref V)
Definition: MemProf.h:717
void EmitData(raw_ostream &Out, key_type_ref, data_type_ref V, offset_type)
Definition: MemProf.h:733
void EmitKey(raw_ostream &Out, key_type_ref K, offset_type)
Definition: MemProf.h:727
Helper class to iterate through stack ids in both metadata (memprof MIB and callsite) and the corresp...
data_type ReadData(uint64_t K, const unsigned char *D, offset_type)
Definition: MemProf.h:696
static uint64_t GetExternalKey(external_key_type K)
Definition: MemProf.h:676
static bool EqualKey(internal_key_type A, internal_key_type B)
Definition: MemProf.h:672
static uint64_t GetInternalKey(internal_key_type K)
Definition: MemProf.h:675
static std::pair< offset_type, offset_type > ReadKeyDataLength(const unsigned char *&D)
Definition: MemProf.h:681
hash_value_type ComputeHash(internal_key_type K)
Definition: MemProf.h:678
uint64_t ReadKey(const unsigned char *D, offset_type)
Definition: MemProf.h:691
static hash_value_type ComputeHash(key_type_ref K)
Definition: MemProf.h:638
void EmitKey(raw_ostream &Out, key_type_ref K, offset_type)
Definition: MemProf.h:651
void EmitData(raw_ostream &Out, key_type_ref, data_type_ref V, offset_type)
Definition: MemProf.h:657
static std::pair< offset_type, offset_type > EmitKeyDataLength(raw_ostream &Out, key_type_ref K, data_type_ref V)
Definition: MemProf.h:641
static bool EqualKey(uint64_t A, uint64_t B)
Definition: MemProf.h:527
uint64_t ReadKey(const unsigned char *D, offset_type)
Definition: MemProf.h:544
data_type ReadData(uint64_t K, const unsigned char *D, offset_type)
Definition: MemProf.h:549
static std::pair< offset_type, offset_type > ReadKeyDataLength(const unsigned char *&D)
Definition: MemProf.h:534
static uint64_t GetInternalKey(uint64_t K)
Definition: MemProf.h:528
hash_value_type ComputeHash(uint64_t K)
Definition: MemProf.h:531
static uint64_t GetExternalKey(uint64_t K)
Definition: MemProf.h:529
RecordLookupTrait(IndexedVersion V, const MemProfSchema &S)
Definition: MemProf.h:524
static hash_value_type ComputeHash(key_type_ref K)
Definition: MemProf.h:594
RecordWriterTrait(const MemProfSchema *Schema, IndexedVersion V, llvm::DenseMap< CallStackId, LinearCallStackId > *MemProfCallStackIndexes)
Definition: MemProf.h:588
std::pair< offset_type, offset_type > EmitKeyDataLength(raw_ostream &Out, key_type_ref K, data_type_ref V)
Definition: MemProf.h:597
void EmitKey(raw_ostream &Out, key_type_ref K, offset_type)
Definition: MemProf.h:608
void EmitData(raw_ostream &Out, key_type_ref, data_type_ref V, offset_type)
Definition: MemProf.h:614
This class implements an extremely fast bulk output stream that can only output to a stream.
Definition: raw_ostream.h:52
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
@ C
The default llvm calling convention, compatible with C.
Definition: CallingConv.h:34
value_type DerefIterator(IterTy Iter)
Definition: MemProf.h:800
void verifyIndexedMemProfRecord(const IndexedMemProfRecord &Record)
Definition: MemProf.cpp:631
constexpr uint64_t MaximumSupportedVersion
Definition: MemProf.h:37
MemProfSchema getHotColdSchema()
Definition: MemProf.cpp:21
uint32_t LinearCallStackId
Definition: MemProf.h:329
llvm::DenseMap< FrameId, FrameStat > computeFrameHistogram(llvm::MapVector< CallStackId, llvm::SmallVector< FrameId > > &MemProfCallStackData)
Definition: MemProf.cpp:616
CallStackId hashCallStack(ArrayRef< FrameId > CS)
Definition: MemProf.cpp:402
uint64_t FrameId
Definition: MemProf.h:191
constexpr uint64_t MinimumSupportedVersion
Definition: MemProf.h:36
uint64_t CallStackId
Definition: MemProf.h:326
MemProfSchema getFullSchema()
Definition: MemProf.cpp:13
Expected< MemProfSchema > readMemProfSchema(const unsigned char *&Buffer)
Definition: MemProf.cpp:376
void verifyFunctionProfileData(const llvm::MapVector< GlobalValue::GUID, IndexedMemProfRecord > &FunctionProfileData)
Definition: MemProf.cpp:638
uint32_t LinearFrameId
Definition: MemProf.h:193
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
@ Offset
Definition: DWP.cpp:456
@ Length
Definition: DWP.cpp:456
constexpr std::underlying_type_t< Enum > to_underlying(Enum E)
Returns underlying integer value of an enum.
typename llvm::remove_cvref< T >::type remove_cvref_t
@ Other
Any other memory.
#define N
std::vector< Frame > CallStack
Definition: MemProf.h:372
AllocationInfo(const IndexedAllocationInfo &IndexedAI, llvm::function_ref< const Frame(const FrameId)> IdToFrameCallback)
Definition: MemProf.h:377
PortableMemInfoBlock Info
Definition: MemProf.h:374
void printYAML(raw_ostream &OS) const
Definition: MemProf.h:386
CallStackIdConverter & operator=(const CallStackIdConverter &)=delete
llvm::function_ref< Frame(FrameId)> FrameIdToFrame
Definition: MemProf.h:837
std::optional< CallStackId > LastUnmappedId
Definition: MemProf.h:835
CallStackIdConverter(MapTy &Map, llvm::function_ref< Frame(FrameId)> FrameIdToFrame)
Definition: MemProf.h:840
std::vector< Frame > operator()(CallStackId CSId)
Definition: MemProf.h:850
CallStackIdConverter(const CallStackIdConverter &)=delete
FrameIdConverter(const FrameIdConverter &)=delete
Frame operator()(FrameId Id)
Definition: MemProf.h:823
std::optional< FrameId > LastUnmappedId
Definition: MemProf.h:811
FrameIdConverter & operator=(const FrameIdConverter &)=delete
Frame & operator=(const Frame &Other)
Definition: MemProf.h:232
Frame(const Frame &Other)
Definition: MemProf.h:212
static constexpr size_t serializedSize()
Definition: MemProf.h:289
void printYAML(raw_ostream &OS) const
Definition: MemProf.h:295
GlobalValue::GUID Function
Definition: MemProf.h:200
void serialize(raw_ostream &OS) const
Definition: MemProf.h:257
bool hasSymbolName() const
Definition: MemProf.h:245
std::string getSymbolNameOr(StringRef Alt) const
Definition: MemProf.h:252
StringRef getSymbolName() const
Definition: MemProf.h:247
bool operator==(const Frame &Other) const
Definition: MemProf.h:225
bool operator!=(const Frame &Other) const
Definition: MemProf.h:243
static Frame deserialize(const unsigned char *Ptr)
Definition: MemProf.h:274
Frame(GlobalValue::GUID Hash, uint32_t Off, uint32_t Col, bool Inline)
Definition: MemProf.h:222
FrameId hash() const
Definition: MemProf.h:307
uint32_t LineOffset
Definition: MemProf.h:205
std::unique_ptr< std::string > SymbolName
Definition: MemProf.h:203
IndexedAllocationInfo(ArrayRef< FrameId > CS, CallStackId CSId, const MemInfoBlock &MB, const MemProfSchema &Schema=getFullSchema())
Definition: MemProf.h:345
bool operator==(const IndexedAllocationInfo &Other) const
Definition: MemProf.h:354
bool operator!=(const IndexedAllocationInfo &Other) const
Definition: MemProf.h:363
PortableMemInfoBlock Info
Definition: MemProf.h:342
size_t serializedSize(const MemProfSchema &Schema, IndexedVersion Version) const
Definition: MemProf.cpp:58
llvm::SmallVector< FrameId > CallStack
Definition: MemProf.h:337
llvm::MapVector< GlobalValue::GUID, IndexedMemProfRecord > RecordData
Definition: MemProf.h:924
llvm::MapVector< CallStackId, llvm::SmallVector< FrameId > > CallStackData
Definition: MemProf.h:932
llvm::MapVector< FrameId, Frame > FrameData
Definition: MemProf.h:929
llvm::SmallVector< CallStackId > CallSiteIds
Definition: MemProf.h:413
llvm::SmallVector< IndexedAllocationInfo > AllocSites
Definition: MemProf.h:404
size_t serializedSize(const MemProfSchema &Schema, IndexedVersion Version) const
Definition: MemProf.cpp:117
void serialize(const MemProfSchema &Schema, raw_ostream &OS, IndexedVersion Version, llvm::DenseMap< CallStackId, LinearCallStackId > *MemProfCallStackIndexes=nullptr) const
Definition: MemProf.cpp:195
static IndexedMemProfRecord deserialize(const MemProfSchema &Schema, const unsigned char *Buffer, IndexedVersion Version)
Definition: MemProf.cpp:326
llvm::SmallVector< llvm::SmallVector< FrameId > > CallSites
Definition: MemProf.h:410
bool operator==(const IndexedMemProfRecord &Other) const
Definition: MemProf.h:430
static GlobalValue::GUID getGUID(const StringRef FunctionName)
Definition: MemProf.cpp:360
void merge(const IndexedMemProfRecord &Other)
Definition: MemProf.h:420
MemProfRecord toMemProfRecord(llvm::function_ref< std::vector< Frame >(const CallStackId)> Callback) const
Definition: MemProf.cpp:341
const unsigned char * CallStackBase
Definition: MemProf.h:884
std::function< Frame(LinearFrameId)> FrameIdToFrame
Definition: MemProf.h:885
LinearCallStackIdConverter(const unsigned char *CallStackBase, std::function< Frame(LinearFrameId)> FrameIdToFrame)
Definition: MemProf.h:888
std::vector< Frame > operator()(LinearCallStackId LinearCSId)
Definition: MemProf.h:892
LinearFrameIdConverter(const unsigned char *FrameBase)
Definition: MemProf.h:872
const unsigned char * FrameBase
Definition: MemProf.h:869
Frame operator()(LinearFrameId LinearId)
Definition: MemProf.h:875
llvm::SmallVector< std::vector< Frame > > CallSites
Definition: MemProf.h:469
llvm::SmallVector< AllocationInfo > AllocSites
Definition: MemProf.h:467
void print(llvm::raw_ostream &OS) const
Definition: MemProf.h:488
MemProfRecord(const IndexedMemProfRecord &Record, llvm::function_ref< const Frame(const FrameId Id)> IdToFrameCallback)
Definition: MemProf.h:472
bool operator!=(const PortableMemInfoBlock &Other) const
Definition: MemProf.h:158
void deserialize(const MemProfSchema &IncomingSchema, const unsigned char *Ptr)
Definition: MemProf.h:78
PortableMemInfoBlock(const MemProfSchema &Schema, const unsigned char *Ptr)
Definition: MemProf.h:72
std::bitset< llvm::to_underlying(Meta::Size)> getSchema() const
Definition: MemProf.h:130
static size_t serializedSize(const MemProfSchema &Schema)
Definition: MemProf.h:162
void printYAML(raw_ostream &OS) const
Definition: MemProf.h:121
void serialize(const MemProfSchema &Schema, raw_ostream &OS) const
Definition: MemProf.h:102
PortableMemInfoBlock(const MemInfoBlock &Block, const MemProfSchema &IncomingSchema)
Definition: MemProf.h:63
bool operator==(const PortableMemInfoBlock &Other) const
Definition: MemProf.h:145
Adapter to write values to a stream in a particular byte order.
Definition: EndianStream.h:67