LLVM 20.0.0git
MemProf.h
Go to the documentation of this file.
1#ifndef LLVM_PROFILEDATA_MEMPROF_H_
2#define LLVM_PROFILEDATA_MEMPROF_H_
3
10#include "llvm/Support/Endian.h"
13
14#include <bitset>
15#include <cstdint>
16#include <optional>
17
18namespace llvm {
19namespace memprof {
20
21struct MemProfRecord;
22
23// The versions of the indexed MemProf format
25 // Version 0: This version didn't have a version field.
27 // Version 1: Added a version field to the header.
29 // Version 2: Added a call stack table.
31 // Version 3: Added a radix tree for call stacks. Switched to linear IDs for
32 // frames and call stacks.
34};
35
38
39// Verify that the minimum and maximum satisfy the obvious constraint.
41
42enum class Meta : uint64_t {
43 Start = 0,
44#define MIBEntryDef(NameTag, Name, Type) NameTag,
46#undef MIBEntryDef
47 Size
48};
49
51
52// Returns the full schema currently in use.
54
55// Returns the schema consisting of the fields used for hot cold memory hinting.
57
58// Holds the actual MemInfoBlock data with all fields. Contents may be read or
59// written partially by providing an appropriate schema to the serialize and
60// deserialize methods.
63 explicit PortableMemInfoBlock(const MemInfoBlock &Block,
64 const MemProfSchema &IncomingSchema) {
65 for (const Meta Id : IncomingSchema)
66 Schema.set(llvm::to_underlying(Id));
67#define MIBEntryDef(NameTag, Name, Type) Name = Block.Name;
69#undef MIBEntryDef
70 }
71
72 PortableMemInfoBlock(const MemProfSchema &Schema, const unsigned char *Ptr) {
73 deserialize(Schema, Ptr);
74 }
75
76 // Read the contents of \p Ptr based on the \p Schema to populate the
77 // MemInfoBlock member.
78 void deserialize(const MemProfSchema &IncomingSchema,
79 const unsigned char *Ptr) {
80 using namespace support;
81
82 Schema.reset();
83 for (const Meta Id : IncomingSchema) {
84 switch (Id) {
85#define MIBEntryDef(NameTag, Name, Type) \
86 case Meta::Name: { \
87 Name = endian::readNext<Type, llvm::endianness::little>(Ptr); \
88 } break;
90#undef MIBEntryDef
91 default:
92 llvm_unreachable("Unknown meta type id, is the profile collected from "
93 "a newer version of the runtime?");
94 }
95
96 Schema.set(llvm::to_underlying(Id));
97 }
98 }
99
100 // Write the contents of the MemInfoBlock based on the \p Schema provided to
101 // the raw_ostream \p OS.
102 void serialize(const MemProfSchema &Schema, raw_ostream &OS) const {
103 using namespace support;
104
106 for (const Meta Id : Schema) {
107 switch (Id) {
108#define MIBEntryDef(NameTag, Name, Type) \
109 case Meta::Name: { \
110 LE.write<Type>(Name); \
111 } break;
113#undef MIBEntryDef
114 default:
115 llvm_unreachable("Unknown meta type id, invalid input?");
116 }
117 }
118 }
119
120 // Print out the contents of the MemInfoBlock in YAML format.
121 void printYAML(raw_ostream &OS) const {
122 OS << " MemInfoBlock:\n";
123#define MIBEntryDef(NameTag, Name, Type) \
124 OS << " " << #Name << ": " << Name << "\n";
126#undef MIBEntryDef
127 if (AccessHistogramSize > 0) {
128 OS << " " << "AccessHistogramValues" << ":";
129 for (uint32_t I = 0; I < AccessHistogramSize; ++I) {
130 OS << " " << ((uint64_t *)AccessHistogram)[I];
131 }
132 OS << "\n";
133 }
134 }
135
136 // Return the schema, only for unit tests.
138 return Schema;
139 }
140
141 // Define getters for each type which can be called by analyses.
142#define MIBEntryDef(NameTag, Name, Type) \
143 Type get##Name() const { \
144 assert(Schema[llvm::to_underlying(Meta::Name)]); \
145 return Name; \
146 }
148#undef MIBEntryDef
149
150 void clear() { *this = PortableMemInfoBlock(); }
151
153 if (Other.Schema != Schema)
154 return false;
155
156#define MIBEntryDef(NameTag, Name, Type) \
157 if (Schema[llvm::to_underlying(Meta::Name)] && \
158 Other.get##Name() != get##Name()) \
159 return false;
161#undef MIBEntryDef
162 return true;
163 }
164
166 return !operator==(Other);
167 }
168
169 static size_t serializedSize(const MemProfSchema &Schema) {
170 size_t Result = 0;
171
172 for (const Meta Id : Schema) {
173 switch (Id) {
174#define MIBEntryDef(NameTag, Name, Type) \
175 case Meta::Name: { \
176 Result += sizeof(Type); \
177 } break;
179#undef MIBEntryDef
180 default:
181 llvm_unreachable("Unknown meta type id, invalid input?");
182 }
183 }
184
185 return Result;
186 }
187
188private:
189 // The set of available fields, indexed by Meta::Name.
190 std::bitset<llvm::to_underlying(Meta::Size)> Schema;
191
192#define MIBEntryDef(NameTag, Name, Type) Type Name = Type();
194#undef MIBEntryDef
195};
196
197// A type representing the id generated by hashing the contents of the Frame.
199// A type representing the id to index into the frame array.
201// Describes a call frame for a dynamic allocation context. The contents of
202// the frame are populated by symbolizing the stack depot call frame from the
203// compiler runtime.
204struct Frame {
205 // A uuid (uint64_t) identifying the function. It is obtained by
206 // llvm::md5(FunctionName) which returns the lower 64 bits.
208 // The symbol name for the function. Only populated in the Frame by the reader
209 // if requested during initialization. This field should not be serialized.
210 std::unique_ptr<std::string> SymbolName;
211 // The source line offset of the call from the beginning of parent function.
213 // The source column number of the call to help distinguish multiple calls
214 // on the same line.
216 // Whether the current frame is inlined.
218
219 Frame(const Frame &Other) {
220 Function = Other.Function;
221 SymbolName = Other.SymbolName
222 ? std::make_unique<std::string>(*Other.SymbolName)
223 : nullptr;
224 LineOffset = Other.LineOffset;
225 Column = Other.Column;
226 IsInlineFrame = Other.IsInlineFrame;
227 }
228
229 Frame(GlobalValue::GUID Hash, uint32_t Off, uint32_t Col, bool Inline)
230 : Function(Hash), LineOffset(Off), Column(Col), IsInlineFrame(Inline) {}
231
232 bool operator==(const Frame &Other) const {
233 // Ignore the SymbolName field to avoid a string compare. Comparing the
234 // function hash serves the same purpose.
235 return Other.Function == Function && Other.LineOffset == LineOffset &&
236 Other.Column == Column && Other.IsInlineFrame == IsInlineFrame;
237 }
238
240 Function = Other.Function;
241 SymbolName = Other.SymbolName
242 ? std::make_unique<std::string>(*Other.SymbolName)
243 : nullptr;
244 LineOffset = Other.LineOffset;
245 Column = Other.Column;
246 IsInlineFrame = Other.IsInlineFrame;
247 return *this;
248 }
249
250 bool operator!=(const Frame &Other) const { return !operator==(Other); }
251
252 bool hasSymbolName() const { return !!SymbolName; }
253
256 return *SymbolName;
257 }
258
259 std::string getSymbolNameOr(StringRef Alt) const {
260 return std::string(hasSymbolName() ? getSymbolName() : Alt);
261 }
262
263 // Write the contents of the frame to the ostream \p OS.
264 void serialize(raw_ostream &OS) const {
265 using namespace support;
266
268
269 // If the type of the GlobalValue::GUID changes, then we need to update
270 // the reader and the writer.
271 static_assert(std::is_same<GlobalValue::GUID, uint64_t>::value,
272 "Expect GUID to be uint64_t.");
273 LE.write<uint64_t>(Function);
274
275 LE.write<uint32_t>(LineOffset);
276 LE.write<uint32_t>(Column);
277 LE.write<bool>(IsInlineFrame);
278 }
279
280 // Read a frame from char data which has been serialized as little endian.
281 static Frame deserialize(const unsigned char *Ptr) {
282 using namespace support;
283
284 const uint64_t F =
285 endian::readNext<uint64_t, llvm::endianness::little>(Ptr);
286 const uint32_t L =
287 endian::readNext<uint32_t, llvm::endianness::little>(Ptr);
288 const uint32_t C =
289 endian::readNext<uint32_t, llvm::endianness::little>(Ptr);
290 const bool I = endian::readNext<bool, llvm::endianness::little>(Ptr);
291 return Frame(/*Function=*/F, /*LineOffset=*/L, /*Column=*/C,
292 /*IsInlineFrame=*/I);
293 }
294
295 // Returns the size of the frame information.
296 static constexpr size_t serializedSize() {
297 return sizeof(Frame::Function) + sizeof(Frame::LineOffset) +
298 sizeof(Frame::Column) + sizeof(Frame::IsInlineFrame);
299 }
300
301 // Print the frame information in YAML format.
302 void printYAML(raw_ostream &OS) const {
303 OS << " -\n"
304 << " Function: " << Function << "\n"
305 << " SymbolName: " << getSymbolNameOr("<None>") << "\n"
306 << " LineOffset: " << LineOffset << "\n"
307 << " Column: " << Column << "\n"
308 << " Inline: " << IsInlineFrame << "\n";
309 }
310
311 // Return a hash value based on the contents of the frame. Here we don't use
312 // hashing from llvm ADT since we are going to persist the hash id, the hash
313 // combine algorithm in ADT uses a new randomized seed each time.
314 inline FrameId hash() const {
315 auto HashCombine = [](auto Value, size_t Seed) {
316 std::hash<decltype(Value)> Hasher;
317 // The constant used below is the 64 bit representation of the fractional
318 // part of the golden ratio. Used here for the randomness in their bit
319 // pattern.
320 return Hasher(Value) + 0x9e3779b97f4a7c15 + (Seed << 6) + (Seed >> 2);
321 };
322
323 size_t Result = 0;
324 Result ^= HashCombine(Function, Result);
325 Result ^= HashCombine(LineOffset, Result);
326 Result ^= HashCombine(Column, Result);
327 Result ^= HashCombine(IsInlineFrame, Result);
328 return static_cast<FrameId>(Result);
329 }
330};
331
332// A type representing the index into the table of call stacks.
334
335// A type representing the index into the call stack array.
337
338// Holds allocation information in a space efficient format where frames are
339// represented using unique identifiers.
341 // The dynamic calling context for the allocation in bottom-up (leaf-to-root)
342 // order. Frame contents are stored out-of-line.
343 // TODO: Remove once we fully transition to CSId.
345 // Conceptually the same as above. We are going to keep both CallStack and
346 // CallStackId while we are transitioning from CallStack to CallStackId.
348 // The statistics obtained from the runtime for the allocation.
350
353 const MemInfoBlock &MB,
354 const MemProfSchema &Schema = getFullSchema())
355 : CallStack(CS), CSId(CSId), Info(MB, Schema) {}
356
357 // Returns the size in bytes when this allocation info struct is serialized.
358 size_t serializedSize(const MemProfSchema &Schema,
359 IndexedVersion Version) const;
360
362 if (Other.Info != Info)
363 return false;
364
365 if (Other.CSId != CSId)
366 return false;
367 return true;
368 }
369
371 return !operator==(Other);
372 }
373};
374
375// Holds allocation information with frame contents inline. The type should
376// be used for temporary in-memory instances.
378 // Same as IndexedAllocationInfo::CallStack with the frame contents inline.
379 std::vector<Frame> CallStack;
380 // Same as IndexedAllocationInfo::Info;
382
383 AllocationInfo() = default;
385 const IndexedAllocationInfo &IndexedAI,
386 llvm::function_ref<const Frame(const FrameId)> IdToFrameCallback) {
387 for (const FrameId &Id : IndexedAI.CallStack) {
388 CallStack.push_back(IdToFrameCallback(Id));
389 }
390 Info = IndexedAI.Info;
391 }
392
393 void printYAML(raw_ostream &OS) const {
394 OS << " -\n";
395 OS << " Callstack:\n";
396 // TODO: Print out the frame on one line with to make it easier for deep
397 // callstacks once we have a test to check valid YAML is generated.
398 for (const Frame &F : CallStack) {
399 F.printYAML(OS);
400 }
402 }
403};
404
405// Holds the memprof profile information for a function. The internal
406// representation stores frame ids for efficiency. This representation should
407// be used in the profile conversion and manipulation tools.
409 // Memory allocation sites in this function for which we have memory
410 // profiling data.
412 // Holds call sites in this function which are part of some memory
413 // allocation context. We store this as a list of locations, each with its
414 // list of inline locations in bottom-up order i.e. from leaf to root. The
415 // inline location list may include additional entries, users should pick
416 // the last entry in the list with the same function GUID.
418 // Conceptually the same as above. We are going to keep both CallSites and
419 // CallSiteIds while we are transitioning from CallSites to CallSiteIds.
421
422 void clear() {
423 AllocSites.clear();
424 CallSites.clear();
425 }
426
428 // TODO: Filter out duplicates which may occur if multiple memprof
429 // profiles are merged together using llvm-profdata.
430 AllocSites.append(Other.AllocSites);
431 CallSites.append(Other.CallSites);
432 }
433
434 size_t serializedSize(const MemProfSchema &Schema,
435 IndexedVersion Version) const;
436
438 if (Other.AllocSites != AllocSites)
439 return false;
440
441 if (Other.CallSiteIds != CallSiteIds)
442 return false;
443 return true;
444 }
445
446 // Serializes the memprof records in \p Records to the ostream \p OS based
447 // on the schema provided in \p Schema.
448 void serialize(const MemProfSchema &Schema, raw_ostream &OS,
451 *MemProfCallStackIndexes = nullptr) const;
452
453 // Deserializes memprof records from the Buffer.
455 const unsigned char *Buffer,
457
458 // Convert IndexedMemProfRecord to MemProfRecord. Callback is used to
459 // translate CallStackId to call stacks with frames inline.
461 llvm::function_ref<std::vector<Frame>(const CallStackId)> Callback) const;
462
463 // Returns the GUID for the function name after canonicalization. For
464 // memprof, we remove any .llvm suffix added by LTO. MemProfRecords are
465 // mapped to functions using this GUID.
466 static GlobalValue::GUID getGUID(const StringRef FunctionName);
467};
468
469// Holds the memprof profile information for a function. The internal
470// representation stores frame contents inline. This representation should
471// be used for small amount of temporary, in memory instances.
473 // Same as IndexedMemProfRecord::AllocSites with frame contents inline.
475 // Same as IndexedMemProfRecord::CallSites with frame contents inline.
477
478 MemProfRecord() = default;
481 llvm::function_ref<const Frame(const FrameId Id)> IdToFrameCallback) {
482 for (const IndexedAllocationInfo &IndexedAI : Record.AllocSites) {
483 AllocSites.emplace_back(IndexedAI, IdToFrameCallback);
484 }
485 for (const ArrayRef<FrameId> Site : Record.CallSites) {
486 std::vector<Frame> Frames;
487 for (const FrameId Id : Site) {
488 Frames.push_back(IdToFrameCallback(Id));
489 }
490 CallSites.push_back(Frames);
491 }
492 }
493
494 // Prints out the contents of the memprof record in YAML.
496 if (!AllocSites.empty()) {
497 OS << " AllocSites:\n";
498 for (const AllocationInfo &N : AllocSites)
499 N.printYAML(OS);
500 }
501
502 if (!CallSites.empty()) {
503 OS << " CallSites:\n";
504 for (const std::vector<Frame> &Frames : CallSites) {
505 for (const Frame &F : Frames) {
506 OS << " -\n";
507 F.printYAML(OS);
508 }
509 }
510 }
511 }
512};
513
514// Reads a memprof schema from a buffer. All entries in the buffer are
515// interpreted as uint64_t. The first entry in the buffer denotes the number of
516// ids in the schema. Subsequent entries are integers which map to memprof::Meta
517// enum class entries. After successfully reading the schema, the pointer is one
518// byte past the schema contents.
519Expected<MemProfSchema> readMemProfSchema(const unsigned char *&Buffer);
520
521// Trait for reading IndexedMemProfRecord data from the on-disk hash table.
523public:
529
532 : Version(V), Schema(S) {}
533
534 static bool EqualKey(uint64_t A, uint64_t B) { return A == B; }
535 static uint64_t GetInternalKey(uint64_t K) { return K; }
536 static uint64_t GetExternalKey(uint64_t K) { return K; }
537
539
540 static std::pair<offset_type, offset_type>
541 ReadKeyDataLength(const unsigned char *&D) {
542 using namespace support;
543
544 offset_type KeyLen =
545 endian::readNext<offset_type, llvm::endianness::little>(D);
546 offset_type DataLen =
547 endian::readNext<offset_type, llvm::endianness::little>(D);
548 return std::make_pair(KeyLen, DataLen);
549 }
550
551 uint64_t ReadKey(const unsigned char *D, offset_type /*Unused*/) {
552 using namespace support;
553 return endian::readNext<external_key_type, llvm::endianness::little>(D);
554 }
555
556 data_type ReadData(uint64_t K, const unsigned char *D,
557 offset_type /*Unused*/) {
558 Record = IndexedMemProfRecord::deserialize(Schema, D, Version);
559 return Record;
560 }
561
562private:
563 // Holds the MemProf version.
564 IndexedVersion Version;
565 // Holds the memprof schema used to deserialize records.
566 MemProfSchema Schema;
567 // Holds the records from one function deserialized from the indexed format.
569};
570
571// Trait for writing IndexedMemProfRecord data to the on-disk hash table.
573public:
576
579
582
583private:
584 // Pointer to the memprof schema to use for the generator.
585 const MemProfSchema *Schema;
586 // The MemProf version to use for the serialization.
587 IndexedVersion Version;
588
589 // Mappings from CallStackId to the indexes into the call stack array.
590 llvm::DenseMap<CallStackId, LinearCallStackId> *MemProfCallStackIndexes;
591
592public:
593 // We do not support the default constructor, which does not set Version.
596 const MemProfSchema *Schema, IndexedVersion V,
597 llvm::DenseMap<CallStackId, LinearCallStackId> *MemProfCallStackIndexes)
598 : Schema(Schema), Version(V),
599 MemProfCallStackIndexes(MemProfCallStackIndexes) {}
600
602
603 std::pair<offset_type, offset_type>
605 using namespace support;
606
608 offset_type N = sizeof(K);
609 LE.write<offset_type>(N);
610 offset_type M = V.serializedSize(*Schema, Version);
611 LE.write<offset_type>(M);
612 return std::make_pair(N, M);
613 }
614
615 void EmitKey(raw_ostream &Out, key_type_ref K, offset_type /*Unused*/) {
616 using namespace support;
618 LE.write<uint64_t>(K);
619 }
620
622 offset_type /*Unused*/) {
623 assert(Schema != nullptr && "MemProf schema is not initialized!");
624 V.serialize(*Schema, Out, Version, MemProfCallStackIndexes);
625 // Clear the IndexedMemProfRecord which results in clearing/freeing its
626 // vectors of allocs and callsites. This is owned by the associated on-disk
627 // hash table, but unused after this point. See also the comment added to
628 // the client which constructs the on-disk hash table for this trait.
629 V.clear();
630 }
631};
632
633// Trait for writing frame mappings to the on-disk hash table.
635public:
638
641
644
646
647 static std::pair<offset_type, offset_type>
649 using namespace support;
651 offset_type N = sizeof(K);
652 LE.write<offset_type>(N);
653 offset_type M = V.serializedSize();
654 LE.write<offset_type>(M);
655 return std::make_pair(N, M);
656 }
657
658 void EmitKey(raw_ostream &Out, key_type_ref K, offset_type /*Unused*/) {
659 using namespace support;
661 LE.write<key_type>(K);
662 }
663
665 offset_type /*Unused*/) {
666 V.serialize(Out);
667 }
668};
669
670// Trait for reading frame mappings from the on-disk hash table.
672public:
673 using data_type = const Frame;
678
680 return A == B;
681 }
684
686
687 static std::pair<offset_type, offset_type>
688 ReadKeyDataLength(const unsigned char *&D) {
689 using namespace support;
690
691 offset_type KeyLen =
692 endian::readNext<offset_type, llvm::endianness::little>(D);
693 offset_type DataLen =
694 endian::readNext<offset_type, llvm::endianness::little>(D);
695 return std::make_pair(KeyLen, DataLen);
696 }
697
698 uint64_t ReadKey(const unsigned char *D, offset_type /*Unused*/) {
699 using namespace support;
700 return endian::readNext<external_key_type, llvm::endianness::little>(D);
701 }
702
703 data_type ReadData(uint64_t K, const unsigned char *D,
704 offset_type /*Unused*/) {
705 return Frame::deserialize(D);
706 }
707};
708
709// Trait for writing call stacks to the on-disk hash table.
711public:
714
717
720
722
723 static std::pair<offset_type, offset_type>
725 using namespace support;
727 // We do not explicitly emit the key length because it is a constant.
728 offset_type N = sizeof(K);
729 offset_type M = sizeof(FrameId) * V.size();
730 LE.write<offset_type>(M);
731 return std::make_pair(N, M);
732 }
733
734 void EmitKey(raw_ostream &Out, key_type_ref K, offset_type /*Unused*/) {
735 using namespace support;
737 LE.write<key_type>(K);
738 }
739
741 offset_type /*Unused*/) {
742 using namespace support;
744 // Emit the frames. We do not explicitly emit the length of the vector
745 // because it can be inferred from the data length.
746 for (FrameId F : V)
747 LE.write<FrameId>(F);
748 }
749};
750
751// Trait for reading call stack mappings from the on-disk hash table.
753public:
759
761 return A == B;
762 }
765
767
768 static std::pair<offset_type, offset_type>
769 ReadKeyDataLength(const unsigned char *&D) {
770 using namespace support;
771
772 // We do not explicitly read the key length because it is a constant.
773 offset_type KeyLen = sizeof(external_key_type);
774 offset_type DataLen =
775 endian::readNext<offset_type, llvm::endianness::little>(D);
776 return std::make_pair(KeyLen, DataLen);
777 }
778
779 uint64_t ReadKey(const unsigned char *D, offset_type /*Unused*/) {
780 using namespace support;
781 return endian::readNext<external_key_type, llvm::endianness::little>(D);
782 }
783
784 data_type ReadData(uint64_t K, const unsigned char *D, offset_type Length) {
785 using namespace support;
787 // Derive the number of frames from the data length.
788 uint64_t NumFrames = Length / sizeof(FrameId);
789 assert(Length % sizeof(FrameId) == 0);
790 CS.reserve(NumFrames);
791 for (size_t I = 0; I != NumFrames; ++I) {
792 FrameId F = endian::readNext<FrameId, llvm::endianness::little>(D);
793 CS.push_back(F);
794 }
795 return CS;
796 }
797};
798
799// Compute a CallStackId for a given call stack.
801
802namespace detail {
803// "Dereference" the iterator from DenseMap or OnDiskChainedHashTable. We have
804// to do so in one of two different ways depending on the type of the hash
805// table.
806template <typename value_type, typename IterTy>
807value_type DerefIterator(IterTy Iter) {
808 using deref_type = llvm::remove_cvref_t<decltype(*Iter)>;
809 if constexpr (std::is_same_v<deref_type, value_type>)
810 return *Iter;
811 else
812 return Iter->second;
813}
814} // namespace detail
815
816// A function object that returns a frame for a given FrameId.
817template <typename MapTy> struct FrameIdConverter {
818 std::optional<FrameId> LastUnmappedId;
819 MapTy &Map;
820
823
824 // Delete the copy constructor and copy assignment operator to avoid a
825 // situation where a copy of FrameIdConverter gets an error in LastUnmappedId
826 // while the original instance doesn't.
829
831 auto Iter = Map.find(Id);
832 if (Iter == Map.end()) {
833 LastUnmappedId = Id;
834 return Frame(0, 0, 0, false);
835 }
836 return detail::DerefIterator<Frame>(Iter);
837 }
838};
839
840// A function object that returns a call stack for a given CallStackId.
841template <typename MapTy> struct CallStackIdConverter {
842 std::optional<CallStackId> LastUnmappedId;
843 MapTy &Map;
845
850
851 // Delete the copy constructor and copy assignment operator to avoid a
852 // situation where a copy of CallStackIdConverter gets an error in
853 // LastUnmappedId while the original instance doesn't.
856
857 std::vector<Frame> operator()(CallStackId CSId) {
858 std::vector<Frame> Frames;
859 auto CSIter = Map.find(CSId);
860 if (CSIter == Map.end()) {
861 LastUnmappedId = CSId;
862 } else {
864 detail::DerefIterator<llvm::SmallVector<FrameId>>(CSIter);
865 Frames.reserve(CS.size());
866 for (FrameId Id : CS)
867 Frames.push_back(FrameIdToFrame(Id));
868 }
869 return Frames;
870 }
871};
872
873// A function object that returns a Frame stored at a given index into the Frame
874// array in the profile.
876 const unsigned char *FrameBase;
877
879 LinearFrameIdConverter(const unsigned char *FrameBase)
880 : FrameBase(FrameBase) {}
881
883 uint64_t Offset = static_cast<uint64_t>(LinearId) * Frame::serializedSize();
885 }
886};
887
888// A function object that returns a call stack stored at a given index into the
889// call stack array in the profile.
891 const unsigned char *CallStackBase;
893
896 std::function<Frame(LinearFrameId)> FrameIdToFrame)
898
899 std::vector<Frame> operator()(LinearCallStackId LinearCSId) {
900 std::vector<Frame> Frames;
901
902 const unsigned char *Ptr =
904 static_cast<uint64_t>(LinearCSId) * sizeof(LinearFrameId);
905 uint32_t NumFrames =
906 support::endian::readNext<uint32_t, llvm::endianness::little>(Ptr);
907 Frames.reserve(NumFrames);
908 for (; NumFrames; --NumFrames) {
909 LinearFrameId Elem =
910 support::endian::read<LinearFrameId, llvm::endianness::little>(Ptr);
911 // Follow a pointer to the parent, if any. See comments below on
912 // CallStackRadixTreeBuilder for the description of the radix tree format.
913 if (static_cast<std::make_signed_t<LinearFrameId>>(Elem) < 0) {
914 Ptr += (-Elem) * sizeof(LinearFrameId);
915 Elem =
916 support::endian::read<LinearFrameId, llvm::endianness::little>(Ptr);
917 }
918 // We shouldn't encounter another pointer.
919 assert(static_cast<std::make_signed_t<LinearFrameId>>(Elem) >= 0);
920 Frames.push_back(FrameIdToFrame(Elem));
921 Ptr += sizeof(LinearFrameId);
922 }
923
924 return Frames;
925 }
926};
927
929 // A map to hold memprof data per function. The lower 64 bits obtained from
930 // the md5 hash of the function name is used to index into the map.
932
933 // A map to hold frame id to frame mappings. The mappings are used to
934 // convert IndexedMemProfRecord to MemProfRecords with frame information
935 // inline.
937
938 // A map to hold call stack id to call stacks.
940};
941
942struct FrameStat {
943 // The number of occurrences of a given FrameId.
945 // The sum of indexes where a given FrameId shows up.
947};
948
949// Compute a histogram of Frames in call stacks.
952 &MemProfCallStackData);
953
954// Construct a radix tree of call stacks.
955//
956// A set of call stacks might look like:
957//
958// CallStackId 1: f1 -> f2 -> f3
959// CallStackId 2: f1 -> f2 -> f4 -> f5
960// CallStackId 3: f1 -> f2 -> f4 -> f6
961// CallStackId 4: f7 -> f8 -> f9
962//
963// where each fn refers to a stack frame.
964//
965// Since we expect a lot of common prefixes, we can compress the call stacks
966// into a radix tree like:
967//
968// CallStackId 1: f1 -> f2 -> f3
969// |
970// CallStackId 2: +---> f4 -> f5
971// |
972// CallStackId 3: +---> f6
973//
974// CallStackId 4: f7 -> f8 -> f9
975//
976// Now, we are interested in retrieving call stacks for a given CallStackId, so
977// we just need a pointer from a given call stack to its parent. For example,
978// CallStackId 2 would point to CallStackId 1 as a parent.
979//
980// We serialize the radix tree above into a single array along with the length
981// of each call stack and pointers to the parent call stacks.
982//
983// Index: 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14
984// Array: L3 f9 f8 f7 L4 f6 J3 L4 f5 f4 J3 L3 f3 f2 f1
985// ^ ^ ^ ^
986// | | | |
987// CallStackId 4: 0 --+ | | |
988// CallStackId 3: 4 --------------+ | |
989// CallStackId 2: 7 -----------------------+ |
990// CallStackId 1: 11 -----------------------------------+
991//
992// - LN indicates the length of a call stack, encoded as ordinary integer N.
993//
994// - JN indicates a pointer to the parent, encoded as -N.
995//
996// The radix tree allows us to reconstruct call stacks in the leaf-to-root
997// order as we scan the array from left ro right while following pointers to
998// parents along the way.
999//
1000// For example, if we are decoding CallStackId 2, we start a forward traversal
1001// at Index 7, noting the call stack length of 4 and obtaining f5 and f4. When
1002// we see J3 at Index 10, we resume a forward traversal at Index 13 = 10 + 3,
1003// picking up f2 and f1. We are done after collecting 4 frames as indicated at
1004// the beginning of the traversal.
1005//
1006// On-disk IndexedMemProfRecord will refer to call stacks by their indexes into
1007// the radix tree array, so we do not explicitly encode mappings like:
1008// "CallStackId 1 -> 11".
1010 // The radix tree array.
1011 std::vector<LinearFrameId> RadixArray;
1012
1013 // Mapping from CallStackIds to indexes into RadixArray.
1015
1016 // In build, we partition a given call stack into two parts -- the prefix
1017 // that's common with the previously encoded call stack and the frames beyond
1018 // the common prefix -- the unique portion. Then we want to find out where
1019 // the common prefix is stored in RadixArray so that we can link the unique
1020 // portion to the common prefix. Indexes, declared below, helps with our
1021 // needs. Intuitively, Indexes tells us where each of the previously encoded
1022 // call stack is stored in RadixArray. More formally, Indexes satisfies:
1023 //
1024 // RadixArray[Indexes[I]] == Prev[I]
1025 //
1026 // for every I, where Prev is the the call stack in the root-to-leaf order
1027 // previously encoded by build. (Note that Prev, as passed to
1028 // encodeCallStack, is in the leaf-to-root order.)
1029 //
1030 // For example, if the call stack being encoded shares 5 frames at the root of
1031 // the call stack with the previously encoded call stack,
1032 // RadixArray[Indexes[0]] is the root frame of the common prefix.
1033 // RadixArray[Indexes[5 - 1]] is the last frame of the common prefix.
1034 std::vector<LinearCallStackId> Indexes;
1035
1036 using CSIdPair = std::pair<CallStackId, llvm::SmallVector<FrameId>>;
1037
1038 // Encode a call stack into RadixArray. Return the starting index within
1039 // RadixArray.
1040 LinearCallStackId encodeCallStack(
1042 const llvm::SmallVector<FrameId> *Prev,
1043 const llvm::DenseMap<FrameId, LinearFrameId> &MemProfFrameIndexes);
1044
1045public:
1047
1048 // Build a radix tree array.
1050 &&MemProfCallStackData,
1051 const llvm::DenseMap<FrameId, LinearFrameId> &MemProfFrameIndexes,
1052 llvm::DenseMap<FrameId, FrameStat> &FrameHistogram);
1053
1054 const std::vector<LinearFrameId> &getRadixArray() const { return RadixArray; }
1055
1057 return std::move(CallStackPos);
1058 }
1059};
1060
1061// Verify that each CallStackId is computed with hashCallStack. This function
1062// is intended to help transition from CallStack to CSId in
1063// IndexedAllocationInfo.
1064void verifyIndexedMemProfRecord(const IndexedMemProfRecord &Record);
1065
1066// Verify that each CallStackId is computed with hashCallStack. This function
1067// is intended to help transition from CallStack to CSId in
1068// IndexedAllocationInfo.
1071 &FunctionProfileData);
1072} // namespace memprof
1073} // namespace llvm
1074
1075#endif // LLVM_PROFILEDATA_MEMPROF_H_
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< StatepointGC > D("statepoint-example", "an example strategy for statepoint")
#define F(x, y, z)
Definition: MD5.cpp:55
#define I(x, y, z)
Definition: MD5.cpp:58
This file implements a map that provides insertion order iteration.
static ManagedStatic< cl::opt< uint64_t >, CreateSeed > Seed
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
This file contains library features backported from future STL versions.
raw_pwrite_stream & OS
This file defines the SmallVector class.
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition: ArrayRef.h:41
Tagged union holding either a T or a Error.
Definition: Error.h:481
uint64_t GUID
Declare a type to represent a global unique identifier for a global value.
Definition: GlobalValue.h:587
This class implements a map that also provides access to all stored values in a deterministic order.
Definition: MapVector.h:36
size_t size() const
Definition: SmallVector.h:91
void reserve(size_type N)
Definition: SmallVector.h:676
void push_back(const T &Elt)
Definition: SmallVector.h:426
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:50
LLVM Value Representation.
Definition: Value.h:74
An efficient, type-erasing, non-owning reference to a callable.
static uint64_t GetInternalKey(internal_key_type K)
Definition: MemProf.h:763
hash_value_type ComputeHash(internal_key_type K)
Definition: MemProf.h:766
static bool EqualKey(internal_key_type A, internal_key_type B)
Definition: MemProf.h:760
static std::pair< offset_type, offset_type > ReadKeyDataLength(const unsigned char *&D)
Definition: MemProf.h:769
uint64_t ReadKey(const unsigned char *D, offset_type)
Definition: MemProf.h:779
data_type ReadData(uint64_t K, const unsigned char *D, offset_type Length)
Definition: MemProf.h:784
static uint64_t GetExternalKey(external_key_type K)
Definition: MemProf.h:764
void build(llvm::MapVector< CallStackId, llvm::SmallVector< FrameId > > &&MemProfCallStackData, const llvm::DenseMap< FrameId, LinearFrameId > &MemProfFrameIndexes, llvm::DenseMap< FrameId, FrameStat > &FrameHistogram)
Definition: MemProf.cpp:486
llvm::DenseMap< CallStackId, LinearCallStackId > takeCallStackPos()
Definition: MemProf.h:1056
const std::vector< LinearFrameId > & getRadixArray() const
Definition: MemProf.h:1054
static hash_value_type ComputeHash(key_type_ref K)
Definition: MemProf.h:721
static std::pair< offset_type, offset_type > EmitKeyDataLength(raw_ostream &Out, key_type_ref K, data_type_ref V)
Definition: MemProf.h:724
void EmitData(raw_ostream &Out, key_type_ref, data_type_ref V, offset_type)
Definition: MemProf.h:740
void EmitKey(raw_ostream &Out, key_type_ref K, offset_type)
Definition: MemProf.h:734
Helper class to iterate through stack ids in both metadata (memprof MIB and callsite) and the corresp...
data_type ReadData(uint64_t K, const unsigned char *D, offset_type)
Definition: MemProf.h:703
static uint64_t GetExternalKey(external_key_type K)
Definition: MemProf.h:683
static bool EqualKey(internal_key_type A, internal_key_type B)
Definition: MemProf.h:679
static uint64_t GetInternalKey(internal_key_type K)
Definition: MemProf.h:682
static std::pair< offset_type, offset_type > ReadKeyDataLength(const unsigned char *&D)
Definition: MemProf.h:688
hash_value_type ComputeHash(internal_key_type K)
Definition: MemProf.h:685
uint64_t ReadKey(const unsigned char *D, offset_type)
Definition: MemProf.h:698
static hash_value_type ComputeHash(key_type_ref K)
Definition: MemProf.h:645
void EmitKey(raw_ostream &Out, key_type_ref K, offset_type)
Definition: MemProf.h:658
void EmitData(raw_ostream &Out, key_type_ref, data_type_ref V, offset_type)
Definition: MemProf.h:664
static std::pair< offset_type, offset_type > EmitKeyDataLength(raw_ostream &Out, key_type_ref K, data_type_ref V)
Definition: MemProf.h:648
static bool EqualKey(uint64_t A, uint64_t B)
Definition: MemProf.h:534
uint64_t ReadKey(const unsigned char *D, offset_type)
Definition: MemProf.h:551
data_type ReadData(uint64_t K, const unsigned char *D, offset_type)
Definition: MemProf.h:556
static std::pair< offset_type, offset_type > ReadKeyDataLength(const unsigned char *&D)
Definition: MemProf.h:541
static uint64_t GetInternalKey(uint64_t K)
Definition: MemProf.h:535
hash_value_type ComputeHash(uint64_t K)
Definition: MemProf.h:538
static uint64_t GetExternalKey(uint64_t K)
Definition: MemProf.h:536
RecordLookupTrait(IndexedVersion V, const MemProfSchema &S)
Definition: MemProf.h:531
static hash_value_type ComputeHash(key_type_ref K)
Definition: MemProf.h:601
RecordWriterTrait(const MemProfSchema *Schema, IndexedVersion V, llvm::DenseMap< CallStackId, LinearCallStackId > *MemProfCallStackIndexes)
Definition: MemProf.h:595
std::pair< offset_type, offset_type > EmitKeyDataLength(raw_ostream &Out, key_type_ref K, data_type_ref V)
Definition: MemProf.h:604
void EmitKey(raw_ostream &Out, key_type_ref K, offset_type)
Definition: MemProf.h:615
void EmitData(raw_ostream &Out, key_type_ref, data_type_ref V, offset_type)
Definition: MemProf.h:621
This class implements an extremely fast bulk output stream that can only output to a stream.
Definition: raw_ostream.h:52
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
@ C
The default llvm calling convention, compatible with C.
Definition: CallingConv.h:34
value_type DerefIterator(IterTy Iter)
Definition: MemProf.h:807
void verifyIndexedMemProfRecord(const IndexedMemProfRecord &Record)
Definition: MemProf.cpp:631
constexpr uint64_t MaximumSupportedVersion
Definition: MemProf.h:37
MemProfSchema getHotColdSchema()
Definition: MemProf.cpp:21
uint32_t LinearCallStackId
Definition: MemProf.h:336
llvm::DenseMap< FrameId, FrameStat > computeFrameHistogram(llvm::MapVector< CallStackId, llvm::SmallVector< FrameId > > &MemProfCallStackData)
Definition: MemProf.cpp:616
CallStackId hashCallStack(ArrayRef< FrameId > CS)
Definition: MemProf.cpp:402
uint64_t FrameId
Definition: MemProf.h:198
constexpr uint64_t MinimumSupportedVersion
Definition: MemProf.h:36
uint64_t CallStackId
Definition: MemProf.h:333
MemProfSchema getFullSchema()
Definition: MemProf.cpp:13
Expected< MemProfSchema > readMemProfSchema(const unsigned char *&Buffer)
Definition: MemProf.cpp:376
void verifyFunctionProfileData(const llvm::MapVector< GlobalValue::GUID, IndexedMemProfRecord > &FunctionProfileData)
Definition: MemProf.cpp:638
uint32_t LinearFrameId
Definition: MemProf.h:200
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
@ Offset
Definition: DWP.cpp:480
@ Length
Definition: DWP.cpp:480
constexpr std::underlying_type_t< Enum > to_underlying(Enum E)
Returns underlying integer value of an enum.
typename llvm::remove_cvref< T >::type remove_cvref_t
@ Other
Any other memory.
#define N
std::vector< Frame > CallStack
Definition: MemProf.h:379
AllocationInfo(const IndexedAllocationInfo &IndexedAI, llvm::function_ref< const Frame(const FrameId)> IdToFrameCallback)
Definition: MemProf.h:384
PortableMemInfoBlock Info
Definition: MemProf.h:381
void printYAML(raw_ostream &OS) const
Definition: MemProf.h:393
CallStackIdConverter & operator=(const CallStackIdConverter &)=delete
llvm::function_ref< Frame(FrameId)> FrameIdToFrame
Definition: MemProf.h:844
std::optional< CallStackId > LastUnmappedId
Definition: MemProf.h:842
CallStackIdConverter(MapTy &Map, llvm::function_ref< Frame(FrameId)> FrameIdToFrame)
Definition: MemProf.h:847
std::vector< Frame > operator()(CallStackId CSId)
Definition: MemProf.h:857
CallStackIdConverter(const CallStackIdConverter &)=delete
FrameIdConverter(const FrameIdConverter &)=delete
Frame operator()(FrameId Id)
Definition: MemProf.h:830
std::optional< FrameId > LastUnmappedId
Definition: MemProf.h:818
FrameIdConverter & operator=(const FrameIdConverter &)=delete
Frame & operator=(const Frame &Other)
Definition: MemProf.h:239
Frame(const Frame &Other)
Definition: MemProf.h:219
static constexpr size_t serializedSize()
Definition: MemProf.h:296
void printYAML(raw_ostream &OS) const
Definition: MemProf.h:302
GlobalValue::GUID Function
Definition: MemProf.h:207
void serialize(raw_ostream &OS) const
Definition: MemProf.h:264
bool hasSymbolName() const
Definition: MemProf.h:252
std::string getSymbolNameOr(StringRef Alt) const
Definition: MemProf.h:259
StringRef getSymbolName() const
Definition: MemProf.h:254
bool operator==(const Frame &Other) const
Definition: MemProf.h:232
bool operator!=(const Frame &Other) const
Definition: MemProf.h:250
static Frame deserialize(const unsigned char *Ptr)
Definition: MemProf.h:281
Frame(GlobalValue::GUID Hash, uint32_t Off, uint32_t Col, bool Inline)
Definition: MemProf.h:229
FrameId hash() const
Definition: MemProf.h:314
uint32_t LineOffset
Definition: MemProf.h:212
std::unique_ptr< std::string > SymbolName
Definition: MemProf.h:210
IndexedAllocationInfo(ArrayRef< FrameId > CS, CallStackId CSId, const MemInfoBlock &MB, const MemProfSchema &Schema=getFullSchema())
Definition: MemProf.h:352
bool operator==(const IndexedAllocationInfo &Other) const
Definition: MemProf.h:361
bool operator!=(const IndexedAllocationInfo &Other) const
Definition: MemProf.h:370
PortableMemInfoBlock Info
Definition: MemProf.h:349
size_t serializedSize(const MemProfSchema &Schema, IndexedVersion Version) const
Definition: MemProf.cpp:58
llvm::SmallVector< FrameId > CallStack
Definition: MemProf.h:344
llvm::MapVector< CallStackId, llvm::SmallVector< FrameId > > CallStacks
Definition: MemProf.h:939
llvm::MapVector< GlobalValue::GUID, IndexedMemProfRecord > Records
Definition: MemProf.h:931
llvm::MapVector< FrameId, Frame > Frames
Definition: MemProf.h:936
llvm::SmallVector< CallStackId > CallSiteIds
Definition: MemProf.h:420
llvm::SmallVector< IndexedAllocationInfo > AllocSites
Definition: MemProf.h:411
size_t serializedSize(const MemProfSchema &Schema, IndexedVersion Version) const
Definition: MemProf.cpp:117
void serialize(const MemProfSchema &Schema, raw_ostream &OS, IndexedVersion Version, llvm::DenseMap< CallStackId, LinearCallStackId > *MemProfCallStackIndexes=nullptr) const
Definition: MemProf.cpp:195
static IndexedMemProfRecord deserialize(const MemProfSchema &Schema, const unsigned char *Buffer, IndexedVersion Version)
Definition: MemProf.cpp:326
llvm::SmallVector< llvm::SmallVector< FrameId > > CallSites
Definition: MemProf.h:417
bool operator==(const IndexedMemProfRecord &Other) const
Definition: MemProf.h:437
static GlobalValue::GUID getGUID(const StringRef FunctionName)
Definition: MemProf.cpp:360
void merge(const IndexedMemProfRecord &Other)
Definition: MemProf.h:427
MemProfRecord toMemProfRecord(llvm::function_ref< std::vector< Frame >(const CallStackId)> Callback) const
Definition: MemProf.cpp:341
const unsigned char * CallStackBase
Definition: MemProf.h:891
std::function< Frame(LinearFrameId)> FrameIdToFrame
Definition: MemProf.h:892
LinearCallStackIdConverter(const unsigned char *CallStackBase, std::function< Frame(LinearFrameId)> FrameIdToFrame)
Definition: MemProf.h:895
std::vector< Frame > operator()(LinearCallStackId LinearCSId)
Definition: MemProf.h:899
LinearFrameIdConverter(const unsigned char *FrameBase)
Definition: MemProf.h:879
const unsigned char * FrameBase
Definition: MemProf.h:876
Frame operator()(LinearFrameId LinearId)
Definition: MemProf.h:882
llvm::SmallVector< std::vector< Frame > > CallSites
Definition: MemProf.h:476
llvm::SmallVector< AllocationInfo > AllocSites
Definition: MemProf.h:474
void print(llvm::raw_ostream &OS) const
Definition: MemProf.h:495
MemProfRecord(const IndexedMemProfRecord &Record, llvm::function_ref< const Frame(const FrameId Id)> IdToFrameCallback)
Definition: MemProf.h:479
bool operator!=(const PortableMemInfoBlock &Other) const
Definition: MemProf.h:165
void deserialize(const MemProfSchema &IncomingSchema, const unsigned char *Ptr)
Definition: MemProf.h:78
PortableMemInfoBlock(const MemProfSchema &Schema, const unsigned char *Ptr)
Definition: MemProf.h:72
std::bitset< llvm::to_underlying(Meta::Size)> getSchema() const
Definition: MemProf.h:137
static size_t serializedSize(const MemProfSchema &Schema)
Definition: MemProf.h:169
void printYAML(raw_ostream &OS) const
Definition: MemProf.h:121
void serialize(const MemProfSchema &Schema, raw_ostream &OS) const
Definition: MemProf.h:102
PortableMemInfoBlock(const MemInfoBlock &Block, const MemProfSchema &IncomingSchema)
Definition: MemProf.h:63
bool operator==(const PortableMemInfoBlock &Other) const
Definition: MemProf.h:152
Adapter to write values to a stream in a particular byte order.
Definition: EndianStream.h:67