LLVM 23.0.0git
InstrProfWriter.cpp
Go to the documentation of this file.
1//===- InstrProfWriter.cpp - Instrumented profiling writer ----------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file contains support for writing profiling data for clang's
10// instrumentation based PGO and coverage.
11//
12//===----------------------------------------------------------------------===//
13
15#include "llvm/ADT/STLExtras.h"
16#include "llvm/ADT/StringRef.h"
24#include "llvm/Support/Error.h"
28#include <cstdint>
29#include <ctime>
30#include <memory>
31#include <string>
32#include <tuple>
33#include <utility>
34#include <vector>
35
36using namespace llvm;
37
38namespace llvm {
39
41public:
44
47
50
54 bool WritePrevVersion = false;
55
57
61
62 std::pair<offset_type, offset_type>
64 using namespace support;
65
67
68 offset_type N = K.size();
69 LE.write<offset_type>(N);
70
71 offset_type M = 0;
72 for (const auto &ProfileData : *V) {
73 const InstrProfRecord &ProfRecord = ProfileData.second;
74 M += sizeof(uint64_t); // The function hash
75 M += sizeof(uint64_t); // The size of the Counts vector
76 M += ProfRecord.Counts.size() * sizeof(uint64_t);
77 M += sizeof(uint64_t); // The size of the Bitmap vector
78 if (WritePrevVersion) {
79 // Compatibility mode: each bitmap byte is stored as a uint64_t.
80 M += ProfRecord.BitmapBytes.size() * sizeof(uint64_t);
81 } else {
82 // Version 14+: bitmap bytes as uint8_t with padding, plus
83 // uniformity bits.
84 M += alignTo(ProfRecord.BitmapBytes.size(), sizeof(uint64_t));
85 M += sizeof(uint64_t); // The size of the UniformityBits vector
86 M += alignTo(ProfRecord.UniformityBits.size(), sizeof(uint64_t));
87 }
88
89 // Value data
90 M += ValueProfData::getSize(ProfileData.second);
91 }
92 LE.write<offset_type>(M);
93
94 return std::make_pair(N, M);
95 }
96
98 Out.write(K.data(), N);
99 }
100
102 offset_type) {
103 using namespace support;
104
106 for (const auto &ProfileData : *V) {
107 const InstrProfRecord &ProfRecord = ProfileData.second;
108 if (NamedInstrProfRecord::hasCSFlagInHash(ProfileData.first))
109 CSSummaryBuilder->addRecord(ProfRecord);
110 else
111 SummaryBuilder->addRecord(ProfRecord);
112
113 LE.write<uint64_t>(ProfileData.first); // Function hash
114 LE.write<uint64_t>(ProfRecord.Counts.size());
115 for (uint64_t I : ProfRecord.Counts)
116 LE.write<uint64_t>(I);
117
118 LE.write<uint64_t>(ProfRecord.BitmapBytes.size());
119 if (WritePrevVersion) {
120 // Compatibility mode: each bitmap byte is stored as a uint64_t.
121 for (uint8_t I : ProfRecord.BitmapBytes)
122 LE.write<uint64_t>(I);
123 } else {
124 // Version 14+: bitmap bytes as uint8_t with padding.
125 for (uint8_t I : ProfRecord.BitmapBytes)
126 LE.write<uint8_t>(I);
127 for (size_t I = ProfRecord.BitmapBytes.size();
128 I < alignTo(ProfRecord.BitmapBytes.size(), sizeof(uint64_t)); ++I)
129 LE.write<uint8_t>(0);
130
131 // Write uniformity bits (AMDGPU offload profiling).
132 LE.write<uint64_t>(ProfRecord.UniformityBits.size());
133 for (uint8_t I : ProfRecord.UniformityBits)
134 LE.write<uint8_t>(I);
135 for (size_t I = ProfRecord.UniformityBits.size();
136 I < alignTo(ProfRecord.UniformityBits.size(), sizeof(uint64_t));
137 ++I)
138 LE.write<uint8_t>(0);
139 }
140
141 // Write value data
142 std::unique_ptr<ValueProfData> VDataPtr =
143 ValueProfData::serializeFrom(ProfileData.second);
144 uint32_t S = VDataPtr->getSize();
145 VDataPtr->swapBytesFromHost(ValueProfDataEndianness);
146 Out.write((const char *)VDataPtr.get(), S);
147 }
148 }
149};
150
151} // end namespace llvm
152
154 bool Sparse, uint64_t TemporalProfTraceReservoirSize,
155 uint64_t MaxTemporalProfTraceLength, bool WritePrevVersion,
156 memprof::IndexedVersion MemProfVersionRequested, bool MemProfFullSchema,
157 bool MemprofGenerateRandomHotness,
158 unsigned MemprofGenerateRandomHotnessSeed)
159 : Sparse(Sparse), MaxTemporalProfTraceLength(MaxTemporalProfTraceLength),
160 TemporalProfTraceReservoirSize(TemporalProfTraceReservoirSize),
161 InfoObj(new InstrProfRecordWriterTrait()),
162 WritePrevVersion(WritePrevVersion),
163 MemProfVersionRequested(MemProfVersionRequested),
164 MemProfFullSchema(MemProfFullSchema),
165 MemprofGenerateRandomHotness(MemprofGenerateRandomHotness) {
166 // Set up the random number seed if requested.
167 if (MemprofGenerateRandomHotness) {
168 unsigned seed = MemprofGenerateRandomHotnessSeed
169 ? MemprofGenerateRandomHotnessSeed
170 : std::time(nullptr);
171 errs() << "random hotness seed = " << seed << "\n";
172 std::srand(seed);
173 }
174}
175
177
178// Internal interface for testing purpose only.
180 InfoObj->ValueProfDataEndianness = Endianness;
181}
182
183void InstrProfWriter::setOutputSparse(bool Sparse) { this->Sparse = Sparse; }
184
186 function_ref<void(Error)> Warn) {
187 auto Name = I.Name;
188 auto Hash = I.Hash;
189 addRecord(Name, Hash, std::move(I), Weight, Warn);
190}
191
193 OverlapStats &Overlap,
194 OverlapStats &FuncLevelOverlap,
195 const OverlapFuncFilters &FuncFilter) {
196 auto Name = Other.Name;
197 auto Hash = Other.Hash;
198 Other.accumulateCounts(FuncLevelOverlap.Test);
199 auto It = FunctionData.find(Name);
200 if (It == FunctionData.end()) {
201 Overlap.addOneUnique(FuncLevelOverlap.Test);
202 return;
203 }
204 if (FuncLevelOverlap.Test.CountSum < 1.0f) {
205 Overlap.Overlap.NumEntries += 1;
206 return;
207 }
208 auto &ProfileDataMap = It->second;
209 auto [Where, NewFunc] = ProfileDataMap.try_emplace(Hash);
210 if (NewFunc) {
211 Overlap.addOneMismatch(FuncLevelOverlap.Test);
212 return;
213 }
214 InstrProfRecord &Dest = Where->second;
215
216 uint64_t ValueCutoff = FuncFilter.ValueCutoff;
217 if (!FuncFilter.NameFilter.empty() && Name.contains(FuncFilter.NameFilter))
218 ValueCutoff = 0;
219
220 Dest.overlap(Other, Overlap, FuncLevelOverlap, ValueCutoff);
221}
222
224 InstrProfRecord &&I, uint64_t Weight,
225 function_ref<void(Error)> Warn) {
226 I.computeBlockUniformity();
227
228 auto &ProfileDataMap = FunctionData[Name];
229
230 auto [Where, NewFunc] = ProfileDataMap.try_emplace(Hash);
231 InstrProfRecord &Dest = Where->second;
232
233 auto MapWarn = [&](instrprof_error E) {
235 };
236
237 if (NewFunc) {
238 // We've never seen a function with this name and hash, add it.
239 Dest = std::move(I);
240 if (Weight > 1)
241 Dest.scale(Weight, 1, MapWarn);
242 } else {
243 // We're updating a function we've seen before.
244 Dest.merge(I, Weight, MapWarn);
245 }
246
247 Dest.sortValueData();
248}
249
250void InstrProfWriter::addMemProfRecord(
252 auto NewRecord = Record;
253 // Provoke random hotness values if requested. We specify the lifetime access
254 // density and lifetime length that will result in a cold or not cold hotness.
255 // See the logic in getAllocType() in Analysis/MemoryProfileInfo.cpp.
256 if (MemprofGenerateRandomHotness) {
257 for (auto &Alloc : NewRecord.AllocSites) {
258 // To get a not cold context, set the lifetime access density to the
259 // maximum value and the lifetime to 0.
260 uint64_t NewTLAD = std::numeric_limits<uint64_t>::max();
261 uint64_t NewTL = 0;
262 bool IsCold = std::rand() % 2;
263 if (IsCold) {
264 // To get a cold context, set the lifetime access density to 0 and the
265 // lifetime to the maximum value.
266 NewTLAD = 0;
267 NewTL = std::numeric_limits<uint64_t>::max();
268 }
269 Alloc.Info.setTotalLifetimeAccessDensity(NewTLAD);
270 Alloc.Info.setTotalLifetime(NewTL);
271 }
272 }
273 MemProfSumBuilder.addRecord(NewRecord);
274 auto [Iter, Inserted] = MemProfData.Records.insert({Id, NewRecord});
275 // If we inserted a new record then we are done.
276 if (Inserted) {
277 return;
278 }
279 memprof::IndexedMemProfRecord &Existing = Iter->second;
280 Existing.merge(NewRecord);
281}
282
283bool InstrProfWriter::addMemProfFrame(const memprof::FrameId Id,
284 const memprof::Frame &Frame,
285 function_ref<void(Error)> Warn) {
286 auto [Iter, Inserted] = MemProfData.Frames.insert({Id, Frame});
287 // If a mapping already exists for the current frame id and it does not
288 // match the new mapping provided then reset the existing contents and bail
289 // out. We don't support the merging of memprof data whose Frame -> Id
290 // mapping across profiles is inconsistent.
291 if (!Inserted && Iter->second != Frame) {
293 "frame to id mapping mismatch"));
294 return false;
295 }
296 return true;
297}
298
299bool InstrProfWriter::addMemProfCallStack(
300 const memprof::CallStackId CSId,
302 function_ref<void(Error)> Warn) {
303 auto [Iter, Inserted] = MemProfData.CallStacks.insert({CSId, CallStack});
304 // If a mapping already exists for the current call stack id and it does not
305 // match the new mapping provided then reset the existing contents and bail
306 // out. We don't support the merging of memprof data whose CallStack -> Id
307 // mapping across profiles is inconsistent.
308 if (!Inserted && Iter->second != CallStack) {
310 "call stack to id mapping mismatch"));
311 return false;
312 }
313 return true;
314}
315
317 function_ref<void(Error)> Warn) {
318 // Return immediately if everything is empty.
319 if (Incoming.Frames.empty() && Incoming.CallStacks.empty() &&
320 Incoming.Records.empty())
321 return true;
322
323 // Otherwise, every component must be non-empty.
324 assert(!Incoming.Frames.empty() && !Incoming.CallStacks.empty() &&
325 !Incoming.Records.empty());
326
327 if (MemProfData.Frames.empty())
328 MemProfData.Frames = std::move(Incoming.Frames);
329 else
330 for (const auto &[Id, F] : Incoming.Frames)
331 if (addMemProfFrame(Id, F, Warn))
332 return false;
333
334 if (MemProfData.CallStacks.empty())
335 MemProfData.CallStacks = std::move(Incoming.CallStacks);
336 else
337 for (const auto &[CSId, CS] : Incoming.CallStacks)
338 if (addMemProfCallStack(CSId, CS, Warn))
339 return false;
340
341 // Add one record at a time if randomization is requested.
342 if (MemProfData.Records.empty() && !MemprofGenerateRandomHotness) {
343 // Need to manually add each record to the builder, which is otherwise done
344 // in addMemProfRecord.
345 for (const auto &[GUID, Record] : Incoming.Records)
346 MemProfSumBuilder.addRecord(Record);
347 MemProfData.Records = std::move(Incoming.Records);
348 } else {
349 for (const auto &[GUID, Record] : Incoming.Records)
350 addMemProfRecord(GUID, Record);
351 }
352
353 return true;
354}
355
359
361 std::unique_ptr<memprof::DataAccessProfData> DataAccessProfDataIn) {
362 DataAccessProfileData = std::move(DataAccessProfDataIn);
363}
364
366 SmallVectorImpl<TemporalProfTraceTy> &SrcTraces, uint64_t SrcStreamSize) {
367 if (TemporalProfTraces.size() > TemporalProfTraceReservoirSize)
368 TemporalProfTraces.truncate(TemporalProfTraceReservoirSize);
369 for (auto &Trace : SrcTraces)
370 if (Trace.FunctionNameRefs.size() > MaxTemporalProfTraceLength)
371 Trace.FunctionNameRefs.resize(MaxTemporalProfTraceLength);
372 llvm::erase_if(SrcTraces, [](auto &T) { return T.FunctionNameRefs.empty(); });
373 // If there are no source traces, it is probably because
374 // --temporal-profile-max-trace-length=0 was set to deliberately remove all
375 // traces. In that case, we do not want to increase the stream size
376 if (SrcTraces.empty())
377 return;
378 // Add traces until our reservoir is full or we run out of source traces
379 auto SrcTraceIt = SrcTraces.begin();
380 while (TemporalProfTraces.size() < TemporalProfTraceReservoirSize &&
381 SrcTraceIt < SrcTraces.end())
382 TemporalProfTraces.push_back(*SrcTraceIt++);
383 // Our reservoir is full, we need to sample the source stream
384 llvm::shuffle(SrcTraceIt, SrcTraces.end(), RNG);
385 for (uint64_t I = TemporalProfTraces.size();
386 I < SrcStreamSize && SrcTraceIt < SrcTraces.end(); I++) {
387 std::uniform_int_distribution<uint64_t> Distribution(0, I);
388 uint64_t RandomIndex = Distribution(RNG);
389 if (RandomIndex < TemporalProfTraces.size())
390 TemporalProfTraces[RandomIndex] = *SrcTraceIt++;
391 }
392 TemporalProfTraceStreamSize += SrcStreamSize;
393}
394
396 function_ref<void(Error)> Warn) {
397 for (auto &I : IPW.FunctionData)
398 for (auto &Func : I.getValue())
399 addRecord(I.getKey(), Func.first, std::move(Func.second), 1, Warn);
400
401 BinaryIds.reserve(BinaryIds.size() + IPW.BinaryIds.size());
402 for (auto &I : IPW.BinaryIds)
404
405 addTemporalProfileTraces(IPW.TemporalProfTraces,
406 IPW.TemporalProfTraceStreamSize);
407
408 MemProfData.Frames.reserve(IPW.MemProfData.Frames.size());
409 for (auto &[FrameId, Frame] : IPW.MemProfData.Frames) {
410 // If we weren't able to add the frame mappings then it doesn't make sense
411 // to try to merge the records from this profile.
412 if (!addMemProfFrame(FrameId, Frame, Warn))
413 return;
414 }
415
416 MemProfData.CallStacks.reserve(IPW.MemProfData.CallStacks.size());
417 for (auto &[CSId, CallStack] : IPW.MemProfData.CallStacks) {
418 if (!addMemProfCallStack(CSId, CallStack, Warn))
419 return;
420 }
421
422 MemProfData.Records.reserve(IPW.MemProfData.Records.size());
423 for (auto &[GUID, Record] : IPW.MemProfData.Records) {
424 addMemProfRecord(GUID, Record);
425 }
426}
427
428bool InstrProfWriter::shouldEncodeData(const ProfilingData &PD) {
429 if (!Sparse)
430 return true;
431 for (const auto &Func : PD) {
432 const InstrProfRecord &IPR = Func.second;
433 if (llvm::any_of(IPR.Counts, [](uint64_t Count) { return Count > 0; }))
434 return true;
435 if (llvm::any_of(IPR.BitmapBytes, [](uint8_t Byte) { return Byte > 0; }))
436 return true;
437 }
438 return false;
439}
440
441static void setSummary(IndexedInstrProf::Summary *TheSummary,
442 ProfileSummary &PS) {
443 using namespace IndexedInstrProf;
444
445 const std::vector<ProfileSummaryEntry> &Res = PS.getDetailedSummary();
446 TheSummary->NumSummaryFields = Summary::NumKinds;
447 TheSummary->NumCutoffEntries = Res.size();
448 TheSummary->set(Summary::MaxFunctionCount, PS.getMaxFunctionCount());
449 TheSummary->set(Summary::MaxBlockCount, PS.getMaxCount());
450 TheSummary->set(Summary::MaxInternalBlockCount, PS.getMaxInternalCount());
451 TheSummary->set(Summary::TotalBlockCount, PS.getTotalCount());
452 TheSummary->set(Summary::TotalNumBlocks, PS.getNumCounts());
453 TheSummary->set(Summary::TotalNumFunctions, PS.getNumFunctions());
454 for (unsigned I = 0; I < Res.size(); I++)
455 TheSummary->setEntry(I, Res[I]);
456}
457
458uint64_t InstrProfWriter::writeHeader(const IndexedInstrProf::Header &Header,
459 const bool WritePrevVersion,
460 ProfOStream &OS) {
461 // Only write out the first four fields.
462 for (int I = 0; I < 4; I++)
463 OS.write(reinterpret_cast<const uint64_t *>(&Header)[I]);
464
465 // Remember the offset of the remaining fields to allow back patching later.
466 auto BackPatchStartOffset = OS.tell();
467
468 // Reserve the space for back patching later.
469 OS.write(0); // HashOffset
470 OS.write(0); // MemProfOffset
471 OS.write(0); // BinaryIdOffset
472 OS.write(0); // TemporalProfTracesOffset
473 if (!WritePrevVersion)
474 OS.write(0); // VTableNamesOffset
475
476 return BackPatchStartOffset;
477}
478
479Error InstrProfWriter::writeBinaryIds(ProfOStream &OS) {
480 // BinaryIdSection has two parts:
481 // 1. uint64_t BinaryIdsSectionSize
482 // 2. list of binary ids that consist of:
483 // a. uint64_t BinaryIdLength
484 // b. uint8_t BinaryIdData
485 // c. uint8_t Padding (if necessary)
486 // Calculate size of binary section.
487 uint64_t BinaryIdsSectionSize = 0;
488
489 // Remove duplicate binary ids.
490 llvm::sort(BinaryIds);
491 BinaryIds.erase(llvm::unique(BinaryIds), BinaryIds.end());
492
493 for (const auto &BI : BinaryIds) {
494 // Increment by binary id length data type size.
495 BinaryIdsSectionSize += sizeof(uint64_t);
496 // Increment by binary id data length, aligned to 8 bytes.
497 BinaryIdsSectionSize += alignToPowerOf2(BI.size(), sizeof(uint64_t));
498 }
499 // Write binary ids section size.
500 OS.write(BinaryIdsSectionSize);
501
502 for (const auto &BI : BinaryIds) {
503 uint64_t BILen = BI.size();
504 // Write binary id length.
505 OS.write(BILen);
506 // Write binary id data.
507 for (unsigned K = 0; K < BILen; K++)
508 OS.writeByte(BI[K]);
509 // Write padding if necessary.
510 uint64_t PaddingSize = alignToPowerOf2(BILen, sizeof(uint64_t)) - BILen;
511 for (unsigned K = 0; K < PaddingSize; K++)
512 OS.writeByte(0);
513 }
514
515 return Error::success();
516}
517
518Error InstrProfWriter::writeVTableNames(ProfOStream &OS) {
519 std::vector<std::string> VTableNameStrs;
520 for (StringRef VTableName : VTableNames.keys())
521 VTableNameStrs.push_back(VTableName.str());
522
523 std::string CompressedVTableNames;
524 if (!VTableNameStrs.empty())
526 VTableNameStrs, compression::zlib::isAvailable(),
527 CompressedVTableNames))
528 return E;
529
530 const uint64_t CompressedStringLen = CompressedVTableNames.length();
531
532 // Record the length of compressed string.
533 OS.write(CompressedStringLen);
534
535 // Write the chars in compressed strings.
536 for (auto &c : CompressedVTableNames)
537 OS.writeByte(static_cast<uint8_t>(c));
538
539 // Pad up to a multiple of 8.
540 // InstrProfReader could read bytes according to 'CompressedStringLen'.
541 const uint64_t PaddedLength = alignTo(CompressedStringLen, 8);
542
543 for (uint64_t K = CompressedStringLen; K < PaddedLength; K++)
544 OS.writeByte(0);
545
546 return Error::success();
547}
548
549Error InstrProfWriter::writeImpl(ProfOStream &OS) {
550 using namespace IndexedInstrProf;
551 using namespace support;
552
553 OnDiskChainedHashTableGenerator<InstrProfRecordWriterTrait> Generator;
554
555 InstrProfSummaryBuilder ISB(ProfileSummaryBuilder::DefaultCutoffs);
556 InfoObj->SummaryBuilder = &ISB;
557 InstrProfSummaryBuilder CSISB(ProfileSummaryBuilder::DefaultCutoffs);
558 InfoObj->CSSummaryBuilder = &CSISB;
559 InfoObj->WritePrevVersion = WritePrevVersion;
560
561 // Populate the hash table generator.
563 for (const auto &I : FunctionData)
564 if (shouldEncodeData(I.getValue()))
565 OrderedData.emplace_back((I.getKey()), &I.getValue());
566 llvm::sort(OrderedData, less_first());
567 for (const auto &I : OrderedData)
568 Generator.insert(I.first, I.second);
569
570 // Write the header.
571 IndexedInstrProf::Header Header;
572 Header.Version = WritePrevVersion
575 // The WritePrevVersion handling will either need to be removed or updated
576 // if the version is advanced beyond 12.
579 if (static_cast<bool>(ProfileKind & InstrProfKind::IRInstrumentation))
580 Header.Version |= VARIANT_MASK_IR_PROF;
581 if (static_cast<bool>(ProfileKind & InstrProfKind::ContextSensitive))
582 Header.Version |= VARIANT_MASK_CSIR_PROF;
583 if (static_cast<bool>(ProfileKind &
585 Header.Version |= VARIANT_MASK_INSTR_ENTRY;
586 if (static_cast<bool>(ProfileKind &
588 Header.Version |= VARIANT_MASK_INSTR_LOOP_ENTRIES;
589 if (static_cast<bool>(ProfileKind & InstrProfKind::SingleByteCoverage))
590 Header.Version |= VARIANT_MASK_BYTE_COVERAGE;
591 if (static_cast<bool>(ProfileKind & InstrProfKind::FunctionEntryOnly))
592 Header.Version |= VARIANT_MASK_FUNCTION_ENTRY_ONLY;
593 if (static_cast<bool>(ProfileKind & InstrProfKind::MemProf))
594 Header.Version |= VARIANT_MASK_MEMPROF;
595 if (static_cast<bool>(ProfileKind & InstrProfKind::TemporalProfile))
596 Header.Version |= VARIANT_MASK_TEMPORAL_PROF;
597
598 const uint64_t BackPatchStartOffset =
599 writeHeader(Header, WritePrevVersion, OS);
600
601 // Reserve space to write profile summary data.
603 uint32_t SummarySize = Summary::getSize(Summary::NumKinds, NumEntries);
604 // Remember the summary offset.
605 uint64_t SummaryOffset = OS.tell();
606 for (unsigned I = 0; I < SummarySize / sizeof(uint64_t); I++)
607 OS.write(0);
608 uint64_t CSSummaryOffset = 0;
609 uint64_t CSSummarySize = 0;
610 if (static_cast<bool>(ProfileKind & InstrProfKind::ContextSensitive)) {
611 CSSummaryOffset = OS.tell();
612 CSSummarySize = SummarySize / sizeof(uint64_t);
613 for (unsigned I = 0; I < CSSummarySize; I++)
614 OS.write(0);
615 }
616
617 // Write the hash table.
618 uint64_t HashTableStart = Generator.Emit(OS.OS, *InfoObj);
619
620 // Write the MemProf profile data if we have it.
621 uint64_t MemProfSectionStart = 0;
622 if (static_cast<bool>(ProfileKind & InstrProfKind::MemProf)) {
623 MemProfSectionStart = OS.tell();
624
625 if (auto E = writeMemProf(
626 OS, MemProfData, MemProfVersionRequested, MemProfFullSchema,
627 std::move(DataAccessProfileData), MemProfSumBuilder.getSummary()))
628 return E;
629 }
630
631 uint64_t BinaryIdSectionStart = OS.tell();
632 if (auto E = writeBinaryIds(OS))
633 return E;
634
635 uint64_t VTableNamesSectionStart = OS.tell();
636
637 if (!WritePrevVersion)
638 if (Error E = writeVTableNames(OS))
639 return E;
640
641 uint64_t TemporalProfTracesSectionStart = 0;
642 if (static_cast<bool>(ProfileKind & InstrProfKind::TemporalProfile)) {
643 TemporalProfTracesSectionStart = OS.tell();
644 OS.write(TemporalProfTraces.size());
645 OS.write(TemporalProfTraceStreamSize);
646 for (auto &Trace : TemporalProfTraces) {
647 OS.write(Trace.Weight);
648 OS.write(Trace.FunctionNameRefs.size());
649 for (auto &NameRef : Trace.FunctionNameRefs)
650 OS.write(NameRef);
651 }
652 }
653
654 // Allocate space for data to be serialized out.
655 std::unique_ptr<IndexedInstrProf::Summary> TheSummary =
657 // Compute the Summary and copy the data to the data
658 // structure to be serialized out (to disk or buffer).
659 std::unique_ptr<ProfileSummary> PS = ISB.getSummary();
660 setSummary(TheSummary.get(), *PS);
661 InfoObj->SummaryBuilder = nullptr;
662
663 // For Context Sensitive summary.
664 std::unique_ptr<IndexedInstrProf::Summary> TheCSSummary = nullptr;
665 if (static_cast<bool>(ProfileKind & InstrProfKind::ContextSensitive)) {
666 TheCSSummary = IndexedInstrProf::allocSummary(SummarySize);
667 std::unique_ptr<ProfileSummary> CSPS = CSISB.getSummary();
668 setSummary(TheCSSummary.get(), *CSPS);
669 }
670 InfoObj->CSSummaryBuilder = nullptr;
671
672 SmallVector<uint64_t, 8> HeaderOffsets = {HashTableStart, MemProfSectionStart,
673 BinaryIdSectionStart,
674 TemporalProfTracesSectionStart};
675 if (!WritePrevVersion)
676 HeaderOffsets.push_back(VTableNamesSectionStart);
677
678 PatchItem PatchItems[] = {
679 // Patch the Header fields
680 {BackPatchStartOffset, HeaderOffsets},
681 // Patch the summary data.
682 {SummaryOffset,
683 ArrayRef<uint64_t>(reinterpret_cast<uint64_t *>(TheSummary.get()),
684 SummarySize / sizeof(uint64_t))},
685 {CSSummaryOffset,
686 ArrayRef<uint64_t>(reinterpret_cast<uint64_t *>(TheCSSummary.get()),
687 CSSummarySize)}};
688
689 OS.patch(PatchItems);
690
691 for (const auto &I : FunctionData)
692 for (const auto &F : I.getValue())
693 if (Error E = validateRecord(F.second))
694 return E;
695
696 return Error::success();
697}
698
700 // Write the hash table.
701 ProfOStream POS(OS);
702 return writeImpl(POS);
703}
704
706 ProfOStream POS(OS);
707 return writeImpl(POS);
708}
709
710std::unique_ptr<MemoryBuffer> InstrProfWriter::writeBuffer() {
711 std::string Data;
713 // Write the hash table.
714 if (Error E = write(OS))
715 return nullptr;
716 // Return this in an aligned memory buffer.
718}
719
720static const char *ValueProfKindStr[] = {
721#define VALUE_PROF_KIND(Enumerator, Value, Descr) #Enumerator,
723};
724
726 for (uint32_t VK = 0; VK <= IPVK_Last; VK++) {
727 if (VK == IPVK_IndirectCallTarget || VK == IPVK_VTableTarget)
728 continue;
729 uint32_t NS = Func.getNumValueSites(VK);
730 for (uint32_t S = 0; S < NS; S++) {
731 DenseSet<uint64_t> SeenValues;
732 for (const auto &V : Func.getValueArrayForSite(VK, S))
733 if (!SeenValues.insert(V.Value).second)
735 }
736 }
737
738 return Error::success();
739}
740
742 const InstrProfRecord &Func,
743 InstrProfSymtab &Symtab,
744 raw_fd_ostream &OS) {
745 OS << Name << "\n";
746 OS << "# Func Hash:\n" << Hash << "\n";
747 OS << "# Num Counters:\n" << Func.Counts.size() << "\n";
748 OS << "# Counter Values:\n";
749 for (uint64_t Count : Func.Counts)
750 OS << Count << "\n";
751
752 if (Func.BitmapBytes.size() > 0) {
753 OS << "# Num Bitmap Bytes:\n$" << Func.BitmapBytes.size() << "\n";
754 OS << "# Bitmap Byte Values:\n";
755 for (uint8_t Byte : Func.BitmapBytes) {
756 OS << "0x";
757 OS.write_hex(Byte);
758 OS << "\n";
759 }
760 OS << "\n";
761 }
762
763 uint32_t NumValueKinds = Func.getNumValueKinds();
764 if (!NumValueKinds) {
765 OS << "\n";
766 return;
767 }
768
769 OS << "# Num Value Kinds:\n" << Func.getNumValueKinds() << "\n";
770 for (uint32_t VK = 0; VK < IPVK_Last + 1; VK++) {
771 uint32_t NS = Func.getNumValueSites(VK);
772 if (!NS)
773 continue;
774 OS << "# ValueKind = " << ValueProfKindStr[VK] << ":\n" << VK << "\n";
775 OS << "# NumValueSites:\n" << NS << "\n";
776 for (uint32_t S = 0; S < NS; S++) {
777 auto VD = Func.getValueArrayForSite(VK, S);
778 OS << VD.size() << "\n";
779 for (const auto &V : VD) {
780 if (VK == IPVK_IndirectCallTarget || VK == IPVK_VTableTarget)
781 OS << Symtab.getFuncOrVarNameIfDefined(V.Value) << ":" << V.Count
782 << "\n";
783 else
784 OS << V.Value << ":" << V.Count << "\n";
785 }
786 }
787 }
788
789 OS << "\n";
790}
791
793 // Check CS first since it implies an IR level profile.
794 if (static_cast<bool>(ProfileKind & InstrProfKind::ContextSensitive))
795 OS << "# CSIR level Instrumentation Flag\n:csir\n";
796 else if (static_cast<bool>(ProfileKind & InstrProfKind::IRInstrumentation))
797 OS << "# IR level Instrumentation Flag\n:ir\n";
798
799 if (static_cast<bool>(ProfileKind &
801 OS << "# Always instrument the function entry block\n:entry_first\n";
802 if (static_cast<bool>(ProfileKind &
804 OS << "# Always instrument the loop entry "
805 "blocks\n:instrument_loop_entries\n";
806 if (static_cast<bool>(ProfileKind & InstrProfKind::SingleByteCoverage))
807 OS << "# Instrument block coverage\n:single_byte_coverage\n";
808 InstrProfSymtab Symtab;
809
811 using RecordType = std::pair<StringRef, FuncPair>;
812 SmallVector<RecordType, 4> OrderedFuncData;
813
814 for (const auto &I : FunctionData) {
815 if (shouldEncodeData(I.getValue())) {
816 if (Error E = Symtab.addFuncName(I.getKey()))
817 return E;
818 for (const auto &Func : I.getValue())
819 OrderedFuncData.push_back(std::make_pair(I.getKey(), Func));
820 }
821 }
822
823 for (const auto &VTableName : VTableNames)
824 if (Error E = Symtab.addVTableName(VTableName.getKey()))
825 return E;
826
827 if (static_cast<bool>(ProfileKind & InstrProfKind::TemporalProfile))
829
830 llvm::sort(OrderedFuncData, [](const RecordType &A, const RecordType &B) {
831 return std::tie(A.first, A.second.first) <
832 std::tie(B.first, B.second.first);
833 });
834
835 for (const auto &record : OrderedFuncData) {
836 const StringRef &Name = record.first;
837 const FuncPair &Func = record.second;
838 writeRecordInText(Name, Func.first, Func.second, Symtab, OS);
839 }
840
841 for (const auto &record : OrderedFuncData) {
842 const FuncPair &Func = record.second;
843 if (Error E = validateRecord(Func.second))
844 return E;
845 }
846
847 return Error::success();
848}
849
851 InstrProfSymtab &Symtab) {
852 OS << ":temporal_prof_traces\n";
853 OS << "# Num Temporal Profile Traces:\n" << TemporalProfTraces.size() << "\n";
854 OS << "# Temporal Profile Trace Stream Size:\n"
855 << TemporalProfTraceStreamSize << "\n";
856 for (auto &Trace : TemporalProfTraces) {
857 OS << "# Weight:\n" << Trace.Weight << "\n";
858 for (auto &NameRef : Trace.FunctionNameRefs)
859 OS << Symtab.getFuncOrVarName(NameRef) << ",";
860 OS << "\n";
861 }
862 OS << "\n";
863}
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
static void setSummary(IndexedInstrProf::Summary *TheSummary, ProfileSummary &PS)
static const char * ValueProfKindStr[]
#define VARIANT_MASK_CSIR_PROF
#define VARIANT_MASK_MEMPROF
#define VARIANT_MASK_TEMPORAL_PROF
#define VARIANT_MASK_IR_PROF
#define VARIANT_MASK_BYTE_COVERAGE
#define VARIANT_MASK_INSTR_ENTRY
#define VARIANT_MASK_FUNCTION_ENTRY_ONLY
#define VARIANT_MASK_INSTR_LOOP_ENTRIES
#define F(x, y, z)
Definition MD5.cpp:54
#define I(x, y, z)
Definition MD5.cpp:57
#define T
Defines facilities for reading and writing on-disk hash tables.
This file contains some templates that are useful if you are working with the STL at all.
FunctionLoweringInfo::StatepointRelocationRecord RecordType
Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition ArrayRef.h:40
size_t size() const
Get the array size.
Definition ArrayRef.h:141
Implements a dense probed hash-table based set.
Definition DenseSet.h:289
Lightweight error class with error context and mandatory checking.
Definition Error.h:159
static ErrorSuccess success()
Create a success value.
Definition Error.h:336
uint64_t GUID
Declare a type to represent a global unique identifier for a global value.
const InstrProfWriter::ProfilingData *const data_type_ref
InstrProfSummaryBuilder * SummaryBuilder
void EmitData(raw_ostream &Out, key_type_ref K, data_type_ref V, offset_type)
static hash_value_type ComputeHash(key_type_ref K)
InstrProfSummaryBuilder * CSSummaryBuilder
std::pair< offset_type, offset_type > EmitKeyDataLength(raw_ostream &Out, key_type_ref K, data_type_ref V)
void EmitKey(raw_ostream &Out, key_type_ref K, offset_type N)
const InstrProfWriter::ProfilingData *const data_type
A symbol table used for function [IR]PGO name look-up with keys (such as pointers,...
Definition InstrProf.h:517
StringRef getFuncOrVarName(uint64_t ValMD5Hash) const
Return name of functions or global variables from the name's md5 hash value.
Definition InstrProf.h:789
StringRef getFuncOrVarNameIfDefined(uint64_t ValMD5Hash) const
Just like getFuncOrVarName, except that it will return literal string 'External Symbol' if the functi...
Definition InstrProf.h:782
Error addVTableName(StringRef VTableName)
Adds VTableName as a known symbol, and inserts it to a map that tracks all vtable names.
Definition InstrProf.h:669
Error addFuncName(StringRef FuncName)
The method name is kept since there are many callers.
Definition InstrProf.h:665
LLVM_ABI InstrProfWriter(bool Sparse=false, uint64_t TemporalProfTraceReservoirSize=0, uint64_t MaxTemporalProfTraceLength=0, bool WritePrevVersion=false, memprof::IndexedVersion MemProfVersionRequested=static_cast< memprof::IndexedVersion >(memprof::MinimumSupportedVersion), bool MemProfFullSchema=false, bool MemprofGenerateRandomHotness=false, unsigned MemprofGenerateRandomHotnessSeed=0)
LLVM_ABI Error write(raw_fd_ostream &OS)
Write the profile to OS.
LLVM_ABI void addTemporalProfileTraces(SmallVectorImpl< TemporalProfTraceTy > &SrcTraces, uint64_t SrcStreamSize)
Add SrcTraces using reservoir sampling where SrcStreamSize is the total number of temporal profiling ...
LLVM_ABI void overlapRecord(NamedInstrProfRecord &&Other, OverlapStats &Overlap, OverlapStats &FuncLevelOverlap, const OverlapFuncFilters &FuncFilter)
LLVM_ABI Error writeText(raw_fd_ostream &OS)
Write the profile in text format to OS.
LLVM_ABI void addBinaryIds(ArrayRef< llvm::object::BuildID > BIs)
static LLVM_ABI void writeRecordInText(StringRef Name, uint64_t Hash, const InstrProfRecord &Counters, InstrProfSymtab &Symtab, raw_fd_ostream &OS)
Write Record in text format to OS.
LLVM_ABI void setValueProfDataEndianness(llvm::endianness Endianness)
LLVM_ABI void addRecord(NamedInstrProfRecord &&I, uint64_t Weight, function_ref< void(Error)> Warn)
Add function counts for the given function.
LLVM_ABI void mergeRecordsFromWriter(InstrProfWriter &&IPW, function_ref< void(Error)> Warn)
Merge existing function counts from the given writer.
LLVM_ABI void writeTextTemporalProfTraceData(raw_fd_ostream &OS, InstrProfSymtab &Symtab)
Write temporal profile trace data to the header in text format to OS.
SmallDenseMap< uint64_t, InstrProfRecord > ProfilingData
LLVM_ABI std::unique_ptr< MemoryBuffer > writeBuffer()
Write the profile, returning the raw data. For testing.
LLVM_ABI void setOutputSparse(bool Sparse)
LLVM_ABI bool addMemProfData(memprof::IndexedMemProfData Incoming, function_ref< void(Error)> Warn)
Add the entire MemProfData Incoming to the writer context.
LLVM_ABI void addDataAccessProfData(std::unique_ptr< memprof::DataAccessProfData > DataAccessProfile)
LLVM_ABI Error validateRecord(const InstrProfRecord &Func)
static std::unique_ptr< MemoryBuffer > getMemBufferCopy(StringRef InputData, const Twine &BufferName="")
Open the specified memory range as a MemoryBuffer, copying the contents and taking ownership of it.
offset_type Emit(raw_ostream &Out)
Emit the table to Out, which must not be at offset 0.
raw_ostream & OS
Definition InstrProf.h:87
LLVM_ABI uint64_t tell() const
LLVM_ABI void writeByte(uint8_t V)
LLVM_ABI void patch(ArrayRef< PatchItem > P)
LLVM_ABI void write(uint64_t V)
static LLVM_ABI const ArrayRef< uint32_t > DefaultCutoffs
A vector of useful cutoff values for detailed summary.
uint64_t getTotalCount() const
uint64_t getMaxCount() const
const SummaryEntryVector & getDetailedSummary()
uint32_t getNumCounts() const
uint64_t getMaxInternalCount() const
uint64_t getMaxFunctionCount() const
uint32_t getNumFunctions() const
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
reference emplace_back(ArgTypes &&... Args)
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Represent a constant reference to a string, i.e.
Definition StringRef.h:56
unsigned size() const
Definition Trace.h:96
std::pair< iterator, bool > insert(const ValueT &V)
Definition DenseSet.h:212
An efficient, type-erasing, non-owning reference to a callable.
A raw_ostream that writes to a file descriptor.
This class implements an extremely fast bulk output stream that can only output to a stream.
Definition raw_ostream.h:53
raw_ostream & write_hex(unsigned long long N)
Output N in hexadecimal, without any prefix or padding.
raw_ostream & write(unsigned char C)
A raw_ostream that writes to an std::string.
std::unique_ptr< Summary > allocSummary(uint32_t TotalSize)
Definition InstrProf.h:1358
uint64_t ComputeHash(StringRef K)
Definition InstrProf.h:1239
LLVM_ABI bool isAvailable()
uint64_t CallStackId
Definition MemProf.h:355
uint64_t FrameId
Definition MemProf.h:236
This is an optimization pass for GlobalISel generic memory operations.
void append_range(Container &C, Range &&R)
Wrapper function to append range R to container C.
Definition STLExtras.h:2208
void shuffle(Iterator first, Iterator last, RNG &&g)
Definition STLExtras.h:1530
auto unique(Range &&R, Predicate P)
Definition STLExtras.h:2134
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1746
void sort(IteratorTy Start, IteratorTy End)
Definition STLExtras.h:1636
constexpr T alignToPowerOf2(U Value, V Align)
Will overflow only if result is not representable in T.
Definition MathExtras.h:493
constexpr uint64_t alignTo(uint64_t Size, Align A)
Returns a multiple of A needed to store Size bytes.
Definition Alignment.h:144
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
Error make_error(ArgTs &&... Args)
Make an Error instance representing failure using the given error info type.
Definition Error.h:340
LLVM_ABI raw_fd_ostream & errs()
This returns a reference to a raw_ostream for standard error.
@ Other
Any other memory.
Definition ModRef.h:68
instrprof_error
Definition InstrProf.h:410
RelativeUniformCounterPtr ValuesPtrExpr VTableAddr Count
Definition InstrProf.h:145
LLVM_ABI Error collectGlobalObjectNameStrings(ArrayRef< std::string > NameStrs, bool doCompression, std::string &Result)
Given a vector of strings (names of global objects like functions or virtual tables) NameStrs,...
void erase_if(Container &C, UnaryPredicate P)
Provide a container algorithm similar to C++ Library Fundamentals v2's erase_if which is equivalent t...
Definition STLExtras.h:2192
LLVM_ABI Error writeMemProf(ProfOStream &OS, memprof::IndexedMemProfData &MemProfData, memprof::IndexedVersion MemProfVersionRequested, bool MemProfFullSchema, std::unique_ptr< memprof::DataAccessProfData > DataAccessProfileData, std::unique_ptr< memprof::MemProfSummary > MemProfSum)
endianness
Definition bit.h:71
#define N
void set(SummaryFieldKind K, uint64_t V)
Definition InstrProf.h:1344
void setEntry(uint32_t I, const ProfileSummaryEntry &E)
Definition InstrProf.h:1350
Profiling information for a single function.
Definition InstrProf.h:906
std::vector< uint64_t > Counts
Definition InstrProf.h:907
LLVM_ABI void merge(InstrProfRecord &Other, uint64_t Weight, function_ref< void(instrprof_error)> Warn)
Merge the counts in Other into this one.
std::vector< uint8_t > UniformityBits
For AMDGPU offload profiling: 1 bit per basic block indicating whether the block is usually entered w...
Definition InstrProf.h:915
LLVM_ABI void overlap(InstrProfRecord &Other, OverlapStats &Overlap, OverlapStats &FuncLevelOverlap, uint64_t ValueCutoff)
Compute the overlap between this InstrProfRecord and Other.
void sortValueData()
Sort value profile data (per site) by count.
Definition InstrProf.h:996
std::vector< uint8_t > BitmapBytes
Definition InstrProf.h:908
LLVM_ABI void scale(uint64_t N, uint64_t D, function_ref< void(instrprof_error)> Warn)
Scale up profile counts (including value profile data) by a factor of (N / D).
static bool hasCSFlagInHash(uint64_t FuncHash)
Definition InstrProf.h:1125
const std::string NameFilter
Definition InstrProf.h:871
LLVM_ABI void addOneMismatch(const CountSumOrPercent &MismatchFunc)
CountSumOrPercent Overlap
Definition InstrProf.h:835
LLVM_ABI void addOneUnique(const CountSumOrPercent &UniqueFunc)
CountSumOrPercent Test
Definition InstrProf.h:833
llvm::MapVector< CallStackId, llvm::SmallVector< FrameId > > CallStacks
llvm::MapVector< GlobalValue::GUID, IndexedMemProfRecord > Records
llvm::MapVector< FrameId, Frame > Frames
void merge(const IndexedMemProfRecord &Other)
Definition MemProf.h:454
Adapter to write values to a stream in a particular byte order.