Bug Summary

File:build/source/llvm/tools/llvm-profdata/llvm-profdata.cpp
Warning:line 549, column 5
Value stored to 'Threshold' is never read

Annotated Source Code

Press '?' to see keyboard shortcuts

clang -cc1 -cc1 -triple x86_64-pc-linux-gnu -analyze -disable-free -clear-ast-before-backend -disable-llvm-verifier -discard-value-names -main-file-name llvm-profdata.cpp -analyzer-checker=core -analyzer-checker=apiModeling -analyzer-checker=unix -analyzer-checker=deadcode -analyzer-checker=cplusplus -analyzer-checker=security.insecureAPI.UncheckedReturn -analyzer-checker=security.insecureAPI.getpw -analyzer-checker=security.insecureAPI.gets -analyzer-checker=security.insecureAPI.mktemp -analyzer-checker=security.insecureAPI.mkstemp -analyzer-checker=security.insecureAPI.vfork -analyzer-checker=nullability.NullPassedToNonnull -analyzer-checker=nullability.NullReturnedFromNonnull -analyzer-output plist -w -setup-static-analyzer -analyzer-config-compatibility-mode=true -mrelocation-model pic -pic-level 2 -mframe-pointer=none -fmath-errno -ffp-contract=on -fno-rounding-math -mconstructor-aliases -funwind-tables=2 -target-cpu x86-64 -tune-cpu generic -debugger-tuning=gdb -ffunction-sections -fdata-sections -fcoverage-compilation-dir=/build/source/build-llvm/tools/clang/stage2-bins -resource-dir /usr/lib/llvm-17/lib/clang/17 -D _DEBUG -D _GLIBCXX_ASSERTIONS -D _GNU_SOURCE -D _LIBCPP_ENABLE_ASSERTIONS -D __STDC_CONSTANT_MACROS -D __STDC_FORMAT_MACROS -D __STDC_LIMIT_MACROS -I tools/llvm-profdata -I /build/source/llvm/tools/llvm-profdata -I include -I /build/source/llvm/include -D _FORTIFY_SOURCE=2 -D NDEBUG -U NDEBUG -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/10/../../../../include/c++/10 -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/10/../../../../include/x86_64-linux-gnu/c++/10 -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/10/../../../../include/c++/10/backward -internal-isystem /usr/lib/llvm-17/lib/clang/17/include -internal-isystem /usr/local/include -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/10/../../../../x86_64-linux-gnu/include -internal-externc-isystem /usr/include/x86_64-linux-gnu -internal-externc-isystem /include -internal-externc-isystem 
/usr/include -fmacro-prefix-map=/build/source/build-llvm/tools/clang/stage2-bins=build-llvm/tools/clang/stage2-bins -fmacro-prefix-map=/build/source/= -fcoverage-prefix-map=/build/source/build-llvm/tools/clang/stage2-bins=build-llvm/tools/clang/stage2-bins -fcoverage-prefix-map=/build/source/= -source-date-epoch 1683717183 -O2 -Wno-unused-command-line-argument -Wno-unused-parameter -Wwrite-strings -Wno-missing-field-initializers -Wno-long-long -Wno-maybe-uninitialized -Wno-class-memaccess -Wno-redundant-move -Wno-pessimizing-move -Wno-noexcept-type -Wno-comment -Wno-misleading-indentation -std=c++17 -fdeprecated-macro -fdebug-compilation-dir=/build/source/build-llvm/tools/clang/stage2-bins -fdebug-prefix-map=/build/source/build-llvm/tools/clang/stage2-bins=build-llvm/tools/clang/stage2-bins -fdebug-prefix-map=/build/source/= -ferror-limit 19 -fvisibility-inlines-hidden -stack-protector 2 -fgnuc-version=4.2.1 -fcolor-diagnostics -vectorize-loops -vectorize-slp -analyzer-output=html -analyzer-config stable-report-filename=true -faddrsig -D__GCC_HAVE_DWARF2_CFI_ASM=1 -o /tmp/scan-build-2023-05-10-133810-16478-1 -x c++ /build/source/llvm/tools/llvm-profdata/llvm-profdata.cpp
1//===- llvm-profdata.cpp - LLVM profile data tool -------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// llvm-profdata merges .profdata files.
10//
11//===----------------------------------------------------------------------===//
12
13#include "llvm/ADT/SmallSet.h"
14#include "llvm/ADT/SmallVector.h"
15#include "llvm/ADT/StringRef.h"
16#include "llvm/IR/LLVMContext.h"
17#include "llvm/Object/Binary.h"
18#include "llvm/ProfileData/InstrProfCorrelator.h"
19#include "llvm/ProfileData/InstrProfReader.h"
20#include "llvm/ProfileData/InstrProfWriter.h"
21#include "llvm/ProfileData/MemProf.h"
22#include "llvm/ProfileData/ProfileCommon.h"
23#include "llvm/ProfileData/RawMemProfReader.h"
24#include "llvm/ProfileData/SampleProfReader.h"
25#include "llvm/ProfileData/SampleProfWriter.h"
26#include "llvm/Support/CommandLine.h"
27#include "llvm/Support/Discriminator.h"
28#include "llvm/Support/Errc.h"
29#include "llvm/Support/FileSystem.h"
30#include "llvm/Support/Format.h"
31#include "llvm/Support/FormattedStream.h"
32#include "llvm/Support/InitLLVM.h"
33#include "llvm/Support/LLVMDriver.h"
34#include "llvm/Support/MD5.h"
35#include "llvm/Support/MemoryBuffer.h"
36#include "llvm/Support/Path.h"
37#include "llvm/Support/ThreadPool.h"
38#include "llvm/Support/Threading.h"
39#include "llvm/Support/VirtualFileSystem.h"
40#include "llvm/Support/WithColor.h"
41#include "llvm/Support/raw_ostream.h"
42#include <algorithm>
43#include <cmath>
44#include <optional>
45#include <queue>
46
47using namespace llvm;
48
// Sentinel stored in place of a function name when several static functions
// map to the same name.
const std::string DuplicateNameStr("----");
52
/// Profile file formats known to this tool. The numeric values are spelled
/// out so that the ordering is explicit.
enum ProfileFormat {
  PF_None = 0,
  PF_Text = 1,
  PF_Compact_Binary = 2, // Deprecated
  PF_Ext_Binary = 3,
  PF_GCC = 4,
  PF_Binary = 5
};
61
/// Output syntaxes selectable when showing a profile.
enum class ShowFormat {
  Text,
  Json,
  Yaml
};
63
64static void warn(Twine Message, std::string Whence = "",
65 std::string Hint = "") {
66 WithColor::warning();
67 if (!Whence.empty())
68 errs() << Whence << ": ";
69 errs() << Message << "\n";
70 if (!Hint.empty())
71 WithColor::note() << Hint << "\n";
72}
73
74static void warn(Error E, StringRef Whence = "") {
75 if (E.isA<InstrProfError>()) {
76 handleAllErrors(std::move(E), [&](const InstrProfError &IPE) {
77 warn(IPE.message(), std::string(Whence), std::string(""));
78 });
79 }
80}
81
82static void exitWithError(Twine Message, std::string Whence = "",
83 std::string Hint = "") {
84 WithColor::error();
85 if (!Whence.empty())
86 errs() << Whence << ": ";
87 errs() << Message << "\n";
88 if (!Hint.empty())
89 WithColor::note() << Hint << "\n";
90 ::exit(1);
91}
92
93static void exitWithError(Error E, StringRef Whence = "") {
94 if (E.isA<InstrProfError>()) {
95 handleAllErrors(std::move(E), [&](const InstrProfError &IPE) {
96 instrprof_error instrError = IPE.get();
97 StringRef Hint = "";
98 if (instrError == instrprof_error::unrecognized_format) {
99 // Hint in case user missed specifying the profile type.
100 Hint = "Perhaps you forgot to use the --sample or --memory option?";
101 }
102 exitWithError(IPE.message(), std::string(Whence), std::string(Hint));
103 });
104 return;
105 }
106
107 exitWithError(toString(std::move(E)), std::string(Whence));
108}
109
110static void exitWithErrorCode(std::error_code EC, StringRef Whence = "") {
111 exitWithError(EC.message(), std::string(Whence));
112}
113
namespace {
/// Kinds of profile.
enum ProfileKinds {
  instr,
  sample,
  memory
};

/// Policy for invalid inputs: fail as soon as any input is invalid, or only
/// when all of them are.
enum FailureMode {
  failIfAnyAreInvalid,
  failIfAllAreInvalid
};
} // namespace
118
119static void warnOrExitGivenError(FailureMode FailMode, std::error_code EC,
120 StringRef Whence = "") {
121 if (FailMode == failIfAnyAreInvalid)
122 exitWithErrorCode(EC, Whence);
123 else
124 warn(EC.message(), std::string(Whence));
125}
126
127static void handleMergeWriterError(Error E, StringRef WhenceFile = "",
128 StringRef WhenceFunction = "",
129 bool ShowHint = true) {
130 if (!WhenceFile.empty())
131 errs() << WhenceFile << ": ";
132 if (!WhenceFunction.empty())
133 errs() << WhenceFunction << ": ";
134
135 auto IPE = instrprof_error::success;
136 E = handleErrors(std::move(E),
137 [&IPE](std::unique_ptr<InstrProfError> E) -> Error {
138 IPE = E->get();
139 return Error(std::move(E));
140 });
141 errs() << toString(std::move(E)) << "\n";
142
143 if (ShowHint) {
144 StringRef Hint = "";
145 if (IPE != instrprof_error::success) {
146 switch (IPE) {
147 case instrprof_error::hash_mismatch:
148 case instrprof_error::count_mismatch:
149 case instrprof_error::value_site_count_mismatch:
150 Hint = "Make sure that all profile data to be merged is generated "
151 "from the same binary.";
152 break;
153 default:
154 break;
155 }
156 }
157
158 if (!Hint.empty())
159 errs() << Hint << "\n";
160 }
161}
162
163namespace {
164/// A remapper from original symbol names to new symbol names based on a file
165/// containing a list of mappings from old name to new name.
166class SymbolRemapper {
167 std::unique_ptr<MemoryBuffer> File;
168 DenseMap<StringRef, StringRef> RemappingTable;
169
170public:
171 /// Build a SymbolRemapper from a file containing a list of old/new symbols.
172 static std::unique_ptr<SymbolRemapper> create(StringRef InputFile) {
173 auto BufOrError = MemoryBuffer::getFileOrSTDIN(InputFile);
174 if (!BufOrError)
175 exitWithErrorCode(BufOrError.getError(), InputFile);
176
177 auto Remapper = std::make_unique<SymbolRemapper>();
178 Remapper->File = std::move(BufOrError.get());
179
180 for (line_iterator LineIt(*Remapper->File, /*SkipBlanks=*/true, '#');
181 !LineIt.is_at_eof(); ++LineIt) {
182 std::pair<StringRef, StringRef> Parts = LineIt->split(' ');
183 if (Parts.first.empty() || Parts.second.empty() ||
184 Parts.second.count(' ')) {
185 exitWithError("unexpected line in remapping file",
186 (InputFile + ":" + Twine(LineIt.line_number())).str(),
187 "expected 'old_symbol new_symbol'");
188 }
189 Remapper->RemappingTable.insert(Parts);
190 }
191 return Remapper;
192 }
193
194 /// Attempt to map the given old symbol into a new symbol.
195 ///
196 /// \return The new symbol, or \p Name if no such symbol was found.
197 StringRef operator()(StringRef Name) {
198 StringRef New = RemappingTable.lookup(Name);
199 return New.empty() ? Name : New;
200 }
201};
202}
203
/// An input profile together with the weight used when merging it.
struct WeightedFile {
  /// Path of the profile to read.
  std::string Filename;
  /// Weight handed to the writer along with each record of this file.
  uint64_t Weight;
};
208typedef SmallVector<WeightedFile, 5> WeightedFileVector;
209
210/// Keep track of merged data and reported errors.
211struct WriterContext {
212 std::mutex Lock;
213 InstrProfWriter Writer;
214 std::vector<std::pair<Error, std::string>> Errors;
215 std::mutex &ErrLock;
216 SmallSet<instrprof_error, 4> &WriterErrorCodes;
217
218 WriterContext(bool IsSparse, std::mutex &ErrLock,
219 SmallSet<instrprof_error, 4> &WriterErrorCodes,
220 uint64_t ReservoirSize = 0, uint64_t MaxTraceLength = 0)
221 : Writer(IsSparse, ReservoirSize, MaxTraceLength), ErrLock(ErrLock),
222 WriterErrorCodes(WriterErrorCodes) {}
223};
224
225/// Computer the overlap b/w profile BaseFilename and TestFileName,
226/// and store the program level result to Overlap.
227static void overlapInput(const std::string &BaseFilename,
228 const std::string &TestFilename, WriterContext *WC,
229 OverlapStats &Overlap,
230 const OverlapFuncFilters &FuncFilter,
231 raw_fd_ostream &OS, bool IsCS) {
232 auto FS = vfs::getRealFileSystem();
233 auto ReaderOrErr = InstrProfReader::create(TestFilename, *FS);
234 if (Error E = ReaderOrErr.takeError()) {
235 // Skip the empty profiles by returning sliently.
236 auto [ErrorCode, Msg] = InstrProfError::take(std::move(E));
237 if (ErrorCode != instrprof_error::empty_raw_profile)
238 WC->Errors.emplace_back(make_error<InstrProfError>(ErrorCode, Msg),
239 TestFilename);
240 return;
241 }
242
243 auto Reader = std::move(ReaderOrErr.get());
244 for (auto &I : *Reader) {
245 OverlapStats FuncOverlap(OverlapStats::FunctionLevel);
246 FuncOverlap.setFuncInfo(I.Name, I.Hash);
247
248 WC->Writer.overlapRecord(std::move(I), Overlap, FuncOverlap, FuncFilter);
249 FuncOverlap.dump(OS);
250 }
251}
252
253/// Load an input into a writer context.
254static void loadInput(const WeightedFile &Input, SymbolRemapper *Remapper,
255 const InstrProfCorrelator *Correlator,
256 const StringRef ProfiledBinary, WriterContext *WC) {
257 std::unique_lock<std::mutex> CtxGuard{WC->Lock};
258
259 // Copy the filename, because llvm::ThreadPool copied the input "const
260 // WeightedFile &" by value, making a reference to the filename within it
261 // invalid outside of this packaged task.
262 std::string Filename = Input.Filename;
263
264 using ::llvm::memprof::RawMemProfReader;
265 if (RawMemProfReader::hasFormat(Input.Filename)) {
266 auto ReaderOrErr = RawMemProfReader::create(Input.Filename, ProfiledBinary);
267 if (!ReaderOrErr) {
268 exitWithError(ReaderOrErr.takeError(), Input.Filename);
269 }
270 std::unique_ptr<RawMemProfReader> Reader = std::move(ReaderOrErr.get());
271 // Check if the profile types can be merged, e.g. clang frontend profiles
272 // should not be merged with memprof profiles.
273 if (Error E = WC->Writer.mergeProfileKind(Reader->getProfileKind())) {
274 consumeError(std::move(E));
275 WC->Errors.emplace_back(
276 make_error<StringError>(
277 "Cannot merge MemProf profile with Clang generated profile.",
278 std::error_code()),
279 Filename);
280 return;
281 }
282
283 auto MemProfError = [&](Error E) {
284 auto [ErrorCode, Msg] = InstrProfError::take(std::move(E));
285 WC->Errors.emplace_back(make_error<InstrProfError>(ErrorCode, Msg),
286 Filename);
287 };
288
289 // Add the frame mappings into the writer context.
290 const auto &IdToFrame = Reader->getFrameMapping();
291 for (const auto &I : IdToFrame) {
292 bool Succeeded = WC->Writer.addMemProfFrame(
293 /*Id=*/I.first, /*Frame=*/I.getSecond(), MemProfError);
294 // If we weren't able to add the frame mappings then it doesn't make sense
295 // to try to add the records from this profile.
296 if (!Succeeded)
297 return;
298 }
299 const auto &FunctionProfileData = Reader->getProfileData();
300 // Add the memprof records into the writer context.
301 for (const auto &I : FunctionProfileData) {
302 WC->Writer.addMemProfRecord(/*Id=*/I.first, /*Record=*/I.second);
303 }
304 return;
305 }
306
307 auto FS = vfs::getRealFileSystem();
308 auto ReaderOrErr = InstrProfReader::create(Input.Filename, *FS, Correlator);
309 if (Error E = ReaderOrErr.takeError()) {
310 // Skip the empty profiles by returning silently.
311 auto [ErrCode, Msg] = InstrProfError::take(std::move(E));
312 if (ErrCode != instrprof_error::empty_raw_profile)
313 WC->Errors.emplace_back(make_error<InstrProfError>(ErrCode, Msg),
314 Filename);
315 return;
316 }
317
318 auto Reader = std::move(ReaderOrErr.get());
319 if (Error E = WC->Writer.mergeProfileKind(Reader->getProfileKind())) {
320 consumeError(std::move(E));
321 WC->Errors.emplace_back(
322 make_error<StringError>(
323 "Merge IR generated profile with Clang generated profile.",
324 std::error_code()),
325 Filename);
326 return;
327 }
328
329 for (auto &I : *Reader) {
330 if (Remapper)
331 I.Name = (*Remapper)(I.Name);
332 const StringRef FuncName = I.Name;
333 bool Reported = false;
334 WC->Writer.addRecord(std::move(I), Input.Weight, [&](Error E) {
335 if (Reported) {
336 consumeError(std::move(E));
337 return;
338 }
339 Reported = true;
340 // Only show hint the first time an error occurs.
341 auto [ErrCode, Msg] = InstrProfError::take(std::move(E));
342 std::unique_lock<std::mutex> ErrGuard{WC->ErrLock};
343 bool firstTime = WC->WriterErrorCodes.insert(ErrCode).second;
344 handleMergeWriterError(make_error<InstrProfError>(ErrCode, Msg),
345 Input.Filename, FuncName, firstTime);
346 });
347 }
348
349 if (Reader->hasTemporalProfile()) {
350 auto &Traces = Reader->getTemporalProfTraces(Input.Weight);
351 if (!Traces.empty())
352 WC->Writer.addTemporalProfileTraces(
353 Traces, Reader->getTemporalProfTraceStreamSize());
354 }
355 if (Reader->hasError()) {
356 if (Error E = Reader->getError())
357 WC->Errors.emplace_back(std::move(E), Filename);
358 }
359
360 std::vector<llvm::object::BuildID> BinaryIds;
361 if (Error E = Reader->readBinaryIds(BinaryIds))
362 WC->Errors.emplace_back(std::move(E), Filename);
363 WC->Writer.addBinaryIds(BinaryIds);
364}
365
366/// Merge the \p Src writer context into \p Dst.
367static void mergeWriterContexts(WriterContext *Dst, WriterContext *Src) {
368 for (auto &ErrorPair : Src->Errors)
369 Dst->Errors.push_back(std::move(ErrorPair));
370 Src->Errors.clear();
371
372 if (Error E = Dst->Writer.mergeProfileKind(Src->Writer.getProfileKind()))
373 exitWithError(std::move(E));
374
375 Dst->Writer.mergeRecordsFromWriter(std::move(Src->Writer), [&](Error E) {
376 auto [ErrorCode, Msg] = InstrProfError::take(std::move(E));
377 std::unique_lock<std::mutex> ErrGuard{Dst->ErrLock};
378 bool firstTime = Dst->WriterErrorCodes.insert(ErrorCode).second;
379 if (firstTime)
380 warn(toString(make_error<InstrProfError>(ErrorCode, Msg)));
381 });
382}
383
384static void writeInstrProfile(StringRef OutputFilename,
385 ProfileFormat OutputFormat,
386 InstrProfWriter &Writer) {
387 std::error_code EC;
388 raw_fd_ostream Output(OutputFilename.data(), EC,
389 OutputFormat == PF_Text ? sys::fs::OF_TextWithCRLF
390 : sys::fs::OF_None);
391 if (EC)
392 exitWithErrorCode(EC, OutputFilename);
393
394 if (OutputFormat == PF_Text) {
395 if (Error E = Writer.writeText(Output))
396 warn(std::move(E));
397 } else {
398 if (Output.is_displayed())
399 exitWithError("cannot write a non-text format profile to the terminal");
400 if (Error E = Writer.write(Output))
401 warn(std::move(E));
402 }
403}
404
405static void
406mergeInstrProfile(const WeightedFileVector &Inputs, StringRef DebugInfoFilename,
407 SymbolRemapper *Remapper, StringRef OutputFilename,
408 ProfileFormat OutputFormat, uint64_t TraceReservoirSize,
409 uint64_t MaxTraceLength, bool OutputSparse,
410 unsigned NumThreads, FailureMode FailMode,
411 const StringRef ProfiledBinary) {
412 if (OutputFormat == PF_Compact_Binary)
413 exitWithError("Compact Binary is deprecated");
414 if (OutputFormat != PF_Binary && OutputFormat != PF_Ext_Binary &&
415 OutputFormat != PF_Text)
416 exitWithError("unknown format is specified");
417
418 std::unique_ptr<InstrProfCorrelator> Correlator;
419 if (!DebugInfoFilename.empty()) {
420 if (auto Err =
421 InstrProfCorrelator::get(DebugInfoFilename).moveInto(Correlator))
422 exitWithError(std::move(Err), DebugInfoFilename);
423 if (auto Err = Correlator->correlateProfileData())
424 exitWithError(std::move(Err), DebugInfoFilename);
425 }
426
427 std::mutex ErrorLock;
428 SmallSet<instrprof_error, 4> WriterErrorCodes;
429
430 // If NumThreads is not specified, auto-detect a good default.
431 if (NumThreads == 0)
432 NumThreads = std::min(hardware_concurrency().compute_thread_count(),
433 unsigned((Inputs.size() + 1) / 2));
434
435 // Initialize the writer contexts.
436 SmallVector<std::unique_ptr<WriterContext>, 4> Contexts;
437 for (unsigned I = 0; I < NumThreads; ++I)
438 Contexts.emplace_back(std::make_unique<WriterContext>(
439 OutputSparse, ErrorLock, WriterErrorCodes, TraceReservoirSize,
440 MaxTraceLength));
441
442 if (NumThreads == 1) {
443 for (const auto &Input : Inputs)
444 loadInput(Input, Remapper, Correlator.get(), ProfiledBinary,
445 Contexts[0].get());
446 } else {
447 ThreadPool Pool(hardware_concurrency(NumThreads));
448
449 // Load the inputs in parallel (N/NumThreads serial steps).
450 unsigned Ctx = 0;
451 for (const auto &Input : Inputs) {
452 Pool.async(loadInput, Input, Remapper, Correlator.get(), ProfiledBinary,
453 Contexts[Ctx].get());
454 Ctx = (Ctx + 1) % NumThreads;
455 }
456 Pool.wait();
457
458 // Merge the writer contexts together (~ lg(NumThreads) serial steps).
459 unsigned Mid = Contexts.size() / 2;
460 unsigned End = Contexts.size();
461 assert(Mid > 0 && "Expected more than one context")(static_cast <bool> (Mid > 0 && "Expected more than one context"
) ? void (0) : __assert_fail ("Mid > 0 && \"Expected more than one context\""
, "llvm/tools/llvm-profdata/llvm-profdata.cpp", 461, __extension__
__PRETTY_FUNCTION__))
;
462 do {
463 for (unsigned I = 0; I < Mid; ++I)
464 Pool.async(mergeWriterContexts, Contexts[I].get(),
465 Contexts[I + Mid].get());
466 Pool.wait();
467 if (End & 1) {
468 Pool.async(mergeWriterContexts, Contexts[0].get(),
469 Contexts[End - 1].get());
470 Pool.wait();
471 }
472 End = Mid;
473 Mid /= 2;
474 } while (Mid > 0);
475 }
476
477 // Handle deferred errors encountered during merging. If the number of errors
478 // is equal to the number of inputs the merge failed.
479 unsigned NumErrors = 0;
480 for (std::unique_ptr<WriterContext> &WC : Contexts) {
481 for (auto &ErrorPair : WC->Errors) {
482 ++NumErrors;
483 warn(toString(std::move(ErrorPair.first)), ErrorPair.second);
484 }
485 }
486 if (NumErrors == Inputs.size() ||
487 (NumErrors > 0 && FailMode == failIfAnyAreInvalid))
488 exitWithError("no profile can be merged");
489
490 writeInstrProfile(OutputFilename, OutputFormat, Contexts[0]->Writer);
491}
492
493/// The profile entry for a function in instrumentation profile.
494struct InstrProfileEntry {
495 uint64_t MaxCount = 0;
496 uint64_t NumEdgeCounters = 0;
497 float ZeroCounterRatio = 0.0;
498 InstrProfRecord *ProfRecord;
499 InstrProfileEntry(InstrProfRecord *Record);
500 InstrProfileEntry() = default;
501};
502
503InstrProfileEntry::InstrProfileEntry(InstrProfRecord *Record) {
504 ProfRecord = Record;
505 uint64_t CntNum = Record->Counts.size();
506 uint64_t ZeroCntNum = 0;
507 for (size_t I = 0; I < CntNum; ++I) {
508 MaxCount = std::max(MaxCount, Record->Counts[I]);
509 ZeroCntNum += !Record->Counts[I];
510 }
511 ZeroCounterRatio = (float)ZeroCntNum / CntNum;
512 NumEdgeCounters = CntNum;
513}
514
515/// Either set all the counters in the instr profile entry \p IFE to
516/// -1 / -2 /in order to drop the profile or scale up the
517/// counters in \p IFP to be above hot / cold threshold. We use
518/// the ratio of zero counters in the profile of a function to
519/// decide the profile is helpful or harmful for performance,
520/// and to choose whether to scale up or drop it.
521static void updateInstrProfileEntry(InstrProfileEntry &IFE, bool SetToHot,
522 uint64_t HotInstrThreshold,
523 uint64_t ColdInstrThreshold,
524 float ZeroCounterThreshold) {
525 InstrProfRecord *ProfRecord = IFE.ProfRecord;
526 if (!IFE.MaxCount || IFE.ZeroCounterRatio > ZeroCounterThreshold) {
527 // If all or most of the counters of the function are zero, the
528 // profile is unaccountable and should be dropped. Reset all the
529 // counters to be -1 / -2 and PGO profile-use will drop the profile.
530 // All counters being -1 also implies that the function is hot so
531 // PGO profile-use will also set the entry count metadata to be
532 // above hot threshold.
533 // All counters being -2 implies that the function is warm so
534 // PGO profile-use will also set the entry count metadata to be
535 // above cold threshold.
536 auto Kind =
537 (SetToHot ? InstrProfRecord::PseudoHot : InstrProfRecord::PseudoWarm);
538 ProfRecord->setPseudoCount(Kind);
539 return;
540 }
541
542 // Scale up the MaxCount to be multiple times above hot / cold threshold.
543 const unsigned MultiplyFactor = 3;
544 uint64_t Threshold = (SetToHot ? HotInstrThreshold : ColdInstrThreshold);
545 uint64_t Numerator = Threshold * MultiplyFactor;
546
547 // Make sure Threshold for warm counters is below the HotInstrThreshold.
548 if (!SetToHot && Threshold >= HotInstrThreshold) {
549 Threshold = (HotInstrThreshold + ColdInstrThreshold) / 2;
Value stored to 'Threshold' is never read
550 }
551
552 uint64_t Denominator = IFE.MaxCount;
553 if (Numerator <= Denominator)
554 return;
555 ProfRecord->scale(Numerator, Denominator, [&](instrprof_error E) {
556 warn(toString(make_error<InstrProfError>(E)));
557 });
558}
559
// Indices into ProfileSummaryBuilder::DefaultCutoffs that select the
// percentiles used for the cold and the hot count thresholds.
constexpr uint64_t ColdPercentileIdx = 15;
constexpr uint64_t HotPercentileIdx = 11;
562
563using sampleprof::FSDiscriminatorPass;
564
565// Internal options to set FSDiscriminatorPass. Used in merge and show
566// commands.
567static cl::opt<FSDiscriminatorPass> FSDiscriminatorPassOption(
568 "fs-discriminator-pass", cl::init(PassLast), cl::Hidden,
569 cl::desc("Zero out the discriminator bits for the FS discrimiantor "
570 "pass beyond this value. The enum values are defined in "
571 "Support/Discriminator.h"),
572 cl::values(clEnumVal(Base, "Use base discriminators only")llvm::cl::OptionEnumValue { "Base", int(Base), "Use base discriminators only"
}
,
573 clEnumVal(Pass1, "Use base and pass 1 discriminators")llvm::cl::OptionEnumValue { "Pass1", int(Pass1), "Use base and pass 1 discriminators"
}
,
574 clEnumVal(Pass2, "Use base and pass 1-2 discriminators")llvm::cl::OptionEnumValue { "Pass2", int(Pass2), "Use base and pass 1-2 discriminators"
}
,
575 clEnumVal(Pass3, "Use base and pass 1-3 discriminators")llvm::cl::OptionEnumValue { "Pass3", int(Pass3), "Use base and pass 1-3 discriminators"
}
,
576 clEnumVal(PassLast, "Use all discriminator bits (default)")llvm::cl::OptionEnumValue { "PassLast", int(PassLast), "Use all discriminator bits (default)"
}
));
577
578static unsigned getDiscriminatorMask() {
579 return getN1Bits(getFSPassBitEnd(FSDiscriminatorPassOption.getValue()));
580}
581
582/// Adjust the instr profile in \p WC based on the sample profile in
583/// \p Reader.
584static void
585adjustInstrProfile(std::unique_ptr<WriterContext> &WC,
586 std::unique_ptr<sampleprof::SampleProfileReader> &Reader,
587 unsigned SupplMinSizeThreshold, float ZeroCounterThreshold,
588 unsigned InstrProfColdThreshold) {
589 // Function to its entry in instr profile.
590 StringMap<InstrProfileEntry> InstrProfileMap;
591 StringMap<StringRef> StaticFuncMap;
592 InstrProfSummaryBuilder IPBuilder(ProfileSummaryBuilder::DefaultCutoffs);
593
594 auto checkSampleProfileHasFUnique = [&Reader]() {
595 for (const auto &PD : Reader->getProfiles()) {
596 auto &FContext = PD.first;
597 if (FContext.toString().find(FunctionSamples::UniqSuffix) !=
598 std::string::npos) {
599 return true;
600 }
601 }
602 return false;
603 };
604
605 bool SampleProfileHasFUnique = checkSampleProfileHasFUnique();
606
607 auto buildStaticFuncMap = [&StaticFuncMap,
608 SampleProfileHasFUnique](const StringRef Name) {
609 std::string Prefixes[] = {".cpp:", "cc:", ".c:", ".hpp:", ".h:"};
610 size_t PrefixPos = StringRef::npos;
611 for (auto &Prefix : Prefixes) {
612 PrefixPos = Name.find_insensitive(Prefix);
613 if (PrefixPos == StringRef::npos)
614 continue;
615 PrefixPos += Prefix.size();
616 break;
617 }
618
619 if (PrefixPos == StringRef::npos) {
620 return;
621 }
622
623 StringRef NewName = Name.drop_front(PrefixPos);
624 StringRef FName = Name.substr(0, PrefixPos - 1);
625 if (NewName.size() == 0) {
626 return;
627 }
628
629 // This name should have a static linkage.
630 size_t PostfixPos = NewName.find(FunctionSamples::UniqSuffix);
631 bool ProfileHasFUnique = (PostfixPos != StringRef::npos);
632
633 // If sample profile and instrumented profile do not agree on symbol
634 // uniqification.
635 if (SampleProfileHasFUnique != ProfileHasFUnique) {
636 // If instrumented profile uses -funique-internal-linakge-symbols,
637 // we need to trim the name.
638 if (ProfileHasFUnique) {
639 NewName = NewName.substr(0, PostfixPos);
640 } else {
641 // If sample profile uses -funique-internal-linakge-symbols,
642 // we build the map.
643 std::string NStr =
644 NewName.str() + getUniqueInternalLinkagePostfix(FName);
645 NewName = StringRef(NStr);
646 StaticFuncMap[NewName] = Name;
647 return;
648 }
649 }
650
651 if (!StaticFuncMap.contains(NewName)) {
652 StaticFuncMap[NewName] = Name;
653 } else {
654 StaticFuncMap[NewName] = DuplicateNameStr;
655 }
656 };
657
658 // We need to flatten the SampleFDO profile as the InstrFDO
659 // profile does not have inlined callsite profiles.
660 // One caveat is the pre-inlined function -- their samples
661 // should be collapsed into the caller function.
662 // Here we do a DFS traversal to get the flatten profile
663 // info: the sum of entrycount and the max of maxcount.
664 // Here is the algorithm:
665 // recursive (FS, root_name) {
666 // name = FS->getName();
667 // get samples for FS;
668 // if (InstrProf.find(name) {
669 // root_name = name;
670 // } else {
671 // if (name is in static_func map) {
672 // root_name = static_name;
673 // }
674 // }
675 // update the Map entry for root_name;
676 // for (subfs: FS) {
677 // recursive(subfs, root_name);
678 // }
679 // }
680 //
681 // Here is an example.
682 //
683 // SampleProfile:
684 // foo:12345:1000
685 // 1: 1000
686 // 2.1: 1000
687 // 15: 5000
688 // 4: bar:1000
689 // 1: 1000
690 // 2: goo:3000
691 // 1: 3000
692 // 8: bar:40000
693 // 1: 10000
694 // 2: goo:30000
695 // 1: 30000
696 //
697 // InstrProfile has two entries:
698 // foo
699 // bar.cc:bar
700 //
701 // After BuildMaxSampleMap, we should have the following in FlattenSampleMap:
702 // {"foo", {1000, 5000}}
703 // {"bar.cc:bar", {11000, 30000}}
704 //
705 // foo's has an entry count of 1000, and max body count of 5000.
706 // bar.cc:bar has an entry count of 11000 (sum two callsites of 1000 and
707 // 10000), and max count of 30000 (from the callsite in line 8).
708 //
709 // Note that goo's count will remain in bar.cc:bar() as it does not have an
710 // entry in InstrProfile.
711 DenseMap<StringRef, std::pair<uint64_t, uint64_t>> FlattenSampleMap;
712 auto BuildMaxSampleMap = [&FlattenSampleMap, &StaticFuncMap,
713 &InstrProfileMap](const FunctionSamples &FS,
714 const StringRef &RootName) {
715 auto BuildMaxSampleMapImpl = [&](const FunctionSamples &FS,
716 const StringRef &RootName,
717 auto &BuildImpl) -> void {
718 const StringRef &Name = FS.getName();
719 const StringRef *NewRootName = &RootName;
720 uint64_t EntrySample = FS.getHeadSamplesEstimate();
721 uint64_t MaxBodySample = FS.getMaxCountInside(/* SkipCallSite*/ true);
722
723 auto It = InstrProfileMap.find(Name);
724 if (It != InstrProfileMap.end()) {
725 NewRootName = &Name;
726 } else {
727 auto NewName = StaticFuncMap.find(Name);
728 if (NewName != StaticFuncMap.end()) {
729 It = InstrProfileMap.find(NewName->second.str());
730 if (NewName->second != DuplicateNameStr) {
731 NewRootName = &NewName->second;
732 }
733 } else {
734 // Here the EntrySample is of an inlined function, so we should not
735 // update the EntrySample in the map.
736 EntrySample = 0;
737 }
738 }
739 EntrySample += FlattenSampleMap[*NewRootName].first;
740 MaxBodySample =
741 std::max(FlattenSampleMap[*NewRootName].second, MaxBodySample);
742 FlattenSampleMap[*NewRootName] =
743 std::make_pair(EntrySample, MaxBodySample);
744
745 for (const auto &C : FS.getCallsiteSamples())
746 for (const auto &F : C.second)
747 BuildImpl(F.second, *NewRootName, BuildImpl);
748 };
749 BuildMaxSampleMapImpl(FS, RootName, BuildMaxSampleMapImpl);
750 };
751
752 for (auto &PD : WC->Writer.getProfileData()) {
753 // Populate IPBuilder.
754 for (const auto &PDV : PD.getValue()) {
755 InstrProfRecord Record = PDV.second;
756 IPBuilder.addRecord(Record);
757 }
758
759 // If a function has multiple entries in instr profile, skip it.
760 if (PD.getValue().size() != 1)
761 continue;
762
763 // Initialize InstrProfileMap.
764 InstrProfRecord *R = &PD.getValue().begin()->second;
765 StringRef FullName = PD.getKey();
766 InstrProfileMap[FullName] = InstrProfileEntry(R);
767 buildStaticFuncMap(FullName);
768 }
769
770 for (auto &PD : Reader->getProfiles()) {
771 sampleprof::FunctionSamples &FS = PD.second;
772 BuildMaxSampleMap(FS, FS.getName());
773 }
774
775 ProfileSummary InstrPS = *IPBuilder.getSummary();
776 ProfileSummary SamplePS = Reader->getSummary();
777
778 // Compute cold thresholds for instr profile and sample profile.
779 uint64_t HotSampleThreshold =
780 ProfileSummaryBuilder::getEntryForPercentile(
781 SamplePS.getDetailedSummary(),
782 ProfileSummaryBuilder::DefaultCutoffs[HotPercentileIdx])
783 .MinCount;
784 uint64_t ColdSampleThreshold =
785 ProfileSummaryBuilder::getEntryForPercentile(
786 SamplePS.getDetailedSummary(),
787 ProfileSummaryBuilder::DefaultCutoffs[ColdPercentileIdx])
788 .MinCount;
789 uint64_t HotInstrThreshold =
790 ProfileSummaryBuilder::getEntryForPercentile(
791 InstrPS.getDetailedSummary(),
792 ProfileSummaryBuilder::DefaultCutoffs[HotPercentileIdx])
793 .MinCount;
794 uint64_t ColdInstrThreshold =
795 InstrProfColdThreshold
796 ? InstrProfColdThreshold
797 : ProfileSummaryBuilder::getEntryForPercentile(
798 InstrPS.getDetailedSummary(),
799 ProfileSummaryBuilder::DefaultCutoffs[ColdPercentileIdx])
800 .MinCount;
801
802 // Find hot/warm functions in sample profile which is cold in instr profile
803 // and adjust the profiles of those functions in the instr profile.
804 for (const auto &E : FlattenSampleMap) {
805 uint64_t SampleMaxCount = std::max(E.second.first, E.second.second);
806 if (SampleMaxCount < ColdSampleThreshold)
807 continue;
808 const StringRef &Name = E.first;
809 auto It = InstrProfileMap.find(Name);
810 if (It == InstrProfileMap.end()) {
811 auto NewName = StaticFuncMap.find(Name);
812 if (NewName != StaticFuncMap.end()) {
813 It = InstrProfileMap.find(NewName->second.str());
814 if (NewName->second == DuplicateNameStr) {
815 WithColor::warning()
816 << "Static function " << Name
817 << " has multiple promoted names, cannot adjust profile.\n";
818 }
819 }
820 }
821 if (It == InstrProfileMap.end() ||
822 It->second.MaxCount > ColdInstrThreshold ||
823 It->second.NumEdgeCounters < SupplMinSizeThreshold)
824 continue;
825 bool SetToHot = SampleMaxCount >= HotSampleThreshold;
826 updateInstrProfileEntry(It->second, SetToHot, HotInstrThreshold,
827 ColdInstrThreshold, ZeroCounterThreshold);
828 }
829}
830
831/// The main function to supplement instr profile with sample profile.
832/// \Inputs contains the instr profile. \p SampleFilename specifies the
833/// sample profile. \p OutputFilename specifies the output profile name.
834/// \p OutputFormat specifies the output profile format. \p OutputSparse
835/// specifies whether to generate sparse profile. \p SupplMinSizeThreshold
836/// specifies the minimal size for the functions whose profile will be
837/// adjusted. \p ZeroCounterThreshold is the threshold to check whether
838/// a function contains too many zero counters and whether its profile
839/// should be dropped. \p InstrProfColdThreshold is the user specified
840/// cold threshold which will override the cold threshold got from the
841/// instr profile summary.
842static void supplementInstrProfile(
843 const WeightedFileVector &Inputs, StringRef SampleFilename,
844 StringRef OutputFilename, ProfileFormat OutputFormat, bool OutputSparse,
845 unsigned SupplMinSizeThreshold, float ZeroCounterThreshold,
846 unsigned InstrProfColdThreshold) {
847 if (OutputFilename.compare("-") == 0)
848 exitWithError("cannot write indexed profdata format to stdout");
849 if (Inputs.size() != 1)
850 exitWithError("expect one input to be an instr profile");
851 if (Inputs[0].Weight != 1)
852 exitWithError("expect instr profile doesn't have weight");
853
854 StringRef InstrFilename = Inputs[0].Filename;
855
856 // Read sample profile.
857 LLVMContext Context;
858 auto FS = vfs::getRealFileSystem();
859 auto ReaderOrErr = sampleprof::SampleProfileReader::create(
860 SampleFilename.str(), Context, *FS, FSDiscriminatorPassOption);
861 if (std::error_code EC = ReaderOrErr.getError())
862 exitWithErrorCode(EC, SampleFilename);
863 auto Reader = std::move(ReaderOrErr.get());
864 if (std::error_code EC = Reader->read())
865 exitWithErrorCode(EC, SampleFilename);
866
867 // Read instr profile.
868 std::mutex ErrorLock;
869 SmallSet<instrprof_error, 4> WriterErrorCodes;
870 auto WC = std::make_unique<WriterContext>(OutputSparse, ErrorLock,
871 WriterErrorCodes);
872 loadInput(Inputs[0], nullptr, nullptr, /*ProfiledBinary=*/"", WC.get());
873 if (WC->Errors.size() > 0)
874 exitWithError(std::move(WC->Errors[0].first), InstrFilename);
875
876 adjustInstrProfile(WC, Reader, SupplMinSizeThreshold, ZeroCounterThreshold,
877 InstrProfColdThreshold);
878 writeInstrProfile(OutputFilename, OutputFormat, WC->Writer);
879}
880
881/// Make a copy of the given function samples with all symbol names remapped
882/// by the provided symbol remapper.
883static sampleprof::FunctionSamples
884remapSamples(const sampleprof::FunctionSamples &Samples,
885 SymbolRemapper &Remapper, sampleprof_error &Error) {
886 sampleprof::FunctionSamples Result;
887 Result.setName(Remapper(Samples.getName()));
888 Result.addTotalSamples(Samples.getTotalSamples());
889 Result.addHeadSamples(Samples.getHeadSamples());
890 for (const auto &BodySample : Samples.getBodySamples()) {
891 uint32_t MaskedDiscriminator =
892 BodySample.first.Discriminator & getDiscriminatorMask();
893 Result.addBodySamples(BodySample.first.LineOffset, MaskedDiscriminator,
894 BodySample.second.getSamples());
895 for (const auto &Target : BodySample.second.getCallTargets()) {
896 Result.addCalledTargetSamples(BodySample.first.LineOffset,
897 MaskedDiscriminator,
898 Remapper(Target.first()), Target.second);
899 }
900 }
901 for (const auto &CallsiteSamples : Samples.getCallsiteSamples()) {
902 sampleprof::FunctionSamplesMap &Target =
903 Result.functionSamplesAt(CallsiteSamples.first);
904 for (const auto &Callsite : CallsiteSamples.second) {
905 sampleprof::FunctionSamples Remapped =
906 remapSamples(Callsite.second, Remapper, Error);
907 MergeResult(Error,
908 Target[std::string(Remapped.getName())].merge(Remapped));
909 }
910 }
911 return Result;
912}
913
914static sampleprof::SampleProfileFormat FormatMap[] = {
915 sampleprof::SPF_None,
916 sampleprof::SPF_Text,
917 sampleprof::SPF_None,
918 sampleprof::SPF_Ext_Binary,
919 sampleprof::SPF_GCC,
920 sampleprof::SPF_Binary};
921
922static std::unique_ptr<MemoryBuffer>
923getInputFileBuf(const StringRef &InputFile) {
924 if (InputFile == "")
925 return {};
926
927 auto BufOrError = MemoryBuffer::getFileOrSTDIN(InputFile);
928 if (!BufOrError)
929 exitWithErrorCode(BufOrError.getError(), InputFile);
930
931 return std::move(*BufOrError);
932}
933
934static void populateProfileSymbolList(MemoryBuffer *Buffer,
935 sampleprof::ProfileSymbolList &PSL) {
936 if (!Buffer)
937 return;
938
939 SmallVector<StringRef, 32> SymbolVec;
940 StringRef Data = Buffer->getBuffer();
941 Data.split(SymbolVec, '\n', /*MaxSplit=*/-1, /*KeepEmpty=*/false);
942
943 for (StringRef SymbolStr : SymbolVec)
944 PSL.add(SymbolStr.trim());
945}
946
947static void handleExtBinaryWriter(sampleprof::SampleProfileWriter &Writer,
948 ProfileFormat OutputFormat,
949 MemoryBuffer *Buffer,
950 sampleprof::ProfileSymbolList &WriterList,
951 bool CompressAllSections, bool UseMD5,
952 bool GenPartialProfile) {
953 populateProfileSymbolList(Buffer, WriterList);
954 if (WriterList.size() > 0 && OutputFormat != PF_Ext_Binary)
955 warn("Profile Symbol list is not empty but the output format is not "
956 "ExtBinary format. The list will be lost in the output. ");
957
958 Writer.setProfileSymbolList(&WriterList);
959
960 if (CompressAllSections) {
961 if (OutputFormat != PF_Ext_Binary)
962 warn("-compress-all-section is ignored. Specify -extbinary to enable it");
963 else
964 Writer.setToCompressAllSections();
965 }
966 if (UseMD5) {
967 if (OutputFormat != PF_Ext_Binary)
968 warn("-use-md5 is ignored. Specify -extbinary to enable it");
969 else
970 Writer.setUseMD5();
971 }
972 if (GenPartialProfile) {
973 if (OutputFormat != PF_Ext_Binary)
974 warn("-gen-partial-profile is ignored. Specify -extbinary to enable it");
975 else
976 Writer.setPartialProfile();
977 }
978}
979
980static void
981mergeSampleProfile(const WeightedFileVector &Inputs, SymbolRemapper *Remapper,
982 StringRef OutputFilename, ProfileFormat OutputFormat,
983 StringRef ProfileSymbolListFile, bool CompressAllSections,
984 bool UseMD5, bool GenPartialProfile,
985 SampleProfileLayout ProfileLayout,
986 bool SampleMergeColdContext, bool SampleTrimColdContext,
987 bool SampleColdContextFrameDepth, FailureMode FailMode,
988 bool DropProfileSymbolList, size_t OutputSizeLimit) {
989 using namespace sampleprof;
990 SampleProfileMap ProfileMap;
991 SmallVector<std::unique_ptr<sampleprof::SampleProfileReader>, 5> Readers;
992 LLVMContext Context;
993 sampleprof::ProfileSymbolList WriterList;
994 std::optional<bool> ProfileIsProbeBased;
995 std::optional<bool> ProfileIsCS;
996 for (const auto &Input : Inputs) {
997 auto FS = vfs::getRealFileSystem();
998 auto ReaderOrErr = SampleProfileReader::create(Input.Filename, Context, *FS,
999 FSDiscriminatorPassOption);
1000 if (std::error_code EC = ReaderOrErr.getError()) {
1001 warnOrExitGivenError(FailMode, EC, Input.Filename);
1002 continue;
1003 }
1004
1005 // We need to keep the readers around until after all the files are
1006 // read so that we do not lose the function names stored in each
1007 // reader's memory. The function names are needed to write out the
1008 // merged profile map.
1009 Readers.push_back(std::move(ReaderOrErr.get()));
1010 const auto Reader = Readers.back().get();
1011 if (std::error_code EC = Reader->read()) {
1012 warnOrExitGivenError(FailMode, EC, Input.Filename);
1013 Readers.pop_back();
1014 continue;
1015 }
1016
1017 SampleProfileMap &Profiles = Reader->getProfiles();
1018 if (ProfileIsProbeBased &&
1019 ProfileIsProbeBased != FunctionSamples::ProfileIsProbeBased)
1020 exitWithError(
1021 "cannot merge probe-based profile with non-probe-based profile");
1022 ProfileIsProbeBased = FunctionSamples::ProfileIsProbeBased;
1023 if (ProfileIsCS && ProfileIsCS != FunctionSamples::ProfileIsCS)
1024 exitWithError("cannot merge CS profile with non-CS profile");
1025 ProfileIsCS = FunctionSamples::ProfileIsCS;
1026 for (SampleProfileMap::iterator I = Profiles.begin(), E = Profiles.end();
1027 I != E; ++I) {
1028 sampleprof_error Result = sampleprof_error::success;
1029 FunctionSamples Remapped =
1030 Remapper ? remapSamples(I->second, *Remapper, Result)
1031 : FunctionSamples();
1032 FunctionSamples &Samples = Remapper ? Remapped : I->second;
1033 SampleContext FContext = Samples.getContext();
1034 MergeResult(Result, ProfileMap[FContext].merge(Samples, Input.Weight));
1035 if (Result != sampleprof_error::success) {
1036 std::error_code EC = make_error_code(Result);
1037 handleMergeWriterError(errorCodeToError(EC), Input.Filename,
1038 FContext.toString());
1039 }
1040 }
1041
1042 if (!DropProfileSymbolList) {
1043 std::unique_ptr<sampleprof::ProfileSymbolList> ReaderList =
1044 Reader->getProfileSymbolList();
1045 if (ReaderList)
1046 WriterList.merge(*ReaderList);
1047 }
1048 }
1049
1050 if (ProfileIsCS && (SampleMergeColdContext || SampleTrimColdContext)) {
1051 // Use threshold calculated from profile summary unless specified.
1052 SampleProfileSummaryBuilder Builder(ProfileSummaryBuilder::DefaultCutoffs);
1053 auto Summary = Builder.computeSummaryForProfiles(ProfileMap);
1054 uint64_t SampleProfColdThreshold =
1055 ProfileSummaryBuilder::getColdCountThreshold(
1056 (Summary->getDetailedSummary()));
1057
1058 // Trim and merge cold context profile using cold threshold above;
1059 SampleContextTrimmer(ProfileMap)
1060 .trimAndMergeColdContextProfiles(
1061 SampleProfColdThreshold, SampleTrimColdContext,
1062 SampleMergeColdContext, SampleColdContextFrameDepth, false);
1063 }
1064
1065 if (ProfileLayout == llvm::sampleprof::SPL_Flat) {
1066 ProfileConverter::flattenProfile(ProfileMap, FunctionSamples::ProfileIsCS);
1067 ProfileIsCS = FunctionSamples::ProfileIsCS = false;
1068 } else if (ProfileIsCS && ProfileLayout == llvm::sampleprof::SPL_Nest) {
1069 ProfileConverter CSConverter(ProfileMap);
1070 CSConverter.convertCSProfiles();
1071 ProfileIsCS = FunctionSamples::ProfileIsCS = false;
1072 }
1073
1074 auto WriterOrErr =
1075 SampleProfileWriter::create(OutputFilename, FormatMap[OutputFormat]);
1076 if (std::error_code EC = WriterOrErr.getError())
1077 exitWithErrorCode(EC, OutputFilename);
1078
1079 auto Writer = std::move(WriterOrErr.get());
1080 // WriterList will have StringRef refering to string in Buffer.
1081 // Make sure Buffer lives as long as WriterList.
1082 auto Buffer = getInputFileBuf(ProfileSymbolListFile);
1083 handleExtBinaryWriter(*Writer, OutputFormat, Buffer.get(), WriterList,
1084 CompressAllSections, UseMD5, GenPartialProfile);
1085
1086 // If OutputSizeLimit is 0 (default), it is the same as write().
1087 if (std::error_code EC =
1088 Writer->writeWithSizeLimit(ProfileMap, OutputSizeLimit))
1089 exitWithErrorCode(std::move(EC));
1090}
1091
1092static WeightedFile parseWeightedFile(const StringRef &WeightedFilename) {
1093 StringRef WeightStr, FileName;
1094 std::tie(WeightStr, FileName) = WeightedFilename.split(',');
1095
1096 uint64_t Weight;
1097 if (WeightStr.getAsInteger(10, Weight) || Weight < 1)
1098 exitWithError("input weight must be a positive integer");
1099
1100 return {std::string(FileName), Weight};
1101}
1102
1103static void addWeightedInput(WeightedFileVector &WNI, const WeightedFile &WF) {
1104 StringRef Filename = WF.Filename;
1105 uint64_t Weight = WF.Weight;
1106
1107 // If it's STDIN just pass it on.
1108 if (Filename == "-") {
1109 WNI.push_back({std::string(Filename), Weight});
1110 return;
1111 }
1112
1113 llvm::sys::fs::file_status Status;
1114 llvm::sys::fs::status(Filename, Status);
1115 if (!llvm::sys::fs::exists(Status))
1116 exitWithErrorCode(make_error_code(errc::no_such_file_or_directory),
1117 Filename);
1118 // If it's a source file, collect it.
1119 if (llvm::sys::fs::is_regular_file(Status)) {
1120 WNI.push_back({std::string(Filename), Weight});
1121 return;
1122 }
1123
1124 if (llvm::sys::fs::is_directory(Status)) {
1125 std::error_code EC;
1126 for (llvm::sys::fs::recursive_directory_iterator F(Filename, EC), E;
1127 F != E && !EC; F.increment(EC)) {
1128 if (llvm::sys::fs::is_regular_file(F->path())) {
1129 addWeightedInput(WNI, {F->path(), Weight});
1130 }
1131 }
1132 if (EC)
1133 exitWithErrorCode(EC, Filename);
1134 }
1135}
1136
1137static void parseInputFilenamesFile(MemoryBuffer *Buffer,
1138 WeightedFileVector &WFV) {
1139 if (!Buffer)
1140 return;
1141
1142 SmallVector<StringRef, 8> Entries;
1143 StringRef Data = Buffer->getBuffer();
1144 Data.split(Entries, '\n', /*MaxSplit=*/-1, /*KeepEmpty=*/false);
1145 for (const StringRef &FileWeightEntry : Entries) {
1146 StringRef SanitizedEntry = FileWeightEntry.trim(" \t\v\f\r");
1147 // Skip comments.
1148 if (SanitizedEntry.startswith("#"))
1149 continue;
1150 // If there's no comma, it's an unweighted profile.
1151 else if (!SanitizedEntry.contains(','))
1152 addWeightedInput(WFV, {std::string(SanitizedEntry), 1});
1153 else
1154 addWeightedInput(WFV, parseWeightedFile(SanitizedEntry));
1155 }
1156}
1157
1158static int merge_main(int argc, const char *argv[]) {
1159 cl::list<std::string> InputFilenames(cl::Positional,
1160 cl::desc("<filename...>"));
1161 cl::list<std::string> WeightedInputFilenames("weighted-input",
1162 cl::desc("<weight>,<filename>"));
1163 cl::opt<std::string> InputFilenamesFile(
1164 "input-files", cl::init(""),
1165 cl::desc("Path to file containing newline-separated "
1166 "[<weight>,]<filename> entries"));
1167 cl::alias InputFilenamesFileA("f", cl::desc("Alias for --input-files"),
1168 cl::aliasopt(InputFilenamesFile));
1169 cl::opt<bool> DumpInputFileList(
1170 "dump-input-file-list", cl::init(false), cl::Hidden,
1171 cl::desc("Dump the list of input files and their weights, then exit"));
1172 cl::opt<std::string> RemappingFile("remapping-file", cl::value_desc("file"),
1173 cl::desc("Symbol remapping file"));
1174 cl::alias RemappingFileA("r", cl::desc("Alias for --remapping-file"),
1175 cl::aliasopt(RemappingFile));
1176 cl::opt<std::string> OutputFilename("output", cl::value_desc("output"),
1177 cl::init("-"), cl::desc("Output file"));
1178 cl::alias OutputFilenameA("o", cl::desc("Alias for --output"),
1179 cl::aliasopt(OutputFilename));
1180 cl::opt<ProfileKinds> ProfileKind(
1181 cl::desc("Profile kind:"), cl::init(instr),
1182 cl::values(clEnumVal(instr, "Instrumentation profile (default)")llvm::cl::OptionEnumValue { "instr", int(instr), "Instrumentation profile (default)"
}
,
1183 clEnumVal(sample, "Sample profile")llvm::cl::OptionEnumValue { "sample", int(sample), "Sample profile"
}
));
1184 cl::opt<ProfileFormat> OutputFormat(
1185 cl::desc("Format of output profile"), cl::init(PF_Ext_Binary),
1186 cl::values(
1187 clEnumValN(PF_Binary, "binary", "Binary encoding")llvm::cl::OptionEnumValue { "binary", int(PF_Binary), "Binary encoding"
}
,
1188 clEnumValN(PF_Ext_Binary, "extbinary", "Extensible binary encoding "llvm::cl::OptionEnumValue { "extbinary", int(PF_Ext_Binary), "Extensible binary encoding "
"(default)" }
1189 "(default)")llvm::cl::OptionEnumValue { "extbinary", int(PF_Ext_Binary), "Extensible binary encoding "
"(default)" }
,
1190 clEnumValN(PF_Text, "text", "Text encoding")llvm::cl::OptionEnumValue { "text", int(PF_Text), "Text encoding"
}
,
1191 clEnumValN(PF_GCC, "gcc",llvm::cl::OptionEnumValue { "gcc", int(PF_GCC), "GCC encoding (only meaningful for -sample)"
}
1192 "GCC encoding (only meaningful for -sample)")llvm::cl::OptionEnumValue { "gcc", int(PF_GCC), "GCC encoding (only meaningful for -sample)"
}
));
1193 cl::opt<FailureMode> FailureMode(
1194 "failure-mode", cl::init(failIfAnyAreInvalid), cl::desc("Failure mode:"),
1195 cl::values(clEnumValN(failIfAnyAreInvalid, "any",llvm::cl::OptionEnumValue { "any", int(failIfAnyAreInvalid), "Fail if any profile is invalid."
}
1196 "Fail if any profile is invalid.")llvm::cl::OptionEnumValue { "any", int(failIfAnyAreInvalid), "Fail if any profile is invalid."
}
,
1197 clEnumValN(failIfAllAreInvalid, "all",llvm::cl::OptionEnumValue { "all", int(failIfAllAreInvalid), "Fail only if all profiles are invalid."
}
1198 "Fail only if all profiles are invalid.")llvm::cl::OptionEnumValue { "all", int(failIfAllAreInvalid), "Fail only if all profiles are invalid."
}
));
1199 cl::opt<bool> OutputSparse("sparse", cl::init(false),
1200 cl::desc("Generate a sparse profile (only meaningful for -instr)"));
1201 cl::opt<unsigned> NumThreads(
1202 "num-threads", cl::init(0),
1203 cl::desc("Number of merge threads to use (default: autodetect)"));
1204 cl::alias NumThreadsA("j", cl::desc("Alias for --num-threads"),
1205 cl::aliasopt(NumThreads));
1206 cl::opt<std::string> ProfileSymbolListFile(
1207 "prof-sym-list", cl::init(""),
1208 cl::desc("Path to file containing the list of function symbols "
1209 "used to populate profile symbol list"));
1210 cl::opt<bool> CompressAllSections(
1211 "compress-all-sections", cl::init(false), cl::Hidden,
1212 cl::desc("Compress all sections when writing the profile (only "
1213 "meaningful for -extbinary)"));
1214 cl::opt<bool> UseMD5(
1215 "use-md5", cl::init(false), cl::Hidden,
1216 cl::desc("Choose to use MD5 to represent string in name table (only "
1217 "meaningful for -extbinary)"));
1218 cl::opt<bool> SampleMergeColdContext(
1219 "sample-merge-cold-context", cl::init(false), cl::Hidden,
1220 cl::desc(
1221 "Merge context sample profiles whose count is below cold threshold"));
1222 cl::opt<bool> SampleTrimColdContext(
1223 "sample-trim-cold-context", cl::init(false), cl::Hidden,
1224 cl::desc(
1225 "Trim context sample profiles whose count is below cold threshold"));
1226 cl::opt<uint32_t> SampleColdContextFrameDepth(
1227 "sample-frame-depth-for-cold-context", cl::init(1),
1228 cl::desc("Keep the last K frames while merging cold profile. 1 means the "
1229 "context-less base profile"));
1230 cl::opt<size_t> OutputSizeLimit(
1231 "output-size-limit", cl::init(0), cl::Hidden,
1232 cl::desc("Trim cold functions until profile size is below specified "
1233 "limit in bytes. This uses a heursitic and functions may be "
1234 "excessively trimmed"));
1235 cl::opt<bool> GenPartialProfile(
1236 "gen-partial-profile", cl::init(false), cl::Hidden,
1237 cl::desc("Generate a partial profile (only meaningful for -extbinary)"));
1238 cl::opt<std::string> SupplInstrWithSample(
1239 "supplement-instr-with-sample", cl::init(""), cl::Hidden,
1240 cl::desc("Supplement an instr profile with sample profile, to correct "
1241 "the profile unrepresentativeness issue. The sample "
1242 "profile is the input of the flag. Output will be in instr "
1243 "format (The flag only works with -instr)"));
1244 cl::opt<float> ZeroCounterThreshold(
1245 "zero-counter-threshold", cl::init(0.7), cl::Hidden,
1246 cl::desc("For the function which is cold in instr profile but hot in "
1247 "sample profile, if the ratio of the number of zero counters "
1248 "divided by the total number of counters is above the "
1249 "threshold, the profile of the function will be regarded as "
1250 "being harmful for performance and will be dropped."));
1251 cl::opt<unsigned> SupplMinSizeThreshold(
1252 "suppl-min-size-threshold", cl::init(10), cl::Hidden,
1253 cl::desc("If the size of a function is smaller than the threshold, "
1254 "assume it can be inlined by PGO early inliner and it won't "
1255 "be adjusted based on sample profile."));
1256 cl::opt<unsigned> InstrProfColdThreshold(
1257 "instr-prof-cold-threshold", cl::init(0), cl::Hidden,
1258 cl::desc("User specified cold threshold for instr profile which will "
1259 "override the cold threshold got from profile summary. "));
1260 cl::opt<SampleProfileLayout> ProfileLayout(
1261 "convert-sample-profile-layout",
1262 cl::desc("Convert the generated profile to a profile with a new layout"),
1263 cl::init(SPL_None),
1264 cl::values(
1265 clEnumValN(SPL_Nest, "nest",llvm::cl::OptionEnumValue { "nest", int(SPL_Nest), "Nested profile, the input should be CS flat profile"
}
1266 "Nested profile, the input should be CS flat profile")llvm::cl::OptionEnumValue { "nest", int(SPL_Nest), "Nested profile, the input should be CS flat profile"
}
,
1267 clEnumValN(SPL_Flat, "flat",llvm::cl::OptionEnumValue { "flat", int(SPL_Flat), "Profile with nested inlinee flatten out"
}
1268 "Profile with nested inlinee flatten out")llvm::cl::OptionEnumValue { "flat", int(SPL_Flat), "Profile with nested inlinee flatten out"
}
));
1269 cl::opt<std::string> DebugInfoFilename(
1270 "debug-info", cl::init(""),
1271 cl::desc("Use the provided debug info to correlate the raw profile."));
1272 cl::opt<std::string> ProfiledBinary(
1273 "profiled-binary", cl::init(""),
1274 cl::desc("Path to binary from which the profile was collected."));
1275 cl::opt<bool> DropProfileSymbolList(
1276 "drop-profile-symbol-list", cl::init(false), cl::Hidden,
1277 cl::desc("Drop the profile symbol list when merging AutoFDO profiles "
1278 "(only meaningful for -sample)"));
1279 // WARNING: This reservoir size value is propagated to any input indexed
1280 // profiles for simplicity. Changing this value between invocations could
1281 // result in sample bias.
1282 cl::opt<uint64_t> TemporalProfTraceReservoirSize(
1283 "temporal-profile-trace-reservoir-size", cl::init(100),
1284 cl::desc("The maximum number of stored temporal profile traces (default: "
1285 "100)"));
1286 cl::opt<uint64_t> TemporalProfMaxTraceLength(
1287 "temporal-profile-max-trace-length", cl::init(10000),
1288 cl::desc("The maximum length of a single temporal profile trace "
1289 "(default: 10000)"));
1290
1291 cl::ParseCommandLineOptions(argc, argv, "LLVM profile data merger\n");
1292
1293 WeightedFileVector WeightedInputs;
1294 for (StringRef Filename : InputFilenames)
1295 addWeightedInput(WeightedInputs, {std::string(Filename), 1});
1296 for (StringRef WeightedFilename : WeightedInputFilenames)
1297 addWeightedInput(WeightedInputs, parseWeightedFile(WeightedFilename));
1298
1299 // Make sure that the file buffer stays alive for the duration of the
1300 // weighted input vector's lifetime.
1301 auto Buffer = getInputFileBuf(InputFilenamesFile);
1302 parseInputFilenamesFile(Buffer.get(), WeightedInputs);
1303
1304 if (WeightedInputs.empty())
1305 exitWithError("no input files specified. See " +
1306 sys::path::filename(argv[0]) + " -help");
1307
1308 if (DumpInputFileList) {
1309 for (auto &WF : WeightedInputs)
1310 outs() << WF.Weight << "," << WF.Filename << "\n";
1311 return 0;
1312 }
1313
1314 std::unique_ptr<SymbolRemapper> Remapper;
1315 if (!RemappingFile.empty())
1316 Remapper = SymbolRemapper::create(RemappingFile);
1317
1318 if (!SupplInstrWithSample.empty()) {
1319 if (ProfileKind != instr)
1320 exitWithError(
1321 "-supplement-instr-with-sample can only work with -instr. ");
1322
1323 supplementInstrProfile(WeightedInputs, SupplInstrWithSample, OutputFilename,
1324 OutputFormat, OutputSparse, SupplMinSizeThreshold,
1325 ZeroCounterThreshold, InstrProfColdThreshold);
1326 return 0;
1327 }
1328
1329 if (ProfileKind == instr)
1330 mergeInstrProfile(WeightedInputs, DebugInfoFilename, Remapper.get(),
1331 OutputFilename, OutputFormat,
1332 TemporalProfTraceReservoirSize,
1333 TemporalProfMaxTraceLength, OutputSparse, NumThreads,
1334 FailureMode, ProfiledBinary);
1335 else
1336 mergeSampleProfile(WeightedInputs, Remapper.get(), OutputFilename,
1337 OutputFormat, ProfileSymbolListFile, CompressAllSections,
1338 UseMD5, GenPartialProfile, ProfileLayout,
1339 SampleMergeColdContext, SampleTrimColdContext,
1340 SampleColdContextFrameDepth, FailureMode,
1341 DropProfileSymbolList, OutputSizeLimit);
1342 return 0;
1343}
1344
1345/// Computer the overlap b/w profile BaseFilename and profile TestFilename.
1346static void overlapInstrProfile(const std::string &BaseFilename,
1347 const std::string &TestFilename,
1348 const OverlapFuncFilters &FuncFilter,
1349 raw_fd_ostream &OS, bool IsCS) {
1350 std::mutex ErrorLock;
1351 SmallSet<instrprof_error, 4> WriterErrorCodes;
1352 WriterContext Context(false, ErrorLock, WriterErrorCodes);
1353 WeightedFile WeightedInput{BaseFilename, 1};
1354 OverlapStats Overlap;
1355 Error E = Overlap.accumulateCounts(BaseFilename, TestFilename, IsCS);
1356 if (E)
1357 exitWithError(std::move(E), "error in getting profile count sums");
1358 if (Overlap.Base.CountSum < 1.0f) {
1359 OS << "Sum of edge counts for profile " << BaseFilename << " is 0.\n";
1360 exit(0);
1361 }
1362 if (Overlap.Test.CountSum < 1.0f) {
1363 OS << "Sum of edge counts for profile " << TestFilename << " is 0.\n";
1364 exit(0);
1365 }
1366 loadInput(WeightedInput, nullptr, nullptr, /*ProfiledBinary=*/"", &Context);
1367 overlapInput(BaseFilename, TestFilename, &Context, Overlap, FuncFilter, OS,
1368 IsCS);
1369 Overlap.dump(OS);
1370}
1371
1372namespace {
1373struct SampleOverlapStats {
1374 SampleContext BaseName;
1375 SampleContext TestName;
1376 // Number of overlap units
1377 uint64_t OverlapCount;
1378 // Total samples of overlap units
1379 uint64_t OverlapSample;
1380 // Number of and total samples of units that only present in base or test
1381 // profile
1382 uint64_t BaseUniqueCount;
1383 uint64_t BaseUniqueSample;
1384 uint64_t TestUniqueCount;
1385 uint64_t TestUniqueSample;
1386 // Number of units and total samples in base or test profile
1387 uint64_t BaseCount;
1388 uint64_t BaseSample;
1389 uint64_t TestCount;
1390 uint64_t TestSample;
1391 // Number of and total samples of units that present in at least one profile
1392 uint64_t UnionCount;
1393 uint64_t UnionSample;
1394 // Weighted similarity
1395 double Similarity;
1396 // For SampleOverlapStats instances representing functions, weights of the
1397 // function in base and test profiles
1398 double BaseWeight;
1399 double TestWeight;
1400
1401 SampleOverlapStats()
1402 : OverlapCount(0), OverlapSample(0), BaseUniqueCount(0),
1403 BaseUniqueSample(0), TestUniqueCount(0), TestUniqueSample(0),
1404 BaseCount(0), BaseSample(0), TestCount(0), TestSample(0), UnionCount(0),
1405 UnionSample(0), Similarity(0.0), BaseWeight(0.0), TestWeight(0.0) {}
1406};
1407} // end anonymous namespace
1408
namespace {
/// Per-function sample statistics: the sum of samples, the largest single
/// sample and the number of hot blocks. Default construction zeroes all
/// three fields.
struct FuncSampleStats {
  uint64_t SampleSum = 0;
  uint64_t MaxSample = 0;
  uint64_t HotBlockCount = 0;

  FuncSampleStats() = default;
  FuncSampleStats(uint64_t Sum, uint64_t Max, uint64_t HotBlocks)
      : SampleSum(Sum), MaxSample(Max), HotBlockCount(HotBlocks) {}
};
} // end anonymous namespace
1421
namespace {
/// Outcome of comparing the current elements of two sorted containers.
/// MS_None is the initial state, before any comparison has been made.
enum MatchStatus { MS_Match, MS_FirstUnique, MS_SecondUnique, MS_None };

/// Cursor used to walk two sorted maps in lock step, as in a merge. T is a
/// map iterator type.
template <class T> class MatchStep {
  // Position and end of the first container.
  T FirstIter;
  T FirstEnd;
  // Position and end of the second container.
  T SecondIter;
  T SecondEnd;
  // Result of the comparison at the current position.
  MatchStatus Status = MS_None;

public:
  MatchStep() = delete;

  MatchStep(T FirstIter, T FirstEnd, T SecondIter, T SecondEnd)
      : FirstIter(FirstIter), FirstEnd(FirstEnd), SecondIter(SecondIter),
        SecondEnd(SecondEnd) {}

  bool isFirstFinished() const { return FirstIter == FirstEnd; }

  bool isSecondFinished() const { return SecondIter == SecondEnd; }

  bool areBothFinished() const {
    return isFirstFinished() && isSecondFinished();
  }

  /// Move forward according to the previous match status, then recompute the
  /// status by comparing the elements the two iterators now point at. When
  /// the previous status is MS_None no comparison has happened yet, so the
  /// iterators stay where they are and only the status is computed.
  void updateOneStep();

  T getFirstIter() const { return FirstIter; }

  T getSecondIter() const { return SecondIter; }

  MatchStatus getMatchStatus() const { return Status; }
};
} // end anonymous namespace
1467
1468template <class T> void MatchStep<T>::updateOneStep() {
1469 switch (Status) {
1470 case MS_Match:
1471 ++FirstIter;
1472 ++SecondIter;
1473 break;
1474 case MS_FirstUnique:
1475 ++FirstIter;
1476 break;
1477 case MS_SecondUnique:
1478 ++SecondIter;
1479 break;
1480 case MS_None:
1481 break;
1482 }
1483
1484 // Update Status according to iterators at the current step.
1485 if (areBothFinished())
1486 return;
1487 if (FirstIter != FirstEnd &&
1488 (SecondIter == SecondEnd || FirstIter->first < SecondIter->first))
1489 Status = MS_FirstUnique;
1490 else if (SecondIter != SecondEnd &&
1491 (FirstIter == FirstEnd || SecondIter->first < FirstIter->first))
1492 Status = MS_SecondUnique;
1493 else
1494 Status = MS_Match;
1495}
1496
1497// Return the sum of line/block samples, the max line/block sample, and the
1498// number of line/block samples above the given threshold in a function
1499// including its inlinees.
1500static void getFuncSampleStats(const sampleprof::FunctionSamples &Func,
1501 FuncSampleStats &FuncStats,
1502 uint64_t HotThreshold) {
1503 for (const auto &L : Func.getBodySamples()) {
1504 uint64_t Sample = L.second.getSamples();
1505 FuncStats.SampleSum += Sample;
1506 FuncStats.MaxSample = std::max(FuncStats.MaxSample, Sample);
1507 if (Sample >= HotThreshold)
1508 ++FuncStats.HotBlockCount;
1509 }
1510
1511 for (const auto &C : Func.getCallsiteSamples()) {
1512 for (const auto &F : C.second)
1513 getFuncSampleStats(F.second, FuncStats, HotThreshold);
1514 }
1515}
1516
1517/// Predicate that determines if a function is hot with a given threshold. We
1518/// keep it separate from its callsites for possible extension in the future.
1519static bool isFunctionHot(const FuncSampleStats &FuncStats,
1520 uint64_t HotThreshold) {
1521 // We intentionally compare the maximum sample count in a function with the
1522 // HotThreshold to get an approximate determination on hot functions.
1523 return (FuncStats.MaxSample >= HotThreshold);
1524}
1525
1526namespace {
1527class SampleOverlapAggregator {
1528public:
1529 SampleOverlapAggregator(const std::string &BaseFilename,
1530 const std::string &TestFilename,
1531 double LowSimilarityThreshold, double Epsilon,
1532 const OverlapFuncFilters &FuncFilter)
1533 : BaseFilename(BaseFilename), TestFilename(TestFilename),
1534 LowSimilarityThreshold(LowSimilarityThreshold), Epsilon(Epsilon),
1535 FuncFilter(FuncFilter) {}
1536
1537 /// Detect 0-sample input profile and report to output stream. This interface
1538 /// should be called after loadProfiles().
1539 bool detectZeroSampleProfile(raw_fd_ostream &OS) const;
1540
1541 /// Write out function-level similarity statistics for functions specified by
1542 /// options --function, --value-cutoff, and --similarity-cutoff.
1543 void dumpFuncSimilarity(raw_fd_ostream &OS) const;
1544
1545 /// Write out program-level similarity and overlap statistics.
1546 void dumpProgramSummary(raw_fd_ostream &OS) const;
1547
1548 /// Write out hot-function and hot-block statistics for base_profile,
1549 /// test_profile, and their overlap. For both cases, the overlap HO is
1550 /// calculated as follows:
1551 /// Given the number of functions (or blocks) that are hot in both profiles
1552 /// HCommon and the number of functions (or blocks) that are hot in at
1553 /// least one profile HUnion, HO = HCommon / HUnion.
1554 void dumpHotFuncAndBlockOverlap(raw_fd_ostream &OS) const;
1555
1556 /// This function tries matching functions in base and test profiles. For each
1557 /// pair of matched functions, it aggregates the function-level
1558 /// similarity into a profile-level similarity. It also dump function-level
1559 /// similarity information of functions specified by --function,
1560 /// --value-cutoff, and --similarity-cutoff options. The program-level
1561 /// similarity PS is computed as follows:
1562 /// Given function-level similarity FS(A) for all function A, the
1563 /// weight of function A in base profile WB(A), and the weight of function
1564 /// A in test profile WT(A), compute PS(base_profile, test_profile) =
1565 /// sum_A(FS(A) * avg(WB(A), WT(A))) ranging in [0.0f to 1.0f] with 0.0
1566 /// meaning no-overlap.
1567 void computeSampleProfileOverlap(raw_fd_ostream &OS);
1568
1569 /// Initialize ProfOverlap with the sum of samples in base and test
1570 /// profiles. This function also computes and keeps the sum of samples and
1571 /// max sample counts of each function in BaseStats and TestStats for later
1572 /// use to avoid re-computations.
1573 void initializeSampleProfileOverlap();
1574
1575 /// Load profiles specified by BaseFilename and TestFilename.
1576 std::error_code loadProfiles();
1577
1578 using FuncSampleStatsMap =
1579 std::unordered_map<SampleContext, FuncSampleStats, SampleContext::Hash>;
1580
1581private:
1582 SampleOverlapStats ProfOverlap;
1583 SampleOverlapStats HotFuncOverlap;
1584 SampleOverlapStats HotBlockOverlap;
1585 std::string BaseFilename;
1586 std::string TestFilename;
1587 std::unique_ptr<sampleprof::SampleProfileReader> BaseReader;
1588 std::unique_ptr<sampleprof::SampleProfileReader> TestReader;
1589 // BaseStats and TestStats hold FuncSampleStats for each function, with
1590 // function name as the key.
1591 FuncSampleStatsMap BaseStats;
1592 FuncSampleStatsMap TestStats;
1593 // Low similarity threshold in floating point number
1594 double LowSimilarityThreshold;
1595 // Block samples above BaseHotThreshold or TestHotThreshold are considered hot
1596 // for tracking hot blocks.
1597 uint64_t BaseHotThreshold;
1598 uint64_t TestHotThreshold;
1599 // A small threshold used to round the results of floating point accumulations
1600 // to resolve imprecision.
1601 const double Epsilon;
1602 std::multimap<double, SampleOverlapStats, std::greater<double>>
1603 FuncSimilarityDump;
1604 // FuncFilter carries specifications in options --value-cutoff and
1605 // --function.
1606 OverlapFuncFilters FuncFilter;
1607 // Column offsets for printing the function-level details table.
1608 static const unsigned int TestWeightCol = 15;
1609 static const unsigned int SimilarityCol = 30;
1610 static const unsigned int OverlapCol = 43;
1611 static const unsigned int BaseUniqueCol = 53;
1612 static const unsigned int TestUniqueCol = 67;
1613 static const unsigned int BaseSampleCol = 81;
1614 static const unsigned int TestSampleCol = 96;
1615 static const unsigned int FuncNameCol = 111;
1616
1617 /// Return a similarity of two line/block sample counters in the same
1618 /// function in base and test profiles. The line/block-similarity BS(i) is
1619 /// computed as follows:
1620 /// For an offsets i, given the sample count at i in base profile BB(i),
1621 /// the sample count at i in test profile BT(i), the sum of sample counts
1622 /// in this function in base profile SB, and the sum of sample counts in
1623 /// this function in test profile ST, compute BS(i) = 1.0 - fabs(BB(i)/SB -
1624 /// BT(i)/ST), ranging in [0.0f to 1.0f] with 0.0 meaning no-overlap.
1625 double computeBlockSimilarity(uint64_t BaseSample, uint64_t TestSample,
1626 const SampleOverlapStats &FuncOverlap) const;
1627
1628 void updateHotBlockOverlap(uint64_t BaseSample, uint64_t TestSample,
1629 uint64_t HotBlockCount);
1630
1631 void getHotFunctions(const FuncSampleStatsMap &ProfStats,
1632 FuncSampleStatsMap &HotFunc,
1633 uint64_t HotThreshold) const;
1634
1635 void computeHotFuncOverlap();
1636
1637 /// This function updates statistics in FuncOverlap, HotBlockOverlap, and
1638 /// Difference for two sample units in a matched function according to the
1639 /// given match status.
1640 void updateOverlapStatsForFunction(uint64_t BaseSample, uint64_t TestSample,
1641 uint64_t HotBlockCount,
1642 SampleOverlapStats &FuncOverlap,
1643 double &Difference, MatchStatus Status);
1644
1645 /// This function updates statistics in FuncOverlap, HotBlockOverlap, and
1646 /// Difference for unmatched callees that only present in one profile in a
1647 /// matched caller function.
1648 void updateForUnmatchedCallee(const sampleprof::FunctionSamples &Func,
1649 SampleOverlapStats &FuncOverlap,
1650 double &Difference, MatchStatus Status);
1651
1652 /// This function updates sample overlap statistics of an overlap function in
1653 /// base and test profile. It also calculates a function-internal similarity
1654 /// FIS as follows:
1655 /// For offsets i that have samples in at least one profile in this
1656 /// function A, given BS(i) returned by computeBlockSimilarity(), compute
1657 /// FIS(A) = (2.0 - sum_i(1.0 - BS(i))) / 2, ranging in [0.0f to 1.0f] with
1658 /// 0.0 meaning no overlap.
1659 double computeSampleFunctionInternalOverlap(
1660 const sampleprof::FunctionSamples &BaseFunc,
1661 const sampleprof::FunctionSamples &TestFunc,
1662 SampleOverlapStats &FuncOverlap);
1663
1664 /// Function-level similarity (FS) is a weighted value over function internal
1665 /// similarity (FIS). This function computes a function's FS from its FIS by
1666 /// applying the weight.
1667 double weightForFuncSimilarity(double FuncSimilarity, uint64_t BaseFuncSample,
1668 uint64_t TestFuncSample) const;
1669
1670 /// The function-level similarity FS(A) for a function A is computed as
1671 /// follows:
1672 /// Compute a function-internal similarity FIS(A) by
1673 /// computeSampleFunctionInternalOverlap(). Then, with the weight of
1674 /// function A in base profile WB(A), and the weight of function A in test
1675 /// profile WT(A), compute FS(A) = FIS(A) * (1.0 - fabs(WB(A) - WT(A)))
1676 /// ranging in [0.0f to 1.0f] with 0.0 meaning no overlap.
1677 double
1678 computeSampleFunctionOverlap(const sampleprof::FunctionSamples *BaseFunc,
1679 const sampleprof::FunctionSamples *TestFunc,
1680 SampleOverlapStats *FuncOverlap,
1681 uint64_t BaseFuncSample,
1682 uint64_t TestFuncSample);
1683
1684 /// Profile-level similarity (PS) is a weighted aggregate over function-level
1685 /// similarities (FS). This method weights the FS value by the function
1686 /// weights in the base and test profiles for the aggregation.
1687 double weightByImportance(double FuncSimilarity, uint64_t BaseFuncSample,
1688 uint64_t TestFuncSample) const;
1689};
1690} // end anonymous namespace
1691
1692bool SampleOverlapAggregator::detectZeroSampleProfile(
1693 raw_fd_ostream &OS) const {
1694 bool HaveZeroSample = false;
1695 if (ProfOverlap.BaseSample == 0) {
1696 OS << "Sum of sample counts for profile " << BaseFilename << " is 0.\n";
1697 HaveZeroSample = true;
1698 }
1699 if (ProfOverlap.TestSample == 0) {
1700 OS << "Sum of sample counts for profile " << TestFilename << " is 0.\n";
1701 HaveZeroSample = true;
1702 }
1703 return HaveZeroSample;
1704}
1705
1706double SampleOverlapAggregator::computeBlockSimilarity(
1707 uint64_t BaseSample, uint64_t TestSample,
1708 const SampleOverlapStats &FuncOverlap) const {
1709 double BaseFrac = 0.0;
1710 double TestFrac = 0.0;
1711 if (FuncOverlap.BaseSample > 0)
1712 BaseFrac = static_cast<double>(BaseSample) / FuncOverlap.BaseSample;
1713 if (FuncOverlap.TestSample > 0)
1714 TestFrac = static_cast<double>(TestSample) / FuncOverlap.TestSample;
1715 return 1.0 - std::fabs(BaseFrac - TestFrac);
1716}
1717
1718void SampleOverlapAggregator::updateHotBlockOverlap(uint64_t BaseSample,
1719 uint64_t TestSample,
1720 uint64_t HotBlockCount) {
1721 bool IsBaseHot = (BaseSample >= BaseHotThreshold);
1722 bool IsTestHot = (TestSample >= TestHotThreshold);
1723 if (!IsBaseHot && !IsTestHot)
1724 return;
1725
1726 HotBlockOverlap.UnionCount += HotBlockCount;
1727 if (IsBaseHot)
1728 HotBlockOverlap.BaseCount += HotBlockCount;
1729 if (IsTestHot)
1730 HotBlockOverlap.TestCount += HotBlockCount;
1731 if (IsBaseHot && IsTestHot)
1732 HotBlockOverlap.OverlapCount += HotBlockCount;
1733}
1734
1735void SampleOverlapAggregator::getHotFunctions(
1736 const FuncSampleStatsMap &ProfStats, FuncSampleStatsMap &HotFunc,
1737 uint64_t HotThreshold) const {
1738 for (const auto &F : ProfStats) {
1739 if (isFunctionHot(F.second, HotThreshold))
1740 HotFunc.emplace(F.first, F.second);
1741 }
1742}
1743
1744void SampleOverlapAggregator::computeHotFuncOverlap() {
1745 FuncSampleStatsMap BaseHotFunc;
1746 getHotFunctions(BaseStats, BaseHotFunc, BaseHotThreshold);
1747 HotFuncOverlap.BaseCount = BaseHotFunc.size();
1748
1749 FuncSampleStatsMap TestHotFunc;
1750 getHotFunctions(TestStats, TestHotFunc, TestHotThreshold);
1751 HotFuncOverlap.TestCount = TestHotFunc.size();
1752 HotFuncOverlap.UnionCount = HotFuncOverlap.TestCount;
1753
1754 for (const auto &F : BaseHotFunc) {
1755 if (TestHotFunc.count(F.first))
1756 ++HotFuncOverlap.OverlapCount;
1757 else
1758 ++HotFuncOverlap.UnionCount;
1759 }
1760}
1761
1762void SampleOverlapAggregator::updateOverlapStatsForFunction(
1763 uint64_t BaseSample, uint64_t TestSample, uint64_t HotBlockCount,
1764 SampleOverlapStats &FuncOverlap, double &Difference, MatchStatus Status) {
1765 assert(Status != MS_None &&(static_cast <bool> (Status != MS_None && "Match status should be updated before updating overlap statistics"
) ? void (0) : __assert_fail ("Status != MS_None && \"Match status should be updated before updating overlap statistics\""
, "llvm/tools/llvm-profdata/llvm-profdata.cpp", 1766, __extension__
__PRETTY_FUNCTION__))
1766 "Match status should be updated before updating overlap statistics")(static_cast <bool> (Status != MS_None && "Match status should be updated before updating overlap statistics"
) ? void (0) : __assert_fail ("Status != MS_None && \"Match status should be updated before updating overlap statistics\""
, "llvm/tools/llvm-profdata/llvm-profdata.cpp", 1766, __extension__
__PRETTY_FUNCTION__))
;
1767 if (Status == MS_FirstUnique) {
1768 TestSample = 0;
1769 FuncOverlap.BaseUniqueSample += BaseSample;
1770 } else if (Status == MS_SecondUnique) {
1771 BaseSample = 0;
1772 FuncOverlap.TestUniqueSample += TestSample;
1773 } else {
1774 ++FuncOverlap.OverlapCount;
1775 }
1776
1777 FuncOverlap.UnionSample += std::max(BaseSample, TestSample);
1778 FuncOverlap.OverlapSample += std::min(BaseSample, TestSample);
1779 Difference +=
1780 1.0 - computeBlockSimilarity(BaseSample, TestSample, FuncOverlap);
1781 updateHotBlockOverlap(BaseSample, TestSample, HotBlockCount);
1782}
1783
1784void SampleOverlapAggregator::updateForUnmatchedCallee(
1785 const sampleprof::FunctionSamples &Func, SampleOverlapStats &FuncOverlap,
1786 double &Difference, MatchStatus Status) {
1787 assert((Status == MS_FirstUnique || Status == MS_SecondUnique) &&(static_cast <bool> ((Status == MS_FirstUnique || Status
== MS_SecondUnique) && "Status must be either of the two unmatched cases"
) ? void (0) : __assert_fail ("(Status == MS_FirstUnique || Status == MS_SecondUnique) && \"Status must be either of the two unmatched cases\""
, "llvm/tools/llvm-profdata/llvm-profdata.cpp", 1788, __extension__
__PRETTY_FUNCTION__))
1788 "Status must be either of the two unmatched cases")(static_cast <bool> ((Status == MS_FirstUnique || Status
== MS_SecondUnique) && "Status must be either of the two unmatched cases"
) ? void (0) : __assert_fail ("(Status == MS_FirstUnique || Status == MS_SecondUnique) && \"Status must be either of the two unmatched cases\""
, "llvm/tools/llvm-profdata/llvm-profdata.cpp", 1788, __extension__
__PRETTY_FUNCTION__))
;
1789 FuncSampleStats FuncStats;
1790 if (Status == MS_FirstUnique) {
1791 getFuncSampleStats(Func, FuncStats, BaseHotThreshold);
1792 updateOverlapStatsForFunction(FuncStats.SampleSum, 0,
1793 FuncStats.HotBlockCount, FuncOverlap,
1794 Difference, Status);
1795 } else {
1796 getFuncSampleStats(Func, FuncStats, TestHotThreshold);
1797 updateOverlapStatsForFunction(0, FuncStats.SampleSum,
1798 FuncStats.HotBlockCount, FuncOverlap,
1799 Difference, Status);
1800 }
1801}
1802
1803double SampleOverlapAggregator::computeSampleFunctionInternalOverlap(
1804 const sampleprof::FunctionSamples &BaseFunc,
1805 const sampleprof::FunctionSamples &TestFunc,
1806 SampleOverlapStats &FuncOverlap) {
1807
1808 using namespace sampleprof;
1809
1810 double Difference = 0;
1811
1812 // Accumulate Difference for regular line/block samples in the function.
1813 // We match them through sort-merge join algorithm because
1814 // FunctionSamples::getBodySamples() returns a map of sample counters ordered
1815 // by their offsets.
1816 MatchStep<BodySampleMap::const_iterator> BlockIterStep(
1817 BaseFunc.getBodySamples().cbegin(), BaseFunc.getBodySamples().cend(),
1818 TestFunc.getBodySamples().cbegin(), TestFunc.getBodySamples().cend());
1819 BlockIterStep.updateOneStep();
1820 while (!BlockIterStep.areBothFinished()) {
1821 uint64_t BaseSample =
1822 BlockIterStep.isFirstFinished()
1823 ? 0
1824 : BlockIterStep.getFirstIter()->second.getSamples();
1825 uint64_t TestSample =
1826 BlockIterStep.isSecondFinished()
1827 ? 0
1828 : BlockIterStep.getSecondIter()->second.getSamples();
1829 updateOverlapStatsForFunction(BaseSample, TestSample, 1, FuncOverlap,
1830 Difference, BlockIterStep.getMatchStatus());
1831
1832 BlockIterStep.updateOneStep();
1833 }
1834
1835 // Accumulate Difference for callsite lines in the function. We match
1836 // them through sort-merge algorithm because
1837 // FunctionSamples::getCallsiteSamples() returns a map of callsite records
1838 // ordered by their offsets.
1839 MatchStep<CallsiteSampleMap::const_iterator> CallsiteIterStep(
1840 BaseFunc.getCallsiteSamples().cbegin(),
1841 BaseFunc.getCallsiteSamples().cend(),
1842 TestFunc.getCallsiteSamples().cbegin(),
1843 TestFunc.getCallsiteSamples().cend());
1844 CallsiteIterStep.updateOneStep();
1845 while (!CallsiteIterStep.areBothFinished()) {
1846 MatchStatus CallsiteStepStatus = CallsiteIterStep.getMatchStatus();
1847 assert(CallsiteStepStatus != MS_None &&(static_cast <bool> (CallsiteStepStatus != MS_None &&
"Match status should be updated before entering loop body") ?
void (0) : __assert_fail ("CallsiteStepStatus != MS_None && \"Match status should be updated before entering loop body\""
, "llvm/tools/llvm-profdata/llvm-profdata.cpp", 1848, __extension__
__PRETTY_FUNCTION__))
1848 "Match status should be updated before entering loop body")(static_cast <bool> (CallsiteStepStatus != MS_None &&
"Match status should be updated before entering loop body") ?
void (0) : __assert_fail ("CallsiteStepStatus != MS_None && \"Match status should be updated before entering loop body\""
, "llvm/tools/llvm-profdata/llvm-profdata.cpp", 1848, __extension__
__PRETTY_FUNCTION__))
;
1849
1850 if (CallsiteStepStatus != MS_Match) {
1851 auto Callsite = (CallsiteStepStatus == MS_FirstUnique)
1852 ? CallsiteIterStep.getFirstIter()
1853 : CallsiteIterStep.getSecondIter();
1854 for (const auto &F : Callsite->second)
1855 updateForUnmatchedCallee(F.second, FuncOverlap, Difference,
1856 CallsiteStepStatus);
1857 } else {
1858 // There may be multiple inlinees at the same offset, so we need to try
1859 // matching all of them. This match is implemented through sort-merge
1860 // algorithm because callsite records at the same offset are ordered by
1861 // function names.
1862 MatchStep<FunctionSamplesMap::const_iterator> CalleeIterStep(
1863 CallsiteIterStep.getFirstIter()->second.cbegin(),
1864 CallsiteIterStep.getFirstIter()->second.cend(),
1865 CallsiteIterStep.getSecondIter()->second.cbegin(),
1866 CallsiteIterStep.getSecondIter()->second.cend());
1867 CalleeIterStep.updateOneStep();
1868 while (!CalleeIterStep.areBothFinished()) {
1869 MatchStatus CalleeStepStatus = CalleeIterStep.getMatchStatus();
1870 if (CalleeStepStatus != MS_Match) {
1871 auto Callee = (CalleeStepStatus == MS_FirstUnique)
1872 ? CalleeIterStep.getFirstIter()
1873 : CalleeIterStep.getSecondIter();
1874 updateForUnmatchedCallee(Callee->second, FuncOverlap, Difference,
1875 CalleeStepStatus);
1876 } else {
1877 // An inlined function can contain other inlinees inside, so compute
1878 // the Difference recursively.
1879 Difference += 2.0 - 2 * computeSampleFunctionInternalOverlap(
1880 CalleeIterStep.getFirstIter()->second,
1881 CalleeIterStep.getSecondIter()->second,
1882 FuncOverlap);
1883 }
1884 CalleeIterStep.updateOneStep();
1885 }
1886 }
1887 CallsiteIterStep.updateOneStep();
1888 }
1889
1890 // Difference reflects the total differences of line/block samples in this
1891 // function and ranges in [0.0f to 2.0f]. Take (2.0 - Difference) / 2 to
1892 // reflect the similarity between function profiles in [0.0f to 1.0f].
1893 return (2.0 - Difference) / 2;
1894}
1895
1896double SampleOverlapAggregator::weightForFuncSimilarity(
1897 double FuncInternalSimilarity, uint64_t BaseFuncSample,
1898 uint64_t TestFuncSample) const {
1899 // Compute the weight as the distance between the function weights in two
1900 // profiles.
1901 double BaseFrac = 0.0;
1902 double TestFrac = 0.0;
1903 assert(ProfOverlap.BaseSample > 0 &&(static_cast <bool> (ProfOverlap.BaseSample > 0 &&
"Total samples in base profile should be greater than 0") ? void
(0) : __assert_fail ("ProfOverlap.BaseSample > 0 && \"Total samples in base profile should be greater than 0\""
, "llvm/tools/llvm-profdata/llvm-profdata.cpp", 1904, __extension__
__PRETTY_FUNCTION__))
1904 "Total samples in base profile should be greater than 0")(static_cast <bool> (ProfOverlap.BaseSample > 0 &&
"Total samples in base profile should be greater than 0") ? void
(0) : __assert_fail ("ProfOverlap.BaseSample > 0 && \"Total samples in base profile should be greater than 0\""
, "llvm/tools/llvm-profdata/llvm-profdata.cpp", 1904, __extension__
__PRETTY_FUNCTION__))
;
1905 BaseFrac = static_cast<double>(BaseFuncSample) / ProfOverlap.BaseSample;
1906 assert(ProfOverlap.TestSample > 0 &&(static_cast <bool> (ProfOverlap.TestSample > 0 &&
"Total samples in test profile should be greater than 0") ? void
(0) : __assert_fail ("ProfOverlap.TestSample > 0 && \"Total samples in test profile should be greater than 0\""
, "llvm/tools/llvm-profdata/llvm-profdata.cpp", 1907, __extension__
__PRETTY_FUNCTION__))
1907 "Total samples in test profile should be greater than 0")(static_cast <bool> (ProfOverlap.TestSample > 0 &&
"Total samples in test profile should be greater than 0") ? void
(0) : __assert_fail ("ProfOverlap.TestSample > 0 && \"Total samples in test profile should be greater than 0\""
, "llvm/tools/llvm-profdata/llvm-profdata.cpp", 1907, __extension__
__PRETTY_FUNCTION__))
;
1908 TestFrac = static_cast<double>(TestFuncSample) / ProfOverlap.TestSample;
1909 double WeightDistance = std::fabs(BaseFrac - TestFrac);
1910
1911 // Take WeightDistance into the similarity.
1912 return FuncInternalSimilarity * (1 - WeightDistance);
1913}
1914
1915double
1916SampleOverlapAggregator::weightByImportance(double FuncSimilarity,
1917 uint64_t BaseFuncSample,
1918 uint64_t TestFuncSample) const {
1919
1920 double BaseFrac = 0.0;
1921 double TestFrac = 0.0;
1922 assert(ProfOverlap.BaseSample > 0 &&(static_cast <bool> (ProfOverlap.BaseSample > 0 &&
"Total samples in base profile should be greater than 0") ? void
(0) : __assert_fail ("ProfOverlap.BaseSample > 0 && \"Total samples in base profile should be greater than 0\""
, "llvm/tools/llvm-profdata/llvm-profdata.cpp", 1923, __extension__
__PRETTY_FUNCTION__))
1923 "Total samples in base profile should be greater than 0")(static_cast <bool> (ProfOverlap.BaseSample > 0 &&
"Total samples in base profile should be greater than 0") ? void
(0) : __assert_fail ("ProfOverlap.BaseSample > 0 && \"Total samples in base profile should be greater than 0\""
, "llvm/tools/llvm-profdata/llvm-profdata.cpp", 1923, __extension__
__PRETTY_FUNCTION__))
;
1924 BaseFrac = static_cast<double>(BaseFuncSample) / ProfOverlap.BaseSample / 2.0;
1925 assert(ProfOverlap.TestSample > 0 &&(static_cast <bool> (ProfOverlap.TestSample > 0 &&
"Total samples in test profile should be greater than 0") ? void
(0) : __assert_fail ("ProfOverlap.TestSample > 0 && \"Total samples in test profile should be greater than 0\""
, "llvm/tools/llvm-profdata/llvm-profdata.cpp", 1926, __extension__
__PRETTY_FUNCTION__))
1926 "Total samples in test profile should be greater than 0")(static_cast <bool> (ProfOverlap.TestSample > 0 &&
"Total samples in test profile should be greater than 0") ? void
(0) : __assert_fail ("ProfOverlap.TestSample > 0 && \"Total samples in test profile should be greater than 0\""
, "llvm/tools/llvm-profdata/llvm-profdata.cpp", 1926, __extension__
__PRETTY_FUNCTION__))
;
1927 TestFrac = static_cast<double>(TestFuncSample) / ProfOverlap.TestSample / 2.0;
1928 return FuncSimilarity * (BaseFrac + TestFrac);
1929}
1930
1931double SampleOverlapAggregator::computeSampleFunctionOverlap(
1932 const sampleprof::FunctionSamples *BaseFunc,
1933 const sampleprof::FunctionSamples *TestFunc,
1934 SampleOverlapStats *FuncOverlap, uint64_t BaseFuncSample,
1935 uint64_t TestFuncSample) {
1936 // Default function internal similarity before weighted, meaning two functions
1937 // has no overlap.
1938 const double DefaultFuncInternalSimilarity = 0;
1939 double FuncSimilarity;
1940 double FuncInternalSimilarity;
1941
1942 // If BaseFunc or TestFunc is nullptr, it means the functions do not overlap.
1943 // In this case, we use DefaultFuncInternalSimilarity as the function internal
1944 // similarity.
1945 if (!BaseFunc || !TestFunc) {
1946 FuncInternalSimilarity = DefaultFuncInternalSimilarity;
1947 } else {
1948 assert(FuncOverlap != nullptr &&(static_cast <bool> (FuncOverlap != nullptr && "FuncOverlap should be provided in this case"
) ? void (0) : __assert_fail ("FuncOverlap != nullptr && \"FuncOverlap should be provided in this case\""
, "llvm/tools/llvm-profdata/llvm-profdata.cpp", 1949, __extension__
__PRETTY_FUNCTION__))
1949 "FuncOverlap should be provided in this case")(static_cast <bool> (FuncOverlap != nullptr && "FuncOverlap should be provided in this case"
) ? void (0) : __assert_fail ("FuncOverlap != nullptr && \"FuncOverlap should be provided in this case\""
, "llvm/tools/llvm-profdata/llvm-profdata.cpp", 1949, __extension__
__PRETTY_FUNCTION__))
;
1950 FuncInternalSimilarity = computeSampleFunctionInternalOverlap(
1951 *BaseFunc, *TestFunc, *FuncOverlap);
1952 // Now, FuncInternalSimilarity may be a little less than 0 due to
1953 // imprecision of floating point accumulations. Make it zero if the
1954 // difference is below Epsilon.
1955 FuncInternalSimilarity = (std::fabs(FuncInternalSimilarity - 0) < Epsilon)
1956 ? 0
1957 : FuncInternalSimilarity;
1958 }
1959 FuncSimilarity = weightForFuncSimilarity(FuncInternalSimilarity,
1960 BaseFuncSample, TestFuncSample);
1961 return FuncSimilarity;
1962}
1963
1964void SampleOverlapAggregator::computeSampleProfileOverlap(raw_fd_ostream &OS) {
1965 using namespace sampleprof;
1966
1967 std::unordered_map<SampleContext, const FunctionSamples *,
1968 SampleContext::Hash>
1969 BaseFuncProf;
1970 const auto &BaseProfiles = BaseReader->getProfiles();
1971 for (const auto &BaseFunc : BaseProfiles) {
1972 BaseFuncProf.emplace(BaseFunc.second.getContext(), &(BaseFunc.second));
1973 }
1974 ProfOverlap.UnionCount = BaseFuncProf.size();
1975
1976 const auto &TestProfiles = TestReader->getProfiles();
1977 for (const auto &TestFunc : TestProfiles) {
1978 SampleOverlapStats FuncOverlap;
1979 FuncOverlap.TestName = TestFunc.second.getContext();
1980 assert(TestStats.count(FuncOverlap.TestName) &&(static_cast <bool> (TestStats.count(FuncOverlap.TestName
) && "TestStats should have records for all functions in test profile "
"except inlinees") ? void (0) : __assert_fail ("TestStats.count(FuncOverlap.TestName) && \"TestStats should have records for all functions in test profile \" \"except inlinees\""
, "llvm/tools/llvm-profdata/llvm-profdata.cpp", 1982, __extension__
__PRETTY_FUNCTION__))
1981 "TestStats should have records for all functions in test profile "(static_cast <bool> (TestStats.count(FuncOverlap.TestName
) && "TestStats should have records for all functions in test profile "
"except inlinees") ? void (0) : __assert_fail ("TestStats.count(FuncOverlap.TestName) && \"TestStats should have records for all functions in test profile \" \"except inlinees\""
, "llvm/tools/llvm-profdata/llvm-profdata.cpp", 1982, __extension__
__PRETTY_FUNCTION__))
1982 "except inlinees")(static_cast <bool> (TestStats.count(FuncOverlap.TestName
) && "TestStats should have records for all functions in test profile "
"except inlinees") ? void (0) : __assert_fail ("TestStats.count(FuncOverlap.TestName) && \"TestStats should have records for all functions in test profile \" \"except inlinees\""
, "llvm/tools/llvm-profdata/llvm-profdata.cpp", 1982, __extension__
__PRETTY_FUNCTION__))
;
1983 FuncOverlap.TestSample = TestStats[FuncOverlap.TestName].SampleSum;
1984
1985 bool Matched = false;
1986 const auto Match = BaseFuncProf.find(FuncOverlap.TestName);
1987 if (Match == BaseFuncProf.end()) {
1988 const FuncSampleStats &FuncStats = TestStats[FuncOverlap.TestName];
1989 ++ProfOverlap.TestUniqueCount;
1990 ProfOverlap.TestUniqueSample += FuncStats.SampleSum;
1991 FuncOverlap.TestUniqueSample = FuncStats.SampleSum;
1992
1993 updateHotBlockOverlap(0, FuncStats.SampleSum, FuncStats.HotBlockCount);
1994
1995 double FuncSimilarity = computeSampleFunctionOverlap(
1996 nullptr, nullptr, nullptr, 0, FuncStats.SampleSum);
1997 ProfOverlap.Similarity +=
1998 weightByImportance(FuncSimilarity, 0, FuncStats.SampleSum);
1999
2000 ++ProfOverlap.UnionCount;
2001 ProfOverlap.UnionSample += FuncStats.SampleSum;
2002 } else {
2003 ++ProfOverlap.OverlapCount;
2004
2005 // Two functions match with each other. Compute function-level overlap and
2006 // aggregate them into profile-level overlap.
2007 FuncOverlap.BaseName = Match->second->getContext();
2008 assert(BaseStats.count(FuncOverlap.BaseName) &&(static_cast <bool> (BaseStats.count(FuncOverlap.BaseName
) && "BaseStats should have records for all functions in base profile "
"except inlinees") ? void (0) : __assert_fail ("BaseStats.count(FuncOverlap.BaseName) && \"BaseStats should have records for all functions in base profile \" \"except inlinees\""
, "llvm/tools/llvm-profdata/llvm-profdata.cpp", 2010, __extension__
__PRETTY_FUNCTION__))
2009 "BaseStats should have records for all functions in base profile "(static_cast <bool> (BaseStats.count(FuncOverlap.BaseName
) && "BaseStats should have records for all functions in base profile "
"except inlinees") ? void (0) : __assert_fail ("BaseStats.count(FuncOverlap.BaseName) && \"BaseStats should have records for all functions in base profile \" \"except inlinees\""
, "llvm/tools/llvm-profdata/llvm-profdata.cpp", 2010, __extension__
__PRETTY_FUNCTION__))
2010 "except inlinees")(static_cast <bool> (BaseStats.count(FuncOverlap.BaseName
) && "BaseStats should have records for all functions in base profile "
"except inlinees") ? void (0) : __assert_fail ("BaseStats.count(FuncOverlap.BaseName) && \"BaseStats should have records for all functions in base profile \" \"except inlinees\""
, "llvm/tools/llvm-profdata/llvm-profdata.cpp", 2010, __extension__
__PRETTY_FUNCTION__))
;
2011 FuncOverlap.BaseSample = BaseStats[FuncOverlap.BaseName].SampleSum;
2012
2013 FuncOverlap.Similarity = computeSampleFunctionOverlap(
2014 Match->second, &TestFunc.second, &FuncOverlap, FuncOverlap.BaseSample,
2015 FuncOverlap.TestSample);
2016 ProfOverlap.Similarity +=
2017 weightByImportance(FuncOverlap.Similarity, FuncOverlap.BaseSample,
2018 FuncOverlap.TestSample);
2019 ProfOverlap.OverlapSample += FuncOverlap.OverlapSample;
2020 ProfOverlap.UnionSample += FuncOverlap.UnionSample;
2021
2022 // Accumulate the percentage of base unique and test unique samples into
2023 // ProfOverlap.
2024 ProfOverlap.BaseUniqueSample += FuncOverlap.BaseUniqueSample;
2025 ProfOverlap.TestUniqueSample += FuncOverlap.TestUniqueSample;
2026
2027 // Remove matched base functions for later reporting functions not found
2028 // in test profile.
2029 BaseFuncProf.erase(Match);
2030 Matched = true;
2031 }
2032
2033 // Print function-level similarity information if specified by options.
2034 assert(TestStats.count(FuncOverlap.TestName) &&(static_cast <bool> (TestStats.count(FuncOverlap.TestName
) && "TestStats should have records for all functions in test profile "
"except inlinees") ? void (0) : __assert_fail ("TestStats.count(FuncOverlap.TestName) && \"TestStats should have records for all functions in test profile \" \"except inlinees\""
, "llvm/tools/llvm-profdata/llvm-profdata.cpp", 2036, __extension__
__PRETTY_FUNCTION__))
2035 "TestStats should have records for all functions in test profile "(static_cast <bool> (TestStats.count(FuncOverlap.TestName
) && "TestStats should have records for all functions in test profile "
"except inlinees") ? void (0) : __assert_fail ("TestStats.count(FuncOverlap.TestName) && \"TestStats should have records for all functions in test profile \" \"except inlinees\""
, "llvm/tools/llvm-profdata/llvm-profdata.cpp", 2036, __extension__
__PRETTY_FUNCTION__))
2036 "except inlinees")(static_cast <bool> (TestStats.count(FuncOverlap.TestName
) && "TestStats should have records for all functions in test profile "
"except inlinees") ? void (0) : __assert_fail ("TestStats.count(FuncOverlap.TestName) && \"TestStats should have records for all functions in test profile \" \"except inlinees\""
, "llvm/tools/llvm-profdata/llvm-profdata.cpp", 2036, __extension__
__PRETTY_FUNCTION__))
;
2037 if (TestStats[FuncOverlap.TestName].MaxSample >= FuncFilter.ValueCutoff ||
2038 (Matched && FuncOverlap.Similarity < LowSimilarityThreshold) ||
2039 (Matched && !FuncFilter.NameFilter.empty() &&
2040 FuncOverlap.BaseName.toString().find(FuncFilter.NameFilter) !=
2041 std::string::npos)) {
2042 assert(ProfOverlap.BaseSample > 0 &&(static_cast <bool> (ProfOverlap.BaseSample > 0 &&
"Total samples in base profile should be greater than 0") ? void
(0) : __assert_fail ("ProfOverlap.BaseSample > 0 && \"Total samples in base profile should be greater than 0\""
, "llvm/tools/llvm-profdata/llvm-profdata.cpp", 2043, __extension__
__PRETTY_FUNCTION__))
2043 "Total samples in base profile should be greater than 0")(static_cast <bool> (ProfOverlap.BaseSample > 0 &&
"Total samples in base profile should be greater than 0") ? void
(0) : __assert_fail ("ProfOverlap.BaseSample > 0 && \"Total samples in base profile should be greater than 0\""
, "llvm/tools/llvm-profdata/llvm-profdata.cpp", 2043, __extension__
__PRETTY_FUNCTION__))
;
2044 FuncOverlap.BaseWeight =
2045 static_cast<double>(FuncOverlap.BaseSample) / ProfOverlap.BaseSample;
2046 assert(ProfOverlap.TestSample > 0 &&(static_cast <bool> (ProfOverlap.TestSample > 0 &&
"Total samples in test profile should be greater than 0") ? void
(0) : __assert_fail ("ProfOverlap.TestSample > 0 && \"Total samples in test profile should be greater than 0\""
, "llvm/tools/llvm-profdata/llvm-profdata.cpp", 2047, __extension__
__PRETTY_FUNCTION__))
2047 "Total samples in test profile should be greater than 0")(static_cast <bool> (ProfOverlap.TestSample > 0 &&
"Total samples in test profile should be greater than 0") ? void
(0) : __assert_fail ("ProfOverlap.TestSample > 0 && \"Total samples in test profile should be greater than 0\""
, "llvm/tools/llvm-profdata/llvm-profdata.cpp", 2047, __extension__
__PRETTY_FUNCTION__))
;
2048 FuncOverlap.TestWeight =
2049 static_cast<double>(FuncOverlap.TestSample) / ProfOverlap.TestSample;
2050 FuncSimilarityDump.emplace(FuncOverlap.BaseWeight, FuncOverlap);
2051 }
2052 }
2053
2054 // Traverse through functions in base profile but not in test profile.
2055 for (const auto &F : BaseFuncProf) {
2056 assert(BaseStats.count(F.second->getContext()) &&(static_cast <bool> (BaseStats.count(F.second->getContext
()) && "BaseStats should have records for all functions in base profile "
"except inlinees") ? void (0) : __assert_fail ("BaseStats.count(F.second->getContext()) && \"BaseStats should have records for all functions in base profile \" \"except inlinees\""
, "llvm/tools/llvm-profdata/llvm-profdata.cpp", 2058, __extension__
__PRETTY_FUNCTION__))
2057 "BaseStats should have records for all functions in base profile "(static_cast <bool> (BaseStats.count(F.second->getContext
()) && "BaseStats should have records for all functions in base profile "
"except inlinees") ? void (0) : __assert_fail ("BaseStats.count(F.second->getContext()) && \"BaseStats should have records for all functions in base profile \" \"except inlinees\""
, "llvm/tools/llvm-profdata/llvm-profdata.cpp", 2058, __extension__
__PRETTY_FUNCTION__))
2058 "except inlinees")(static_cast <bool> (BaseStats.count(F.second->getContext
()) && "BaseStats should have records for all functions in base profile "
"except inlinees") ? void (0) : __assert_fail ("BaseStats.count(F.second->getContext()) && \"BaseStats should have records for all functions in base profile \" \"except inlinees\""
, "llvm/tools/llvm-profdata/llvm-profdata.cpp", 2058, __extension__
__PRETTY_FUNCTION__))
;
2059 const FuncSampleStats &FuncStats = BaseStats[F.second->getContext()];
2060 ++ProfOverlap.BaseUniqueCount;
2061 ProfOverlap.BaseUniqueSample += FuncStats.SampleSum;
2062
2063 updateHotBlockOverlap(FuncStats.SampleSum, 0, FuncStats.HotBlockCount);
2064
2065 double FuncSimilarity = computeSampleFunctionOverlap(
2066 nullptr, nullptr, nullptr, FuncStats.SampleSum, 0);
2067 ProfOverlap.Similarity +=
2068 weightByImportance(FuncSimilarity, FuncStats.SampleSum, 0);
2069
2070 ProfOverlap.UnionSample += FuncStats.SampleSum;
2071 }
2072
2073 // Now, ProfSimilarity may be a little greater than 1 due to imprecision
2074 // of floating point accumulations. Make it 1.0 if the difference is below
2075 // Epsilon.
2076 ProfOverlap.Similarity = (std::fabs(ProfOverlap.Similarity - 1) < Epsilon)
2077 ? 1
2078 : ProfOverlap.Similarity;
2079
2080 computeHotFuncOverlap();
2081}
2082
2083void SampleOverlapAggregator::initializeSampleProfileOverlap() {
2084 const auto &BaseProf = BaseReader->getProfiles();
2085 for (const auto &I : BaseProf) {
2086 ++ProfOverlap.BaseCount;
2087 FuncSampleStats FuncStats;
2088 getFuncSampleStats(I.second, FuncStats, BaseHotThreshold);
2089 ProfOverlap.BaseSample += FuncStats.SampleSum;
2090 BaseStats.emplace(I.second.getContext(), FuncStats);
2091 }
2092
2093 const auto &TestProf = TestReader->getProfiles();
2094 for (const auto &I : TestProf) {
2095 ++ProfOverlap.TestCount;
2096 FuncSampleStats FuncStats;
2097 getFuncSampleStats(I.second, FuncStats, TestHotThreshold);
2098 ProfOverlap.TestSample += FuncStats.SampleSum;
2099 TestStats.emplace(I.second.getContext(), FuncStats);
2100 }
2101
2102 ProfOverlap.BaseName = StringRef(BaseFilename);
2103 ProfOverlap.TestName = StringRef(TestFilename);
2104}
2105
2106void SampleOverlapAggregator::dumpFuncSimilarity(raw_fd_ostream &OS) const {
2107 using namespace sampleprof;
2108
2109 if (FuncSimilarityDump.empty())
2110 return;
2111
2112 formatted_raw_ostream FOS(OS);
2113 FOS << "Function-level details:\n";
2114 FOS << "Base weight";
2115 FOS.PadToColumn(TestWeightCol);
2116 FOS << "Test weight";
2117 FOS.PadToColumn(SimilarityCol);
2118 FOS << "Similarity";
2119 FOS.PadToColumn(OverlapCol);
2120 FOS << "Overlap";
2121 FOS.PadToColumn(BaseUniqueCol);
2122 FOS << "Base unique";
2123 FOS.PadToColumn(TestUniqueCol);
2124 FOS << "Test unique";
2125 FOS.PadToColumn(BaseSampleCol);
2126 FOS << "Base samples";
2127 FOS.PadToColumn(TestSampleCol);
2128 FOS << "Test samples";
2129 FOS.PadToColumn(FuncNameCol);
2130 FOS << "Function name\n";
2131 for (const auto &F : FuncSimilarityDump) {
2132 double OverlapPercent =
2133 F.second.UnionSample > 0
2134 ? static_cast<double>(F.second.OverlapSample) / F.second.UnionSample
2135 : 0;
2136 double BaseUniquePercent =
2137 F.second.BaseSample > 0
2138 ? static_cast<double>(F.second.BaseUniqueSample) /
2139 F.second.BaseSample
2140 : 0;
2141 double TestUniquePercent =
2142 F.second.TestSample > 0
2143 ? static_cast<double>(F.second.TestUniqueSample) /
2144 F.second.TestSample
2145 : 0;
2146
2147 FOS << format("%.2f%%", F.second.BaseWeight * 100);
2148 FOS.PadToColumn(TestWeightCol);
2149 FOS << format("%.2f%%", F.second.TestWeight * 100);
2150 FOS.PadToColumn(SimilarityCol);
2151 FOS << format("%.2f%%", F.second.Similarity * 100);
2152 FOS.PadToColumn(OverlapCol);
2153 FOS << format("%.2f%%", OverlapPercent * 100);
2154 FOS.PadToColumn(BaseUniqueCol);
2155 FOS << format("%.2f%%", BaseUniquePercent * 100);
2156 FOS.PadToColumn(TestUniqueCol);
2157 FOS << format("%.2f%%", TestUniquePercent * 100);
2158 FOS.PadToColumn(BaseSampleCol);
2159 FOS << F.second.BaseSample;
2160 FOS.PadToColumn(TestSampleCol);
2161 FOS << F.second.TestSample;
2162 FOS.PadToColumn(FuncNameCol);
2163 FOS << F.second.TestName.toString() << "\n";
2164 }
2165}
2166
2167void SampleOverlapAggregator::dumpProgramSummary(raw_fd_ostream &OS) const {
2168 OS << "Profile overlap infomation for base_profile: "
2169 << ProfOverlap.BaseName.toString()
2170 << " and test_profile: " << ProfOverlap.TestName.toString()
2171 << "\nProgram level:\n";
2172
2173 OS << " Whole program profile similarity: "
2174 << format("%.3f%%", ProfOverlap.Similarity * 100) << "\n";
2175
2176 assert(ProfOverlap.UnionSample > 0 &&(static_cast <bool> (ProfOverlap.UnionSample > 0 &&
"Total samples in two profile should be greater than 0") ? void
(0) : __assert_fail ("ProfOverlap.UnionSample > 0 && \"Total samples in two profile should be greater than 0\""
, "llvm/tools/llvm-profdata/llvm-profdata.cpp", 2177, __extension__
__PRETTY_FUNCTION__))
2177 "Total samples in two profile should be greater than 0")(static_cast <bool> (ProfOverlap.UnionSample > 0 &&
"Total samples in two profile should be greater than 0") ? void
(0) : __assert_fail ("ProfOverlap.UnionSample > 0 && \"Total samples in two profile should be greater than 0\""
, "llvm/tools/llvm-profdata/llvm-profdata.cpp", 2177, __extension__
__PRETTY_FUNCTION__))
;
2178 double OverlapPercent =
2179 static_cast<double>(ProfOverlap.OverlapSample) / ProfOverlap.UnionSample;
2180 assert(ProfOverlap.BaseSample > 0 &&(static_cast <bool> (ProfOverlap.BaseSample > 0 &&
"Total samples in base profile should be greater than 0") ? void
(0) : __assert_fail ("ProfOverlap.BaseSample > 0 && \"Total samples in base profile should be greater than 0\""
, "llvm/tools/llvm-profdata/llvm-profdata.cpp", 2181, __extension__
__PRETTY_FUNCTION__))
2181 "Total samples in base profile should be greater than 0")(static_cast <bool> (ProfOverlap.BaseSample > 0 &&
"Total samples in base profile should be greater than 0") ? void
(0) : __assert_fail ("ProfOverlap.BaseSample > 0 && \"Total samples in base profile should be greater than 0\""
, "llvm/tools/llvm-profdata/llvm-profdata.cpp", 2181, __extension__
__PRETTY_FUNCTION__))
;
2182 double BaseUniquePercent = static_cast<double>(ProfOverlap.BaseUniqueSample) /
2183 ProfOverlap.BaseSample;
2184 assert(ProfOverlap.TestSample > 0 &&(static_cast <bool> (ProfOverlap.TestSample > 0 &&
"Total samples in test profile should be greater than 0") ? void
(0) : __assert_fail ("ProfOverlap.TestSample > 0 && \"Total samples in test profile should be greater than 0\""
, "llvm/tools/llvm-profdata/llvm-profdata.cpp", 2185, __extension__
__PRETTY_FUNCTION__))
2185 "Total samples in test profile should be greater than 0")(static_cast <bool> (ProfOverlap.TestSample > 0 &&
"Total samples in test profile should be greater than 0") ? void
(0) : __assert_fail ("ProfOverlap.TestSample > 0 && \"Total samples in test profile should be greater than 0\""
, "llvm/tools/llvm-profdata/llvm-profdata.cpp", 2185, __extension__
__PRETTY_FUNCTION__))
;
2186 double TestUniquePercent = static_cast<double>(ProfOverlap.TestUniqueSample) /
2187 ProfOverlap.TestSample;
2188
2189 OS << " Whole program sample overlap: "
2190 << format("%.3f%%", OverlapPercent * 100) << "\n";
2191 OS << " percentage of samples unique in base profile: "
2192 << format("%.3f%%", BaseUniquePercent * 100) << "\n";
2193 OS << " percentage of samples unique in test profile: "
2194 << format("%.3f%%", TestUniquePercent * 100) << "\n";
2195 OS << " total samples in base profile: " << ProfOverlap.BaseSample << "\n"
2196 << " total samples in test profile: " << ProfOverlap.TestSample << "\n";
2197
2198 assert(ProfOverlap.UnionCount > 0 &&(static_cast <bool> (ProfOverlap.UnionCount > 0 &&
"There should be at least one function in two input profiles"
) ? void (0) : __assert_fail ("ProfOverlap.UnionCount > 0 && \"There should be at least one function in two input profiles\""
, "llvm/tools/llvm-profdata/llvm-profdata.cpp", 2199, __extension__
__PRETTY_FUNCTION__))
2199 "There should be at least one function in two input profiles")(static_cast <bool> (ProfOverlap.UnionCount > 0 &&
"There should be at least one function in two input profiles"
) ? void (0) : __assert_fail ("ProfOverlap.UnionCount > 0 && \"There should be at least one function in two input profiles\""
, "llvm/tools/llvm-profdata/llvm-profdata.cpp", 2199, __extension__
__PRETTY_FUNCTION__))
;
2200 double FuncOverlapPercent =
2201 static_cast<double>(ProfOverlap.OverlapCount) / ProfOverlap.UnionCount;
2202 OS << " Function overlap: " << format("%.3f%%", FuncOverlapPercent * 100)
2203 << "\n";
2204 OS << " overlap functions: " << ProfOverlap.OverlapCount << "\n";
2205 OS << " functions unique in base profile: " << ProfOverlap.BaseUniqueCount
2206 << "\n";
2207 OS << " functions unique in test profile: " << ProfOverlap.TestUniqueCount
2208 << "\n";
2209}
2210
2211void SampleOverlapAggregator::dumpHotFuncAndBlockOverlap(
2212 raw_fd_ostream &OS) const {
2213 assert(HotFuncOverlap.UnionCount > 0 &&(static_cast <bool> (HotFuncOverlap.UnionCount > 0 &&
"There should be at least one hot function in two input profiles"
) ? void (0) : __assert_fail ("HotFuncOverlap.UnionCount > 0 && \"There should be at least one hot function in two input profiles\""
, "llvm/tools/llvm-profdata/llvm-profdata.cpp", 2214, __extension__
__PRETTY_FUNCTION__))
2214 "There should be at least one hot function in two input profiles")(static_cast <bool> (HotFuncOverlap.UnionCount > 0 &&
"There should be at least one hot function in two input profiles"
) ? void (0) : __assert_fail ("HotFuncOverlap.UnionCount > 0 && \"There should be at least one hot function in two input profiles\""
, "llvm/tools/llvm-profdata/llvm-profdata.cpp", 2214, __extension__
__PRETTY_FUNCTION__))
;
2215 OS << " Hot-function overlap: "
2216 << format("%.3f%%", static_cast<double>(HotFuncOverlap.OverlapCount) /
2217 HotFuncOverlap.UnionCount * 100)
2218 << "\n";
2219 OS << " overlap hot functions: " << HotFuncOverlap.OverlapCount << "\n";
2220 OS << " hot functions unique in base profile: "
2221 << HotFuncOverlap.BaseCount - HotFuncOverlap.OverlapCount << "\n";
2222 OS << " hot functions unique in test profile: "
2223 << HotFuncOverlap.TestCount - HotFuncOverlap.OverlapCount << "\n";
2224
2225 assert(HotBlockOverlap.UnionCount > 0 &&(static_cast <bool> (HotBlockOverlap.UnionCount > 0 &&
"There should be at least one hot block in two input profiles"
) ? void (0) : __assert_fail ("HotBlockOverlap.UnionCount > 0 && \"There should be at least one hot block in two input profiles\""
, "llvm/tools/llvm-profdata/llvm-profdata.cpp", 2226, __extension__
__PRETTY_FUNCTION__))
2226 "There should be at least one hot block in two input profiles")(static_cast <bool> (HotBlockOverlap.UnionCount > 0 &&
"There should be at least one hot block in two input profiles"
) ? void (0) : __assert_fail ("HotBlockOverlap.UnionCount > 0 && \"There should be at least one hot block in two input profiles\""
, "llvm/tools/llvm-profdata/llvm-profdata.cpp", 2226, __extension__
__PRETTY_FUNCTION__))
;
2227 OS << " Hot-block overlap: "
2228 << format("%.3f%%", static_cast<double>(HotBlockOverlap.OverlapCount) /
2229 HotBlockOverlap.UnionCount * 100)
2230 << "\n";
2231 OS << " overlap hot blocks: " << HotBlockOverlap.OverlapCount << "\n";
2232 OS << " hot blocks unique in base profile: "
2233 << HotBlockOverlap.BaseCount - HotBlockOverlap.OverlapCount << "\n";
2234 OS << " hot blocks unique in test profile: "
2235 << HotBlockOverlap.TestCount - HotBlockOverlap.OverlapCount << "\n";
2236}
2237
2238std::error_code SampleOverlapAggregator::loadProfiles() {
2239 using namespace sampleprof;
2240
2241 LLVMContext Context;
2242 auto FS = vfs::getRealFileSystem();
2243 auto BaseReaderOrErr = SampleProfileReader::create(BaseFilename, Context, *FS,
2244 FSDiscriminatorPassOption);
2245 if (std::error_code EC = BaseReaderOrErr.getError())
2246 exitWithErrorCode(EC, BaseFilename);
2247
2248 auto TestReaderOrErr = SampleProfileReader::create(TestFilename, Context, *FS,
2249 FSDiscriminatorPassOption);
2250 if (std::error_code EC = TestReaderOrErr.getError())
2251 exitWithErrorCode(EC, TestFilename);
2252
2253 BaseReader = std::move(BaseReaderOrErr.get());
2254 TestReader = std::move(TestReaderOrErr.get());
2255
2256 if (std::error_code EC = BaseReader->read())
2257 exitWithErrorCode(EC, BaseFilename);
2258 if (std::error_code EC = TestReader->read())
2259 exitWithErrorCode(EC, TestFilename);
2260 if (BaseReader->profileIsProbeBased() != TestReader->profileIsProbeBased())
2261 exitWithError(
2262 "cannot compare probe-based profile with non-probe-based profile");
2263 if (BaseReader->profileIsCS() != TestReader->profileIsCS())
2264 exitWithError("cannot compare CS profile with non-CS profile");
2265
2266 // Load BaseHotThreshold and TestHotThreshold as 99-percentile threshold in
2267 // profile summary.
2268 ProfileSummary &BasePS = BaseReader->getSummary();
2269 ProfileSummary &TestPS = TestReader->getSummary();
2270 BaseHotThreshold =
2271 ProfileSummaryBuilder::getHotCountThreshold(BasePS.getDetailedSummary());
2272 TestHotThreshold =
2273 ProfileSummaryBuilder::getHotCountThreshold(TestPS.getDetailedSummary());
2274
2275 return std::error_code();
2276}
2277
2278void overlapSampleProfile(const std::string &BaseFilename,
2279 const std::string &TestFilename,
2280 const OverlapFuncFilters &FuncFilter,
2281 uint64_t SimilarityCutoff, raw_fd_ostream &OS) {
2282 using namespace sampleprof;
2283
2284 // We use 0.000005 to initialize OverlapAggr.Epsilon because the final metrics
2285 // report 2--3 places after decimal point in percentage numbers.
2286 SampleOverlapAggregator OverlapAggr(
2287 BaseFilename, TestFilename,
2288 static_cast<double>(SimilarityCutoff) / 1000000, 0.000005, FuncFilter);
2289 if (std::error_code EC = OverlapAggr.loadProfiles())
2290 exitWithErrorCode(EC);
2291
2292 OverlapAggr.initializeSampleProfileOverlap();
2293 if (OverlapAggr.detectZeroSampleProfile(OS))
2294 return;
2295
2296 OverlapAggr.computeSampleProfileOverlap(OS);
2297
2298 OverlapAggr.dumpProgramSummary(OS);
2299 OverlapAggr.dumpHotFuncAndBlockOverlap(OS);
2300 OverlapAggr.dumpFuncSimilarity(OS);
2301}
2302
2303static int overlap_main(int argc, const char *argv[]) {
2304 cl::opt<std::string> BaseFilename(cl::Positional, cl::Required,
2305 cl::desc("<base profile file>"));
2306 cl::opt<std::string> TestFilename(cl::Positional, cl::Required,
2307 cl::desc("<test profile file>"));
2308 cl::opt<std::string> Output("output", cl::value_desc("output"), cl::init("-"),
2309 cl::desc("Output file"));
2310 cl::alias OutputA("o", cl::desc("Alias for --output"), cl::aliasopt(Output));
2311 cl::opt<bool> IsCS(
2312 "cs", cl::init(false),
2313 cl::desc("For context sensitive PGO counts. Does not work with CSSPGO."));
2314 cl::opt<unsigned long long> ValueCutoff(
2315 "value-cutoff", cl::init(-1),
2316 cl::desc(
2317 "Function level overlap information for every function (with calling "
2318 "context for csspgo) in test "
2319 "profile with max count value greater then the parameter value"));
2320 cl::opt<std::string> FuncNameFilter(
2321 "function",
2322 cl::desc("Function level overlap information for matching functions. For "
2323 "CSSPGO this takes a a function name with calling context"));
2324 cl::opt<unsigned long long> SimilarityCutoff(
2325 "similarity-cutoff", cl::init(0),
2326 cl::desc("For sample profiles, list function names (with calling context "
2327 "for csspgo) for overlapped functions "
2328 "with similarities below the cutoff (percentage times 10000)."));
2329 cl::opt<ProfileKinds> ProfileKind(
2330 cl::desc("Profile kind:"), cl::init(instr),
2331 cl::values(clEnumVal(instr, "Instrumentation profile (default)")llvm::cl::OptionEnumValue { "instr", int(instr), "Instrumentation profile (default)"
}
,
2332 clEnumVal(sample, "Sample profile")llvm::cl::OptionEnumValue { "sample", int(sample), "Sample profile"
}
));
2333 cl::ParseCommandLineOptions(argc, argv, "LLVM profile data overlap tool\n");
2334
2335 std::error_code EC;
2336 raw_fd_ostream OS(Output.data(), EC, sys::fs::OF_TextWithCRLF);
2337 if (EC)
2338 exitWithErrorCode(EC, Output);
2339
2340 if (ProfileKind == instr)
2341 overlapInstrProfile(BaseFilename, TestFilename,
2342 OverlapFuncFilters{ValueCutoff, FuncNameFilter}, OS,
2343 IsCS);
2344 else
2345 overlapSampleProfile(BaseFilename, TestFilename,
2346 OverlapFuncFilters{ValueCutoff, FuncNameFilter},
2347 SimilarityCutoff, OS);
2348
2349 return 0;
2350}
2351
namespace {
/// Running totals for one kind of value profile: how many value sites were
/// seen, how many of them had at least one value, how many values there were
/// overall, and a histogram of site counts.
struct ValueSitesStats {
  ValueSitesStats() = default;

  // Number of value sites seen.
  uint64_t TotalNumValueSites = 0;
  // Number of value sites that carried at least one profiled value.
  uint64_t TotalNumValueSitesWithValueProfile = 0;
  // Number of profiled values summed over all sites.
  uint64_t TotalNumValues = 0;
  // Histogram of site counts; starts out empty.
  std::vector<unsigned> ValueSitesHistogram;
};
} // namespace
2363
2364static void traverseAllValueSites(const InstrProfRecord &Func, uint32_t VK,
2365 ValueSitesStats &Stats, raw_fd_ostream &OS,
2366 InstrProfSymtab *Symtab) {
2367 uint32_t NS = Func.getNumValueSites(VK);
2368 Stats.TotalNumValueSites += NS;
2369 for (size_t I = 0; I < NS; ++I) {
2370 uint32_t NV = Func.getNumValueDataForSite(VK, I);
2371 std::unique_ptr<InstrProfValueData[]> VD = Func.getValueForSite(VK, I);
2372 Stats.TotalNumValues += NV;
2373 if (NV) {
2374 Stats.TotalNumValueSitesWithValueProfile++;
2375 if (NV > Stats.ValueSitesHistogram.size())
2376 Stats.ValueSitesHistogram.resize(NV, 0);
2377 Stats.ValueSitesHistogram[NV - 1]++;
2378 }
2379
2380 uint64_t SiteSum = 0;
2381 for (uint32_t V = 0; V < NV; V++)
2382 SiteSum += VD[V].Count;
2383 if (SiteSum == 0)
2384 SiteSum = 1;
2385
2386 for (uint32_t V = 0; V < NV; V++) {
2387 OS << "\t[ " << format("%2u", I) << ", ";
2388 if (Symtab == nullptr)
2389 OS << format("%4" PRIu64"l" "u", VD[V].Value);
2390 else
2391 OS << Symtab->getFuncName(VD[V].Value);
2392 OS << ", " << format("%10" PRId64"l" "d", VD[V].Count) << " ] ("
2393 << format("%.2f%%", (VD[V].Count * 100.0 / SiteSum)) << ")\n";
2394 }
2395 }
2396}
2397
2398static void showValueSitesStats(raw_fd_ostream &OS, uint32_t VK,
2399 ValueSitesStats &Stats) {
2400 OS << " Total number of sites: " << Stats.TotalNumValueSites << "\n";
2401 OS << " Total number of sites with values: "
2402 << Stats.TotalNumValueSitesWithValueProfile << "\n";
2403 OS << " Total number of profiled values: " << Stats.TotalNumValues << "\n";
2404
2405 OS << " Value sites histogram:\n\tNumTargets, SiteCount\n";
2406 for (unsigned I = 0; I < Stats.ValueSitesHistogram.size(); I++) {
2407 if (Stats.ValueSitesHistogram[I] > 0)
2408 OS << "\t" << I + 1 << ", " << Stats.ValueSitesHistogram[I] << "\n";
2409 }
2410}
2411
2412static int showInstrProfile(
2413 const std::string &Filename, bool ShowCounts, uint32_t TopN,
2414 bool ShowIndirectCallTargets, bool ShowMemOPSizes, bool ShowDetailedSummary,
2415 std::vector<uint32_t> DetailedSummaryCutoffs, bool ShowAllFunctions,
2416 bool ShowCS, uint64_t ValueCutoff, bool OnlyListBelow,
2417 const std::string &ShowFunction, bool TextFormat, bool ShowBinaryIds,
2418 bool ShowCovered, bool ShowProfileVersion, bool ShowTemporalProfTraces,
2419 ShowFormat SFormat, raw_fd_ostream &OS) {
2420 if (SFormat == ShowFormat::Json)
2421 exitWithError("JSON output is not supported for instr profiles");
2422 if (SFormat == ShowFormat::Yaml)
2423 exitWithError("YAML output is not supported for instr profiles");
2424 auto FS = vfs::getRealFileSystem();
2425 auto ReaderOrErr = InstrProfReader::create(Filename, *FS);
2426 std::vector<uint32_t> Cutoffs = std::move(DetailedSummaryCutoffs);
2427 if (ShowDetailedSummary && Cutoffs.empty()) {
2428 Cutoffs = ProfileSummaryBuilder::DefaultCutoffs;
2429 }
2430 InstrProfSummaryBuilder Builder(std::move(Cutoffs));
2431 if (Error E = ReaderOrErr.takeError())
2432 exitWithError(std::move(E), Filename);
2433
2434 auto Reader = std::move(ReaderOrErr.get());
2435 bool IsIRInstr = Reader->isIRLevelProfile();
2436 size_t ShownFunctions = 0;
2437 size_t BelowCutoffFunctions = 0;
2438 int NumVPKind = IPVK_Last - IPVK_First + 1;
2439 std::vector<ValueSitesStats> VPStats(NumVPKind);
2440
2441 auto MinCmp = [](const std::pair<std::string, uint64_t> &v1,
2442 const std::pair<std::string, uint64_t> &v2) {
2443 return v1.second > v2.second;
2444 };
2445
2446 std::priority_queue<std::pair<std::string, uint64_t>,
2447 std::vector<std::pair<std::string, uint64_t>>,
2448 decltype(MinCmp)>
2449 HottestFuncs(MinCmp);
2450
2451 if (!TextFormat && OnlyListBelow) {
2452 OS << "The list of functions with the maximum counter less than "
2453 << ValueCutoff << ":\n";
2454 }
2455
2456 // Add marker so that IR-level instrumentation round-trips properly.
2457 if (TextFormat && IsIRInstr)
2458 OS << ":ir\n";
2459
2460 for (const auto &Func : *Reader) {
2461 if (Reader->isIRLevelProfile()) {
2462 bool FuncIsCS = NamedInstrProfRecord::hasCSFlagInHash(Func.Hash);
2463 if (FuncIsCS != ShowCS)
2464 continue;
2465 }
2466 bool Show = ShowAllFunctions ||
2467 (!ShowFunction.empty() && Func.Name.contains(ShowFunction));
2468
2469 bool doTextFormatDump = (Show && TextFormat);
2470
2471 if (doTextFormatDump) {
2472 InstrProfSymtab &Symtab = Reader->getSymtab();
2473 InstrProfWriter::writeRecordInText(Func.Name, Func.Hash, Func, Symtab,
2474 OS);
2475 continue;
2476 }
2477
2478 assert(Func.Counts.size() > 0 && "function missing entry counter")(static_cast <bool> (Func.Counts.size() > 0 &&
"function missing entry counter") ? void (0) : __assert_fail
("Func.Counts.size() > 0 && \"function missing entry counter\""
, "llvm/tools/llvm-profdata/llvm-profdata.cpp", 2478, __extension__
__PRETTY_FUNCTION__))
;
2479 Builder.addRecord(Func);
2480
2481 if (ShowCovered) {
2482 if (llvm::any_of(Func.Counts, [](uint64_t C) { return C; }))
2483 OS << Func.Name << "\n";
2484 continue;
2485 }
2486
2487 uint64_t FuncMax = 0;
2488 uint64_t FuncSum = 0;
2489
2490 auto PseudoKind = Func.getCountPseudoKind();
2491 if (PseudoKind != InstrProfRecord::NotPseudo) {
2492 if (Show) {
2493 if (!ShownFunctions)
2494 OS << "Counters:\n";
2495 ++ShownFunctions;
2496 OS << " " << Func.Name << ":\n"
2497 << " Hash: " << format("0x%016" PRIx64"l" "x", Func.Hash) << "\n"
2498 << " Counters: " << Func.Counts.size();
2499 if (PseudoKind == InstrProfRecord::PseudoHot)
2500 OS << " <PseudoHot>\n";
2501 else if (PseudoKind == InstrProfRecord::PseudoWarm)
2502 OS << " <PseudoWarm>\n";
2503 else
2504 llvm_unreachable("Unknown PseudoKind")::llvm::llvm_unreachable_internal("Unknown PseudoKind", "llvm/tools/llvm-profdata/llvm-profdata.cpp"
, 2504)
;
2505 }
2506 continue;
2507 }
2508
2509 for (size_t I = 0, E = Func.Counts.size(); I < E; ++I) {
2510 FuncMax = std::max(FuncMax, Func.Counts[I]);
2511 FuncSum += Func.Counts[I];
2512 }
2513
2514 if (FuncMax < ValueCutoff) {
2515 ++BelowCutoffFunctions;
2516 if (OnlyListBelow) {
2517 OS << " " << Func.Name << ": (Max = " << FuncMax
2518 << " Sum = " << FuncSum << ")\n";
2519 }
2520 continue;
2521 } else if (OnlyListBelow)
2522 continue;
2523
2524 if (TopN) {
2525 if (HottestFuncs.size() == TopN) {
2526 if (HottestFuncs.top().second < FuncMax) {
2527 HottestFuncs.pop();
2528 HottestFuncs.emplace(std::make_pair(std::string(Func.Name), FuncMax));
2529 }
2530 } else
2531 HottestFuncs.emplace(std::make_pair(std::string(Func.Name), FuncMax));
2532 }
2533
2534 if (Show) {
2535 if (!ShownFunctions)
2536 OS << "Counters:\n";
2537
2538 ++ShownFunctions;
2539
2540 OS << " " << Func.Name << ":\n"
2541 << " Hash: " << format("0x%016" PRIx64"l" "x", Func.Hash) << "\n"
2542 << " Counters: " << Func.Counts.size() << "\n";
2543 if (!IsIRInstr)
2544 OS << " Function count: " << Func.Counts[0] << "\n";
2545
2546 if (ShowIndirectCallTargets)
2547 OS << " Indirect Call Site Count: "
2548 << Func.getNumValueSites(IPVK_IndirectCallTarget) << "\n";
2549
2550 uint32_t NumMemOPCalls = Func.getNumValueSites(IPVK_MemOPSize);
2551 if (ShowMemOPSizes && NumMemOPCalls > 0)
2552 OS << " Number of Memory Intrinsics Calls: " << NumMemOPCalls
2553 << "\n";
2554
2555 if (ShowCounts) {
2556 OS << " Block counts: [";
2557 size_t Start = (IsIRInstr ? 0 : 1);
2558 for (size_t I = Start, E = Func.Counts.size(); I < E; ++I) {
2559 OS << (I == Start ? "" : ", ") << Func.Counts[I];
2560 }
2561 OS << "]\n";
2562 }
2563
2564 if (ShowIndirectCallTargets) {
2565 OS << " Indirect Target Results:\n";
2566 traverseAllValueSites(Func, IPVK_IndirectCallTarget,
2567 VPStats[IPVK_IndirectCallTarget], OS,
2568 &(Reader->getSymtab()));
2569 }
2570
2571 if (ShowMemOPSizes && NumMemOPCalls > 0) {
2572 OS << " Memory Intrinsic Size Results:\n";
2573 traverseAllValueSites(Func, IPVK_MemOPSize, VPStats[IPVK_MemOPSize], OS,
2574 nullptr);
2575 }
2576 }
2577 }
2578 if (Reader->hasError())
2579 exitWithError(Reader->getError(), Filename);
2580
2581 if (TextFormat || ShowCovered)
2582 return 0;
2583 std::unique_ptr<ProfileSummary> PS(Builder.getSummary());
2584 bool IsIR = Reader->isIRLevelProfile();
2585 OS << "Instrumentation level: " << (IsIR ? "IR" : "Front-end");
2586 if (IsIR)
2587 OS << " entry_first = " << Reader->instrEntryBBEnabled();
2588 OS << "\n";
2589 if (ShowAllFunctions || !ShowFunction.empty())
2590 OS << "Functions shown: " << ShownFunctions << "\n";
2591 OS << "Total functions: " << PS->getNumFunctions() << "\n";
2592 if (ValueCutoff > 0) {
2593 OS << "Number of functions with maximum count (< " << ValueCutoff
2594 << "): " << BelowCutoffFunctions << "\n";
2595 OS << "Number of functions with maximum count (>= " << ValueCutoff
2596 << "): " << PS->getNumFunctions() - BelowCutoffFunctions << "\n";
2597 }
2598 OS << "Maximum function count: " << PS->getMaxFunctionCount() << "\n";
2599 OS << "Maximum internal block count: " << PS->getMaxInternalCount() << "\n";
2600
2601 if (TopN) {
2602 std::vector<std::pair<std::string, uint64_t>> SortedHottestFuncs;
2603 while (!HottestFuncs.empty()) {
2604 SortedHottestFuncs.emplace_back(HottestFuncs.top());
2605 HottestFuncs.pop();
2606 }
2607 OS << "Top " << TopN
2608 << " functions with the largest internal block counts: \n";
2609 for (auto &hotfunc : llvm::reverse(SortedHottestFuncs))
2610 OS << " " << hotfunc.first << ", max count = " << hotfunc.second << "\n";
2611 }
2612
2613 if (ShownFunctions && ShowIndirectCallTargets) {
2614 OS << "Statistics for indirect call sites profile:\n";
2615 showValueSitesStats(OS, IPVK_IndirectCallTarget,
2616 VPStats[IPVK_IndirectCallTarget]);
2617 }
2618
2619 if (ShownFunctions && ShowMemOPSizes) {
2620 OS << "Statistics for memory intrinsic calls sizes profile:\n";
2621 showValueSitesStats(OS, IPVK_MemOPSize, VPStats[IPVK_MemOPSize]);
2622 }
2623
2624 if (ShowDetailedSummary) {
2625 OS << "Total number of blocks: " << PS->getNumCounts() << "\n";
2626 OS << "Total count: " << PS->getTotalCount() << "\n";
2627 PS->printDetailedSummary(OS);
2628 }
2629
2630 if (ShowBinaryIds)
2631 if (Error E = Reader->printBinaryIds(OS))
2632 exitWithError(std::move(E), Filename);
2633
2634 if (ShowProfileVersion)
2635 OS << "Profile version: " << Reader->getVersion() << "\n";
2636
2637 if (ShowTemporalProfTraces) {
2638 auto &Traces = Reader->getTemporalProfTraces();
2639 OS << "Temporal Profile Traces (samples=" << Traces.size()
2640 << " seen=" << Reader->getTemporalProfTraceStreamSize() << "):\n";
2641 for (unsigned i = 0; i < Traces.size(); i++) {
2642 OS << " Temporal Profile Trace " << i << " (weight=" << Traces[i].Weight
2643 << " count=" << Traces[i].FunctionNameRefs.size() << "):\n";
2644 for (auto &NameRef : Traces[i].FunctionNameRefs)
2645 OS << " " << Reader->getSymtab().getFuncName(NameRef) << "\n";
2646 }
2647 }
2648
2649 return 0;
2650}
2651
2652static void showSectionInfo(sampleprof::SampleProfileReader *Reader,
2653 raw_fd_ostream &OS) {
2654 if (!Reader->dumpSectionInfo(OS)) {
2655 WithColor::warning() << "-show-sec-info-only is only supported for "
2656 << "sample profile in extbinary format and is "
2657 << "ignored for other formats.\n";
2658 return;
2659 }
2660}
2661
2662namespace {
2663struct HotFuncInfo {
2664 std::string FuncName;
2665 uint64_t TotalCount;
2666 double TotalCountPercent;
2667 uint64_t MaxCount;
2668 uint64_t EntryCount;
2669
2670 HotFuncInfo()
2671 : TotalCount(0), TotalCountPercent(0.0f), MaxCount(0), EntryCount(0) {}
2672
2673 HotFuncInfo(StringRef FN, uint64_t TS, double TSP, uint64_t MS, uint64_t ES)
2674 : FuncName(FN.begin(), FN.end()), TotalCount(TS), TotalCountPercent(TSP),
2675 MaxCount(MS), EntryCount(ES) {}
2676};
2677} // namespace
2678
2679// Print out detailed information about hot functions in PrintValues vector.
2680// Users specify the title and offset of every column through ColumnTitle and
2681// ColumnOffset. The size of ColumnTitle and ColumnOffset need to be the same
2682// and at least 4. Besides, users can optionally give a HotFuncMetric string to
2683// print out or let it be an empty string.
2684static void dumpHotFunctionList(const std::vector<std::string> &ColumnTitle,
2685 const std::vector<int> &ColumnOffset,
2686 const std::vector<HotFuncInfo> &PrintValues,
2687 uint64_t HotFuncCount, uint64_t TotalFuncCount,
2688 uint64_t HotProfCount, uint64_t TotalProfCount,
2689 const std::string &HotFuncMetric,
2690 uint32_t TopNFunctions, raw_fd_ostream &OS) {
2691 assert(ColumnOffset.size() == ColumnTitle.size() &&(static_cast <bool> (ColumnOffset.size() == ColumnTitle
.size() && "ColumnOffset and ColumnTitle should have the same size"
) ? void (0) : __assert_fail ("ColumnOffset.size() == ColumnTitle.size() && \"ColumnOffset and ColumnTitle should have the same size\""
, "llvm/tools/llvm-profdata/llvm-profdata.cpp", 2692, __extension__
__PRETTY_FUNCTION__))
2692 "ColumnOffset and ColumnTitle should have the same size")(static_cast <bool> (ColumnOffset.size() == ColumnTitle
.size() && "ColumnOffset and ColumnTitle should have the same size"
) ? void (0) : __assert_fail ("ColumnOffset.size() == ColumnTitle.size() && \"ColumnOffset and ColumnTitle should have the same size\""
, "llvm/tools/llvm-profdata/llvm-profdata.cpp", 2692, __extension__
__PRETTY_FUNCTION__))
;
2693 assert(ColumnTitle.size() >= 4 &&(static_cast <bool> (ColumnTitle.size() >= 4 &&
"ColumnTitle should have at least 4 elements") ? void (0) : __assert_fail
("ColumnTitle.size() >= 4 && \"ColumnTitle should have at least 4 elements\""
, "llvm/tools/llvm-profdata/llvm-profdata.cpp", 2694, __extension__
__PRETTY_FUNCTION__))
2694 "ColumnTitle should have at least 4 elements")(static_cast <bool> (ColumnTitle.size() >= 4 &&
"ColumnTitle should have at least 4 elements") ? void (0) : __assert_fail
("ColumnTitle.size() >= 4 && \"ColumnTitle should have at least 4 elements\""
, "llvm/tools/llvm-profdata/llvm-profdata.cpp", 2694, __extension__
__PRETTY_FUNCTION__))
;
2695 assert(TotalFuncCount > 0 &&(static_cast <bool> (TotalFuncCount > 0 && "There should be at least one function in the profile"
) ? void (0) : __assert_fail ("TotalFuncCount > 0 && \"There should be at least one function in the profile\""
, "llvm/tools/llvm-profdata/llvm-profdata.cpp", 2696, __extension__
__PRETTY_FUNCTION__))
2696 "There should be at least one function in the profile")(static_cast <bool> (TotalFuncCount > 0 && "There should be at least one function in the profile"
) ? void (0) : __assert_fail ("TotalFuncCount > 0 && \"There should be at least one function in the profile\""
, "llvm/tools/llvm-profdata/llvm-profdata.cpp", 2696, __extension__
__PRETTY_FUNCTION__))
;
2697 double TotalProfPercent = 0;
2698 if (TotalProfCount > 0)
2699 TotalProfPercent = static_cast<double>(HotProfCount) / TotalProfCount * 100;
2700
2701 formatted_raw_ostream FOS(OS);
2702 FOS << HotFuncCount << " out of " << TotalFuncCount
2703 << " functions with profile ("
2704 << format("%.2f%%",
2705 (static_cast<double>(HotFuncCount) / TotalFuncCount * 100))
2706 << ") are considered hot functions";
2707 if (!HotFuncMetric.empty())
2708 FOS << " (" << HotFuncMetric << ")";
2709 FOS << ".\n";
2710 FOS << HotProfCount << " out of " << TotalProfCount << " profile counts ("
2711 << format("%.2f%%", TotalProfPercent) << ") are from hot functions.\n";
2712
2713 for (size_t I = 0; I < ColumnTitle.size(); ++I) {
2714 FOS.PadToColumn(ColumnOffset[I]);
2715 FOS << ColumnTitle[I];
2716 }
2717 FOS << "\n";
2718
2719 uint32_t Count = 0;
2720 for (const auto &R : PrintValues) {
2721 if (TopNFunctions && (Count++ == TopNFunctions))
2722 break;
2723 FOS.PadToColumn(ColumnOffset[0]);
2724 FOS << R.TotalCount << " (" << format("%.2f%%", R.TotalCountPercent) << ")";
2725 FOS.PadToColumn(ColumnOffset[1]);
2726 FOS << R.MaxCount;
2727 FOS.PadToColumn(ColumnOffset[2]);
2728 FOS << R.EntryCount;
2729 FOS.PadToColumn(ColumnOffset[3]);
2730 FOS << R.FuncName << "\n";
2731 }
2732}
2733
2734static int showHotFunctionList(const sampleprof::SampleProfileMap &Profiles,
2735 ProfileSummary &PS, uint32_t TopN,
2736 raw_fd_ostream &OS) {
2737 using namespace sampleprof;
2738
2739 const uint32_t HotFuncCutoff = 990000;
2740 auto &SummaryVector = PS.getDetailedSummary();
2741 uint64_t MinCountThreshold = 0;
2742 for (const ProfileSummaryEntry &SummaryEntry : SummaryVector) {
2743 if (SummaryEntry.Cutoff == HotFuncCutoff) {
2744 MinCountThreshold = SummaryEntry.MinCount;
2745 break;
2746 }
2747 }
2748
2749 // Traverse all functions in the profile and keep only hot functions.
2750 // The following loop also calculates the sum of total samples of all
2751 // functions.
2752 std::multimap<uint64_t, std::pair<const FunctionSamples *, const uint64_t>,
2753 std::greater<uint64_t>>
2754 HotFunc;
2755 uint64_t ProfileTotalSample = 0;
2756 uint64_t HotFuncSample = 0;
2757 uint64_t HotFuncCount = 0;
2758
2759 for (const auto &I : Profiles) {
2760 FuncSampleStats FuncStats;
2761 const FunctionSamples &FuncProf = I.second;
2762 ProfileTotalSample += FuncProf.getTotalSamples();
2763 getFuncSampleStats(FuncProf, FuncStats, MinCountThreshold);
2764
2765 if (isFunctionHot(FuncStats, MinCountThreshold)) {
2766 HotFunc.emplace(FuncProf.getTotalSamples(),
2767 std::make_pair(&(I.second), FuncStats.MaxSample));
2768 HotFuncSample += FuncProf.getTotalSamples();
2769 ++HotFuncCount;
2770 }
2771 }
2772
2773 std::vector<std::string> ColumnTitle{"Total sample (%)", "Max sample",
2774 "Entry sample", "Function name"};
2775 std::vector<int> ColumnOffset{0, 24, 42, 58};
2776 std::string Metric =
2777 std::string("max sample >= ") + std::to_string(MinCountThreshold);
2778 std::vector<HotFuncInfo> PrintValues;
2779 for (const auto &FuncPair : HotFunc) {
2780 const FunctionSamples &Func = *FuncPair.second.first;
2781 double TotalSamplePercent =
2782 (ProfileTotalSample > 0)
2783 ? (Func.getTotalSamples() * 100.0) / ProfileTotalSample
2784 : 0;
2785 PrintValues.emplace_back(
2786 HotFuncInfo(Func.getContext().toString(), Func.getTotalSamples(),
2787 TotalSamplePercent, FuncPair.second.second,
2788 Func.getHeadSamplesEstimate()));
2789 }
2790 dumpHotFunctionList(ColumnTitle, ColumnOffset, PrintValues, HotFuncCount,
2791 Profiles.size(), HotFuncSample, ProfileTotalSample,
2792 Metric, TopN, OS);
2793
2794 return 0;
2795}
2796
2797static int showSampleProfile(const std::string &Filename, bool ShowCounts,
2798 uint32_t TopN, bool ShowAllFunctions,
2799 bool ShowDetailedSummary,
2800 const std::string &ShowFunction,
2801 bool ShowProfileSymbolList,
2802 bool ShowSectionInfoOnly, bool ShowHotFuncList,
2803 ShowFormat SFormat, raw_fd_ostream &OS) {
2804 if (SFormat == ShowFormat::Yaml)
2805 exitWithError("YAML output is not supported for sample profiles");
2806 using namespace sampleprof;
2807 LLVMContext Context;
2808 auto FS = vfs::getRealFileSystem();
2809 auto ReaderOrErr = SampleProfileReader::create(Filename, Context, *FS,
2810 FSDiscriminatorPassOption);
2811 if (std::error_code EC = ReaderOrErr.getError())
2812 exitWithErrorCode(EC, Filename);
2813
2814 auto Reader = std::move(ReaderOrErr.get());
2815 if (ShowSectionInfoOnly) {
2816 showSectionInfo(Reader.get(), OS);
2817 return 0;
2818 }
2819
2820 if (std::error_code EC = Reader->read())
2821 exitWithErrorCode(EC, Filename);
2822
2823 if (ShowAllFunctions || ShowFunction.empty()) {
2824 if (SFormat == ShowFormat::Json)
2825 Reader->dumpJson(OS);
2826 else
2827 Reader->dump(OS);
2828 } else {
2829 if (SFormat == ShowFormat::Json)
2830 exitWithError(
2831 "the JSON format is supported only when all functions are to "
2832 "be printed");
2833
2834 // TODO: parse context string to support filtering by contexts.
2835 Reader->dumpFunctionProfile(StringRef(ShowFunction), OS);
2836 }
2837
2838 if (ShowProfileSymbolList) {
2839 std::unique_ptr<sampleprof::ProfileSymbolList> ReaderList =
2840 Reader->getProfileSymbolList();
2841 ReaderList->dump(OS);
2842 }
2843
2844 if (ShowDetailedSummary) {
2845 auto &PS = Reader->getSummary();
2846 PS.printSummary(OS);
2847 PS.printDetailedSummary(OS);
2848 }
2849
2850 if (ShowHotFuncList || TopN)
2851 showHotFunctionList(Reader->getProfiles(), Reader->getSummary(), TopN, OS);
2852
2853 return 0;
2854}
2855
2856static int showMemProfProfile(const std::string &Filename,
2857 const std::string &ProfiledBinary,
2858 ShowFormat SFormat, raw_fd_ostream &OS) {
2859 if (SFormat == ShowFormat::Json)
2860 exitWithError("JSON output is not supported for MemProf");
2861 auto ReaderOr = llvm::memprof::RawMemProfReader::create(
2862 Filename, ProfiledBinary, /*KeepNames=*/true);
2863 if (Error E = ReaderOr.takeError())
2864 // Since the error can be related to the profile or the binary we do not
2865 // pass whence. Instead additional context is provided where necessary in
2866 // the error message.
2867 exitWithError(std::move(E), /*Whence*/ "");
2868
2869 std::unique_ptr<llvm::memprof::RawMemProfReader> Reader(
2870 ReaderOr.get().release());
2871
2872 Reader->printYAML(OS);
2873 return 0;
2874}
2875
2876static int showDebugInfoCorrelation(const std::string &Filename,
2877 bool ShowDetailedSummary,
2878 bool ShowProfileSymbolList,
2879 ShowFormat SFormat, raw_fd_ostream &OS) {
2880 if (SFormat == ShowFormat::Json)
2881 exitWithError("JSON output is not supported for debug info correlation");
2882 std::unique_ptr<InstrProfCorrelator> Correlator;
2883 if (auto Err = InstrProfCorrelator::get(Filename).moveInto(Correlator))
2884 exitWithError(std::move(Err), Filename);
2885 if (SFormat == ShowFormat::Yaml) {
2886 if (auto Err = Correlator->dumpYaml(OS))
2887 exitWithError(std::move(Err), Filename);
2888 return 0;
2889 }
2890
2891 if (auto Err = Correlator->correlateProfileData())
2892 exitWithError(std::move(Err), Filename);
2893
2894 InstrProfSymtab Symtab;
2895 if (auto Err = Symtab.create(
2896 StringRef(Correlator->getNamesPointer(), Correlator->getNamesSize())))
2897 exitWithError(std::move(Err), Filename);
2898
2899 if (ShowProfileSymbolList)
2900 Symtab.dumpNames(OS);
2901 // TODO: Read "Profile Data Type" from debug info to compute and show how many
2902 // counters the section holds.
2903 if (ShowDetailedSummary)
2904 OS << "Counters section size: 0x"
2905 << Twine::utohexstr(Correlator->getCountersSectionSize()) << " bytes\n";
2906 OS << "Found " << Correlator->getDataSize() << " functions\n";
2907
2908 return 0;
2909}
2910
2911static int show_main(int argc, const char *argv[]) {
2912 cl::opt<std::string> Filename(cl::Positional, cl::desc("<profdata-file>"));
2913
2914 cl::opt<bool> ShowCounts("counts", cl::init(false),
2915 cl::desc("Show counter values for shown functions"));
2916 cl::opt<ShowFormat> SFormat(
2917 "show-format", cl::init(ShowFormat::Text),
2918 cl::desc("Emit output in the selected format if supported"),
2919 cl::values(clEnumValN(ShowFormat::Text, "text",llvm::cl::OptionEnumValue { "text", int(ShowFormat::Text), "emit normal text output (default)"
}
2920 "emit normal text output (default)")llvm::cl::OptionEnumValue { "text", int(ShowFormat::Text), "emit normal text output (default)"
}
,
2921 clEnumValN(ShowFormat::Json, "json", "emit JSON")llvm::cl::OptionEnumValue { "json", int(ShowFormat::Json), "emit JSON"
}
,
2922 clEnumValN(ShowFormat::Yaml, "yaml", "emit YAML")llvm::cl::OptionEnumValue { "yaml", int(ShowFormat::Yaml), "emit YAML"
}
));
2923 // TODO: Consider replacing this with `--show-format=text-encoding`.
2924 cl::opt<bool> TextFormat(
2925 "text", cl::init(false),
2926 cl::desc("Show instr profile data in text dump format"));
2927 cl::opt<bool> JsonFormat(
2928 "json", cl::desc("Show sample profile data in the JSON format "
2929 "(deprecated, please use --show-format=json)"));
2930 cl::opt<bool> ShowIndirectCallTargets(
2931 "ic-targets", cl::init(false),
2932 cl::desc("Show indirect call site target values for shown functions"));
2933 cl::opt<bool> ShowMemOPSizes(
2934 "memop-sizes", cl::init(false),
2935 cl::desc("Show the profiled sizes of the memory intrinsic calls "
2936 "for shown functions"));
2937 cl::opt<bool> ShowDetailedSummary("detailed-summary", cl::init(false),
2938 cl::desc("Show detailed profile summary"));
2939 cl::list<uint32_t> DetailedSummaryCutoffs(
2940 cl::CommaSeparated, "detailed-summary-cutoffs",
2941 cl::desc(
2942 "Cutoff percentages (times 10000) for generating detailed summary"),
2943 cl::value_desc("800000,901000,999999"));
2944 cl::opt<bool> ShowHotFuncList(
2945 "hot-func-list", cl::init(false),
2946 cl::desc("Show profile summary of a list of hot functions"));
2947 cl::opt<bool> ShowAllFunctions("all-functions", cl::init(false),
2948 cl::desc("Details for every function"));
2949 cl::opt<bool> ShowCS("showcs", cl::init(false),
2950 cl::desc("Show context sensitive counts"));
2951 cl::opt<std::string> ShowFunction("function",
2952 cl::desc("Details for matching functions"));
2953
2954 cl::opt<std::string> OutputFilename("output", cl::value_desc("output"),
2955 cl::init("-"), cl::desc("Output file"));
2956 cl::alias OutputFilenameA("o", cl::desc("Alias for --output"),
2957 cl::aliasopt(OutputFilename));
2958 cl::opt<ProfileKinds> ProfileKind(
2959 cl::desc("Profile kind:"), cl::init(instr),
2960 cl::values(clEnumVal(instr, "Instrumentation profile (default)")llvm::cl::OptionEnumValue { "instr", int(instr), "Instrumentation profile (default)"
}
,
2961 clEnumVal(sample, "Sample profile")llvm::cl::OptionEnumValue { "sample", int(sample), "Sample profile"
}
,
2962 clEnumVal(memory, "MemProf memory access profile")llvm::cl::OptionEnumValue { "memory", int(memory), "MemProf memory access profile"
}
));
2963 cl::opt<uint32_t> TopNFunctions(
2964 "topn", cl::init(0),
2965 cl::desc("Show the list of functions with the largest internal counts"));
2966 cl::opt<uint32_t> ValueCutoff(
2967 "value-cutoff", cl::init(0),
2968 cl::desc("Set the count value cutoff. Functions with the maximum count "
2969 "less than this value will not be printed out. (Default is 0)"));
2970 cl::opt<bool> OnlyListBelow(
2971 "list-below-cutoff", cl::init(false),
2972 cl::desc("Only output names of functions whose max count values are "
2973 "below the cutoff value"));
2974 cl::opt<bool> ShowProfileSymbolList(
2975 "show-prof-sym-list", cl::init(false),
2976 cl::desc("Show profile symbol list if it exists in the profile. "));
2977 cl::opt<bool> ShowSectionInfoOnly(
2978 "show-sec-info-only", cl::init(false),
2979 cl::desc("Show the information of each section in the sample profile. "
2980 "The flag is only usable when the sample profile is in "
2981 "extbinary format"));
2982 cl::opt<bool> ShowBinaryIds("binary-ids", cl::init(false),
2983 cl::desc("Show binary ids in the profile. "));
2984 cl::opt<bool> ShowTemporalProfTraces(
2985 "temporal-profile-traces",
2986 cl::desc("Show temporal profile traces in the profile."));
2987 cl::opt<std::string> DebugInfoFilename(
2988 "debug-info", cl::init(""),
2989 cl::desc("Read and extract profile metadata from debug info and show "
2990 "the functions it found."));
2991 cl::opt<bool> ShowCovered(
2992 "covered", cl::init(false),
2993 cl::desc("Show only the functions that have been executed."));
2994 cl::opt<std::string> ProfiledBinary(
2995 "profiled-binary", cl::init(""),
2996 cl::desc("Path to binary from which the profile was collected."));
2997 cl::opt<bool> ShowProfileVersion("profile-version", cl::init(false),
2998 cl::desc("Show profile version. "));
2999 cl::ParseCommandLineOptions(argc, argv, "LLVM profile data summary\n");
3000
3001 if (Filename.empty() && DebugInfoFilename.empty())
3002 exitWithError(
3003 "the positional argument '<profdata-file>' is required unless '--" +
3004 DebugInfoFilename.ArgStr + "' is provided");
3005
3006 if (Filename == OutputFilename) {
3007 errs() << sys::path::filename(argv[0])
3008 << ": Input file name cannot be the same as the output file name!\n";
3009 return 1;
3010 }
3011 if (JsonFormat)
3012 SFormat = ShowFormat::Json;
3013
3014 std::error_code EC;
3015 raw_fd_ostream OS(OutputFilename.data(), EC, sys::fs::OF_TextWithCRLF);
3016 if (EC)
3017 exitWithErrorCode(EC, OutputFilename);
3018
3019 if (ShowAllFunctions && !ShowFunction.empty())
3020 WithColor::warning() << "-function argument ignored: showing all functions\n";
3021
3022 if (!DebugInfoFilename.empty())
3023 return showDebugInfoCorrelation(DebugInfoFilename, ShowDetailedSummary,
3024 ShowProfileSymbolList, SFormat, OS);
3025
3026 if (ProfileKind == instr)
3027 return showInstrProfile(
3028 Filename, ShowCounts, TopNFunctions, ShowIndirectCallTargets,
3029 ShowMemOPSizes, ShowDetailedSummary, DetailedSummaryCutoffs,
3030 ShowAllFunctions, ShowCS, ValueCutoff, OnlyListBelow, ShowFunction,
3031 TextFormat, ShowBinaryIds, ShowCovered, ShowProfileVersion,
3032 ShowTemporalProfTraces, SFormat, OS);
3033 if (ProfileKind == sample)
3034 return showSampleProfile(Filename, ShowCounts, TopNFunctions,
3035 ShowAllFunctions, ShowDetailedSummary,
3036 ShowFunction, ShowProfileSymbolList,
3037 ShowSectionInfoOnly, ShowHotFuncList, SFormat, OS);
3038 return showMemProfProfile(Filename, ProfiledBinary, SFormat, OS);
3039}
3040
3041int llvm_profdata_main(int argc, char **argvNonConst,
3042 const llvm::ToolContext &) {
3043 const char **argv = const_cast<const char **>(argvNonConst);
3044 InitLLVM X(argc, argv);
3045
3046 StringRef ProgName(sys::path::filename(argv[0]));
3047 if (argc > 1) {
3048 int (*func)(int, const char *[]) = nullptr;
3049
3050 if (strcmp(argv[1], "merge") == 0)
3051 func = merge_main;
3052 else if (strcmp(argv[1], "show") == 0)
3053 func = show_main;
3054 else if (strcmp(argv[1], "overlap") == 0)
3055 func = overlap_main;
3056
3057 if (func) {
3058 std::string Invocation(ProgName.str() + " " + argv[1]);
3059 argv[1] = Invocation.c_str();
3060 return func(argc - 1, argv + 1);
3061 }
3062
3063 if (strcmp(argv[1], "-h") == 0 || strcmp(argv[1], "-help") == 0 ||
3064 strcmp(argv[1], "--help") == 0) {
3065
3066 errs() << "OVERVIEW: LLVM profile data tools\n\n"
3067 << "USAGE: " << ProgName << " <command> [args...]\n"
3068 << "USAGE: " << ProgName << " <command> -help\n\n"
3069 << "See each individual command --help for more details.\n"
3070 << "Available commands: merge, show, overlap\n";
3071 return 0;
3072 }
3073 }
3074
3075 if (argc < 2)
3076 errs() << ProgName << ": No command specified!\n";
3077 else
3078 errs() << ProgName << ": Unknown command!\n";
3079
3080 errs() << "USAGE: " << ProgName << " <merge|show|overlap> [args...]\n";
3081 return 1;
3082}