File: | build/source/llvm/tools/llvm-profdata/llvm-profdata.cpp |
Warning: | line 549, column 5 Value stored to 'Threshold' is never read |
Press '?' to see keyboard shortcuts
Keyboard shortcuts:
1 | //===- llvm-profdata.cpp - LLVM profile data tool -------------------------===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | // |
9 | // llvm-profdata merges .profdata files. |
10 | // |
11 | //===----------------------------------------------------------------------===// |
12 | |
13 | #include "llvm/ADT/SmallSet.h" |
14 | #include "llvm/ADT/SmallVector.h" |
15 | #include "llvm/ADT/StringRef.h" |
16 | #include "llvm/IR/LLVMContext.h" |
17 | #include "llvm/Object/Binary.h" |
18 | #include "llvm/ProfileData/InstrProfCorrelator.h" |
19 | #include "llvm/ProfileData/InstrProfReader.h" |
20 | #include "llvm/ProfileData/InstrProfWriter.h" |
21 | #include "llvm/ProfileData/MemProf.h" |
22 | #include "llvm/ProfileData/ProfileCommon.h" |
23 | #include "llvm/ProfileData/RawMemProfReader.h" |
24 | #include "llvm/ProfileData/SampleProfReader.h" |
25 | #include "llvm/ProfileData/SampleProfWriter.h" |
26 | #include "llvm/Support/CommandLine.h" |
27 | #include "llvm/Support/Discriminator.h" |
28 | #include "llvm/Support/Errc.h" |
29 | #include "llvm/Support/FileSystem.h" |
30 | #include "llvm/Support/Format.h" |
31 | #include "llvm/Support/FormattedStream.h" |
32 | #include "llvm/Support/InitLLVM.h" |
33 | #include "llvm/Support/LLVMDriver.h" |
34 | #include "llvm/Support/MD5.h" |
35 | #include "llvm/Support/MemoryBuffer.h" |
36 | #include "llvm/Support/Path.h" |
37 | #include "llvm/Support/ThreadPool.h" |
38 | #include "llvm/Support/Threading.h" |
39 | #include "llvm/Support/VirtualFileSystem.h" |
40 | #include "llvm/Support/WithColor.h" |
41 | #include "llvm/Support/raw_ostream.h" |
42 | #include <algorithm> |
43 | #include <cmath> |
44 | #include <optional> |
45 | #include <queue> |
46 | |
47 | using namespace llvm; |
48 | |
/// Sentinel stored as the mapped value in the static-function name map when
/// more than one static function resolves to the same stripped name.
const std::string DuplicateNameStr = "----";

/// Profile encodings that can be requested on the command line.
enum ProfileFormat {
  PF_None = 0,
  PF_Text,
  PF_Compact_Binary, // Deprecated
  PF_Ext_Binary,
  PF_GCC,
  PF_Binary
};

/// Rendering selected for textual output (presumably of the `show` command;
/// its use is outside this part of the file).
enum class ShowFormat { Text, Json, Yaml };
63 | |
64 | static void warn(Twine Message, std::string Whence = "", |
65 | std::string Hint = "") { |
66 | WithColor::warning(); |
67 | if (!Whence.empty()) |
68 | errs() << Whence << ": "; |
69 | errs() << Message << "\n"; |
70 | if (!Hint.empty()) |
71 | WithColor::note() << Hint << "\n"; |
72 | } |
73 | |
74 | static void warn(Error E, StringRef Whence = "") { |
75 | if (E.isA<InstrProfError>()) { |
76 | handleAllErrors(std::move(E), [&](const InstrProfError &IPE) { |
77 | warn(IPE.message(), std::string(Whence), std::string("")); |
78 | }); |
79 | } |
80 | } |
81 | |
82 | static void exitWithError(Twine Message, std::string Whence = "", |
83 | std::string Hint = "") { |
84 | WithColor::error(); |
85 | if (!Whence.empty()) |
86 | errs() << Whence << ": "; |
87 | errs() << Message << "\n"; |
88 | if (!Hint.empty()) |
89 | WithColor::note() << Hint << "\n"; |
90 | ::exit(1); |
91 | } |
92 | |
93 | static void exitWithError(Error E, StringRef Whence = "") { |
94 | if (E.isA<InstrProfError>()) { |
95 | handleAllErrors(std::move(E), [&](const InstrProfError &IPE) { |
96 | instrprof_error instrError = IPE.get(); |
97 | StringRef Hint = ""; |
98 | if (instrError == instrprof_error::unrecognized_format) { |
99 | // Hint in case user missed specifying the profile type. |
100 | Hint = "Perhaps you forgot to use the --sample or --memory option?"; |
101 | } |
102 | exitWithError(IPE.message(), std::string(Whence), std::string(Hint)); |
103 | }); |
104 | return; |
105 | } |
106 | |
107 | exitWithError(toString(std::move(E)), std::string(Whence)); |
108 | } |
109 | |
110 | static void exitWithErrorCode(std::error_code EC, StringRef Whence = "") { |
111 | exitWithError(EC.message(), std::string(Whence)); |
112 | } |
113 | |
namespace {
/// The kinds of profile the tool distinguishes.
enum ProfileKinds { instr, sample, memory };

/// Policy for invalid inputs: abort on the first invalid input, or only when
/// every input is invalid.
enum FailureMode { failIfAnyAreInvalid, failIfAllAreInvalid };
} // namespace
118 | |
119 | static void warnOrExitGivenError(FailureMode FailMode, std::error_code EC, |
120 | StringRef Whence = "") { |
121 | if (FailMode == failIfAnyAreInvalid) |
122 | exitWithErrorCode(EC, Whence); |
123 | else |
124 | warn(EC.message(), std::string(Whence)); |
125 | } |
126 | |
127 | static void handleMergeWriterError(Error E, StringRef WhenceFile = "", |
128 | StringRef WhenceFunction = "", |
129 | bool ShowHint = true) { |
130 | if (!WhenceFile.empty()) |
131 | errs() << WhenceFile << ": "; |
132 | if (!WhenceFunction.empty()) |
133 | errs() << WhenceFunction << ": "; |
134 | |
135 | auto IPE = instrprof_error::success; |
136 | E = handleErrors(std::move(E), |
137 | [&IPE](std::unique_ptr<InstrProfError> E) -> Error { |
138 | IPE = E->get(); |
139 | return Error(std::move(E)); |
140 | }); |
141 | errs() << toString(std::move(E)) << "\n"; |
142 | |
143 | if (ShowHint) { |
144 | StringRef Hint = ""; |
145 | if (IPE != instrprof_error::success) { |
146 | switch (IPE) { |
147 | case instrprof_error::hash_mismatch: |
148 | case instrprof_error::count_mismatch: |
149 | case instrprof_error::value_site_count_mismatch: |
150 | Hint = "Make sure that all profile data to be merged is generated " |
151 | "from the same binary."; |
152 | break; |
153 | default: |
154 | break; |
155 | } |
156 | } |
157 | |
158 | if (!Hint.empty()) |
159 | errs() << Hint << "\n"; |
160 | } |
161 | } |
162 | |
163 | namespace { |
164 | /// A remapper from original symbol names to new symbol names based on a file |
165 | /// containing a list of mappings from old name to new name. |
166 | class SymbolRemapper { |
167 | std::unique_ptr<MemoryBuffer> File; |
168 | DenseMap<StringRef, StringRef> RemappingTable; |
169 | |
170 | public: |
171 | /// Build a SymbolRemapper from a file containing a list of old/new symbols. |
172 | static std::unique_ptr<SymbolRemapper> create(StringRef InputFile) { |
173 | auto BufOrError = MemoryBuffer::getFileOrSTDIN(InputFile); |
174 | if (!BufOrError) |
175 | exitWithErrorCode(BufOrError.getError(), InputFile); |
176 | |
177 | auto Remapper = std::make_unique<SymbolRemapper>(); |
178 | Remapper->File = std::move(BufOrError.get()); |
179 | |
180 | for (line_iterator LineIt(*Remapper->File, /*SkipBlanks=*/true, '#'); |
181 | !LineIt.is_at_eof(); ++LineIt) { |
182 | std::pair<StringRef, StringRef> Parts = LineIt->split(' '); |
183 | if (Parts.first.empty() || Parts.second.empty() || |
184 | Parts.second.count(' ')) { |
185 | exitWithError("unexpected line in remapping file", |
186 | (InputFile + ":" + Twine(LineIt.line_number())).str(), |
187 | "expected 'old_symbol new_symbol'"); |
188 | } |
189 | Remapper->RemappingTable.insert(Parts); |
190 | } |
191 | return Remapper; |
192 | } |
193 | |
194 | /// Attempt to map the given old symbol into a new symbol. |
195 | /// |
196 | /// \return The new symbol, or \p Name if no such symbol was found. |
197 | StringRef operator()(StringRef Name) { |
198 | StringRef New = RemappingTable.lookup(Name); |
199 | return New.empty() ? Name : New; |
200 | } |
201 | }; |
202 | } |
203 | |
204 | struct WeightedFile { |
205 | std::string Filename; |
206 | uint64_t Weight; |
207 | }; |
208 | typedef SmallVector<WeightedFile, 5> WeightedFileVector; |
209 | |
210 | /// Keep track of merged data and reported errors. |
211 | struct WriterContext { |
212 | std::mutex Lock; |
213 | InstrProfWriter Writer; |
214 | std::vector<std::pair<Error, std::string>> Errors; |
215 | std::mutex &ErrLock; |
216 | SmallSet<instrprof_error, 4> &WriterErrorCodes; |
217 | |
218 | WriterContext(bool IsSparse, std::mutex &ErrLock, |
219 | SmallSet<instrprof_error, 4> &WriterErrorCodes, |
220 | uint64_t ReservoirSize = 0, uint64_t MaxTraceLength = 0) |
221 | : Writer(IsSparse, ReservoirSize, MaxTraceLength), ErrLock(ErrLock), |
222 | WriterErrorCodes(WriterErrorCodes) {} |
223 | }; |
224 | |
225 | /// Computer the overlap b/w profile BaseFilename and TestFileName, |
226 | /// and store the program level result to Overlap. |
227 | static void overlapInput(const std::string &BaseFilename, |
228 | const std::string &TestFilename, WriterContext *WC, |
229 | OverlapStats &Overlap, |
230 | const OverlapFuncFilters &FuncFilter, |
231 | raw_fd_ostream &OS, bool IsCS) { |
232 | auto FS = vfs::getRealFileSystem(); |
233 | auto ReaderOrErr = InstrProfReader::create(TestFilename, *FS); |
234 | if (Error E = ReaderOrErr.takeError()) { |
235 | // Skip the empty profiles by returning sliently. |
236 | auto [ErrorCode, Msg] = InstrProfError::take(std::move(E)); |
237 | if (ErrorCode != instrprof_error::empty_raw_profile) |
238 | WC->Errors.emplace_back(make_error<InstrProfError>(ErrorCode, Msg), |
239 | TestFilename); |
240 | return; |
241 | } |
242 | |
243 | auto Reader = std::move(ReaderOrErr.get()); |
244 | for (auto &I : *Reader) { |
245 | OverlapStats FuncOverlap(OverlapStats::FunctionLevel); |
246 | FuncOverlap.setFuncInfo(I.Name, I.Hash); |
247 | |
248 | WC->Writer.overlapRecord(std::move(I), Overlap, FuncOverlap, FuncFilter); |
249 | FuncOverlap.dump(OS); |
250 | } |
251 | } |
252 | |
253 | /// Load an input into a writer context. |
254 | static void loadInput(const WeightedFile &Input, SymbolRemapper *Remapper, |
255 | const InstrProfCorrelator *Correlator, |
256 | const StringRef ProfiledBinary, WriterContext *WC) { |
257 | std::unique_lock<std::mutex> CtxGuard{WC->Lock}; |
258 | |
259 | // Copy the filename, because llvm::ThreadPool copied the input "const |
260 | // WeightedFile &" by value, making a reference to the filename within it |
261 | // invalid outside of this packaged task. |
262 | std::string Filename = Input.Filename; |
263 | |
264 | using ::llvm::memprof::RawMemProfReader; |
265 | if (RawMemProfReader::hasFormat(Input.Filename)) { |
266 | auto ReaderOrErr = RawMemProfReader::create(Input.Filename, ProfiledBinary); |
267 | if (!ReaderOrErr) { |
268 | exitWithError(ReaderOrErr.takeError(), Input.Filename); |
269 | } |
270 | std::unique_ptr<RawMemProfReader> Reader = std::move(ReaderOrErr.get()); |
271 | // Check if the profile types can be merged, e.g. clang frontend profiles |
272 | // should not be merged with memprof profiles. |
273 | if (Error E = WC->Writer.mergeProfileKind(Reader->getProfileKind())) { |
274 | consumeError(std::move(E)); |
275 | WC->Errors.emplace_back( |
276 | make_error<StringError>( |
277 | "Cannot merge MemProf profile with Clang generated profile.", |
278 | std::error_code()), |
279 | Filename); |
280 | return; |
281 | } |
282 | |
283 | auto MemProfError = [&](Error E) { |
284 | auto [ErrorCode, Msg] = InstrProfError::take(std::move(E)); |
285 | WC->Errors.emplace_back(make_error<InstrProfError>(ErrorCode, Msg), |
286 | Filename); |
287 | }; |
288 | |
289 | // Add the frame mappings into the writer context. |
290 | const auto &IdToFrame = Reader->getFrameMapping(); |
291 | for (const auto &I : IdToFrame) { |
292 | bool Succeeded = WC->Writer.addMemProfFrame( |
293 | /*Id=*/I.first, /*Frame=*/I.getSecond(), MemProfError); |
294 | // If we weren't able to add the frame mappings then it doesn't make sense |
295 | // to try to add the records from this profile. |
296 | if (!Succeeded) |
297 | return; |
298 | } |
299 | const auto &FunctionProfileData = Reader->getProfileData(); |
300 | // Add the memprof records into the writer context. |
301 | for (const auto &I : FunctionProfileData) { |
302 | WC->Writer.addMemProfRecord(/*Id=*/I.first, /*Record=*/I.second); |
303 | } |
304 | return; |
305 | } |
306 | |
307 | auto FS = vfs::getRealFileSystem(); |
308 | auto ReaderOrErr = InstrProfReader::create(Input.Filename, *FS, Correlator); |
309 | if (Error E = ReaderOrErr.takeError()) { |
310 | // Skip the empty profiles by returning silently. |
311 | auto [ErrCode, Msg] = InstrProfError::take(std::move(E)); |
312 | if (ErrCode != instrprof_error::empty_raw_profile) |
313 | WC->Errors.emplace_back(make_error<InstrProfError>(ErrCode, Msg), |
314 | Filename); |
315 | return; |
316 | } |
317 | |
318 | auto Reader = std::move(ReaderOrErr.get()); |
319 | if (Error E = WC->Writer.mergeProfileKind(Reader->getProfileKind())) { |
320 | consumeError(std::move(E)); |
321 | WC->Errors.emplace_back( |
322 | make_error<StringError>( |
323 | "Merge IR generated profile with Clang generated profile.", |
324 | std::error_code()), |
325 | Filename); |
326 | return; |
327 | } |
328 | |
329 | for (auto &I : *Reader) { |
330 | if (Remapper) |
331 | I.Name = (*Remapper)(I.Name); |
332 | const StringRef FuncName = I.Name; |
333 | bool Reported = false; |
334 | WC->Writer.addRecord(std::move(I), Input.Weight, [&](Error E) { |
335 | if (Reported) { |
336 | consumeError(std::move(E)); |
337 | return; |
338 | } |
339 | Reported = true; |
340 | // Only show hint the first time an error occurs. |
341 | auto [ErrCode, Msg] = InstrProfError::take(std::move(E)); |
342 | std::unique_lock<std::mutex> ErrGuard{WC->ErrLock}; |
343 | bool firstTime = WC->WriterErrorCodes.insert(ErrCode).second; |
344 | handleMergeWriterError(make_error<InstrProfError>(ErrCode, Msg), |
345 | Input.Filename, FuncName, firstTime); |
346 | }); |
347 | } |
348 | |
349 | if (Reader->hasTemporalProfile()) { |
350 | auto &Traces = Reader->getTemporalProfTraces(Input.Weight); |
351 | if (!Traces.empty()) |
352 | WC->Writer.addTemporalProfileTraces( |
353 | Traces, Reader->getTemporalProfTraceStreamSize()); |
354 | } |
355 | if (Reader->hasError()) { |
356 | if (Error E = Reader->getError()) |
357 | WC->Errors.emplace_back(std::move(E), Filename); |
358 | } |
359 | |
360 | std::vector<llvm::object::BuildID> BinaryIds; |
361 | if (Error E = Reader->readBinaryIds(BinaryIds)) |
362 | WC->Errors.emplace_back(std::move(E), Filename); |
363 | WC->Writer.addBinaryIds(BinaryIds); |
364 | } |
365 | |
366 | /// Merge the \p Src writer context into \p Dst. |
367 | static void mergeWriterContexts(WriterContext *Dst, WriterContext *Src) { |
368 | for (auto &ErrorPair : Src->Errors) |
369 | Dst->Errors.push_back(std::move(ErrorPair)); |
370 | Src->Errors.clear(); |
371 | |
372 | if (Error E = Dst->Writer.mergeProfileKind(Src->Writer.getProfileKind())) |
373 | exitWithError(std::move(E)); |
374 | |
375 | Dst->Writer.mergeRecordsFromWriter(std::move(Src->Writer), [&](Error E) { |
376 | auto [ErrorCode, Msg] = InstrProfError::take(std::move(E)); |
377 | std::unique_lock<std::mutex> ErrGuard{Dst->ErrLock}; |
378 | bool firstTime = Dst->WriterErrorCodes.insert(ErrorCode).second; |
379 | if (firstTime) |
380 | warn(toString(make_error<InstrProfError>(ErrorCode, Msg))); |
381 | }); |
382 | } |
383 | |
384 | static void writeInstrProfile(StringRef OutputFilename, |
385 | ProfileFormat OutputFormat, |
386 | InstrProfWriter &Writer) { |
387 | std::error_code EC; |
388 | raw_fd_ostream Output(OutputFilename.data(), EC, |
389 | OutputFormat == PF_Text ? sys::fs::OF_TextWithCRLF |
390 | : sys::fs::OF_None); |
391 | if (EC) |
392 | exitWithErrorCode(EC, OutputFilename); |
393 | |
394 | if (OutputFormat == PF_Text) { |
395 | if (Error E = Writer.writeText(Output)) |
396 | warn(std::move(E)); |
397 | } else { |
398 | if (Output.is_displayed()) |
399 | exitWithError("cannot write a non-text format profile to the terminal"); |
400 | if (Error E = Writer.write(Output)) |
401 | warn(std::move(E)); |
402 | } |
403 | } |
404 | |
405 | static void |
406 | mergeInstrProfile(const WeightedFileVector &Inputs, StringRef DebugInfoFilename, |
407 | SymbolRemapper *Remapper, StringRef OutputFilename, |
408 | ProfileFormat OutputFormat, uint64_t TraceReservoirSize, |
409 | uint64_t MaxTraceLength, bool OutputSparse, |
410 | unsigned NumThreads, FailureMode FailMode, |
411 | const StringRef ProfiledBinary) { |
412 | if (OutputFormat == PF_Compact_Binary) |
413 | exitWithError("Compact Binary is deprecated"); |
414 | if (OutputFormat != PF_Binary && OutputFormat != PF_Ext_Binary && |
415 | OutputFormat != PF_Text) |
416 | exitWithError("unknown format is specified"); |
417 | |
418 | std::unique_ptr<InstrProfCorrelator> Correlator; |
419 | if (!DebugInfoFilename.empty()) { |
420 | if (auto Err = |
421 | InstrProfCorrelator::get(DebugInfoFilename).moveInto(Correlator)) |
422 | exitWithError(std::move(Err), DebugInfoFilename); |
423 | if (auto Err = Correlator->correlateProfileData()) |
424 | exitWithError(std::move(Err), DebugInfoFilename); |
425 | } |
426 | |
427 | std::mutex ErrorLock; |
428 | SmallSet<instrprof_error, 4> WriterErrorCodes; |
429 | |
430 | // If NumThreads is not specified, auto-detect a good default. |
431 | if (NumThreads == 0) |
432 | NumThreads = std::min(hardware_concurrency().compute_thread_count(), |
433 | unsigned((Inputs.size() + 1) / 2)); |
434 | |
435 | // Initialize the writer contexts. |
436 | SmallVector<std::unique_ptr<WriterContext>, 4> Contexts; |
437 | for (unsigned I = 0; I < NumThreads; ++I) |
438 | Contexts.emplace_back(std::make_unique<WriterContext>( |
439 | OutputSparse, ErrorLock, WriterErrorCodes, TraceReservoirSize, |
440 | MaxTraceLength)); |
441 | |
442 | if (NumThreads == 1) { |
443 | for (const auto &Input : Inputs) |
444 | loadInput(Input, Remapper, Correlator.get(), ProfiledBinary, |
445 | Contexts[0].get()); |
446 | } else { |
447 | ThreadPool Pool(hardware_concurrency(NumThreads)); |
448 | |
449 | // Load the inputs in parallel (N/NumThreads serial steps). |
450 | unsigned Ctx = 0; |
451 | for (const auto &Input : Inputs) { |
452 | Pool.async(loadInput, Input, Remapper, Correlator.get(), ProfiledBinary, |
453 | Contexts[Ctx].get()); |
454 | Ctx = (Ctx + 1) % NumThreads; |
455 | } |
456 | Pool.wait(); |
457 | |
458 | // Merge the writer contexts together (~ lg(NumThreads) serial steps). |
459 | unsigned Mid = Contexts.size() / 2; |
460 | unsigned End = Contexts.size(); |
461 | assert(Mid > 0 && "Expected more than one context")(static_cast <bool> (Mid > 0 && "Expected more than one context" ) ? void (0) : __assert_fail ("Mid > 0 && \"Expected more than one context\"" , "llvm/tools/llvm-profdata/llvm-profdata.cpp", 461, __extension__ __PRETTY_FUNCTION__)); |
462 | do { |
463 | for (unsigned I = 0; I < Mid; ++I) |
464 | Pool.async(mergeWriterContexts, Contexts[I].get(), |
465 | Contexts[I + Mid].get()); |
466 | Pool.wait(); |
467 | if (End & 1) { |
468 | Pool.async(mergeWriterContexts, Contexts[0].get(), |
469 | Contexts[End - 1].get()); |
470 | Pool.wait(); |
471 | } |
472 | End = Mid; |
473 | Mid /= 2; |
474 | } while (Mid > 0); |
475 | } |
476 | |
477 | // Handle deferred errors encountered during merging. If the number of errors |
478 | // is equal to the number of inputs the merge failed. |
479 | unsigned NumErrors = 0; |
480 | for (std::unique_ptr<WriterContext> &WC : Contexts) { |
481 | for (auto &ErrorPair : WC->Errors) { |
482 | ++NumErrors; |
483 | warn(toString(std::move(ErrorPair.first)), ErrorPair.second); |
484 | } |
485 | } |
486 | if (NumErrors == Inputs.size() || |
487 | (NumErrors > 0 && FailMode == failIfAnyAreInvalid)) |
488 | exitWithError("no profile can be merged"); |
489 | |
490 | writeInstrProfile(OutputFilename, OutputFormat, Contexts[0]->Writer); |
491 | } |
492 | |
493 | /// The profile entry for a function in instrumentation profile. |
494 | struct InstrProfileEntry { |
495 | uint64_t MaxCount = 0; |
496 | uint64_t NumEdgeCounters = 0; |
497 | float ZeroCounterRatio = 0.0; |
498 | InstrProfRecord *ProfRecord; |
499 | InstrProfileEntry(InstrProfRecord *Record); |
500 | InstrProfileEntry() = default; |
501 | }; |
502 | |
503 | InstrProfileEntry::InstrProfileEntry(InstrProfRecord *Record) { |
504 | ProfRecord = Record; |
505 | uint64_t CntNum = Record->Counts.size(); |
506 | uint64_t ZeroCntNum = 0; |
507 | for (size_t I = 0; I < CntNum; ++I) { |
508 | MaxCount = std::max(MaxCount, Record->Counts[I]); |
509 | ZeroCntNum += !Record->Counts[I]; |
510 | } |
511 | ZeroCounterRatio = (float)ZeroCntNum / CntNum; |
512 | NumEdgeCounters = CntNum; |
513 | } |
514 | |
515 | /// Either set all the counters in the instr profile entry \p IFE to |
516 | /// -1 / -2 /in order to drop the profile or scale up the |
517 | /// counters in \p IFP to be above hot / cold threshold. We use |
518 | /// the ratio of zero counters in the profile of a function to |
519 | /// decide the profile is helpful or harmful for performance, |
520 | /// and to choose whether to scale up or drop it. |
521 | static void updateInstrProfileEntry(InstrProfileEntry &IFE, bool SetToHot, |
522 | uint64_t HotInstrThreshold, |
523 | uint64_t ColdInstrThreshold, |
524 | float ZeroCounterThreshold) { |
525 | InstrProfRecord *ProfRecord = IFE.ProfRecord; |
526 | if (!IFE.MaxCount || IFE.ZeroCounterRatio > ZeroCounterThreshold) { |
527 | // If all or most of the counters of the function are zero, the |
528 | // profile is unaccountable and should be dropped. Reset all the |
529 | // counters to be -1 / -2 and PGO profile-use will drop the profile. |
530 | // All counters being -1 also implies that the function is hot so |
531 | // PGO profile-use will also set the entry count metadata to be |
532 | // above hot threshold. |
533 | // All counters being -2 implies that the function is warm so |
534 | // PGO profile-use will also set the entry count metadata to be |
535 | // above cold threshold. |
536 | auto Kind = |
537 | (SetToHot ? InstrProfRecord::PseudoHot : InstrProfRecord::PseudoWarm); |
538 | ProfRecord->setPseudoCount(Kind); |
539 | return; |
540 | } |
541 | |
542 | // Scale up the MaxCount to be multiple times above hot / cold threshold. |
543 | const unsigned MultiplyFactor = 3; |
544 | uint64_t Threshold = (SetToHot ? HotInstrThreshold : ColdInstrThreshold); |
545 | uint64_t Numerator = Threshold * MultiplyFactor; |
546 | |
547 | // Make sure Threshold for warm counters is below the HotInstrThreshold. |
548 | if (!SetToHot && Threshold >= HotInstrThreshold) { |
549 | Threshold = (HotInstrThreshold + ColdInstrThreshold) / 2; |
Value stored to 'Threshold' is never read | |
550 | } |
551 | |
552 | uint64_t Denominator = IFE.MaxCount; |
553 | if (Numerator <= Denominator) |
554 | return; |
555 | ProfRecord->scale(Numerator, Denominator, [&](instrprof_error E) { |
556 | warn(toString(make_error<InstrProfError>(E))); |
557 | }); |
558 | } |
559 | |
// Indices into ProfileSummaryBuilder::DefaultCutoffs selecting the percentile
// cutoffs used for the cold and hot count thresholds.
constexpr uint64_t ColdPercentileIdx = 15;
constexpr uint64_t HotPercentileIdx = 11;
562 | |
563 | using sampleprof::FSDiscriminatorPass; |
564 | |
565 | // Internal options to set FSDiscriminatorPass. Used in merge and show |
566 | // commands. |
567 | static cl::opt<FSDiscriminatorPass> FSDiscriminatorPassOption( |
568 | "fs-discriminator-pass", cl::init(PassLast), cl::Hidden, |
569 | cl::desc("Zero out the discriminator bits for the FS discrimiantor " |
570 | "pass beyond this value. The enum values are defined in " |
571 | "Support/Discriminator.h"), |
572 | cl::values(clEnumVal(Base, "Use base discriminators only")llvm::cl::OptionEnumValue { "Base", int(Base), "Use base discriminators only" }, |
573 | clEnumVal(Pass1, "Use base and pass 1 discriminators")llvm::cl::OptionEnumValue { "Pass1", int(Pass1), "Use base and pass 1 discriminators" }, |
574 | clEnumVal(Pass2, "Use base and pass 1-2 discriminators")llvm::cl::OptionEnumValue { "Pass2", int(Pass2), "Use base and pass 1-2 discriminators" }, |
575 | clEnumVal(Pass3, "Use base and pass 1-3 discriminators")llvm::cl::OptionEnumValue { "Pass3", int(Pass3), "Use base and pass 1-3 discriminators" }, |
576 | clEnumVal(PassLast, "Use all discriminator bits (default)")llvm::cl::OptionEnumValue { "PassLast", int(PassLast), "Use all discriminator bits (default)" })); |
577 | |
578 | static unsigned getDiscriminatorMask() { |
579 | return getN1Bits(getFSPassBitEnd(FSDiscriminatorPassOption.getValue())); |
580 | } |
581 | |
582 | /// Adjust the instr profile in \p WC based on the sample profile in |
583 | /// \p Reader. |
584 | static void |
585 | adjustInstrProfile(std::unique_ptr<WriterContext> &WC, |
586 | std::unique_ptr<sampleprof::SampleProfileReader> &Reader, |
587 | unsigned SupplMinSizeThreshold, float ZeroCounterThreshold, |
588 | unsigned InstrProfColdThreshold) { |
589 | // Function to its entry in instr profile. |
590 | StringMap<InstrProfileEntry> InstrProfileMap; |
591 | StringMap<StringRef> StaticFuncMap; |
592 | InstrProfSummaryBuilder IPBuilder(ProfileSummaryBuilder::DefaultCutoffs); |
593 | |
594 | auto checkSampleProfileHasFUnique = [&Reader]() { |
595 | for (const auto &PD : Reader->getProfiles()) { |
596 | auto &FContext = PD.first; |
597 | if (FContext.toString().find(FunctionSamples::UniqSuffix) != |
598 | std::string::npos) { |
599 | return true; |
600 | } |
601 | } |
602 | return false; |
603 | }; |
604 | |
605 | bool SampleProfileHasFUnique = checkSampleProfileHasFUnique(); |
606 | |
607 | auto buildStaticFuncMap = [&StaticFuncMap, |
608 | SampleProfileHasFUnique](const StringRef Name) { |
609 | std::string Prefixes[] = {".cpp:", "cc:", ".c:", ".hpp:", ".h:"}; |
610 | size_t PrefixPos = StringRef::npos; |
611 | for (auto &Prefix : Prefixes) { |
612 | PrefixPos = Name.find_insensitive(Prefix); |
613 | if (PrefixPos == StringRef::npos) |
614 | continue; |
615 | PrefixPos += Prefix.size(); |
616 | break; |
617 | } |
618 | |
619 | if (PrefixPos == StringRef::npos) { |
620 | return; |
621 | } |
622 | |
623 | StringRef NewName = Name.drop_front(PrefixPos); |
624 | StringRef FName = Name.substr(0, PrefixPos - 1); |
625 | if (NewName.size() == 0) { |
626 | return; |
627 | } |
628 | |
629 | // This name should have a static linkage. |
630 | size_t PostfixPos = NewName.find(FunctionSamples::UniqSuffix); |
631 | bool ProfileHasFUnique = (PostfixPos != StringRef::npos); |
632 | |
633 | // If sample profile and instrumented profile do not agree on symbol |
634 | // uniqification. |
635 | if (SampleProfileHasFUnique != ProfileHasFUnique) { |
636 | // If instrumented profile uses -funique-internal-linakge-symbols, |
637 | // we need to trim the name. |
638 | if (ProfileHasFUnique) { |
639 | NewName = NewName.substr(0, PostfixPos); |
640 | } else { |
641 | // If sample profile uses -funique-internal-linakge-symbols, |
642 | // we build the map. |
643 | std::string NStr = |
644 | NewName.str() + getUniqueInternalLinkagePostfix(FName); |
645 | NewName = StringRef(NStr); |
646 | StaticFuncMap[NewName] = Name; |
647 | return; |
648 | } |
649 | } |
650 | |
651 | if (!StaticFuncMap.contains(NewName)) { |
652 | StaticFuncMap[NewName] = Name; |
653 | } else { |
654 | StaticFuncMap[NewName] = DuplicateNameStr; |
655 | } |
656 | }; |
657 | |
658 | // We need to flatten the SampleFDO profile as the InstrFDO |
659 | // profile does not have inlined callsite profiles. |
660 | // One caveat is the pre-inlined function -- their samples |
661 | // should be collapsed into the caller function. |
662 | // Here we do a DFS traversal to get the flatten profile |
663 | // info: the sum of entrycount and the max of maxcount. |
664 | // Here is the algorithm: |
665 | // recursive (FS, root_name) { |
666 | // name = FS->getName(); |
667 | // get samples for FS; |
668 | // if (InstrProf.find(name) { |
669 | // root_name = name; |
670 | // } else { |
671 | // if (name is in static_func map) { |
672 | // root_name = static_name; |
673 | // } |
674 | // } |
675 | // update the Map entry for root_name; |
676 | // for (subfs: FS) { |
677 | // recursive(subfs, root_name); |
678 | // } |
679 | // } |
680 | // |
681 | // Here is an example. |
682 | // |
683 | // SampleProfile: |
684 | // foo:12345:1000 |
685 | // 1: 1000 |
686 | // 2.1: 1000 |
687 | // 15: 5000 |
688 | // 4: bar:1000 |
689 | // 1: 1000 |
690 | // 2: goo:3000 |
691 | // 1: 3000 |
692 | // 8: bar:40000 |
693 | // 1: 10000 |
694 | // 2: goo:30000 |
695 | // 1: 30000 |
696 | // |
697 | // InstrProfile has two entries: |
698 | // foo |
699 | // bar.cc:bar |
700 | // |
701 | // After BuildMaxSampleMap, we should have the following in FlattenSampleMap: |
702 | // {"foo", {1000, 5000}} |
703 | // {"bar.cc:bar", {11000, 30000}} |
704 | // |
705 | // foo's has an entry count of 1000, and max body count of 5000. |
706 | // bar.cc:bar has an entry count of 11000 (sum two callsites of 1000 and |
707 | // 10000), and max count of 30000 (from the callsite in line 8). |
708 | // |
709 | // Note that goo's count will remain in bar.cc:bar() as it does not have an |
710 | // entry in InstrProfile. |
711 | DenseMap<StringRef, std::pair<uint64_t, uint64_t>> FlattenSampleMap; |
712 | auto BuildMaxSampleMap = [&FlattenSampleMap, &StaticFuncMap, |
713 | &InstrProfileMap](const FunctionSamples &FS, |
714 | const StringRef &RootName) { |
715 | auto BuildMaxSampleMapImpl = [&](const FunctionSamples &FS, |
716 | const StringRef &RootName, |
717 | auto &BuildImpl) -> void { |
718 | const StringRef &Name = FS.getName(); |
719 | const StringRef *NewRootName = &RootName; |
720 | uint64_t EntrySample = FS.getHeadSamplesEstimate(); |
721 | uint64_t MaxBodySample = FS.getMaxCountInside(/* SkipCallSite*/ true); |
722 | |
723 | auto It = InstrProfileMap.find(Name); |
724 | if (It != InstrProfileMap.end()) { |
725 | NewRootName = &Name; |
726 | } else { |
727 | auto NewName = StaticFuncMap.find(Name); |
728 | if (NewName != StaticFuncMap.end()) { |
729 | It = InstrProfileMap.find(NewName->second.str()); |
730 | if (NewName->second != DuplicateNameStr) { |
731 | NewRootName = &NewName->second; |
732 | } |
733 | } else { |
734 | // Here the EntrySample is of an inlined function, so we should not |
735 | // update the EntrySample in the map. |
736 | EntrySample = 0; |
737 | } |
738 | } |
739 | EntrySample += FlattenSampleMap[*NewRootName].first; |
740 | MaxBodySample = |
741 | std::max(FlattenSampleMap[*NewRootName].second, MaxBodySample); |
742 | FlattenSampleMap[*NewRootName] = |
743 | std::make_pair(EntrySample, MaxBodySample); |
744 | |
745 | for (const auto &C : FS.getCallsiteSamples()) |
746 | for (const auto &F : C.second) |
747 | BuildImpl(F.second, *NewRootName, BuildImpl); |
748 | }; |
749 | BuildMaxSampleMapImpl(FS, RootName, BuildMaxSampleMapImpl); |
750 | }; |
751 | |
752 | for (auto &PD : WC->Writer.getProfileData()) { |
753 | // Populate IPBuilder. |
754 | for (const auto &PDV : PD.getValue()) { |
755 | InstrProfRecord Record = PDV.second; |
756 | IPBuilder.addRecord(Record); |
757 | } |
758 | |
759 | // If a function has multiple entries in instr profile, skip it. |
760 | if (PD.getValue().size() != 1) |
761 | continue; |
762 | |
763 | // Initialize InstrProfileMap. |
764 | InstrProfRecord *R = &PD.getValue().begin()->second; |
765 | StringRef FullName = PD.getKey(); |
766 | InstrProfileMap[FullName] = InstrProfileEntry(R); |
767 | buildStaticFuncMap(FullName); |
768 | } |
769 | |
770 | for (auto &PD : Reader->getProfiles()) { |
771 | sampleprof::FunctionSamples &FS = PD.second; |
772 | BuildMaxSampleMap(FS, FS.getName()); |
773 | } |
774 | |
775 | ProfileSummary InstrPS = *IPBuilder.getSummary(); |
776 | ProfileSummary SamplePS = Reader->getSummary(); |
777 | |
778 | // Compute cold thresholds for instr profile and sample profile. |
779 | uint64_t HotSampleThreshold = |
780 | ProfileSummaryBuilder::getEntryForPercentile( |
781 | SamplePS.getDetailedSummary(), |
782 | ProfileSummaryBuilder::DefaultCutoffs[HotPercentileIdx]) |
783 | .MinCount; |
784 | uint64_t ColdSampleThreshold = |
785 | ProfileSummaryBuilder::getEntryForPercentile( |
786 | SamplePS.getDetailedSummary(), |
787 | ProfileSummaryBuilder::DefaultCutoffs[ColdPercentileIdx]) |
788 | .MinCount; |
789 | uint64_t HotInstrThreshold = |
790 | ProfileSummaryBuilder::getEntryForPercentile( |
791 | InstrPS.getDetailedSummary(), |
792 | ProfileSummaryBuilder::DefaultCutoffs[HotPercentileIdx]) |
793 | .MinCount; |
794 | uint64_t ColdInstrThreshold = |
795 | InstrProfColdThreshold |
796 | ? InstrProfColdThreshold |
797 | : ProfileSummaryBuilder::getEntryForPercentile( |
798 | InstrPS.getDetailedSummary(), |
799 | ProfileSummaryBuilder::DefaultCutoffs[ColdPercentileIdx]) |
800 | .MinCount; |
801 | |
802 | // Find hot/warm functions in sample profile which is cold in instr profile |
803 | // and adjust the profiles of those functions in the instr profile. |
804 | for (const auto &E : FlattenSampleMap) { |
805 | uint64_t SampleMaxCount = std::max(E.second.first, E.second.second); |
806 | if (SampleMaxCount < ColdSampleThreshold) |
807 | continue; |
808 | const StringRef &Name = E.first; |
809 | auto It = InstrProfileMap.find(Name); |
810 | if (It == InstrProfileMap.end()) { |
811 | auto NewName = StaticFuncMap.find(Name); |
812 | if (NewName != StaticFuncMap.end()) { |
813 | It = InstrProfileMap.find(NewName->second.str()); |
814 | if (NewName->second == DuplicateNameStr) { |
815 | WithColor::warning() |
816 | << "Static function " << Name |
817 | << " has multiple promoted names, cannot adjust profile.\n"; |
818 | } |
819 | } |
820 | } |
821 | if (It == InstrProfileMap.end() || |
822 | It->second.MaxCount > ColdInstrThreshold || |
823 | It->second.NumEdgeCounters < SupplMinSizeThreshold) |
824 | continue; |
825 | bool SetToHot = SampleMaxCount >= HotSampleThreshold; |
826 | updateInstrProfileEntry(It->second, SetToHot, HotInstrThreshold, |
827 | ColdInstrThreshold, ZeroCounterThreshold); |
828 | } |
829 | } |
830 | |
831 | /// The main function to supplement instr profile with sample profile. |
832 | /// \Inputs contains the instr profile. \p SampleFilename specifies the |
833 | /// sample profile. \p OutputFilename specifies the output profile name. |
834 | /// \p OutputFormat specifies the output profile format. \p OutputSparse |
835 | /// specifies whether to generate sparse profile. \p SupplMinSizeThreshold |
836 | /// specifies the minimal size for the functions whose profile will be |
837 | /// adjusted. \p ZeroCounterThreshold is the threshold to check whether |
838 | /// a function contains too many zero counters and whether its profile |
839 | /// should be dropped. \p InstrProfColdThreshold is the user specified |
840 | /// cold threshold which will override the cold threshold got from the |
841 | /// instr profile summary. |
842 | static void supplementInstrProfile( |
843 | const WeightedFileVector &Inputs, StringRef SampleFilename, |
844 | StringRef OutputFilename, ProfileFormat OutputFormat, bool OutputSparse, |
845 | unsigned SupplMinSizeThreshold, float ZeroCounterThreshold, |
846 | unsigned InstrProfColdThreshold) { |
847 | if (OutputFilename.compare("-") == 0) |
848 | exitWithError("cannot write indexed profdata format to stdout"); |
849 | if (Inputs.size() != 1) |
850 | exitWithError("expect one input to be an instr profile"); |
851 | if (Inputs[0].Weight != 1) |
852 | exitWithError("expect instr profile doesn't have weight"); |
853 | |
854 | StringRef InstrFilename = Inputs[0].Filename; |
855 | |
856 | // Read sample profile. |
857 | LLVMContext Context; |
858 | auto FS = vfs::getRealFileSystem(); |
859 | auto ReaderOrErr = sampleprof::SampleProfileReader::create( |
860 | SampleFilename.str(), Context, *FS, FSDiscriminatorPassOption); |
861 | if (std::error_code EC = ReaderOrErr.getError()) |
862 | exitWithErrorCode(EC, SampleFilename); |
863 | auto Reader = std::move(ReaderOrErr.get()); |
864 | if (std::error_code EC = Reader->read()) |
865 | exitWithErrorCode(EC, SampleFilename); |
866 | |
867 | // Read instr profile. |
868 | std::mutex ErrorLock; |
869 | SmallSet<instrprof_error, 4> WriterErrorCodes; |
870 | auto WC = std::make_unique<WriterContext>(OutputSparse, ErrorLock, |
871 | WriterErrorCodes); |
872 | loadInput(Inputs[0], nullptr, nullptr, /*ProfiledBinary=*/"", WC.get()); |
873 | if (WC->Errors.size() > 0) |
874 | exitWithError(std::move(WC->Errors[0].first), InstrFilename); |
875 | |
876 | adjustInstrProfile(WC, Reader, SupplMinSizeThreshold, ZeroCounterThreshold, |
877 | InstrProfColdThreshold); |
878 | writeInstrProfile(OutputFilename, OutputFormat, WC->Writer); |
879 | } |
880 | |
881 | /// Make a copy of the given function samples with all symbol names remapped |
882 | /// by the provided symbol remapper. |
883 | static sampleprof::FunctionSamples |
884 | remapSamples(const sampleprof::FunctionSamples &Samples, |
885 | SymbolRemapper &Remapper, sampleprof_error &Error) { |
886 | sampleprof::FunctionSamples Result; |
887 | Result.setName(Remapper(Samples.getName())); |
888 | Result.addTotalSamples(Samples.getTotalSamples()); |
889 | Result.addHeadSamples(Samples.getHeadSamples()); |
890 | for (const auto &BodySample : Samples.getBodySamples()) { |
891 | uint32_t MaskedDiscriminator = |
892 | BodySample.first.Discriminator & getDiscriminatorMask(); |
893 | Result.addBodySamples(BodySample.first.LineOffset, MaskedDiscriminator, |
894 | BodySample.second.getSamples()); |
895 | for (const auto &Target : BodySample.second.getCallTargets()) { |
896 | Result.addCalledTargetSamples(BodySample.first.LineOffset, |
897 | MaskedDiscriminator, |
898 | Remapper(Target.first()), Target.second); |
899 | } |
900 | } |
901 | for (const auto &CallsiteSamples : Samples.getCallsiteSamples()) { |
902 | sampleprof::FunctionSamplesMap &Target = |
903 | Result.functionSamplesAt(CallsiteSamples.first); |
904 | for (const auto &Callsite : CallsiteSamples.second) { |
905 | sampleprof::FunctionSamples Remapped = |
906 | remapSamples(Callsite.second, Remapper, Error); |
907 | MergeResult(Error, |
908 | Target[std::string(Remapped.getName())].merge(Remapped)); |
909 | } |
910 | } |
911 | return Result; |
912 | } |
913 | |
914 | static sampleprof::SampleProfileFormat FormatMap[] = { |
915 | sampleprof::SPF_None, |
916 | sampleprof::SPF_Text, |
917 | sampleprof::SPF_None, |
918 | sampleprof::SPF_Ext_Binary, |
919 | sampleprof::SPF_GCC, |
920 | sampleprof::SPF_Binary}; |
921 | |
922 | static std::unique_ptr<MemoryBuffer> |
923 | getInputFileBuf(const StringRef &InputFile) { |
924 | if (InputFile == "") |
925 | return {}; |
926 | |
927 | auto BufOrError = MemoryBuffer::getFileOrSTDIN(InputFile); |
928 | if (!BufOrError) |
929 | exitWithErrorCode(BufOrError.getError(), InputFile); |
930 | |
931 | return std::move(*BufOrError); |
932 | } |
933 | |
934 | static void populateProfileSymbolList(MemoryBuffer *Buffer, |
935 | sampleprof::ProfileSymbolList &PSL) { |
936 | if (!Buffer) |
937 | return; |
938 | |
939 | SmallVector<StringRef, 32> SymbolVec; |
940 | StringRef Data = Buffer->getBuffer(); |
941 | Data.split(SymbolVec, '\n', /*MaxSplit=*/-1, /*KeepEmpty=*/false); |
942 | |
943 | for (StringRef SymbolStr : SymbolVec) |
944 | PSL.add(SymbolStr.trim()); |
945 | } |
946 | |
947 | static void handleExtBinaryWriter(sampleprof::SampleProfileWriter &Writer, |
948 | ProfileFormat OutputFormat, |
949 | MemoryBuffer *Buffer, |
950 | sampleprof::ProfileSymbolList &WriterList, |
951 | bool CompressAllSections, bool UseMD5, |
952 | bool GenPartialProfile) { |
953 | populateProfileSymbolList(Buffer, WriterList); |
954 | if (WriterList.size() > 0 && OutputFormat != PF_Ext_Binary) |
955 | warn("Profile Symbol list is not empty but the output format is not " |
956 | "ExtBinary format. The list will be lost in the output. "); |
957 | |
958 | Writer.setProfileSymbolList(&WriterList); |
959 | |
960 | if (CompressAllSections) { |
961 | if (OutputFormat != PF_Ext_Binary) |
962 | warn("-compress-all-section is ignored. Specify -extbinary to enable it"); |
963 | else |
964 | Writer.setToCompressAllSections(); |
965 | } |
966 | if (UseMD5) { |
967 | if (OutputFormat != PF_Ext_Binary) |
968 | warn("-use-md5 is ignored. Specify -extbinary to enable it"); |
969 | else |
970 | Writer.setUseMD5(); |
971 | } |
972 | if (GenPartialProfile) { |
973 | if (OutputFormat != PF_Ext_Binary) |
974 | warn("-gen-partial-profile is ignored. Specify -extbinary to enable it"); |
975 | else |
976 | Writer.setPartialProfile(); |
977 | } |
978 | } |
979 | |
980 | static void |
981 | mergeSampleProfile(const WeightedFileVector &Inputs, SymbolRemapper *Remapper, |
982 | StringRef OutputFilename, ProfileFormat OutputFormat, |
983 | StringRef ProfileSymbolListFile, bool CompressAllSections, |
984 | bool UseMD5, bool GenPartialProfile, |
985 | SampleProfileLayout ProfileLayout, |
986 | bool SampleMergeColdContext, bool SampleTrimColdContext, |
987 | bool SampleColdContextFrameDepth, FailureMode FailMode, |
988 | bool DropProfileSymbolList, size_t OutputSizeLimit) { |
989 | using namespace sampleprof; |
990 | SampleProfileMap ProfileMap; |
991 | SmallVector<std::unique_ptr<sampleprof::SampleProfileReader>, 5> Readers; |
992 | LLVMContext Context; |
993 | sampleprof::ProfileSymbolList WriterList; |
994 | std::optional<bool> ProfileIsProbeBased; |
995 | std::optional<bool> ProfileIsCS; |
996 | for (const auto &Input : Inputs) { |
997 | auto FS = vfs::getRealFileSystem(); |
998 | auto ReaderOrErr = SampleProfileReader::create(Input.Filename, Context, *FS, |
999 | FSDiscriminatorPassOption); |
1000 | if (std::error_code EC = ReaderOrErr.getError()) { |
1001 | warnOrExitGivenError(FailMode, EC, Input.Filename); |
1002 | continue; |
1003 | } |
1004 | |
1005 | // We need to keep the readers around until after all the files are |
1006 | // read so that we do not lose the function names stored in each |
1007 | // reader's memory. The function names are needed to write out the |
1008 | // merged profile map. |
1009 | Readers.push_back(std::move(ReaderOrErr.get())); |
1010 | const auto Reader = Readers.back().get(); |
1011 | if (std::error_code EC = Reader->read()) { |
1012 | warnOrExitGivenError(FailMode, EC, Input.Filename); |
1013 | Readers.pop_back(); |
1014 | continue; |
1015 | } |
1016 | |
1017 | SampleProfileMap &Profiles = Reader->getProfiles(); |
1018 | if (ProfileIsProbeBased && |
1019 | ProfileIsProbeBased != FunctionSamples::ProfileIsProbeBased) |
1020 | exitWithError( |
1021 | "cannot merge probe-based profile with non-probe-based profile"); |
1022 | ProfileIsProbeBased = FunctionSamples::ProfileIsProbeBased; |
1023 | if (ProfileIsCS && ProfileIsCS != FunctionSamples::ProfileIsCS) |
1024 | exitWithError("cannot merge CS profile with non-CS profile"); |
1025 | ProfileIsCS = FunctionSamples::ProfileIsCS; |
1026 | for (SampleProfileMap::iterator I = Profiles.begin(), E = Profiles.end(); |
1027 | I != E; ++I) { |
1028 | sampleprof_error Result = sampleprof_error::success; |
1029 | FunctionSamples Remapped = |
1030 | Remapper ? remapSamples(I->second, *Remapper, Result) |
1031 | : FunctionSamples(); |
1032 | FunctionSamples &Samples = Remapper ? Remapped : I->second; |
1033 | SampleContext FContext = Samples.getContext(); |
1034 | MergeResult(Result, ProfileMap[FContext].merge(Samples, Input.Weight)); |
1035 | if (Result != sampleprof_error::success) { |
1036 | std::error_code EC = make_error_code(Result); |
1037 | handleMergeWriterError(errorCodeToError(EC), Input.Filename, |
1038 | FContext.toString()); |
1039 | } |
1040 | } |
1041 | |
1042 | if (!DropProfileSymbolList) { |
1043 | std::unique_ptr<sampleprof::ProfileSymbolList> ReaderList = |
1044 | Reader->getProfileSymbolList(); |
1045 | if (ReaderList) |
1046 | WriterList.merge(*ReaderList); |
1047 | } |
1048 | } |
1049 | |
1050 | if (ProfileIsCS && (SampleMergeColdContext || SampleTrimColdContext)) { |
1051 | // Use threshold calculated from profile summary unless specified. |
1052 | SampleProfileSummaryBuilder Builder(ProfileSummaryBuilder::DefaultCutoffs); |
1053 | auto Summary = Builder.computeSummaryForProfiles(ProfileMap); |
1054 | uint64_t SampleProfColdThreshold = |
1055 | ProfileSummaryBuilder::getColdCountThreshold( |
1056 | (Summary->getDetailedSummary())); |
1057 | |
1058 | // Trim and merge cold context profile using cold threshold above; |
1059 | SampleContextTrimmer(ProfileMap) |
1060 | .trimAndMergeColdContextProfiles( |
1061 | SampleProfColdThreshold, SampleTrimColdContext, |
1062 | SampleMergeColdContext, SampleColdContextFrameDepth, false); |
1063 | } |
1064 | |
1065 | if (ProfileLayout == llvm::sampleprof::SPL_Flat) { |
1066 | ProfileConverter::flattenProfile(ProfileMap, FunctionSamples::ProfileIsCS); |
1067 | ProfileIsCS = FunctionSamples::ProfileIsCS = false; |
1068 | } else if (ProfileIsCS && ProfileLayout == llvm::sampleprof::SPL_Nest) { |
1069 | ProfileConverter CSConverter(ProfileMap); |
1070 | CSConverter.convertCSProfiles(); |
1071 | ProfileIsCS = FunctionSamples::ProfileIsCS = false; |
1072 | } |
1073 | |
1074 | auto WriterOrErr = |
1075 | SampleProfileWriter::create(OutputFilename, FormatMap[OutputFormat]); |
1076 | if (std::error_code EC = WriterOrErr.getError()) |
1077 | exitWithErrorCode(EC, OutputFilename); |
1078 | |
1079 | auto Writer = std::move(WriterOrErr.get()); |
1080 | // WriterList will have StringRef refering to string in Buffer. |
1081 | // Make sure Buffer lives as long as WriterList. |
1082 | auto Buffer = getInputFileBuf(ProfileSymbolListFile); |
1083 | handleExtBinaryWriter(*Writer, OutputFormat, Buffer.get(), WriterList, |
1084 | CompressAllSections, UseMD5, GenPartialProfile); |
1085 | |
1086 | // If OutputSizeLimit is 0 (default), it is the same as write(). |
1087 | if (std::error_code EC = |
1088 | Writer->writeWithSizeLimit(ProfileMap, OutputSizeLimit)) |
1089 | exitWithErrorCode(std::move(EC)); |
1090 | } |
1091 | |
1092 | static WeightedFile parseWeightedFile(const StringRef &WeightedFilename) { |
1093 | StringRef WeightStr, FileName; |
1094 | std::tie(WeightStr, FileName) = WeightedFilename.split(','); |
1095 | |
1096 | uint64_t Weight; |
1097 | if (WeightStr.getAsInteger(10, Weight) || Weight < 1) |
1098 | exitWithError("input weight must be a positive integer"); |
1099 | |
1100 | return {std::string(FileName), Weight}; |
1101 | } |
1102 | |
1103 | static void addWeightedInput(WeightedFileVector &WNI, const WeightedFile &WF) { |
1104 | StringRef Filename = WF.Filename; |
1105 | uint64_t Weight = WF.Weight; |
1106 | |
1107 | // If it's STDIN just pass it on. |
1108 | if (Filename == "-") { |
1109 | WNI.push_back({std::string(Filename), Weight}); |
1110 | return; |
1111 | } |
1112 | |
1113 | llvm::sys::fs::file_status Status; |
1114 | llvm::sys::fs::status(Filename, Status); |
1115 | if (!llvm::sys::fs::exists(Status)) |
1116 | exitWithErrorCode(make_error_code(errc::no_such_file_or_directory), |
1117 | Filename); |
1118 | // If it's a source file, collect it. |
1119 | if (llvm::sys::fs::is_regular_file(Status)) { |
1120 | WNI.push_back({std::string(Filename), Weight}); |
1121 | return; |
1122 | } |
1123 | |
1124 | if (llvm::sys::fs::is_directory(Status)) { |
1125 | std::error_code EC; |
1126 | for (llvm::sys::fs::recursive_directory_iterator F(Filename, EC), E; |
1127 | F != E && !EC; F.increment(EC)) { |
1128 | if (llvm::sys::fs::is_regular_file(F->path())) { |
1129 | addWeightedInput(WNI, {F->path(), Weight}); |
1130 | } |
1131 | } |
1132 | if (EC) |
1133 | exitWithErrorCode(EC, Filename); |
1134 | } |
1135 | } |
1136 | |
1137 | static void parseInputFilenamesFile(MemoryBuffer *Buffer, |
1138 | WeightedFileVector &WFV) { |
1139 | if (!Buffer) |
1140 | return; |
1141 | |
1142 | SmallVector<StringRef, 8> Entries; |
1143 | StringRef Data = Buffer->getBuffer(); |
1144 | Data.split(Entries, '\n', /*MaxSplit=*/-1, /*KeepEmpty=*/false); |
1145 | for (const StringRef &FileWeightEntry : Entries) { |
1146 | StringRef SanitizedEntry = FileWeightEntry.trim(" \t\v\f\r"); |
1147 | // Skip comments. |
1148 | if (SanitizedEntry.startswith("#")) |
1149 | continue; |
1150 | // If there's no comma, it's an unweighted profile. |
1151 | else if (!SanitizedEntry.contains(',')) |
1152 | addWeightedInput(WFV, {std::string(SanitizedEntry), 1}); |
1153 | else |
1154 | addWeightedInput(WFV, parseWeightedFile(SanitizedEntry)); |
1155 | } |
1156 | } |
1157 | |
1158 | static int merge_main(int argc, const char *argv[]) { |
1159 | cl::list<std::string> InputFilenames(cl::Positional, |
1160 | cl::desc("<filename...>")); |
1161 | cl::list<std::string> WeightedInputFilenames("weighted-input", |
1162 | cl::desc("<weight>,<filename>")); |
1163 | cl::opt<std::string> InputFilenamesFile( |
1164 | "input-files", cl::init(""), |
1165 | cl::desc("Path to file containing newline-separated " |
1166 | "[<weight>,]<filename> entries")); |
1167 | cl::alias InputFilenamesFileA("f", cl::desc("Alias for --input-files"), |
1168 | cl::aliasopt(InputFilenamesFile)); |
1169 | cl::opt<bool> DumpInputFileList( |
1170 | "dump-input-file-list", cl::init(false), cl::Hidden, |
1171 | cl::desc("Dump the list of input files and their weights, then exit")); |
1172 | cl::opt<std::string> RemappingFile("remapping-file", cl::value_desc("file"), |
1173 | cl::desc("Symbol remapping file")); |
1174 | cl::alias RemappingFileA("r", cl::desc("Alias for --remapping-file"), |
1175 | cl::aliasopt(RemappingFile)); |
1176 | cl::opt<std::string> OutputFilename("output", cl::value_desc("output"), |
1177 | cl::init("-"), cl::desc("Output file")); |
1178 | cl::alias OutputFilenameA("o", cl::desc("Alias for --output"), |
1179 | cl::aliasopt(OutputFilename)); |
1180 | cl::opt<ProfileKinds> ProfileKind( |
1181 | cl::desc("Profile kind:"), cl::init(instr), |
1182 | cl::values(clEnumVal(instr, "Instrumentation profile (default)")llvm::cl::OptionEnumValue { "instr", int(instr), "Instrumentation profile (default)" }, |
1183 | clEnumVal(sample, "Sample profile")llvm::cl::OptionEnumValue { "sample", int(sample), "Sample profile" })); |
1184 | cl::opt<ProfileFormat> OutputFormat( |
1185 | cl::desc("Format of output profile"), cl::init(PF_Ext_Binary), |
1186 | cl::values( |
1187 | clEnumValN(PF_Binary, "binary", "Binary encoding")llvm::cl::OptionEnumValue { "binary", int(PF_Binary), "Binary encoding" }, |
1188 | clEnumValN(PF_Ext_Binary, "extbinary", "Extensible binary encoding "llvm::cl::OptionEnumValue { "extbinary", int(PF_Ext_Binary), "Extensible binary encoding " "(default)" } |
1189 | "(default)")llvm::cl::OptionEnumValue { "extbinary", int(PF_Ext_Binary), "Extensible binary encoding " "(default)" }, |
1190 | clEnumValN(PF_Text, "text", "Text encoding")llvm::cl::OptionEnumValue { "text", int(PF_Text), "Text encoding" }, |
1191 | clEnumValN(PF_GCC, "gcc",llvm::cl::OptionEnumValue { "gcc", int(PF_GCC), "GCC encoding (only meaningful for -sample)" } |
1192 | "GCC encoding (only meaningful for -sample)")llvm::cl::OptionEnumValue { "gcc", int(PF_GCC), "GCC encoding (only meaningful for -sample)" })); |
1193 | cl::opt<FailureMode> FailureMode( |
1194 | "failure-mode", cl::init(failIfAnyAreInvalid), cl::desc("Failure mode:"), |
1195 | cl::values(clEnumValN(failIfAnyAreInvalid, "any",llvm::cl::OptionEnumValue { "any", int(failIfAnyAreInvalid), "Fail if any profile is invalid." } |
1196 | "Fail if any profile is invalid.")llvm::cl::OptionEnumValue { "any", int(failIfAnyAreInvalid), "Fail if any profile is invalid." }, |
1197 | clEnumValN(failIfAllAreInvalid, "all",llvm::cl::OptionEnumValue { "all", int(failIfAllAreInvalid), "Fail only if all profiles are invalid." } |
1198 | "Fail only if all profiles are invalid.")llvm::cl::OptionEnumValue { "all", int(failIfAllAreInvalid), "Fail only if all profiles are invalid." })); |
1199 | cl::opt<bool> OutputSparse("sparse", cl::init(false), |
1200 | cl::desc("Generate a sparse profile (only meaningful for -instr)")); |
1201 | cl::opt<unsigned> NumThreads( |
1202 | "num-threads", cl::init(0), |
1203 | cl::desc("Number of merge threads to use (default: autodetect)")); |
1204 | cl::alias NumThreadsA("j", cl::desc("Alias for --num-threads"), |
1205 | cl::aliasopt(NumThreads)); |
1206 | cl::opt<std::string> ProfileSymbolListFile( |
1207 | "prof-sym-list", cl::init(""), |
1208 | cl::desc("Path to file containing the list of function symbols " |
1209 | "used to populate profile symbol list")); |
1210 | cl::opt<bool> CompressAllSections( |
1211 | "compress-all-sections", cl::init(false), cl::Hidden, |
1212 | cl::desc("Compress all sections when writing the profile (only " |
1213 | "meaningful for -extbinary)")); |
1214 | cl::opt<bool> UseMD5( |
1215 | "use-md5", cl::init(false), cl::Hidden, |
1216 | cl::desc("Choose to use MD5 to represent string in name table (only " |
1217 | "meaningful for -extbinary)")); |
1218 | cl::opt<bool> SampleMergeColdContext( |
1219 | "sample-merge-cold-context", cl::init(false), cl::Hidden, |
1220 | cl::desc( |
1221 | "Merge context sample profiles whose count is below cold threshold")); |
1222 | cl::opt<bool> SampleTrimColdContext( |
1223 | "sample-trim-cold-context", cl::init(false), cl::Hidden, |
1224 | cl::desc( |
1225 | "Trim context sample profiles whose count is below cold threshold")); |
1226 | cl::opt<uint32_t> SampleColdContextFrameDepth( |
1227 | "sample-frame-depth-for-cold-context", cl::init(1), |
1228 | cl::desc("Keep the last K frames while merging cold profile. 1 means the " |
1229 | "context-less base profile")); |
1230 | cl::opt<size_t> OutputSizeLimit( |
1231 | "output-size-limit", cl::init(0), cl::Hidden, |
1232 | cl::desc("Trim cold functions until profile size is below specified " |
1233 | "limit in bytes. This uses a heursitic and functions may be " |
1234 | "excessively trimmed")); |
1235 | cl::opt<bool> GenPartialProfile( |
1236 | "gen-partial-profile", cl::init(false), cl::Hidden, |
1237 | cl::desc("Generate a partial profile (only meaningful for -extbinary)")); |
1238 | cl::opt<std::string> SupplInstrWithSample( |
1239 | "supplement-instr-with-sample", cl::init(""), cl::Hidden, |
1240 | cl::desc("Supplement an instr profile with sample profile, to correct " |
1241 | "the profile unrepresentativeness issue. The sample " |
1242 | "profile is the input of the flag. Output will be in instr " |
1243 | "format (The flag only works with -instr)")); |
1244 | cl::opt<float> ZeroCounterThreshold( |
1245 | "zero-counter-threshold", cl::init(0.7), cl::Hidden, |
1246 | cl::desc("For the function which is cold in instr profile but hot in " |
1247 | "sample profile, if the ratio of the number of zero counters " |
1248 | "divided by the total number of counters is above the " |
1249 | "threshold, the profile of the function will be regarded as " |
1250 | "being harmful for performance and will be dropped.")); |
1251 | cl::opt<unsigned> SupplMinSizeThreshold( |
1252 | "suppl-min-size-threshold", cl::init(10), cl::Hidden, |
1253 | cl::desc("If the size of a function is smaller than the threshold, " |
1254 | "assume it can be inlined by PGO early inliner and it won't " |
1255 | "be adjusted based on sample profile.")); |
1256 | cl::opt<unsigned> InstrProfColdThreshold( |
1257 | "instr-prof-cold-threshold", cl::init(0), cl::Hidden, |
1258 | cl::desc("User specified cold threshold for instr profile which will " |
1259 | "override the cold threshold got from profile summary. ")); |
1260 | cl::opt<SampleProfileLayout> ProfileLayout( |
1261 | "convert-sample-profile-layout", |
1262 | cl::desc("Convert the generated profile to a profile with a new layout"), |
1263 | cl::init(SPL_None), |
1264 | cl::values( |
1265 | clEnumValN(SPL_Nest, "nest",llvm::cl::OptionEnumValue { "nest", int(SPL_Nest), "Nested profile, the input should be CS flat profile" } |
1266 | "Nested profile, the input should be CS flat profile")llvm::cl::OptionEnumValue { "nest", int(SPL_Nest), "Nested profile, the input should be CS flat profile" }, |
1267 | clEnumValN(SPL_Flat, "flat",llvm::cl::OptionEnumValue { "flat", int(SPL_Flat), "Profile with nested inlinee flatten out" } |
1268 | "Profile with nested inlinee flatten out")llvm::cl::OptionEnumValue { "flat", int(SPL_Flat), "Profile with nested inlinee flatten out" })); |
1269 | cl::opt<std::string> DebugInfoFilename( |
1270 | "debug-info", cl::init(""), |
1271 | cl::desc("Use the provided debug info to correlate the raw profile.")); |
1272 | cl::opt<std::string> ProfiledBinary( |
1273 | "profiled-binary", cl::init(""), |
1274 | cl::desc("Path to binary from which the profile was collected.")); |
1275 | cl::opt<bool> DropProfileSymbolList( |
1276 | "drop-profile-symbol-list", cl::init(false), cl::Hidden, |
1277 | cl::desc("Drop the profile symbol list when merging AutoFDO profiles " |
1278 | "(only meaningful for -sample)")); |
1279 | // WARNING: This reservoir size value is propagated to any input indexed |
1280 | // profiles for simplicity. Changing this value between invocations could |
1281 | // result in sample bias. |
1282 | cl::opt<uint64_t> TemporalProfTraceReservoirSize( |
1283 | "temporal-profile-trace-reservoir-size", cl::init(100), |
1284 | cl::desc("The maximum number of stored temporal profile traces (default: " |
1285 | "100)")); |
1286 | cl::opt<uint64_t> TemporalProfMaxTraceLength( |
1287 | "temporal-profile-max-trace-length", cl::init(10000), |
1288 | cl::desc("The maximum length of a single temporal profile trace " |
1289 | "(default: 10000)")); |
1290 | |
1291 | cl::ParseCommandLineOptions(argc, argv, "LLVM profile data merger\n"); |
1292 | |
1293 | WeightedFileVector WeightedInputs; |
1294 | for (StringRef Filename : InputFilenames) |
1295 | addWeightedInput(WeightedInputs, {std::string(Filename), 1}); |
1296 | for (StringRef WeightedFilename : WeightedInputFilenames) |
1297 | addWeightedInput(WeightedInputs, parseWeightedFile(WeightedFilename)); |
1298 | |
1299 | // Make sure that the file buffer stays alive for the duration of the |
1300 | // weighted input vector's lifetime. |
1301 | auto Buffer = getInputFileBuf(InputFilenamesFile); |
1302 | parseInputFilenamesFile(Buffer.get(), WeightedInputs); |
1303 | |
1304 | if (WeightedInputs.empty()) |
1305 | exitWithError("no input files specified. See " + |
1306 | sys::path::filename(argv[0]) + " -help"); |
1307 | |
1308 | if (DumpInputFileList) { |
1309 | for (auto &WF : WeightedInputs) |
1310 | outs() << WF.Weight << "," << WF.Filename << "\n"; |
1311 | return 0; |
1312 | } |
1313 | |
1314 | std::unique_ptr<SymbolRemapper> Remapper; |
1315 | if (!RemappingFile.empty()) |
1316 | Remapper = SymbolRemapper::create(RemappingFile); |
1317 | |
1318 | if (!SupplInstrWithSample.empty()) { |
1319 | if (ProfileKind != instr) |
1320 | exitWithError( |
1321 | "-supplement-instr-with-sample can only work with -instr. "); |
1322 | |
1323 | supplementInstrProfile(WeightedInputs, SupplInstrWithSample, OutputFilename, |
1324 | OutputFormat, OutputSparse, SupplMinSizeThreshold, |
1325 | ZeroCounterThreshold, InstrProfColdThreshold); |
1326 | return 0; |
1327 | } |
1328 | |
1329 | if (ProfileKind == instr) |
1330 | mergeInstrProfile(WeightedInputs, DebugInfoFilename, Remapper.get(), |
1331 | OutputFilename, OutputFormat, |
1332 | TemporalProfTraceReservoirSize, |
1333 | TemporalProfMaxTraceLength, OutputSparse, NumThreads, |
1334 | FailureMode, ProfiledBinary); |
1335 | else |
1336 | mergeSampleProfile(WeightedInputs, Remapper.get(), OutputFilename, |
1337 | OutputFormat, ProfileSymbolListFile, CompressAllSections, |
1338 | UseMD5, GenPartialProfile, ProfileLayout, |
1339 | SampleMergeColdContext, SampleTrimColdContext, |
1340 | SampleColdContextFrameDepth, FailureMode, |
1341 | DropProfileSymbolList, OutputSizeLimit); |
1342 | return 0; |
1343 | } |
1344 | |
1345 | /// Computer the overlap b/w profile BaseFilename and profile TestFilename. |
1346 | static void overlapInstrProfile(const std::string &BaseFilename, |
1347 | const std::string &TestFilename, |
1348 | const OverlapFuncFilters &FuncFilter, |
1349 | raw_fd_ostream &OS, bool IsCS) { |
1350 | std::mutex ErrorLock; |
1351 | SmallSet<instrprof_error, 4> WriterErrorCodes; |
1352 | WriterContext Context(false, ErrorLock, WriterErrorCodes); |
1353 | WeightedFile WeightedInput{BaseFilename, 1}; |
1354 | OverlapStats Overlap; |
1355 | Error E = Overlap.accumulateCounts(BaseFilename, TestFilename, IsCS); |
1356 | if (E) |
1357 | exitWithError(std::move(E), "error in getting profile count sums"); |
1358 | if (Overlap.Base.CountSum < 1.0f) { |
1359 | OS << "Sum of edge counts for profile " << BaseFilename << " is 0.\n"; |
1360 | exit(0); |
1361 | } |
1362 | if (Overlap.Test.CountSum < 1.0f) { |
1363 | OS << "Sum of edge counts for profile " << TestFilename << " is 0.\n"; |
1364 | exit(0); |
1365 | } |
1366 | loadInput(WeightedInput, nullptr, nullptr, /*ProfiledBinary=*/"", &Context); |
1367 | overlapInput(BaseFilename, TestFilename, &Context, Overlap, FuncFilter, OS, |
1368 | IsCS); |
1369 | Overlap.dump(OS); |
1370 | } |
1371 | |
1372 | namespace { |
1373 | struct SampleOverlapStats { |
1374 | SampleContext BaseName; |
1375 | SampleContext TestName; |
1376 | // Number of overlap units |
1377 | uint64_t OverlapCount; |
1378 | // Total samples of overlap units |
1379 | uint64_t OverlapSample; |
1380 | // Number of and total samples of units that only present in base or test |
1381 | // profile |
1382 | uint64_t BaseUniqueCount; |
1383 | uint64_t BaseUniqueSample; |
1384 | uint64_t TestUniqueCount; |
1385 | uint64_t TestUniqueSample; |
1386 | // Number of units and total samples in base or test profile |
1387 | uint64_t BaseCount; |
1388 | uint64_t BaseSample; |
1389 | uint64_t TestCount; |
1390 | uint64_t TestSample; |
1391 | // Number of and total samples of units that present in at least one profile |
1392 | uint64_t UnionCount; |
1393 | uint64_t UnionSample; |
1394 | // Weighted similarity |
1395 | double Similarity; |
1396 | // For SampleOverlapStats instances representing functions, weights of the |
1397 | // function in base and test profiles |
1398 | double BaseWeight; |
1399 | double TestWeight; |
1400 | |
1401 | SampleOverlapStats() |
1402 | : OverlapCount(0), OverlapSample(0), BaseUniqueCount(0), |
1403 | BaseUniqueSample(0), TestUniqueCount(0), TestUniqueSample(0), |
1404 | BaseCount(0), BaseSample(0), TestCount(0), TestSample(0), UnionCount(0), |
1405 | UnionSample(0), Similarity(0.0), BaseWeight(0.0), TestWeight(0.0) {} |
1406 | }; |
1407 | } // end anonymous namespace |
1408 | |
namespace {
/// Per-function sample statistics: the sum of samples, the largest single
/// sample and the number of hot blocks. A default-constructed instance has
/// all three set to zero.
struct FuncSampleStats {
  uint64_t SampleSum = 0;
  uint64_t MaxSample = 0;
  uint64_t HotBlockCount = 0;

  FuncSampleStats() = default;
  FuncSampleStats(uint64_t SampleSum, uint64_t MaxSample,
                  uint64_t HotBlockCount)
      : SampleSum(SampleSum), MaxSample(MaxSample),
        HotBlockCount(HotBlockCount) {}
};
} // end anonymous namespace
1421 | |
namespace {
/// Result of comparing the current keys of two sorted maps: both maps hold
/// the key (MS_Match), only the first does (MS_FirstUnique), only the second
/// does (MS_SecondUnique), or no comparison has been made yet (MS_None).
enum MatchStatus { MS_Match, MS_FirstUnique, MS_SecondUnique, MS_None };

/// Walks two sorted maps in lock step, merge-style. Instantiate it with a
/// map iterator type whose elements expose a comparable `first` key.
template <class T> class MatchStep {
public:
  MatchStep() = delete;

  MatchStep(T FirstIter, T FirstEnd, T SecondIter, T SecondEnd)
      : FirstIter(FirstIter), FirstEnd(FirstEnd), SecondIter(SecondIter),
        SecondEnd(SecondEnd) {}

  bool isFirstFinished() const { return FirstIter == FirstEnd; }

  bool isSecondFinished() const { return SecondIter == SecondEnd; }

  bool areBothFinished() const {
    return isFirstFinished() && isSecondFinished();
  }

  /// Move past the element(s) consumed by the previous status, then
  /// recompute the status from the iterators' new positions. When the
  /// previous status is MS_None nothing has been compared yet, so the
  /// iterators stay where they are and only the status is computed. When
  /// both containers are exhausted the status is left unchanged.
  void updateOneStep();

  T getFirstIter() const { return FirstIter; }

  T getSecondIter() const { return SecondIter; }

  MatchStatus getMatchStatus() const { return Status; }

private:
  // Cursor and end of the first container.
  T FirstIter;
  T FirstEnd;
  // Cursor and end of the second container.
  T SecondIter;
  T SecondEnd;
  // Status computed by the most recent step.
  MatchStatus Status = MS_None;
};
} // end anonymous namespace

template <class T> void MatchStep<T>::updateOneStep() {
  // Consume whatever the previous step reported; MS_None consumes nothing.
  if (Status == MS_Match || Status == MS_FirstUnique)
    ++FirstIter;
  if (Status == MS_Match || Status == MS_SecondUnique)
    ++SecondIter;

  if (areBothFinished())
    return;

  // Exactly one side may be exhausted here; the other side's element is then
  // unique by definition.
  if (isSecondFinished()) {
    Status = MS_FirstUnique;
    return;
  }
  if (isFirstFinished()) {
    Status = MS_SecondUnique;
    return;
  }

  // Both sides have an element: the smaller key is unique to its side.
  if (FirstIter->first < SecondIter->first)
    Status = MS_FirstUnique;
  else if (SecondIter->first < FirstIter->first)
    Status = MS_SecondUnique;
  else
    Status = MS_Match;
}
1496 | |
1497 | // Return the sum of line/block samples, the max line/block sample, and the |
1498 | // number of line/block samples above the given threshold in a function |
1499 | // including its inlinees. |
1500 | static void getFuncSampleStats(const sampleprof::FunctionSamples &Func, |
1501 | FuncSampleStats &FuncStats, |
1502 | uint64_t HotThreshold) { |
1503 | for (const auto &L : Func.getBodySamples()) { |
1504 | uint64_t Sample = L.second.getSamples(); |
1505 | FuncStats.SampleSum += Sample; |
1506 | FuncStats.MaxSample = std::max(FuncStats.MaxSample, Sample); |
1507 | if (Sample >= HotThreshold) |
1508 | ++FuncStats.HotBlockCount; |
1509 | } |
1510 | |
1511 | for (const auto &C : Func.getCallsiteSamples()) { |
1512 | for (const auto &F : C.second) |
1513 | getFuncSampleStats(F.second, FuncStats, HotThreshold); |
1514 | } |
1515 | } |
1516 | |
1517 | /// Predicate that determines if a function is hot with a given threshold. We |
1518 | /// keep it separate from its callsites for possible extension in the future. |
1519 | static bool isFunctionHot(const FuncSampleStats &FuncStats, |
1520 | uint64_t HotThreshold) { |
1521 | // We intentionally compare the maximum sample count in a function with the |
1522 | // HotThreshold to get an approximate determination on hot functions. |
1523 | return (FuncStats.MaxSample >= HotThreshold); |
1524 | } |
1525 | |
1526 | namespace { |
1527 | class SampleOverlapAggregator { |
1528 | public: |
1529 | SampleOverlapAggregator(const std::string &BaseFilename, |
1530 | const std::string &TestFilename, |
1531 | double LowSimilarityThreshold, double Epsilon, |
1532 | const OverlapFuncFilters &FuncFilter) |
1533 | : BaseFilename(BaseFilename), TestFilename(TestFilename), |
1534 | LowSimilarityThreshold(LowSimilarityThreshold), Epsilon(Epsilon), |
1535 | FuncFilter(FuncFilter) {} |
1536 | |
1537 | /// Detect 0-sample input profile and report to output stream. This interface |
1538 | /// should be called after loadProfiles(). |
1539 | bool detectZeroSampleProfile(raw_fd_ostream &OS) const; |
1540 | |
1541 | /// Write out function-level similarity statistics for functions specified by |
1542 | /// options --function, --value-cutoff, and --similarity-cutoff. |
1543 | void dumpFuncSimilarity(raw_fd_ostream &OS) const; |
1544 | |
1545 | /// Write out program-level similarity and overlap statistics. |
1546 | void dumpProgramSummary(raw_fd_ostream &OS) const; |
1547 | |
1548 | /// Write out hot-function and hot-block statistics for base_profile, |
1549 | /// test_profile, and their overlap. For both cases, the overlap HO is |
1550 | /// calculated as follows: |
1551 | /// Given the number of functions (or blocks) that are hot in both profiles |
1552 | /// HCommon and the number of functions (or blocks) that are hot in at |
1553 | /// least one profile HUnion, HO = HCommon / HUnion. |
1554 | void dumpHotFuncAndBlockOverlap(raw_fd_ostream &OS) const; |
1555 | |
1556 | /// This function tries matching functions in base and test profiles. For each |
1557 | /// pair of matched functions, it aggregates the function-level |
1558 | /// similarity into a profile-level similarity. It also dump function-level |
1559 | /// similarity information of functions specified by --function, |
1560 | /// --value-cutoff, and --similarity-cutoff options. The program-level |
1561 | /// similarity PS is computed as follows: |
1562 | /// Given function-level similarity FS(A) for all function A, the |
1563 | /// weight of function A in base profile WB(A), and the weight of function |
1564 | /// A in test profile WT(A), compute PS(base_profile, test_profile) = |
1565 | /// sum_A(FS(A) * avg(WB(A), WT(A))) ranging in [0.0f to 1.0f] with 0.0 |
1566 | /// meaning no-overlap. |
1567 | void computeSampleProfileOverlap(raw_fd_ostream &OS); |
1568 | |
1569 | /// Initialize ProfOverlap with the sum of samples in base and test |
1570 | /// profiles. This function also computes and keeps the sum of samples and |
1571 | /// max sample counts of each function in BaseStats and TestStats for later |
1572 | /// use to avoid re-computations. |
1573 | void initializeSampleProfileOverlap(); |
1574 | |
1575 | /// Load profiles specified by BaseFilename and TestFilename. |
1576 | std::error_code loadProfiles(); |
1577 | |
1578 | using FuncSampleStatsMap = |
1579 | std::unordered_map<SampleContext, FuncSampleStats, SampleContext::Hash>; |
1580 | |
1581 | private: |
1582 | SampleOverlapStats ProfOverlap; |
1583 | SampleOverlapStats HotFuncOverlap; |
1584 | SampleOverlapStats HotBlockOverlap; |
1585 | std::string BaseFilename; |
1586 | std::string TestFilename; |
1587 | std::unique_ptr<sampleprof::SampleProfileReader> BaseReader; |
1588 | std::unique_ptr<sampleprof::SampleProfileReader> TestReader; |
1589 | // BaseStats and TestStats hold FuncSampleStats for each function, with |
1590 | // function name as the key. |
1591 | FuncSampleStatsMap BaseStats; |
1592 | FuncSampleStatsMap TestStats; |
1593 | // Low similarity threshold in floating point number |
1594 | double LowSimilarityThreshold; |
1595 | // Block samples above BaseHotThreshold or TestHotThreshold are considered hot |
1596 | // for tracking hot blocks. |
1597 | uint64_t BaseHotThreshold; |
1598 | uint64_t TestHotThreshold; |
1599 | // A small threshold used to round the results of floating point accumulations |
1600 | // to resolve imprecision. |
1601 | const double Epsilon; |
1602 | std::multimap<double, SampleOverlapStats, std::greater<double>> |
1603 | FuncSimilarityDump; |
1604 | // FuncFilter carries specifications in options --value-cutoff and |
1605 | // --function. |
1606 | OverlapFuncFilters FuncFilter; |
1607 | // Column offsets for printing the function-level details table. |
1608 | static const unsigned int TestWeightCol = 15; |
1609 | static const unsigned int SimilarityCol = 30; |
1610 | static const unsigned int OverlapCol = 43; |
1611 | static const unsigned int BaseUniqueCol = 53; |
1612 | static const unsigned int TestUniqueCol = 67; |
1613 | static const unsigned int BaseSampleCol = 81; |
1614 | static const unsigned int TestSampleCol = 96; |
1615 | static const unsigned int FuncNameCol = 111; |
1616 | |
1617 | /// Return a similarity of two line/block sample counters in the same |
1618 | /// function in base and test profiles. The line/block-similarity BS(i) is |
1619 | /// computed as follows: |
1620 | /// For an offsets i, given the sample count at i in base profile BB(i), |
1621 | /// the sample count at i in test profile BT(i), the sum of sample counts |
1622 | /// in this function in base profile SB, and the sum of sample counts in |
1623 | /// this function in test profile ST, compute BS(i) = 1.0 - fabs(BB(i)/SB - |
1624 | /// BT(i)/ST), ranging in [0.0f to 1.0f] with 0.0 meaning no-overlap. |
1625 | double computeBlockSimilarity(uint64_t BaseSample, uint64_t TestSample, |
1626 | const SampleOverlapStats &FuncOverlap) const; |
1627 | |
1628 | void updateHotBlockOverlap(uint64_t BaseSample, uint64_t TestSample, |
1629 | uint64_t HotBlockCount); |
1630 | |
1631 | void getHotFunctions(const FuncSampleStatsMap &ProfStats, |
1632 | FuncSampleStatsMap &HotFunc, |
1633 | uint64_t HotThreshold) const; |
1634 | |
1635 | void computeHotFuncOverlap(); |
1636 | |
1637 | /// This function updates statistics in FuncOverlap, HotBlockOverlap, and |
1638 | /// Difference for two sample units in a matched function according to the |
1639 | /// given match status. |
1640 | void updateOverlapStatsForFunction(uint64_t BaseSample, uint64_t TestSample, |
1641 | uint64_t HotBlockCount, |
1642 | SampleOverlapStats &FuncOverlap, |
1643 | double &Difference, MatchStatus Status); |
1644 | |
1645 | /// This function updates statistics in FuncOverlap, HotBlockOverlap, and |
1646 | /// Difference for unmatched callees that only present in one profile in a |
1647 | /// matched caller function. |
1648 | void updateForUnmatchedCallee(const sampleprof::FunctionSamples &Func, |
1649 | SampleOverlapStats &FuncOverlap, |
1650 | double &Difference, MatchStatus Status); |
1651 | |
1652 | /// This function updates sample overlap statistics of an overlap function in |
1653 | /// base and test profile. It also calculates a function-internal similarity |
1654 | /// FIS as follows: |
1655 | /// For offsets i that have samples in at least one profile in this |
1656 | /// function A, given BS(i) returned by computeBlockSimilarity(), compute |
1657 | /// FIS(A) = (2.0 - sum_i(1.0 - BS(i))) / 2, ranging in [0.0f to 1.0f] with |
1658 | /// 0.0 meaning no overlap. |
1659 | double computeSampleFunctionInternalOverlap( |
1660 | const sampleprof::FunctionSamples &BaseFunc, |
1661 | const sampleprof::FunctionSamples &TestFunc, |
1662 | SampleOverlapStats &FuncOverlap); |
1663 | |
1664 | /// Function-level similarity (FS) is a weighted value over function internal |
1665 | /// similarity (FIS). This function computes a function's FS from its FIS by |
1666 | /// applying the weight. |
1667 | double weightForFuncSimilarity(double FuncSimilarity, uint64_t BaseFuncSample, |
1668 | uint64_t TestFuncSample) const; |
1669 | |
1670 | /// The function-level similarity FS(A) for a function A is computed as |
1671 | /// follows: |
1672 | /// Compute a function-internal similarity FIS(A) by |
1673 | /// computeSampleFunctionInternalOverlap(). Then, with the weight of |
1674 | /// function A in base profile WB(A), and the weight of function A in test |
1675 | /// profile WT(A), compute FS(A) = FIS(A) * (1.0 - fabs(WB(A) - WT(A))) |
1676 | /// ranging in [0.0f to 1.0f] with 0.0 meaning no overlap. |
1677 | double |
1678 | computeSampleFunctionOverlap(const sampleprof::FunctionSamples *BaseFunc, |
1679 | const sampleprof::FunctionSamples *TestFunc, |
1680 | SampleOverlapStats *FuncOverlap, |
1681 | uint64_t BaseFuncSample, |
1682 | uint64_t TestFuncSample); |
1683 | |
1684 | /// Profile-level similarity (PS) is a weighted aggregate over function-level |
1685 | /// similarities (FS). This method weights the FS value by the function |
1686 | /// weights in the base and test profiles for the aggregation. |
1687 | double weightByImportance(double FuncSimilarity, uint64_t BaseFuncSample, |
1688 | uint64_t TestFuncSample) const; |
1689 | }; |
1690 | } // end anonymous namespace |
1691 | |
1692 | bool SampleOverlapAggregator::detectZeroSampleProfile( |
1693 | raw_fd_ostream &OS) const { |
1694 | bool HaveZeroSample = false; |
1695 | if (ProfOverlap.BaseSample == 0) { |
1696 | OS << "Sum of sample counts for profile " << BaseFilename << " is 0.\n"; |
1697 | HaveZeroSample = true; |
1698 | } |
1699 | if (ProfOverlap.TestSample == 0) { |
1700 | OS << "Sum of sample counts for profile " << TestFilename << " is 0.\n"; |
1701 | HaveZeroSample = true; |
1702 | } |
1703 | return HaveZeroSample; |
1704 | } |
1705 | |
1706 | double SampleOverlapAggregator::computeBlockSimilarity( |
1707 | uint64_t BaseSample, uint64_t TestSample, |
1708 | const SampleOverlapStats &FuncOverlap) const { |
1709 | double BaseFrac = 0.0; |
1710 | double TestFrac = 0.0; |
1711 | if (FuncOverlap.BaseSample > 0) |
1712 | BaseFrac = static_cast<double>(BaseSample) / FuncOverlap.BaseSample; |
1713 | if (FuncOverlap.TestSample > 0) |
1714 | TestFrac = static_cast<double>(TestSample) / FuncOverlap.TestSample; |
1715 | return 1.0 - std::fabs(BaseFrac - TestFrac); |
1716 | } |
1717 | |
1718 | void SampleOverlapAggregator::updateHotBlockOverlap(uint64_t BaseSample, |
1719 | uint64_t TestSample, |
1720 | uint64_t HotBlockCount) { |
1721 | bool IsBaseHot = (BaseSample >= BaseHotThreshold); |
1722 | bool IsTestHot = (TestSample >= TestHotThreshold); |
1723 | if (!IsBaseHot && !IsTestHot) |
1724 | return; |
1725 | |
1726 | HotBlockOverlap.UnionCount += HotBlockCount; |
1727 | if (IsBaseHot) |
1728 | HotBlockOverlap.BaseCount += HotBlockCount; |
1729 | if (IsTestHot) |
1730 | HotBlockOverlap.TestCount += HotBlockCount; |
1731 | if (IsBaseHot && IsTestHot) |
1732 | HotBlockOverlap.OverlapCount += HotBlockCount; |
1733 | } |
1734 | |
1735 | void SampleOverlapAggregator::getHotFunctions( |
1736 | const FuncSampleStatsMap &ProfStats, FuncSampleStatsMap &HotFunc, |
1737 | uint64_t HotThreshold) const { |
1738 | for (const auto &F : ProfStats) { |
1739 | if (isFunctionHot(F.second, HotThreshold)) |
1740 | HotFunc.emplace(F.first, F.second); |
1741 | } |
1742 | } |
1743 | |
1744 | void SampleOverlapAggregator::computeHotFuncOverlap() { |
1745 | FuncSampleStatsMap BaseHotFunc; |
1746 | getHotFunctions(BaseStats, BaseHotFunc, BaseHotThreshold); |
1747 | HotFuncOverlap.BaseCount = BaseHotFunc.size(); |
1748 | |
1749 | FuncSampleStatsMap TestHotFunc; |
1750 | getHotFunctions(TestStats, TestHotFunc, TestHotThreshold); |
1751 | HotFuncOverlap.TestCount = TestHotFunc.size(); |
1752 | HotFuncOverlap.UnionCount = HotFuncOverlap.TestCount; |
1753 | |
1754 | for (const auto &F : BaseHotFunc) { |
1755 | if (TestHotFunc.count(F.first)) |
1756 | ++HotFuncOverlap.OverlapCount; |
1757 | else |
1758 | ++HotFuncOverlap.UnionCount; |
1759 | } |
1760 | } |
1761 | |
1762 | void SampleOverlapAggregator::updateOverlapStatsForFunction( |
1763 | uint64_t BaseSample, uint64_t TestSample, uint64_t HotBlockCount, |
1764 | SampleOverlapStats &FuncOverlap, double &Difference, MatchStatus Status) { |
1765 | assert(Status != MS_None &&(static_cast <bool> (Status != MS_None && "Match status should be updated before updating overlap statistics" ) ? void (0) : __assert_fail ("Status != MS_None && \"Match status should be updated before updating overlap statistics\"" , "llvm/tools/llvm-profdata/llvm-profdata.cpp", 1766, __extension__ __PRETTY_FUNCTION__)) |
1766 | "Match status should be updated before updating overlap statistics")(static_cast <bool> (Status != MS_None && "Match status should be updated before updating overlap statistics" ) ? void (0) : __assert_fail ("Status != MS_None && \"Match status should be updated before updating overlap statistics\"" , "llvm/tools/llvm-profdata/llvm-profdata.cpp", 1766, __extension__ __PRETTY_FUNCTION__)); |
1767 | if (Status == MS_FirstUnique) { |
1768 | TestSample = 0; |
1769 | FuncOverlap.BaseUniqueSample += BaseSample; |
1770 | } else if (Status == MS_SecondUnique) { |
1771 | BaseSample = 0; |
1772 | FuncOverlap.TestUniqueSample += TestSample; |
1773 | } else { |
1774 | ++FuncOverlap.OverlapCount; |
1775 | } |
1776 | |
1777 | FuncOverlap.UnionSample += std::max(BaseSample, TestSample); |
1778 | FuncOverlap.OverlapSample += std::min(BaseSample, TestSample); |
1779 | Difference += |
1780 | 1.0 - computeBlockSimilarity(BaseSample, TestSample, FuncOverlap); |
1781 | updateHotBlockOverlap(BaseSample, TestSample, HotBlockCount); |
1782 | } |
1783 | |
1784 | void SampleOverlapAggregator::updateForUnmatchedCallee( |
1785 | const sampleprof::FunctionSamples &Func, SampleOverlapStats &FuncOverlap, |
1786 | double &Difference, MatchStatus Status) { |
1787 | assert((Status == MS_FirstUnique || Status == MS_SecondUnique) &&(static_cast <bool> ((Status == MS_FirstUnique || Status == MS_SecondUnique) && "Status must be either of the two unmatched cases" ) ? void (0) : __assert_fail ("(Status == MS_FirstUnique || Status == MS_SecondUnique) && \"Status must be either of the two unmatched cases\"" , "llvm/tools/llvm-profdata/llvm-profdata.cpp", 1788, __extension__ __PRETTY_FUNCTION__)) |
1788 | "Status must be either of the two unmatched cases")(static_cast <bool> ((Status == MS_FirstUnique || Status == MS_SecondUnique) && "Status must be either of the two unmatched cases" ) ? void (0) : __assert_fail ("(Status == MS_FirstUnique || Status == MS_SecondUnique) && \"Status must be either of the two unmatched cases\"" , "llvm/tools/llvm-profdata/llvm-profdata.cpp", 1788, __extension__ __PRETTY_FUNCTION__)); |
1789 | FuncSampleStats FuncStats; |
1790 | if (Status == MS_FirstUnique) { |
1791 | getFuncSampleStats(Func, FuncStats, BaseHotThreshold); |
1792 | updateOverlapStatsForFunction(FuncStats.SampleSum, 0, |
1793 | FuncStats.HotBlockCount, FuncOverlap, |
1794 | Difference, Status); |
1795 | } else { |
1796 | getFuncSampleStats(Func, FuncStats, TestHotThreshold); |
1797 | updateOverlapStatsForFunction(0, FuncStats.SampleSum, |
1798 | FuncStats.HotBlockCount, FuncOverlap, |
1799 | Difference, Status); |
1800 | } |
1801 | } |
1802 | |
1803 | double SampleOverlapAggregator::computeSampleFunctionInternalOverlap( |
1804 | const sampleprof::FunctionSamples &BaseFunc, |
1805 | const sampleprof::FunctionSamples &TestFunc, |
1806 | SampleOverlapStats &FuncOverlap) { |
1807 | |
1808 | using namespace sampleprof; |
1809 | |
1810 | double Difference = 0; |
1811 | |
1812 | // Accumulate Difference for regular line/block samples in the function. |
1813 | // We match them through sort-merge join algorithm because |
1814 | // FunctionSamples::getBodySamples() returns a map of sample counters ordered |
1815 | // by their offsets. |
1816 | MatchStep<BodySampleMap::const_iterator> BlockIterStep( |
1817 | BaseFunc.getBodySamples().cbegin(), BaseFunc.getBodySamples().cend(), |
1818 | TestFunc.getBodySamples().cbegin(), TestFunc.getBodySamples().cend()); |
1819 | BlockIterStep.updateOneStep(); |
1820 | while (!BlockIterStep.areBothFinished()) { |
1821 | uint64_t BaseSample = |
1822 | BlockIterStep.isFirstFinished() |
1823 | ? 0 |
1824 | : BlockIterStep.getFirstIter()->second.getSamples(); |
1825 | uint64_t TestSample = |
1826 | BlockIterStep.isSecondFinished() |
1827 | ? 0 |
1828 | : BlockIterStep.getSecondIter()->second.getSamples(); |
1829 | updateOverlapStatsForFunction(BaseSample, TestSample, 1, FuncOverlap, |
1830 | Difference, BlockIterStep.getMatchStatus()); |
1831 | |
1832 | BlockIterStep.updateOneStep(); |
1833 | } |
1834 | |
1835 | // Accumulate Difference for callsite lines in the function. We match |
1836 | // them through sort-merge algorithm because |
1837 | // FunctionSamples::getCallsiteSamples() returns a map of callsite records |
1838 | // ordered by their offsets. |
1839 | MatchStep<CallsiteSampleMap::const_iterator> CallsiteIterStep( |
1840 | BaseFunc.getCallsiteSamples().cbegin(), |
1841 | BaseFunc.getCallsiteSamples().cend(), |
1842 | TestFunc.getCallsiteSamples().cbegin(), |
1843 | TestFunc.getCallsiteSamples().cend()); |
1844 | CallsiteIterStep.updateOneStep(); |
1845 | while (!CallsiteIterStep.areBothFinished()) { |
1846 | MatchStatus CallsiteStepStatus = CallsiteIterStep.getMatchStatus(); |
1847 | assert(CallsiteStepStatus != MS_None &&(static_cast <bool> (CallsiteStepStatus != MS_None && "Match status should be updated before entering loop body") ? void (0) : __assert_fail ("CallsiteStepStatus != MS_None && \"Match status should be updated before entering loop body\"" , "llvm/tools/llvm-profdata/llvm-profdata.cpp", 1848, __extension__ __PRETTY_FUNCTION__)) |
1848 | "Match status should be updated before entering loop body")(static_cast <bool> (CallsiteStepStatus != MS_None && "Match status should be updated before entering loop body") ? void (0) : __assert_fail ("CallsiteStepStatus != MS_None && \"Match status should be updated before entering loop body\"" , "llvm/tools/llvm-profdata/llvm-profdata.cpp", 1848, __extension__ __PRETTY_FUNCTION__)); |
1849 | |
1850 | if (CallsiteStepStatus != MS_Match) { |
1851 | auto Callsite = (CallsiteStepStatus == MS_FirstUnique) |
1852 | ? CallsiteIterStep.getFirstIter() |
1853 | : CallsiteIterStep.getSecondIter(); |
1854 | for (const auto &F : Callsite->second) |
1855 | updateForUnmatchedCallee(F.second, FuncOverlap, Difference, |
1856 | CallsiteStepStatus); |
1857 | } else { |
1858 | // There may be multiple inlinees at the same offset, so we need to try |
1859 | // matching all of them. This match is implemented through sort-merge |
1860 | // algorithm because callsite records at the same offset are ordered by |
1861 | // function names. |
1862 | MatchStep<FunctionSamplesMap::const_iterator> CalleeIterStep( |
1863 | CallsiteIterStep.getFirstIter()->second.cbegin(), |
1864 | CallsiteIterStep.getFirstIter()->second.cend(), |
1865 | CallsiteIterStep.getSecondIter()->second.cbegin(), |
1866 | CallsiteIterStep.getSecondIter()->second.cend()); |
1867 | CalleeIterStep.updateOneStep(); |
1868 | while (!CalleeIterStep.areBothFinished()) { |
1869 | MatchStatus CalleeStepStatus = CalleeIterStep.getMatchStatus(); |
1870 | if (CalleeStepStatus != MS_Match) { |
1871 | auto Callee = (CalleeStepStatus == MS_FirstUnique) |
1872 | ? CalleeIterStep.getFirstIter() |
1873 | : CalleeIterStep.getSecondIter(); |
1874 | updateForUnmatchedCallee(Callee->second, FuncOverlap, Difference, |
1875 | CalleeStepStatus); |
1876 | } else { |
1877 | // An inlined function can contain other inlinees inside, so compute |
1878 | // the Difference recursively. |
1879 | Difference += 2.0 - 2 * computeSampleFunctionInternalOverlap( |
1880 | CalleeIterStep.getFirstIter()->second, |
1881 | CalleeIterStep.getSecondIter()->second, |
1882 | FuncOverlap); |
1883 | } |
1884 | CalleeIterStep.updateOneStep(); |
1885 | } |
1886 | } |
1887 | CallsiteIterStep.updateOneStep(); |
1888 | } |
1889 | |
1890 | // Difference reflects the total differences of line/block samples in this |
1891 | // function and ranges in [0.0f to 2.0f]. Take (2.0 - Difference) / 2 to |
1892 | // reflect the similarity between function profiles in [0.0f to 1.0f]. |
1893 | return (2.0 - Difference) / 2; |
1894 | } |
1895 | |
1896 | double SampleOverlapAggregator::weightForFuncSimilarity( |
1897 | double FuncInternalSimilarity, uint64_t BaseFuncSample, |
1898 | uint64_t TestFuncSample) const { |
1899 | // Compute the weight as the distance between the function weights in two |
1900 | // profiles. |
1901 | double BaseFrac = 0.0; |
1902 | double TestFrac = 0.0; |
1903 | assert(ProfOverlap.BaseSample > 0 &&(static_cast <bool> (ProfOverlap.BaseSample > 0 && "Total samples in base profile should be greater than 0") ? void (0) : __assert_fail ("ProfOverlap.BaseSample > 0 && \"Total samples in base profile should be greater than 0\"" , "llvm/tools/llvm-profdata/llvm-profdata.cpp", 1904, __extension__ __PRETTY_FUNCTION__)) |
1904 | "Total samples in base profile should be greater than 0")(static_cast <bool> (ProfOverlap.BaseSample > 0 && "Total samples in base profile should be greater than 0") ? void (0) : __assert_fail ("ProfOverlap.BaseSample > 0 && \"Total samples in base profile should be greater than 0\"" , "llvm/tools/llvm-profdata/llvm-profdata.cpp", 1904, __extension__ __PRETTY_FUNCTION__)); |
1905 | BaseFrac = static_cast<double>(BaseFuncSample) / ProfOverlap.BaseSample; |
1906 | assert(ProfOverlap.TestSample > 0 &&(static_cast <bool> (ProfOverlap.TestSample > 0 && "Total samples in test profile should be greater than 0") ? void (0) : __assert_fail ("ProfOverlap.TestSample > 0 && \"Total samples in test profile should be greater than 0\"" , "llvm/tools/llvm-profdata/llvm-profdata.cpp", 1907, __extension__ __PRETTY_FUNCTION__)) |
1907 | "Total samples in test profile should be greater than 0")(static_cast <bool> (ProfOverlap.TestSample > 0 && "Total samples in test profile should be greater than 0") ? void (0) : __assert_fail ("ProfOverlap.TestSample > 0 && \"Total samples in test profile should be greater than 0\"" , "llvm/tools/llvm-profdata/llvm-profdata.cpp", 1907, __extension__ __PRETTY_FUNCTION__)); |
1908 | TestFrac = static_cast<double>(TestFuncSample) / ProfOverlap.TestSample; |
1909 | double WeightDistance = std::fabs(BaseFrac - TestFrac); |
1910 | |
1911 | // Take WeightDistance into the similarity. |
1912 | return FuncInternalSimilarity * (1 - WeightDistance); |
1913 | } |
1914 | |
1915 | double |
1916 | SampleOverlapAggregator::weightByImportance(double FuncSimilarity, |
1917 | uint64_t BaseFuncSample, |
1918 | uint64_t TestFuncSample) const { |
1919 | |
1920 | double BaseFrac = 0.0; |
1921 | double TestFrac = 0.0; |
1922 | assert(ProfOverlap.BaseSample > 0 &&(static_cast <bool> (ProfOverlap.BaseSample > 0 && "Total samples in base profile should be greater than 0") ? void (0) : __assert_fail ("ProfOverlap.BaseSample > 0 && \"Total samples in base profile should be greater than 0\"" , "llvm/tools/llvm-profdata/llvm-profdata.cpp", 1923, __extension__ __PRETTY_FUNCTION__)) |
1923 | "Total samples in base profile should be greater than 0")(static_cast <bool> (ProfOverlap.BaseSample > 0 && "Total samples in base profile should be greater than 0") ? void (0) : __assert_fail ("ProfOverlap.BaseSample > 0 && \"Total samples in base profile should be greater than 0\"" , "llvm/tools/llvm-profdata/llvm-profdata.cpp", 1923, __extension__ __PRETTY_FUNCTION__)); |
1924 | BaseFrac = static_cast<double>(BaseFuncSample) / ProfOverlap.BaseSample / 2.0; |
1925 | assert(ProfOverlap.TestSample > 0 &&(static_cast <bool> (ProfOverlap.TestSample > 0 && "Total samples in test profile should be greater than 0") ? void (0) : __assert_fail ("ProfOverlap.TestSample > 0 && \"Total samples in test profile should be greater than 0\"" , "llvm/tools/llvm-profdata/llvm-profdata.cpp", 1926, __extension__ __PRETTY_FUNCTION__)) |
1926 | "Total samples in test profile should be greater than 0")(static_cast <bool> (ProfOverlap.TestSample > 0 && "Total samples in test profile should be greater than 0") ? void (0) : __assert_fail ("ProfOverlap.TestSample > 0 && \"Total samples in test profile should be greater than 0\"" , "llvm/tools/llvm-profdata/llvm-profdata.cpp", 1926, __extension__ __PRETTY_FUNCTION__)); |
1927 | TestFrac = static_cast<double>(TestFuncSample) / ProfOverlap.TestSample / 2.0; |
1928 | return FuncSimilarity * (BaseFrac + TestFrac); |
1929 | } |
1930 | |
1931 | double SampleOverlapAggregator::computeSampleFunctionOverlap( |
1932 | const sampleprof::FunctionSamples *BaseFunc, |
1933 | const sampleprof::FunctionSamples *TestFunc, |
1934 | SampleOverlapStats *FuncOverlap, uint64_t BaseFuncSample, |
1935 | uint64_t TestFuncSample) { |
1936 | // Default function internal similarity before weighted, meaning two functions |
1937 | // has no overlap. |
1938 | const double DefaultFuncInternalSimilarity = 0; |
1939 | double FuncSimilarity; |
1940 | double FuncInternalSimilarity; |
1941 | |
1942 | // If BaseFunc or TestFunc is nullptr, it means the functions do not overlap. |
1943 | // In this case, we use DefaultFuncInternalSimilarity as the function internal |
1944 | // similarity. |
1945 | if (!BaseFunc || !TestFunc) { |
1946 | FuncInternalSimilarity = DefaultFuncInternalSimilarity; |
1947 | } else { |
1948 | assert(FuncOverlap != nullptr &&(static_cast <bool> (FuncOverlap != nullptr && "FuncOverlap should be provided in this case" ) ? void (0) : __assert_fail ("FuncOverlap != nullptr && \"FuncOverlap should be provided in this case\"" , "llvm/tools/llvm-profdata/llvm-profdata.cpp", 1949, __extension__ __PRETTY_FUNCTION__)) |
1949 | "FuncOverlap should be provided in this case")(static_cast <bool> (FuncOverlap != nullptr && "FuncOverlap should be provided in this case" ) ? void (0) : __assert_fail ("FuncOverlap != nullptr && \"FuncOverlap should be provided in this case\"" , "llvm/tools/llvm-profdata/llvm-profdata.cpp", 1949, __extension__ __PRETTY_FUNCTION__)); |
1950 | FuncInternalSimilarity = computeSampleFunctionInternalOverlap( |
1951 | *BaseFunc, *TestFunc, *FuncOverlap); |
1952 | // Now, FuncInternalSimilarity may be a little less than 0 due to |
1953 | // imprecision of floating point accumulations. Make it zero if the |
1954 | // difference is below Epsilon. |
1955 | FuncInternalSimilarity = (std::fabs(FuncInternalSimilarity - 0) < Epsilon) |
1956 | ? 0 |
1957 | : FuncInternalSimilarity; |
1958 | } |
1959 | FuncSimilarity = weightForFuncSimilarity(FuncInternalSimilarity, |
1960 | BaseFuncSample, TestFuncSample); |
1961 | return FuncSimilarity; |
1962 | } |
1963 | |
1964 | void SampleOverlapAggregator::computeSampleProfileOverlap(raw_fd_ostream &OS) { |
1965 | using namespace sampleprof; |
1966 | |
1967 | std::unordered_map<SampleContext, const FunctionSamples *, |
1968 | SampleContext::Hash> |
1969 | BaseFuncProf; |
1970 | const auto &BaseProfiles = BaseReader->getProfiles(); |
1971 | for (const auto &BaseFunc : BaseProfiles) { |
1972 | BaseFuncProf.emplace(BaseFunc.second.getContext(), &(BaseFunc.second)); |
1973 | } |
1974 | ProfOverlap.UnionCount = BaseFuncProf.size(); |
1975 | |
1976 | const auto &TestProfiles = TestReader->getProfiles(); |
1977 | for (const auto &TestFunc : TestProfiles) { |
1978 | SampleOverlapStats FuncOverlap; |
1979 | FuncOverlap.TestName = TestFunc.second.getContext(); |
1980 | assert(TestStats.count(FuncOverlap.TestName) &&(static_cast <bool> (TestStats.count(FuncOverlap.TestName ) && "TestStats should have records for all functions in test profile " "except inlinees") ? void (0) : __assert_fail ("TestStats.count(FuncOverlap.TestName) && \"TestStats should have records for all functions in test profile \" \"except inlinees\"" , "llvm/tools/llvm-profdata/llvm-profdata.cpp", 1982, __extension__ __PRETTY_FUNCTION__)) |
1981 | "TestStats should have records for all functions in test profile "(static_cast <bool> (TestStats.count(FuncOverlap.TestName ) && "TestStats should have records for all functions in test profile " "except inlinees") ? void (0) : __assert_fail ("TestStats.count(FuncOverlap.TestName) && \"TestStats should have records for all functions in test profile \" \"except inlinees\"" , "llvm/tools/llvm-profdata/llvm-profdata.cpp", 1982, __extension__ __PRETTY_FUNCTION__)) |
1982 | "except inlinees")(static_cast <bool> (TestStats.count(FuncOverlap.TestName ) && "TestStats should have records for all functions in test profile " "except inlinees") ? void (0) : __assert_fail ("TestStats.count(FuncOverlap.TestName) && \"TestStats should have records for all functions in test profile \" \"except inlinees\"" , "llvm/tools/llvm-profdata/llvm-profdata.cpp", 1982, __extension__ __PRETTY_FUNCTION__)); |
1983 | FuncOverlap.TestSample = TestStats[FuncOverlap.TestName].SampleSum; |
1984 | |
1985 | bool Matched = false; |
1986 | const auto Match = BaseFuncProf.find(FuncOverlap.TestName); |
1987 | if (Match == BaseFuncProf.end()) { |
1988 | const FuncSampleStats &FuncStats = TestStats[FuncOverlap.TestName]; |
1989 | ++ProfOverlap.TestUniqueCount; |
1990 | ProfOverlap.TestUniqueSample += FuncStats.SampleSum; |
1991 | FuncOverlap.TestUniqueSample = FuncStats.SampleSum; |
1992 | |
1993 | updateHotBlockOverlap(0, FuncStats.SampleSum, FuncStats.HotBlockCount); |
1994 | |
1995 | double FuncSimilarity = computeSampleFunctionOverlap( |
1996 | nullptr, nullptr, nullptr, 0, FuncStats.SampleSum); |
1997 | ProfOverlap.Similarity += |
1998 | weightByImportance(FuncSimilarity, 0, FuncStats.SampleSum); |
1999 | |
2000 | ++ProfOverlap.UnionCount; |
2001 | ProfOverlap.UnionSample += FuncStats.SampleSum; |
2002 | } else { |
2003 | ++ProfOverlap.OverlapCount; |
2004 | |
2005 | // Two functions match with each other. Compute function-level overlap and |
2006 | // aggregate them into profile-level overlap. |
2007 | FuncOverlap.BaseName = Match->second->getContext(); |
2008 | assert(BaseStats.count(FuncOverlap.BaseName) &&(static_cast <bool> (BaseStats.count(FuncOverlap.BaseName ) && "BaseStats should have records for all functions in base profile " "except inlinees") ? void (0) : __assert_fail ("BaseStats.count(FuncOverlap.BaseName) && \"BaseStats should have records for all functions in base profile \" \"except inlinees\"" , "llvm/tools/llvm-profdata/llvm-profdata.cpp", 2010, __extension__ __PRETTY_FUNCTION__)) |
2009 | "BaseStats should have records for all functions in base profile "(static_cast <bool> (BaseStats.count(FuncOverlap.BaseName ) && "BaseStats should have records for all functions in base profile " "except inlinees") ? void (0) : __assert_fail ("BaseStats.count(FuncOverlap.BaseName) && \"BaseStats should have records for all functions in base profile \" \"except inlinees\"" , "llvm/tools/llvm-profdata/llvm-profdata.cpp", 2010, __extension__ __PRETTY_FUNCTION__)) |
2010 | "except inlinees")(static_cast <bool> (BaseStats.count(FuncOverlap.BaseName ) && "BaseStats should have records for all functions in base profile " "except inlinees") ? void (0) : __assert_fail ("BaseStats.count(FuncOverlap.BaseName) && \"BaseStats should have records for all functions in base profile \" \"except inlinees\"" , "llvm/tools/llvm-profdata/llvm-profdata.cpp", 2010, __extension__ __PRETTY_FUNCTION__)); |
2011 | FuncOverlap.BaseSample = BaseStats[FuncOverlap.BaseName].SampleSum; |
2012 | |
2013 | FuncOverlap.Similarity = computeSampleFunctionOverlap( |
2014 | Match->second, &TestFunc.second, &FuncOverlap, FuncOverlap.BaseSample, |
2015 | FuncOverlap.TestSample); |
2016 | ProfOverlap.Similarity += |
2017 | weightByImportance(FuncOverlap.Similarity, FuncOverlap.BaseSample, |
2018 | FuncOverlap.TestSample); |
2019 | ProfOverlap.OverlapSample += FuncOverlap.OverlapSample; |
2020 | ProfOverlap.UnionSample += FuncOverlap.UnionSample; |
2021 | |
2022 | // Accumulate the percentage of base unique and test unique samples into |
2023 | // ProfOverlap. |
2024 | ProfOverlap.BaseUniqueSample += FuncOverlap.BaseUniqueSample; |
2025 | ProfOverlap.TestUniqueSample += FuncOverlap.TestUniqueSample; |
2026 | |
2027 | // Remove matched base functions for later reporting functions not found |
2028 | // in test profile. |
2029 | BaseFuncProf.erase(Match); |
2030 | Matched = true; |
2031 | } |
2032 | |
2033 | // Print function-level similarity information if specified by options. |
2034 | assert(TestStats.count(FuncOverlap.TestName) &&(static_cast <bool> (TestStats.count(FuncOverlap.TestName ) && "TestStats should have records for all functions in test profile " "except inlinees") ? void (0) : __assert_fail ("TestStats.count(FuncOverlap.TestName) && \"TestStats should have records for all functions in test profile \" \"except inlinees\"" , "llvm/tools/llvm-profdata/llvm-profdata.cpp", 2036, __extension__ __PRETTY_FUNCTION__)) |
2035 | "TestStats should have records for all functions in test profile "(static_cast <bool> (TestStats.count(FuncOverlap.TestName ) && "TestStats should have records for all functions in test profile " "except inlinees") ? void (0) : __assert_fail ("TestStats.count(FuncOverlap.TestName) && \"TestStats should have records for all functions in test profile \" \"except inlinees\"" , "llvm/tools/llvm-profdata/llvm-profdata.cpp", 2036, __extension__ __PRETTY_FUNCTION__)) |
2036 | "except inlinees")(static_cast <bool> (TestStats.count(FuncOverlap.TestName ) && "TestStats should have records for all functions in test profile " "except inlinees") ? void (0) : __assert_fail ("TestStats.count(FuncOverlap.TestName) && \"TestStats should have records for all functions in test profile \" \"except inlinees\"" , "llvm/tools/llvm-profdata/llvm-profdata.cpp", 2036, __extension__ __PRETTY_FUNCTION__)); |
2037 | if (TestStats[FuncOverlap.TestName].MaxSample >= FuncFilter.ValueCutoff || |
2038 | (Matched && FuncOverlap.Similarity < LowSimilarityThreshold) || |
2039 | (Matched && !FuncFilter.NameFilter.empty() && |
2040 | FuncOverlap.BaseName.toString().find(FuncFilter.NameFilter) != |
2041 | std::string::npos)) { |
2042 | assert(ProfOverlap.BaseSample > 0 &&(static_cast <bool> (ProfOverlap.BaseSample > 0 && "Total samples in base profile should be greater than 0") ? void (0) : __assert_fail ("ProfOverlap.BaseSample > 0 && \"Total samples in base profile should be greater than 0\"" , "llvm/tools/llvm-profdata/llvm-profdata.cpp", 2043, __extension__ __PRETTY_FUNCTION__)) |
2043 | "Total samples in base profile should be greater than 0")(static_cast <bool> (ProfOverlap.BaseSample > 0 && "Total samples in base profile should be greater than 0") ? void (0) : __assert_fail ("ProfOverlap.BaseSample > 0 && \"Total samples in base profile should be greater than 0\"" , "llvm/tools/llvm-profdata/llvm-profdata.cpp", 2043, __extension__ __PRETTY_FUNCTION__)); |
2044 | FuncOverlap.BaseWeight = |
2045 | static_cast<double>(FuncOverlap.BaseSample) / ProfOverlap.BaseSample; |
2046 | assert(ProfOverlap.TestSample > 0 &&(static_cast <bool> (ProfOverlap.TestSample > 0 && "Total samples in test profile should be greater than 0") ? void (0) : __assert_fail ("ProfOverlap.TestSample > 0 && \"Total samples in test profile should be greater than 0\"" , "llvm/tools/llvm-profdata/llvm-profdata.cpp", 2047, __extension__ __PRETTY_FUNCTION__)) |
2047 | "Total samples in test profile should be greater than 0")(static_cast <bool> (ProfOverlap.TestSample > 0 && "Total samples in test profile should be greater than 0") ? void (0) : __assert_fail ("ProfOverlap.TestSample > 0 && \"Total samples in test profile should be greater than 0\"" , "llvm/tools/llvm-profdata/llvm-profdata.cpp", 2047, __extension__ __PRETTY_FUNCTION__)); |
2048 | FuncOverlap.TestWeight = |
2049 | static_cast<double>(FuncOverlap.TestSample) / ProfOverlap.TestSample; |
2050 | FuncSimilarityDump.emplace(FuncOverlap.BaseWeight, FuncOverlap); |
2051 | } |
2052 | } |
2053 | |
2054 | // Traverse through functions in base profile but not in test profile. |
2055 | for (const auto &F : BaseFuncProf) { |
2056 | assert(BaseStats.count(F.second->getContext()) &&(static_cast <bool> (BaseStats.count(F.second->getContext ()) && "BaseStats should have records for all functions in base profile " "except inlinees") ? void (0) : __assert_fail ("BaseStats.count(F.second->getContext()) && \"BaseStats should have records for all functions in base profile \" \"except inlinees\"" , "llvm/tools/llvm-profdata/llvm-profdata.cpp", 2058, __extension__ __PRETTY_FUNCTION__)) |
2057 | "BaseStats should have records for all functions in base profile "(static_cast <bool> (BaseStats.count(F.second->getContext ()) && "BaseStats should have records for all functions in base profile " "except inlinees") ? void (0) : __assert_fail ("BaseStats.count(F.second->getContext()) && \"BaseStats should have records for all functions in base profile \" \"except inlinees\"" , "llvm/tools/llvm-profdata/llvm-profdata.cpp", 2058, __extension__ __PRETTY_FUNCTION__)) |
2058 | "except inlinees")(static_cast <bool> (BaseStats.count(F.second->getContext ()) && "BaseStats should have records for all functions in base profile " "except inlinees") ? void (0) : __assert_fail ("BaseStats.count(F.second->getContext()) && \"BaseStats should have records for all functions in base profile \" \"except inlinees\"" , "llvm/tools/llvm-profdata/llvm-profdata.cpp", 2058, __extension__ __PRETTY_FUNCTION__)); |
2059 | const FuncSampleStats &FuncStats = BaseStats[F.second->getContext()]; |
2060 | ++ProfOverlap.BaseUniqueCount; |
2061 | ProfOverlap.BaseUniqueSample += FuncStats.SampleSum; |
2062 | |
2063 | updateHotBlockOverlap(FuncStats.SampleSum, 0, FuncStats.HotBlockCount); |
2064 | |
2065 | double FuncSimilarity = computeSampleFunctionOverlap( |
2066 | nullptr, nullptr, nullptr, FuncStats.SampleSum, 0); |
2067 | ProfOverlap.Similarity += |
2068 | weightByImportance(FuncSimilarity, FuncStats.SampleSum, 0); |
2069 | |
2070 | ProfOverlap.UnionSample += FuncStats.SampleSum; |
2071 | } |
2072 | |
2073 | // Now, ProfSimilarity may be a little greater than 1 due to imprecision |
2074 | // of floating point accumulations. Make it 1.0 if the difference is below |
2075 | // Epsilon. |
2076 | ProfOverlap.Similarity = (std::fabs(ProfOverlap.Similarity - 1) < Epsilon) |
2077 | ? 1 |
2078 | : ProfOverlap.Similarity; |
2079 | |
2080 | computeHotFuncOverlap(); |
2081 | } |
2082 | |
2083 | void SampleOverlapAggregator::initializeSampleProfileOverlap() { |
2084 | const auto &BaseProf = BaseReader->getProfiles(); |
2085 | for (const auto &I : BaseProf) { |
2086 | ++ProfOverlap.BaseCount; |
2087 | FuncSampleStats FuncStats; |
2088 | getFuncSampleStats(I.second, FuncStats, BaseHotThreshold); |
2089 | ProfOverlap.BaseSample += FuncStats.SampleSum; |
2090 | BaseStats.emplace(I.second.getContext(), FuncStats); |
2091 | } |
2092 | |
2093 | const auto &TestProf = TestReader->getProfiles(); |
2094 | for (const auto &I : TestProf) { |
2095 | ++ProfOverlap.TestCount; |
2096 | FuncSampleStats FuncStats; |
2097 | getFuncSampleStats(I.second, FuncStats, TestHotThreshold); |
2098 | ProfOverlap.TestSample += FuncStats.SampleSum; |
2099 | TestStats.emplace(I.second.getContext(), FuncStats); |
2100 | } |
2101 | |
2102 | ProfOverlap.BaseName = StringRef(BaseFilename); |
2103 | ProfOverlap.TestName = StringRef(TestFilename); |
2104 | } |
2105 | |
2106 | void SampleOverlapAggregator::dumpFuncSimilarity(raw_fd_ostream &OS) const { |
2107 | using namespace sampleprof; |
2108 | |
2109 | if (FuncSimilarityDump.empty()) |
2110 | return; |
2111 | |
2112 | formatted_raw_ostream FOS(OS); |
2113 | FOS << "Function-level details:\n"; |
2114 | FOS << "Base weight"; |
2115 | FOS.PadToColumn(TestWeightCol); |
2116 | FOS << "Test weight"; |
2117 | FOS.PadToColumn(SimilarityCol); |
2118 | FOS << "Similarity"; |
2119 | FOS.PadToColumn(OverlapCol); |
2120 | FOS << "Overlap"; |
2121 | FOS.PadToColumn(BaseUniqueCol); |
2122 | FOS << "Base unique"; |
2123 | FOS.PadToColumn(TestUniqueCol); |
2124 | FOS << "Test unique"; |
2125 | FOS.PadToColumn(BaseSampleCol); |
2126 | FOS << "Base samples"; |
2127 | FOS.PadToColumn(TestSampleCol); |
2128 | FOS << "Test samples"; |
2129 | FOS.PadToColumn(FuncNameCol); |
2130 | FOS << "Function name\n"; |
2131 | for (const auto &F : FuncSimilarityDump) { |
2132 | double OverlapPercent = |
2133 | F.second.UnionSample > 0 |
2134 | ? static_cast<double>(F.second.OverlapSample) / F.second.UnionSample |
2135 | : 0; |
2136 | double BaseUniquePercent = |
2137 | F.second.BaseSample > 0 |
2138 | ? static_cast<double>(F.second.BaseUniqueSample) / |
2139 | F.second.BaseSample |
2140 | : 0; |
2141 | double TestUniquePercent = |
2142 | F.second.TestSample > 0 |
2143 | ? static_cast<double>(F.second.TestUniqueSample) / |
2144 | F.second.TestSample |
2145 | : 0; |
2146 | |
2147 | FOS << format("%.2f%%", F.second.BaseWeight * 100); |
2148 | FOS.PadToColumn(TestWeightCol); |
2149 | FOS << format("%.2f%%", F.second.TestWeight * 100); |
2150 | FOS.PadToColumn(SimilarityCol); |
2151 | FOS << format("%.2f%%", F.second.Similarity * 100); |
2152 | FOS.PadToColumn(OverlapCol); |
2153 | FOS << format("%.2f%%", OverlapPercent * 100); |
2154 | FOS.PadToColumn(BaseUniqueCol); |
2155 | FOS << format("%.2f%%", BaseUniquePercent * 100); |
2156 | FOS.PadToColumn(TestUniqueCol); |
2157 | FOS << format("%.2f%%", TestUniquePercent * 100); |
2158 | FOS.PadToColumn(BaseSampleCol); |
2159 | FOS << F.second.BaseSample; |
2160 | FOS.PadToColumn(TestSampleCol); |
2161 | FOS << F.second.TestSample; |
2162 | FOS.PadToColumn(FuncNameCol); |
2163 | FOS << F.second.TestName.toString() << "\n"; |
2164 | } |
2165 | } |
2166 | |
2167 | void SampleOverlapAggregator::dumpProgramSummary(raw_fd_ostream &OS) const { |
2168 | OS << "Profile overlap infomation for base_profile: " |
2169 | << ProfOverlap.BaseName.toString() |
2170 | << " and test_profile: " << ProfOverlap.TestName.toString() |
2171 | << "\nProgram level:\n"; |
2172 | |
2173 | OS << " Whole program profile similarity: " |
2174 | << format("%.3f%%", ProfOverlap.Similarity * 100) << "\n"; |
2175 | |
2176 | assert(ProfOverlap.UnionSample > 0 &&(static_cast <bool> (ProfOverlap.UnionSample > 0 && "Total samples in two profile should be greater than 0") ? void (0) : __assert_fail ("ProfOverlap.UnionSample > 0 && \"Total samples in two profile should be greater than 0\"" , "llvm/tools/llvm-profdata/llvm-profdata.cpp", 2177, __extension__ __PRETTY_FUNCTION__)) |
2177 | "Total samples in two profile should be greater than 0")(static_cast <bool> (ProfOverlap.UnionSample > 0 && "Total samples in two profile should be greater than 0") ? void (0) : __assert_fail ("ProfOverlap.UnionSample > 0 && \"Total samples in two profile should be greater than 0\"" , "llvm/tools/llvm-profdata/llvm-profdata.cpp", 2177, __extension__ __PRETTY_FUNCTION__)); |
2178 | double OverlapPercent = |
2179 | static_cast<double>(ProfOverlap.OverlapSample) / ProfOverlap.UnionSample; |
2180 | assert(ProfOverlap.BaseSample > 0 &&(static_cast <bool> (ProfOverlap.BaseSample > 0 && "Total samples in base profile should be greater than 0") ? void (0) : __assert_fail ("ProfOverlap.BaseSample > 0 && \"Total samples in base profile should be greater than 0\"" , "llvm/tools/llvm-profdata/llvm-profdata.cpp", 2181, __extension__ __PRETTY_FUNCTION__)) |
2181 | "Total samples in base profile should be greater than 0")(static_cast <bool> (ProfOverlap.BaseSample > 0 && "Total samples in base profile should be greater than 0") ? void (0) : __assert_fail ("ProfOverlap.BaseSample > 0 && \"Total samples in base profile should be greater than 0\"" , "llvm/tools/llvm-profdata/llvm-profdata.cpp", 2181, __extension__ __PRETTY_FUNCTION__)); |
2182 | double BaseUniquePercent = static_cast<double>(ProfOverlap.BaseUniqueSample) / |
2183 | ProfOverlap.BaseSample; |
2184 | assert(ProfOverlap.TestSample > 0 &&(static_cast <bool> (ProfOverlap.TestSample > 0 && "Total samples in test profile should be greater than 0") ? void (0) : __assert_fail ("ProfOverlap.TestSample > 0 && \"Total samples in test profile should be greater than 0\"" , "llvm/tools/llvm-profdata/llvm-profdata.cpp", 2185, __extension__ __PRETTY_FUNCTION__)) |
2185 | "Total samples in test profile should be greater than 0")(static_cast <bool> (ProfOverlap.TestSample > 0 && "Total samples in test profile should be greater than 0") ? void (0) : __assert_fail ("ProfOverlap.TestSample > 0 && \"Total samples in test profile should be greater than 0\"" , "llvm/tools/llvm-profdata/llvm-profdata.cpp", 2185, __extension__ __PRETTY_FUNCTION__)); |
2186 | double TestUniquePercent = static_cast<double>(ProfOverlap.TestUniqueSample) / |
2187 | ProfOverlap.TestSample; |
2188 | |
2189 | OS << " Whole program sample overlap: " |
2190 | << format("%.3f%%", OverlapPercent * 100) << "\n"; |
2191 | OS << " percentage of samples unique in base profile: " |
2192 | << format("%.3f%%", BaseUniquePercent * 100) << "\n"; |
2193 | OS << " percentage of samples unique in test profile: " |
2194 | << format("%.3f%%", TestUniquePercent * 100) << "\n"; |
2195 | OS << " total samples in base profile: " << ProfOverlap.BaseSample << "\n" |
2196 | << " total samples in test profile: " << ProfOverlap.TestSample << "\n"; |
2197 | |
2198 | assert(ProfOverlap.UnionCount > 0 &&(static_cast <bool> (ProfOverlap.UnionCount > 0 && "There should be at least one function in two input profiles" ) ? void (0) : __assert_fail ("ProfOverlap.UnionCount > 0 && \"There should be at least one function in two input profiles\"" , "llvm/tools/llvm-profdata/llvm-profdata.cpp", 2199, __extension__ __PRETTY_FUNCTION__)) |
2199 | "There should be at least one function in two input profiles")(static_cast <bool> (ProfOverlap.UnionCount > 0 && "There should be at least one function in two input profiles" ) ? void (0) : __assert_fail ("ProfOverlap.UnionCount > 0 && \"There should be at least one function in two input profiles\"" , "llvm/tools/llvm-profdata/llvm-profdata.cpp", 2199, __extension__ __PRETTY_FUNCTION__)); |
2200 | double FuncOverlapPercent = |
2201 | static_cast<double>(ProfOverlap.OverlapCount) / ProfOverlap.UnionCount; |
2202 | OS << " Function overlap: " << format("%.3f%%", FuncOverlapPercent * 100) |
2203 | << "\n"; |
2204 | OS << " overlap functions: " << ProfOverlap.OverlapCount << "\n"; |
2205 | OS << " functions unique in base profile: " << ProfOverlap.BaseUniqueCount |
2206 | << "\n"; |
2207 | OS << " functions unique in test profile: " << ProfOverlap.TestUniqueCount |
2208 | << "\n"; |
2209 | } |
2210 | |
2211 | void SampleOverlapAggregator::dumpHotFuncAndBlockOverlap( |
2212 | raw_fd_ostream &OS) const { |
2213 | assert(HotFuncOverlap.UnionCount > 0 &&(static_cast <bool> (HotFuncOverlap.UnionCount > 0 && "There should be at least one hot function in two input profiles" ) ? void (0) : __assert_fail ("HotFuncOverlap.UnionCount > 0 && \"There should be at least one hot function in two input profiles\"" , "llvm/tools/llvm-profdata/llvm-profdata.cpp", 2214, __extension__ __PRETTY_FUNCTION__)) |
2214 | "There should be at least one hot function in two input profiles")(static_cast <bool> (HotFuncOverlap.UnionCount > 0 && "There should be at least one hot function in two input profiles" ) ? void (0) : __assert_fail ("HotFuncOverlap.UnionCount > 0 && \"There should be at least one hot function in two input profiles\"" , "llvm/tools/llvm-profdata/llvm-profdata.cpp", 2214, __extension__ __PRETTY_FUNCTION__)); |
2215 | OS << " Hot-function overlap: " |
2216 | << format("%.3f%%", static_cast<double>(HotFuncOverlap.OverlapCount) / |
2217 | HotFuncOverlap.UnionCount * 100) |
2218 | << "\n"; |
2219 | OS << " overlap hot functions: " << HotFuncOverlap.OverlapCount << "\n"; |
2220 | OS << " hot functions unique in base profile: " |
2221 | << HotFuncOverlap.BaseCount - HotFuncOverlap.OverlapCount << "\n"; |
2222 | OS << " hot functions unique in test profile: " |
2223 | << HotFuncOverlap.TestCount - HotFuncOverlap.OverlapCount << "\n"; |
2224 | |
2225 | assert(HotBlockOverlap.UnionCount > 0 &&(static_cast <bool> (HotBlockOverlap.UnionCount > 0 && "There should be at least one hot block in two input profiles" ) ? void (0) : __assert_fail ("HotBlockOverlap.UnionCount > 0 && \"There should be at least one hot block in two input profiles\"" , "llvm/tools/llvm-profdata/llvm-profdata.cpp", 2226, __extension__ __PRETTY_FUNCTION__)) |
2226 | "There should be at least one hot block in two input profiles")(static_cast <bool> (HotBlockOverlap.UnionCount > 0 && "There should be at least one hot block in two input profiles" ) ? void (0) : __assert_fail ("HotBlockOverlap.UnionCount > 0 && \"There should be at least one hot block in two input profiles\"" , "llvm/tools/llvm-profdata/llvm-profdata.cpp", 2226, __extension__ __PRETTY_FUNCTION__)); |
2227 | OS << " Hot-block overlap: " |
2228 | << format("%.3f%%", static_cast<double>(HotBlockOverlap.OverlapCount) / |
2229 | HotBlockOverlap.UnionCount * 100) |
2230 | << "\n"; |
2231 | OS << " overlap hot blocks: " << HotBlockOverlap.OverlapCount << "\n"; |
2232 | OS << " hot blocks unique in base profile: " |
2233 | << HotBlockOverlap.BaseCount - HotBlockOverlap.OverlapCount << "\n"; |
2234 | OS << " hot blocks unique in test profile: " |
2235 | << HotBlockOverlap.TestCount - HotBlockOverlap.OverlapCount << "\n"; |
2236 | } |
2237 | |
2238 | std::error_code SampleOverlapAggregator::loadProfiles() { |
2239 | using namespace sampleprof; |
2240 | |
2241 | LLVMContext Context; |
2242 | auto FS = vfs::getRealFileSystem(); |
2243 | auto BaseReaderOrErr = SampleProfileReader::create(BaseFilename, Context, *FS, |
2244 | FSDiscriminatorPassOption); |
2245 | if (std::error_code EC = BaseReaderOrErr.getError()) |
2246 | exitWithErrorCode(EC, BaseFilename); |
2247 | |
2248 | auto TestReaderOrErr = SampleProfileReader::create(TestFilename, Context, *FS, |
2249 | FSDiscriminatorPassOption); |
2250 | if (std::error_code EC = TestReaderOrErr.getError()) |
2251 | exitWithErrorCode(EC, TestFilename); |
2252 | |
2253 | BaseReader = std::move(BaseReaderOrErr.get()); |
2254 | TestReader = std::move(TestReaderOrErr.get()); |
2255 | |
2256 | if (std::error_code EC = BaseReader->read()) |
2257 | exitWithErrorCode(EC, BaseFilename); |
2258 | if (std::error_code EC = TestReader->read()) |
2259 | exitWithErrorCode(EC, TestFilename); |
2260 | if (BaseReader->profileIsProbeBased() != TestReader->profileIsProbeBased()) |
2261 | exitWithError( |
2262 | "cannot compare probe-based profile with non-probe-based profile"); |
2263 | if (BaseReader->profileIsCS() != TestReader->profileIsCS()) |
2264 | exitWithError("cannot compare CS profile with non-CS profile"); |
2265 | |
2266 | // Load BaseHotThreshold and TestHotThreshold as 99-percentile threshold in |
2267 | // profile summary. |
2268 | ProfileSummary &BasePS = BaseReader->getSummary(); |
2269 | ProfileSummary &TestPS = TestReader->getSummary(); |
2270 | BaseHotThreshold = |
2271 | ProfileSummaryBuilder::getHotCountThreshold(BasePS.getDetailedSummary()); |
2272 | TestHotThreshold = |
2273 | ProfileSummaryBuilder::getHotCountThreshold(TestPS.getDetailedSummary()); |
2274 | |
2275 | return std::error_code(); |
2276 | } |
2277 | |
2278 | void overlapSampleProfile(const std::string &BaseFilename, |
2279 | const std::string &TestFilename, |
2280 | const OverlapFuncFilters &FuncFilter, |
2281 | uint64_t SimilarityCutoff, raw_fd_ostream &OS) { |
2282 | using namespace sampleprof; |
2283 | |
2284 | // We use 0.000005 to initialize OverlapAggr.Epsilon because the final metrics |
2285 | // report 2--3 places after decimal point in percentage numbers. |
2286 | SampleOverlapAggregator OverlapAggr( |
2287 | BaseFilename, TestFilename, |
2288 | static_cast<double>(SimilarityCutoff) / 1000000, 0.000005, FuncFilter); |
2289 | if (std::error_code EC = OverlapAggr.loadProfiles()) |
2290 | exitWithErrorCode(EC); |
2291 | |
2292 | OverlapAggr.initializeSampleProfileOverlap(); |
2293 | if (OverlapAggr.detectZeroSampleProfile(OS)) |
2294 | return; |
2295 | |
2296 | OverlapAggr.computeSampleProfileOverlap(OS); |
2297 | |
2298 | OverlapAggr.dumpProgramSummary(OS); |
2299 | OverlapAggr.dumpHotFuncAndBlockOverlap(OS); |
2300 | OverlapAggr.dumpFuncSimilarity(OS); |
2301 | } |
2302 | |
2303 | static int overlap_main(int argc, const char *argv[]) { |
2304 | cl::opt<std::string> BaseFilename(cl::Positional, cl::Required, |
2305 | cl::desc("<base profile file>")); |
2306 | cl::opt<std::string> TestFilename(cl::Positional, cl::Required, |
2307 | cl::desc("<test profile file>")); |
2308 | cl::opt<std::string> Output("output", cl::value_desc("output"), cl::init("-"), |
2309 | cl::desc("Output file")); |
2310 | cl::alias OutputA("o", cl::desc("Alias for --output"), cl::aliasopt(Output)); |
2311 | cl::opt<bool> IsCS( |
2312 | "cs", cl::init(false), |
2313 | cl::desc("For context sensitive PGO counts. Does not work with CSSPGO.")); |
2314 | cl::opt<unsigned long long> ValueCutoff( |
2315 | "value-cutoff", cl::init(-1), |
2316 | cl::desc( |
2317 | "Function level overlap information for every function (with calling " |
2318 | "context for csspgo) in test " |
2319 | "profile with max count value greater then the parameter value")); |
2320 | cl::opt<std::string> FuncNameFilter( |
2321 | "function", |
2322 | cl::desc("Function level overlap information for matching functions. For " |
2323 | "CSSPGO this takes a a function name with calling context")); |
2324 | cl::opt<unsigned long long> SimilarityCutoff( |
2325 | "similarity-cutoff", cl::init(0), |
2326 | cl::desc("For sample profiles, list function names (with calling context " |
2327 | "for csspgo) for overlapped functions " |
2328 | "with similarities below the cutoff (percentage times 10000).")); |
2329 | cl::opt<ProfileKinds> ProfileKind( |
2330 | cl::desc("Profile kind:"), cl::init(instr), |
2331 | cl::values(clEnumVal(instr, "Instrumentation profile (default)")llvm::cl::OptionEnumValue { "instr", int(instr), "Instrumentation profile (default)" }, |
2332 | clEnumVal(sample, "Sample profile")llvm::cl::OptionEnumValue { "sample", int(sample), "Sample profile" })); |
2333 | cl::ParseCommandLineOptions(argc, argv, "LLVM profile data overlap tool\n"); |
2334 | |
2335 | std::error_code EC; |
2336 | raw_fd_ostream OS(Output.data(), EC, sys::fs::OF_TextWithCRLF); |
2337 | if (EC) |
2338 | exitWithErrorCode(EC, Output); |
2339 | |
2340 | if (ProfileKind == instr) |
2341 | overlapInstrProfile(BaseFilename, TestFilename, |
2342 | OverlapFuncFilters{ValueCutoff, FuncNameFilter}, OS, |
2343 | IsCS); |
2344 | else |
2345 | overlapSampleProfile(BaseFilename, TestFilename, |
2346 | OverlapFuncFilters{ValueCutoff, FuncNameFilter}, |
2347 | SimilarityCutoff, OS); |
2348 | |
2349 | return 0; |
2350 | } |
2351 | |
namespace {
/// Running totals collected while traversing value-profile sites.
struct ValueSitesStats {
  /// Number of value sites seen.
  uint64_t TotalNumValueSites = 0;
  /// Number of value sites that carried at least one profiled value.
  uint64_t TotalNumValueSitesWithValueProfile = 0;
  /// Number of profiled values summed over all sites.
  uint64_t TotalNumValues = 0;
  /// Entry N-1 counts the sites that recorded exactly N values.
  std::vector<unsigned> ValueSitesHistogram;
};
} // namespace
2363 | |
2364 | static void traverseAllValueSites(const InstrProfRecord &Func, uint32_t VK, |
2365 | ValueSitesStats &Stats, raw_fd_ostream &OS, |
2366 | InstrProfSymtab *Symtab) { |
2367 | uint32_t NS = Func.getNumValueSites(VK); |
2368 | Stats.TotalNumValueSites += NS; |
2369 | for (size_t I = 0; I < NS; ++I) { |
2370 | uint32_t NV = Func.getNumValueDataForSite(VK, I); |
2371 | std::unique_ptr<InstrProfValueData[]> VD = Func.getValueForSite(VK, I); |
2372 | Stats.TotalNumValues += NV; |
2373 | if (NV) { |
2374 | Stats.TotalNumValueSitesWithValueProfile++; |
2375 | if (NV > Stats.ValueSitesHistogram.size()) |
2376 | Stats.ValueSitesHistogram.resize(NV, 0); |
2377 | Stats.ValueSitesHistogram[NV - 1]++; |
2378 | } |
2379 | |
2380 | uint64_t SiteSum = 0; |
2381 | for (uint32_t V = 0; V < NV; V++) |
2382 | SiteSum += VD[V].Count; |
2383 | if (SiteSum == 0) |
2384 | SiteSum = 1; |
2385 | |
2386 | for (uint32_t V = 0; V < NV; V++) { |
2387 | OS << "\t[ " << format("%2u", I) << ", "; |
2388 | if (Symtab == nullptr) |
2389 | OS << format("%4" PRIu64"l" "u", VD[V].Value); |
2390 | else |
2391 | OS << Symtab->getFuncName(VD[V].Value); |
2392 | OS << ", " << format("%10" PRId64"l" "d", VD[V].Count) << " ] (" |
2393 | << format("%.2f%%", (VD[V].Count * 100.0 / SiteSum)) << ")\n"; |
2394 | } |
2395 | } |
2396 | } |
2397 | |
2398 | static void showValueSitesStats(raw_fd_ostream &OS, uint32_t VK, |
2399 | ValueSitesStats &Stats) { |
2400 | OS << " Total number of sites: " << Stats.TotalNumValueSites << "\n"; |
2401 | OS << " Total number of sites with values: " |
2402 | << Stats.TotalNumValueSitesWithValueProfile << "\n"; |
2403 | OS << " Total number of profiled values: " << Stats.TotalNumValues << "\n"; |
2404 | |
2405 | OS << " Value sites histogram:\n\tNumTargets, SiteCount\n"; |
2406 | for (unsigned I = 0; I < Stats.ValueSitesHistogram.size(); I++) { |
2407 | if (Stats.ValueSitesHistogram[I] > 0) |
2408 | OS << "\t" << I + 1 << ", " << Stats.ValueSitesHistogram[I] << "\n"; |
2409 | } |
2410 | } |
2411 | |
2412 | static int showInstrProfile( |
2413 | const std::string &Filename, bool ShowCounts, uint32_t TopN, |
2414 | bool ShowIndirectCallTargets, bool ShowMemOPSizes, bool ShowDetailedSummary, |
2415 | std::vector<uint32_t> DetailedSummaryCutoffs, bool ShowAllFunctions, |
2416 | bool ShowCS, uint64_t ValueCutoff, bool OnlyListBelow, |
2417 | const std::string &ShowFunction, bool TextFormat, bool ShowBinaryIds, |
2418 | bool ShowCovered, bool ShowProfileVersion, bool ShowTemporalProfTraces, |
2419 | ShowFormat SFormat, raw_fd_ostream &OS) { |
2420 | if (SFormat == ShowFormat::Json) |
2421 | exitWithError("JSON output is not supported for instr profiles"); |
2422 | if (SFormat == ShowFormat::Yaml) |
2423 | exitWithError("YAML output is not supported for instr profiles"); |
2424 | auto FS = vfs::getRealFileSystem(); |
2425 | auto ReaderOrErr = InstrProfReader::create(Filename, *FS); |
2426 | std::vector<uint32_t> Cutoffs = std::move(DetailedSummaryCutoffs); |
2427 | if (ShowDetailedSummary && Cutoffs.empty()) { |
2428 | Cutoffs = ProfileSummaryBuilder::DefaultCutoffs; |
2429 | } |
2430 | InstrProfSummaryBuilder Builder(std::move(Cutoffs)); |
2431 | if (Error E = ReaderOrErr.takeError()) |
2432 | exitWithError(std::move(E), Filename); |
2433 | |
2434 | auto Reader = std::move(ReaderOrErr.get()); |
2435 | bool IsIRInstr = Reader->isIRLevelProfile(); |
2436 | size_t ShownFunctions = 0; |
2437 | size_t BelowCutoffFunctions = 0; |
2438 | int NumVPKind = IPVK_Last - IPVK_First + 1; |
2439 | std::vector<ValueSitesStats> VPStats(NumVPKind); |
2440 | |
2441 | auto MinCmp = [](const std::pair<std::string, uint64_t> &v1, |
2442 | const std::pair<std::string, uint64_t> &v2) { |
2443 | return v1.second > v2.second; |
2444 | }; |
2445 | |
2446 | std::priority_queue<std::pair<std::string, uint64_t>, |
2447 | std::vector<std::pair<std::string, uint64_t>>, |
2448 | decltype(MinCmp)> |
2449 | HottestFuncs(MinCmp); |
2450 | |
2451 | if (!TextFormat && OnlyListBelow) { |
2452 | OS << "The list of functions with the maximum counter less than " |
2453 | << ValueCutoff << ":\n"; |
2454 | } |
2455 | |
2456 | // Add marker so that IR-level instrumentation round-trips properly. |
2457 | if (TextFormat && IsIRInstr) |
2458 | OS << ":ir\n"; |
2459 | |
2460 | for (const auto &Func : *Reader) { |
2461 | if (Reader->isIRLevelProfile()) { |
2462 | bool FuncIsCS = NamedInstrProfRecord::hasCSFlagInHash(Func.Hash); |
2463 | if (FuncIsCS != ShowCS) |
2464 | continue; |
2465 | } |
2466 | bool Show = ShowAllFunctions || |
2467 | (!ShowFunction.empty() && Func.Name.contains(ShowFunction)); |
2468 | |
2469 | bool doTextFormatDump = (Show && TextFormat); |
2470 | |
2471 | if (doTextFormatDump) { |
2472 | InstrProfSymtab &Symtab = Reader->getSymtab(); |
2473 | InstrProfWriter::writeRecordInText(Func.Name, Func.Hash, Func, Symtab, |
2474 | OS); |
2475 | continue; |
2476 | } |
2477 | |
2478 | assert(Func.Counts.size() > 0 && "function missing entry counter")(static_cast <bool> (Func.Counts.size() > 0 && "function missing entry counter") ? void (0) : __assert_fail ("Func.Counts.size() > 0 && \"function missing entry counter\"" , "llvm/tools/llvm-profdata/llvm-profdata.cpp", 2478, __extension__ __PRETTY_FUNCTION__)); |
2479 | Builder.addRecord(Func); |
2480 | |
2481 | if (ShowCovered) { |
2482 | if (llvm::any_of(Func.Counts, [](uint64_t C) { return C; })) |
2483 | OS << Func.Name << "\n"; |
2484 | continue; |
2485 | } |
2486 | |
2487 | uint64_t FuncMax = 0; |
2488 | uint64_t FuncSum = 0; |
2489 | |
2490 | auto PseudoKind = Func.getCountPseudoKind(); |
2491 | if (PseudoKind != InstrProfRecord::NotPseudo) { |
2492 | if (Show) { |
2493 | if (!ShownFunctions) |
2494 | OS << "Counters:\n"; |
2495 | ++ShownFunctions; |
2496 | OS << " " << Func.Name << ":\n" |
2497 | << " Hash: " << format("0x%016" PRIx64"l" "x", Func.Hash) << "\n" |
2498 | << " Counters: " << Func.Counts.size(); |
2499 | if (PseudoKind == InstrProfRecord::PseudoHot) |
2500 | OS << " <PseudoHot>\n"; |
2501 | else if (PseudoKind == InstrProfRecord::PseudoWarm) |
2502 | OS << " <PseudoWarm>\n"; |
2503 | else |
2504 | llvm_unreachable("Unknown PseudoKind")::llvm::llvm_unreachable_internal("Unknown PseudoKind", "llvm/tools/llvm-profdata/llvm-profdata.cpp" , 2504); |
2505 | } |
2506 | continue; |
2507 | } |
2508 | |
2509 | for (size_t I = 0, E = Func.Counts.size(); I < E; ++I) { |
2510 | FuncMax = std::max(FuncMax, Func.Counts[I]); |
2511 | FuncSum += Func.Counts[I]; |
2512 | } |
2513 | |
2514 | if (FuncMax < ValueCutoff) { |
2515 | ++BelowCutoffFunctions; |
2516 | if (OnlyListBelow) { |
2517 | OS << " " << Func.Name << ": (Max = " << FuncMax |
2518 | << " Sum = " << FuncSum << ")\n"; |
2519 | } |
2520 | continue; |
2521 | } else if (OnlyListBelow) |
2522 | continue; |
2523 | |
2524 | if (TopN) { |
2525 | if (HottestFuncs.size() == TopN) { |
2526 | if (HottestFuncs.top().second < FuncMax) { |
2527 | HottestFuncs.pop(); |
2528 | HottestFuncs.emplace(std::make_pair(std::string(Func.Name), FuncMax)); |
2529 | } |
2530 | } else |
2531 | HottestFuncs.emplace(std::make_pair(std::string(Func.Name), FuncMax)); |
2532 | } |
2533 | |
2534 | if (Show) { |
2535 | if (!ShownFunctions) |
2536 | OS << "Counters:\n"; |
2537 | |
2538 | ++ShownFunctions; |
2539 | |
2540 | OS << " " << Func.Name << ":\n" |
2541 | << " Hash: " << format("0x%016" PRIx64"l" "x", Func.Hash) << "\n" |
2542 | << " Counters: " << Func.Counts.size() << "\n"; |
2543 | if (!IsIRInstr) |
2544 | OS << " Function count: " << Func.Counts[0] << "\n"; |
2545 | |
2546 | if (ShowIndirectCallTargets) |
2547 | OS << " Indirect Call Site Count: " |
2548 | << Func.getNumValueSites(IPVK_IndirectCallTarget) << "\n"; |
2549 | |
2550 | uint32_t NumMemOPCalls = Func.getNumValueSites(IPVK_MemOPSize); |
2551 | if (ShowMemOPSizes && NumMemOPCalls > 0) |
2552 | OS << " Number of Memory Intrinsics Calls: " << NumMemOPCalls |
2553 | << "\n"; |
2554 | |
2555 | if (ShowCounts) { |
2556 | OS << " Block counts: ["; |
2557 | size_t Start = (IsIRInstr ? 0 : 1); |
2558 | for (size_t I = Start, E = Func.Counts.size(); I < E; ++I) { |
2559 | OS << (I == Start ? "" : ", ") << Func.Counts[I]; |
2560 | } |
2561 | OS << "]\n"; |
2562 | } |
2563 | |
2564 | if (ShowIndirectCallTargets) { |
2565 | OS << " Indirect Target Results:\n"; |
2566 | traverseAllValueSites(Func, IPVK_IndirectCallTarget, |
2567 | VPStats[IPVK_IndirectCallTarget], OS, |
2568 | &(Reader->getSymtab())); |
2569 | } |
2570 | |
2571 | if (ShowMemOPSizes && NumMemOPCalls > 0) { |
2572 | OS << " Memory Intrinsic Size Results:\n"; |
2573 | traverseAllValueSites(Func, IPVK_MemOPSize, VPStats[IPVK_MemOPSize], OS, |
2574 | nullptr); |
2575 | } |
2576 | } |
2577 | } |
2578 | if (Reader->hasError()) |
2579 | exitWithError(Reader->getError(), Filename); |
2580 | |
2581 | if (TextFormat || ShowCovered) |
2582 | return 0; |
2583 | std::unique_ptr<ProfileSummary> PS(Builder.getSummary()); |
2584 | bool IsIR = Reader->isIRLevelProfile(); |
2585 | OS << "Instrumentation level: " << (IsIR ? "IR" : "Front-end"); |
2586 | if (IsIR) |
2587 | OS << " entry_first = " << Reader->instrEntryBBEnabled(); |
2588 | OS << "\n"; |
2589 | if (ShowAllFunctions || !ShowFunction.empty()) |
2590 | OS << "Functions shown: " << ShownFunctions << "\n"; |
2591 | OS << "Total functions: " << PS->getNumFunctions() << "\n"; |
2592 | if (ValueCutoff > 0) { |
2593 | OS << "Number of functions with maximum count (< " << ValueCutoff |
2594 | << "): " << BelowCutoffFunctions << "\n"; |
2595 | OS << "Number of functions with maximum count (>= " << ValueCutoff |
2596 | << "): " << PS->getNumFunctions() - BelowCutoffFunctions << "\n"; |
2597 | } |
2598 | OS << "Maximum function count: " << PS->getMaxFunctionCount() << "\n"; |
2599 | OS << "Maximum internal block count: " << PS->getMaxInternalCount() << "\n"; |
2600 | |
2601 | if (TopN) { |
2602 | std::vector<std::pair<std::string, uint64_t>> SortedHottestFuncs; |
2603 | while (!HottestFuncs.empty()) { |
2604 | SortedHottestFuncs.emplace_back(HottestFuncs.top()); |
2605 | HottestFuncs.pop(); |
2606 | } |
2607 | OS << "Top " << TopN |
2608 | << " functions with the largest internal block counts: \n"; |
2609 | for (auto &hotfunc : llvm::reverse(SortedHottestFuncs)) |
2610 | OS << " " << hotfunc.first << ", max count = " << hotfunc.second << "\n"; |
2611 | } |
2612 | |
2613 | if (ShownFunctions && ShowIndirectCallTargets) { |
2614 | OS << "Statistics for indirect call sites profile:\n"; |
2615 | showValueSitesStats(OS, IPVK_IndirectCallTarget, |
2616 | VPStats[IPVK_IndirectCallTarget]); |
2617 | } |
2618 | |
2619 | if (ShownFunctions && ShowMemOPSizes) { |
2620 | OS << "Statistics for memory intrinsic calls sizes profile:\n"; |
2621 | showValueSitesStats(OS, IPVK_MemOPSize, VPStats[IPVK_MemOPSize]); |
2622 | } |
2623 | |
2624 | if (ShowDetailedSummary) { |
2625 | OS << "Total number of blocks: " << PS->getNumCounts() << "\n"; |
2626 | OS << "Total count: " << PS->getTotalCount() << "\n"; |
2627 | PS->printDetailedSummary(OS); |
2628 | } |
2629 | |
2630 | if (ShowBinaryIds) |
2631 | if (Error E = Reader->printBinaryIds(OS)) |
2632 | exitWithError(std::move(E), Filename); |
2633 | |
2634 | if (ShowProfileVersion) |
2635 | OS << "Profile version: " << Reader->getVersion() << "\n"; |
2636 | |
2637 | if (ShowTemporalProfTraces) { |
2638 | auto &Traces = Reader->getTemporalProfTraces(); |
2639 | OS << "Temporal Profile Traces (samples=" << Traces.size() |
2640 | << " seen=" << Reader->getTemporalProfTraceStreamSize() << "):\n"; |
2641 | for (unsigned i = 0; i < Traces.size(); i++) { |
2642 | OS << " Temporal Profile Trace " << i << " (weight=" << Traces[i].Weight |
2643 | << " count=" << Traces[i].FunctionNameRefs.size() << "):\n"; |
2644 | for (auto &NameRef : Traces[i].FunctionNameRefs) |
2645 | OS << " " << Reader->getSymtab().getFuncName(NameRef) << "\n"; |
2646 | } |
2647 | } |
2648 | |
2649 | return 0; |
2650 | } |
2651 | |
2652 | static void showSectionInfo(sampleprof::SampleProfileReader *Reader, |
2653 | raw_fd_ostream &OS) { |
2654 | if (!Reader->dumpSectionInfo(OS)) { |
2655 | WithColor::warning() << "-show-sec-info-only is only supported for " |
2656 | << "sample profile in extbinary format and is " |
2657 | << "ignored for other formats.\n"; |
2658 | return; |
2659 | } |
2660 | } |
2661 | |
2662 | namespace { |
2663 | struct HotFuncInfo { |
2664 | std::string FuncName; |
2665 | uint64_t TotalCount; |
2666 | double TotalCountPercent; |
2667 | uint64_t MaxCount; |
2668 | uint64_t EntryCount; |
2669 | |
2670 | HotFuncInfo() |
2671 | : TotalCount(0), TotalCountPercent(0.0f), MaxCount(0), EntryCount(0) {} |
2672 | |
2673 | HotFuncInfo(StringRef FN, uint64_t TS, double TSP, uint64_t MS, uint64_t ES) |
2674 | : FuncName(FN.begin(), FN.end()), TotalCount(TS), TotalCountPercent(TSP), |
2675 | MaxCount(MS), EntryCount(ES) {} |
2676 | }; |
2677 | } // namespace |
2678 | |
2679 | // Print out detailed information about hot functions in PrintValues vector. |
2680 | // Users specify titles and offset of every columns through ColumnTitle and |
2681 | // ColumnOffset. The size of ColumnTitle and ColumnOffset need to be the same |
2682 | // and at least 4. Besides, users can optionally give a HotFuncMetric string to |
2683 | // print out or let it be an empty string. |
2684 | static void dumpHotFunctionList(const std::vector<std::string> &ColumnTitle, |
2685 | const std::vector<int> &ColumnOffset, |
2686 | const std::vector<HotFuncInfo> &PrintValues, |
2687 | uint64_t HotFuncCount, uint64_t TotalFuncCount, |
2688 | uint64_t HotProfCount, uint64_t TotalProfCount, |
2689 | const std::string &HotFuncMetric, |
2690 | uint32_t TopNFunctions, raw_fd_ostream &OS) { |
2691 | assert(ColumnOffset.size() == ColumnTitle.size() &&(static_cast <bool> (ColumnOffset.size() == ColumnTitle .size() && "ColumnOffset and ColumnTitle should have the same size" ) ? void (0) : __assert_fail ("ColumnOffset.size() == ColumnTitle.size() && \"ColumnOffset and ColumnTitle should have the same size\"" , "llvm/tools/llvm-profdata/llvm-profdata.cpp", 2692, __extension__ __PRETTY_FUNCTION__)) |
2692 | "ColumnOffset and ColumnTitle should have the same size")(static_cast <bool> (ColumnOffset.size() == ColumnTitle .size() && "ColumnOffset and ColumnTitle should have the same size" ) ? void (0) : __assert_fail ("ColumnOffset.size() == ColumnTitle.size() && \"ColumnOffset and ColumnTitle should have the same size\"" , "llvm/tools/llvm-profdata/llvm-profdata.cpp", 2692, __extension__ __PRETTY_FUNCTION__)); |
2693 | assert(ColumnTitle.size() >= 4 &&(static_cast <bool> (ColumnTitle.size() >= 4 && "ColumnTitle should have at least 4 elements") ? void (0) : __assert_fail ("ColumnTitle.size() >= 4 && \"ColumnTitle should have at least 4 elements\"" , "llvm/tools/llvm-profdata/llvm-profdata.cpp", 2694, __extension__ __PRETTY_FUNCTION__)) |
2694 | "ColumnTitle should have at least 4 elements")(static_cast <bool> (ColumnTitle.size() >= 4 && "ColumnTitle should have at least 4 elements") ? void (0) : __assert_fail ("ColumnTitle.size() >= 4 && \"ColumnTitle should have at least 4 elements\"" , "llvm/tools/llvm-profdata/llvm-profdata.cpp", 2694, __extension__ __PRETTY_FUNCTION__)); |
2695 | assert(TotalFuncCount > 0 &&(static_cast <bool> (TotalFuncCount > 0 && "There should be at least one function in the profile" ) ? void (0) : __assert_fail ("TotalFuncCount > 0 && \"There should be at least one function in the profile\"" , "llvm/tools/llvm-profdata/llvm-profdata.cpp", 2696, __extension__ __PRETTY_FUNCTION__)) |
2696 | "There should be at least one function in the profile")(static_cast <bool> (TotalFuncCount > 0 && "There should be at least one function in the profile" ) ? void (0) : __assert_fail ("TotalFuncCount > 0 && \"There should be at least one function in the profile\"" , "llvm/tools/llvm-profdata/llvm-profdata.cpp", 2696, __extension__ __PRETTY_FUNCTION__)); |
2697 | double TotalProfPercent = 0; |
2698 | if (TotalProfCount > 0) |
2699 | TotalProfPercent = static_cast<double>(HotProfCount) / TotalProfCount * 100; |
2700 | |
2701 | formatted_raw_ostream FOS(OS); |
2702 | FOS << HotFuncCount << " out of " << TotalFuncCount |
2703 | << " functions with profile (" |
2704 | << format("%.2f%%", |
2705 | (static_cast<double>(HotFuncCount) / TotalFuncCount * 100)) |
2706 | << ") are considered hot functions"; |
2707 | if (!HotFuncMetric.empty()) |
2708 | FOS << " (" << HotFuncMetric << ")"; |
2709 | FOS << ".\n"; |
2710 | FOS << HotProfCount << " out of " << TotalProfCount << " profile counts (" |
2711 | << format("%.2f%%", TotalProfPercent) << ") are from hot functions.\n"; |
2712 | |
2713 | for (size_t I = 0; I < ColumnTitle.size(); ++I) { |
2714 | FOS.PadToColumn(ColumnOffset[I]); |
2715 | FOS << ColumnTitle[I]; |
2716 | } |
2717 | FOS << "\n"; |
2718 | |
2719 | uint32_t Count = 0; |
2720 | for (const auto &R : PrintValues) { |
2721 | if (TopNFunctions && (Count++ == TopNFunctions)) |
2722 | break; |
2723 | FOS.PadToColumn(ColumnOffset[0]); |
2724 | FOS << R.TotalCount << " (" << format("%.2f%%", R.TotalCountPercent) << ")"; |
2725 | FOS.PadToColumn(ColumnOffset[1]); |
2726 | FOS << R.MaxCount; |
2727 | FOS.PadToColumn(ColumnOffset[2]); |
2728 | FOS << R.EntryCount; |
2729 | FOS.PadToColumn(ColumnOffset[3]); |
2730 | FOS << R.FuncName << "\n"; |
2731 | } |
2732 | } |
2733 | |
2734 | static int showHotFunctionList(const sampleprof::SampleProfileMap &Profiles, |
2735 | ProfileSummary &PS, uint32_t TopN, |
2736 | raw_fd_ostream &OS) { |
2737 | using namespace sampleprof; |
2738 | |
2739 | const uint32_t HotFuncCutoff = 990000; |
2740 | auto &SummaryVector = PS.getDetailedSummary(); |
2741 | uint64_t MinCountThreshold = 0; |
2742 | for (const ProfileSummaryEntry &SummaryEntry : SummaryVector) { |
2743 | if (SummaryEntry.Cutoff == HotFuncCutoff) { |
2744 | MinCountThreshold = SummaryEntry.MinCount; |
2745 | break; |
2746 | } |
2747 | } |
2748 | |
2749 | // Traverse all functions in the profile and keep only hot functions. |
2750 | // The following loop also calculates the sum of total samples of all |
2751 | // functions. |
2752 | std::multimap<uint64_t, std::pair<const FunctionSamples *, const uint64_t>, |
2753 | std::greater<uint64_t>> |
2754 | HotFunc; |
2755 | uint64_t ProfileTotalSample = 0; |
2756 | uint64_t HotFuncSample = 0; |
2757 | uint64_t HotFuncCount = 0; |
2758 | |
2759 | for (const auto &I : Profiles) { |
2760 | FuncSampleStats FuncStats; |
2761 | const FunctionSamples &FuncProf = I.second; |
2762 | ProfileTotalSample += FuncProf.getTotalSamples(); |
2763 | getFuncSampleStats(FuncProf, FuncStats, MinCountThreshold); |
2764 | |
2765 | if (isFunctionHot(FuncStats, MinCountThreshold)) { |
2766 | HotFunc.emplace(FuncProf.getTotalSamples(), |
2767 | std::make_pair(&(I.second), FuncStats.MaxSample)); |
2768 | HotFuncSample += FuncProf.getTotalSamples(); |
2769 | ++HotFuncCount; |
2770 | } |
2771 | } |
2772 | |
2773 | std::vector<std::string> ColumnTitle{"Total sample (%)", "Max sample", |
2774 | "Entry sample", "Function name"}; |
2775 | std::vector<int> ColumnOffset{0, 24, 42, 58}; |
2776 | std::string Metric = |
2777 | std::string("max sample >= ") + std::to_string(MinCountThreshold); |
2778 | std::vector<HotFuncInfo> PrintValues; |
2779 | for (const auto &FuncPair : HotFunc) { |
2780 | const FunctionSamples &Func = *FuncPair.second.first; |
2781 | double TotalSamplePercent = |
2782 | (ProfileTotalSample > 0) |
2783 | ? (Func.getTotalSamples() * 100.0) / ProfileTotalSample |
2784 | : 0; |
2785 | PrintValues.emplace_back( |
2786 | HotFuncInfo(Func.getContext().toString(), Func.getTotalSamples(), |
2787 | TotalSamplePercent, FuncPair.second.second, |
2788 | Func.getHeadSamplesEstimate())); |
2789 | } |
2790 | dumpHotFunctionList(ColumnTitle, ColumnOffset, PrintValues, HotFuncCount, |
2791 | Profiles.size(), HotFuncSample, ProfileTotalSample, |
2792 | Metric, TopN, OS); |
2793 | |
2794 | return 0; |
2795 | } |
2796 | |
2797 | static int showSampleProfile(const std::string &Filename, bool ShowCounts, |
2798 | uint32_t TopN, bool ShowAllFunctions, |
2799 | bool ShowDetailedSummary, |
2800 | const std::string &ShowFunction, |
2801 | bool ShowProfileSymbolList, |
2802 | bool ShowSectionInfoOnly, bool ShowHotFuncList, |
2803 | ShowFormat SFormat, raw_fd_ostream &OS) { |
2804 | if (SFormat == ShowFormat::Yaml) |
2805 | exitWithError("YAML output is not supported for sample profiles"); |
2806 | using namespace sampleprof; |
2807 | LLVMContext Context; |
2808 | auto FS = vfs::getRealFileSystem(); |
2809 | auto ReaderOrErr = SampleProfileReader::create(Filename, Context, *FS, |
2810 | FSDiscriminatorPassOption); |
2811 | if (std::error_code EC = ReaderOrErr.getError()) |
2812 | exitWithErrorCode(EC, Filename); |
2813 | |
2814 | auto Reader = std::move(ReaderOrErr.get()); |
2815 | if (ShowSectionInfoOnly) { |
2816 | showSectionInfo(Reader.get(), OS); |
2817 | return 0; |
2818 | } |
2819 | |
2820 | if (std::error_code EC = Reader->read()) |
2821 | exitWithErrorCode(EC, Filename); |
2822 | |
2823 | if (ShowAllFunctions || ShowFunction.empty()) { |
2824 | if (SFormat == ShowFormat::Json) |
2825 | Reader->dumpJson(OS); |
2826 | else |
2827 | Reader->dump(OS); |
2828 | } else { |
2829 | if (SFormat == ShowFormat::Json) |
2830 | exitWithError( |
2831 | "the JSON format is supported only when all functions are to " |
2832 | "be printed"); |
2833 | |
2834 | // TODO: parse context string to support filtering by contexts. |
2835 | Reader->dumpFunctionProfile(StringRef(ShowFunction), OS); |
2836 | } |
2837 | |
2838 | if (ShowProfileSymbolList) { |
2839 | std::unique_ptr<sampleprof::ProfileSymbolList> ReaderList = |
2840 | Reader->getProfileSymbolList(); |
2841 | ReaderList->dump(OS); |
2842 | } |
2843 | |
2844 | if (ShowDetailedSummary) { |
2845 | auto &PS = Reader->getSummary(); |
2846 | PS.printSummary(OS); |
2847 | PS.printDetailedSummary(OS); |
2848 | } |
2849 | |
2850 | if (ShowHotFuncList || TopN) |
2851 | showHotFunctionList(Reader->getProfiles(), Reader->getSummary(), TopN, OS); |
2852 | |
2853 | return 0; |
2854 | } |
2855 | |
2856 | static int showMemProfProfile(const std::string &Filename, |
2857 | const std::string &ProfiledBinary, |
2858 | ShowFormat SFormat, raw_fd_ostream &OS) { |
2859 | if (SFormat == ShowFormat::Json) |
2860 | exitWithError("JSON output is not supported for MemProf"); |
2861 | auto ReaderOr = llvm::memprof::RawMemProfReader::create( |
2862 | Filename, ProfiledBinary, /*KeepNames=*/true); |
2863 | if (Error E = ReaderOr.takeError()) |
2864 | // Since the error can be related to the profile or the binary we do not |
2865 | // pass whence. Instead additional context is provided where necessary in |
2866 | // the error message. |
2867 | exitWithError(std::move(E), /*Whence*/ ""); |
2868 | |
2869 | std::unique_ptr<llvm::memprof::RawMemProfReader> Reader( |
2870 | ReaderOr.get().release()); |
2871 | |
2872 | Reader->printYAML(OS); |
2873 | return 0; |
2874 | } |
2875 | |
2876 | static int showDebugInfoCorrelation(const std::string &Filename, |
2877 | bool ShowDetailedSummary, |
2878 | bool ShowProfileSymbolList, |
2879 | ShowFormat SFormat, raw_fd_ostream &OS) { |
2880 | if (SFormat == ShowFormat::Json) |
2881 | exitWithError("JSON output is not supported for debug info correlation"); |
2882 | std::unique_ptr<InstrProfCorrelator> Correlator; |
2883 | if (auto Err = InstrProfCorrelator::get(Filename).moveInto(Correlator)) |
2884 | exitWithError(std::move(Err), Filename); |
2885 | if (SFormat == ShowFormat::Yaml) { |
2886 | if (auto Err = Correlator->dumpYaml(OS)) |
2887 | exitWithError(std::move(Err), Filename); |
2888 | return 0; |
2889 | } |
2890 | |
2891 | if (auto Err = Correlator->correlateProfileData()) |
2892 | exitWithError(std::move(Err), Filename); |
2893 | |
2894 | InstrProfSymtab Symtab; |
2895 | if (auto Err = Symtab.create( |
2896 | StringRef(Correlator->getNamesPointer(), Correlator->getNamesSize()))) |
2897 | exitWithError(std::move(Err), Filename); |
2898 | |
2899 | if (ShowProfileSymbolList) |
2900 | Symtab.dumpNames(OS); |
2901 | // TODO: Read "Profile Data Type" from debug info to compute and show how many |
2902 | // counters the section holds. |
2903 | if (ShowDetailedSummary) |
2904 | OS << "Counters section size: 0x" |
2905 | << Twine::utohexstr(Correlator->getCountersSectionSize()) << " bytes\n"; |
2906 | OS << "Found " << Correlator->getDataSize() << " functions\n"; |
2907 | |
2908 | return 0; |
2909 | } |
2910 | |
2911 | static int show_main(int argc, const char *argv[]) { |
2912 | cl::opt<std::string> Filename(cl::Positional, cl::desc("<profdata-file>")); |
2913 | |
2914 | cl::opt<bool> ShowCounts("counts", cl::init(false), |
2915 | cl::desc("Show counter values for shown functions")); |
2916 | cl::opt<ShowFormat> SFormat( |
2917 | "show-format", cl::init(ShowFormat::Text), |
2918 | cl::desc("Emit output in the selected format if supported"), |
2919 | cl::values(clEnumValN(ShowFormat::Text, "text",llvm::cl::OptionEnumValue { "text", int(ShowFormat::Text), "emit normal text output (default)" } |
2920 | "emit normal text output (default)")llvm::cl::OptionEnumValue { "text", int(ShowFormat::Text), "emit normal text output (default)" }, |
2921 | clEnumValN(ShowFormat::Json, "json", "emit JSON")llvm::cl::OptionEnumValue { "json", int(ShowFormat::Json), "emit JSON" }, |
2922 | clEnumValN(ShowFormat::Yaml, "yaml", "emit YAML")llvm::cl::OptionEnumValue { "yaml", int(ShowFormat::Yaml), "emit YAML" })); |
2923 | // TODO: Consider replacing this with `--show-format=text-encoding`. |
2924 | cl::opt<bool> TextFormat( |
2925 | "text", cl::init(false), |
2926 | cl::desc("Show instr profile data in text dump format")); |
2927 | cl::opt<bool> JsonFormat( |
2928 | "json", cl::desc("Show sample profile data in the JSON format " |
2929 | "(deprecated, please use --show-format=json)")); |
2930 | cl::opt<bool> ShowIndirectCallTargets( |
2931 | "ic-targets", cl::init(false), |
2932 | cl::desc("Show indirect call site target values for shown functions")); |
2933 | cl::opt<bool> ShowMemOPSizes( |
2934 | "memop-sizes", cl::init(false), |
2935 | cl::desc("Show the profiled sizes of the memory intrinsic calls " |
2936 | "for shown functions")); |
2937 | cl::opt<bool> ShowDetailedSummary("detailed-summary", cl::init(false), |
2938 | cl::desc("Show detailed profile summary")); |
2939 | cl::list<uint32_t> DetailedSummaryCutoffs( |
2940 | cl::CommaSeparated, "detailed-summary-cutoffs", |
2941 | cl::desc( |
2942 | "Cutoff percentages (times 10000) for generating detailed summary"), |
2943 | cl::value_desc("800000,901000,999999")); |
2944 | cl::opt<bool> ShowHotFuncList( |
2945 | "hot-func-list", cl::init(false), |
2946 | cl::desc("Show profile summary of a list of hot functions")); |
2947 | cl::opt<bool> ShowAllFunctions("all-functions", cl::init(false), |
2948 | cl::desc("Details for every function")); |
2949 | cl::opt<bool> ShowCS("showcs", cl::init(false), |
2950 | cl::desc("Show context sensitive counts")); |
2951 | cl::opt<std::string> ShowFunction("function", |
2952 | cl::desc("Details for matching functions")); |
2953 | |
2954 | cl::opt<std::string> OutputFilename("output", cl::value_desc("output"), |
2955 | cl::init("-"), cl::desc("Output file")); |
2956 | cl::alias OutputFilenameA("o", cl::desc("Alias for --output"), |
2957 | cl::aliasopt(OutputFilename)); |
2958 | cl::opt<ProfileKinds> ProfileKind( |
2959 | cl::desc("Profile kind:"), cl::init(instr), |
2960 | cl::values(clEnumVal(instr, "Instrumentation profile (default)")llvm::cl::OptionEnumValue { "instr", int(instr), "Instrumentation profile (default)" }, |
2961 | clEnumVal(sample, "Sample profile")llvm::cl::OptionEnumValue { "sample", int(sample), "Sample profile" }, |
2962 | clEnumVal(memory, "MemProf memory access profile")llvm::cl::OptionEnumValue { "memory", int(memory), "MemProf memory access profile" })); |
2963 | cl::opt<uint32_t> TopNFunctions( |
2964 | "topn", cl::init(0), |
2965 | cl::desc("Show the list of functions with the largest internal counts")); |
2966 | cl::opt<uint32_t> ValueCutoff( |
2967 | "value-cutoff", cl::init(0), |
2968 | cl::desc("Set the count value cutoff. Functions with the maximum count " |
2969 | "less than this value will not be printed out. (Default is 0)")); |
2970 | cl::opt<bool> OnlyListBelow( |
2971 | "list-below-cutoff", cl::init(false), |
2972 | cl::desc("Only output names of functions whose max count values are " |
2973 | "below the cutoff value")); |
2974 | cl::opt<bool> ShowProfileSymbolList( |
2975 | "show-prof-sym-list", cl::init(false), |
2976 | cl::desc("Show profile symbol list if it exists in the profile. ")); |
2977 | cl::opt<bool> ShowSectionInfoOnly( |
2978 | "show-sec-info-only", cl::init(false), |
2979 | cl::desc("Show the information of each section in the sample profile. " |
2980 | "The flag is only usable when the sample profile is in " |
2981 | "extbinary format")); |
2982 | cl::opt<bool> ShowBinaryIds("binary-ids", cl::init(false), |
2983 | cl::desc("Show binary ids in the profile. ")); |
2984 | cl::opt<bool> ShowTemporalProfTraces( |
2985 | "temporal-profile-traces", |
2986 | cl::desc("Show temporal profile traces in the profile.")); |
2987 | cl::opt<std::string> DebugInfoFilename( |
2988 | "debug-info", cl::init(""), |
2989 | cl::desc("Read and extract profile metadata from debug info and show " |
2990 | "the functions it found.")); |
2991 | cl::opt<bool> ShowCovered( |
2992 | "covered", cl::init(false), |
2993 | cl::desc("Show only the functions that have been executed.")); |
2994 | cl::opt<std::string> ProfiledBinary( |
2995 | "profiled-binary", cl::init(""), |
2996 | cl::desc("Path to binary from which the profile was collected.")); |
2997 | cl::opt<bool> ShowProfileVersion("profile-version", cl::init(false), |
2998 | cl::desc("Show profile version. ")); |
2999 | cl::ParseCommandLineOptions(argc, argv, "LLVM profile data summary\n"); |
3000 | |
3001 | if (Filename.empty() && DebugInfoFilename.empty()) |
3002 | exitWithError( |
3003 | "the positional argument '<profdata-file>' is required unless '--" + |
3004 | DebugInfoFilename.ArgStr + "' is provided"); |
3005 | |
3006 | if (Filename == OutputFilename) { |
3007 | errs() << sys::path::filename(argv[0]) |
3008 | << ": Input file name cannot be the same as the output file name!\n"; |
3009 | return 1; |
3010 | } |
3011 | if (JsonFormat) |
3012 | SFormat = ShowFormat::Json; |
3013 | |
3014 | std::error_code EC; |
3015 | raw_fd_ostream OS(OutputFilename.data(), EC, sys::fs::OF_TextWithCRLF); |
3016 | if (EC) |
3017 | exitWithErrorCode(EC, OutputFilename); |
3018 | |
3019 | if (ShowAllFunctions && !ShowFunction.empty()) |
3020 | WithColor::warning() << "-function argument ignored: showing all functions\n"; |
3021 | |
3022 | if (!DebugInfoFilename.empty()) |
3023 | return showDebugInfoCorrelation(DebugInfoFilename, ShowDetailedSummary, |
3024 | ShowProfileSymbolList, SFormat, OS); |
3025 | |
3026 | if (ProfileKind == instr) |
3027 | return showInstrProfile( |
3028 | Filename, ShowCounts, TopNFunctions, ShowIndirectCallTargets, |
3029 | ShowMemOPSizes, ShowDetailedSummary, DetailedSummaryCutoffs, |
3030 | ShowAllFunctions, ShowCS, ValueCutoff, OnlyListBelow, ShowFunction, |
3031 | TextFormat, ShowBinaryIds, ShowCovered, ShowProfileVersion, |
3032 | ShowTemporalProfTraces, SFormat, OS); |
3033 | if (ProfileKind == sample) |
3034 | return showSampleProfile(Filename, ShowCounts, TopNFunctions, |
3035 | ShowAllFunctions, ShowDetailedSummary, |
3036 | ShowFunction, ShowProfileSymbolList, |
3037 | ShowSectionInfoOnly, ShowHotFuncList, SFormat, OS); |
3038 | return showMemProfProfile(Filename, ProfiledBinary, SFormat, OS); |
3039 | } |
3040 | |
3041 | int llvm_profdata_main(int argc, char **argvNonConst, |
3042 | const llvm::ToolContext &) { |
3043 | const char **argv = const_cast<const char **>(argvNonConst); |
3044 | InitLLVM X(argc, argv); |
3045 | |
3046 | StringRef ProgName(sys::path::filename(argv[0])); |
3047 | if (argc > 1) { |
3048 | int (*func)(int, const char *[]) = nullptr; |
3049 | |
3050 | if (strcmp(argv[1], "merge") == 0) |
3051 | func = merge_main; |
3052 | else if (strcmp(argv[1], "show") == 0) |
3053 | func = show_main; |
3054 | else if (strcmp(argv[1], "overlap") == 0) |
3055 | func = overlap_main; |
3056 | |
3057 | if (func) { |
3058 | std::string Invocation(ProgName.str() + " " + argv[1]); |
3059 | argv[1] = Invocation.c_str(); |
3060 | return func(argc - 1, argv + 1); |
3061 | } |
3062 | |
3063 | if (strcmp(argv[1], "-h") == 0 || strcmp(argv[1], "-help") == 0 || |
3064 | strcmp(argv[1], "--help") == 0) { |
3065 | |
3066 | errs() << "OVERVIEW: LLVM profile data tools\n\n" |
3067 | << "USAGE: " << ProgName << " <command> [args...]\n" |
3068 | << "USAGE: " << ProgName << " <command> -help\n\n" |
3069 | << "See each individual command --help for more details.\n" |
3070 | << "Available commands: merge, show, overlap\n"; |
3071 | return 0; |
3072 | } |
3073 | } |
3074 | |
3075 | if (argc < 2) |
3076 | errs() << ProgName << ": No command specified!\n"; |
3077 | else |
3078 | errs() << ProgName << ": Unknown command!\n"; |
3079 | |
3080 | errs() << "USAGE: " << ProgName << " <merge|show|overlap> [args...]\n"; |
3081 | return 1; |
3082 | } |