LLVM 22.0.0git
SampleProfileMatcher.cpp
Go to the documentation of this file.
1//===- SampleProfileMatcher.cpp - Sampling-based Stale Profile Matcher ----===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file implements the SampleProfileMatcher used for stale
10// profile matching.
11//
12//===----------------------------------------------------------------------===//
13
17#include "llvm/IR/MDBuilder.h"
20
21using namespace llvm;
22using namespace sampleprof;
23
24#define DEBUG_TYPE "sample-profile-matcher"
25
27 "func-profile-similarity-threshold", cl::Hidden, cl::init(80),
28 cl::desc("Consider a profile matches a function if the similarity of their "
29 "callee sequences is above the specified percentile."));
30
32 "min-func-count-for-cg-matching", cl::Hidden, cl::init(5),
33 cl::desc("The minimum number of basic blocks required for a function to "
34 "run stale profile call graph matching."));
35
37 "min-call-count-for-cg-matching", cl::Hidden, cl::init(3),
38 cl::desc("The minimum number of call anchors required for a function to "
39 "run stale profile call graph matching."));
40
42 "load-func-profile-for-cg-matching", cl::Hidden, cl::init(true),
44 "Load top-level profiles that the sample reader initially skipped for "
45 "the call-graph matching (only meaningful for extended binary "
46 "format)"));
47
52
54 "salvage-stale-profile-max-callsites", cl::Hidden, cl::init(UINT_MAX),
55 cl::desc("The maximum number of callsites in a function, above which stale "
56 "profile matching will be skipped."));
57
58void SampleProfileMatcher::findIRAnchors(const Function &F,
59 AnchorMap &IRAnchors) const {
60 // For inlined code, recover the original callsite and callee by finding the
61 // top-level inline frame. e.g. For frame stack "main:1 @ foo:2 @ bar:3", the
62 // top-level frame is "main:1", the callsite is "1" and the callee is "foo".
63 auto FindTopLevelInlinedCallsite = [](const DILocation *DIL) {
64 assert((DIL && DIL->getInlinedAt()) && "No inlined callsite");
65 const DILocation *PrevDIL = nullptr;
66 do {
67 PrevDIL = DIL;
68 DIL = DIL->getInlinedAt();
69 } while (DIL->getInlinedAt());
70
73 StringRef CalleeName = PrevDIL->getSubprogramLinkageName();
74 return std::make_pair(Callsite, FunctionId(CalleeName));
75 };
76
77 auto GetCanonicalCalleeName = [](const CallBase *CB) {
78 StringRef CalleeName = UnknownIndirectCallee;
79 if (Function *Callee = CB->getCalledFunction())
80 CalleeName = FunctionSamples::getCanonicalFnName(Callee->getName());
81 return CalleeName;
82 };
83
84 // Extract profile matching anchors in the IR.
85 for (auto &BB : F) {
86 for (auto &I : BB) {
87 DILocation *DIL = I.getDebugLoc();
88 if (!DIL)
89 continue;
90
92 if (auto Probe = extractProbe(I)) {
93 // Flatten inlined IR for the matching.
94 if (DIL->getInlinedAt()) {
95 IRAnchors.emplace(FindTopLevelInlinedCallsite(DIL));
96 } else {
97 // Use empty StringRef for basic block probe.
98 StringRef CalleeName;
99 if (const auto *CB = dyn_cast<CallBase>(&I)) {
100 // Skip the probe inst whose callee name is "llvm.pseudoprobe".
101 if (!isa<IntrinsicInst>(&I))
102 CalleeName = GetCanonicalCalleeName(CB);
103 }
104 LineLocation Loc = LineLocation(Probe->Id, 0);
105 IRAnchors.emplace(Loc, FunctionId(CalleeName));
106 }
107 }
108 } else {
109 // TODO: For line-number based profile(AutoFDO), currently only support
110 // find callsite anchors. In future, we need to parse all the non-call
111 // instructions to extract the line locations for profile matching.
112 if (!isa<CallBase>(&I) || isa<IntrinsicInst>(&I))
113 continue;
114
115 if (DIL->getInlinedAt()) {
116 IRAnchors.emplace(FindTopLevelInlinedCallsite(DIL));
117 } else {
120 StringRef CalleeName = GetCanonicalCalleeName(dyn_cast<CallBase>(&I));
121 IRAnchors.emplace(Callsite, FunctionId(CalleeName));
122 }
123 }
124 }
125 }
126}
127
128void SampleProfileMatcher::findProfileAnchors(const FunctionSamples &FS,
129 AnchorMap &ProfileAnchors) const {
130 auto isInvalidLineOffset = [](uint32_t LineOffset) {
131 return LineOffset & 0x8000;
132 };
133
134 auto InsertAnchor = [](const LineLocation &Loc, const FunctionId &CalleeName,
135 AnchorMap &ProfileAnchors) {
136 auto Ret = ProfileAnchors.try_emplace(Loc, CalleeName);
137 if (!Ret.second) {
138 // For multiple callees, which indicates it's an indirect call, we use a
139 // dummy name(UnknownIndirectCallee) as the indrect callee name.
140 Ret.first->second = FunctionId(UnknownIndirectCallee);
141 }
142 };
143
144 for (const auto &I : FS.getBodySamples()) {
145 const LineLocation &Loc = I.first;
146 if (isInvalidLineOffset(Loc.LineOffset))
147 continue;
148 for (const auto &C : I.second.getCallTargets())
149 InsertAnchor(Loc, C.first, ProfileAnchors);
150 }
151
152 for (const auto &I : FS.getCallsiteSamples()) {
153 const LineLocation &Loc = I.first;
154 if (isInvalidLineOffset(Loc.LineOffset))
155 continue;
156 for (const auto &C : I.second)
157 InsertAnchor(Loc, C.first, ProfileAnchors);
158 }
159}
160
161bool SampleProfileMatcher::functionHasProfile(const FunctionId &IRFuncName,
162 Function *&FuncWithoutProfile) {
163 FuncWithoutProfile = nullptr;
164 auto R = FunctionsWithoutProfile.find(IRFuncName);
165 if (R != FunctionsWithoutProfile.end())
166 FuncWithoutProfile = R->second;
167 return !FuncWithoutProfile;
168}
169
170bool SampleProfileMatcher::isProfileUnused(const FunctionId &ProfileFuncName) {
171 return SymbolMap->find(ProfileFuncName) == SymbolMap->end();
172}
173
174bool SampleProfileMatcher::functionMatchesProfile(
175 const FunctionId &IRFuncName, const FunctionId &ProfileFuncName,
176 bool FindMatchedProfileOnly) {
177 if (IRFuncName == ProfileFuncName)
178 return true;
180 return false;
181
182 // If IR function doesn't have profile and the profile is unused, try
183 // matching them.
184 Function *IRFunc = nullptr;
185 if (functionHasProfile(IRFuncName, IRFunc) ||
186 !isProfileUnused(ProfileFuncName))
187 return false;
188
189 assert(FunctionId(IRFunc->getName()) != ProfileFuncName &&
190 "IR function should be different from profile function to match");
191 return functionMatchesProfile(*IRFunc, ProfileFuncName,
192 FindMatchedProfileOnly);
193}
194
196SampleProfileMatcher::longestCommonSequence(const AnchorList &AnchorList1,
197 const AnchorList &AnchorList2,
198 bool MatchUnusedFunction) {
199 LocToLocMap MatchedAnchors;
200 llvm::longestCommonSequence<LineLocation, FunctionId>(
201 AnchorList1, AnchorList2,
202 [&](const FunctionId &A, const FunctionId &B) {
203 return functionMatchesProfile(
204 A, B,
205 !MatchUnusedFunction // Find matched function only
206 );
207 },
209 MatchedAnchors.try_emplace(A, B);
210 });
211 return MatchedAnchors;
212}
213
214void SampleProfileMatcher::matchNonCallsiteLocs(
215 const LocToLocMap &MatchedAnchors, const AnchorMap &IRAnchors,
216 LocToLocMap &IRToProfileLocationMap) {
217 auto InsertMatching = [&](const LineLocation &From, const LineLocation &To) {
218 // Skip the unchanged location mapping to save memory.
219 if (From != To)
220 IRToProfileLocationMap.insert({From, To});
221 };
222
223 // Use function's beginning location as the initial anchor.
224 int32_t LocationDelta = 0;
225 SmallVector<LineLocation> LastMatchedNonAnchors;
226 for (const auto &IR : IRAnchors) {
227 const auto &Loc = IR.first;
228 bool IsMatchedAnchor = false;
229 // Match the anchor location in lexical order.
230 auto R = MatchedAnchors.find(Loc);
231 if (R != MatchedAnchors.end()) {
232 const auto &Candidate = R->second;
233 InsertMatching(Loc, Candidate);
234 LLVM_DEBUG(dbgs() << "Callsite with callee:" << IR.second.stringRef()
235 << " is matched from " << Loc << " to " << Candidate
236 << "\n");
237 LocationDelta = Candidate.LineOffset - Loc.LineOffset;
238
239 // Match backwards for non-anchor locations.
240 // The locations in LastMatchedNonAnchors have been matched forwards
241 // based on the previous anchor, spilt it evenly and overwrite the
242 // second half based on the current anchor.
243 for (size_t I = (LastMatchedNonAnchors.size() + 1) / 2;
244 I < LastMatchedNonAnchors.size(); I++) {
245 const auto &L = LastMatchedNonAnchors[I];
246 uint32_t CandidateLineOffset = L.LineOffset + LocationDelta;
247 LineLocation Candidate(CandidateLineOffset, L.Discriminator);
248 InsertMatching(L, Candidate);
249 LLVM_DEBUG(dbgs() << "Location is rematched backwards from " << L
250 << " to " << Candidate << "\n");
251 }
252
253 IsMatchedAnchor = true;
254 LastMatchedNonAnchors.clear();
255 }
256
257 // Match forwards for non-anchor locations.
258 if (!IsMatchedAnchor) {
259 uint32_t CandidateLineOffset = Loc.LineOffset + LocationDelta;
260 LineLocation Candidate(CandidateLineOffset, Loc.Discriminator);
261 InsertMatching(Loc, Candidate);
262 LLVM_DEBUG(dbgs() << "Location is matched from " << Loc << " to "
263 << Candidate << "\n");
264 LastMatchedNonAnchors.emplace_back(Loc);
265 }
266 }
267}
268
269// Filter the non-call locations from IRAnchors and ProfileAnchors and write
270// them into a list for random access later.
271void SampleProfileMatcher::getFilteredAnchorList(
272 const AnchorMap &IRAnchors, const AnchorMap &ProfileAnchors,
273 AnchorList &FilteredIRAnchorsList, AnchorList &FilteredProfileAnchorList) {
274 for (const auto &I : IRAnchors) {
275 if (I.second.stringRef().empty())
276 continue;
277 FilteredIRAnchorsList.emplace_back(I);
278 }
279
280 for (const auto &I : ProfileAnchors)
281 FilteredProfileAnchorList.emplace_back(I);
282}
283
284// Call target name anchor based profile fuzzy matching.
285// Input:
286// For IR locations, the anchor is the callee name of direct callsite; For
287// profile locations, it's the call target name for BodySamples or inlinee's
288// profile name for CallsiteSamples.
289// Matching heuristic:
290// First match all the anchors using the diff algorithm, then split the
291// non-anchor locations between the two anchors evenly, first half are matched
292// based on the start anchor, second half are matched based on the end anchor.
293// For example, given:
294// IR locations: [1, 2(foo), 3, 5, 6(bar), 7]
295// Profile locations: [1, 2, 3(foo), 4, 7, 8(bar), 9]
296// The matching gives:
297// [1, 2(foo), 3, 5, 6(bar), 7]
298// | | | | | |
299// [1, 2, 3(foo), 4, 7, 8(bar), 9]
300// The output mapping: [2->3, 3->4, 5->7, 6->8, 7->9].
301void SampleProfileMatcher::runStaleProfileMatching(
302 const Function &F, const AnchorMap &IRAnchors,
303 const AnchorMap &ProfileAnchors, LocToLocMap &IRToProfileLocationMap,
304 bool RunCFGMatching, bool RunCGMatching) {
305 if (!RunCFGMatching && !RunCGMatching)
306 return;
307 LLVM_DEBUG(dbgs() << "Run stale profile matching for " << F.getName()
308 << "\n");
309 assert(IRToProfileLocationMap.empty() &&
310 "Run stale profile matching only once per function");
311
312 AnchorList FilteredProfileAnchorList;
313 AnchorList FilteredIRAnchorsList;
314 getFilteredAnchorList(IRAnchors, ProfileAnchors, FilteredIRAnchorsList,
315 FilteredProfileAnchorList);
316
317 if (FilteredIRAnchorsList.empty() || FilteredProfileAnchorList.empty())
318 return;
319
320 if (FilteredIRAnchorsList.size() > SalvageStaleProfileMaxCallsites ||
321 FilteredProfileAnchorList.size() > SalvageStaleProfileMaxCallsites) {
322 LLVM_DEBUG(dbgs() << "Skip stale profile matching for " << F.getName()
323 << " because the number of callsites in the IR is "
324 << FilteredIRAnchorsList.size()
325 << " and in the profile is "
326 << FilteredProfileAnchorList.size() << "\n");
327 return;
328 }
329
330 // Match the callsite anchors by finding the longest common subsequence
331 // between IR and profile.
332 // Define a match between two anchors as follows:
333 // 1) The function names of anchors are the same.
334 // 2) The similarity between the anchor functions is above a threshold if
335 // RunCGMatching is set.
336 // For 2), we only consider the anchor functions from IR and profile don't
337 // appear on either side to reduce the matching scope. Note that we need to
338 // use IR anchor as base(A side) to align with the order of
339 // IRToProfileLocationMap.
340 LocToLocMap MatchedAnchors =
341 longestCommonSequence(FilteredIRAnchorsList, FilteredProfileAnchorList,
342 RunCGMatching /* Match unused functions */);
343
344 // CFG level matching:
345 // Apply the callsite matchings to infer matching for the basic
346 // block(non-callsite) locations and write the result to
347 // IRToProfileLocationMap.
348 if (RunCFGMatching)
349 matchNonCallsiteLocs(MatchedAnchors, IRAnchors, IRToProfileLocationMap);
350}
351
352void SampleProfileMatcher::runOnFunction(Function &F) {
353 // We need to use flattened function samples for matching.
354 // Unlike IR, which includes all callsites from the source code, the callsites
355 // in profile only show up when they are hit by samples, i,e. the profile
356 // callsites in one context may differ from those in another context. To get
357 // the maximum number of callsites, we merge the function profiles from all
358 // contexts, aka, the flattened profile to find profile anchors.
359 const auto *FSForMatching = getFlattenedSamplesFor(F);
360 if (SalvageUnusedProfile && !FSForMatching) {
361 // Apply the matching in place to find the new function's matched profile.
362 auto R = FuncToProfileNameMap.find(&F);
363 if (R != FuncToProfileNameMap.end()) {
364 FSForMatching = getFlattenedSamplesFor(R->second);
365 // Try to find the salvaged top-level profiles that are explicitly loaded
366 // for the matching, see "functionMatchesProfileHelper" for the details.
367 if (!FSForMatching && LoadFuncProfileforCGMatching)
368 FSForMatching = Reader.getSamplesFor(R->second.stringRef());
369 }
370 }
371 if (!FSForMatching)
372 return;
373
374 // Anchors for IR. It's a map from IR location to callee name, callee name is
375 // empty for non-call instruction and use a dummy name(UnknownIndirectCallee)
376 // for unknown indrect callee name.
377 AnchorMap IRAnchors;
378 findIRAnchors(F, IRAnchors);
379 // Anchors for profile. It's a map from callsite location to a set of callee
380 // name.
381 AnchorMap ProfileAnchors;
382 findProfileAnchors(*FSForMatching, ProfileAnchors);
383
384 // Compute the callsite match states for profile staleness report.
386 recordCallsiteMatchStates(F, IRAnchors, ProfileAnchors, nullptr);
387
389 return;
390 // For probe-based profiles, run matching only when profile checksum is
391 // mismatched.
392 bool ChecksumMismatch = FunctionSamples::ProfileIsProbeBased &&
393 !ProbeManager->profileIsValid(F, *FSForMatching);
394 bool RunCFGMatching =
395 !FunctionSamples::ProfileIsProbeBased || ChecksumMismatch;
396 bool RunCGMatching = SalvageUnusedProfile;
397 // For imported functions, the checksum metadata(pseudo_probe_desc) are
398 // dropped, so we leverage function attribute(profile-checksum-mismatch) to
399 // transfer the info: add the attribute during pre-link phase and check it
400 // during post-link phase(see "profileIsValid").
401 if (ChecksumMismatch && LTOPhase == ThinOrFullLTOPhase::ThinLTOPreLink)
402 F.addFnAttr("profile-checksum-mismatch");
403
404 // The matching result will be saved to IRToProfileLocationMap, create a
405 // new map for each function.
406 auto &IRToProfileLocationMap = getIRToProfileLocationMap(F);
407 runStaleProfileMatching(F, IRAnchors, ProfileAnchors, IRToProfileLocationMap,
408 RunCFGMatching, RunCGMatching);
409 // Find and update callsite match states after matching.
410 if (RunCFGMatching && (ReportProfileStaleness || PersistProfileStaleness))
411 recordCallsiteMatchStates(F, IRAnchors, ProfileAnchors,
412 &IRToProfileLocationMap);
413}
414
415void SampleProfileMatcher::recordCallsiteMatchStates(
416 const Function &F, const AnchorMap &IRAnchors,
417 const AnchorMap &ProfileAnchors,
418 const LocToLocMap *IRToProfileLocationMap) {
419 bool IsPostMatch = IRToProfileLocationMap != nullptr;
420 auto &CallsiteMatchStates =
421 FuncCallsiteMatchStates[FunctionSamples::getCanonicalFnName(F.getName())];
422
423 auto MapIRLocToProfileLoc = [&](const LineLocation &IRLoc) {
424 // IRToProfileLocationMap is null in pre-match phrase.
425 if (!IRToProfileLocationMap)
426 return IRLoc;
427 const auto &ProfileLoc = IRToProfileLocationMap->find(IRLoc);
428 if (ProfileLoc != IRToProfileLocationMap->end())
429 return ProfileLoc->second;
430 else
431 return IRLoc;
432 };
433
434 for (const auto &I : IRAnchors) {
435 // After fuzzy profile matching, use the matching result to remap the
436 // current IR callsite.
437 const auto &ProfileLoc = MapIRLocToProfileLoc(I.first);
438 const auto &IRCalleeId = I.second;
439 const auto &It = ProfileAnchors.find(ProfileLoc);
440 if (It == ProfileAnchors.end())
441 continue;
442 const auto &ProfCalleeId = It->second;
443 if (IRCalleeId == ProfCalleeId) {
444 auto It = CallsiteMatchStates.find(ProfileLoc);
445 if (It == CallsiteMatchStates.end())
446 CallsiteMatchStates.emplace(ProfileLoc, MatchState::InitialMatch);
447 else if (IsPostMatch) {
448 if (It->second == MatchState::InitialMatch)
449 It->second = MatchState::UnchangedMatch;
450 else if (It->second == MatchState::InitialMismatch)
451 It->second = MatchState::RecoveredMismatch;
452 }
453 }
454 }
455
456 // Check if there are any callsites in the profile that does not match to any
457 // IR callsites.
458 for (const auto &I : ProfileAnchors) {
459 const auto &Loc = I.first;
460 assert(!I.second.stringRef().empty() && "Callees should not be empty");
461 auto It = CallsiteMatchStates.find(Loc);
462 if (It == CallsiteMatchStates.end())
463 CallsiteMatchStates.emplace(Loc, MatchState::InitialMismatch);
464 else if (IsPostMatch) {
465 // Update the state if it's not matched(UnchangedMatch or
466 // RecoveredMismatch).
467 if (It->second == MatchState::InitialMismatch)
468 It->second = MatchState::UnchangedMismatch;
469 else if (It->second == MatchState::InitialMatch)
470 It->second = MatchState::RemovedMatch;
471 }
472 }
473}
474
475void SampleProfileMatcher::countMismatchedFuncSamples(const FunctionSamples &FS,
476 bool IsTopLevel) {
477 const auto *FuncDesc = ProbeManager->getDesc(FS.getGUID());
478 // Skip the function that is external or renamed.
479 if (!FuncDesc)
480 return;
481
482 if (ProbeManager->profileIsHashMismatched(*FuncDesc, FS)) {
483 if (IsTopLevel)
484 NumStaleProfileFunc++;
485 // Given currently all probe ids are after block probe ids, once the
486 // checksum is mismatched, it's likely all the callites are mismatched and
487 // dropped. We conservatively count all the samples as mismatched and stop
488 // counting the inlinees' profiles.
489 MismatchedFunctionSamples += FS.getTotalSamples();
490 return;
491 }
492
493 // Even the current-level function checksum is matched, it's possible that the
494 // nested inlinees' checksums are mismatched that affect the inlinee's sample
495 // loading, we need to go deeper to check the inlinees' function samples.
496 // Similarly, count all the samples as mismatched if the inlinee's checksum is
497 // mismatched using this recursive function.
498 for (const auto &I : FS.getCallsiteSamples())
499 for (const auto &CS : I.second)
500 countMismatchedFuncSamples(CS.second, false);
501}
502
503void SampleProfileMatcher::countMismatchedCallsiteSamples(
504 const FunctionSamples &FS) {
505 auto It = FuncCallsiteMatchStates.find(FS.getFuncName());
506 // Skip it if no mismatched callsite or this is an external function.
507 if (It == FuncCallsiteMatchStates.end() || It->second.empty())
508 return;
509 const auto &CallsiteMatchStates = It->second;
510
511 auto findMatchState = [&](const LineLocation &Loc) {
512 auto It = CallsiteMatchStates.find(Loc);
513 if (It == CallsiteMatchStates.end())
514 return MatchState::Unknown;
515 return It->second;
516 };
517
518 auto AttributeMismatchedSamples = [&](const enum MatchState &State,
519 uint64_t Samples) {
520 if (isMismatchState(State))
521 MismatchedCallsiteSamples += Samples;
522 else if (State == MatchState::RecoveredMismatch)
523 RecoveredCallsiteSamples += Samples;
524 };
525
526 // The non-inlined callsites are saved in the body samples of function
527 // profile, go through it to count the non-inlined callsite samples.
528 for (const auto &I : FS.getBodySamples())
529 AttributeMismatchedSamples(findMatchState(I.first), I.second.getSamples());
530
531 // Count the inlined callsite samples.
532 for (const auto &I : FS.getCallsiteSamples()) {
533 auto State = findMatchState(I.first);
534 uint64_t CallsiteSamples = 0;
535 for (const auto &CS : I.second)
536 CallsiteSamples += CS.second.getTotalSamples();
537 AttributeMismatchedSamples(State, CallsiteSamples);
538
539 if (isMismatchState(State))
540 continue;
541
542 // When the current level of inlined call site matches the profiled call
543 // site, we need to go deeper along the inline tree to count mismatches from
544 // lower level inlinees.
545 for (const auto &CS : I.second)
546 countMismatchedCallsiteSamples(CS.second);
547 }
548}
549
550void SampleProfileMatcher::countMismatchCallsites(const FunctionSamples &FS) {
551 auto It = FuncCallsiteMatchStates.find(FS.getFuncName());
552 // Skip it if no mismatched callsite or this is an external function.
553 if (It == FuncCallsiteMatchStates.end() || It->second.empty())
554 return;
555 const auto &MatchStates = It->second;
556 [[maybe_unused]] bool OnInitialState =
557 isInitialState(MatchStates.begin()->second);
558 for (const auto &I : MatchStates) {
559 TotalProfiledCallsites++;
560 assert(
561 (OnInitialState ? isInitialState(I.second) : isFinalState(I.second)) &&
562 "Profile matching state is inconsistent");
563
564 if (isMismatchState(I.second))
565 NumMismatchedCallsites++;
566 else if (I.second == MatchState::RecoveredMismatch)
567 NumRecoveredCallsites++;
568 }
569}
570
571void SampleProfileMatcher::countCallGraphRecoveredSamples(
572 const FunctionSamples &FS,
573 std::unordered_set<FunctionId> &CallGraphRecoveredProfiles) {
574 if (CallGraphRecoveredProfiles.count(FS.getFunction())) {
575 NumCallGraphRecoveredFuncSamples += FS.getTotalSamples();
576 return;
577 }
578
579 for (const auto &CM : FS.getCallsiteSamples()) {
580 for (const auto &CS : CM.second) {
581 countCallGraphRecoveredSamples(CS.second, CallGraphRecoveredProfiles);
582 }
583 }
584}
585
586void SampleProfileMatcher::computeAndReportProfileStaleness() {
588 return;
589
590 std::unordered_set<FunctionId> CallGraphRecoveredProfiles;
592 for (const auto &I : FuncToProfileNameMap) {
593 CallGraphRecoveredProfiles.insert(I.second);
594 if (GlobalValue::isAvailableExternallyLinkage(I.first->getLinkage()))
595 continue;
596 NumCallGraphRecoveredProfiledFunc++;
597 }
598 }
599
600 // Count profile mismatches for profile staleness report.
601 for (const auto &F : M) {
603 continue;
604 // As the stats will be merged by linker, skip reporting the metrics for
605 // imported functions to avoid repeated counting.
607 continue;
608 const auto *FS = Reader.getSamplesFor(F);
609 if (!FS)
610 continue;
611 TotalProfiledFunc++;
612 TotalFunctionSamples += FS->getTotalSamples();
613
614 if (SalvageUnusedProfile && !CallGraphRecoveredProfiles.empty())
615 countCallGraphRecoveredSamples(*FS, CallGraphRecoveredProfiles);
616
617 // Checksum mismatch is only used in pseudo-probe mode.
619 countMismatchedFuncSamples(*FS, true);
620
621 // Count mismatches and samples for calliste.
622 countMismatchCallsites(*FS);
623 countMismatchedCallsiteSamples(*FS);
624 }
625
628 errs() << "(" << NumStaleProfileFunc << "/" << TotalProfiledFunc
629 << ") of functions' profile are invalid and ("
630 << MismatchedFunctionSamples << "/" << TotalFunctionSamples
631 << ") of samples are discarded due to function hash mismatch.\n";
632 }
634 errs() << "(" << NumCallGraphRecoveredProfiledFunc << "/"
635 << TotalProfiledFunc << ") of functions' profile are matched and ("
636 << NumCallGraphRecoveredFuncSamples << "/" << TotalFunctionSamples
637 << ") of samples are reused by call graph matching.\n";
638 }
639
640 errs() << "(" << (NumMismatchedCallsites + NumRecoveredCallsites) << "/"
641 << TotalProfiledCallsites
642 << ") of callsites' profile are invalid and ("
643 << (MismatchedCallsiteSamples + RecoveredCallsiteSamples) << "/"
644 << TotalFunctionSamples
645 << ") of samples are discarded due to callsite location mismatch.\n";
646 errs() << "(" << NumRecoveredCallsites << "/"
647 << (NumRecoveredCallsites + NumMismatchedCallsites)
648 << ") of callsites and (" << RecoveredCallsiteSamples << "/"
649 << (RecoveredCallsiteSamples + MismatchedCallsiteSamples)
650 << ") of samples are recovered by stale profile matching.\n";
651 }
652
654 LLVMContext &Ctx = M.getContext();
655 MDBuilder MDB(Ctx);
656
659 ProfStatsVec.emplace_back("NumStaleProfileFunc", NumStaleProfileFunc);
660 ProfStatsVec.emplace_back("TotalProfiledFunc", TotalProfiledFunc);
661 ProfStatsVec.emplace_back("MismatchedFunctionSamples",
662 MismatchedFunctionSamples);
663 ProfStatsVec.emplace_back("TotalFunctionSamples", TotalFunctionSamples);
664 }
665
667 ProfStatsVec.emplace_back("NumCallGraphRecoveredProfiledFunc",
668 NumCallGraphRecoveredProfiledFunc);
669 ProfStatsVec.emplace_back("NumCallGraphRecoveredFuncSamples",
670 NumCallGraphRecoveredFuncSamples);
671 }
672
673 ProfStatsVec.emplace_back("NumMismatchedCallsites", NumMismatchedCallsites);
674 ProfStatsVec.emplace_back("NumRecoveredCallsites", NumRecoveredCallsites);
675 ProfStatsVec.emplace_back("TotalProfiledCallsites", TotalProfiledCallsites);
676 ProfStatsVec.emplace_back("MismatchedCallsiteSamples",
677 MismatchedCallsiteSamples);
678 ProfStatsVec.emplace_back("RecoveredCallsiteSamples",
679 RecoveredCallsiteSamples);
680
681 auto *MD = MDB.createLLVMStats(ProfStatsVec);
682 auto *NMD = M.getOrInsertNamedMetadata("llvm.stats");
683 NMD->addOperand(MD);
684 }
685}
686
687void SampleProfileMatcher::findFunctionsWithoutProfile() {
688 // TODO: Support MD5 profile.
690 return;
691 StringSet<> NamesInProfile;
692 if (auto NameTable = Reader.getNameTable()) {
693 for (auto Name : *NameTable)
694 NamesInProfile.insert(Name.stringRef());
695 }
696
697 for (auto &F : M) {
698 // Skip declarations, as even if the function can be matched, we have
699 // nothing to do with it.
700 if (F.isDeclaration())
701 continue;
702
703 StringRef CanonFName = FunctionSamples::getCanonicalFnName(F.getName());
704 const auto *FS = getFlattenedSamplesFor(F);
705 if (FS)
706 continue;
707
708 // For extended binary, functions fully inlined may not be loaded in the
709 // top-level profile, so check the NameTable which has the all symbol names
710 // in profile.
711 if (NamesInProfile.count(CanonFName))
712 continue;
713
714 // For extended binary, non-profiled function symbols are in the profile
715 // symbol list table.
716 if (PSL && PSL->contains(CanonFName))
717 continue;
718
719 LLVM_DEBUG(dbgs() << "Function " << CanonFName
720 << " is not in profile or profile symbol list.\n");
721 FunctionsWithoutProfile[FunctionId(CanonFName)] = &F;
722 }
723}
724
725bool SampleProfileMatcher::functionMatchesProfileHelper(
726 const Function &IRFunc, const FunctionId &ProfFunc) {
727 // The value is in the range [0, 1]. The bigger the value is, the more similar
728 // two sequences are.
729 float Similarity = 0.0;
730
731 // Match the functions if they have the same base name(after demangling) and
732 // skip the similarity check.
734 // Helper lambda to demangle and get the base name. If the demangling failed,
735 // return an empty string.
736 auto GetBaseName = [&](StringRef FName) {
737 auto FunctionName = FName.str();
738 if (Demangler.partialDemangle(FunctionName.c_str()))
739 return std::string();
740 size_t BaseNameSize = 0;
741 // The demangler API follows the __cxa_demangle one, and thus needs a
742 // pointer that originates from malloc (or nullptr) and the caller is
743 // responsible for free()-ing the buffer.
744 char *BaseNamePtr = Demangler.getFunctionBaseName(nullptr, &BaseNameSize);
745 std::string Result = (BaseNamePtr && BaseNameSize)
746 ? std::string(BaseNamePtr, BaseNameSize)
747 : std::string();
748 free(BaseNamePtr);
749 return Result;
750 };
751 auto IRBaseName = GetBaseName(IRFunc.getName());
752 auto ProfBaseName = GetBaseName(ProfFunc.stringRef());
753 if (!IRBaseName.empty() && IRBaseName == ProfBaseName) {
754 LLVM_DEBUG(dbgs() << "The functions " << IRFunc.getName() << "(IR) and "
755 << ProfFunc << "(Profile) share the same base name: "
756 << IRBaseName << ".\n");
757 return true;
758 }
759
760 const auto *FSForMatching = getFlattenedSamplesFor(ProfFunc);
761 // With extbinary profile format, initial profile loading only reads profile
762 // based on current function names in the module.
763 // However, if a function is renamed, sample loader skips to load its original
764 // profile(which has a different name), we will miss this case. To address
765 // this, we load the top-level profile candidate explicitly for the matching.
766 if (!FSForMatching && LoadFuncProfileforCGMatching) {
767 DenseSet<StringRef> TopLevelFunc({ProfFunc.stringRef()});
768 if (std::error_code EC = Reader.read(TopLevelFunc))
769 return false;
770 FSForMatching = Reader.getSamplesFor(ProfFunc.stringRef());
771 LLVM_DEBUG({
772 if (FSForMatching)
773 dbgs() << "Read top-level function " << ProfFunc
774 << " for call-graph matching\n";
775 });
776 }
777 if (!FSForMatching)
778 return false;
779 // The check for similarity or checksum may not be reliable if the function is
780 // tiny, we use the number of basic block as a proxy for the function
781 // complexity and skip the matching if it's too small.
782 if (IRFunc.size() < MinFuncCountForCGMatching ||
783 FSForMatching->getBodySamples().size() < MinFuncCountForCGMatching)
784 return false;
785
786 // For probe-based function, we first trust the checksum info. If the checksum
787 // doesn't match, we continue checking for similarity.
789 const auto *FuncDesc = ProbeManager->getDesc(IRFunc);
790 if (FuncDesc &&
791 !ProbeManager->profileIsHashMismatched(*FuncDesc, *FSForMatching)) {
792 LLVM_DEBUG(dbgs() << "The checksums for " << IRFunc.getName()
793 << "(IR) and " << ProfFunc << "(Profile) match.\n");
794
795 return true;
796 }
797 }
798
799 AnchorMap IRAnchors;
800 findIRAnchors(IRFunc, IRAnchors);
801 AnchorMap ProfileAnchors;
802 findProfileAnchors(*FSForMatching, ProfileAnchors);
803
804 AnchorList FilteredIRAnchorsList;
805 AnchorList FilteredProfileAnchorList;
806 getFilteredAnchorList(IRAnchors, ProfileAnchors, FilteredIRAnchorsList,
807 FilteredProfileAnchorList);
808
809 // Similarly skip the matching if the num of anchors is not enough.
810 if (FilteredIRAnchorsList.size() < MinCallCountForCGMatching ||
811 FilteredProfileAnchorList.size() < MinCallCountForCGMatching)
812 return false;
813
814 // Use the diff algorithm to find the LCS between IR and profile.
815
816 // Don't recursively match the callee function to avoid infinite matching,
817 // callee functions will be handled later since it's processed in top-down
818 // order .
819 LocToLocMap MatchedAnchors =
820 longestCommonSequence(FilteredIRAnchorsList, FilteredProfileAnchorList,
821 false /* Match unused functions */);
822
823 Similarity = static_cast<float>(MatchedAnchors.size()) /
824 FilteredProfileAnchorList.size();
825
826 LLVM_DEBUG(dbgs() << "The similarity between " << IRFunc.getName()
827 << "(IR) and " << ProfFunc << "(profile) is "
828 << format("%.2f", Similarity) << "\n");
829 assert((Similarity >= 0 && Similarity <= 1.0) &&
830 "Similarity value should be in [0, 1]");
831 return Similarity * 100 > FuncProfileSimilarityThreshold;
832}
833
834// If FindMatchedProfileOnly is set to true, only use the processed function
835// results. This is used for skipping the repeated recursive matching.
836bool SampleProfileMatcher::functionMatchesProfile(Function &IRFunc,
837 const FunctionId &ProfFunc,
838 bool FindMatchedProfileOnly) {
839 auto R = FuncProfileMatchCache.find({&IRFunc, ProfFunc});
840 if (R != FuncProfileMatchCache.end())
841 return R->second;
842
843 if (FindMatchedProfileOnly)
844 return false;
845
846 bool Matched = functionMatchesProfileHelper(IRFunc, ProfFunc);
847 FuncProfileMatchCache[{&IRFunc, ProfFunc}] = Matched;
848 if (Matched) {
849 FuncToProfileNameMap[&IRFunc] = ProfFunc;
850 LLVM_DEBUG(dbgs() << "Function:" << IRFunc.getName()
851 << " matches profile:" << ProfFunc << "\n");
852 }
853
854 return Matched;
855}
856
857void SampleProfileMatcher::UpdateWithSalvagedProfiles() {
858 DenseSet<StringRef> ProfileSalvagedFuncs;
859 // Update FuncNameToProfNameMap and SymbolMap.
860 for (auto &I : FuncToProfileNameMap) {
861 assert(I.first && "New function is null");
862 FunctionId FuncName(I.first->getName());
863 ProfileSalvagedFuncs.insert(I.second.stringRef());
864 FuncNameToProfNameMap->emplace(FuncName, I.second);
865
866 // We need to remove the old entry to avoid duplicating the function
867 // processing.
868 SymbolMap->erase(FuncName);
869 SymbolMap->emplace(I.second, I.first);
870 }
871
872 // With extbinary profile format, initial profile loading only reads profile
873 // based on current function names in the module, so we need to load top-level
874 // profiles for functions with different profile name explicitly after
875 // function-profile name map is established with stale profile matching.
876 Reader.read(ProfileSalvagedFuncs);
877 Reader.setFuncNameToProfNameMap(*FuncNameToProfNameMap);
878}
879
881 ProfileConverter::flattenProfile(Reader.getProfiles(), FlattenedProfiles,
884 findFunctionsWithoutProfile();
885
886 // Process the matching in top-down order so that the caller matching result
887 // can be used to the callee matching.
888 std::vector<Function *> TopDownFunctionList;
889 TopDownFunctionList.reserve(M.size());
890 buildTopDownFuncOrder(CG, TopDownFunctionList);
891 for (auto *F : TopDownFunctionList) {
893 continue;
894 runOnFunction(*F);
895 }
896
898 UpdateWithSalvagedProfiles();
899
901 distributeIRToProfileLocationMap();
902
903 computeAndReportProfileStaleness();
904}
905
906void SampleProfileMatcher::distributeIRToProfileLocationMap(
907 FunctionSamples &FS) {
908 const auto ProfileMappings = FuncMappings.find(FS.getFuncName());
909 if (ProfileMappings != FuncMappings.end()) {
910 FS.setIRToProfileLocationMap(&(ProfileMappings->second));
911 }
912
913 for (auto &Callees :
914 const_cast<CallsiteSampleMap &>(FS.getCallsiteSamples())) {
915 for (auto &FS : Callees.second) {
916 distributeIRToProfileLocationMap(FS.second);
917 }
918 }
919}
920
921// Use a central place to distribute the matching results. Outlined and inlined
922// profile with the function name will be set to the same pointer.
923void SampleProfileMatcher::distributeIRToProfileLocationMap() {
924 for (auto &I : Reader.getProfiles()) {
925 distributeIRToProfileLocationMap(I.second);
926 }
927}
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
BlockVerifier::State From
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
std::string Name
itanium_demangle::ManglingParser< DefaultAllocator > Demangler
Legalize the Machine IR a function s Machine IR
Definition: Legalizer.cpp:80
#define F(x, y, z)
Definition: MD5.cpp:55
#define I(x, y, z)
Definition: MD5.cpp:58
static cl::opt< bool > SalvageStaleProfile("memprof-salvage-stale-profile", cl::desc("Salvage stale MemProf profile"), cl::init(false), cl::Hidden)
static cl::opt< unsigned > MinCallCountForCGMatching("min-call-count-for-cg-matching", cl::Hidden, cl::init(3), cl::desc("The minimum number of call anchors required for a function to " "run stale profile call graph matching."))
cl::opt< bool > ReportProfileStaleness
static cl::opt< unsigned > FuncProfileSimilarityThreshold("func-profile-similarity-threshold", cl::Hidden, cl::init(80), cl::desc("Consider a profile matches a function if the similarity of their " "callee sequences is above the specified percentile."))
cl::opt< bool > SalvageStaleProfile
static cl::opt< unsigned > SalvageStaleProfileMaxCallsites("salvage-stale-profile-max-callsites", cl::Hidden, cl::init(UINT_MAX), cl::desc("The maximum number of callsites in a function, above which stale " "profile matching will be skipped."))
static cl::opt< unsigned > MinFuncCountForCGMatching("min-func-count-for-cg-matching", cl::Hidden, cl::init(5), cl::desc("The minimum number of basic blocks required for a function to " "run stale profile call graph matching."))
cl::opt< bool > SalvageUnusedProfile
static cl::opt< bool > LoadFuncProfileforCGMatching("load-func-profile-for-cg-matching", cl::Hidden, cl::init(true), cl::desc("Load top-level profiles that the sample reader initially skipped for " "the call-graph matching (only meaningful for extended binary " "format)"))
cl::opt< bool > PersistProfileStaleness
This file provides the interface for SampleProfileMatcher.
cl::opt< bool > SalvageUnusedProfile("salvage-unused-profile", cl::Hidden, cl::init(false), cl::desc("Salvage unused profile by matching with new " "functions on call graph."))
cl::opt< bool > PersistProfileStaleness("persist-profile-staleness", cl::Hidden, cl::init(false), cl::desc("Compute stale profile statistical metrics and write it into the " "native object file(.llvm_stats section)."))
cl::opt< bool > ReportProfileStaleness("report-profile-staleness", cl::Hidden, cl::init(false), cl::desc("Compute and report stale profile statistical metrics."))
#define LLVM_DEBUG(...)
Definition: Debug.h:119
Base class for all callable instructions (InvokeInst and CallInst) Holds everything related to callin...
Definition: InstrTypes.h:1116
Debug location.
Implements a dense probed hash-table based set.
Definition: DenseSet.h:263
size_t size() const
Definition: Function.h:856
static bool isAvailableExternallyLinkage(LinkageTypes Linkage)
Definition: GlobalValue.h:381
This is an important class for using LLVM in a threaded context.
Definition: LLVMContext.h:68
bool profileIsHashMismatched(const PseudoProbeDescriptor &FuncDesc, const FunctionSamples &Samples) const
bool profileIsValid(const Function &F, const FunctionSamples &Samples) const
const PseudoProbeDescriptor * getDesc(uint64_t GUID) const
size_t size() const
Definition: SmallVector.h:79
reference emplace_back(ArgTypes &&... Args)
Definition: SmallVector.h:938
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1197
iterator end()
Definition: StringMap.h:224
iterator find(StringRef Key)
Definition: StringMap.h:235
size_type count(StringRef Key) const
count - Return 1 if the element is in the map, 0 otherwise.
Definition: StringMap.h:278
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:55
StringSet - A wrapper for StringMap that provides set-like functionality.
Definition: StringSet.h:25
std::pair< typename Base::iterator, bool > insert(StringRef key)
Definition: StringSet.h:39
LLVM_ABI StringRef getName() const
Return a constant reference to the value's name.
Definition: Value.cpp:322
std::pair< iterator, bool > insert(const ValueT &V)
Definition: DenseSet.h:194
This class represents a function that is read from a sample profile.
Definition: FunctionId.h:36
StringRef stringRef() const
Convert to StringRef.
Definition: FunctionId.h:108
Representation of the samples collected for a function.
Definition: SampleProf.h:757
static LLVM_ABI bool ProfileIsCS
Definition: SampleProf.h:1200
static LLVM_ABI bool ProfileIsProbeBased
Definition: SampleProf.h:1198
static StringRef getCanonicalFnName(const Function &F)
Return the canonical name for a function, taking into account suffix elision policy attributes.
Definition: SampleProf.h:1102
static LLVM_ABI bool ProfileIsFS
If this profile uses flow sensitive discriminators.
Definition: SampleProf.h:1215
static LLVM_ABI LineLocation getCallSiteIdentifier(const DILocation *DIL, bool ProfileIsFS=false)
Returns a unique call site identifier for a given debug location of a call instruction.
Definition: SampleProf.cpp:250
static LLVM_ABI bool UseMD5
Whether the profile uses MD5 to represent string.
Definition: SampleProf.h:1209
static void flattenProfile(SampleProfileMap &ProfileMap, bool ProfileIsCS=false)
Definition: SampleProf.h:1437
SampleProfileMap & getProfiles()
Return all the profiles.
FunctionSamples * getSamplesFor(const Function &F)
Return the samples collected for function F.
std::error_code read()
The interface to read sample profiles from the associated file.
void setFuncNameToProfNameMap(const HashKeyMap< std::unordered_map, FunctionId, FunctionId > &FPMap)
virtual std::vector< FunctionId > * getNameTable()
It includes all the names that have samples either in outline instance or inline instance.
@ C
The default llvm calling convention, compatible with C.
Definition: CallingConv.h:34
@ FS
Definition: X86.h:214
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:444
std::unordered_map< LineLocation, LineLocation, LineLocationHash > LocToLocMap
Definition: SampleProf.h:750
std::map< LineLocation, FunctionSamplesMap > CallsiteSampleMap
Definition: SampleProf.h:748
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
std::vector< std::pair< LineLocation, FunctionId > > AnchorList
std::map< LineLocation, FunctionId > AnchorMap
static void buildTopDownFuncOrder(LazyCallGraph &CG, std::vector< Function * > &FunctionOrderList)
@ ThinLTOPreLink
ThinLTO prelink (summary) phase.
LLVM_ABI std::optional< PseudoProbe > extractProbe(const Instruction &Inst)
Definition: PseudoProbe.cpp:56
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:207
format_object< Ts... > format(const char *Fmt, const Ts &... Vals)
These are helper functions used to produce formatted output.
Definition: Format.h:126
LLVM_ABI raw_fd_ostream & errs()
This returns a reference to a raw_ostream for standard error.
static bool skipProfileForFunction(const Function &F)
"Partial" demangler.
Definition: Demangle.h:85
Represents the relative location of an instruction.
Definition: SampleProf.h:283