LLVM 20.0.0git
SampleProfileMatcher.cpp
Go to the documentation of this file.
1//===- SampleProfileMatcher.cpp - Sampling-based Stale Profile Matcher ----===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file implements the SampleProfileMatcher used for stale
10// profile matching.
11//
12//===----------------------------------------------------------------------===//
13
16#include "llvm/IR/MDBuilder.h"
19
20using namespace llvm;
21using namespace sampleprof;
22
23#define DEBUG_TYPE "sample-profile-matcher"
24
26 "func-profile-similarity-threshold", cl::Hidden, cl::init(80),
27 cl::desc("Consider a profile matches a function if the similarity of their "
28 "callee sequences is above the specified percentile."));
29
31 "min-func-count-for-cg-matching", cl::Hidden, cl::init(5),
32 cl::desc("The minimum number of basic blocks required for a function to "
33 "run stale profile call graph matching."));
34
36 "min-call-count-for-cg-matching", cl::Hidden, cl::init(3),
37 cl::desc("The minimum number of call anchors required for a function to "
38 "run stale profile call graph matching."));
39
41 "load-func-profile-for-cg-matching", cl::Hidden, cl::init(false),
43 "Load top-level profiles that the sample reader initially skipped for "
44 "the call-graph matching (only meaningful for extended binary "
45 "format)"));
46
51
53 "salvage-stale-profile-max-callsites", cl::Hidden, cl::init(UINT_MAX),
54 cl::desc("The maximum number of callsites in a function, above which stale "
55 "profile matching will be skipped."));
56
57void SampleProfileMatcher::findIRAnchors(const Function &F,
58 AnchorMap &IRAnchors) const {
59 // For inlined code, recover the original callsite and callee by finding the
60 // top-level inline frame. e.g. For frame stack "main:1 @ foo:2 @ bar:3", the
61 // top-level frame is "main:1", the callsite is "1" and the callee is "foo".
62 auto FindTopLevelInlinedCallsite = [](const DILocation *DIL) {
63 assert((DIL && DIL->getInlinedAt()) && "No inlined callsite");
64 const DILocation *PrevDIL = nullptr;
65 do {
66 PrevDIL = DIL;
67 DIL = DIL->getInlinedAt();
68 } while (DIL->getInlinedAt());
69
72 StringRef CalleeName = PrevDIL->getSubprogramLinkageName();
73 return std::make_pair(Callsite, FunctionId(CalleeName));
74 };
75
76 auto GetCanonicalCalleeName = [](const CallBase *CB) {
77 StringRef CalleeName = UnknownIndirectCallee;
78 if (Function *Callee = CB->getCalledFunction())
79 CalleeName = FunctionSamples::getCanonicalFnName(Callee->getName());
80 return CalleeName;
81 };
82
83 // Extract profile matching anchors in the IR.
84 for (auto &BB : F) {
85 for (auto &I : BB) {
86 DILocation *DIL = I.getDebugLoc();
87 if (!DIL)
88 continue;
89
91 if (auto Probe = extractProbe(I)) {
92 // Flatten inlined IR for the matching.
93 if (DIL->getInlinedAt()) {
94 IRAnchors.emplace(FindTopLevelInlinedCallsite(DIL));
95 } else {
96 // Use empty StringRef for basic block probe.
97 StringRef CalleeName;
98 if (const auto *CB = dyn_cast<CallBase>(&I)) {
99 // Skip the probe inst whose callee name is "llvm.pseudoprobe".
100 if (!isa<IntrinsicInst>(&I))
101 CalleeName = GetCanonicalCalleeName(CB);
102 }
103 LineLocation Loc = LineLocation(Probe->Id, 0);
104 IRAnchors.emplace(Loc, FunctionId(CalleeName));
105 }
106 }
107 } else {
108 // TODO: For line-number based profile(AutoFDO), currently only support
109 // find callsite anchors. In future, we need to parse all the non-call
110 // instructions to extract the line locations for profile matching.
111 if (!isa<CallBase>(&I) || isa<IntrinsicInst>(&I))
112 continue;
113
114 if (DIL->getInlinedAt()) {
115 IRAnchors.emplace(FindTopLevelInlinedCallsite(DIL));
116 } else {
119 StringRef CalleeName = GetCanonicalCalleeName(dyn_cast<CallBase>(&I));
120 IRAnchors.emplace(Callsite, FunctionId(CalleeName));
121 }
122 }
123 }
124 }
125}
126
127void SampleProfileMatcher::findProfileAnchors(const FunctionSamples &FS,
128 AnchorMap &ProfileAnchors) const {
129 auto isInvalidLineOffset = [](uint32_t LineOffset) {
130 return LineOffset & 0x8000;
131 };
132
133 auto InsertAnchor = [](const LineLocation &Loc, const FunctionId &CalleeName,
134 AnchorMap &ProfileAnchors) {
135 auto Ret = ProfileAnchors.try_emplace(Loc, CalleeName);
136 if (!Ret.second) {
137 // For multiple callees, which indicates it's an indirect call, we use a
138 // dummy name(UnknownIndirectCallee) as the indrect callee name.
139 Ret.first->second = FunctionId(UnknownIndirectCallee);
140 }
141 };
142
143 for (const auto &I : FS.getBodySamples()) {
144 const LineLocation &Loc = I.first;
145 if (isInvalidLineOffset(Loc.LineOffset))
146 continue;
147 for (const auto &C : I.second.getCallTargets())
148 InsertAnchor(Loc, C.first, ProfileAnchors);
149 }
150
151 for (const auto &I : FS.getCallsiteSamples()) {
152 const LineLocation &Loc = I.first;
153 if (isInvalidLineOffset(Loc.LineOffset))
154 continue;
155 for (const auto &C : I.second)
156 InsertAnchor(Loc, C.first, ProfileAnchors);
157 }
158}
159
160bool SampleProfileMatcher::functionHasProfile(const FunctionId &IRFuncName,
161 Function *&FuncWithoutProfile) {
162 FuncWithoutProfile = nullptr;
163 auto R = FunctionsWithoutProfile.find(IRFuncName);
164 if (R != FunctionsWithoutProfile.end())
165 FuncWithoutProfile = R->second;
166 return !FuncWithoutProfile;
167}
168
169bool SampleProfileMatcher::isProfileUnused(const FunctionId &ProfileFuncName) {
170 return SymbolMap->find(ProfileFuncName) == SymbolMap->end();
171}
172
173bool SampleProfileMatcher::functionMatchesProfile(
174 const FunctionId &IRFuncName, const FunctionId &ProfileFuncName,
175 bool FindMatchedProfileOnly) {
176 if (IRFuncName == ProfileFuncName)
177 return true;
179 return false;
180
181 // If IR function doesn't have profile and the profile is unused, try
182 // matching them.
183 Function *IRFunc = nullptr;
184 if (functionHasProfile(IRFuncName, IRFunc) ||
185 !isProfileUnused(ProfileFuncName))
186 return false;
187
188 assert(FunctionId(IRFunc->getName()) != ProfileFuncName &&
189 "IR function should be different from profile function to match");
190 return functionMatchesProfile(*IRFunc, ProfileFuncName,
191 FindMatchedProfileOnly);
192}
193
195SampleProfileMatcher::longestCommonSequence(const AnchorList &AnchorList1,
196 const AnchorList &AnchorList2,
197 bool MatchUnusedFunction) {
198 LocToLocMap MatchedAnchors;
199 llvm::longestCommonSequence<LineLocation, FunctionId>(
200 AnchorList1, AnchorList2,
201 [&](const FunctionId &A, const FunctionId &B) {
202 return functionMatchesProfile(
203 A, B,
204 !MatchUnusedFunction // Find matched function only
205 );
206 },
208 MatchedAnchors.try_emplace(A, B);
209 });
210 return MatchedAnchors;
211}
212
213void SampleProfileMatcher::matchNonCallsiteLocs(
214 const LocToLocMap &MatchedAnchors, const AnchorMap &IRAnchors,
215 LocToLocMap &IRToProfileLocationMap) {
216 auto InsertMatching = [&](const LineLocation &From, const LineLocation &To) {
217 // Skip the unchanged location mapping to save memory.
218 if (From != To)
219 IRToProfileLocationMap.insert({From, To});
220 };
221
222 // Use function's beginning location as the initial anchor.
223 int32_t LocationDelta = 0;
224 SmallVector<LineLocation> LastMatchedNonAnchors;
225 for (const auto &IR : IRAnchors) {
226 const auto &Loc = IR.first;
227 bool IsMatchedAnchor = false;
228 // Match the anchor location in lexical order.
229 auto R = MatchedAnchors.find(Loc);
230 if (R != MatchedAnchors.end()) {
231 const auto &Candidate = R->second;
232 InsertMatching(Loc, Candidate);
233 LLVM_DEBUG(dbgs() << "Callsite with callee:" << IR.second.stringRef()
234 << " is matched from " << Loc << " to " << Candidate
235 << "\n");
236 LocationDelta = Candidate.LineOffset - Loc.LineOffset;
237
238 // Match backwards for non-anchor locations.
239 // The locations in LastMatchedNonAnchors have been matched forwards
240 // based on the previous anchor, spilt it evenly and overwrite the
241 // second half based on the current anchor.
242 for (size_t I = (LastMatchedNonAnchors.size() + 1) / 2;
243 I < LastMatchedNonAnchors.size(); I++) {
244 const auto &L = LastMatchedNonAnchors[I];
245 uint32_t CandidateLineOffset = L.LineOffset + LocationDelta;
246 LineLocation Candidate(CandidateLineOffset, L.Discriminator);
247 InsertMatching(L, Candidate);
248 LLVM_DEBUG(dbgs() << "Location is rematched backwards from " << L
249 << " to " << Candidate << "\n");
250 }
251
252 IsMatchedAnchor = true;
253 LastMatchedNonAnchors.clear();
254 }
255
256 // Match forwards for non-anchor locations.
257 if (!IsMatchedAnchor) {
258 uint32_t CandidateLineOffset = Loc.LineOffset + LocationDelta;
259 LineLocation Candidate(CandidateLineOffset, Loc.Discriminator);
260 InsertMatching(Loc, Candidate);
261 LLVM_DEBUG(dbgs() << "Location is matched from " << Loc << " to "
262 << Candidate << "\n");
263 LastMatchedNonAnchors.emplace_back(Loc);
264 }
265 }
266}
267
268// Filter the non-call locations from IRAnchors and ProfileAnchors and write
269// them into a list for random access later.
270void SampleProfileMatcher::getFilteredAnchorList(
271 const AnchorMap &IRAnchors, const AnchorMap &ProfileAnchors,
272 AnchorList &FilteredIRAnchorsList, AnchorList &FilteredProfileAnchorList) {
273 for (const auto &I : IRAnchors) {
274 if (I.second.stringRef().empty())
275 continue;
276 FilteredIRAnchorsList.emplace_back(I);
277 }
278
279 for (const auto &I : ProfileAnchors)
280 FilteredProfileAnchorList.emplace_back(I);
281}
282
283// Call target name anchor based profile fuzzy matching.
284// Input:
285// For IR locations, the anchor is the callee name of direct callsite; For
286// profile locations, it's the call target name for BodySamples or inlinee's
287// profile name for CallsiteSamples.
288// Matching heuristic:
289// First match all the anchors using the diff algorithm, then split the
290// non-anchor locations between the two anchors evenly, first half are matched
291// based on the start anchor, second half are matched based on the end anchor.
292// For example, given:
293// IR locations: [1, 2(foo), 3, 5, 6(bar), 7]
294// Profile locations: [1, 2, 3(foo), 4, 7, 8(bar), 9]
295// The matching gives:
296// [1, 2(foo), 3, 5, 6(bar), 7]
297// | | | | | |
298// [1, 2, 3(foo), 4, 7, 8(bar), 9]
299// The output mapping: [2->3, 3->4, 5->7, 6->8, 7->9].
300void SampleProfileMatcher::runStaleProfileMatching(
301 const Function &F, const AnchorMap &IRAnchors,
302 const AnchorMap &ProfileAnchors, LocToLocMap &IRToProfileLocationMap,
303 bool RunCFGMatching, bool RunCGMatching) {
304 if (!RunCFGMatching && !RunCGMatching)
305 return;
306 LLVM_DEBUG(dbgs() << "Run stale profile matching for " << F.getName()
307 << "\n");
308 assert(IRToProfileLocationMap.empty() &&
309 "Run stale profile matching only once per function");
310
311 AnchorList FilteredProfileAnchorList;
312 AnchorList FilteredIRAnchorsList;
313 getFilteredAnchorList(IRAnchors, ProfileAnchors, FilteredIRAnchorsList,
314 FilteredProfileAnchorList);
315
316 if (FilteredIRAnchorsList.empty() || FilteredProfileAnchorList.empty())
317 return;
318
319 if (FilteredIRAnchorsList.size() > SalvageStaleProfileMaxCallsites ||
320 FilteredProfileAnchorList.size() > SalvageStaleProfileMaxCallsites) {
321 LLVM_DEBUG(dbgs() << "Skip stale profile matching for " << F.getName()
322 << " because the number of callsites in the IR is "
323 << FilteredIRAnchorsList.size()
324 << " and in the profile is "
325 << FilteredProfileAnchorList.size() << "\n");
326 return;
327 }
328
329 // Match the callsite anchors by finding the longest common subsequence
330 // between IR and profile.
331 // Define a match between two anchors as follows:
332 // 1) The function names of anchors are the same.
333 // 2) The similarity between the anchor functions is above a threshold if
334 // RunCGMatching is set.
335 // For 2), we only consider the anchor functions from IR and profile don't
336 // appear on either side to reduce the matching scope. Note that we need to
337 // use IR anchor as base(A side) to align with the order of
338 // IRToProfileLocationMap.
339 LocToLocMap MatchedAnchors =
340 longestCommonSequence(FilteredIRAnchorsList, FilteredProfileAnchorList,
341 RunCGMatching /* Match unused functions */);
342
343 // CFG level matching:
344 // Apply the callsite matchings to infer matching for the basic
345 // block(non-callsite) locations and write the result to
346 // IRToProfileLocationMap.
347 if (RunCFGMatching)
348 matchNonCallsiteLocs(MatchedAnchors, IRAnchors, IRToProfileLocationMap);
349}
350
351void SampleProfileMatcher::runOnFunction(Function &F) {
352 // We need to use flattened function samples for matching.
353 // Unlike IR, which includes all callsites from the source code, the callsites
354 // in profile only show up when they are hit by samples, i,e. the profile
355 // callsites in one context may differ from those in another context. To get
356 // the maximum number of callsites, we merge the function profiles from all
357 // contexts, aka, the flattened profile to find profile anchors.
358 const auto *FSForMatching = getFlattenedSamplesFor(F);
359 if (SalvageUnusedProfile && !FSForMatching) {
360 // Apply the matching in place to find the new function's matched profile.
361 auto R = FuncToProfileNameMap.find(&F);
362 if (R != FuncToProfileNameMap.end()) {
363 FSForMatching = getFlattenedSamplesFor(R->second);
364 // Try to find the salvaged top-level profiles that are explicitly loaded
365 // for the matching, see "functionMatchesProfileHelper" for the details.
366 if (!FSForMatching && LoadFuncProfileforCGMatching)
367 FSForMatching = Reader.getSamplesFor(R->second.stringRef());
368 }
369 }
370 if (!FSForMatching)
371 return;
372
373 // Anchors for IR. It's a map from IR location to callee name, callee name is
374 // empty for non-call instruction and use a dummy name(UnknownIndirectCallee)
375 // for unknown indrect callee name.
376 AnchorMap IRAnchors;
377 findIRAnchors(F, IRAnchors);
378 // Anchors for profile. It's a map from callsite location to a set of callee
379 // name.
380 AnchorMap ProfileAnchors;
381 findProfileAnchors(*FSForMatching, ProfileAnchors);
382
383 // Compute the callsite match states for profile staleness report.
385 recordCallsiteMatchStates(F, IRAnchors, ProfileAnchors, nullptr);
386
388 return;
389 // For probe-based profiles, run matching only when profile checksum is
390 // mismatched.
391 bool ChecksumMismatch = FunctionSamples::ProfileIsProbeBased &&
392 !ProbeManager->profileIsValid(F, *FSForMatching);
393 bool RunCFGMatching =
394 !FunctionSamples::ProfileIsProbeBased || ChecksumMismatch;
395 bool RunCGMatching = SalvageUnusedProfile;
396 // For imported functions, the checksum metadata(pseudo_probe_desc) are
397 // dropped, so we leverage function attribute(profile-checksum-mismatch) to
398 // transfer the info: add the attribute during pre-link phase and check it
399 // during post-link phase(see "profileIsValid").
400 if (ChecksumMismatch && LTOPhase == ThinOrFullLTOPhase::ThinLTOPreLink)
401 F.addFnAttr("profile-checksum-mismatch");
402
403 // The matching result will be saved to IRToProfileLocationMap, create a
404 // new map for each function.
405 auto &IRToProfileLocationMap = getIRToProfileLocationMap(F);
406 runStaleProfileMatching(F, IRAnchors, ProfileAnchors, IRToProfileLocationMap,
407 RunCFGMatching, RunCGMatching);
408 // Find and update callsite match states after matching.
409 if (RunCFGMatching && (ReportProfileStaleness || PersistProfileStaleness))
410 recordCallsiteMatchStates(F, IRAnchors, ProfileAnchors,
411 &IRToProfileLocationMap);
412}
413
414void SampleProfileMatcher::recordCallsiteMatchStates(
415 const Function &F, const AnchorMap &IRAnchors,
416 const AnchorMap &ProfileAnchors,
417 const LocToLocMap *IRToProfileLocationMap) {
418 bool IsPostMatch = IRToProfileLocationMap != nullptr;
419 auto &CallsiteMatchStates =
420 FuncCallsiteMatchStates[FunctionSamples::getCanonicalFnName(F.getName())];
421
422 auto MapIRLocToProfileLoc = [&](const LineLocation &IRLoc) {
423 // IRToProfileLocationMap is null in pre-match phrase.
424 if (!IRToProfileLocationMap)
425 return IRLoc;
426 const auto &ProfileLoc = IRToProfileLocationMap->find(IRLoc);
427 if (ProfileLoc != IRToProfileLocationMap->end())
428 return ProfileLoc->second;
429 else
430 return IRLoc;
431 };
432
433 for (const auto &I : IRAnchors) {
434 // After fuzzy profile matching, use the matching result to remap the
435 // current IR callsite.
436 const auto &ProfileLoc = MapIRLocToProfileLoc(I.first);
437 const auto &IRCalleeId = I.second;
438 const auto &It = ProfileAnchors.find(ProfileLoc);
439 if (It == ProfileAnchors.end())
440 continue;
441 const auto &ProfCalleeId = It->second;
442 if (IRCalleeId == ProfCalleeId) {
443 auto It = CallsiteMatchStates.find(ProfileLoc);
444 if (It == CallsiteMatchStates.end())
445 CallsiteMatchStates.emplace(ProfileLoc, MatchState::InitialMatch);
446 else if (IsPostMatch) {
447 if (It->second == MatchState::InitialMatch)
448 It->second = MatchState::UnchangedMatch;
449 else if (It->second == MatchState::InitialMismatch)
450 It->second = MatchState::RecoveredMismatch;
451 }
452 }
453 }
454
455 // Check if there are any callsites in the profile that does not match to any
456 // IR callsites.
457 for (const auto &I : ProfileAnchors) {
458 const auto &Loc = I.first;
459 assert(!I.second.stringRef().empty() && "Callees should not be empty");
460 auto It = CallsiteMatchStates.find(Loc);
461 if (It == CallsiteMatchStates.end())
462 CallsiteMatchStates.emplace(Loc, MatchState::InitialMismatch);
463 else if (IsPostMatch) {
464 // Update the state if it's not matched(UnchangedMatch or
465 // RecoveredMismatch).
466 if (It->second == MatchState::InitialMismatch)
467 It->second = MatchState::UnchangedMismatch;
468 else if (It->second == MatchState::InitialMatch)
469 It->second = MatchState::RemovedMatch;
470 }
471 }
472}
473
474void SampleProfileMatcher::countMismatchedFuncSamples(const FunctionSamples &FS,
475 bool IsTopLevel) {
476 const auto *FuncDesc = ProbeManager->getDesc(FS.getGUID());
477 // Skip the function that is external or renamed.
478 if (!FuncDesc)
479 return;
480
481 if (ProbeManager->profileIsHashMismatched(*FuncDesc, FS)) {
482 if (IsTopLevel)
483 NumStaleProfileFunc++;
484 // Given currently all probe ids are after block probe ids, once the
485 // checksum is mismatched, it's likely all the callites are mismatched and
486 // dropped. We conservatively count all the samples as mismatched and stop
487 // counting the inlinees' profiles.
488 MismatchedFunctionSamples += FS.getTotalSamples();
489 return;
490 }
491
492 // Even the current-level function checksum is matched, it's possible that the
493 // nested inlinees' checksums are mismatched that affect the inlinee's sample
494 // loading, we need to go deeper to check the inlinees' function samples.
495 // Similarly, count all the samples as mismatched if the inlinee's checksum is
496 // mismatched using this recursive function.
497 for (const auto &I : FS.getCallsiteSamples())
498 for (const auto &CS : I.second)
499 countMismatchedFuncSamples(CS.second, false);
500}
501
502void SampleProfileMatcher::countMismatchedCallsiteSamples(
503 const FunctionSamples &FS) {
504 auto It = FuncCallsiteMatchStates.find(FS.getFuncName());
505 // Skip it if no mismatched callsite or this is an external function.
506 if (It == FuncCallsiteMatchStates.end() || It->second.empty())
507 return;
508 const auto &CallsiteMatchStates = It->second;
509
510 auto findMatchState = [&](const LineLocation &Loc) {
511 auto It = CallsiteMatchStates.find(Loc);
512 if (It == CallsiteMatchStates.end())
513 return MatchState::Unknown;
514 return It->second;
515 };
516
517 auto AttributeMismatchedSamples = [&](const enum MatchState &State,
518 uint64_t Samples) {
519 if (isMismatchState(State))
520 MismatchedCallsiteSamples += Samples;
521 else if (State == MatchState::RecoveredMismatch)
522 RecoveredCallsiteSamples += Samples;
523 };
524
525 // The non-inlined callsites are saved in the body samples of function
526 // profile, go through it to count the non-inlined callsite samples.
527 for (const auto &I : FS.getBodySamples())
528 AttributeMismatchedSamples(findMatchState(I.first), I.second.getSamples());
529
530 // Count the inlined callsite samples.
531 for (const auto &I : FS.getCallsiteSamples()) {
532 auto State = findMatchState(I.first);
533 uint64_t CallsiteSamples = 0;
534 for (const auto &CS : I.second)
535 CallsiteSamples += CS.second.getTotalSamples();
536 AttributeMismatchedSamples(State, CallsiteSamples);
537
538 if (isMismatchState(State))
539 continue;
540
541 // When the current level of inlined call site matches the profiled call
542 // site, we need to go deeper along the inline tree to count mismatches from
543 // lower level inlinees.
544 for (const auto &CS : I.second)
545 countMismatchedCallsiteSamples(CS.second);
546 }
547}
548
549void SampleProfileMatcher::countMismatchCallsites(const FunctionSamples &FS) {
550 auto It = FuncCallsiteMatchStates.find(FS.getFuncName());
551 // Skip it if no mismatched callsite or this is an external function.
552 if (It == FuncCallsiteMatchStates.end() || It->second.empty())
553 return;
554 const auto &MatchStates = It->second;
555 [[maybe_unused]] bool OnInitialState =
556 isInitialState(MatchStates.begin()->second);
557 for (const auto &I : MatchStates) {
558 TotalProfiledCallsites++;
559 assert(
560 (OnInitialState ? isInitialState(I.second) : isFinalState(I.second)) &&
561 "Profile matching state is inconsistent");
562
563 if (isMismatchState(I.second))
564 NumMismatchedCallsites++;
565 else if (I.second == MatchState::RecoveredMismatch)
566 NumRecoveredCallsites++;
567 }
568}
569
570void SampleProfileMatcher::countCallGraphRecoveredSamples(
571 const FunctionSamples &FS,
572 std::unordered_set<FunctionId> &CallGraphRecoveredProfiles) {
573 if (CallGraphRecoveredProfiles.count(FS.getFunction())) {
574 NumCallGraphRecoveredFuncSamples += FS.getTotalSamples();
575 return;
576 }
577
578 for (const auto &CM : FS.getCallsiteSamples()) {
579 for (const auto &CS : CM.second) {
580 countCallGraphRecoveredSamples(CS.second, CallGraphRecoveredProfiles);
581 }
582 }
583}
584
585void SampleProfileMatcher::computeAndReportProfileStaleness() {
587 return;
588
589 std::unordered_set<FunctionId> CallGraphRecoveredProfiles;
591 for (const auto &I : FuncToProfileNameMap) {
592 CallGraphRecoveredProfiles.insert(I.second);
593 if (GlobalValue::isAvailableExternallyLinkage(I.first->getLinkage()))
594 continue;
595 NumCallGraphRecoveredProfiledFunc++;
596 }
597 }
598
599 // Count profile mismatches for profile staleness report.
600 for (const auto &F : M) {
602 continue;
603 // As the stats will be merged by linker, skip reporting the metrics for
604 // imported functions to avoid repeated counting.
606 continue;
607 const auto *FS = Reader.getSamplesFor(F);
608 if (!FS)
609 continue;
610 TotalProfiledFunc++;
611 TotalFunctionSamples += FS->getTotalSamples();
612
613 if (SalvageUnusedProfile && !CallGraphRecoveredProfiles.empty())
614 countCallGraphRecoveredSamples(*FS, CallGraphRecoveredProfiles);
615
616 // Checksum mismatch is only used in pseudo-probe mode.
618 countMismatchedFuncSamples(*FS, true);
619
620 // Count mismatches and samples for calliste.
621 countMismatchCallsites(*FS);
622 countMismatchedCallsiteSamples(*FS);
623 }
624
627 errs() << "(" << NumStaleProfileFunc << "/" << TotalProfiledFunc
628 << ") of functions' profile are invalid and ("
629 << MismatchedFunctionSamples << "/" << TotalFunctionSamples
630 << ") of samples are discarded due to function hash mismatch.\n";
631 }
633 errs() << "(" << NumCallGraphRecoveredProfiledFunc << "/"
634 << TotalProfiledFunc << ") of functions' profile are matched and ("
635 << NumCallGraphRecoveredFuncSamples << "/" << TotalFunctionSamples
636 << ") of samples are reused by call graph matching.\n";
637 }
638
639 errs() << "(" << (NumMismatchedCallsites + NumRecoveredCallsites) << "/"
640 << TotalProfiledCallsites
641 << ") of callsites' profile are invalid and ("
642 << (MismatchedCallsiteSamples + RecoveredCallsiteSamples) << "/"
643 << TotalFunctionSamples
644 << ") of samples are discarded due to callsite location mismatch.\n";
645 errs() << "(" << NumRecoveredCallsites << "/"
646 << (NumRecoveredCallsites + NumMismatchedCallsites)
647 << ") of callsites and (" << RecoveredCallsiteSamples << "/"
648 << (RecoveredCallsiteSamples + MismatchedCallsiteSamples)
649 << ") of samples are recovered by stale profile matching.\n";
650 }
651
653 LLVMContext &Ctx = M.getContext();
654 MDBuilder MDB(Ctx);
655
658 ProfStatsVec.emplace_back("NumStaleProfileFunc", NumStaleProfileFunc);
659 ProfStatsVec.emplace_back("TotalProfiledFunc", TotalProfiledFunc);
660 ProfStatsVec.emplace_back("MismatchedFunctionSamples",
661 MismatchedFunctionSamples);
662 ProfStatsVec.emplace_back("TotalFunctionSamples", TotalFunctionSamples);
663 }
664
666 ProfStatsVec.emplace_back("NumCallGraphRecoveredProfiledFunc",
667 NumCallGraphRecoveredProfiledFunc);
668 ProfStatsVec.emplace_back("NumCallGraphRecoveredFuncSamples",
669 NumCallGraphRecoveredFuncSamples);
670 }
671
672 ProfStatsVec.emplace_back("NumMismatchedCallsites", NumMismatchedCallsites);
673 ProfStatsVec.emplace_back("NumRecoveredCallsites", NumRecoveredCallsites);
674 ProfStatsVec.emplace_back("TotalProfiledCallsites", TotalProfiledCallsites);
675 ProfStatsVec.emplace_back("MismatchedCallsiteSamples",
676 MismatchedCallsiteSamples);
677 ProfStatsVec.emplace_back("RecoveredCallsiteSamples",
678 RecoveredCallsiteSamples);
679
680 auto *MD = MDB.createLLVMStats(ProfStatsVec);
681 auto *NMD = M.getOrInsertNamedMetadata("llvm.stats");
682 NMD->addOperand(MD);
683 }
684}
685
686void SampleProfileMatcher::findFunctionsWithoutProfile() {
687 // TODO: Support MD5 profile.
689 return;
690 StringSet<> NamesInProfile;
691 if (auto NameTable = Reader.getNameTable()) {
692 for (auto Name : *NameTable)
693 NamesInProfile.insert(Name.stringRef());
694 }
695
696 for (auto &F : M) {
697 // Skip declarations, as even if the function can be matched, we have
698 // nothing to do with it.
699 if (F.isDeclaration())
700 continue;
701
702 StringRef CanonFName = FunctionSamples::getCanonicalFnName(F.getName());
703 const auto *FS = getFlattenedSamplesFor(F);
704 if (FS)
705 continue;
706
707 // For extended binary, functions fully inlined may not be loaded in the
708 // top-level profile, so check the NameTable which has the all symbol names
709 // in profile.
710 if (NamesInProfile.count(CanonFName))
711 continue;
712
713 // For extended binary, non-profiled function symbols are in the profile
714 // symbol list table.
715 if (PSL && PSL->contains(CanonFName))
716 continue;
717
718 LLVM_DEBUG(dbgs() << "Function " << CanonFName
719 << " is not in profile or profile symbol list.\n");
720 FunctionsWithoutProfile[FunctionId(CanonFName)] = &F;
721 }
722}
723
724bool SampleProfileMatcher::functionMatchesProfileHelper(
725 const Function &IRFunc, const FunctionId &ProfFunc) {
726 // The value is in the range [0, 1]. The bigger the value is, the more similar
727 // two sequences are.
728 float Similarity = 0.0;
729
730 const auto *FSForMatching = getFlattenedSamplesFor(ProfFunc);
731 // With extbinary profile format, initial profile loading only reads profile
732 // based on current function names in the module.
733 // However, if a function is renamed, sample loader skips to load its original
734 // profile(which has a different name), we will miss this case. To address
735 // this, we load the top-level profile candidate explicitly for the matching.
736 if (!FSForMatching && LoadFuncProfileforCGMatching) {
737 DenseSet<StringRef> TopLevelFunc({ProfFunc.stringRef()});
738 if (std::error_code EC = Reader.read(TopLevelFunc))
739 return false;
740 FSForMatching = Reader.getSamplesFor(ProfFunc.stringRef());
741 LLVM_DEBUG({
742 if (FSForMatching)
743 dbgs() << "Read top-level function " << ProfFunc
744 << " for call-graph matching\n";
745 });
746 }
747 if (!FSForMatching)
748 return false;
749 // The check for similarity or checksum may not be reliable if the function is
750 // tiny, we use the number of basic block as a proxy for the function
751 // complexity and skip the matching if it's too small.
752 if (IRFunc.size() < MinFuncCountForCGMatching ||
753 FSForMatching->getBodySamples().size() < MinFuncCountForCGMatching)
754 return false;
755
756 // For probe-based function, we first trust the checksum info. If the checksum
757 // doesn't match, we continue checking for similarity.
759 const auto *FuncDesc = ProbeManager->getDesc(IRFunc);
760 if (FuncDesc &&
761 !ProbeManager->profileIsHashMismatched(*FuncDesc, *FSForMatching)) {
762 LLVM_DEBUG(dbgs() << "The checksums for " << IRFunc.getName()
763 << "(IR) and " << ProfFunc << "(Profile) match.\n");
764
765 return true;
766 }
767 }
768
769 AnchorMap IRAnchors;
770 findIRAnchors(IRFunc, IRAnchors);
771 AnchorMap ProfileAnchors;
772 findProfileAnchors(*FSForMatching, ProfileAnchors);
773
774 AnchorList FilteredIRAnchorsList;
775 AnchorList FilteredProfileAnchorList;
776 getFilteredAnchorList(IRAnchors, ProfileAnchors, FilteredIRAnchorsList,
777 FilteredProfileAnchorList);
778
779 // Similarly skip the matching if the num of anchors is not enough.
780 if (FilteredIRAnchorsList.size() < MinCallCountForCGMatching ||
781 FilteredProfileAnchorList.size() < MinCallCountForCGMatching)
782 return false;
783
784 // Use the diff algorithm to find the LCS between IR and profile.
785
786 // Don't recursively match the callee function to avoid infinite matching,
787 // callee functions will be handled later since it's processed in top-down
788 // order .
789 LocToLocMap MatchedAnchors =
790 longestCommonSequence(FilteredIRAnchorsList, FilteredProfileAnchorList,
791 false /* Match unused functions */);
792
793 Similarity =
794 static_cast<float>(MatchedAnchors.size()) * 2 /
795 (FilteredIRAnchorsList.size() + FilteredProfileAnchorList.size());
796
797 LLVM_DEBUG(dbgs() << "The similarity between " << IRFunc.getName()
798 << "(IR) and " << ProfFunc << "(profile) is "
799 << format("%.2f", Similarity) << "\n");
800 assert((Similarity >= 0 && Similarity <= 1.0) &&
801 "Similarity value should be in [0, 1]");
802 return Similarity * 100 > FuncProfileSimilarityThreshold;
803}
804
805// If FindMatchedProfileOnly is set to true, only use the processed function
806// results. This is used for skipping the repeated recursive matching.
807bool SampleProfileMatcher::functionMatchesProfile(Function &IRFunc,
808 const FunctionId &ProfFunc,
809 bool FindMatchedProfileOnly) {
810 auto R = FuncProfileMatchCache.find({&IRFunc, ProfFunc});
811 if (R != FuncProfileMatchCache.end())
812 return R->second;
813
814 if (FindMatchedProfileOnly)
815 return false;
816
817 bool Matched = functionMatchesProfileHelper(IRFunc, ProfFunc);
818 FuncProfileMatchCache[{&IRFunc, ProfFunc}] = Matched;
819 if (Matched) {
820 FuncToProfileNameMap[&IRFunc] = ProfFunc;
821 LLVM_DEBUG(dbgs() << "Function:" << IRFunc.getName()
822 << " matches profile:" << ProfFunc << "\n");
823 }
824
825 return Matched;
826}
827
828void SampleProfileMatcher::UpdateWithSalvagedProfiles() {
829 DenseSet<StringRef> ProfileSalvagedFuncs;
830 // Update FuncNameToProfNameMap and SymbolMap.
831 for (auto &I : FuncToProfileNameMap) {
832 assert(I.first && "New function is null");
833 FunctionId FuncName(I.first->getName());
834 ProfileSalvagedFuncs.insert(I.second.stringRef());
835 FuncNameToProfNameMap->emplace(FuncName, I.second);
836
837 // We need to remove the old entry to avoid duplicating the function
838 // processing.
839 SymbolMap->erase(FuncName);
840 SymbolMap->emplace(I.second, I.first);
841 }
842
843 // With extbinary profile format, initial profile loading only reads profile
844 // based on current function names in the module, so we need to load top-level
845 // profiles for functions with different profile name explicitly after
846 // function-profile name map is established with stale profile matching.
847 Reader.read(ProfileSalvagedFuncs);
848 Reader.setFuncNameToProfNameMap(*FuncNameToProfNameMap);
849}
850
852 ProfileConverter::flattenProfile(Reader.getProfiles(), FlattenedProfiles,
855 findFunctionsWithoutProfile();
856
857 // Process the matching in top-down order so that the caller matching result
858 // can be used to the callee matching.
859 std::vector<Function *> TopDownFunctionList;
860 TopDownFunctionList.reserve(M.size());
861 buildTopDownFuncOrder(CG, TopDownFunctionList);
862 for (auto *F : TopDownFunctionList) {
864 continue;
865 runOnFunction(*F);
866 }
867
869 UpdateWithSalvagedProfiles();
870
872 distributeIRToProfileLocationMap();
873
874 computeAndReportProfileStaleness();
875}
876
877void SampleProfileMatcher::distributeIRToProfileLocationMap(
878 FunctionSamples &FS) {
879 const auto ProfileMappings = FuncMappings.find(FS.getFuncName());
880 if (ProfileMappings != FuncMappings.end()) {
881 FS.setIRToProfileLocationMap(&(ProfileMappings->second));
882 }
883
884 for (auto &Callees :
885 const_cast<CallsiteSampleMap &>(FS.getCallsiteSamples())) {
886 for (auto &FS : Callees.second) {
887 distributeIRToProfileLocationMap(FS.second);
888 }
889 }
890}
891
892// Use a central place to distribute the matching results. Outlined and inlined
893// profile with the function name will be set to the same pointer.
894void SampleProfileMatcher::distributeIRToProfileLocationMap() {
895 for (auto &I : Reader.getProfiles()) {
896 distributeIRToProfileLocationMap(I.second);
897 }
898}
BlockVerifier::State From
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
#define LLVM_DEBUG(...)
Definition: Debug.h:106
std::string Name
Legalize the Machine IR a function s Machine IR
Definition: Legalizer.cpp:80
#define F(x, y, z)
Definition: MD5.cpp:55
#define I(x, y, z)
Definition: MD5.cpp:58
static cl::opt< bool > SalvageStaleProfile("memprof-salvage-stale-profile", cl::desc("Salvage stale MemProf profile"), cl::init(false), cl::Hidden)
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
static cl::opt< unsigned > MinCallCountForCGMatching("min-call-count-for-cg-matching", cl::Hidden, cl::init(3), cl::desc("The minimum number of call anchors required for a function to " "run stale profile call graph matching."))
cl::opt< bool > ReportProfileStaleness
static cl::opt< bool > LoadFuncProfileforCGMatching("load-func-profile-for-cg-matching", cl::Hidden, cl::init(false), cl::desc("Load top-level profiles that the sample reader initially skipped for " "the call-graph matching (only meaningful for extended binary " "format)"))
static cl::opt< unsigned > FuncProfileSimilarityThreshold("func-profile-similarity-threshold", cl::Hidden, cl::init(80), cl::desc("Consider a profile matches a function if the similarity of their " "callee sequences is above the specified percentile."))
cl::opt< bool > SalvageStaleProfile
static cl::opt< unsigned > SalvageStaleProfileMaxCallsites("salvage-stale-profile-max-callsites", cl::Hidden, cl::init(UINT_MAX), cl::desc("The maximum number of callsites in a function, above which stale " "profile matching will be skipped."))
static cl::opt< unsigned > MinFuncCountForCGMatching("min-func-count-for-cg-matching", cl::Hidden, cl::init(5), cl::desc("The minimum number of basic blocks required for a function to " "run stale profile call graph matching."))
cl::opt< bool > SalvageUnusedProfile
cl::opt< bool > PersistProfileStaleness
This file provides the interface for SampleProfileMatcher.
cl::opt< bool > SalvageUnusedProfile("salvage-unused-profile", cl::Hidden, cl::init(false), cl::desc("Salvage unused profile by matching with new " "functions on call graph."))
cl::opt< bool > PersistProfileStaleness("persist-profile-staleness", cl::Hidden, cl::init(false), cl::desc("Compute stale profile statistical metrics and write it into the " "native object file(.llvm_stats section)."))
cl::opt< bool > ReportProfileStaleness("report-profile-staleness", cl::Hidden, cl::init(false), cl::desc("Compute and report stale profile statistical metrics."))
Base class for all callable instructions (InvokeInst and CallInst) Holds everything related to callin...
Definition: InstrTypes.h:1120
Debug location.
Implements a dense probed hash-table based set.
Definition: DenseSet.h:278
size_t size() const
Definition: Function.h:858
static bool isAvailableExternallyLinkage(LinkageTypes Linkage)
Definition: GlobalValue.h:379
This is an important class for using LLVM in a threaded context.
Definition: LLVMContext.h:67
bool profileIsHashMismatched(const PseudoProbeDescriptor &FuncDesc, const FunctionSamples &Samples) const
bool profileIsValid(const Function &F, const FunctionSamples &Samples) const
const PseudoProbeDescriptor * getDesc(uint64_t GUID) const
size_t size() const
Definition: SmallVector.h:78
reference emplace_back(ArgTypes &&... Args)
Definition: SmallVector.h:937
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1196
iterator end()
Definition: StringMap.h:220
iterator find(StringRef Key)
Definition: StringMap.h:233
size_type count(StringRef Key) const
count - Return 1 if the element is in the map, 0 otherwise.
Definition: StringMap.h:276
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:51
StringSet - A wrapper for StringMap that provides set-like functionality.
Definition: StringSet.h:23
std::pair< typename Base::iterator, bool > insert(StringRef key)
Definition: StringSet.h:38
StringRef getName() const
Return a constant reference to the value's name.
Definition: Value.cpp:309
std::pair< iterator, bool > insert(const ValueT &V)
Definition: DenseSet.h:213
This class represents a function that is read from a sample profile.
Definition: FunctionId.h:36
StringRef stringRef() const
Convert to StringRef.
Definition: FunctionId.h:108
Representation of the samples collected for a function.
Definition: SampleProf.h:745
static StringRef getCanonicalFnName(const Function &F)
Return the canonical name for a function, taking into account suffix elision policy attributes.
Definition: SampleProf.h:1090
static bool ProfileIsFS
If this profile uses flow sensitive discriminators.
Definition: SampleProf.h:1203
static LineLocation getCallSiteIdentifier(const DILocation *DIL, bool ProfileIsFS=false)
Returns a unique call site identifier for a given debug location of a call instruction.
Definition: SampleProf.cpp:221
static bool UseMD5
Whether the profile uses MD5 to represent string.
Definition: SampleProf.h:1197
static void flattenProfile(SampleProfileMap &ProfileMap, bool ProfileIsCS=false)
Definition: SampleProf.h:1424
SampleProfileMap & getProfiles()
Return all the profiles.
FunctionSamples * getSamplesFor(const Function &F)
Return the samples collected for function F.
std::error_code read()
The interface to read sample profiles from the associated file.
void setFuncNameToProfNameMap(const HashKeyMap< std::unordered_map, FunctionId, FunctionId > &FPMap)
virtual std::vector< FunctionId > * getNameTable()
It includes all the names that have samples either in outline instance or inline instance.
@ C
The default llvm calling convention, compatible with C.
Definition: CallingConv.h:34
@ FS
Definition: X86.h:211
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:443
std::unordered_map< LineLocation, LineLocation, LineLocationHash > LocToLocMap
Definition: SampleProf.h:738
std::map< LineLocation, FunctionSamplesMap > CallsiteSampleMap
Definition: SampleProf.h:736
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
std::vector< std::pair< LineLocation, FunctionId > > AnchorList
std::map< LineLocation, FunctionId > AnchorMap
static void buildTopDownFuncOrder(LazyCallGraph &CG, std::vector< Function * > &FunctionOrderList)
@ ThinLTOPreLink
ThinLTO prelink (summary) phase.
std::optional< PseudoProbe > extractProbe(const Instruction &Inst)
Definition: PseudoProbe.cpp:56
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:163
format_object< Ts... > format(const char *Fmt, const Ts &... Vals)
These are helper functions used to produce formatted output.
Definition: Format.h:125
raw_fd_ostream & errs()
This returns a reference to a raw_ostream for standard error.
static bool skipProfileForFunction(const Function &F)
Represents the relative location of an instruction.
Definition: SampleProf.h:280