LLVM 22.0.0git
SampleProfileMatcher.cpp
Go to the documentation of this file.
1//===- SampleProfileMatcher.cpp - Sampling-based Stale Profile Matcher ----===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file implements the SampleProfileMatcher used for stale
10// profile matching.
11//
12//===----------------------------------------------------------------------===//
13
17#include "llvm/IR/MDBuilder.h"
20
21#include <unordered_set>
22
23using namespace llvm;
24using namespace sampleprof;
25
26#define DEBUG_TYPE "sample-profile-matcher"
27
28namespace llvm {
29
31 "func-profile-similarity-threshold", cl::Hidden, cl::init(80),
32 cl::desc("Consider a profile matches a function if the similarity of their "
33 "callee sequences is above the specified percentile."));
34
36 "min-func-count-for-cg-matching", cl::Hidden, cl::init(5),
37 cl::desc("The minimum number of basic blocks required for a function to "
38 "run stale profile call graph matching."));
39
41 "min-call-count-for-cg-matching", cl::Hidden, cl::init(3),
42 cl::desc("The minimum number of call anchors required for a function to "
43 "run stale profile call graph matching."));
44
46 "load-func-profile-for-cg-matching", cl::Hidden, cl::init(true),
48 "Load top-level profiles that the sample reader initially skipped for "
49 "the call-graph matching (only meaningful for extended binary "
50 "format)"));
51
56
58 "salvage-stale-profile-max-callsites", cl::Hidden, cl::init(UINT_MAX),
59 cl::desc("The maximum number of callsites in a function, above which stale "
60 "profile matching will be skipped."));
61
62} // end namespace llvm
63
64void SampleProfileMatcher::findIRAnchors(const Function &F,
65 AnchorMap &IRAnchors) const {
66 // For inlined code, recover the original callsite and callee by finding the
67 // top-level inline frame. e.g. For frame stack "main:1 @ foo:2 @ bar:3", the
68 // top-level frame is "main:1", the callsite is "1" and the callee is "foo".
69 auto FindTopLevelInlinedCallsite = [](const DILocation *DIL) {
70 assert((DIL && DIL->getInlinedAt()) && "No inlined callsite");
71 const DILocation *PrevDIL = nullptr;
72 do {
73 PrevDIL = DIL;
74 DIL = DIL->getInlinedAt();
75 } while (DIL->getInlinedAt());
76
77 LineLocation Callsite = FunctionSamples::getCallSiteIdentifier(
79 StringRef CalleeName = PrevDIL->getSubprogramLinkageName();
80 return std::make_pair(Callsite, FunctionId(CalleeName));
81 };
82
83 auto GetCanonicalCalleeName = [](const CallBase *CB) {
84 StringRef CalleeName = UnknownIndirectCallee;
85 if (Function *Callee = CB->getCalledFunction())
86 CalleeName = FunctionSamples::getCanonicalFnName(Callee->getName());
87 return CalleeName;
88 };
89
90 // Extract profile matching anchors in the IR.
91 for (auto &BB : F) {
92 for (auto &I : BB) {
93 DILocation *DIL = I.getDebugLoc();
94 if (!DIL)
95 continue;
96
98 if (auto Probe = extractProbe(I)) {
99 // Flatten inlined IR for the matching.
100 if (DIL->getInlinedAt()) {
101 IRAnchors.emplace(FindTopLevelInlinedCallsite(DIL));
102 } else {
103 // Use empty StringRef for basic block probe.
104 StringRef CalleeName;
105 if (const auto *CB = dyn_cast<CallBase>(&I)) {
106 // Skip the probe inst whose callee name is "llvm.pseudoprobe".
107 if (!isa<IntrinsicInst>(&I))
108 CalleeName = GetCanonicalCalleeName(CB);
109 }
110 LineLocation Loc = LineLocation(Probe->Id, 0);
111 IRAnchors.emplace(Loc, FunctionId(CalleeName));
112 }
113 }
114 } else {
115 // TODO: For line-number based profile(AutoFDO), currently only support
116 // find callsite anchors. In future, we need to parse all the non-call
117 // instructions to extract the line locations for profile matching.
119 continue;
120
121 if (DIL->getInlinedAt()) {
122 IRAnchors.emplace(FindTopLevelInlinedCallsite(DIL));
123 } else {
124 LineLocation Callsite = FunctionSamples::getCallSiteIdentifier(
126 StringRef CalleeName = GetCanonicalCalleeName(dyn_cast<CallBase>(&I));
127 IRAnchors.emplace(Callsite, FunctionId(CalleeName));
128 }
129 }
130 }
131 }
132}
133
134void SampleProfileMatcher::findProfileAnchors(const FunctionSamples &FS,
135 AnchorMap &ProfileAnchors) const {
136 auto isInvalidLineOffset = [](uint32_t LineOffset) {
137 return LineOffset & 0x8000;
138 };
139
140 auto InsertAnchor = [](const LineLocation &Loc, const FunctionId &CalleeName,
141 AnchorMap &ProfileAnchors) {
142 auto Ret = ProfileAnchors.try_emplace(Loc, CalleeName);
143 if (!Ret.second) {
144 // For multiple callees, which indicates it's an indirect call, we use a
145 // dummy name(UnknownIndirectCallee) as the indrect callee name.
146 Ret.first->second = FunctionId(UnknownIndirectCallee);
147 }
148 };
149
150 for (const auto &I : FS.getBodySamples()) {
151 const LineLocation &Loc = I.first;
152 if (isInvalidLineOffset(Loc.LineOffset))
153 continue;
154 for (const auto &C : I.second.getCallTargets())
155 InsertAnchor(Loc, C.first, ProfileAnchors);
156 }
157
158 for (const auto &I : FS.getCallsiteSamples()) {
159 const LineLocation &Loc = I.first;
160 if (isInvalidLineOffset(Loc.LineOffset))
161 continue;
162 for (const auto &C : I.second)
163 InsertAnchor(Loc, C.first, ProfileAnchors);
164 }
165}
166
167bool SampleProfileMatcher::functionHasProfile(const FunctionId &IRFuncName,
168 Function *&FuncWithoutProfile) {
169 FuncWithoutProfile = nullptr;
170 auto R = FunctionsWithoutProfile.find(IRFuncName);
171 if (R != FunctionsWithoutProfile.end())
172 FuncWithoutProfile = R->second;
173 return !FuncWithoutProfile;
174}
175
176bool SampleProfileMatcher::isProfileUnused(const FunctionId &ProfileFuncName) {
177 return SymbolMap->find(ProfileFuncName) == SymbolMap->end();
178}
179
180bool SampleProfileMatcher::functionMatchesProfile(
181 const FunctionId &IRFuncName, const FunctionId &ProfileFuncName,
182 bool FindMatchedProfileOnly) {
183 if (IRFuncName == ProfileFuncName)
184 return true;
186 return false;
187
188 // If IR function doesn't have profile and the profile is unused, try
189 // matching them.
190 Function *IRFunc = nullptr;
191 if (functionHasProfile(IRFuncName, IRFunc) ||
192 !isProfileUnused(ProfileFuncName))
193 return false;
194
195 assert(FunctionId(IRFunc->getName()) != ProfileFuncName &&
196 "IR function should be different from profile function to match");
197 return functionMatchesProfile(*IRFunc, ProfileFuncName,
198 FindMatchedProfileOnly);
199}
200
202SampleProfileMatcher::longestCommonSequence(const AnchorList &AnchorList1,
203 const AnchorList &AnchorList2,
204 bool MatchUnusedFunction) {
205 LocToLocMap MatchedAnchors;
207 AnchorList1, AnchorList2,
208 [&](const FunctionId &A, const FunctionId &B) {
209 return functionMatchesProfile(
210 A, B,
211 !MatchUnusedFunction // Find matched function only
212 );
213 },
214 [&](LineLocation A, LineLocation B) {
215 MatchedAnchors.try_emplace(A, B);
216 });
217 return MatchedAnchors;
218}
219
220void SampleProfileMatcher::matchNonCallsiteLocs(
221 const LocToLocMap &MatchedAnchors, const AnchorMap &IRAnchors,
222 LocToLocMap &IRToProfileLocationMap) {
223 auto InsertMatching = [&](const LineLocation &From, const LineLocation &To) {
224 // Skip the unchanged location mapping to save memory.
225 if (From != To)
226 IRToProfileLocationMap.insert({From, To});
227 };
228
229 // Use function's beginning location as the initial anchor.
230 int32_t LocationDelta = 0;
231 SmallVector<LineLocation> LastMatchedNonAnchors;
232 for (const auto &IR : IRAnchors) {
233 const auto &Loc = IR.first;
234 bool IsMatchedAnchor = false;
235 // Match the anchor location in lexical order.
236 auto R = MatchedAnchors.find(Loc);
237 if (R != MatchedAnchors.end()) {
238 const auto &Candidate = R->second;
239 InsertMatching(Loc, Candidate);
240 LLVM_DEBUG(dbgs() << "Callsite with callee:" << IR.second.stringRef()
241 << " is matched from " << Loc << " to " << Candidate
242 << "\n");
243 LocationDelta = Candidate.LineOffset - Loc.LineOffset;
244
245 // Match backwards for non-anchor locations.
246 // The locations in LastMatchedNonAnchors have been matched forwards
247 // based on the previous anchor, spilt it evenly and overwrite the
248 // second half based on the current anchor.
249 for (size_t I = (LastMatchedNonAnchors.size() + 1) / 2;
250 I < LastMatchedNonAnchors.size(); I++) {
251 const auto &L = LastMatchedNonAnchors[I];
252 uint32_t CandidateLineOffset = L.LineOffset + LocationDelta;
253 LineLocation Candidate(CandidateLineOffset, L.Discriminator);
254 InsertMatching(L, Candidate);
255 LLVM_DEBUG(dbgs() << "Location is rematched backwards from " << L
256 << " to " << Candidate << "\n");
257 }
258
259 IsMatchedAnchor = true;
260 LastMatchedNonAnchors.clear();
261 }
262
263 // Match forwards for non-anchor locations.
264 if (!IsMatchedAnchor) {
265 uint32_t CandidateLineOffset = Loc.LineOffset + LocationDelta;
266 LineLocation Candidate(CandidateLineOffset, Loc.Discriminator);
267 InsertMatching(Loc, Candidate);
268 LLVM_DEBUG(dbgs() << "Location is matched from " << Loc << " to "
269 << Candidate << "\n");
270 LastMatchedNonAnchors.emplace_back(Loc);
271 }
272 }
273}
274
275// Filter the non-call locations from IRAnchors and ProfileAnchors and write
276// them into a list for random access later.
277void SampleProfileMatcher::getFilteredAnchorList(
278 const AnchorMap &IRAnchors, const AnchorMap &ProfileAnchors,
279 AnchorList &FilteredIRAnchorsList, AnchorList &FilteredProfileAnchorList) {
280 for (const auto &I : IRAnchors) {
281 if (I.second.stringRef().empty())
282 continue;
283 FilteredIRAnchorsList.emplace_back(I);
284 }
285
286 for (const auto &I : ProfileAnchors)
287 FilteredProfileAnchorList.emplace_back(I);
288}
289
290// Call target name anchor based profile fuzzy matching.
291// Input:
292// For IR locations, the anchor is the callee name of direct callsite; For
293// profile locations, it's the call target name for BodySamples or inlinee's
294// profile name for CallsiteSamples.
295// Matching heuristic:
296// First match all the anchors using the diff algorithm, then split the
297// non-anchor locations between the two anchors evenly, first half are matched
298// based on the start anchor, second half are matched based on the end anchor.
299// For example, given:
300// IR locations: [1, 2(foo), 3, 5, 6(bar), 7]
301// Profile locations: [1, 2, 3(foo), 4, 7, 8(bar), 9]
302// The matching gives:
303// [1, 2(foo), 3, 5, 6(bar), 7]
304// | | | | | |
305// [1, 2, 3(foo), 4, 7, 8(bar), 9]
306// The output mapping: [2->3, 3->4, 5->7, 6->8, 7->9].
307void SampleProfileMatcher::runStaleProfileMatching(
308 const Function &F, const AnchorMap &IRAnchors,
309 const AnchorMap &ProfileAnchors, LocToLocMap &IRToProfileLocationMap,
310 bool RunCFGMatching, bool RunCGMatching) {
311 if (!RunCFGMatching && !RunCGMatching)
312 return;
313 LLVM_DEBUG(dbgs() << "Run stale profile matching for " << F.getName()
314 << "\n");
315 assert(IRToProfileLocationMap.empty() &&
316 "Run stale profile matching only once per function");
317
318 AnchorList FilteredProfileAnchorList;
319 AnchorList FilteredIRAnchorsList;
320 getFilteredAnchorList(IRAnchors, ProfileAnchors, FilteredIRAnchorsList,
321 FilteredProfileAnchorList);
322
323 if (FilteredIRAnchorsList.empty() || FilteredProfileAnchorList.empty())
324 return;
325
326 if (FilteredIRAnchorsList.size() > SalvageStaleProfileMaxCallsites ||
327 FilteredProfileAnchorList.size() > SalvageStaleProfileMaxCallsites) {
328 LLVM_DEBUG(dbgs() << "Skip stale profile matching for " << F.getName()
329 << " because the number of callsites in the IR is "
330 << FilteredIRAnchorsList.size()
331 << " and in the profile is "
332 << FilteredProfileAnchorList.size() << "\n");
333 return;
334 }
335
336 // Match the callsite anchors by finding the longest common subsequence
337 // between IR and profile.
338 // Define a match between two anchors as follows:
339 // 1) The function names of anchors are the same.
340 // 2) The similarity between the anchor functions is above a threshold if
341 // RunCGMatching is set.
342 // For 2), we only consider the anchor functions from IR and profile don't
343 // appear on either side to reduce the matching scope. Note that we need to
344 // use IR anchor as base(A side) to align with the order of
345 // IRToProfileLocationMap.
346 LocToLocMap MatchedAnchors =
347 longestCommonSequence(FilteredIRAnchorsList, FilteredProfileAnchorList,
348 RunCGMatching /* Match unused functions */);
349
350 // CFG level matching:
351 // Apply the callsite matchings to infer matching for the basic
352 // block(non-callsite) locations and write the result to
353 // IRToProfileLocationMap.
354 if (RunCFGMatching)
355 matchNonCallsiteLocs(MatchedAnchors, IRAnchors, IRToProfileLocationMap);
356}
357
358void SampleProfileMatcher::runOnFunction(Function &F) {
359 // We need to use flattened function samples for matching.
360 // Unlike IR, which includes all callsites from the source code, the callsites
361 // in profile only show up when they are hit by samples, i,e. the profile
362 // callsites in one context may differ from those in another context. To get
363 // the maximum number of callsites, we merge the function profiles from all
364 // contexts, aka, the flattened profile to find profile anchors.
365 const auto *FSForMatching = getFlattenedSamplesFor(F);
366 if (SalvageUnusedProfile && !FSForMatching) {
367 // Apply the matching in place to find the new function's matched profile.
368 auto R = FuncToProfileNameMap.find(&F);
369 if (R != FuncToProfileNameMap.end()) {
370 FSForMatching = getFlattenedSamplesFor(R->second);
371 // Try to find the salvaged top-level profiles that are explicitly loaded
372 // for the matching, see "functionMatchesProfileHelper" for the details.
373 if (!FSForMatching && LoadFuncProfileforCGMatching)
374 FSForMatching = Reader.getSamplesFor(R->second.stringRef());
375 }
376 }
377 if (!FSForMatching)
378 return;
379
380 // Anchors for IR. It's a map from IR location to callee name, callee name is
381 // empty for non-call instruction and use a dummy name(UnknownIndirectCallee)
382 // for unknown indrect callee name.
383 AnchorMap IRAnchors;
384 findIRAnchors(F, IRAnchors);
385 // Anchors for profile. It's a map from callsite location to a set of callee
386 // name.
387 AnchorMap ProfileAnchors;
388 findProfileAnchors(*FSForMatching, ProfileAnchors);
389
390 // Compute the callsite match states for profile staleness report.
392 recordCallsiteMatchStates(F, IRAnchors, ProfileAnchors, nullptr);
393
395 return;
396 // For probe-based profiles, run matching only when profile checksum is
397 // mismatched.
398 bool ChecksumMismatch = FunctionSamples::ProfileIsProbeBased &&
399 !ProbeManager->profileIsValid(F, *FSForMatching);
400 bool RunCFGMatching =
401 !FunctionSamples::ProfileIsProbeBased || ChecksumMismatch;
402 bool RunCGMatching = SalvageUnusedProfile;
403 // For imported functions, the checksum metadata(pseudo_probe_desc) are
404 // dropped, so we leverage function attribute(profile-checksum-mismatch) to
405 // transfer the info: add the attribute during pre-link phase and check it
406 // during post-link phase(see "profileIsValid").
407 if (ChecksumMismatch && LTOPhase == ThinOrFullLTOPhase::ThinLTOPreLink)
408 F.addFnAttr("profile-checksum-mismatch");
409
410 // The matching result will be saved to IRToProfileLocationMap, create a
411 // new map for each function.
412 auto &IRToProfileLocationMap = getIRToProfileLocationMap(F);
413 runStaleProfileMatching(F, IRAnchors, ProfileAnchors, IRToProfileLocationMap,
414 RunCFGMatching, RunCGMatching);
415 // Find and update callsite match states after matching.
416 if (RunCFGMatching && (ReportProfileStaleness || PersistProfileStaleness))
417 recordCallsiteMatchStates(F, IRAnchors, ProfileAnchors,
418 &IRToProfileLocationMap);
419}
420
421void SampleProfileMatcher::recordCallsiteMatchStates(
422 const Function &F, const AnchorMap &IRAnchors,
423 const AnchorMap &ProfileAnchors,
424 const LocToLocMap *IRToProfileLocationMap) {
425 bool IsPostMatch = IRToProfileLocationMap != nullptr;
426 auto &CallsiteMatchStates =
427 FuncCallsiteMatchStates[FunctionSamples::getCanonicalFnName(F.getName())];
428
429 auto MapIRLocToProfileLoc = [&](const LineLocation &IRLoc) {
430 // IRToProfileLocationMap is null in pre-match phrase.
431 if (!IRToProfileLocationMap)
432 return IRLoc;
433 const auto &ProfileLoc = IRToProfileLocationMap->find(IRLoc);
434 if (ProfileLoc != IRToProfileLocationMap->end())
435 return ProfileLoc->second;
436 else
437 return IRLoc;
438 };
439
440 for (const auto &I : IRAnchors) {
441 // After fuzzy profile matching, use the matching result to remap the
442 // current IR callsite.
443 const auto &ProfileLoc = MapIRLocToProfileLoc(I.first);
444 const auto &IRCalleeId = I.second;
445 const auto &It = ProfileAnchors.find(ProfileLoc);
446 if (It == ProfileAnchors.end())
447 continue;
448 const auto &ProfCalleeId = It->second;
449 if (IRCalleeId == ProfCalleeId) {
450 auto It = CallsiteMatchStates.find(ProfileLoc);
451 if (It == CallsiteMatchStates.end())
452 CallsiteMatchStates.emplace(ProfileLoc, MatchState::InitialMatch);
453 else if (IsPostMatch) {
454 if (It->second == MatchState::InitialMatch)
455 It->second = MatchState::UnchangedMatch;
456 else if (It->second == MatchState::InitialMismatch)
457 It->second = MatchState::RecoveredMismatch;
458 }
459 }
460 }
461
462 // Check if there are any callsites in the profile that does not match to any
463 // IR callsites.
464 for (const auto &I : ProfileAnchors) {
465 const auto &Loc = I.first;
466 assert(!I.second.stringRef().empty() && "Callees should not be empty");
467 auto It = CallsiteMatchStates.find(Loc);
468 if (It == CallsiteMatchStates.end())
469 CallsiteMatchStates.emplace(Loc, MatchState::InitialMismatch);
470 else if (IsPostMatch) {
471 // Update the state if it's not matched(UnchangedMatch or
472 // RecoveredMismatch).
473 if (It->second == MatchState::InitialMismatch)
474 It->second = MatchState::UnchangedMismatch;
475 else if (It->second == MatchState::InitialMatch)
476 It->second = MatchState::RemovedMatch;
477 }
478 }
479}
480
481void SampleProfileMatcher::countMismatchedFuncSamples(const FunctionSamples &FS,
482 bool IsTopLevel) {
483 const auto *FuncDesc = ProbeManager->getDesc(FS.getGUID());
484 // Skip the function that is external or renamed.
485 if (!FuncDesc)
486 return;
487
488 if (ProbeManager->profileIsHashMismatched(*FuncDesc, FS)) {
489 if (IsTopLevel)
490 NumStaleProfileFunc++;
491 // Given currently all probe ids are after block probe ids, once the
492 // checksum is mismatched, it's likely all the callites are mismatched and
493 // dropped. We conservatively count all the samples as mismatched and stop
494 // counting the inlinees' profiles.
495 MismatchedFunctionSamples += FS.getTotalSamples();
496 return;
497 }
498
499 // Even the current-level function checksum is matched, it's possible that the
500 // nested inlinees' checksums are mismatched that affect the inlinee's sample
501 // loading, we need to go deeper to check the inlinees' function samples.
502 // Similarly, count all the samples as mismatched if the inlinee's checksum is
503 // mismatched using this recursive function.
504 for (const auto &I : FS.getCallsiteSamples())
505 for (const auto &CS : I.second)
506 countMismatchedFuncSamples(CS.second, false);
507}
508
509void SampleProfileMatcher::countMismatchedCallsiteSamples(
510 const FunctionSamples &FS) {
511 auto It = FuncCallsiteMatchStates.find(FS.getFuncName());
512 // Skip it if no mismatched callsite or this is an external function.
513 if (It == FuncCallsiteMatchStates.end() || It->second.empty())
514 return;
515 const auto &CallsiteMatchStates = It->second;
516
517 auto findMatchState = [&](const LineLocation &Loc) {
518 auto It = CallsiteMatchStates.find(Loc);
519 if (It == CallsiteMatchStates.end())
520 return MatchState::Unknown;
521 return It->second;
522 };
523
524 auto AttributeMismatchedSamples = [&](const enum MatchState &State,
525 uint64_t Samples) {
526 if (isMismatchState(State))
527 MismatchedCallsiteSamples += Samples;
528 else if (State == MatchState::RecoveredMismatch)
529 RecoveredCallsiteSamples += Samples;
530 };
531
532 // The non-inlined callsites are saved in the body samples of function
533 // profile, go through it to count the non-inlined callsite samples.
534 for (const auto &I : FS.getBodySamples())
535 AttributeMismatchedSamples(findMatchState(I.first), I.second.getSamples());
536
537 // Count the inlined callsite samples.
538 for (const auto &I : FS.getCallsiteSamples()) {
539 auto State = findMatchState(I.first);
540 uint64_t CallsiteSamples = 0;
541 for (const auto &CS : I.second)
542 CallsiteSamples += CS.second.getTotalSamples();
543 AttributeMismatchedSamples(State, CallsiteSamples);
544
545 if (isMismatchState(State))
546 continue;
547
548 // When the current level of inlined call site matches the profiled call
549 // site, we need to go deeper along the inline tree to count mismatches from
550 // lower level inlinees.
551 for (const auto &CS : I.second)
552 countMismatchedCallsiteSamples(CS.second);
553 }
554}
555
556void SampleProfileMatcher::countMismatchCallsites(const FunctionSamples &FS) {
557 auto It = FuncCallsiteMatchStates.find(FS.getFuncName());
558 // Skip it if no mismatched callsite or this is an external function.
559 if (It == FuncCallsiteMatchStates.end() || It->second.empty())
560 return;
561 const auto &MatchStates = It->second;
562 [[maybe_unused]] bool OnInitialState =
563 isInitialState(MatchStates.begin()->second);
564 for (const auto &I : MatchStates) {
565 TotalProfiledCallsites++;
566 assert(
567 (OnInitialState ? isInitialState(I.second) : isFinalState(I.second)) &&
568 "Profile matching state is inconsistent");
569
570 if (isMismatchState(I.second))
571 NumMismatchedCallsites++;
572 else if (I.second == MatchState::RecoveredMismatch)
573 NumRecoveredCallsites++;
574 }
575}
576
577void SampleProfileMatcher::countCallGraphRecoveredSamples(
578 const FunctionSamples &FS,
579 std::unordered_set<FunctionId> &CallGraphRecoveredProfiles) {
580 if (CallGraphRecoveredProfiles.count(FS.getFunction())) {
581 NumCallGraphRecoveredFuncSamples += FS.getTotalSamples();
582 return;
583 }
584
585 for (const auto &CM : FS.getCallsiteSamples()) {
586 for (const auto &CS : CM.second) {
587 countCallGraphRecoveredSamples(CS.second, CallGraphRecoveredProfiles);
588 }
589 }
590}
591
592void SampleProfileMatcher::computeAndReportProfileStaleness() {
594 return;
595
596 std::unordered_set<FunctionId> CallGraphRecoveredProfiles;
598 for (const auto &I : FuncToProfileNameMap) {
599 CallGraphRecoveredProfiles.insert(I.second);
600 if (GlobalValue::isAvailableExternallyLinkage(I.first->getLinkage()))
601 continue;
602 NumCallGraphRecoveredProfiledFunc++;
603 }
604 }
605
606 // Count profile mismatches for profile staleness report.
607 for (const auto &F : M) {
609 continue;
610 // As the stats will be merged by linker, skip reporting the metrics for
611 // imported functions to avoid repeated counting.
613 continue;
614 const auto *FS = Reader.getSamplesFor(F);
615 if (!FS)
616 continue;
617 TotalProfiledFunc++;
618 TotalFunctionSamples += FS->getTotalSamples();
619
620 if (SalvageUnusedProfile && !CallGraphRecoveredProfiles.empty())
621 countCallGraphRecoveredSamples(*FS, CallGraphRecoveredProfiles);
622
623 // Checksum mismatch is only used in pseudo-probe mode.
625 countMismatchedFuncSamples(*FS, true);
626
627 // Count mismatches and samples for calliste.
628 countMismatchCallsites(*FS);
629 countMismatchedCallsiteSamples(*FS);
630 }
631
634 errs() << "(" << NumStaleProfileFunc << "/" << TotalProfiledFunc
635 << ") of functions' profile are invalid and ("
636 << MismatchedFunctionSamples << "/" << TotalFunctionSamples
637 << ") of samples are discarded due to function hash mismatch.\n";
638 }
640 errs() << "(" << NumCallGraphRecoveredProfiledFunc << "/"
641 << TotalProfiledFunc << ") of functions' profile are matched and ("
642 << NumCallGraphRecoveredFuncSamples << "/" << TotalFunctionSamples
643 << ") of samples are reused by call graph matching.\n";
644 }
645
646 errs() << "(" << (NumMismatchedCallsites + NumRecoveredCallsites) << "/"
647 << TotalProfiledCallsites
648 << ") of callsites' profile are invalid and ("
649 << (MismatchedCallsiteSamples + RecoveredCallsiteSamples) << "/"
650 << TotalFunctionSamples
651 << ") of samples are discarded due to callsite location mismatch.\n";
652 errs() << "(" << NumRecoveredCallsites << "/"
653 << (NumRecoveredCallsites + NumMismatchedCallsites)
654 << ") of callsites and (" << RecoveredCallsiteSamples << "/"
655 << (RecoveredCallsiteSamples + MismatchedCallsiteSamples)
656 << ") of samples are recovered by stale profile matching.\n";
657 }
658
660 LLVMContext &Ctx = M.getContext();
661 MDBuilder MDB(Ctx);
662
665 ProfStatsVec.emplace_back("NumStaleProfileFunc", NumStaleProfileFunc);
666 ProfStatsVec.emplace_back("TotalProfiledFunc", TotalProfiledFunc);
667 ProfStatsVec.emplace_back("MismatchedFunctionSamples",
668 MismatchedFunctionSamples);
669 ProfStatsVec.emplace_back("TotalFunctionSamples", TotalFunctionSamples);
670 }
671
673 ProfStatsVec.emplace_back("NumCallGraphRecoveredProfiledFunc",
674 NumCallGraphRecoveredProfiledFunc);
675 ProfStatsVec.emplace_back("NumCallGraphRecoveredFuncSamples",
676 NumCallGraphRecoveredFuncSamples);
677 }
678
679 ProfStatsVec.emplace_back("NumMismatchedCallsites", NumMismatchedCallsites);
680 ProfStatsVec.emplace_back("NumRecoveredCallsites", NumRecoveredCallsites);
681 ProfStatsVec.emplace_back("TotalProfiledCallsites", TotalProfiledCallsites);
682 ProfStatsVec.emplace_back("MismatchedCallsiteSamples",
683 MismatchedCallsiteSamples);
684 ProfStatsVec.emplace_back("RecoveredCallsiteSamples",
685 RecoveredCallsiteSamples);
686
687 auto *MD = MDB.createLLVMStats(ProfStatsVec);
688 auto *NMD = M.getOrInsertNamedMetadata("llvm.stats");
689 NMD->addOperand(MD);
690 }
691}
692
693void SampleProfileMatcher::findFunctionsWithoutProfile() {
694 // TODO: Support MD5 profile.
696 return;
697 StringSet<> NamesInProfile;
698 if (auto NameTable = Reader.getNameTable()) {
699 for (auto Name : *NameTable)
700 NamesInProfile.insert(Name.stringRef());
701 }
702
703 for (auto &F : M) {
704 // Skip declarations, as even if the function can be matched, we have
705 // nothing to do with it.
706 if (F.isDeclaration())
707 continue;
708
709 StringRef CanonFName = FunctionSamples::getCanonicalFnName(F.getName());
710 const auto *FS = getFlattenedSamplesFor(F);
711 if (FS)
712 continue;
713
714 // For extended binary, functions fully inlined may not be loaded in the
715 // top-level profile, so check the NameTable which has the all symbol names
716 // in profile.
717 if (NamesInProfile.count(CanonFName))
718 continue;
719
720 // For extended binary, non-profiled function symbols are in the profile
721 // symbol list table.
722 if (PSL && PSL->contains(CanonFName))
723 continue;
724
725 LLVM_DEBUG(dbgs() << "Function " << CanonFName
726 << " is not in profile or profile symbol list.\n");
727 FunctionsWithoutProfile[FunctionId(CanonFName)] = &F;
728 }
729}
730
731bool SampleProfileMatcher::functionMatchesProfileHelper(
732 const Function &IRFunc, const FunctionId &ProfFunc) {
733 // The value is in the range [0, 1]. The bigger the value is, the more similar
734 // two sequences are.
735 float Similarity = 0.0;
736
737 // Match the functions if they have the same base name(after demangling) and
738 // skip the similarity check.
739 ItaniumPartialDemangler Demangler;
740 // Helper lambda to demangle and get the base name. If the demangling failed,
741 // return an empty string.
742 auto GetBaseName = [&](StringRef FName) {
743 auto FunctionName = FName.str();
744 if (Demangler.partialDemangle(FunctionName.c_str()))
745 return std::string();
746 size_t BaseNameSize = 0;
747 // The demangler API follows the __cxa_demangle one, and thus needs a
748 // pointer that originates from malloc (or nullptr) and the caller is
749 // responsible for free()-ing the buffer.
750 char *BaseNamePtr = Demangler.getFunctionBaseName(nullptr, &BaseNameSize);
751 std::string Result = (BaseNamePtr && BaseNameSize)
752 ? std::string(BaseNamePtr, BaseNameSize)
753 : std::string();
754 free(BaseNamePtr);
755 return Result;
756 };
757 auto IRBaseName = GetBaseName(IRFunc.getName());
758 auto ProfBaseName = GetBaseName(ProfFunc.stringRef());
759 if (!IRBaseName.empty() && IRBaseName == ProfBaseName) {
760 LLVM_DEBUG(dbgs() << "The functions " << IRFunc.getName() << "(IR) and "
761 << ProfFunc << "(Profile) share the same base name: "
762 << IRBaseName << ".\n");
763 return true;
764 }
765
766 const auto *FSForMatching = getFlattenedSamplesFor(ProfFunc);
767 // With extbinary profile format, initial profile loading only reads profile
768 // based on current function names in the module.
769 // However, if a function is renamed, sample loader skips to load its original
770 // profile(which has a different name), we will miss this case. To address
771 // this, we load the top-level profile candidate explicitly for the matching.
772 if (!FSForMatching && LoadFuncProfileforCGMatching) {
773 DenseSet<StringRef> TopLevelFunc({ProfFunc.stringRef()});
774 if (std::error_code EC = Reader.read(TopLevelFunc))
775 return false;
776 FSForMatching = Reader.getSamplesFor(ProfFunc.stringRef());
777 LLVM_DEBUG({
778 if (FSForMatching)
779 dbgs() << "Read top-level function " << ProfFunc
780 << " for call-graph matching\n";
781 });
782 }
783 if (!FSForMatching)
784 return false;
785 // The check for similarity or checksum may not be reliable if the function is
786 // tiny, we use the number of basic block as a proxy for the function
787 // complexity and skip the matching if it's too small.
788 if (IRFunc.size() < MinFuncCountForCGMatching ||
789 FSForMatching->getBodySamples().size() < MinFuncCountForCGMatching)
790 return false;
791
792 // For probe-based function, we first trust the checksum info. If the checksum
793 // doesn't match, we continue checking for similarity.
795 const auto *FuncDesc = ProbeManager->getDesc(IRFunc);
796 if (FuncDesc &&
797 !ProbeManager->profileIsHashMismatched(*FuncDesc, *FSForMatching)) {
798 LLVM_DEBUG(dbgs() << "The checksums for " << IRFunc.getName()
799 << "(IR) and " << ProfFunc << "(Profile) match.\n");
800
801 return true;
802 }
803 }
804
805 AnchorMap IRAnchors;
806 findIRAnchors(IRFunc, IRAnchors);
807 AnchorMap ProfileAnchors;
808 findProfileAnchors(*FSForMatching, ProfileAnchors);
809
810 AnchorList FilteredIRAnchorsList;
811 AnchorList FilteredProfileAnchorList;
812 getFilteredAnchorList(IRAnchors, ProfileAnchors, FilteredIRAnchorsList,
813 FilteredProfileAnchorList);
814
815 // Similarly skip the matching if the num of anchors is not enough.
816 if (FilteredIRAnchorsList.size() < MinCallCountForCGMatching ||
817 FilteredProfileAnchorList.size() < MinCallCountForCGMatching)
818 return false;
819
820 // Use the diff algorithm to find the LCS between IR and profile.
821
822 // Don't recursively match the callee function to avoid infinite matching,
823 // callee functions will be handled later since it's processed in top-down
824 // order .
825 LocToLocMap MatchedAnchors =
826 longestCommonSequence(FilteredIRAnchorsList, FilteredProfileAnchorList,
827 false /* Match unused functions */);
828
829 Similarity = static_cast<float>(MatchedAnchors.size()) /
830 FilteredProfileAnchorList.size();
831
832 LLVM_DEBUG(dbgs() << "The similarity between " << IRFunc.getName()
833 << "(IR) and " << ProfFunc << "(profile) is "
834 << format("%.2f", Similarity) << "\n");
835 assert((Similarity >= 0 && Similarity <= 1.0) &&
836 "Similarity value should be in [0, 1]");
837 return Similarity * 100 > FuncProfileSimilarityThreshold;
838}
839
840// If FindMatchedProfileOnly is set to true, only use the processed function
841// results. This is used for skipping the repeated recursive matching.
842bool SampleProfileMatcher::functionMatchesProfile(Function &IRFunc,
843 const FunctionId &ProfFunc,
844 bool FindMatchedProfileOnly) {
845 auto R = FuncProfileMatchCache.find({&IRFunc, ProfFunc});
846 if (R != FuncProfileMatchCache.end())
847 return R->second;
848
849 if (FindMatchedProfileOnly)
850 return false;
851
852 bool Matched = functionMatchesProfileHelper(IRFunc, ProfFunc);
853 FuncProfileMatchCache[{&IRFunc, ProfFunc}] = Matched;
854 if (Matched) {
855 FuncToProfileNameMap[&IRFunc] = ProfFunc;
856 LLVM_DEBUG(dbgs() << "Function:" << IRFunc.getName()
857 << " matches profile:" << ProfFunc << "\n");
858 }
859
860 return Matched;
861}
862
863void SampleProfileMatcher::UpdateWithSalvagedProfiles() {
864 DenseSet<StringRef> ProfileSalvagedFuncs;
865 // Update FuncNameToProfNameMap and SymbolMap.
866 for (auto &I : FuncToProfileNameMap) {
867 assert(I.first && "New function is null");
868 FunctionId FuncName(I.first->getName());
869 ProfileSalvagedFuncs.insert(I.second.stringRef());
870 FuncNameToProfNameMap->emplace(FuncName, I.second);
871
872 // We need to remove the old entry to avoid duplicating the function
873 // processing.
874 SymbolMap->erase(FuncName);
875 SymbolMap->emplace(I.second, I.first);
876 }
877
878 // With extbinary profile format, initial profile loading only reads profile
879 // based on current function names in the module, so we need to load top-level
880 // profiles for functions with different profile name explicitly after
881 // function-profile name map is established with stale profile matching.
882 Reader.read(ProfileSalvagedFuncs);
883 Reader.setFuncNameToProfNameMap(*FuncNameToProfNameMap);
884}
885
887 ProfileConverter::flattenProfile(Reader.getProfiles(), FlattenedProfiles,
890 findFunctionsWithoutProfile();
891
892 // Process the matching in top-down order so that the caller matching result
893 // can be used to the callee matching.
894 std::vector<Function *> TopDownFunctionList;
895 TopDownFunctionList.reserve(M.size());
896 buildTopDownFuncOrder(CG, TopDownFunctionList);
897 for (auto *F : TopDownFunctionList) {
899 continue;
900 runOnFunction(*F);
901 }
902
904 UpdateWithSalvagedProfiles();
905
907 distributeIRToProfileLocationMap();
908
909 computeAndReportProfileStaleness();
910}
911
912void SampleProfileMatcher::distributeIRToProfileLocationMap(
913 FunctionSamples &FS) {
914 const auto ProfileMappings = FuncMappings.find(FS.getFuncName());
915 if (ProfileMappings != FuncMappings.end()) {
916 FS.setIRToProfileLocationMap(&(ProfileMappings->second));
917 }
918
919 for (auto &Callees :
920 const_cast<CallsiteSampleMap &>(FS.getCallsiteSamples())) {
921 for (auto &FS : Callees.second) {
922 distributeIRToProfileLocationMap(FS.second);
923 }
924 }
925}
926
927// Use a central place to distribute the matching results. Outlined and inlined
928// profile with the function name will be set to the same pointer.
929void SampleProfileMatcher::distributeIRToProfileLocationMap() {
930 for (auto &I : Reader.getProfiles()) {
931 distributeIRToProfileLocationMap(I.second);
932 }
933}
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
itanium_demangle::ManglingParser< DefaultAllocator > Demangler
Legalize the Machine IR a function s Machine IR
Definition Legalizer.cpp:80
#define F(x, y, z)
Definition MD5.cpp:54
#define I(x, y, z)
Definition MD5.cpp:57
This file provides the interface for SampleProfileMatcher.
#define LLVM_DEBUG(...)
Definition Debug.h:114
size_t size() const
Definition Function.h:856
iterator end()
Definition Function.h:853
static bool isAvailableExternallyLinkage(LinkageTypes Linkage)
reference emplace_back(ArgTypes &&... Args)
iterator end()
Definition StringMap.h:224
iterator find(StringRef Key)
Definition StringMap.h:237
size_type count(StringRef Key) const
count - Return 1 if the element is in the map, 0 otherwise.
Definition StringMap.h:285
std::pair< typename Base::iterator, bool > insert(StringRef key)
Definition StringSet.h:39
LLVM_ABI StringRef getName() const
Return a constant reference to the value's name.
Definition Value.cpp:322
std::pair< iterator, bool > insert(const ValueT &V)
Definition DenseSet.h:202
This class represents a function that is read from a sample profile.
Definition FunctionId.h:36
StringRef stringRef() const
Convert to StringRef.
Definition FunctionId.h:108
Representation of the samples collected for a function.
Definition SampleProf.h:777
static LLVM_ABI bool ProfileIsCS
static LLVM_ABI bool ProfileIsProbeBased
static StringRef getCanonicalFnName(const Function &F)
Return the canonical name for a function, taking into account suffix elision policy attributes.
static LLVM_ABI bool ProfileIsFS
If this profile uses flow sensitive discriminators.
static LLVM_ABI LineLocation getCallSiteIdentifier(const DILocation *DIL, bool ProfileIsFS=false)
Returns a unique call site identifier for a given debug location of a call instruction.
static LLVM_ABI bool UseMD5
Whether the profile uses MD5 to represent string.
static void flattenProfile(SampleProfileMap &ProfileMap, bool ProfileIsCS=false)
@ C
The default llvm calling convention, compatible with C.
Definition CallingConv.h:34
initializer< Ty > init(const Ty &Val)
std::map< LineLocation, FunctionSamplesMap > CallsiteSampleMap
Definition SampleProf.h:767
std::unordered_map< LineLocation, LineLocation, LineLocationHash > LocToLocMap
Definition SampleProf.h:769
This is an optimization pass for GlobalISel generic memory operations.
cl::opt< bool > ReportProfileStaleness("report-profile-staleness", cl::Hidden, cl::init(false), cl::desc("Compute and report stale profile statistical metrics."))
cl::opt< bool > PersistProfileStaleness("persist-profile-staleness", cl::Hidden, cl::init(false), cl::desc("Compute stale profile statistical metrics and write it into the " "native object file(.llvm_stats section)."))
std::map< LineLocation, FunctionId > AnchorMap
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:643
static cl::opt< bool > LoadFuncProfileforCGMatching("load-func-profile-for-cg-matching", cl::Hidden, cl::init(true), cl::desc("Load top-level profiles that the sample reader initially skipped for " "the call-graph matching (only meaningful for extended binary " "format)"))
static void buildTopDownFuncOrder(LazyCallGraph &CG, std::vector< Function * > &FunctionOrderList)
@ ThinLTOPreLink
ThinLTO prelink (summary) phase.
Definition Pass.h:81
static cl::opt< unsigned > MinCallCountForCGMatching("min-call-count-for-cg-matching", cl::Hidden, cl::init(3), cl::desc("The minimum number of call anchors required for a function to " "run stale profile call graph matching."))
LLVM_ABI std::optional< PseudoProbe > extractProbe(const Instruction &Inst)
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition Debug.cpp:207
static cl::opt< unsigned > MinFuncCountForCGMatching("min-func-count-for-cg-matching", cl::Hidden, cl::init(5), cl::desc("The minimum number of basic blocks required for a function to " "run stale profile call graph matching."))
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
Definition Casting.h:547
format_object< Ts... > format(const char *Fmt, const Ts &... Vals)
These are helper functions used to produce formatted output.
Definition Format.h:129
LLVM_ABI raw_fd_ostream & errs()
This returns a reference to a raw_ostream for standard error.
cl::opt< bool > SalvageStaleProfile("salvage-stale-profile", cl::Hidden, cl::init(false), cl::desc("Salvage stale profile by fuzzy matching and use the remapped " "location for sample profile query."))
void longestCommonSequence(AnchorList AnchorList1, AnchorList AnchorList2, llvm::function_ref< bool(const Function &, const Function &)> FunctionMatchesProfile, llvm::function_ref< void(Loc, Loc)> InsertMatching)
std::vector< std::pair< LineLocation, FunctionId > > AnchorList
static bool skipProfileForFunction(const Function &F)
cl::opt< bool > SalvageUnusedProfile("salvage-unused-profile", cl::Hidden, cl::init(false), cl::desc("Salvage unused profile by matching with new " "functions on call graph."))
static cl::opt< unsigned > SalvageStaleProfileMaxCallsites("salvage-stale-profile-max-callsites", cl::Hidden, cl::init(UINT_MAX), cl::desc("The maximum number of callsites in a function, above which stale " "profile matching will be skipped."))
static cl::opt< unsigned > FuncProfileSimilarityThreshold("func-profile-similarity-threshold", cl::Hidden, cl::init(80), cl::desc("Consider a profile matches a function if the similarity of their " "callee sequences is above the specified percentile."))