LLVM 20.0.0git
SampleProfileMatcher.cpp
Go to the documentation of this file.
1//===- SampleProfileMatcher.cpp - Sampling-based Stale Profile Matcher ----===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file implements the SampleProfileMatcher used for stale
10// profile matching.
11//
12//===----------------------------------------------------------------------===//
13
16#include "llvm/IR/MDBuilder.h"
18
19using namespace llvm;
20using namespace sampleprof;
21
22#define DEBUG_TYPE "sample-profile-matcher"
23
25 "func-profile-similarity-threshold", cl::Hidden, cl::init(80),
26 cl::desc("Consider a profile matches a function if the similarity of their "
27 "callee sequences is above the specified percentile."));
28
30 "min-func-count-for-cg-matching", cl::Hidden, cl::init(5),
31 cl::desc("The minimum number of basic blocks required for a function to "
32 "run stale profile call graph matching."));
33
35 "min-call-count-for-cg-matching", cl::Hidden, cl::init(3),
36 cl::desc("The minimum number of call anchors required for a function to "
37 "run stale profile call graph matching."));
38
43
45 "salvage-stale-profile-max-callsites", cl::Hidden, cl::init(UINT_MAX),
46 cl::desc("The maximum number of callsites in a function, above which stale "
47 "profile matching will be skipped."));
48
49void SampleProfileMatcher::findIRAnchors(const Function &F,
50 AnchorMap &IRAnchors) const {
51 // For inlined code, recover the original callsite and callee by finding the
52 // top-level inline frame. e.g. For frame stack "main:1 @ foo:2 @ bar:3", the
53 // top-level frame is "main:1", the callsite is "1" and the callee is "foo".
54 auto FindTopLevelInlinedCallsite = [](const DILocation *DIL) {
55 assert((DIL && DIL->getInlinedAt()) && "No inlined callsite");
56 const DILocation *PrevDIL = nullptr;
57 do {
58 PrevDIL = DIL;
59 DIL = DIL->getInlinedAt();
60 } while (DIL->getInlinedAt());
61
64 StringRef CalleeName = PrevDIL->getSubprogramLinkageName();
65 return std::make_pair(Callsite, FunctionId(CalleeName));
66 };
67
68 auto GetCanonicalCalleeName = [](const CallBase *CB) {
69 StringRef CalleeName = UnknownIndirectCallee;
70 if (Function *Callee = CB->getCalledFunction())
71 CalleeName = FunctionSamples::getCanonicalFnName(Callee->getName());
72 return CalleeName;
73 };
74
75 // Extract profile matching anchors in the IR.
76 for (auto &BB : F) {
77 for (auto &I : BB) {
78 DILocation *DIL = I.getDebugLoc();
79 if (!DIL)
80 continue;
81
83 if (auto Probe = extractProbe(I)) {
84 // Flatten inlined IR for the matching.
85 if (DIL->getInlinedAt()) {
86 IRAnchors.emplace(FindTopLevelInlinedCallsite(DIL));
87 } else {
88 // Use empty StringRef for basic block probe.
89 StringRef CalleeName;
90 if (const auto *CB = dyn_cast<CallBase>(&I)) {
91 // Skip the probe inst whose callee name is "llvm.pseudoprobe".
92 if (!isa<IntrinsicInst>(&I))
93 CalleeName = GetCanonicalCalleeName(CB);
94 }
95 LineLocation Loc = LineLocation(Probe->Id, 0);
96 IRAnchors.emplace(Loc, FunctionId(CalleeName));
97 }
98 }
99 } else {
100 // TODO: For line-number based profile(AutoFDO), currently only support
101 // find callsite anchors. In future, we need to parse all the non-call
102 // instructions to extract the line locations for profile matching.
103 if (!isa<CallBase>(&I) || isa<IntrinsicInst>(&I))
104 continue;
105
106 if (DIL->getInlinedAt()) {
107 IRAnchors.emplace(FindTopLevelInlinedCallsite(DIL));
108 } else {
111 StringRef CalleeName = GetCanonicalCalleeName(dyn_cast<CallBase>(&I));
112 IRAnchors.emplace(Callsite, FunctionId(CalleeName));
113 }
114 }
115 }
116 }
117}
118
119void SampleProfileMatcher::findProfileAnchors(const FunctionSamples &FS,
120 AnchorMap &ProfileAnchors) const {
121 auto isInvalidLineOffset = [](uint32_t LineOffset) {
122 return LineOffset & 0x8000;
123 };
124
125 auto InsertAnchor = [](const LineLocation &Loc, const FunctionId &CalleeName,
126 AnchorMap &ProfileAnchors) {
127 auto Ret = ProfileAnchors.try_emplace(Loc, CalleeName);
128 if (!Ret.second) {
129 // For multiple callees, which indicates it's an indirect call, we use a
130 // dummy name(UnknownIndirectCallee) as the indrect callee name.
131 Ret.first->second = FunctionId(UnknownIndirectCallee);
132 }
133 };
134
135 for (const auto &I : FS.getBodySamples()) {
136 const LineLocation &Loc = I.first;
137 if (isInvalidLineOffset(Loc.LineOffset))
138 continue;
139 for (const auto &C : I.second.getCallTargets())
140 InsertAnchor(Loc, C.first, ProfileAnchors);
141 }
142
143 for (const auto &I : FS.getCallsiteSamples()) {
144 const LineLocation &Loc = I.first;
145 if (isInvalidLineOffset(Loc.LineOffset))
146 continue;
147 for (const auto &C : I.second)
148 InsertAnchor(Loc, C.first, ProfileAnchors);
149 }
150}
151
152bool SampleProfileMatcher::functionHasProfile(const FunctionId &IRFuncName,
153 Function *&FuncWithoutProfile) {
154 FuncWithoutProfile = nullptr;
155 auto R = FunctionsWithoutProfile.find(IRFuncName);
156 if (R != FunctionsWithoutProfile.end())
157 FuncWithoutProfile = R->second;
158 return !FuncWithoutProfile;
159}
160
161bool SampleProfileMatcher::isProfileUnused(const FunctionId &ProfileFuncName) {
162 return SymbolMap->find(ProfileFuncName) == SymbolMap->end();
163}
164
165bool SampleProfileMatcher::functionMatchesProfile(
166 const FunctionId &IRFuncName, const FunctionId &ProfileFuncName,
167 bool FindMatchedProfileOnly) {
168 if (IRFuncName == ProfileFuncName)
169 return true;
171 return false;
172
173 // If IR function doesn't have profile and the profile is unused, try
174 // matching them.
175 Function *IRFunc = nullptr;
176 if (functionHasProfile(IRFuncName, IRFunc) ||
177 !isProfileUnused(ProfileFuncName))
178 return false;
179
180 assert(FunctionId(IRFunc->getName()) != ProfileFuncName &&
181 "IR function should be different from profile function to match");
182 return functionMatchesProfile(*IRFunc, ProfileFuncName,
183 FindMatchedProfileOnly);
184}
185
187SampleProfileMatcher::longestCommonSequence(const AnchorList &AnchorList1,
188 const AnchorList &AnchorList2,
189 bool MatchUnusedFunction) {
190 int32_t Size1 = AnchorList1.size(), Size2 = AnchorList2.size(),
191 MaxDepth = Size1 + Size2;
192 auto Index = [&](int32_t I) { return I + MaxDepth; };
193
194 LocToLocMap EqualLocations;
195 if (MaxDepth == 0)
196 return EqualLocations;
197
198 // Backtrack the SES result.
199 auto Backtrack = [&](const std::vector<std::vector<int32_t>> &Trace,
200 const AnchorList &AnchorList1,
201 const AnchorList &AnchorList2,
202 LocToLocMap &EqualLocations) {
203 int32_t X = Size1, Y = Size2;
204 for (int32_t Depth = Trace.size() - 1; X > 0 || Y > 0; Depth--) {
205 const auto &P = Trace[Depth];
206 int32_t K = X - Y;
207 int32_t PrevK = K;
208 if (K == -Depth || (K != Depth && P[Index(K - 1)] < P[Index(K + 1)]))
209 PrevK = K + 1;
210 else
211 PrevK = K - 1;
212
213 int32_t PrevX = P[Index(PrevK)];
214 int32_t PrevY = PrevX - PrevK;
215 while (X > PrevX && Y > PrevY) {
216 X--;
217 Y--;
218 EqualLocations.insert({AnchorList1[X].first, AnchorList2[Y].first});
219 }
220
221 if (Depth == 0)
222 break;
223
224 if (Y == PrevY)
225 X--;
226 else if (X == PrevX)
227 Y--;
228 X = PrevX;
229 Y = PrevY;
230 }
231 };
232
233 // The greedy LCS/SES algorithm.
234
235 // An array contains the endpoints of the furthest reaching D-paths.
236 std::vector<int32_t> V(2 * MaxDepth + 1, -1);
237 V[Index(1)] = 0;
238 // Trace is used to backtrack the SES result.
239 std::vector<std::vector<int32_t>> Trace;
240 for (int32_t Depth = 0; Depth <= MaxDepth; Depth++) {
241 Trace.push_back(V);
242 for (int32_t K = -Depth; K <= Depth; K += 2) {
243 int32_t X = 0, Y = 0;
244 if (K == -Depth || (K != Depth && V[Index(K - 1)] < V[Index(K + 1)]))
245 X = V[Index(K + 1)];
246 else
247 X = V[Index(K - 1)] + 1;
248 Y = X - K;
249 while (X < Size1 && Y < Size2 &&
250 functionMatchesProfile(
251 AnchorList1[X].second, AnchorList2[Y].second,
252 !MatchUnusedFunction /* Find matched function only */))
253 X++, Y++;
254
255 V[Index(K)] = X;
256
257 if (X >= Size1 && Y >= Size2) {
258 // Length of an SES is D.
259 Backtrack(Trace, AnchorList1, AnchorList2, EqualLocations);
260 return EqualLocations;
261 }
262 }
263 }
264 // Length of an SES is greater than MaxDepth.
265 return EqualLocations;
266}
267
268void SampleProfileMatcher::matchNonCallsiteLocs(
269 const LocToLocMap &MatchedAnchors, const AnchorMap &IRAnchors,
270 LocToLocMap &IRToProfileLocationMap) {
271 auto InsertMatching = [&](const LineLocation &From, const LineLocation &To) {
272 // Skip the unchanged location mapping to save memory.
273 if (From != To)
274 IRToProfileLocationMap.insert({From, To});
275 };
276
277 // Use function's beginning location as the initial anchor.
278 int32_t LocationDelta = 0;
279 SmallVector<LineLocation> LastMatchedNonAnchors;
280 for (const auto &IR : IRAnchors) {
281 const auto &Loc = IR.first;
282 bool IsMatchedAnchor = false;
283 // Match the anchor location in lexical order.
284 auto R = MatchedAnchors.find(Loc);
285 if (R != MatchedAnchors.end()) {
286 const auto &Candidate = R->second;
287 InsertMatching(Loc, Candidate);
288 LLVM_DEBUG(dbgs() << "Callsite with callee:" << IR.second.stringRef()
289 << " is matched from " << Loc << " to " << Candidate
290 << "\n");
291 LocationDelta = Candidate.LineOffset - Loc.LineOffset;
292
293 // Match backwards for non-anchor locations.
294 // The locations in LastMatchedNonAnchors have been matched forwards
295 // based on the previous anchor, spilt it evenly and overwrite the
296 // second half based on the current anchor.
297 for (size_t I = (LastMatchedNonAnchors.size() + 1) / 2;
298 I < LastMatchedNonAnchors.size(); I++) {
299 const auto &L = LastMatchedNonAnchors[I];
300 uint32_t CandidateLineOffset = L.LineOffset + LocationDelta;
301 LineLocation Candidate(CandidateLineOffset, L.Discriminator);
302 InsertMatching(L, Candidate);
303 LLVM_DEBUG(dbgs() << "Location is rematched backwards from " << L
304 << " to " << Candidate << "\n");
305 }
306
307 IsMatchedAnchor = true;
308 LastMatchedNonAnchors.clear();
309 }
310
311 // Match forwards for non-anchor locations.
312 if (!IsMatchedAnchor) {
313 uint32_t CandidateLineOffset = Loc.LineOffset + LocationDelta;
314 LineLocation Candidate(CandidateLineOffset, Loc.Discriminator);
315 InsertMatching(Loc, Candidate);
316 LLVM_DEBUG(dbgs() << "Location is matched from " << Loc << " to "
317 << Candidate << "\n");
318 LastMatchedNonAnchors.emplace_back(Loc);
319 }
320 }
321}
322
323// Filter the non-call locations from IRAnchors and ProfileAnchors and write
324// them into a list for random access later.
325void SampleProfileMatcher::getFilteredAnchorList(
326 const AnchorMap &IRAnchors, const AnchorMap &ProfileAnchors,
327 AnchorList &FilteredIRAnchorsList, AnchorList &FilteredProfileAnchorList) {
328 for (const auto &I : IRAnchors) {
329 if (I.second.stringRef().empty())
330 continue;
331 FilteredIRAnchorsList.emplace_back(I);
332 }
333
334 for (const auto &I : ProfileAnchors)
335 FilteredProfileAnchorList.emplace_back(I);
336}
337
338// Call target name anchor based profile fuzzy matching.
339// Input:
340// For IR locations, the anchor is the callee name of direct callsite; For
341// profile locations, it's the call target name for BodySamples or inlinee's
342// profile name for CallsiteSamples.
343// Matching heuristic:
344// First match all the anchors using the diff algorithm, then split the
345// non-anchor locations between the two anchors evenly, first half are matched
346// based on the start anchor, second half are matched based on the end anchor.
347// For example, given:
348// IR locations: [1, 2(foo), 3, 5, 6(bar), 7]
349// Profile locations: [1, 2, 3(foo), 4, 7, 8(bar), 9]
350// The matching gives:
351// [1, 2(foo), 3, 5, 6(bar), 7]
352// | | | | | |
353// [1, 2, 3(foo), 4, 7, 8(bar), 9]
354// The output mapping: [2->3, 3->4, 5->7, 6->8, 7->9].
355void SampleProfileMatcher::runStaleProfileMatching(
356 const Function &F, const AnchorMap &IRAnchors,
357 const AnchorMap &ProfileAnchors, LocToLocMap &IRToProfileLocationMap,
358 bool RunCFGMatching, bool RunCGMatching) {
359 if (!RunCFGMatching && !RunCGMatching)
360 return;
361 LLVM_DEBUG(dbgs() << "Run stale profile matching for " << F.getName()
362 << "\n");
363 assert(IRToProfileLocationMap.empty() &&
364 "Run stale profile matching only once per function");
365
366 AnchorList FilteredProfileAnchorList;
367 AnchorList FilteredIRAnchorsList;
368 getFilteredAnchorList(IRAnchors, ProfileAnchors, FilteredIRAnchorsList,
369 FilteredProfileAnchorList);
370
371 if (FilteredIRAnchorsList.empty() || FilteredProfileAnchorList.empty())
372 return;
373
374 if (FilteredIRAnchorsList.size() > SalvageStaleProfileMaxCallsites ||
375 FilteredProfileAnchorList.size() > SalvageStaleProfileMaxCallsites) {
376 LLVM_DEBUG(dbgs() << "Skip stale profile matching for " << F.getName()
377 << " because the number of callsites in the IR is "
378 << FilteredIRAnchorsList.size()
379 << " and in the profile is "
380 << FilteredProfileAnchorList.size() << "\n");
381 return;
382 }
383
384 // Match the callsite anchors by finding the longest common subsequence
385 // between IR and profile.
386 // Define a match between two anchors as follows:
387 // 1) The function names of anchors are the same.
388 // 2) The similarity between the anchor functions is above a threshold if
389 // RunCGMatching is set.
390 // For 2), we only consider the anchor functions from IR and profile don't
391 // appear on either side to reduce the matching scope. Note that we need to
392 // use IR anchor as base(A side) to align with the order of
393 // IRToProfileLocationMap.
394 LocToLocMap MatchedAnchors =
395 longestCommonSequence(FilteredIRAnchorsList, FilteredProfileAnchorList,
396 RunCGMatching /* Match unused functions */);
397
398 // CFG level matching:
399 // Apply the callsite matchings to infer matching for the basic
400 // block(non-callsite) locations and write the result to
401 // IRToProfileLocationMap.
402 if (RunCFGMatching)
403 matchNonCallsiteLocs(MatchedAnchors, IRAnchors, IRToProfileLocationMap);
404}
405
406void SampleProfileMatcher::runOnFunction(Function &F) {
407 // We need to use flattened function samples for matching.
408 // Unlike IR, which includes all callsites from the source code, the callsites
409 // in profile only show up when they are hit by samples, i,e. the profile
410 // callsites in one context may differ from those in another context. To get
411 // the maximum number of callsites, we merge the function profiles from all
412 // contexts, aka, the flattened profile to find profile anchors.
413 const auto *FSFlattened = getFlattenedSamplesFor(F);
414 if (SalvageUnusedProfile && !FSFlattened) {
415 // Apply the matching in place to find the new function's matched profile.
416 // TODO: For extended profile format, if a function profile is unused and
417 // it's top-level, even if the profile is matched, it's not found in the
418 // profile. This is because sample reader only read the used profile at the
419 // beginning, we need to support loading the profile on-demand in future.
420 auto R = FuncToProfileNameMap.find(&F);
421 if (R != FuncToProfileNameMap.end())
422 FSFlattened = getFlattenedSamplesFor(R->second);
423 }
424 if (!FSFlattened)
425 return;
426
427 // Anchors for IR. It's a map from IR location to callee name, callee name is
428 // empty for non-call instruction and use a dummy name(UnknownIndirectCallee)
429 // for unknown indrect callee name.
430 AnchorMap IRAnchors;
431 findIRAnchors(F, IRAnchors);
432 // Anchors for profile. It's a map from callsite location to a set of callee
433 // name.
434 AnchorMap ProfileAnchors;
435 findProfileAnchors(*FSFlattened, ProfileAnchors);
436
437 // Compute the callsite match states for profile staleness report.
439 recordCallsiteMatchStates(F, IRAnchors, ProfileAnchors, nullptr);
440
442 return;
443 // For probe-based profiles, run matching only when profile checksum is
444 // mismatched.
445 bool ChecksumMismatch = FunctionSamples::ProfileIsProbeBased &&
446 !ProbeManager->profileIsValid(F, *FSFlattened);
447 bool RunCFGMatching =
448 !FunctionSamples::ProfileIsProbeBased || ChecksumMismatch;
449 bool RunCGMatching = SalvageUnusedProfile;
450 // For imported functions, the checksum metadata(pseudo_probe_desc) are
451 // dropped, so we leverage function attribute(profile-checksum-mismatch) to
452 // transfer the info: add the attribute during pre-link phase and check it
453 // during post-link phase(see "profileIsValid").
454 if (ChecksumMismatch && LTOPhase == ThinOrFullLTOPhase::ThinLTOPreLink)
455 F.addFnAttr("profile-checksum-mismatch");
456
457 // The matching result will be saved to IRToProfileLocationMap, create a
458 // new map for each function.
459 auto &IRToProfileLocationMap = getIRToProfileLocationMap(F);
460 runStaleProfileMatching(F, IRAnchors, ProfileAnchors, IRToProfileLocationMap,
461 RunCFGMatching, RunCGMatching);
462 // Find and update callsite match states after matching.
463 if (RunCFGMatching && (ReportProfileStaleness || PersistProfileStaleness))
464 recordCallsiteMatchStates(F, IRAnchors, ProfileAnchors,
465 &IRToProfileLocationMap);
466}
467
468void SampleProfileMatcher::recordCallsiteMatchStates(
469 const Function &F, const AnchorMap &IRAnchors,
470 const AnchorMap &ProfileAnchors,
471 const LocToLocMap *IRToProfileLocationMap) {
472 bool IsPostMatch = IRToProfileLocationMap != nullptr;
473 auto &CallsiteMatchStates =
474 FuncCallsiteMatchStates[FunctionSamples::getCanonicalFnName(F.getName())];
475
476 auto MapIRLocToProfileLoc = [&](const LineLocation &IRLoc) {
477 // IRToProfileLocationMap is null in pre-match phrase.
478 if (!IRToProfileLocationMap)
479 return IRLoc;
480 const auto &ProfileLoc = IRToProfileLocationMap->find(IRLoc);
481 if (ProfileLoc != IRToProfileLocationMap->end())
482 return ProfileLoc->second;
483 else
484 return IRLoc;
485 };
486
487 for (const auto &I : IRAnchors) {
488 // After fuzzy profile matching, use the matching result to remap the
489 // current IR callsite.
490 const auto &ProfileLoc = MapIRLocToProfileLoc(I.first);
491 const auto &IRCalleeId = I.second;
492 const auto &It = ProfileAnchors.find(ProfileLoc);
493 if (It == ProfileAnchors.end())
494 continue;
495 const auto &ProfCalleeId = It->second;
496 if (IRCalleeId == ProfCalleeId) {
497 auto It = CallsiteMatchStates.find(ProfileLoc);
498 if (It == CallsiteMatchStates.end())
499 CallsiteMatchStates.emplace(ProfileLoc, MatchState::InitialMatch);
500 else if (IsPostMatch) {
501 if (It->second == MatchState::InitialMatch)
502 It->second = MatchState::UnchangedMatch;
503 else if (It->second == MatchState::InitialMismatch)
504 It->second = MatchState::RecoveredMismatch;
505 }
506 }
507 }
508
509 // Check if there are any callsites in the profile that does not match to any
510 // IR callsites.
511 for (const auto &I : ProfileAnchors) {
512 const auto &Loc = I.first;
513 assert(!I.second.stringRef().empty() && "Callees should not be empty");
514 auto It = CallsiteMatchStates.find(Loc);
515 if (It == CallsiteMatchStates.end())
516 CallsiteMatchStates.emplace(Loc, MatchState::InitialMismatch);
517 else if (IsPostMatch) {
518 // Update the state if it's not matched(UnchangedMatch or
519 // RecoveredMismatch).
520 if (It->second == MatchState::InitialMismatch)
521 It->second = MatchState::UnchangedMismatch;
522 else if (It->second == MatchState::InitialMatch)
523 It->second = MatchState::RemovedMatch;
524 }
525 }
526}
527
528void SampleProfileMatcher::countMismatchedFuncSamples(const FunctionSamples &FS,
529 bool IsTopLevel) {
530 const auto *FuncDesc = ProbeManager->getDesc(FS.getGUID());
531 // Skip the function that is external or renamed.
532 if (!FuncDesc)
533 return;
534
535 if (ProbeManager->profileIsHashMismatched(*FuncDesc, FS)) {
536 if (IsTopLevel)
537 NumStaleProfileFunc++;
538 // Given currently all probe ids are after block probe ids, once the
539 // checksum is mismatched, it's likely all the callites are mismatched and
540 // dropped. We conservatively count all the samples as mismatched and stop
541 // counting the inlinees' profiles.
542 MismatchedFunctionSamples += FS.getTotalSamples();
543 return;
544 }
545
546 // Even the current-level function checksum is matched, it's possible that the
547 // nested inlinees' checksums are mismatched that affect the inlinee's sample
548 // loading, we need to go deeper to check the inlinees' function samples.
549 // Similarly, count all the samples as mismatched if the inlinee's checksum is
550 // mismatched using this recursive function.
551 for (const auto &I : FS.getCallsiteSamples())
552 for (const auto &CS : I.second)
553 countMismatchedFuncSamples(CS.second, false);
554}
555
556void SampleProfileMatcher::countMismatchedCallsiteSamples(
557 const FunctionSamples &FS) {
558 auto It = FuncCallsiteMatchStates.find(FS.getFuncName());
559 // Skip it if no mismatched callsite or this is an external function.
560 if (It == FuncCallsiteMatchStates.end() || It->second.empty())
561 return;
562 const auto &CallsiteMatchStates = It->second;
563
564 auto findMatchState = [&](const LineLocation &Loc) {
565 auto It = CallsiteMatchStates.find(Loc);
566 if (It == CallsiteMatchStates.end())
567 return MatchState::Unknown;
568 return It->second;
569 };
570
571 auto AttributeMismatchedSamples = [&](const enum MatchState &State,
572 uint64_t Samples) {
573 if (isMismatchState(State))
574 MismatchedCallsiteSamples += Samples;
575 else if (State == MatchState::RecoveredMismatch)
576 RecoveredCallsiteSamples += Samples;
577 };
578
579 // The non-inlined callsites are saved in the body samples of function
580 // profile, go through it to count the non-inlined callsite samples.
581 for (const auto &I : FS.getBodySamples())
582 AttributeMismatchedSamples(findMatchState(I.first), I.second.getSamples());
583
584 // Count the inlined callsite samples.
585 for (const auto &I : FS.getCallsiteSamples()) {
586 auto State = findMatchState(I.first);
587 uint64_t CallsiteSamples = 0;
588 for (const auto &CS : I.second)
589 CallsiteSamples += CS.second.getTotalSamples();
590 AttributeMismatchedSamples(State, CallsiteSamples);
591
592 if (isMismatchState(State))
593 continue;
594
595 // When the current level of inlined call site matches the profiled call
596 // site, we need to go deeper along the inline tree to count mismatches from
597 // lower level inlinees.
598 for (const auto &CS : I.second)
599 countMismatchedCallsiteSamples(CS.second);
600 }
601}
602
603void SampleProfileMatcher::countMismatchCallsites(const FunctionSamples &FS) {
604 auto It = FuncCallsiteMatchStates.find(FS.getFuncName());
605 // Skip it if no mismatched callsite or this is an external function.
606 if (It == FuncCallsiteMatchStates.end() || It->second.empty())
607 return;
608 const auto &MatchStates = It->second;
609 [[maybe_unused]] bool OnInitialState =
610 isInitialState(MatchStates.begin()->second);
611 for (const auto &I : MatchStates) {
612 TotalProfiledCallsites++;
613 assert(
614 (OnInitialState ? isInitialState(I.second) : isFinalState(I.second)) &&
615 "Profile matching state is inconsistent");
616
617 if (isMismatchState(I.second))
618 NumMismatchedCallsites++;
619 else if (I.second == MatchState::RecoveredMismatch)
620 NumRecoveredCallsites++;
621 }
622}
623
624void SampleProfileMatcher::countCallGraphRecoveredSamples(
625 const FunctionSamples &FS,
626 std::unordered_set<FunctionId> &CallGraphRecoveredProfiles) {
627 if (CallGraphRecoveredProfiles.count(FS.getFunction())) {
628 NumCallGraphRecoveredFuncSamples += FS.getTotalSamples();
629 return;
630 }
631
632 for (const auto &CM : FS.getCallsiteSamples()) {
633 for (const auto &CS : CM.second) {
634 countCallGraphRecoveredSamples(CS.second, CallGraphRecoveredProfiles);
635 }
636 }
637}
638
639void SampleProfileMatcher::computeAndReportProfileStaleness() {
641 return;
642
643 std::unordered_set<FunctionId> CallGraphRecoveredProfiles;
645 for (const auto &I : FuncToProfileNameMap) {
646 CallGraphRecoveredProfiles.insert(I.second);
647 if (GlobalValue::isAvailableExternallyLinkage(I.first->getLinkage()))
648 continue;
649 NumCallGraphRecoveredProfiledFunc++;
650 }
651 }
652
653 // Count profile mismatches for profile staleness report.
654 for (const auto &F : M) {
656 continue;
657 // As the stats will be merged by linker, skip reporting the metrics for
658 // imported functions to avoid repeated counting.
660 continue;
661 const auto *FS = Reader.getSamplesFor(F);
662 if (!FS)
663 continue;
664 TotalProfiledFunc++;
665 TotalFunctionSamples += FS->getTotalSamples();
666
667 if (SalvageUnusedProfile && !CallGraphRecoveredProfiles.empty())
668 countCallGraphRecoveredSamples(*FS, CallGraphRecoveredProfiles);
669
670 // Checksum mismatch is only used in pseudo-probe mode.
672 countMismatchedFuncSamples(*FS, true);
673
674 // Count mismatches and samples for calliste.
675 countMismatchCallsites(*FS);
676 countMismatchedCallsiteSamples(*FS);
677 }
678
681 errs() << "(" << NumStaleProfileFunc << "/" << TotalProfiledFunc
682 << ") of functions' profile are invalid and ("
683 << MismatchedFunctionSamples << "/" << TotalFunctionSamples
684 << ") of samples are discarded due to function hash mismatch.\n";
685 }
687 errs() << "(" << NumCallGraphRecoveredProfiledFunc << "/"
688 << TotalProfiledFunc << ") of functions' profile are matched and ("
689 << NumCallGraphRecoveredFuncSamples << "/" << TotalFunctionSamples
690 << ") of samples are reused by call graph matching.\n";
691 }
692
693 errs() << "(" << (NumMismatchedCallsites + NumRecoveredCallsites) << "/"
694 << TotalProfiledCallsites
695 << ") of callsites' profile are invalid and ("
696 << (MismatchedCallsiteSamples + RecoveredCallsiteSamples) << "/"
697 << TotalFunctionSamples
698 << ") of samples are discarded due to callsite location mismatch.\n";
699 errs() << "(" << NumRecoveredCallsites << "/"
700 << (NumRecoveredCallsites + NumMismatchedCallsites)
701 << ") of callsites and (" << RecoveredCallsiteSamples << "/"
702 << (RecoveredCallsiteSamples + MismatchedCallsiteSamples)
703 << ") of samples are recovered by stale profile matching.\n";
704 }
705
707 LLVMContext &Ctx = M.getContext();
708 MDBuilder MDB(Ctx);
709
712 ProfStatsVec.emplace_back("NumStaleProfileFunc", NumStaleProfileFunc);
713 ProfStatsVec.emplace_back("TotalProfiledFunc", TotalProfiledFunc);
714 ProfStatsVec.emplace_back("MismatchedFunctionSamples",
715 MismatchedFunctionSamples);
716 ProfStatsVec.emplace_back("TotalFunctionSamples", TotalFunctionSamples);
717 }
718
720 ProfStatsVec.emplace_back("NumCallGraphRecoveredProfiledFunc",
721 NumCallGraphRecoveredProfiledFunc);
722 ProfStatsVec.emplace_back("NumCallGraphRecoveredFuncSamples",
723 NumCallGraphRecoveredFuncSamples);
724 }
725
726 ProfStatsVec.emplace_back("NumMismatchedCallsites", NumMismatchedCallsites);
727 ProfStatsVec.emplace_back("NumRecoveredCallsites", NumRecoveredCallsites);
728 ProfStatsVec.emplace_back("TotalProfiledCallsites", TotalProfiledCallsites);
729 ProfStatsVec.emplace_back("MismatchedCallsiteSamples",
730 MismatchedCallsiteSamples);
731 ProfStatsVec.emplace_back("RecoveredCallsiteSamples",
732 RecoveredCallsiteSamples);
733
734 auto *MD = MDB.createLLVMStats(ProfStatsVec);
735 auto *NMD = M.getOrInsertNamedMetadata("llvm.stats");
736 NMD->addOperand(MD);
737 }
738}
739
740void SampleProfileMatcher::findFunctionsWithoutProfile() {
741 // TODO: Support MD5 profile.
743 return;
744 StringSet<> NamesInProfile;
745 if (auto NameTable = Reader.getNameTable()) {
746 for (auto Name : *NameTable)
747 NamesInProfile.insert(Name.stringRef());
748 }
749
750 for (auto &F : M) {
751 // Skip declarations, as even if the function can be matched, we have
752 // nothing to do with it.
753 if (F.isDeclaration())
754 continue;
755
756 StringRef CanonFName = FunctionSamples::getCanonicalFnName(F.getName());
757 const auto *FS = getFlattenedSamplesFor(F);
758 if (FS)
759 continue;
760
761 // For extended binary, functions fully inlined may not be loaded in the
762 // top-level profile, so check the NameTable which has the all symbol names
763 // in profile.
764 if (NamesInProfile.count(CanonFName))
765 continue;
766
767 // For extended binary, non-profiled function symbols are in the profile
768 // symbol list table.
769 if (PSL && PSL->contains(CanonFName))
770 continue;
771
772 LLVM_DEBUG(dbgs() << "Function " << CanonFName
773 << " is not in profile or profile symbol list.\n");
774 FunctionsWithoutProfile[FunctionId(CanonFName)] = &F;
775 }
776}
777
778bool SampleProfileMatcher::functionMatchesProfileHelper(
779 const Function &IRFunc, const FunctionId &ProfFunc) {
780 // The value is in the range [0, 1]. The bigger the value is, the more similar
781 // two sequences are.
782 float Similarity = 0.0;
783
784 const auto *FSFlattened = getFlattenedSamplesFor(ProfFunc);
785 if (!FSFlattened)
786 return false;
787 // The check for similarity or checksum may not be reliable if the function is
788 // tiny, we use the number of basic block as a proxy for the function
789 // complexity and skip the matching if it's too small.
790 if (IRFunc.size() < MinFuncCountForCGMatching ||
791 FSFlattened->getBodySamples().size() < MinFuncCountForCGMatching)
792 return false;
793
794 // For probe-based function, we first trust the checksum info. If the checksum
795 // doesn't match, we continue checking for similarity.
797 const auto *FuncDesc = ProbeManager->getDesc(IRFunc);
798 if (FuncDesc &&
799 !ProbeManager->profileIsHashMismatched(*FuncDesc, *FSFlattened)) {
800 LLVM_DEBUG(dbgs() << "The checksums for " << IRFunc.getName()
801 << "(IR) and " << ProfFunc << "(Profile) match.\n");
802
803 return true;
804 }
805 }
806
807 AnchorMap IRAnchors;
808 findIRAnchors(IRFunc, IRAnchors);
809 AnchorMap ProfileAnchors;
810 findProfileAnchors(*FSFlattened, ProfileAnchors);
811
812 AnchorList FilteredIRAnchorsList;
813 AnchorList FilteredProfileAnchorList;
814 getFilteredAnchorList(IRAnchors, ProfileAnchors, FilteredIRAnchorsList,
815 FilteredProfileAnchorList);
816
817 // Similarly skip the matching if the num of anchors is not enough.
818 if (FilteredIRAnchorsList.size() < MinCallCountForCGMatching ||
819 FilteredProfileAnchorList.size() < MinCallCountForCGMatching)
820 return false;
821
822 // Use the diff algorithm to find the LCS between IR and profile.
823
824 // Don't recursively match the callee function to avoid infinite matching,
825 // callee functions will be handled later since it's processed in top-down
826 // order .
827 LocToLocMap MatchedAnchors =
828 longestCommonSequence(FilteredIRAnchorsList, FilteredProfileAnchorList,
829 false /* Match unused functions */);
830
831 Similarity =
832 static_cast<float>(MatchedAnchors.size()) * 2 /
833 (FilteredIRAnchorsList.size() + FilteredProfileAnchorList.size());
834
835 LLVM_DEBUG(dbgs() << "The similarity between " << IRFunc.getName()
836 << "(IR) and " << ProfFunc << "(profile) is "
837 << format("%.2f", Similarity) << "\n");
838 assert((Similarity >= 0 && Similarity <= 1.0) &&
839 "Similarity value should be in [0, 1]");
840 return Similarity * 100 > FuncProfileSimilarityThreshold;
841}
842
843// If FindMatchedProfileOnly is set to true, only use the processed function
844// results. This is used for skipping the repeated recursive matching.
845bool SampleProfileMatcher::functionMatchesProfile(Function &IRFunc,
846 const FunctionId &ProfFunc,
847 bool FindMatchedProfileOnly) {
848 auto R = FuncProfileMatchCache.find({&IRFunc, ProfFunc});
849 if (R != FuncProfileMatchCache.end())
850 return R->second;
851
852 if (FindMatchedProfileOnly)
853 return false;
854
855 bool Matched = functionMatchesProfileHelper(IRFunc, ProfFunc);
856 FuncProfileMatchCache[{&IRFunc, ProfFunc}] = Matched;
857 if (Matched) {
858 FuncToProfileNameMap[&IRFunc] = ProfFunc;
859 LLVM_DEBUG(dbgs() << "Function:" << IRFunc.getName()
860 << " matches profile:" << ProfFunc << "\n");
861 }
862
863 return Matched;
864}
865
867 ProfileConverter::flattenProfile(Reader.getProfiles(), FlattenedProfiles,
870 findFunctionsWithoutProfile();
871
872 // Process the matching in top-down order so that the caller matching result
873 // can be used to the callee matching.
874 std::vector<Function *> TopDownFunctionList;
875 TopDownFunctionList.reserve(M.size());
876 buildTopDownFuncOrder(CG, TopDownFunctionList);
877 for (auto *F : TopDownFunctionList) {
879 continue;
880 runOnFunction(*F);
881 }
882
883 // Update the data in SampleLoader.
885 for (auto &I : FuncToProfileNameMap) {
886 assert(I.first && "New function is null");
887 FunctionId FuncName(I.first->getName());
888 FuncNameToProfNameMap->emplace(FuncName, I.second);
889 // We need to remove the old entry to avoid duplicating the function
890 // processing.
891 SymbolMap->erase(FuncName);
892 SymbolMap->emplace(I.second, I.first);
893 }
894
896 distributeIRToProfileLocationMap();
897
898 computeAndReportProfileStaleness();
899}
900
901void SampleProfileMatcher::distributeIRToProfileLocationMap(
902 FunctionSamples &FS) {
903 const auto ProfileMappings = FuncMappings.find(FS.getFuncName());
904 if (ProfileMappings != FuncMappings.end()) {
905 FS.setIRToProfileLocationMap(&(ProfileMappings->second));
906 }
907
908 for (auto &Callees :
909 const_cast<CallsiteSampleMap &>(FS.getCallsiteSamples())) {
910 for (auto &FS : Callees.second) {
911 distributeIRToProfileLocationMap(FS.second);
912 }
913 }
914}
915
916// Use a central place to distribute the matching results. Outlined and inlined
917// profile with the function name will be set to the same pointer.
918void SampleProfileMatcher::distributeIRToProfileLocationMap() {
919 for (auto &I : Reader.getProfiles()) {
920 distributeIRToProfileLocationMap(I.second);
921 }
922}
BlockVerifier::State From
#define LLVM_DEBUG(X)
Definition: Debug.h:101
std::string Name
static GCMetadataPrinterRegistry::Add< ErlangGCPrinter > X("erlang", "erlang-compatible garbage collector")
static const unsigned MaxDepth
Legalize the Machine IR a function s Machine IR
Definition: Legalizer.cpp:81
#define F(x, y, z)
Definition: MD5.cpp:55
#define I(x, y, z)
Definition: MD5.cpp:58
static GCMetadataPrinterRegistry::Add< OcamlGCMetadataPrinter > Y("ocaml", "ocaml 3.10-compatible collector")
#define P(N)
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
static cl::opt< unsigned > MinCallCountForCGMatching("min-call-count-for-cg-matching", cl::Hidden, cl::init(3), cl::desc("The minimum number of call anchors required for a function to " "run stale profile call graph matching."))
cl::opt< bool > ReportProfileStaleness
static cl::opt< unsigned > FuncProfileSimilarityThreshold("func-profile-similarity-threshold", cl::Hidden, cl::init(80), cl::desc("Consider a profile matches a function if the similarity of their " "callee sequences is above the specified percentile."))
cl::opt< bool > SalvageStaleProfile
static cl::opt< unsigned > SalvageStaleProfileMaxCallsites("salvage-stale-profile-max-callsites", cl::Hidden, cl::init(UINT_MAX), cl::desc("The maximum number of callsites in a function, above which stale " "profile matching will be skipped."))
static cl::opt< unsigned > MinFuncCountForCGMatching("min-func-count-for-cg-matching", cl::Hidden, cl::init(5), cl::desc("The minimum number of basic blocks required for a function to " "run stale profile call graph matching."))
cl::opt< bool > SalvageUnusedProfile
cl::opt< bool > PersistProfileStaleness
This file provides the interface for SampleProfileMatcher.
cl::opt< bool > SalvageUnusedProfile("salvage-unused-profile", cl::Hidden, cl::init(false), cl::desc("Salvage unused profile by matching with new " "functions on call graph."))
cl::opt< bool > PersistProfileStaleness("persist-profile-staleness", cl::Hidden, cl::init(false), cl::desc("Compute stale profile statistical metrics and write it into the " "native object file(.llvm_stats section)."))
cl::opt< bool > ReportProfileStaleness("report-profile-staleness", cl::Hidden, cl::init(false), cl::desc("Compute and report stale profile statistical metrics."))
cl::opt< bool > SalvageStaleProfile("salvage-stale-profile", cl::Hidden, cl::init(false), cl::desc("Salvage stale profile by fuzzy matching and use the remapped " "location for sample profile query."))
Base class for all callable instructions (InvokeInst and CallInst) Holds everything related to callin...
Definition: InstrTypes.h:1236
Debug location.
size_t size() const
Definition: Function.h:856
static bool isAvailableExternallyLinkage(LinkageTypes Linkage)
Definition: GlobalValue.h:379
This is an important class for using LLVM in a threaded context.
Definition: LLVMContext.h:67
bool profileIsHashMismatched(const PseudoProbeDescriptor &FuncDesc, const FunctionSamples &Samples) const
bool profileIsValid(const Function &F, const FunctionSamples &Samples) const
const PseudoProbeDescriptor * getDesc(uint64_t GUID) const
size_t size() const
Definition: SmallVector.h:92
reference emplace_back(ArgTypes &&... Args)
Definition: SmallVector.h:951
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1210
iterator end()
Definition: StringMap.h:220
iterator find(StringRef Key)
Definition: StringMap.h:233
size_type count(StringRef Key) const
count - Return 1 if the element is in the map, 0 otherwise.
Definition: StringMap.h:276
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:50
StringSet - A wrapper for StringMap that provides set-like functionality.
Definition: StringSet.h:23
std::pair< typename Base::iterator, bool > insert(StringRef key)
Definition: StringSet.h:38
unsigned size() const
Definition: Trace.h:95
StringRef getName() const
Return a constant reference to the value's name.
Definition: Value.cpp:309
This class represents a function that is read from a sample profile.
Definition: FunctionId.h:36
Representation of the samples collected for a function.
Definition: SampleProf.h:745
static StringRef getCanonicalFnName(const Function &F)
Return the canonical name for a function, taking into account suffix elision policy attributes.
Definition: SampleProf.h:1090
static bool ProfileIsFS
If this profile uses flow sensitive discriminators.
Definition: SampleProf.h:1203
static LineLocation getCallSiteIdentifier(const DILocation *DIL, bool ProfileIsFS=false)
Returns a unique call site identifier for a given debug location of a call instruction.
Definition: SampleProf.cpp:221
static bool UseMD5
Whether the profile uses MD5 to represent string.
Definition: SampleProf.h:1197
static void flattenProfile(SampleProfileMap &ProfileMap, bool ProfileIsCS=false)
Definition: SampleProf.h:1424
SampleProfileMap & getProfiles()
Return all the profiles.
FunctionSamples * getSamplesFor(const Function &F)
Return the samples collected for function F.
virtual std::vector< FunctionId > * getNameTable()
It includes all the names that have samples either in outline instance or inline instance.
@ C
The default llvm calling convention, compatible with C.
Definition: CallingConv.h:34
@ FS
Definition: X86.h:210
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:443
std::unordered_map< LineLocation, LineLocation, LineLocationHash > LocToLocMap
Definition: SampleProf.h:738
std::map< LineLocation, FunctionSamplesMap > CallsiteSampleMap
Definition: SampleProf.h:736
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
std::vector< std::pair< LineLocation, FunctionId > > AnchorList
std::map< LineLocation, FunctionId > AnchorMap
static void buildTopDownFuncOrder(LazyCallGraph &CG, std::vector< Function * > &FunctionOrderList)
@ ThinLTOPreLink
ThinLTO prelink (summary) phase.
std::optional< PseudoProbe > extractProbe(const Instruction &Inst)
Definition: PseudoProbe.cpp:56
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:163
format_object< Ts... > format(const char *Fmt, const Ts &... Vals)
These are helper functions used to produce formatted output.
Definition: Format.h:125
raw_fd_ostream & errs()
This returns a reference to a raw_ostream for standard error.
static bool skipProfileForFunction(const Function &F)
Represents the relative location of an instruction.
Definition: SampleProf.h:280