LLVM 19.0.0git
SampleProfileMatcher.cpp
Go to the documentation of this file.
1//===- SampleProfileMatcher.cpp - Sampling-based Stale Profile Matcher ----===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file implements the SampleProfileMatcher used for stale
10// profile matching.
11//
12//===----------------------------------------------------------------------===//
13
16#include "llvm/IR/MDBuilder.h"
18
19using namespace llvm;
20using namespace sampleprof;
21
22#define DEBUG_TYPE "sample-profile-matcher"
23
27
29 "salvage-stale-profile-max-callsites", cl::Hidden, cl::init(UINT_MAX),
30 cl::desc("The maximum number of callsites in a function, above which stale "
31 "profile matching will be skipped."));
32
// Walks every instruction of F that carries a debug location and records the
// profile-matching anchors it finds in IRAnchors (location -> callee name).
// Instructions located in inlined code are folded back to their top-level
// callsite via FindTopLevelInlinedCallsite.
// NOTE(review): this listing appears to have lost a few source lines (the
// embedded numbering skips 46-47, 66 and 93-94): `Callsite` is used below
// without a visible declaration, and the `} else {` at listing line 83 has no
// visible matching `if`. Confirm against the upstream file before editing.
33void SampleProfileMatcher::findIRAnchors(const Function &F,
34 AnchorMap &IRAnchors) {
35 // For inlined code, recover the original callsite and callee by finding the
36 // top-level inline frame. e.g. For frame stack "main:1 @ foo:2 @ bar:3", the
37 // top-level frame is "main:1", the callsite is "1" and the callee is "foo".
38 auto FindTopLevelInlinedCallsite = [](const DILocation *DIL) {
39 assert((DIL && DIL->getInlinedAt()) && "No inlined callsite");
40 const DILocation *PrevDIL = nullptr;
// Walk up the inlined-at chain; on exit DIL is the outermost frame and
// PrevDIL is the frame directly below it.
41 do {
42 PrevDIL = DIL;
43 DIL = DIL->getInlinedAt();
44 } while (DIL->getInlinedAt());
45
48 StringRef CalleeName = PrevDIL->getSubprogramLinkageName();
49 return std::make_pair(Callsite, FunctionId(CalleeName));
50 };
51
// Returns the canonical name of a direct callee, or UnknownIndirectCallee when
// getCalledFunction() yields no function.
52 auto GetCanonicalCalleeName = [](const CallBase *CB) {
53 StringRef CalleeName = UnknownIndirectCallee;
54 if (Function *Callee = CB->getCalledFunction())
55 CalleeName = FunctionSamples::getCanonicalFnName(Callee->getName());
56 return CalleeName;
57 };
58
59 // Extract profile matching anchors in the IR.
60 for (auto &BB : F) {
61 for (auto &I : BB) {
62 DILocation *DIL = I.getDebugLoc();
63 if (!DIL)
64 continue;
65
67 if (auto Probe = extractProbe(I)) {
68 // Flatten inlined IR for the matching.
69 if (DIL->getInlinedAt()) {
70 IRAnchors.emplace(FindTopLevelInlinedCallsite(DIL));
71 } else {
72 // Use empty StringRef for basic block probe.
73 StringRef CalleeName;
74 if (const auto *CB = dyn_cast<CallBase>(&I)) {
75 // Skip the probe inst whose callee name is "llvm.pseudoprobe".
76 if (!isa<IntrinsicInst>(&I))
77 CalleeName = GetCanonicalCalleeName(CB);
78 }
// The anchor location is the probe id with a zero discriminator.
79 LineLocation Loc = LineLocation(Probe->Id, 0);
80 IRAnchors.emplace(Loc, FunctionId(CalleeName));
81 }
82 }
83 } else {
84 // TODO: For line-number based profile(AutoFDO), currently only support
85 // find callsite anchors. In future, we need to parse all the non-call
86 // instructions to extract the line locations for profile matching.
87 if (!isa<CallBase>(&I) || isa<IntrinsicInst>(&I))
88 continue;
89
90 if (DIL->getInlinedAt()) {
91 IRAnchors.emplace(FindTopLevelInlinedCallsite(DIL));
92 } else {
95 StringRef CalleeName = GetCanonicalCalleeName(dyn_cast<CallBase>(&I));
96 IRAnchors.emplace(Callsite, FunctionId(CalleeName));
97 }
98 }
99 }
100 }
101}
102
103void SampleProfileMatcher::findProfileAnchors(const FunctionSamples &FS,
104 AnchorMap &ProfileAnchors) {
105 auto isInvalidLineOffset = [](uint32_t LineOffset) {
106 return LineOffset & 0x8000;
107 };
108
109 auto InsertAnchor = [](const LineLocation &Loc, const FunctionId &CalleeName,
110 AnchorMap &ProfileAnchors) {
111 auto Ret = ProfileAnchors.try_emplace(Loc, CalleeName);
112 if (!Ret.second) {
113 // For multiple callees, which indicates it's an indirect call, we use a
114 // dummy name(UnknownIndirectCallee) as the indrect callee name.
115 Ret.first->second = FunctionId(UnknownIndirectCallee);
116 }
117 };
118
119 for (const auto &I : FS.getBodySamples()) {
120 const LineLocation &Loc = I.first;
121 if (isInvalidLineOffset(Loc.LineOffset))
122 continue;
123 for (const auto &C : I.second.getCallTargets())
124 InsertAnchor(Loc, C.first, ProfileAnchors);
125 }
126
127 for (const auto &I : FS.getCallsiteSamples()) {
128 const LineLocation &Loc = I.first;
129 if (isInvalidLineOffset(Loc.LineOffset))
130 continue;
131 for (const auto &C : I.second)
132 InsertAnchor(Loc, C.first, ProfileAnchors);
133 }
134}
135
136LocToLocMap SampleProfileMatcher::longestCommonSequence(
137 const AnchorList &AnchorList1, const AnchorList &AnchorList2) const {
138 int32_t Size1 = AnchorList1.size(), Size2 = AnchorList2.size(),
139 MaxDepth = Size1 + Size2;
140 auto Index = [&](int32_t I) { return I + MaxDepth; };
141
142 LocToLocMap EqualLocations;
143 if (MaxDepth == 0)
144 return EqualLocations;
145
146 // Backtrack the SES result.
147 auto Backtrack = [&](const std::vector<std::vector<int32_t>> &Trace,
148 const AnchorList &AnchorList1,
149 const AnchorList &AnchorList2,
150 LocToLocMap &EqualLocations) {
151 int32_t X = Size1, Y = Size2;
152 for (int32_t Depth = Trace.size() - 1; X > 0 || Y > 0; Depth--) {
153 const auto &P = Trace[Depth];
154 int32_t K = X - Y;
155 int32_t PrevK = K;
156 if (K == -Depth || (K != Depth && P[Index(K - 1)] < P[Index(K + 1)]))
157 PrevK = K + 1;
158 else
159 PrevK = K - 1;
160
161 int32_t PrevX = P[Index(PrevK)];
162 int32_t PrevY = PrevX - PrevK;
163 while (X > PrevX && Y > PrevY) {
164 X--;
165 Y--;
166 EqualLocations.insert({AnchorList1[X].first, AnchorList2[Y].first});
167 }
168
169 if (Depth == 0)
170 break;
171
172 if (Y == PrevY)
173 X--;
174 else if (X == PrevX)
175 Y--;
176 X = PrevX;
177 Y = PrevY;
178 }
179 };
180
181 // The greedy LCS/SES algorithm.
182
183 // An array contains the endpoints of the furthest reaching D-paths.
184 std::vector<int32_t> V(2 * MaxDepth + 1, -1);
185 V[Index(1)] = 0;
186 // Trace is used to backtrack the SES result.
187 std::vector<std::vector<int32_t>> Trace;
188 for (int32_t Depth = 0; Depth <= MaxDepth; Depth++) {
189 Trace.push_back(V);
190 for (int32_t K = -Depth; K <= Depth; K += 2) {
191 int32_t X = 0, Y = 0;
192 if (K == -Depth || (K != Depth && V[Index(K - 1)] < V[Index(K + 1)]))
193 X = V[Index(K + 1)];
194 else
195 X = V[Index(K - 1)] + 1;
196 Y = X - K;
197 while (X < Size1 && Y < Size2 &&
198 AnchorList1[X].second == AnchorList2[Y].second)
199 X++, Y++;
200
201 V[Index(K)] = X;
202
203 if (X >= Size1 && Y >= Size2) {
204 // Length of an SES is D.
205 Backtrack(Trace, AnchorList1, AnchorList2, EqualLocations);
206 return EqualLocations;
207 }
208 }
209 }
210 // Length of an SES is greater than MaxDepth.
211 return EqualLocations;
212}
213
214void SampleProfileMatcher::matchNonCallsiteLocs(
215 const LocToLocMap &MatchedAnchors, const AnchorMap &IRAnchors,
216 LocToLocMap &IRToProfileLocationMap) {
217 auto InsertMatching = [&](const LineLocation &From, const LineLocation &To) {
218 // Skip the unchanged location mapping to save memory.
219 if (From != To)
220 IRToProfileLocationMap.insert({From, To});
221 };
222
223 // Use function's beginning location as the initial anchor.
224 int32_t LocationDelta = 0;
225 SmallVector<LineLocation> LastMatchedNonAnchors;
226 for (const auto &IR : IRAnchors) {
227 const auto &Loc = IR.first;
228 bool IsMatchedAnchor = false;
229 // Match the anchor location in lexical order.
230 auto R = MatchedAnchors.find(Loc);
231 if (R != MatchedAnchors.end()) {
232 const auto &Candidate = R->second;
233 InsertMatching(Loc, Candidate);
234 LLVM_DEBUG(dbgs() << "Callsite with callee:" << IR.second.stringRef()
235 << " is matched from " << Loc << " to " << Candidate
236 << "\n");
237 LocationDelta = Candidate.LineOffset - Loc.LineOffset;
238
239 // Match backwards for non-anchor locations.
240 // The locations in LastMatchedNonAnchors have been matched forwards
241 // based on the previous anchor, spilt it evenly and overwrite the
242 // second half based on the current anchor.
243 for (size_t I = (LastMatchedNonAnchors.size() + 1) / 2;
244 I < LastMatchedNonAnchors.size(); I++) {
245 const auto &L = LastMatchedNonAnchors[I];
246 uint32_t CandidateLineOffset = L.LineOffset + LocationDelta;
247 LineLocation Candidate(CandidateLineOffset, L.Discriminator);
248 InsertMatching(L, Candidate);
249 LLVM_DEBUG(dbgs() << "Location is rematched backwards from " << L
250 << " to " << Candidate << "\n");
251 }
252
253 IsMatchedAnchor = true;
254 LastMatchedNonAnchors.clear();
255 }
256
257 // Match forwards for non-anchor locations.
258 if (!IsMatchedAnchor) {
259 uint32_t CandidateLineOffset = Loc.LineOffset + LocationDelta;
260 LineLocation Candidate(CandidateLineOffset, Loc.Discriminator);
261 InsertMatching(Loc, Candidate);
262 LLVM_DEBUG(dbgs() << "Location is matched from " << Loc << " to "
263 << Candidate << "\n");
264 LastMatchedNonAnchors.emplace_back(Loc);
265 }
266 }
267}
268
269// Call target name anchor based profile fuzzy matching.
270// Input:
271// For IR locations, the anchor is the callee name of direct callsite; For
272// profile locations, it's the call target name for BodySamples or inlinee's
273// profile name for CallsiteSamples.
274// Matching heuristic:
275// First match all the anchors using the diff algorithm, then split the
276// non-anchor locations between the two anchors evenly, first half are matched
277// based on the start anchor, second half are matched based on the end anchor.
278// For example, given:
279// IR locations: [1, 2(foo), 3, 5, 6(bar), 7]
280// Profile locations: [1, 2, 3(foo), 4, 7, 8(bar), 9]
281// The matching gives:
282// [1, 2(foo), 3, 5, 6(bar), 7]
283// | | | | | |
284// [1, 2, 3(foo), 4, 7, 8(bar), 9]
285// The output mapping: [2->3, 3->4, 5->7, 6->8, 7->9].
286void SampleProfileMatcher::runStaleProfileMatching(
287 const Function &F, const AnchorMap &IRAnchors,
288 const AnchorMap &ProfileAnchors, LocToLocMap &IRToProfileLocationMap) {
289 LLVM_DEBUG(dbgs() << "Run stale profile matching for " << F.getName()
290 << "\n");
291 assert(IRToProfileLocationMap.empty() &&
292 "Run stale profile matching only once per function");
293
294 AnchorList FilteredProfileAnchorList;
295 for (const auto &I : ProfileAnchors)
296 FilteredProfileAnchorList.emplace_back(I);
297
298 AnchorList FilteredIRAnchorsList;
299 // Filter the non-callsite from IRAnchors.
300 for (const auto &I : IRAnchors) {
301 if (I.second.stringRef().empty())
302 continue;
303 FilteredIRAnchorsList.emplace_back(I);
304 }
305
306 if (FilteredIRAnchorsList.empty() || FilteredProfileAnchorList.empty())
307 return;
308
309 if (FilteredIRAnchorsList.size() > SalvageStaleProfileMaxCallsites ||
310 FilteredProfileAnchorList.size() > SalvageStaleProfileMaxCallsites) {
311 LLVM_DEBUG(dbgs() << "Skip stale profile matching for " << F.getName()
312 << " because the number of callsites in the IR is "
313 << FilteredIRAnchorsList.size()
314 << " and in the profile is "
315 << FilteredProfileAnchorList.size() << "\n");
316 return;
317 }
318
319 // Match the callsite anchors by finding the longest common subsequence
320 // between IR and profile. Note that we need to use IR anchor as base(A side)
321 // to align with the order of IRToProfileLocationMap.
322 LocToLocMap MatchedAnchors =
323 longestCommonSequence(FilteredIRAnchorsList, FilteredProfileAnchorList);
324
325 // Match the non-callsite locations and write the result to
326 // IRToProfileLocationMap.
327 matchNonCallsiteLocs(MatchedAnchors, IRAnchors, IRToProfileLocationMap);
328}
329
// Per-function driver: collects IR and profile anchors for F, records the
// callsite match states, and (under conditions that are only partly visible
// here) runs stale profile matching and records the post-match states.
// NOTE(review): this listing appears to have lost several source lines (the
// embedded numbering skips 352, 357, 363-364 and 373), including the start of
// the condition that guards the matching block. Confirm against the upstream
// file before editing.
330void SampleProfileMatcher::runOnFunction(Function &F) {
331 // We need to use flattened function samples for matching.
332 // Unlike IR, which includes all callsites from the source code, the callsites
333 // in profile only show up when they are hit by samples, i.e. the profile
334 // callsites in one context may differ from those in another context. To get
335 // the maximum number of callsites, we merge the function profiles from all
336 // contexts, aka, the flattened profile to find profile anchors.
337 const auto *FSFlattened = getFlattenedSamplesFor(F);
338 if (!FSFlattened)
339 return;
340
341 // Anchors for IR. It's a map from IR location to callee name, callee name is
342 // empty for non-call instruction and use a dummy name(UnknownIndirectCallee)
343 // for unknown indirect callee name.
344 AnchorMap IRAnchors;
345 findIRAnchors(F, IRAnchors);
346 // Anchors for profile. It's a map from callsite location to a set of callee
347 // name.
348 AnchorMap ProfileAnchors;
349 findProfileAnchors(*FSFlattened, ProfileAnchors);
350
351 // Compute the callsite match states for profile staleness report.
353 recordCallsiteMatchStates(F, IRAnchors, ProfileAnchors, nullptr);
354
355 // For probe-based profiles, run matching only when the current profile is not
356 // valid.
358 !ProbeManager->profileIsValid(F, *FSFlattened))) {
359 // For imported functions, the checksum metadata(pseudo_probe_desc) are
360 // dropped, so we leverage function attribute(profile-checksum-mismatch) to
361 // transfer the info: add the attribute during pre-link phase and check it
362 // during post-link phase(see "profileIsValid").
365 F.addFnAttr("profile-checksum-mismatch");
366
367 // The matching result will be saved to IRToProfileLocationMap, create a
368 // new map for each function.
369 auto &IRToProfileLocationMap = getIRToProfileLocationMap(F);
370 runStaleProfileMatching(F, IRAnchors, ProfileAnchors,
371 IRToProfileLocationMap);
372 // Find and update callsite match states after matching.
374 recordCallsiteMatchStates(F, IRAnchors, ProfileAnchors,
375 &IRToProfileLocationMap);
376 }
377}
378
379void SampleProfileMatcher::recordCallsiteMatchStates(
380 const Function &F, const AnchorMap &IRAnchors,
381 const AnchorMap &ProfileAnchors,
382 const LocToLocMap *IRToProfileLocationMap) {
383 bool IsPostMatch = IRToProfileLocationMap != nullptr;
384 auto &CallsiteMatchStates =
385 FuncCallsiteMatchStates[FunctionSamples::getCanonicalFnName(F.getName())];
386
387 auto MapIRLocToProfileLoc = [&](const LineLocation &IRLoc) {
388 // IRToProfileLocationMap is null in pre-match phrase.
389 if (!IRToProfileLocationMap)
390 return IRLoc;
391 const auto &ProfileLoc = IRToProfileLocationMap->find(IRLoc);
392 if (ProfileLoc != IRToProfileLocationMap->end())
393 return ProfileLoc->second;
394 else
395 return IRLoc;
396 };
397
398 for (const auto &I : IRAnchors) {
399 // After fuzzy profile matching, use the matching result to remap the
400 // current IR callsite.
401 const auto &ProfileLoc = MapIRLocToProfileLoc(I.first);
402 const auto &IRCalleeId = I.second;
403 const auto &It = ProfileAnchors.find(ProfileLoc);
404 if (It == ProfileAnchors.end())
405 continue;
406 const auto &ProfCalleeId = It->second;
407 if (IRCalleeId == ProfCalleeId) {
408 auto It = CallsiteMatchStates.find(ProfileLoc);
409 if (It == CallsiteMatchStates.end())
410 CallsiteMatchStates.emplace(ProfileLoc, MatchState::InitialMatch);
411 else if (IsPostMatch) {
412 if (It->second == MatchState::InitialMatch)
413 It->second = MatchState::UnchangedMatch;
414 else if (It->second == MatchState::InitialMismatch)
415 It->second = MatchState::RecoveredMismatch;
416 }
417 }
418 }
419
420 // Check if there are any callsites in the profile that does not match to any
421 // IR callsites.
422 for (const auto &I : ProfileAnchors) {
423 const auto &Loc = I.first;
424 assert(!I.second.stringRef().empty() && "Callees should not be empty");
425 auto It = CallsiteMatchStates.find(Loc);
426 if (It == CallsiteMatchStates.end())
427 CallsiteMatchStates.emplace(Loc, MatchState::InitialMismatch);
428 else if (IsPostMatch) {
429 // Update the state if it's not matched(UnchangedMatch or
430 // RecoveredMismatch).
431 if (It->second == MatchState::InitialMismatch)
432 It->second = MatchState::UnchangedMismatch;
433 else if (It->second == MatchState::InitialMatch)
434 It->second = MatchState::RemovedMatch;
435 }
436 }
437}
438
439void SampleProfileMatcher::countMismatchedFuncSamples(const FunctionSamples &FS,
440 bool IsTopLevel) {
441 const auto *FuncDesc = ProbeManager->getDesc(FS.getGUID());
442 // Skip the function that is external or renamed.
443 if (!FuncDesc)
444 return;
445
446 if (ProbeManager->profileIsHashMismatched(*FuncDesc, FS)) {
447 if (IsTopLevel)
448 NumStaleProfileFunc++;
449 // Given currently all probe ids are after block probe ids, once the
450 // checksum is mismatched, it's likely all the callites are mismatched and
451 // dropped. We conservatively count all the samples as mismatched and stop
452 // counting the inlinees' profiles.
453 MismatchedFunctionSamples += FS.getTotalSamples();
454 return;
455 }
456
457 // Even the current-level function checksum is matched, it's possible that the
458 // nested inlinees' checksums are mismatched that affect the inlinee's sample
459 // loading, we need to go deeper to check the inlinees' function samples.
460 // Similarly, count all the samples as mismatched if the inlinee's checksum is
461 // mismatched using this recursive function.
462 for (const auto &I : FS.getCallsiteSamples())
463 for (const auto &CS : I.second)
464 countMismatchedFuncSamples(CS.second, false);
465}
466
467void SampleProfileMatcher::countMismatchedCallsiteSamples(
468 const FunctionSamples &FS) {
469 auto It = FuncCallsiteMatchStates.find(FS.getFuncName());
470 // Skip it if no mismatched callsite or this is an external function.
471 if (It == FuncCallsiteMatchStates.end() || It->second.empty())
472 return;
473 const auto &CallsiteMatchStates = It->second;
474
475 auto findMatchState = [&](const LineLocation &Loc) {
476 auto It = CallsiteMatchStates.find(Loc);
477 if (It == CallsiteMatchStates.end())
478 return MatchState::Unknown;
479 return It->second;
480 };
481
482 auto AttributeMismatchedSamples = [&](const enum MatchState &State,
483 uint64_t Samples) {
484 if (isMismatchState(State))
485 MismatchedCallsiteSamples += Samples;
486 else if (State == MatchState::RecoveredMismatch)
487 RecoveredCallsiteSamples += Samples;
488 };
489
490 // The non-inlined callsites are saved in the body samples of function
491 // profile, go through it to count the non-inlined callsite samples.
492 for (const auto &I : FS.getBodySamples())
493 AttributeMismatchedSamples(findMatchState(I.first), I.second.getSamples());
494
495 // Count the inlined callsite samples.
496 for (const auto &I : FS.getCallsiteSamples()) {
497 auto State = findMatchState(I.first);
498 uint64_t CallsiteSamples = 0;
499 for (const auto &CS : I.second)
500 CallsiteSamples += CS.second.getTotalSamples();
501 AttributeMismatchedSamples(State, CallsiteSamples);
502
503 if (isMismatchState(State))
504 continue;
505
506 // When the current level of inlined call site matches the profiled call
507 // site, we need to go deeper along the inline tree to count mismatches from
508 // lower level inlinees.
509 for (const auto &CS : I.second)
510 countMismatchedCallsiteSamples(CS.second);
511 }
512}
513
514void SampleProfileMatcher::countMismatchCallsites(const FunctionSamples &FS) {
515 auto It = FuncCallsiteMatchStates.find(FS.getFuncName());
516 // Skip it if no mismatched callsite or this is an external function.
517 if (It == FuncCallsiteMatchStates.end() || It->second.empty())
518 return;
519 const auto &MatchStates = It->second;
520 [[maybe_unused]] bool OnInitialState =
521 isInitialState(MatchStates.begin()->second);
522 for (const auto &I : MatchStates) {
523 TotalProfiledCallsites++;
524 assert(
525 (OnInitialState ? isInitialState(I.second) : isFinalState(I.second)) &&
526 "Profile matching state is inconsistent");
527
528 if (isMismatchState(I.second))
529 NumMismatchedCallsites++;
530 else if (I.second == MatchState::RecoveredMismatch)
531 NumRecoveredCallsites++;
532 }
533}
534
// Aggregates the staleness counters over the functions of module M, prints
// them to errs(), and attaches them to the module as an "llvm.stats" named
// metadata operand.
// NOTE(review): this listing appears to have lost many source lines (the
// embedded numbering skips 536, 541, 545, 554, 562-563, 582 and 586-587),
// including every condition that guards the early return, the `continue`s, the
// report block and the metadata block, and the declaration of ProfStatsVec.
// Confirm against the upstream file before editing.
535void SampleProfileMatcher::computeAndReportProfileStaleness() {
537 return;
538
539 // Count profile mismatches for profile staleness report.
540 for (const auto &F : M) {
542 continue;
543 // As the stats will be merged by linker, skip reporting the metrics for
544 // imported functions to avoid repeated counting.
546 continue;
547 const auto *FS = Reader.getSamplesFor(F);
548 if (!FS)
549 continue;
550 TotalProfiledFunc++;
551 TotalFunctionSamples += FS->getTotalSamples();
552
553 // Checksum mismatch is only used in pseudo-probe mode.
555 countMismatchedFuncSamples(*FS, true);
556
557 // Count mismatches and samples for callsite.
558 countMismatchCallsites(*FS);
559 countMismatchedCallsiteSamples(*FS);
560 }
561
564 errs() << "(" << NumStaleProfileFunc << "/" << TotalProfiledFunc
565 << ") of functions' profile are invalid and ("
566 << MismatchedFunctionSamples << "/" << TotalFunctionSamples
567 << ") of samples are discarded due to function hash mismatch.\n";
568 }
// Recovered callsites are counted as invalid here as well: the numerators below
// add the recovered and the mismatched counters together.
569 errs() << "(" << (NumMismatchedCallsites + NumRecoveredCallsites) << "/"
570 << TotalProfiledCallsites
571 << ") of callsites' profile are invalid and ("
572 << (MismatchedCallsiteSamples + RecoveredCallsiteSamples) << "/"
573 << TotalFunctionSamples
574 << ") of samples are discarded due to callsite location mismatch.\n";
575 errs() << "(" << NumRecoveredCallsites << "/"
576 << (NumRecoveredCallsites + NumMismatchedCallsites)
577 << ") of callsites and (" << RecoveredCallsiteSamples << "/"
578 << (RecoveredCallsiteSamples + MismatchedCallsiteSamples)
579 << ") of samples are recovered by stale profile matching.\n";
580 }
581
583 LLVMContext &Ctx = M.getContext();
584 MDBuilder MDB(Ctx);
585
588 ProfStatsVec.emplace_back("NumStaleProfileFunc", NumStaleProfileFunc);
589 ProfStatsVec.emplace_back("TotalProfiledFunc", TotalProfiledFunc);
590 ProfStatsVec.emplace_back("MismatchedFunctionSamples",
591 MismatchedFunctionSamples);
592 ProfStatsVec.emplace_back("TotalFunctionSamples", TotalFunctionSamples);
593 }
594
595 ProfStatsVec.emplace_back("NumMismatchedCallsites", NumMismatchedCallsites);
596 ProfStatsVec.emplace_back("NumRecoveredCallsites", NumRecoveredCallsites);
597 ProfStatsVec.emplace_back("TotalProfiledCallsites", TotalProfiledCallsites);
598 ProfStatsVec.emplace_back("MismatchedCallsiteSamples",
599 MismatchedCallsiteSamples);
600 ProfStatsVec.emplace_back("RecoveredCallsiteSamples",
601 RecoveredCallsiteSamples);
602
// Publish the collected name/value pairs as one operand of the module-level
// "llvm.stats" named metadata.
603 auto *MD = MDB.createLLVMStats(ProfStatsVec);
604 auto *NMD = M.getOrInsertNamedMetadata("llvm.stats");
605 NMD->addOperand(MD);
606 }
607}
608
610 ProfileConverter::flattenProfile(Reader.getProfiles(), FlattenedProfiles,
612 for (auto &F : M) {
614 continue;
615 runOnFunction(F);
616 }
618 distributeIRToProfileLocationMap();
619
620 computeAndReportProfileStaleness();
621}
622
623void SampleProfileMatcher::distributeIRToProfileLocationMap(
624 FunctionSamples &FS) {
625 const auto ProfileMappings = FuncMappings.find(FS.getFuncName());
626 if (ProfileMappings != FuncMappings.end()) {
627 FS.setIRToProfileLocationMap(&(ProfileMappings->second));
628 }
629
630 for (auto &Callees :
631 const_cast<CallsiteSampleMap &>(FS.getCallsiteSamples())) {
632 for (auto &FS : Callees.second) {
633 distributeIRToProfileLocationMap(FS.second);
634 }
635 }
636}
637
638// Use a central place to distribute the matching results. Outlined and inlined
639// profile with the function name will be set to the same pointer.
640void SampleProfileMatcher::distributeIRToProfileLocationMap() {
641 for (auto &I : Reader.getProfiles()) {
642 distributeIRToProfileLocationMap(I.second);
643 }
644}
BlockVerifier::State From
#define LLVM_DEBUG(X)
Definition: Debug.h:101
static GCMetadataPrinterRegistry::Add< ErlangGCPrinter > X("erlang", "erlang-compatible garbage collector")
static const unsigned MaxDepth
Legalize the Machine IR a function s Machine IR
Definition: Legalizer.cpp:81
#define F(x, y, z)
Definition: MD5.cpp:55
#define I(x, y, z)
Definition: MD5.cpp:58
static GCMetadataPrinterRegistry::Add< OcamlGCMetadataPrinter > Y("ocaml", "ocaml 3.10-compatible collector")
#define P(N)
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
cl::opt< bool > ReportProfileStaleness
cl::opt< bool > SalvageStaleProfile
static cl::opt< unsigned > SalvageStaleProfileMaxCallsites("salvage-stale-profile-max-callsites", cl::Hidden, cl::init(UINT_MAX), cl::desc("The maximum number of callsites in a function, above which stale " "profile matching will be skipped."))
cl::opt< bool > PersistProfileStaleness
This file provides the interface for SampleProfileMatcher.
cl::opt< bool > PersistProfileStaleness("persist-profile-staleness", cl::Hidden, cl::init(false), cl::desc("Compute stale profile statistical metrics and write it into the " "native object file(.llvm_stats section)."))
cl::opt< bool > ReportProfileStaleness("report-profile-staleness", cl::Hidden, cl::init(false), cl::desc("Compute and report stale profile statistical metrics."))
cl::opt< bool > SalvageStaleProfile("salvage-stale-profile", cl::Hidden, cl::init(false), cl::desc("Salvage stale profile by fuzzy matching and use the remapped " "location for sample profile query."))
Base class for all callable instructions (InvokeInst and CallInst) Holds everything related to callin...
Definition: InstrTypes.h:1236
Debug location.
static bool isAvailableExternallyLinkage(LinkageTypes Linkage)
Definition: GlobalValue.h:379
This is an important class for using LLVM in a threaded context.
Definition: LLVMContext.h:67
bool profileIsHashMismatched(const PseudoProbeDescriptor &FuncDesc, const FunctionSamples &Samples) const
bool profileIsValid(const Function &F, const FunctionSamples &Samples) const
const PseudoProbeDescriptor * getDesc(uint64_t GUID) const
size_t size() const
Definition: SmallVector.h:91
reference emplace_back(ArgTypes &&... Args)
Definition: SmallVector.h:950
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1209
iterator end()
Definition: StringMap.h:220
iterator find(StringRef Key)
Definition: StringMap.h:233
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:50
unsigned size() const
Definition: Trace.h:95
This class represents a function that is read from a sample profile.
Definition: FunctionId.h:36
Representation of the samples collected for a function.
Definition: SampleProf.h:744
static StringRef getCanonicalFnName(const Function &F)
Return the canonical name for a function, taking into account suffix elision policy attributes.
Definition: SampleProf.h:1085
static bool ProfileIsFS
If this profile uses flow sensitive discriminators.
Definition: SampleProf.h:1196
static LineLocation getCallSiteIdentifier(const DILocation *DIL, bool ProfileIsFS=false)
Returns a unique call site identifier for a given debug location of a call instruction.
Definition: SampleProf.cpp:221
static void flattenProfile(SampleProfileMap &ProfileMap, bool ProfileIsCS=false)
Definition: SampleProf.h:1417
SampleProfileMap & getProfiles()
Return all the profiles.
FunctionSamples * getSamplesFor(const Function &F)
Return the samples collected for function F.
@ C
The default llvm calling convention, compatible with C.
Definition: CallingConv.h:34
@ FS
Definition: X86.h:206
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:443
std::unordered_map< LineLocation, LineLocation, LineLocationHash > LocToLocMap
Definition: SampleProf.h:737
std::map< LineLocation, FunctionSamplesMap > CallsiteSampleMap
Definition: SampleProf.h:735
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
std::vector< std::pair< LineLocation, FunctionId > > AnchorList
std::map< LineLocation, FunctionId > AnchorMap
@ ThinLTOPreLink
ThinLTO prelink (summary) phase.
std::optional< PseudoProbe > extractProbe(const Instruction &Inst)
Definition: PseudoProbe.cpp:56
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:163
raw_fd_ostream & errs()
This returns a reference to a raw_ostream for standard error.
static bool skipProfileForFunction(const Function &F)
Represents the relative location of an instruction.
Definition: SampleProf.h:280