LLVM 19.0.0git
SampleProfileMatcher.cpp
Go to the documentation of this file.
1//===- SampleProfileMatcher.cpp - Sampling-based Stale Profile Matcher ----===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file implements the SampleProfileMatcher used for stale
10// profile matching.
11//
12//===----------------------------------------------------------------------===//
13
16#include "llvm/IR/MDBuilder.h"
17
18using namespace llvm;
19using namespace sampleprof;
20
21#define DEBUG_TYPE "sample-profile-matcher"
22
26
// Walks every instruction of F that carries a debug location and records
// profile-matching anchors into IRAnchors (location -> callee FunctionId).
// NOTE(review): this listing omits several original source lines (40-41, 60
// and 87-88 by the embedded numbering). The definition of `Callsite` and the
// condition that selects the probe-based branch are therefore not visible
// here; confirm them against the complete file before editing this function.
27void SampleProfileMatcher::findIRAnchors(const Function &F,
28 AnchorMap &IRAnchors) {
29 // For inlined code, recover the original callsite and callee by finding the
30 // top-level inline frame. e.g. For frame stack "main:1 @ foo:2 @ bar:3", the
31 // top-level frame is "main:1", the callsite is "1" and the callee is "foo".
32 auto FindTopLevelInlinedCallsite = [](const DILocation *DIL) {
33 assert((DIL && DIL->getInlinedAt()) && "No inlined callsite");
34 const DILocation *PrevDIL = nullptr;
// Follow the inlined-at chain until DIL has no further inlined-at location;
// PrevDIL then holds the location visited just before it.
35 do {
36 PrevDIL = DIL;
37 DIL = DIL->getInlinedAt();
38 } while (DIL->getInlinedAt());
39
// NOTE(review): `Callsite` is defined on lines missing from this listing.
42 StringRef CalleeName = PrevDIL->getSubprogramLinkageName();
43 return std::make_pair(Callsite, FunctionId(CalleeName));
44 };
45
// Returns the canonical name of a direct callee; when getCalledFunction()
// yields null the result stays UnknownIndirectCallee.
46 auto GetCanonicalCalleeName = [](const CallBase *CB) {
47 StringRef CalleeName = UnknownIndirectCallee;
48 if (Function *Callee = CB->getCalledFunction())
49 CalleeName = FunctionSamples::getCanonicalFnName(Callee->getName());
50 return CalleeName;
51 };
52
53 // Extract profile matching anchors in the IR.
54 for (auto &BB : F) {
55 for (auto &I : BB) {
56 DILocation *DIL = I.getDebugLoc();
// Instructions without a debug location cannot be anchored.
57 if (!DIL)
58 continue;
59
// NOTE(review): the enclosing `if` whose `else` appears further down is on a
// line missing from this listing.
61 if (auto Probe = extractProbe(I)) {
62 // Flatten inlined IR for the matching.
63 if (DIL->getInlinedAt()) {
64 IRAnchors.emplace(FindTopLevelInlinedCallsite(DIL));
65 } else {
66 // Use empty StringRef for basic block probe.
67 StringRef CalleeName;
68 if (const auto *CB = dyn_cast<CallBase>(&I)) {
69 // Skip the probe inst whose callee name is "llvm.pseudoprobe".
70 if (!isa<IntrinsicInst>(&I))
71 CalleeName = GetCanonicalCalleeName(CB);
72 }
// The anchor location is built from the probe id with a second component of 0.
73 LineLocation Loc = LineLocation(Probe->Id, 0);
74 IRAnchors.emplace(Loc, FunctionId(CalleeName));
75 }
76 }
77 } else {
78 // TODO: For line-number based profile(AutoFDO), currently only support
79 // find callsite anchors. In future, we need to parse all the non-call
80 // instructions to extract the line locations for profile matching.
81 if (!isa<CallBase>(&I) || isa<IntrinsicInst>(&I))
82 continue;
83
84 if (DIL->getInlinedAt()) {
85 IRAnchors.emplace(FindTopLevelInlinedCallsite(DIL));
86 } else {
// NOTE(review): `Callsite` here is also defined on lines missing from this
// listing. The isa<CallBase> check above already holds at this point.
89 StringRef CalleeName = GetCanonicalCalleeName(dyn_cast<CallBase>(&I));
90 IRAnchors.emplace(Callsite, FunctionId(CalleeName));
91 }
92 }
93 }
94 }
95}
96
97void SampleProfileMatcher::findProfileAnchors(const FunctionSamples &FS,
98 AnchorMap &ProfileAnchors) {
99 auto isInvalidLineOffset = [](uint32_t LineOffset) {
100 return LineOffset & 0x8000;
101 };
102
103 auto InsertAnchor = [](const LineLocation &Loc, const FunctionId &CalleeName,
104 AnchorMap &ProfileAnchors) {
105 auto Ret = ProfileAnchors.try_emplace(Loc, CalleeName);
106 if (!Ret.second) {
107 // For multiple callees, which indicates it's an indirect call, we use a
108 // dummy name(UnknownIndirectCallee) as the indrect callee name.
109 Ret.first->second = FunctionId(UnknownIndirectCallee);
110 }
111 };
112
113 for (const auto &I : FS.getBodySamples()) {
114 const LineLocation &Loc = I.first;
115 if (isInvalidLineOffset(Loc.LineOffset))
116 continue;
117 for (const auto &C : I.second.getCallTargets())
118 InsertAnchor(Loc, C.first, ProfileAnchors);
119 }
120
121 for (const auto &I : FS.getCallsiteSamples()) {
122 const LineLocation &Loc = I.first;
123 if (isInvalidLineOffset(Loc.LineOffset))
124 continue;
125 for (const auto &C : I.second)
126 InsertAnchor(Loc, C.first, ProfileAnchors);
127 }
128}
129
130LocToLocMap SampleProfileMatcher::longestCommonSequence(
131 const AnchorList &AnchorList1, const AnchorList &AnchorList2) const {
132 int32_t Size1 = AnchorList1.size(), Size2 = AnchorList2.size(),
133 MaxDepth = Size1 + Size2;
134 auto Index = [&](int32_t I) { return I + MaxDepth; };
135
136 LocToLocMap EqualLocations;
137 if (MaxDepth == 0)
138 return EqualLocations;
139
140 // Backtrack the SES result.
141 auto Backtrack = [&](const std::vector<std::vector<int32_t>> &Trace,
142 const AnchorList &AnchorList1,
143 const AnchorList &AnchorList2,
144 LocToLocMap &EqualLocations) {
145 int32_t X = Size1, Y = Size2;
146 for (int32_t Depth = Trace.size() - 1; X > 0 || Y > 0; Depth--) {
147 const auto &P = Trace[Depth];
148 int32_t K = X - Y;
149 int32_t PrevK = K;
150 if (K == -Depth || (K != Depth && P[Index(K - 1)] < P[Index(K + 1)]))
151 PrevK = K + 1;
152 else
153 PrevK = K - 1;
154
155 int32_t PrevX = P[Index(PrevK)];
156 int32_t PrevY = PrevX - PrevK;
157 while (X > PrevX && Y > PrevY) {
158 X--;
159 Y--;
160 EqualLocations.insert({AnchorList1[X].first, AnchorList2[Y].first});
161 }
162
163 if (Depth == 0)
164 break;
165
166 if (Y == PrevY)
167 X--;
168 else if (X == PrevX)
169 Y--;
170 X = PrevX;
171 Y = PrevY;
172 }
173 };
174
175 // The greedy LCS/SES algorithm.
176
177 // An array contains the endpoints of the furthest reaching D-paths.
178 std::vector<int32_t> V(2 * MaxDepth + 1, -1);
179 V[Index(1)] = 0;
180 // Trace is used to backtrack the SES result.
181 std::vector<std::vector<int32_t>> Trace;
182 for (int32_t Depth = 0; Depth <= MaxDepth; Depth++) {
183 Trace.push_back(V);
184 for (int32_t K = -Depth; K <= Depth; K += 2) {
185 int32_t X = 0, Y = 0;
186 if (K == -Depth || (K != Depth && V[Index(K - 1)] < V[Index(K + 1)]))
187 X = V[Index(K + 1)];
188 else
189 X = V[Index(K - 1)] + 1;
190 Y = X - K;
191 while (X < Size1 && Y < Size2 &&
192 AnchorList1[X].second == AnchorList2[Y].second)
193 X++, Y++;
194
195 V[Index(K)] = X;
196
197 if (X >= Size1 && Y >= Size2) {
198 // Length of an SES is D.
199 Backtrack(Trace, AnchorList1, AnchorList2, EqualLocations);
200 return EqualLocations;
201 }
202 }
203 }
204 // Length of an SES is greater than MaxDepth.
205 return EqualLocations;
206}
207
208void SampleProfileMatcher::matchNonCallsiteLocs(
209 const LocToLocMap &MatchedAnchors, const AnchorMap &IRAnchors,
210 LocToLocMap &IRToProfileLocationMap) {
211 auto InsertMatching = [&](const LineLocation &From, const LineLocation &To) {
212 // Skip the unchanged location mapping to save memory.
213 if (From != To)
214 IRToProfileLocationMap.insert({From, To});
215 };
216
217 // Use function's beginning location as the initial anchor.
218 int32_t LocationDelta = 0;
219 SmallVector<LineLocation> LastMatchedNonAnchors;
220 for (const auto &IR : IRAnchors) {
221 const auto &Loc = IR.first;
222 bool IsMatchedAnchor = false;
223 // Match the anchor location in lexical order.
224 auto R = MatchedAnchors.find(Loc);
225 if (R != MatchedAnchors.end()) {
226 const auto &Candidate = R->second;
227 InsertMatching(Loc, Candidate);
228 LLVM_DEBUG(dbgs() << "Callsite with callee:" << IR.second.stringRef()
229 << " is matched from " << Loc << " to " << Candidate
230 << "\n");
231 LocationDelta = Candidate.LineOffset - Loc.LineOffset;
232
233 // Match backwards for non-anchor locations.
234 // The locations in LastMatchedNonAnchors have been matched forwards
235 // based on the previous anchor, spilt it evenly and overwrite the
236 // second half based on the current anchor.
237 for (size_t I = (LastMatchedNonAnchors.size() + 1) / 2;
238 I < LastMatchedNonAnchors.size(); I++) {
239 const auto &L = LastMatchedNonAnchors[I];
240 uint32_t CandidateLineOffset = L.LineOffset + LocationDelta;
241 LineLocation Candidate(CandidateLineOffset, L.Discriminator);
242 InsertMatching(L, Candidate);
243 LLVM_DEBUG(dbgs() << "Location is rematched backwards from " << L
244 << " to " << Candidate << "\n");
245 }
246
247 IsMatchedAnchor = true;
248 LastMatchedNonAnchors.clear();
249 }
250
251 // Match forwards for non-anchor locations.
252 if (!IsMatchedAnchor) {
253 uint32_t CandidateLineOffset = Loc.LineOffset + LocationDelta;
254 LineLocation Candidate(CandidateLineOffset, Loc.Discriminator);
255 InsertMatching(Loc, Candidate);
256 LLVM_DEBUG(dbgs() << "Location is matched from " << Loc << " to "
257 << Candidate << "\n");
258 LastMatchedNonAnchors.emplace_back(Loc);
259 }
260 }
261}
262
263// Call target name anchor based profile fuzzy matching.
264// Input:
265// For IR locations, the anchor is the callee name of direct callsite; For
266// profile locations, it's the call target name for BodySamples or inlinee's
267// profile name for CallsiteSamples.
268// Matching heuristic:
269// First match all the anchors using the diff algorithm, then split the
270// non-anchor locations between the two anchors evenly, first half are matched
271// based on the start anchor, second half are matched based on the end anchor.
272// For example, given:
273// IR locations: [1, 2(foo), 3, 5, 6(bar), 7]
274// Profile locations: [1, 2, 3(foo), 4, 7, 8(bar), 9]
275// The matching gives:
276// [1, 2(foo), 3, 5, 6(bar), 7]
277// | | | | | |
278// [1, 2, 3(foo), 4, 7, 8(bar), 9]
279// The output mapping: [2->3, 3->4, 5->7, 6->8, 7->9].
280void SampleProfileMatcher::runStaleProfileMatching(
281 const Function &F, const AnchorMap &IRAnchors,
282 const AnchorMap &ProfileAnchors, LocToLocMap &IRToProfileLocationMap) {
283 LLVM_DEBUG(dbgs() << "Run stale profile matching for " << F.getName()
284 << "\n");
285 assert(IRToProfileLocationMap.empty() &&
286 "Run stale profile matching only once per function");
287
288 AnchorList FilteredProfileAnchorList;
289 for (const auto &I : ProfileAnchors)
290 FilteredProfileAnchorList.emplace_back(I);
291
292 AnchorList FilteredIRAnchorsList;
293 // Filter the non-callsite from IRAnchors.
294 for (const auto &I : IRAnchors) {
295 if (I.second.stringRef().empty())
296 continue;
297 FilteredIRAnchorsList.emplace_back(I);
298 }
299
300 if (FilteredIRAnchorsList.empty() || FilteredProfileAnchorList.empty())
301 return;
302
303 // Match the callsite anchors by finding the longest common subsequence
304 // between IR and profile. Note that we need to use IR anchor as base(A side)
305 // to align with the order of IRToProfileLocationMap.
306 LocToLocMap MatchedAnchors =
307 longestCommonSequence(FilteredIRAnchorsList, FilteredProfileAnchorList);
308
309 // Match the non-callsite locations and write the result to
310 // IRToProfileLocationMap.
311 matchNonCallsiteLocs(MatchedAnchors, IRAnchors, IRToProfileLocationMap);
312}
313
// Per-function driver: collects IR and profile anchors for F, records the
// callsite match states, and (inside a guarded region) runs stale profile
// matching and records the match states again with the matching result.
// NOTE(review): this listing omits several original source lines (336, 341,
// 347-348 and 357 by the embedded numbering), including the head of the `if`
// whose condition ends with the profileIsValid() check and any guards around
// the recordCallsiteMatchStates()/addFnAttr() calls — confirm against the
// complete file.
314void SampleProfileMatcher::runOnFunction(Function &F) {
315 // We need to use flattened function samples for matching.
316 // Unlike IR, which includes all callsites from the source code, the callsites
317 // in profile only show up when they are hit by samples, i,e. the profile
318 // callsites in one context may differ from those in another context. To get
319 // the maximum number of callsites, we merge the function profiles from all
320 // contexts, aka, the flattened profile to find profile anchors.
321 const auto *FSFlattened = getFlattenedSamplesFor(F);
// Without a flattened profile for F there is nothing to match.
322 if (!FSFlattened)
323 return;
324
325 // Anchors for IR. It's a map from IR location to callee name, callee name is
326 // empty for non-call instruction and use a dummy name(UnknownIndirectCallee)
327 // for unknown indrect callee name.
328 AnchorMap IRAnchors;
329 findIRAnchors(F, IRAnchors);
330 // Anchors for profile. It's a map from callsite location to a set of callee
331 // name.
332 AnchorMap ProfileAnchors;
333 findProfileAnchors(*FSFlattened, ProfileAnchors);
334
335 // Compute the callsite match states for profile staleness report.
// The null map argument selects the pre-match phase of
// recordCallsiteMatchStates.
337 recordCallsiteMatchStates(F, IRAnchors, ProfileAnchors, nullptr);
338
339 // For probe-based profiles, run matching only when the current profile is not
340 // valid.
// NOTE(review): the start of this condition is on a line missing from this
// listing.
342 !ProbeManager->profileIsValid(F, *FSFlattened))) {
343 // For imported functions, the checksum metadata(pseudo_probe_desc) are
344 // dropped, so we leverage function attribute(profile-checksum-mismatch) to
345 // transfer the info: add the attribute during pre-link phase and check it
346 // during post-link phase(see "profileIsValid").
349 F.addFnAttr("profile-checksum-mismatch");
350
351 // The matching result will be saved to IRToProfileLocationMap, create a
352 // new map for each function.
353 auto &IRToProfileLocationMap = getIRToProfileLocationMap(F);
354 runStaleProfileMatching(F, IRAnchors, ProfileAnchors,
355 IRToProfileLocationMap);
356 // Find and update callsite match states after matching.
// Passing the map selects the post-match phase.
358 recordCallsiteMatchStates(F, IRAnchors, ProfileAnchors,
359 &IRToProfileLocationMap);
360 }
361}
362
363void SampleProfileMatcher::recordCallsiteMatchStates(
364 const Function &F, const AnchorMap &IRAnchors,
365 const AnchorMap &ProfileAnchors,
366 const LocToLocMap *IRToProfileLocationMap) {
367 bool IsPostMatch = IRToProfileLocationMap != nullptr;
368 auto &CallsiteMatchStates =
369 FuncCallsiteMatchStates[FunctionSamples::getCanonicalFnName(F.getName())];
370
371 auto MapIRLocToProfileLoc = [&](const LineLocation &IRLoc) {
372 // IRToProfileLocationMap is null in pre-match phrase.
373 if (!IRToProfileLocationMap)
374 return IRLoc;
375 const auto &ProfileLoc = IRToProfileLocationMap->find(IRLoc);
376 if (ProfileLoc != IRToProfileLocationMap->end())
377 return ProfileLoc->second;
378 else
379 return IRLoc;
380 };
381
382 for (const auto &I : IRAnchors) {
383 // After fuzzy profile matching, use the matching result to remap the
384 // current IR callsite.
385 const auto &ProfileLoc = MapIRLocToProfileLoc(I.first);
386 const auto &IRCalleeId = I.second;
387 const auto &It = ProfileAnchors.find(ProfileLoc);
388 if (It == ProfileAnchors.end())
389 continue;
390 const auto &ProfCalleeId = It->second;
391 if (IRCalleeId == ProfCalleeId) {
392 auto It = CallsiteMatchStates.find(ProfileLoc);
393 if (It == CallsiteMatchStates.end())
394 CallsiteMatchStates.emplace(ProfileLoc, MatchState::InitialMatch);
395 else if (IsPostMatch) {
396 if (It->second == MatchState::InitialMatch)
397 It->second = MatchState::UnchangedMatch;
398 else if (It->second == MatchState::InitialMismatch)
399 It->second = MatchState::RecoveredMismatch;
400 }
401 }
402 }
403
404 // Check if there are any callsites in the profile that does not match to any
405 // IR callsites.
406 for (const auto &I : ProfileAnchors) {
407 const auto &Loc = I.first;
408 assert(!I.second.stringRef().empty() && "Callees should not be empty");
409 auto It = CallsiteMatchStates.find(Loc);
410 if (It == CallsiteMatchStates.end())
411 CallsiteMatchStates.emplace(Loc, MatchState::InitialMismatch);
412 else if (IsPostMatch) {
413 // Update the state if it's not matched(UnchangedMatch or
414 // RecoveredMismatch).
415 if (It->second == MatchState::InitialMismatch)
416 It->second = MatchState::UnchangedMismatch;
417 else if (It->second == MatchState::InitialMatch)
418 It->second = MatchState::RemovedMatch;
419 }
420 }
421}
422
423void SampleProfileMatcher::countMismatchedFuncSamples(const FunctionSamples &FS,
424 bool IsTopLevel) {
425 const auto *FuncDesc = ProbeManager->getDesc(FS.getGUID());
426 // Skip the function that is external or renamed.
427 if (!FuncDesc)
428 return;
429
430 if (ProbeManager->profileIsHashMismatched(*FuncDesc, FS)) {
431 if (IsTopLevel)
432 NumStaleProfileFunc++;
433 // Given currently all probe ids are after block probe ids, once the
434 // checksum is mismatched, it's likely all the callites are mismatched and
435 // dropped. We conservatively count all the samples as mismatched and stop
436 // counting the inlinees' profiles.
437 MismatchedFunctionSamples += FS.getTotalSamples();
438 return;
439 }
440
441 // Even the current-level function checksum is matched, it's possible that the
442 // nested inlinees' checksums are mismatched that affect the inlinee's sample
443 // loading, we need to go deeper to check the inlinees' function samples.
444 // Similarly, count all the samples as mismatched if the inlinee's checksum is
445 // mismatched using this recursive function.
446 for (const auto &I : FS.getCallsiteSamples())
447 for (const auto &CS : I.second)
448 countMismatchedFuncSamples(CS.second, false);
449}
450
451void SampleProfileMatcher::countMismatchedCallsiteSamples(
452 const FunctionSamples &FS) {
453 auto It = FuncCallsiteMatchStates.find(FS.getFuncName());
454 // Skip it if no mismatched callsite or this is an external function.
455 if (It == FuncCallsiteMatchStates.end() || It->second.empty())
456 return;
457 const auto &CallsiteMatchStates = It->second;
458
459 auto findMatchState = [&](const LineLocation &Loc) {
460 auto It = CallsiteMatchStates.find(Loc);
461 if (It == CallsiteMatchStates.end())
462 return MatchState::Unknown;
463 return It->second;
464 };
465
466 auto AttributeMismatchedSamples = [&](const enum MatchState &State,
467 uint64_t Samples) {
468 if (isMismatchState(State))
469 MismatchedCallsiteSamples += Samples;
470 else if (State == MatchState::RecoveredMismatch)
471 RecoveredCallsiteSamples += Samples;
472 };
473
474 // The non-inlined callsites are saved in the body samples of function
475 // profile, go through it to count the non-inlined callsite samples.
476 for (const auto &I : FS.getBodySamples())
477 AttributeMismatchedSamples(findMatchState(I.first), I.second.getSamples());
478
479 // Count the inlined callsite samples.
480 for (const auto &I : FS.getCallsiteSamples()) {
481 auto State = findMatchState(I.first);
482 uint64_t CallsiteSamples = 0;
483 for (const auto &CS : I.second)
484 CallsiteSamples += CS.second.getTotalSamples();
485 AttributeMismatchedSamples(State, CallsiteSamples);
486
487 if (isMismatchState(State))
488 continue;
489
490 // When the current level of inlined call site matches the profiled call
491 // site, we need to go deeper along the inline tree to count mismatches from
492 // lower level inlinees.
493 for (const auto &CS : I.second)
494 countMismatchedCallsiteSamples(CS.second);
495 }
496}
497
498void SampleProfileMatcher::countMismatchCallsites(const FunctionSamples &FS) {
499 auto It = FuncCallsiteMatchStates.find(FS.getFuncName());
500 // Skip it if no mismatched callsite or this is an external function.
501 if (It == FuncCallsiteMatchStates.end() || It->second.empty())
502 return;
503 const auto &MatchStates = It->second;
504 [[maybe_unused]] bool OnInitialState =
505 isInitialState(MatchStates.begin()->second);
506 for (const auto &I : MatchStates) {
507 TotalProfiledCallsites++;
508 assert(
509 (OnInitialState ? isInitialState(I.second) : isFinalState(I.second)) &&
510 "Profile matching state is inconsistent");
511
512 if (isMismatchState(I.second))
513 NumMismatchedCallsites++;
514 else if (I.second == MatchState::RecoveredMismatch)
515 NumRecoveredCallsites++;
516 }
517}
518
// Aggregates the staleness counters over the functions of the module, prints
// a textual summary to errs() and attaches the counters to the module as an
// operand of the "llvm.stats" named metadata.
// NOTE(review): this listing omits many original source lines (520, 525, 529,
// 538, 546-547, 566 and 570-571 by the embedded numbering), including the
// conditions that guard the early return, the `continue`s, the reporting and
// persisting regions, and the declaration of `ProfStatsVec` — confirm against
// the complete file before changing control flow here.
519void SampleProfileMatcher::computeAndReportProfileStaleness() {
// NOTE(review): the condition guarding this early return is not visible.
521 return;
522
523 // Count profile mismatches for profile staleness report.
524 for (const auto &F : M) {
// NOTE(review): the condition guarding this `continue` is not visible.
526 continue;
527 // As the stats will be merged by linker, skip reporting the metrics for
528 // imported functions to avoid repeated counting.
530 continue;
531 const auto *FS = Reader.getSamplesFor(F);
// Functions without samples do not contribute to the statistics.
532 if (!FS)
533 continue;
534 TotalProfiledFunc++;
535 TotalFunctionSamples += FS->getTotalSamples();
536
537 // Checksum mismatch is only used in pseudo-probe mode.
538 countMismatchedFuncSamples(*FS, true);
540
541 // Count mismatches and samples for calliste.
542 countMismatchCallsites(*FS);
543 countMismatchedCallsiteSamples(*FS);
544 }
545
// Textual report. NOTE(review): the opening conditions of the two brace
// levels closed below are not visible in this listing.
548 errs() << "(" << NumStaleProfileFunc << "/" << TotalProfiledFunc
549 << ") of functions' profile are invalid and ("
550 << MismatchedFunctionSamples << "/" << TotalFunctionSamples
551 << ") of samples are discarded due to function hash mismatch.\n";
552 }
553 errs() << "(" << (NumMismatchedCallsites + NumRecoveredCallsites) << "/"
554 << TotalProfiledCallsites
555 << ") of callsites' profile are invalid and ("
556 << (MismatchedCallsiteSamples + RecoveredCallsiteSamples) << "/"
557 << TotalFunctionSamples
558 << ") of samples are discarded due to callsite location mismatch.\n";
559 errs() << "(" << NumRecoveredCallsites << "/"
560 << (NumRecoveredCallsites + NumMismatchedCallsites)
561 << ") of callsites and (" << RecoveredCallsiteSamples << "/"
562 << (RecoveredCallsiteSamples + MismatchedCallsiteSamples)
563 << ") of samples are recovered by stale profile matching.\n";
564 }
565
// Persist the counters as metadata on the module.
567 LLVMContext &Ctx = M.getContext();
568 MDBuilder MDB(Ctx);
569
// NOTE(review): the declaration of `ProfStatsVec` and the condition guarding
// the four function-level entries below are not visible in this listing.
572 ProfStatsVec.emplace_back("NumStaleProfileFunc", NumStaleProfileFunc);
573 ProfStatsVec.emplace_back("TotalProfiledFunc", TotalProfiledFunc);
574 ProfStatsVec.emplace_back("MismatchedFunctionSamples",
575 MismatchedFunctionSamples);
576 ProfStatsVec.emplace_back("TotalFunctionSamples", TotalFunctionSamples);
577 }
578
579 ProfStatsVec.emplace_back("NumMismatchedCallsites", NumMismatchedCallsites);
580 ProfStatsVec.emplace_back("NumRecoveredCallsites", NumRecoveredCallsites);
581 ProfStatsVec.emplace_back("TotalProfiledCallsites", TotalProfiledCallsites);
582 ProfStatsVec.emplace_back("MismatchedCallsiteSamples",
583 MismatchedCallsiteSamples);
584 ProfStatsVec.emplace_back("RecoveredCallsiteSamples",
585 RecoveredCallsiteSamples);
586
// Wrap the collected name/value pairs in a metadata node and append it to the
// module-level "llvm.stats" named metadata.
587 auto *MD = MDB.createLLVMStats(ProfStatsVec);
588 auto *NMD = M.getOrInsertNamedMetadata("llvm.stats");
589 NMD->addOperand(MD);
590 }
591}
592
// NOTE(review): the signature of this function and several of its lines (593,
// 595, 597 and 601 by the embedded numbering) are missing from this listing:
// the remaining arguments of flattenProfile(), the condition guarding the
// `continue`, and whatever guards the distribute call. The visible part
// flattens the reader's profiles into FlattenedProfiles, runs the matcher on
// the functions of the module, distributes the resulting location maps and
// finally reports profile staleness.
594 ProfileConverter::flattenProfile(Reader.getProfiles(), FlattenedProfiles,
596 for (auto &F : M) {
// NOTE(review): the condition guarding this `continue` is not visible.
598 continue;
599 runOnFunction(F);
600 }
// Hand the per-function matching results over to the profiles.
602 distributeIRToProfileLocationMap();
603
604 computeAndReportProfileStaleness();
605}
606
607void SampleProfileMatcher::distributeIRToProfileLocationMap(
608 FunctionSamples &FS) {
609 const auto ProfileMappings = FuncMappings.find(FS.getFuncName());
610 if (ProfileMappings != FuncMappings.end()) {
611 FS.setIRToProfileLocationMap(&(ProfileMappings->second));
612 }
613
614 for (auto &Callees :
615 const_cast<CallsiteSampleMap &>(FS.getCallsiteSamples())) {
616 for (auto &FS : Callees.second) {
617 distributeIRToProfileLocationMap(FS.second);
618 }
619 }
620}
621
622// Use a central place to distribute the matching results. Outlined and inlined
623// profile with the function name will be set to the same pointer.
624void SampleProfileMatcher::distributeIRToProfileLocationMap() {
625 for (auto &I : Reader.getProfiles()) {
626 distributeIRToProfileLocationMap(I.second);
627 }
628}
BlockVerifier::State From
#define LLVM_DEBUG(X)
Definition: Debug.h:101
static GCMetadataPrinterRegistry::Add< ErlangGCPrinter > X("erlang", "erlang-compatible garbage collector")
static const unsigned MaxDepth
Legalize the Machine IR a function s Machine IR
Definition: Legalizer.cpp:81
#define F(x, y, z)
Definition: MD5.cpp:55
#define I(x, y, z)
Definition: MD5.cpp:58
static GCMetadataPrinterRegistry::Add< OcamlGCMetadataPrinter > Y("ocaml", "ocaml 3.10-compatible collector")
#define P(N)
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
cl::opt< bool > ReportProfileStaleness
cl::opt< bool > SalvageStaleProfile
cl::opt< bool > PersistProfileStaleness
This file provides the interface for SampleProfileMatcher.
cl::opt< bool > PersistProfileStaleness("persist-profile-staleness", cl::Hidden, cl::init(false), cl::desc("Compute stale profile statistical metrics and write it into the " "native object file(.llvm_stats section)."))
cl::opt< bool > ReportProfileStaleness("report-profile-staleness", cl::Hidden, cl::init(false), cl::desc("Compute and report stale profile statistical metrics."))
cl::opt< bool > SalvageStaleProfile("salvage-stale-profile", cl::Hidden, cl::init(false), cl::desc("Salvage stale profile by fuzzy matching and use the remapped " "location for sample profile query."))
Base class for all callable instructions (InvokeInst and CallInst) Holds everything related to callin...
Definition: InstrTypes.h:1494
Debug location.
static bool isAvailableExternallyLinkage(LinkageTypes Linkage)
Definition: GlobalValue.h:378
This is an important class for using LLVM in a threaded context.
Definition: LLVMContext.h:67
bool profileIsHashMismatched(const PseudoProbeDescriptor &FuncDesc, const FunctionSamples &Samples) const
bool profileIsValid(const Function &F, const FunctionSamples &Samples) const
const PseudoProbeDescriptor * getDesc(uint64_t GUID) const
size_t size() const
Definition: SmallVector.h:91
reference emplace_back(ArgTypes &&... Args)
Definition: SmallVector.h:950
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1209
iterator end()
Definition: StringMap.h:220
iterator find(StringRef Key)
Definition: StringMap.h:233
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:50
unsigned size() const
Definition: Trace.h:95
This class represents a function that is read from a sample profile.
Definition: FunctionId.h:36
Representation of the samples collected for a function.
Definition: SampleProf.h:744
static StringRef getCanonicalFnName(const Function &F)
Return the canonical name for a function, taking into account suffix elision policy attributes.
Definition: SampleProf.h:1085
static bool ProfileIsFS
If this profile uses flow sensitive discriminators.
Definition: SampleProf.h:1196
static LineLocation getCallSiteIdentifier(const DILocation *DIL, bool ProfileIsFS=false)
Returns a unique call site identifier for a given debug location of a call instruction.
Definition: SampleProf.cpp:221
static void flattenProfile(SampleProfileMap &ProfileMap, bool ProfileIsCS=false)
Definition: SampleProf.h:1417
SampleProfileMap & getProfiles()
Return all the profiles.
FunctionSamples * getSamplesFor(const Function &F)
Return the samples collected for function F.
@ C
The default llvm calling convention, compatible with C.
Definition: CallingConv.h:34
@ FS
Definition: X86.h:206
std::unordered_map< LineLocation, LineLocation, LineLocationHash > LocToLocMap
Definition: SampleProf.h:737
std::map< LineLocation, FunctionSamplesMap > CallsiteSampleMap
Definition: SampleProf.h:735
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
std::vector< std::pair< LineLocation, FunctionId > > AnchorList
std::map< LineLocation, FunctionId > AnchorMap
@ ThinLTOPreLink
ThinLTO prelink (summary) phase.
std::optional< PseudoProbe > extractProbe(const Instruction &Inst)
Definition: PseudoProbe.cpp:56
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:163
raw_fd_ostream & errs()
This returns a reference to a raw_ostream for standard error.
static bool skipProfileForFunction(const Function &F)
Represents the relative location of an instruction.
Definition: SampleProf.h:280