20using namespace sampleprof;
22#define DEBUG_TYPE "sample-profile-matcher"
30 cl::desc(
"The maximum number of callsites in a function, above which stale "
31 "profile matching will be skipped."));
33void SampleProfileMatcher::findIRAnchors(
const Function &
F,
38 auto FindTopLevelInlinedCallsite = [](
const DILocation *DIL) {
39 assert((DIL && DIL->getInlinedAt()) &&
"No inlined callsite");
43 DIL = DIL->getInlinedAt();
44 }
while (DIL->getInlinedAt());
48 StringRef CalleeName = PrevDIL->getSubprogramLinkageName();
49 return std::make_pair(Callsite,
FunctionId(CalleeName));
52 auto GetCanonicalCalleeName = [](
const CallBase *CB) {
53 StringRef CalleeName = UnknownIndirectCallee;
54 if (
Function *Callee = CB->getCalledFunction())
69 if (DIL->getInlinedAt()) {
70 IRAnchors.emplace(FindTopLevelInlinedCallsite(DIL));
74 if (
const auto *CB = dyn_cast<CallBase>(&
I)) {
76 if (!isa<IntrinsicInst>(&
I))
77 CalleeName = GetCanonicalCalleeName(CB);
80 IRAnchors.emplace(Loc,
FunctionId(CalleeName));
87 if (!isa<CallBase>(&
I) || isa<IntrinsicInst>(&
I))
90 if (DIL->getInlinedAt()) {
91 IRAnchors.emplace(FindTopLevelInlinedCallsite(DIL));
95 StringRef CalleeName = GetCanonicalCalleeName(dyn_cast<CallBase>(&
I));
96 IRAnchors.emplace(Callsite,
FunctionId(CalleeName));
103void SampleProfileMatcher::findProfileAnchors(
const FunctionSamples &FS,
105 auto isInvalidLineOffset = [](
uint32_t LineOffset) {
106 return LineOffset & 0x8000;
111 auto Ret = ProfileAnchors.try_emplace(Loc, CalleeName);
119 for (
const auto &
I :
FS.getBodySamples()) {
123 for (
const auto &
C :
I.second.getCallTargets())
124 InsertAnchor(Loc,
C.first, ProfileAnchors);
127 for (
const auto &
I :
FS.getCallsiteSamples()) {
131 for (
const auto &
C :
I.second)
132 InsertAnchor(Loc,
C.first, ProfileAnchors);
136LocToLocMap SampleProfileMatcher::longestCommonSequence(
138 int32_t Size1 = AnchorList1.size(), Size2 = AnchorList2.size(),
144 return EqualLocations;
147 auto Backtrack = [&](
const std::vector<std::vector<int32_t>> &
Trace,
151 int32_t
X = Size1,
Y = Size2;
161 int32_t PrevX =
P[
Index(PrevK)];
162 int32_t PrevY = PrevX - PrevK;
163 while (
X > PrevX &&
Y > PrevY) {
166 EqualLocations.insert({AnchorList1[
X].first, AnchorList2[
Y].first});
184 std::vector<int32_t>
V(2 *
MaxDepth + 1, -1);
187 std::vector<std::vector<int32_t>>
Trace;
191 int32_t
X = 0,
Y = 0;
197 while (
X < Size1 &&
Y < Size2 &&
198 AnchorList1[
X].second == AnchorList2[
Y].second)
203 if (
X >= Size1 &&
Y >= Size2) {
205 Backtrack(
Trace, AnchorList1, AnchorList2, EqualLocations);
206 return EqualLocations;
211 return EqualLocations;
214void SampleProfileMatcher::matchNonCallsiteLocs(
220 IRToProfileLocationMap.insert({
From, To});
224 int32_t LocationDelta = 0;
226 for (
const auto &
IR : IRAnchors) {
227 const auto &Loc =
IR.first;
228 bool IsMatchedAnchor =
false;
230 auto R = MatchedAnchors.find(Loc);
231 if (R != MatchedAnchors.end()) {
232 const auto &Candidate =
R->second;
233 InsertMatching(Loc, Candidate);
235 <<
" is matched from " << Loc <<
" to " << Candidate
237 LocationDelta = Candidate.LineOffset - Loc.
LineOffset;
243 for (
size_t I = (LastMatchedNonAnchors.
size() + 1) / 2;
244 I < LastMatchedNonAnchors.
size();
I++) {
245 const auto &
L = LastMatchedNonAnchors[
I];
246 uint32_t CandidateLineOffset =
L.LineOffset + LocationDelta;
247 LineLocation Candidate(CandidateLineOffset,
L.Discriminator);
248 InsertMatching(L, Candidate);
250 <<
" to " << Candidate <<
"\n");
253 IsMatchedAnchor =
true;
254 LastMatchedNonAnchors.
clear();
258 if (!IsMatchedAnchor) {
261 InsertMatching(Loc, Candidate);
263 << Candidate <<
"\n");
286void SampleProfileMatcher::runStaleProfileMatching(
291 assert(IRToProfileLocationMap.empty() &&
292 "Run stale profile matching only once per function");
295 for (
const auto &
I : ProfileAnchors)
296 FilteredProfileAnchorList.emplace_back(
I);
300 for (
const auto &
I : IRAnchors) {
301 if (
I.second.stringRef().empty())
303 FilteredIRAnchorsList.emplace_back(
I);
306 if (FilteredIRAnchorsList.empty() || FilteredProfileAnchorList.empty())
312 <<
" because the number of callsites in the IR is "
313 << FilteredIRAnchorsList.size()
314 <<
" and in the profile is "
315 << FilteredProfileAnchorList.size() <<
"\n");
323 longestCommonSequence(FilteredIRAnchorsList, FilteredProfileAnchorList);
327 matchNonCallsiteLocs(MatchedAnchors, IRAnchors, IRToProfileLocationMap);
330void SampleProfileMatcher::runOnFunction(
Function &
F) {
337 const auto *FSFlattened = getFlattenedSamplesFor(
F);
345 findIRAnchors(
F, IRAnchors);
349 findProfileAnchors(*FSFlattened, ProfileAnchors);
353 recordCallsiteMatchStates(
F, IRAnchors, ProfileAnchors,
nullptr);
365 F.addFnAttr(
"profile-checksum-mismatch");
369 auto &IRToProfileLocationMap = getIRToProfileLocationMap(
F);
370 runStaleProfileMatching(
F, IRAnchors, ProfileAnchors,
371 IRToProfileLocationMap);
374 recordCallsiteMatchStates(
F, IRAnchors, ProfileAnchors,
375 &IRToProfileLocationMap);
379void SampleProfileMatcher::recordCallsiteMatchStates(
383 bool IsPostMatch = IRToProfileLocationMap !=
nullptr;
384 auto &CallsiteMatchStates =
387 auto MapIRLocToProfileLoc = [&](
const LineLocation &IRLoc) {
389 if (!IRToProfileLocationMap)
391 const auto &ProfileLoc = IRToProfileLocationMap->find(IRLoc);
392 if (ProfileLoc != IRToProfileLocationMap->end())
393 return ProfileLoc->second;
398 for (
const auto &
I : IRAnchors) {
401 const auto &ProfileLoc = MapIRLocToProfileLoc(
I.first);
402 const auto &IRCalleeId =
I.second;
403 const auto &It = ProfileAnchors.find(ProfileLoc);
404 if (It == ProfileAnchors.end())
406 const auto &ProfCalleeId = It->second;
407 if (IRCalleeId == ProfCalleeId) {
408 auto It = CallsiteMatchStates.find(ProfileLoc);
409 if (It == CallsiteMatchStates.end())
410 CallsiteMatchStates.emplace(ProfileLoc, MatchState::InitialMatch);
411 else if (IsPostMatch) {
412 if (It->second == MatchState::InitialMatch)
413 It->second = MatchState::UnchangedMatch;
414 else if (It->second == MatchState::InitialMismatch)
415 It->second = MatchState::RecoveredMismatch;
422 for (
const auto &
I : ProfileAnchors) {
423 const auto &Loc =
I.first;
424 assert(!
I.second.stringRef().empty() &&
"Callees should not be empty");
425 auto It = CallsiteMatchStates.find(Loc);
426 if (It == CallsiteMatchStates.end())
427 CallsiteMatchStates.emplace(Loc, MatchState::InitialMismatch);
428 else if (IsPostMatch) {
431 if (It->second == MatchState::InitialMismatch)
432 It->second = MatchState::UnchangedMismatch;
433 else if (It->second == MatchState::InitialMatch)
434 It->second = MatchState::RemovedMatch;
439void SampleProfileMatcher::countMismatchedFuncSamples(
const FunctionSamples &FS,
441 const auto *FuncDesc = ProbeManager->
getDesc(
FS.getGUID());
448 NumStaleProfileFunc++;
453 MismatchedFunctionSamples +=
FS.getTotalSamples();
462 for (
const auto &
I :
FS.getCallsiteSamples())
463 for (
const auto &CS :
I.second)
464 countMismatchedFuncSamples(CS.second,
false);
467void SampleProfileMatcher::countMismatchedCallsiteSamples(
469 auto It = FuncCallsiteMatchStates.find(
FS.getFuncName());
471 if (It == FuncCallsiteMatchStates.end() || It->second.empty())
473 const auto &CallsiteMatchStates = It->second;
476 auto It = CallsiteMatchStates.find(Loc);
477 if (It == CallsiteMatchStates.end())
478 return MatchState::Unknown;
482 auto AttributeMismatchedSamples = [&](
const enum MatchState &State,
484 if (isMismatchState(State))
485 MismatchedCallsiteSamples += Samples;
486 else if (State == MatchState::RecoveredMismatch)
487 RecoveredCallsiteSamples += Samples;
492 for (
const auto &
I :
FS.getBodySamples())
493 AttributeMismatchedSamples(findMatchState(
I.first),
I.second.getSamples());
496 for (
const auto &
I :
FS.getCallsiteSamples()) {
497 auto State = findMatchState(
I.first);
499 for (
const auto &CS :
I.second)
500 CallsiteSamples += CS.second.getTotalSamples();
501 AttributeMismatchedSamples(State, CallsiteSamples);
503 if (isMismatchState(State))
509 for (
const auto &CS :
I.second)
510 countMismatchedCallsiteSamples(CS.second);
514void SampleProfileMatcher::countMismatchCallsites(
const FunctionSamples &FS) {
515 auto It = FuncCallsiteMatchStates.find(
FS.getFuncName());
517 if (It == FuncCallsiteMatchStates.end() || It->second.empty())
519 const auto &MatchStates = It->second;
520 [[maybe_unused]]
bool OnInitialState =
521 isInitialState(MatchStates.begin()->second);
522 for (
const auto &
I : MatchStates) {
523 TotalProfiledCallsites++;
525 (OnInitialState ? isInitialState(
I.second) : isFinalState(
I.second)) &&
526 "Profile matching state is inconsistent");
528 if (isMismatchState(
I.second))
529 NumMismatchedCallsites++;
530 else if (
I.second == MatchState::RecoveredMismatch)
531 NumRecoveredCallsites++;
535void SampleProfileMatcher::computeAndReportProfileStaleness() {
540 for (
const auto &
F : M) {
551 TotalFunctionSamples +=
FS->getTotalSamples();
555 countMismatchedFuncSamples(*FS,
true);
558 countMismatchCallsites(*FS);
559 countMismatchedCallsiteSamples(*FS);
564 errs() <<
"(" << NumStaleProfileFunc <<
"/" << TotalProfiledFunc
565 <<
") of functions' profile are invalid and ("
566 << MismatchedFunctionSamples <<
"/" << TotalFunctionSamples
567 <<
") of samples are discarded due to function hash mismatch.\n";
569 errs() <<
"(" << (NumMismatchedCallsites + NumRecoveredCallsites) <<
"/"
570 << TotalProfiledCallsites
571 <<
") of callsites' profile are invalid and ("
572 << (MismatchedCallsiteSamples + RecoveredCallsiteSamples) <<
"/"
573 << TotalFunctionSamples
574 <<
") of samples are discarded due to callsite location mismatch.\n";
575 errs() <<
"(" << NumRecoveredCallsites <<
"/"
576 << (NumRecoveredCallsites + NumMismatchedCallsites)
577 <<
") of callsites and (" << RecoveredCallsiteSamples <<
"/"
578 << (RecoveredCallsiteSamples + MismatchedCallsiteSamples)
579 <<
") of samples are recovered by stale profile matching.\n";
588 ProfStatsVec.
emplace_back(
"NumStaleProfileFunc", NumStaleProfileFunc);
589 ProfStatsVec.
emplace_back(
"TotalProfiledFunc", TotalProfiledFunc);
591 MismatchedFunctionSamples);
592 ProfStatsVec.
emplace_back(
"TotalFunctionSamples", TotalFunctionSamples);
595 ProfStatsVec.
emplace_back(
"NumMismatchedCallsites", NumMismatchedCallsites);
596 ProfStatsVec.
emplace_back(
"NumRecoveredCallsites", NumRecoveredCallsites);
597 ProfStatsVec.
emplace_back(
"TotalProfiledCallsites", TotalProfiledCallsites);
599 MismatchedCallsiteSamples);
601 RecoveredCallsiteSamples);
603 auto *MD = MDB.createLLVMStats(ProfStatsVec);
604 auto *NMD =
M.getOrInsertNamedMetadata(
"llvm.stats");
618 distributeIRToProfileLocationMap();
620 computeAndReportProfileStaleness();
623void SampleProfileMatcher::distributeIRToProfileLocationMap(
625 const auto ProfileMappings = FuncMappings.
find(FS.getFuncName());
626 if (ProfileMappings != FuncMappings.
end()) {
627 FS.setIRToProfileLocationMap(&(ProfileMappings->second));
632 for (
auto &FS : Callees.second) {
633 distributeIRToProfileLocationMap(FS.second);
640void SampleProfileMatcher::distributeIRToProfileLocationMap() {
642 distributeIRToProfileLocationMap(
I.second);
BlockVerifier::State From
static GCMetadataPrinterRegistry::Add< ErlangGCPrinter > X("erlang", "erlang-compatible garbage collector")
static const unsigned MaxDepth
Legalize the Machine IR a function s Machine IR
static GCMetadataPrinterRegistry::Add< OcamlGCMetadataPrinter > Y("ocaml", "ocaml 3.10-compatible collector")
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
cl::opt< bool > ReportProfileStaleness
cl::opt< bool > SalvageStaleProfile
static cl::opt< unsigned > SalvageStaleProfileMaxCallsites("salvage-stale-profile-max-callsites", cl::Hidden, cl::init(UINT_MAX), cl::desc("The maximum number of callsites in a function, above which stale " "profile matching will be skipped."))
cl::opt< bool > PersistProfileStaleness
This file provides the interface for SampleProfileMatcher.
cl::opt< bool > PersistProfileStaleness("persist-profile-staleness", cl::Hidden, cl::init(false), cl::desc("Compute stale profile statistical metrics and write it into the " "native object file(.llvm_stats section)."))
cl::opt< bool > ReportProfileStaleness("report-profile-staleness", cl::Hidden, cl::init(false), cl::desc("Compute and report stale profile statistical metrics."))
cl::opt< bool > SalvageStaleProfile("salvage-stale-profile", cl::Hidden, cl::init(false), cl::desc("Salvage stale profile by fuzzy matching and use the remapped " "location for sample profile query."))
Base class for all callable instructions (InvokeInst and CallInst) Holds everything related to callin...
static bool isAvailableExternallyLinkage(LinkageTypes Linkage)
This is an important class for using LLVM in a threaded context.
bool profileIsHashMismatched(const PseudoProbeDescriptor &FuncDesc, const FunctionSamples &Samples) const
bool profileIsValid(const Function &F, const FunctionSamples &Samples) const
const PseudoProbeDescriptor * getDesc(uint64_t GUID) const
reference emplace_back(ArgTypes &&... Args)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
iterator find(StringRef Key)
StringRef - Represent a constant reference to a string, i.e.
This class represents a function that is read from a sample profile.
Representation of the samples collected for a function.
static bool ProfileIsProbeBased
static StringRef getCanonicalFnName(const Function &F)
Return the canonical name for a function, taking into account suffix elision policy attributes.
static bool ProfileIsFS
If this profile uses flow sensitive discriminators.
static LineLocation getCallSiteIdentifier(const DILocation *DIL, bool ProfileIsFS=false)
Returns a unique call site identifier for a given debug location of a call instruction.
static void flattenProfile(SampleProfileMap &ProfileMap, bool ProfileIsCS=false)
SampleProfileMap & getProfiles()
Return all the profiles.
FunctionSamples * getSamplesFor(const Function &F)
Return the samples collected for function F.
@ C
The default llvm calling convention, compatible with C.
initializer< Ty > init(const Ty &Val)
std::unordered_map< LineLocation, LineLocation, LineLocationHash > LocToLocMap
std::map< LineLocation, FunctionSamplesMap > CallsiteSampleMap
This is an optimization pass for GlobalISel generic memory operations.
std::vector< std::pair< LineLocation, FunctionId > > AnchorList
std::map< LineLocation, FunctionId > AnchorMap
@ ThinLTOPreLink
ThinLTO prelink (summary) phase.
std::optional< PseudoProbe > extractProbe(const Instruction &Inst)
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
raw_fd_ostream & errs()
This returns a reference to a raw_ostream for standard error.
static bool skipProfileForFunction(const Function &F)
Represents the relative location of an instruction.