19using namespace sampleprof;
21#define DEBUG_TYPE "sample-profile-matcher"
27void SampleProfileMatcher::findIRAnchors(
const Function &
F,
32 auto FindTopLevelInlinedCallsite = [](
const DILocation *DIL) {
33 assert((DIL && DIL->getInlinedAt()) &&
"No inlined callsite");
37 DIL = DIL->getInlinedAt();
38 }
while (DIL->getInlinedAt());
42 StringRef CalleeName = PrevDIL->getSubprogramLinkageName();
43 return std::make_pair(Callsite,
FunctionId(CalleeName));
46 auto GetCanonicalCalleeName = [](
const CallBase *CB) {
47 StringRef CalleeName = UnknownIndirectCallee;
48 if (
Function *Callee = CB->getCalledFunction())
63 if (DIL->getInlinedAt()) {
64 IRAnchors.emplace(FindTopLevelInlinedCallsite(DIL));
68 if (
const auto *CB = dyn_cast<CallBase>(&
I)) {
70 if (!isa<IntrinsicInst>(&
I))
71 CalleeName = GetCanonicalCalleeName(CB);
74 IRAnchors.emplace(Loc,
FunctionId(CalleeName));
81 if (!isa<CallBase>(&
I) || isa<IntrinsicInst>(&
I))
84 if (DIL->getInlinedAt()) {
85 IRAnchors.emplace(FindTopLevelInlinedCallsite(DIL));
89 StringRef CalleeName = GetCanonicalCalleeName(dyn_cast<CallBase>(&
I));
90 IRAnchors.emplace(Callsite,
FunctionId(CalleeName));
99 auto isInvalidLineOffset = [](
uint32_t LineOffset) {
100 return LineOffset & 0x8000;
105 auto Ret = ProfileAnchors.try_emplace(Loc, CalleeName);
113 for (
const auto &
I :
FS.getBodySamples()) {
117 for (
const auto &
C :
I.second.getCallTargets())
118 InsertAnchor(Loc,
C.first, ProfileAnchors);
121 for (
const auto &
I :
FS.getCallsiteSamples()) {
125 for (
const auto &
C :
I.second)
126 InsertAnchor(Loc,
C.first, ProfileAnchors);
130LocToLocMap SampleProfileMatcher::longestCommonSequence(
132 int32_t Size1 = AnchorList1.size(), Size2 = AnchorList2.size(),
138 return EqualLocations;
141 auto Backtrack = [&](
const std::vector<std::vector<int32_t>> &
Trace,
145 int32_t
X = Size1,
Y = Size2;
155 int32_t PrevX =
P[
Index(PrevK)];
156 int32_t PrevY = PrevX - PrevK;
157 while (
X > PrevX &&
Y > PrevY) {
160 EqualLocations.insert({AnchorList1[
X].first, AnchorList2[
Y].first});
178 std::vector<int32_t>
V(2 *
MaxDepth + 1, -1);
181 std::vector<std::vector<int32_t>>
Trace;
185 int32_t
X = 0,
Y = 0;
191 while (
X < Size1 &&
Y < Size2 &&
192 AnchorList1[
X].second == AnchorList2[
Y].second)
197 if (
X >= Size1 &&
Y >= Size2) {
199 Backtrack(
Trace, AnchorList1, AnchorList2, EqualLocations);
200 return EqualLocations;
205 return EqualLocations;
208void SampleProfileMatcher::matchNonCallsiteLocs(
214 IRToProfileLocationMap.insert({
From, To});
218 int32_t LocationDelta = 0;
220 for (
const auto &
IR : IRAnchors) {
221 const auto &Loc =
IR.first;
222 bool IsMatchedAnchor =
false;
224 auto R = MatchedAnchors.find(Loc);
225 if (R != MatchedAnchors.end()) {
226 const auto &Candidate =
R->second;
227 InsertMatching(Loc, Candidate);
229 <<
" is matched from " << Loc <<
" to " << Candidate
231 LocationDelta = Candidate.LineOffset - Loc.
LineOffset;
237 for (
size_t I = (LastMatchedNonAnchors.
size() + 1) / 2;
238 I < LastMatchedNonAnchors.
size();
I++) {
239 const auto &
L = LastMatchedNonAnchors[
I];
240 uint32_t CandidateLineOffset =
L.LineOffset + LocationDelta;
241 LineLocation Candidate(CandidateLineOffset,
L.Discriminator);
242 InsertMatching(L, Candidate);
244 <<
" to " << Candidate <<
"\n");
247 IsMatchedAnchor =
true;
248 LastMatchedNonAnchors.
clear();
252 if (!IsMatchedAnchor) {
255 InsertMatching(Loc, Candidate);
257 << Candidate <<
"\n");
280void SampleProfileMatcher::runStaleProfileMatching(
285 assert(IRToProfileLocationMap.empty() &&
286 "Run stale profile matching only once per function");
289 for (
const auto &
I : ProfileAnchors)
290 FilteredProfileAnchorList.emplace_back(
I);
294 for (
const auto &
I : IRAnchors) {
295 if (
I.second.stringRef().empty())
297 FilteredIRAnchorsList.emplace_back(
I);
300 if (FilteredIRAnchorsList.empty() || FilteredProfileAnchorList.empty())
307 longestCommonSequence(FilteredIRAnchorsList, FilteredProfileAnchorList);
311 matchNonCallsiteLocs(MatchedAnchors, IRAnchors, IRToProfileLocationMap);
314void SampleProfileMatcher::runOnFunction(
Function &
F) {
321 const auto *FSFlattened = getFlattenedSamplesFor(
F);
329 findIRAnchors(
F, IRAnchors);
333 findProfileAnchors(*FSFlattened, ProfileAnchors);
337 recordCallsiteMatchStates(
F, IRAnchors, ProfileAnchors,
nullptr);
349 F.addFnAttr(
"profile-checksum-mismatch");
353 auto &IRToProfileLocationMap = getIRToProfileLocationMap(
F);
354 runStaleProfileMatching(
F, IRAnchors, ProfileAnchors,
355 IRToProfileLocationMap);
358 recordCallsiteMatchStates(
F, IRAnchors, ProfileAnchors,
359 &IRToProfileLocationMap);
363void SampleProfileMatcher::recordCallsiteMatchStates(
367 bool IsPostMatch = IRToProfileLocationMap !=
nullptr;
368 auto &CallsiteMatchStates =
371 auto MapIRLocToProfileLoc = [&](
const LineLocation &IRLoc) {
373 if (!IRToProfileLocationMap)
375 const auto &ProfileLoc = IRToProfileLocationMap->find(IRLoc);
376 if (ProfileLoc != IRToProfileLocationMap->end())
377 return ProfileLoc->second;
382 for (
const auto &
I : IRAnchors) {
385 const auto &ProfileLoc = MapIRLocToProfileLoc(
I.first);
386 const auto &IRCalleeId =
I.second;
387 const auto &It = ProfileAnchors.find(ProfileLoc);
388 if (It == ProfileAnchors.end())
390 const auto &ProfCalleeId = It->second;
391 if (IRCalleeId == ProfCalleeId) {
392 auto It = CallsiteMatchStates.find(ProfileLoc);
393 if (It == CallsiteMatchStates.end())
394 CallsiteMatchStates.emplace(ProfileLoc, MatchState::InitialMatch);
395 else if (IsPostMatch) {
396 if (It->second == MatchState::InitialMatch)
397 It->second = MatchState::UnchangedMatch;
398 else if (It->second == MatchState::InitialMismatch)
399 It->second = MatchState::RecoveredMismatch;
406 for (
const auto &
I : ProfileAnchors) {
407 const auto &Loc =
I.first;
408 assert(!
I.second.stringRef().empty() &&
"Callees should not be empty");
409 auto It = CallsiteMatchStates.find(Loc);
410 if (It == CallsiteMatchStates.end())
411 CallsiteMatchStates.emplace(Loc, MatchState::InitialMismatch);
412 else if (IsPostMatch) {
415 if (It->second == MatchState::InitialMismatch)
416 It->second = MatchState::UnchangedMismatch;
417 else if (It->second == MatchState::InitialMatch)
418 It->second = MatchState::RemovedMatch;
423void SampleProfileMatcher::countMismatchedFuncSamples(
const FunctionSamples &FS,
425 const auto *FuncDesc = ProbeManager->
getDesc(
FS.getGUID());
432 NumStaleProfileFunc++;
437 MismatchedFunctionSamples +=
FS.getTotalSamples();
446 for (
const auto &
I :
FS.getCallsiteSamples())
447 for (
const auto &CS :
I.second)
448 countMismatchedFuncSamples(CS.second,
false);
451void SampleProfileMatcher::countMismatchedCallsiteSamples(
453 auto It = FuncCallsiteMatchStates.find(
FS.getFuncName());
455 if (It == FuncCallsiteMatchStates.end() || It->second.empty())
457 const auto &CallsiteMatchStates = It->second;
460 auto It = CallsiteMatchStates.find(Loc);
461 if (It == CallsiteMatchStates.end())
462 return MatchState::Unknown;
466 auto AttributeMismatchedSamples = [&](
const enum MatchState &State,
468 if (isMismatchState(State))
469 MismatchedCallsiteSamples += Samples;
470 else if (State == MatchState::RecoveredMismatch)
471 RecoveredCallsiteSamples += Samples;
476 for (
const auto &
I :
FS.getBodySamples())
477 AttributeMismatchedSamples(findMatchState(
I.first),
I.second.getSamples());
480 for (
const auto &
I :
FS.getCallsiteSamples()) {
481 auto State = findMatchState(
I.first);
483 for (
const auto &CS :
I.second)
484 CallsiteSamples += CS.second.getTotalSamples();
485 AttributeMismatchedSamples(State, CallsiteSamples);
487 if (isMismatchState(State))
493 for (
const auto &CS :
I.second)
494 countMismatchedCallsiteSamples(CS.second);
498void SampleProfileMatcher::countMismatchCallsites(
const FunctionSamples &FS) {
499 auto It = FuncCallsiteMatchStates.find(
FS.getFuncName());
501 if (It == FuncCallsiteMatchStates.end() || It->second.empty())
503 const auto &MatchStates = It->second;
504 [[maybe_unused]]
bool OnInitialState =
505 isInitialState(MatchStates.begin()->second);
506 for (
const auto &
I : MatchStates) {
507 TotalProfiledCallsites++;
509 (OnInitialState ? isInitialState(
I.second) : isFinalState(
I.second)) &&
510 "Profile matching state is inconsistent");
512 if (isMismatchState(
I.second))
513 NumMismatchedCallsites++;
514 else if (
I.second == MatchState::RecoveredMismatch)
515 NumRecoveredCallsites++;
519void SampleProfileMatcher::computeAndReportProfileStaleness() {
524 for (
const auto &
F : M) {
535 TotalFunctionSamples +=
FS->getTotalSamples();
539 countMismatchedFuncSamples(*FS,
true);
542 countMismatchCallsites(*FS);
543 countMismatchedCallsiteSamples(*FS);
548 errs() <<
"(" << NumStaleProfileFunc <<
"/" << TotalProfiledFunc
549 <<
") of functions' profile are invalid and ("
550 << MismatchedFunctionSamples <<
"/" << TotalFunctionSamples
551 <<
") of samples are discarded due to function hash mismatch.\n";
553 errs() <<
"(" << (NumMismatchedCallsites + NumRecoveredCallsites) <<
"/"
554 << TotalProfiledCallsites
555 <<
") of callsites' profile are invalid and ("
556 << (MismatchedCallsiteSamples + RecoveredCallsiteSamples) <<
"/"
557 << TotalFunctionSamples
558 <<
") of samples are discarded due to callsite location mismatch.\n";
559 errs() <<
"(" << NumRecoveredCallsites <<
"/"
560 << (NumRecoveredCallsites + NumMismatchedCallsites)
561 <<
") of callsites and (" << RecoveredCallsiteSamples <<
"/"
562 << (RecoveredCallsiteSamples + MismatchedCallsiteSamples)
563 <<
") of samples are recovered by stale profile matching.\n";
572 ProfStatsVec.
emplace_back(
"NumStaleProfileFunc", NumStaleProfileFunc);
573 ProfStatsVec.
emplace_back(
"TotalProfiledFunc", TotalProfiledFunc);
575 MismatchedFunctionSamples);
576 ProfStatsVec.
emplace_back(
"TotalFunctionSamples", TotalFunctionSamples);
579 ProfStatsVec.
emplace_back(
"NumMismatchedCallsites", NumMismatchedCallsites);
580 ProfStatsVec.
emplace_back(
"NumRecoveredCallsites", NumRecoveredCallsites);
581 ProfStatsVec.
emplace_back(
"TotalProfiledCallsites", TotalProfiledCallsites);
583 MismatchedCallsiteSamples);
585 RecoveredCallsiteSamples);
587 auto *MD = MDB.createLLVMStats(ProfStatsVec);
588 auto *NMD =
M.getOrInsertNamedMetadata(
"llvm.stats");
602 distributeIRToProfileLocationMap();
604 computeAndReportProfileStaleness();
607void SampleProfileMatcher::distributeIRToProfileLocationMap(
609 const auto ProfileMappings = FuncMappings.
find(FS.getFuncName());
610 if (ProfileMappings != FuncMappings.
end()) {
611 FS.setIRToProfileLocationMap(&(ProfileMappings->second));
616 for (
auto &FS : Callees.second) {
617 distributeIRToProfileLocationMap(FS.second);
624void SampleProfileMatcher::distributeIRToProfileLocationMap() {
626 distributeIRToProfileLocationMap(
I.second);
BlockVerifier::State From
static GCMetadataPrinterRegistry::Add< ErlangGCPrinter > X("erlang", "erlang-compatible garbage collector")
static const unsigned MaxDepth
Legalize the Machine IR a function s Machine IR
static GCMetadataPrinterRegistry::Add< OcamlGCMetadataPrinter > Y("ocaml", "ocaml 3.10-compatible collector")
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
cl::opt< bool > ReportProfileStaleness
cl::opt< bool > SalvageStaleProfile
cl::opt< bool > PersistProfileStaleness
This file provides the interface for SampleProfileMatcher.
cl::opt< bool > PersistProfileStaleness("persist-profile-staleness", cl::Hidden, cl::init(false), cl::desc("Compute stale profile statistical metrics and write it into the " "native object file(.llvm_stats section)."))
cl::opt< bool > ReportProfileStaleness("report-profile-staleness", cl::Hidden, cl::init(false), cl::desc("Compute and report stale profile statistical metrics."))
cl::opt< bool > SalvageStaleProfile("salvage-stale-profile", cl::Hidden, cl::init(false), cl::desc("Salvage stale profile by fuzzy matching and use the remapped " "location for sample profile query."))
Base class for all callable instructions (InvokeInst and CallInst) Holds everything related to callin...
static bool isAvailableExternallyLinkage(LinkageTypes Linkage)
This is an important class for using LLVM in a threaded context.
bool profileIsHashMismatched(const PseudoProbeDescriptor &FuncDesc, const FunctionSamples &Samples) const
bool profileIsValid(const Function &F, const FunctionSamples &Samples) const
const PseudoProbeDescriptor * getDesc(uint64_t GUID) const
reference emplace_back(ArgTypes &&... Args)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
iterator find(StringRef Key)
StringRef - Represent a constant reference to a string, i.e.
This class represents a function that is read from a sample profile.
Representation of the samples collected for a function.
static bool ProfileIsProbeBased
static StringRef getCanonicalFnName(const Function &F)
Return the canonical name for a function, taking into account suffix elision policy attributes.
static bool ProfileIsFS
If this profile uses flow sensitive discriminators.
static LineLocation getCallSiteIdentifier(const DILocation *DIL, bool ProfileIsFS=false)
Returns a unique call site identifier for a given debug location of a call instruction.
static void flattenProfile(SampleProfileMap &ProfileMap, bool ProfileIsCS=false)
SampleProfileMap & getProfiles()
Return all the profiles.
FunctionSamples * getSamplesFor(const Function &F)
Return the samples collected for function F.
@ C
The default llvm calling convention, compatible with C.
std::unordered_map< LineLocation, LineLocation, LineLocationHash > LocToLocMap
std::map< LineLocation, FunctionSamplesMap > CallsiteSampleMap
This is an optimization pass for GlobalISel generic memory operations.
std::vector< std::pair< LineLocation, FunctionId > > AnchorList
std::map< LineLocation, FunctionId > AnchorMap
@ ThinLTOPreLink
ThinLTO prelink (summary) phase.
std::optional< PseudoProbe > extractProbe(const Instruction &Inst)
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
raw_fd_ostream & errs()
This returns a reference to a raw_ostream for standard error.
static bool skipProfileForFunction(const Function &F)
Represents the relative location of an instruction.