19using namespace sampleprof;
21#define DEBUG_TYPE "sample-profile-matcher"
27void SampleProfileMatcher::findIRAnchors(
28 const Function &
F, std::map<LineLocation, StringRef> &IRAnchors) {
32 auto FindTopLevelInlinedCallsite = [](
const DILocation *DIL) {
33 assert((DIL && DIL->getInlinedAt()) &&
"No inlined callsite");
37 DIL = DIL->getInlinedAt();
38 }
while (DIL->getInlinedAt());
41 StringRef CalleeName = PrevDIL->getSubprogramLinkageName();
42 return std::make_pair(Callsite, CalleeName);
45 auto GetCanonicalCalleeName = [](
const CallBase *CB) {
46 StringRef CalleeName = UnknownIndirectCallee;
47 if (
Function *Callee = CB->getCalledFunction())
62 if (DIL->getInlinedAt()) {
63 IRAnchors.emplace(FindTopLevelInlinedCallsite(DIL));
67 if (
const auto *CB = dyn_cast<CallBase>(&
I)) {
69 if (!isa<IntrinsicInst>(&
I))
70 CalleeName = GetCanonicalCalleeName(CB);
72 IRAnchors.emplace(
LineLocation(Probe->Id, 0), CalleeName);
79 if (!isa<CallBase>(&
I) || isa<IntrinsicInst>(&
I))
82 if (DIL->getInlinedAt()) {
83 IRAnchors.emplace(FindTopLevelInlinedCallsite(DIL));
86 StringRef CalleeName = GetCanonicalCalleeName(dyn_cast<CallBase>(&
I));
87 IRAnchors.emplace(Callsite, CalleeName);
94void SampleProfileMatcher::findProfileAnchors(
96 std::map<
LineLocation, std::unordered_set<FunctionId>> &ProfileAnchors) {
97 auto isInvalidLineOffset = [](
uint32_t LineOffset) {
98 return LineOffset & 0x8000;
101 for (
const auto &
I :
FS.getBodySamples()) {
105 for (
const auto &
I :
I.second.getCallTargets()) {
107 ProfileAnchors.try_emplace(Loc, std::unordered_set<FunctionId>());
108 Ret.first->second.insert(
I.first);
112 for (
const auto &
I :
FS.getCallsiteSamples()) {
116 const auto &CalleeMap =
I.second;
117 for (
const auto &
I : CalleeMap) {
119 ProfileAnchors.try_emplace(Loc, std::unordered_set<FunctionId>());
120 Ret.first->second.insert(
I.first);
142void SampleProfileMatcher::runStaleProfileMatching(
143 const Function &
F,
const std::map<LineLocation, StringRef> &IRAnchors,
144 const std::map<
LineLocation, std::unordered_set<FunctionId>>
149 assert(IRToProfileLocationMap.empty() &&
150 "Run stale profile matching only once per function");
152 std::unordered_map<FunctionId, std::set<LineLocation>> CalleeToCallsitesMap;
153 for (
const auto &
I : ProfileAnchors) {
154 const auto &Loc =
I.first;
155 const auto &Callees =
I.second;
157 if (Callees.size() == 1) {
159 const auto &Candidates = CalleeToCallsitesMap.try_emplace(
160 CalleeName, std::set<LineLocation>());
161 Candidates.first->second.insert(Loc);
168 IRToProfileLocationMap.insert({
From, To});
172 int32_t LocationDelta = 0;
175 for (
const auto &
IR : IRAnchors) {
176 const auto &Loc =
IR.first;
177 auto CalleeName =
IR.second;
178 bool IsMatchedAnchor =
false;
180 if (!CalleeName.
empty()) {
181 auto CandidateAnchors =
183 if (CandidateAnchors != CalleeToCallsitesMap.end() &&
184 !CandidateAnchors->second.empty()) {
185 auto CI = CandidateAnchors->second.begin();
186 const auto Candidate = *CI;
187 CandidateAnchors->second.erase(CI);
188 InsertMatching(Loc, Candidate);
190 <<
" is matched from " << Loc <<
" to " << Candidate
192 LocationDelta = Candidate.LineOffset - Loc.
LineOffset;
198 for (
size_t I = (LastMatchedNonAnchors.
size() + 1) / 2;
199 I < LastMatchedNonAnchors.
size();
I++) {
200 const auto &
L = LastMatchedNonAnchors[
I];
201 uint32_t CandidateLineOffset =
L.LineOffset + LocationDelta;
202 LineLocation Candidate(CandidateLineOffset,
L.Discriminator);
203 InsertMatching(L, Candidate);
205 <<
" to " << Candidate <<
"\n");
208 IsMatchedAnchor =
true;
209 LastMatchedNonAnchors.
clear();
214 if (!IsMatchedAnchor) {
217 InsertMatching(Loc, Candidate);
219 << Candidate <<
"\n");
225void SampleProfileMatcher::runOnFunction(
Function &
F) {
232 const auto *FSFlattened = getFlattenedSamplesFor(
F);
239 std::map<LineLocation, StringRef> IRAnchors;
240 findIRAnchors(
F, IRAnchors);
243 std::map<LineLocation, std::unordered_set<FunctionId>> ProfileAnchors;
244 findProfileAnchors(*FSFlattened, ProfileAnchors);
248 recordCallsiteMatchStates(
F, IRAnchors, ProfileAnchors,
nullptr);
260 F.addFnAttr(
"profile-checksum-mismatch");
264 auto &IRToProfileLocationMap = getIRToProfileLocationMap(
F);
265 runStaleProfileMatching(
F, IRAnchors, ProfileAnchors,
266 IRToProfileLocationMap);
269 recordCallsiteMatchStates(
F, IRAnchors, ProfileAnchors,
270 &IRToProfileLocationMap);
274void SampleProfileMatcher::recordCallsiteMatchStates(
275 const Function &
F,
const std::map<LineLocation, StringRef> &IRAnchors,
276 const std::map<
LineLocation, std::unordered_set<FunctionId>>
279 bool IsPostMatch = IRToProfileLocationMap !=
nullptr;
280 auto &CallsiteMatchStates =
283 auto MapIRLocToProfileLoc = [&](
const LineLocation &IRLoc) {
285 if (!IRToProfileLocationMap)
287 const auto &ProfileLoc = IRToProfileLocationMap->find(IRLoc);
288 if (ProfileLoc != IRToProfileLocationMap->end())
289 return ProfileLoc->second;
294 for (
const auto &
I : IRAnchors) {
297 const auto &ProfileLoc = MapIRLocToProfileLoc(
I.first);
298 const auto &IRCalleeName =
I.second;
299 const auto &It = ProfileAnchors.find(ProfileLoc);
300 if (It == ProfileAnchors.end())
302 const auto &Callees = It->second;
304 bool IsCallsiteMatched =
false;
309 if (IRCalleeName == SampleProfileMatcher::UnknownIndirectCallee)
310 IsCallsiteMatched =
true;
311 else if (Callees.size() == 1 && Callees.count(
getRepInFormat(IRCalleeName)))
312 IsCallsiteMatched =
true;
314 if (IsCallsiteMatched) {
315 auto It = CallsiteMatchStates.find(ProfileLoc);
316 if (It == CallsiteMatchStates.end())
317 CallsiteMatchStates.emplace(ProfileLoc, MatchState::InitialMatch);
318 else if (IsPostMatch) {
319 if (It->second == MatchState::InitialMatch)
320 It->second = MatchState::UnchangedMatch;
321 else if (It->second == MatchState::InitialMismatch)
322 It->second = MatchState::RecoveredMismatch;
329 for (
const auto &
I : ProfileAnchors) {
330 const auto &Loc =
I.first;
331 [[maybe_unused]]
const auto &Callees =
I.second;
332 assert(!Callees.empty() &&
"Callees should not be empty");
333 auto It = CallsiteMatchStates.find(Loc);
334 if (It == CallsiteMatchStates.end())
335 CallsiteMatchStates.emplace(Loc, MatchState::InitialMismatch);
336 else if (IsPostMatch) {
339 if (It->second == MatchState::InitialMismatch)
340 It->second = MatchState::UnchangedMismatch;
341 else if (It->second == MatchState::InitialMatch)
342 It->second = MatchState::RemovedMatch;
347void SampleProfileMatcher::countMismatchedFuncSamples(
const FunctionSamples &FS,
349 const auto *FuncDesc = ProbeManager->
getDesc(
FS.getGUID());
356 NumStaleProfileFunc++;
361 MismatchedFunctionSamples +=
FS.getTotalSamples();
370 for (
const auto &
I :
FS.getCallsiteSamples())
371 for (
const auto &CS :
I.second)
372 countMismatchedFuncSamples(CS.second,
false);
375void SampleProfileMatcher::countMismatchedCallsiteSamples(
377 auto It = FuncCallsiteMatchStates.find(
FS.getFuncName());
379 if (It == FuncCallsiteMatchStates.end() || It->second.empty())
381 const auto &CallsiteMatchStates = It->second;
384 auto It = CallsiteMatchStates.find(Loc);
385 if (It == CallsiteMatchStates.end())
386 return MatchState::Unknown;
390 auto AttributeMismatchedSamples = [&](
const enum MatchState &State,
392 if (isMismatchState(State))
393 MismatchedCallsiteSamples += Samples;
394 else if (State == MatchState::RecoveredMismatch)
395 RecoveredCallsiteSamples += Samples;
400 for (
const auto &
I :
FS.getBodySamples())
401 AttributeMismatchedSamples(findMatchState(
I.first),
I.second.getSamples());
404 for (
const auto &
I :
FS.getCallsiteSamples()) {
405 auto State = findMatchState(
I.first);
407 for (
const auto &CS :
I.second)
408 CallsiteSamples += CS.second.getTotalSamples();
409 AttributeMismatchedSamples(State, CallsiteSamples);
411 if (isMismatchState(State))
417 for (
const auto &CS :
I.second)
418 countMismatchedCallsiteSamples(CS.second);
422void SampleProfileMatcher::countMismatchCallsites(
const FunctionSamples &FS) {
423 auto It = FuncCallsiteMatchStates.find(
FS.getFuncName());
425 if (It == FuncCallsiteMatchStates.end() || It->second.empty())
427 const auto &MatchStates = It->second;
428 [[maybe_unused]]
bool OnInitialState =
429 isInitialState(MatchStates.begin()->second);
430 for (
const auto &
I : MatchStates) {
431 TotalProfiledCallsites++;
433 (OnInitialState ? isInitialState(
I.second) : isFinalState(
I.second)) &&
434 "Profile matching state is inconsistent");
436 if (isMismatchState(
I.second))
437 NumMismatchedCallsites++;
438 else if (
I.second == MatchState::RecoveredMismatch)
439 NumRecoveredCallsites++;
443void SampleProfileMatcher::computeAndReportProfileStaleness() {
448 for (
const auto &
F : M) {
459 TotalFunctionSamples +=
FS->getTotalSamples();
463 countMismatchedFuncSamples(*FS,
true);
466 countMismatchCallsites(*FS);
467 countMismatchedCallsiteSamples(*FS);
472 errs() <<
"(" << NumStaleProfileFunc <<
"/" << TotalProfiledFunc
473 <<
") of functions' profile are invalid and ("
474 << MismatchedFunctionSamples <<
"/" << TotalFunctionSamples
475 <<
") of samples are discarded due to function hash mismatch.\n";
477 errs() <<
"(" << (NumMismatchedCallsites + NumRecoveredCallsites) <<
"/"
478 << TotalProfiledCallsites
479 <<
") of callsites' profile are invalid and ("
480 << (MismatchedCallsiteSamples + RecoveredCallsiteSamples) <<
"/"
481 << TotalFunctionSamples
482 <<
") of samples are discarded due to callsite location mismatch.\n";
483 errs() <<
"(" << NumRecoveredCallsites <<
"/"
484 << (NumRecoveredCallsites + NumMismatchedCallsites)
485 <<
") of callsites and (" << RecoveredCallsiteSamples <<
"/"
486 << (RecoveredCallsiteSamples + MismatchedCallsiteSamples)
487 <<
") of samples are recovered by stale profile matching.\n";
496 ProfStatsVec.
emplace_back(
"NumStaleProfileFunc", NumStaleProfileFunc);
497 ProfStatsVec.
emplace_back(
"TotalProfiledFunc", TotalProfiledFunc);
499 MismatchedFunctionSamples);
500 ProfStatsVec.
emplace_back(
"TotalFunctionSamples", TotalFunctionSamples);
503 ProfStatsVec.
emplace_back(
"NumMismatchedCallsites", NumMismatchedCallsites);
504 ProfStatsVec.
emplace_back(
"NumRecoveredCallsites", NumRecoveredCallsites);
505 ProfStatsVec.
emplace_back(
"TotalProfiledCallsites", TotalProfiledCallsites);
507 MismatchedCallsiteSamples);
509 RecoveredCallsiteSamples);
511 auto *MD = MDB.createLLVMStats(ProfStatsVec);
512 auto *NMD =
M.getOrInsertNamedMetadata(
"llvm.stats");
526 distributeIRToProfileLocationMap();
528 computeAndReportProfileStaleness();
531void SampleProfileMatcher::distributeIRToProfileLocationMap(
533 const auto ProfileMappings = FuncMappings.
find(FS.getFuncName());
534 if (ProfileMappings != FuncMappings.
end()) {
535 FS.setIRToProfileLocationMap(&(ProfileMappings->second));
540 for (
auto &FS : Callees.second) {
541 distributeIRToProfileLocationMap(FS.second);
548void SampleProfileMatcher::distributeIRToProfileLocationMap() {
550 distributeIRToProfileLocationMap(
I.second);
BlockVerifier::State From
Legalize the Machine IR a function s Machine IR
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
cl::opt< bool > ReportProfileStaleness
cl::opt< bool > SalvageStaleProfile
cl::opt< bool > PersistProfileStaleness
This file provides the interface for SampleProfileMatcher.
cl::opt< bool > PersistProfileStaleness("persist-profile-staleness", cl::Hidden, cl::init(false), cl::desc("Compute stale profile statistical metrics and write it into the " "native object file(.llvm_stats section)."))
cl::opt< bool > ReportProfileStaleness("report-profile-staleness", cl::Hidden, cl::init(false), cl::desc("Compute and report stale profile statistical metrics."))
cl::opt< bool > SalvageStaleProfile("salvage-stale-profile", cl::Hidden, cl::init(false), cl::desc("Salvage stale profile by fuzzy matching and use the remapped " "location for sample profile query."))
Base class for all callable instructions (InvokeInst and CallInst) Holds everything related to callin...
static bool isAvailableExternallyLinkage(LinkageTypes Linkage)
This is an important class for using LLVM in a threaded context.
bool profileIsHashMismatched(const PseudoProbeDescriptor &FuncDesc, const FunctionSamples &Samples) const
bool profileIsValid(const Function &F, const FunctionSamples &Samples) const
const PseudoProbeDescriptor * getDesc(uint64_t GUID) const
reference emplace_back(ArgTypes &&... Args)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
iterator find(StringRef Key)
StringRef - Represent a constant reference to a string, i.e.
This class represents a function that is read from a sample profile.
Representation of the samples collected for a function.
static bool ProfileIsProbeBased
static StringRef getCanonicalFnName(const Function &F)
Return the canonical name for a function, taking into account suffix elision policy attributes.
static LineLocation getCallSiteIdentifier(const DILocation *DIL, bool ProfileIsFS=false)
Returns a unique call site identifier for a given debug location of a call instruction.
static void flattenProfile(SampleProfileMap &ProfileMap, bool ProfileIsCS=false)
SampleProfileMap & getProfiles()
Return all the profiles.
FunctionSamples * getSamplesFor(const Function &F)
Return the samples collected for function F.
static FunctionId getRepInFormat(StringRef Name)
Get the proper representation of a string according to whether the current Format uses MD5 to represe...
std::unordered_map< LineLocation, LineLocation, LineLocationHash > LocToLocMap
std::map< LineLocation, FunctionSamplesMap > CallsiteSampleMap
This is an optimization pass for GlobalISel generic memory operations.
@ ThinLTOPreLink
ThinLTO prelink (summary) phase.
std::optional< PseudoProbe > extractProbe(const Instruction &Inst)
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
raw_fd_ostream & errs()
This returns a reference to a raw_ostream for standard error.
static bool skipProfileForFunction(const Function &F)
Represents the relative location of an instruction.