89#include <system_error>
94using namespace sampleprof;
97#define DEBUG_TYPE "sample-profile"
98#define CSINLINE_DEBUG DEBUG_TYPE "-inline"
101 "Number of functions inlined with context sensitive profile");
103 "Number of functions not inlined with context sensitive profile");
105 "Number of functions with CFG mismatched profile");
106STATISTIC(NumMatchedProfile,
"Number of functions with CFG matched profile");
108 "Number of inlined callsites with a partial distribution factor");
111 "Number of functions with FDO inline stopped due to min size limit");
113 "Number of functions with FDO inline stopped due to max size limit");
115 NumCSInlinedHitGrowthLimit,
116 "Number of functions with FDO inline stopped due to growth size limit");
133 cl::desc(
"Salvage stale profile by fuzzy matching and use the remapped "
134 "location for sample profile query."));
138 cl::desc(
"Compute and report stale profile statistical metrics."));
142 cl::desc(
"Compute stale profile statistical metrics and write it into the "
143 "native object file(.llvm_stats section)."));
148 "Use flattened profile for stale profile detection and matching."));
152 cl::desc(
"If the sample profile is accurate, we will mark all un-sampled "
153 "callsite and function as having 0 samples. Otherwise, treat "
154 "un-sampled callsites and functions conservatively as unknown. "));
158 cl::desc(
"If the sample profile is accurate, we will mark all un-sampled "
159 "branches and calls as having 0 samples. Otherwise, treat "
160 "them conservatively as unknown. "));
164 cl::desc(
"For symbols in profile symbol list, regard their profiles to "
165 "be accurate. It may be overriden by profile-sample-accurate. "));
169 cl::desc(
"Merge past inlinee's profile to outline version if sample "
170 "profile loader decided not to inline a call site. It will "
171 "only be enabled when top-down order of profile loading is "
176 cl::desc(
"Do profile annotation and inlining for functions in top-down "
177 "order of call graph during sample profile loading. It only "
178 "works for new pass manager. "));
182 cl::desc(
"Process functions in a top-down order "
183 "defined by the profiled call graph when "
184 "-sample-profile-top-down-load is on."));
188 cl::desc(
"Inline cold call sites in profile loader if it's beneficial "
196 cl::desc(
"If true, artifically skip inline transformation in sample-loader "
197 "pass, and merge (or scale) profiles (as configured by "
198 "--sample-profile-merge-inlinee)."));
203 cl::desc(
"Sort profiled recursion by edge weights."));
207 cl::desc(
"The size growth ratio limit for proirity-based sample profile "
208 "loader inlining."));
212 cl::desc(
"The lower bound of size growth limit for "
213 "proirity-based sample profile loader inlining."));
217 cl::desc(
"The upper bound of size growth limit for "
218 "proirity-based sample profile loader inlining."));
222 cl::desc(
"Hot callsite threshold for proirity-based sample profile loader "
227 cl::desc(
"Threshold for inlining cold callsites"));
233 "Relative hotness percentage threshold for indirect "
234 "call promotion in proirity-based sample profile loader inlining."));
239 "Skip relative hotness check for ICP up to given number of targets."));
242 "sample-profile-prioritized-inline",
cl::Hidden,
244 cl::desc(
"Use call site prioritized inlining for sample profile loader."
245 "Currently only CSSPGO is supported."));
250 cl::desc(
"Use the preinliner decisions stored in profile context."));
253 "sample-profile-recursive-inline",
cl::Hidden,
255 cl::desc(
"Allow sample loader inliner to inline recursive calls."));
260 "Optimization remarks file containing inline remarks to be replayed "
261 "by inlining from sample profile loader."),
265 "sample-profile-inline-replay-scope",
266 cl::init(ReplayInlinerSettings::Scope::Function),
268 "Replay on functions that have remarks associated "
269 "with them (default)"),
270 clEnumValN(ReplayInlinerSettings::Scope::Module,
"Module",
271 "Replay on the entire module")),
272 cl::desc(
"Whether inline replay should be applied to the entire "
273 "Module or just the Functions (default) that are present as "
274 "callers in remarks during sample profile inlining."),
278 "sample-profile-inline-replay-fallback",
279 cl::init(ReplayInlinerSettings::Fallback::Original),
282 ReplayInlinerSettings::Fallback::Original,
"Original",
283 "All decisions not in replay send to original advisor (default)"),
284 clEnumValN(ReplayInlinerSettings::Fallback::AlwaysInline,
285 "AlwaysInline",
"All decisions not in replay are inlined"),
286 clEnumValN(ReplayInlinerSettings::Fallback::NeverInline,
"NeverInline",
287 "All decisions not in replay are not inlined")),
288 cl::desc(
"How sample profile inline replay treats sites that don't come "
289 "from the replay. Original: defers to original advisor, "
290 "AlwaysInline: inline all sites not in replay, NeverInline: "
291 "inline no sites not in replay"),
295 "sample-profile-inline-replay-format",
296 cl::init(CallSiteFormat::Format::LineColumnDiscriminator),
298 clEnumValN(CallSiteFormat::Format::Line,
"Line",
"<Line Number>"),
299 clEnumValN(CallSiteFormat::Format::LineColumn,
"LineColumn",
300 "<Line Number>:<Column Number>"),
301 clEnumValN(CallSiteFormat::Format::LineDiscriminator,
302 "LineDiscriminator",
"<Line Number>.<Discriminator>"),
303 clEnumValN(CallSiteFormat::Format::LineColumnDiscriminator,
304 "LineColumnDiscriminator",
305 "<Line Number>:<Column Number>.<Discriminator> (default)")),
310 cl::desc(
"Max number of promotions for a single indirect "
311 "call callsite in sample profile loader"));
315 cl::desc(
"Ignore existing branch weights on IR and always overwrite."));
319 cl::desc(
"Annotate LTO phase (prelink / postlink), or main (no LTO) for "
320 "sample-profile inline pass name."));
330using Edge = std::pair<const BasicBlock *, const BasicBlock *>;
335class GUIDToFuncNameMapper {
340 CurrentGUIDToFuncNameMap(GUIDToFuncNameMap) {
344 for (
const auto &
F : CurrentModule) {
346 CurrentGUIDToFuncNameMap.insert(
347 {Function::getGUID(OrigName), OrigName});
357 if (CanonName != OrigName)
358 CurrentGUIDToFuncNameMap.insert(
359 {Function::getGUID(CanonName), CanonName});
363 SetGUIDToFuncNameMapForAll(&CurrentGUIDToFuncNameMap);
366 ~GUIDToFuncNameMapper() {
370 CurrentGUIDToFuncNameMap.clear();
374 SetGUIDToFuncNameMapForAll(
nullptr);
379 std::queue<FunctionSamples *> FSToUpdate;
381 FSToUpdate.push(&IFS.second);
384 while (!FSToUpdate.empty()) {
387 FS->GUIDToFuncNameMap = Map;
388 for (
const auto &ICS : FS->getCallsiteSamples()) {
390 for (
const auto &IFS : FSMap) {
392 FSToUpdate.push(&FS);
404struct InlineCandidate {
414 float CallsiteDistribution;
418struct CandidateComparer {
419 bool operator()(
const InlineCandidate &LHS,
const InlineCandidate &RHS) {
420 if (
LHS.CallsiteCount !=
RHS.CallsiteCount)
421 return LHS.CallsiteCount <
RHS.CallsiteCount;
425 assert(LCS && RCS &&
"Expect non-null FunctionSamples");
436using CandidateQueue =
441class SampleProfileMatcher {
452 uint64_t TotalProfiledCallsites = 0;
453 uint64_t NumMismatchedCallsites = 0;
454 uint64_t MismatchedCallsiteSamples = 0;
458 uint64_t MismatchedFuncHashSamples = 0;
464 :
M(
M), Reader(Reader), ProbeManager(ProbeManager) {
475 auto It = FlattenedProfiles.find(CanonFName);
476 if (It != FlattenedProfiles.end())
481 void countProfileMismatches(
483 const std::unordered_set<LineLocation, LineLocationHash>
484 &MatchedCallsiteLocs,
490 return Ret.first->second;
492 void distributeIRToProfileLocationMap();
494 void populateProfileCallsites(
496 StringMap<std::set<LineLocation>> &CalleeToCallsitesMap);
497 void runStaleProfileMatching(
498 const std::map<LineLocation, StringRef> &IRLocations,
499 StringMap<std::set<LineLocation>> &CalleeToCallsitesMap,
508class SampleProfileLoader final
519 GetAC(
std::
move(GetAssumptionCache)),
520 GetTTI(
std::
move(GetTargetTransformInfo)), GetTLI(
std::
move(GetTLI)),
538 std::vector<const FunctionSamples *>
545 bool tryPromoteAndInlineCandidate(
551 std::optional<InlineCost> getExternalInlineAdvisorCost(
CallBase &CB);
552 bool getExternalInlineAdvisorShouldInline(
CallBase &CB);
553 InlineCost shouldInlineCandidate(InlineCandidate &Candidate);
554 bool getInlineCandidate(InlineCandidate *NewCandidate,
CallBase *CB);
556 tryInlineCandidate(InlineCandidate &Candidate,
559 inlineHotFunctionsWithPriority(
Function &
F,
563 void emitOptimizationRemarksForInlineCandidates(
566 void promoteMergeNotInlinedContextSamples(
570 std::unique_ptr<ProfiledCallGraph> buildProfiledCallGraph(
Module &M);
571 void generateMDProfMetadata(
Function &
F);
584 std::unique_ptr<SampleContextTracker> ContextTracker;
592 const std::string AnnotatedPassName;
596 std::unique_ptr<ProfileSymbolList> PSL;
607 struct NotInlinedProfileInfo {
625 bool ProfAccForSymsInList;
628 std::unique_ptr<InlineAdvisor> ExternalInlineAdvisor;
631 std::unique_ptr<SampleProfileMatcher> MatchingManager;
634 const char *getAnnotatedRemarkPassName()
const {
635 return AnnotatedPassName.c_str();
642 return getProbeWeight(Inst);
646 return std::error_code();
651 if (isa<BranchInst>(Inst) || isa<IntrinsicInst>(Inst) || isa<PHINode>(Inst))
652 return std::error_code();
661 if (
const auto *CB = dyn_cast<CallBase>(&Inst))
662 if (!CB->isIndirectCall() && findCalleeFunctionSamples(*CB))
665 return getInstWeightImpl(Inst);
681SampleProfileLoader::findCalleeFunctionSamples(
const CallBase &Inst)
const {
689 CalleeName =
Callee->getName();
692 return ContextTracker->getCalleeContextSamplesFor(Inst, CalleeName);
705std::vector<const FunctionSamples *>
706SampleProfileLoader::findIndirectCallFunctionSamples(
709 std::vector<const FunctionSamples *>
R;
716 assert(L && R &&
"Expect non-null FunctionSamples");
717 if (
L->getHeadSamplesEstimate() !=
R->getHeadSamplesEstimate())
718 return L->getHeadSamplesEstimate() >
R->getHeadSamplesEstimate();
725 ContextTracker->getIndirectCalleeContextSamplesFor(DIL);
726 if (CalleeSamples.empty())
732 for (
const auto *
const FS : CalleeSamples) {
733 Sum +=
FS->getHeadSamplesEstimate();
745 auto T =
FS->findCallTargetMapAt(CallSite);
748 for (
const auto &T_C :
T.get())
753 for (
const auto &NameFS : *M) {
754 Sum += NameFS.second.getHeadSamplesEstimate();
755 R.push_back(&NameFS.second);
763SampleProfileLoader::findFunctionSamples(
const Instruction &Inst)
const {
774 auto it = DILocation2SampleMap.try_emplace(DIL,
nullptr);
777 it.first->second = ContextTracker->getContextSamplesFor(DIL);
780 Samples->findFunctionSamples(DIL, Reader->
getRemapper());
782 return it.first->second;
795 std::unique_ptr<InstrProfValueData[]> ValueData =
799 ValueData.get(), NumVals, TotalCount,
true);
805 unsigned NumPromoted = 0;
813 if (ValueData[
I].
Value == Function::getGUID(Candidate))
842 std::unique_ptr<InstrProfValueData[]> ValueData =
846 ValueData.get(), NumVals, OldSum,
true);
852 "If sum is 0, assume only one element in CallTargets "
853 "with count being NOMORE_ICP_MAGICNUM");
857 ValueCountMap[ValueData[
I].
Value] = ValueData[
I].Count;
864 OldSum -= Pair.first->second;
874 ValueCountMap[ValueData[
I].Value] = ValueData[
I].Count;
878 for (
const auto &
Data : CallTargets) {
885 assert(Sum >=
Data.Count &&
"Sum should never be less than Data.Count");
891 for (
const auto &ValueCount : ValueCountMap) {
893 InstrProfValueData{ValueCount.first, ValueCount.second});
897 [](
const InstrProfValueData &L,
const InstrProfValueData &R) {
898 if (L.Count != R.Count)
899 return L.Count > R.Count;
900 return L.Value > R.Value;
906 NewCallTargets, Sum, IPVK_IndirectCallTarget, MaxMDCount);
919bool SampleProfileLoader::tryPromoteAndInlineCandidate(
930 auto CalleeFunctionName = Candidate.CalleeSamples->getFuncName();
935 auto &CI = *Candidate.CallInstr;
939 const char *Reason =
"Callee function not available";
946 if (!
R->getValue()->isDeclaration() &&
R->getValue()->getSubprogram() &&
947 R->getValue()->hasFnAttribute(
"use-sample-profile") &&
956 CI,
R->getValue(), Candidate.CallsiteCount, Sum,
false, ORE);
958 Sum -= Candidate.CallsiteCount;
971 Candidate.CallInstr = DI;
972 if (isa<CallInst>(DI) || isa<InvokeInst>(DI)) {
973 bool Inlined = tryInlineCandidate(Candidate, InlinedCallSite);
978 *DI,
static_cast<float>(Candidate.CallsiteCount) / SumOrigin);
985 << Candidate.CalleeSamples->getFuncName() <<
" because "
1005 if (
Cost.isAlways())
1011void SampleProfileLoader::emitOptimizationRemarksForInlineCandidates(
1014 for (
auto *
I : Candidates) {
1015 Function *CalledFunction =
I->getCalledFunction();
1016 if (CalledFunction) {
1018 "InlineAttempt",
I->getDebugLoc(),
1020 <<
"previous inlining reattempted for "
1021 << (
Hot ?
"hotness: '" :
"size: '")
1022 <<
ore::NV(
"Callee", CalledFunction) <<
"' into '"
1028void SampleProfileLoader::findExternalInlineCandidate(
1035 if (CB && getExternalInlineAdvisorShouldInline(*CB)) {
1047 assert(Samples &&
"expect non-null caller profile");
1057 std::queue<ContextTrieNode *> CalleeList;
1058 CalleeList.push(Caller);
1059 while (!CalleeList.empty()) {
1079 if (!Func ||
Func->isDeclaration())
1085 for (
const auto &TS : BS.second.getCallTargets())
1086 if (TS.getValue() > Threshold) {
1097 for (
auto &Child :
Node->getAllChildContext()) {
1099 CalleeList.push(CalleeNode);
1126bool SampleProfileLoader::inlineHotFunctions(
1130 assert((!ProfAccForSymsInList ||
1132 !
F.hasFnAttribute(
"profile-sample-accurate"))) &&
1133 "ProfAccForSymsInList should be false when profile-sample-accurate "
1137 bool Changed =
false;
1138 bool LocalChanged =
true;
1139 while (LocalChanged) {
1140 LocalChanged =
false;
1142 for (
auto &BB :
F) {
1146 for (
auto &
I : BB) {
1148 if (
auto *CB = dyn_cast<CallBase>(&
I)) {
1149 if (!isa<IntrinsicInst>(
I)) {
1150 if ((FS = findCalleeFunctionSamples(*CB))) {
1152 "GUIDToFuncNameMap has to be populated");
1154 if (
FS->getHeadSamplesEstimate() > 0 ||
1156 LocalNotInlinedCallSites.
insert({CB,
FS});
1159 else if (shouldInlineColdCallee(*CB))
1161 }
else if (getExternalInlineAdvisorShouldInline(*CB)) {
1167 if (
Hot || ExternalInlineAdvisor) {
1169 emitOptimizationRemarksForInlineCandidates(AllCandidates,
F,
true);
1172 emitOptimizationRemarksForInlineCandidates(ColdCandidates,
F,
false);
1176 Function *CalledFunction =
I->getCalledFunction();
1177 InlineCandidate Candidate = {
I, LocalNotInlinedCallSites.
lookup(
I),
1181 if (CalledFunction == &
F)
1183 if (
I->isIndirectCall()) {
1185 for (
const auto *FS : findIndirectCallFunctionSamples(*
I, Sum)) {
1187 if (LTOPhase == ThinOrFullLTOPhase::ThinLTOPreLink) {
1188 findExternalInlineCandidate(
I, FS, InlinedGUIDs, SymbolMap,
1189 PSI->getOrCompHotCountThreshold());
1195 Candidate = {
I,
FS,
FS->getHeadSamplesEstimate(), 1.0};
1196 if (tryPromoteAndInlineCandidate(
F, Candidate, SumOrigin, Sum)) {
1197 LocalNotInlinedCallSites.
erase(
I);
1198 LocalChanged =
true;
1201 }
else if (CalledFunction && CalledFunction->
getSubprogram() &&
1203 if (tryInlineCandidate(Candidate)) {
1204 LocalNotInlinedCallSites.
erase(
I);
1205 LocalChanged =
true;
1207 }
else if (LTOPhase == ThinOrFullLTOPhase::ThinLTOPreLink) {
1208 findExternalInlineCandidate(
I, findCalleeFunctionSamples(*
I),
1209 InlinedGUIDs, SymbolMap,
1210 PSI->getOrCompHotCountThreshold());
1213 Changed |= LocalChanged;
1219 promoteMergeNotInlinedContextSamples(LocalNotInlinedCallSites,
F);
1223bool SampleProfileLoader::tryInlineCandidate(
1230 CallBase &CB = *Candidate.CallInstr;
1232 assert(CalledFunction &&
"Expect a callee with definition");
1237 if (
Cost.isNever()) {
1239 "InlineFail", DLoc, BB)
1240 <<
"incompatible inlining");
1248 IFI.UpdateProfile =
false;
1251 if (!
IR.isSuccess())
1256 Cost,
true, getAnnotatedRemarkPassName());
1259 if (InlinedCallSites) {
1260 InlinedCallSites->
clear();
1261 for (
auto &
I : IFI.InlinedCallSites)
1266 ContextTracker->markContextSamplesInlined(Candidate.CalleeSamples);
1276 if (Candidate.CallsiteDistribution < 1) {
1277 for (
auto &
I : IFI.InlinedCallSites) {
1280 Candidate.CallsiteDistribution);
1282 NumDuplicatedInlinesite++;
1288bool SampleProfileLoader::getInlineCandidate(InlineCandidate *NewCandidate,
1290 assert(CB &&
"Expect non-null call instruction");
1292 if (isa<IntrinsicInst>(CB))
1296 const FunctionSamples *CalleeSamples = findCalleeFunctionSamples(*CB);
1299 if (!CalleeSamples && !getExternalInlineAdvisorShouldInline(*CB))
1303 if (std::optional<PseudoProbe> Probe =
extractProbe(*CB))
1304 Factor = Probe->Factor;
1308 *NewCandidate = {CB, CalleeSamples, CallsiteCount, Factor};
1312std::optional<InlineCost>
1313SampleProfileLoader::getExternalInlineAdvisorCost(
CallBase &CB) {
1314 std::unique_ptr<InlineAdvice> Advice =
nullptr;
1315 if (ExternalInlineAdvisor) {
1316 Advice = ExternalInlineAdvisor->getAdvice(CB);
1318 if (!Advice->isInliningRecommended()) {
1319 Advice->recordUnattemptedInlining();
1322 Advice->recordInlining();
1330bool SampleProfileLoader::getExternalInlineAdvisorShouldInline(
CallBase &CB) {
1331 std::optional<InlineCost>
Cost = getExternalInlineAdvisorCost(CB);
1336SampleProfileLoader::shouldInlineCandidate(InlineCandidate &Candidate) {
1337 if (std::optional<InlineCost> ReplayCost =
1338 getExternalInlineAdvisorCost(*Candidate.CallInstr))
1344 if (Candidate.CallsiteCount > PSI->getHotCountThreshold())
1351 assert(
Callee &&
"Expect a definition for inline candidate of direct call");
1364 GetTTI(*
Callee), GetAC, GetTLI);
1367 if (
Cost.isNever() ||
Cost.isAlways())
1400bool SampleProfileLoader::inlineHotFunctionsWithPriority(
1404 assert((!ProfAccForSymsInList ||
1406 !
F.hasFnAttribute(
"profile-sample-accurate"))) &&
1407 "ProfAccForSymsInList should be false when profile-sample-accurate "
1412 CandidateQueue CQueue;
1413 InlineCandidate NewCandidate;
1414 for (
auto &BB :
F) {
1415 for (
auto &
I : BB) {
1416 auto *CB = dyn_cast<CallBase>(&
I);
1419 if (getInlineCandidate(&NewCandidate, CB))
1420 CQueue.push(NewCandidate);
1429 "Max inline size limit should not be smaller than min inline size "
1434 if (ExternalInlineAdvisor)
1435 SizeLimit = std::numeric_limits<unsigned>::max();
1440 bool Changed =
false;
1441 while (!CQueue.empty() &&
F.getInstructionCount() <
SizeLimit) {
1442 InlineCandidate Candidate = CQueue.top();
1445 Function *CalledFunction =
I->getCalledFunction();
1447 if (CalledFunction == &
F)
1449 if (
I->isIndirectCall()) {
1451 auto CalleeSamples = findIndirectCallFunctionSamples(*
I, Sum);
1453 Sum *= Candidate.CallsiteDistribution;
1454 unsigned ICPCount = 0;
1455 for (
const auto *FS : CalleeSamples) {
1457 if (LTOPhase == ThinOrFullLTOPhase::ThinLTOPreLink) {
1458 findExternalInlineCandidate(
I, FS, InlinedGUIDs, SymbolMap,
1459 PSI->getOrCompHotCountThreshold());
1463 FS->getHeadSamplesEstimate() * Candidate.CallsiteDistribution;
1478 if (!PSI->isHotCount(EntryCountDistributed))
1483 Candidate = {
I,
FS, EntryCountDistributed,
1484 Candidate.CallsiteDistribution};
1485 if (tryPromoteAndInlineCandidate(
F, Candidate, SumOrigin, Sum,
1486 &InlinedCallSites)) {
1487 for (
auto *CB : InlinedCallSites) {
1488 if (getInlineCandidate(&NewCandidate, CB))
1489 CQueue.emplace(NewCandidate);
1493 }
else if (!ContextTracker) {
1494 LocalNotInlinedCallSites.
insert({
I,
FS});
1497 }
else if (CalledFunction && CalledFunction->
getSubprogram() &&
1500 if (tryInlineCandidate(Candidate, &InlinedCallSites)) {
1501 for (
auto *CB : InlinedCallSites) {
1502 if (getInlineCandidate(&NewCandidate, CB))
1503 CQueue.emplace(NewCandidate);
1506 }
else if (!ContextTracker) {
1507 LocalNotInlinedCallSites.
insert({
I, Candidate.CalleeSamples});
1509 }
else if (LTOPhase == ThinOrFullLTOPhase::ThinLTOPreLink) {
1510 findExternalInlineCandidate(
I, findCalleeFunctionSamples(*
I),
1511 InlinedGUIDs, SymbolMap,
1512 PSI->getOrCompHotCountThreshold());
1516 if (!CQueue.empty()) {
1518 ++NumCSInlinedHitMaxLimit;
1520 ++NumCSInlinedHitMinLimit;
1522 ++NumCSInlinedHitGrowthLimit;
1528 promoteMergeNotInlinedContextSamples(LocalNotInlinedCallSites,
F);
1532void SampleProfileLoader::promoteMergeNotInlinedContextSamples(
1536 for (
const auto &Pair : NonInlinedCallSites) {
1544 I->getDebugLoc(),
I->getParent())
1545 <<
"previous inlining not repeated: '" <<
ore::NV(
"Callee",
Callee)
1546 <<
"' into '" <<
ore::NV(
"Caller", &
F) <<
"'");
1550 if (
FS->getTotalSamples() == 0 &&
FS->getHeadSamplesEstimate() == 0) {
1564 if (
FS->getHeadSamples() == 0) {
1568 FS->getHeadSamplesEstimate());
1574 OutlineFS->
merge(*FS, 1);
1580 notInlinedCallInfo.try_emplace(
Callee, NotInlinedProfileInfo{0});
1581 pair.first->second.entryCount +=
FS->getHeadSamplesEstimate();
1599void SampleProfileLoader::generateMDProfMetadata(
Function &
F) {
1602 LLVM_DEBUG(
dbgs() <<
"\nPropagation complete. Setting branch weights\n");
1605 for (
auto &BI :
F) {
1608 if (BlockWeights[BB]) {
1609 for (
auto &
I : *BB) {
1610 if (!isa<CallInst>(
I) && !isa<InvokeInst>(
I))
1621 auto T =
FS->findCallTargetMapAt(CallSite);
1622 if (!
T ||
T.get().empty())
1629 if (Probe->Factor < 1)
1636 for (
const auto &
C :
T.get())
1643 FS->findFunctionSamplesMapAt(CallSite)) {
1644 for (
const auto &NameFS : *M)
1645 Sum += NameFS.second.getHeadSamplesEstimate();
1651 I.setMetadata(LLVMContext::MD_prof,
nullptr);
1652 }
else if (!isa<IntrinsicInst>(&
I)) {
1653 I.setMetadata(LLVMContext::MD_prof,
1654 MDB.createBranchWeights(
1655 {static_cast<uint32_t>(BlockWeights[BB])}));
1661 for (
auto &
I : *BB) {
1662 if (isa<CallInst>(
I) || isa<InvokeInst>(
I)) {
1664 I.setMetadata(LLVMContext::MD_prof,
nullptr);
1666 I.setMetadata(LLVMContext::MD_prof, MDB.createBranchWeights(0));
1674 if (!isa<BranchInst>(TI) && !isa<SwitchInst>(TI) &&
1675 !isa<IndirectBrInst>(TI))
1681 :
Twine(
"<UNKNOWN LOCATION>"))
1690 std::vector<uint64_t> EdgeIndex;
1695 EdgeIndex[
I] = EdgeMultiplicity[Succ];
1696 EdgeMultiplicity[Succ]++;
1701 Edge
E = std::make_pair(BB, Succ);
1707 if (Weight > std::numeric_limits<uint32_t>::max()) {
1709 Weight = std::numeric_limits<uint32_t>::max();
1718 uint64_t W = Weight / EdgeMultiplicity[Succ];
1720 if (EdgeIndex[
I] < Weight % EdgeMultiplicity[Succ])
1725 if (Weight > MaxWeight) {
1727 MaxDestInst = Succ->getFirstNonPHIOrDbgOrLifetime();
1743 if (MaxWeight > 0 &&
1746 TI->
setMetadata(LLVMContext::MD_prof, MDB.createBranchWeights(Weights));
1749 <<
"most popular destination for conditional branches at "
1750 <<
ore::NV(
"CondBranchesLoc", BranchLoc);
1769bool SampleProfileLoader::emitAnnotations(
Function &
F) {
1770 bool Changed =
false;
1775 dbgs() <<
"Profile is invalid due to CFG mismatch for Function "
1776 <<
F.getName() <<
"\n");
1777 ++NumMismatchedProfile;
1781 ++NumMatchedProfile;
1783 if (getFunctionLoc(
F) == 0)
1787 <<
F.getName() <<
": " << getFunctionLoc(
F) <<
"\n");
1792 Changed |= inlineHotFunctionsWithPriority(
F, InlinedGUIDs);
1794 Changed |= inlineHotFunctions(
F, InlinedGUIDs);
1796 Changed |= computeAndPropagateWeights(
F, InlinedGUIDs);
1799 generateMDProfMetadata(
F);
1801 emitCoverageRemarks(
F);
1805std::unique_ptr<ProfiledCallGraph>
1806SampleProfileLoader::buildProfiledCallGraph(
Module &M) {
1807 std::unique_ptr<ProfiledCallGraph> ProfiledCG;
1809 ProfiledCG = std::make_unique<ProfiledCallGraph>(*ContextTracker);
1811 ProfiledCG = std::make_unique<ProfiledCallGraph>(Reader->
getProfiles());
1817 if (
F.isDeclaration() || !
F.hasFnAttribute(
"use-sample-profile"))
1825std::vector<Function *>
1827 std::vector<Function *> FunctionOrderList;
1828 FunctionOrderList.reserve(
M.size());
1831 errs() <<
"WARNING: -use-profiled-call-graph ignored, should be used "
1832 "together with -sample-profile-top-down-load.\n";
1845 if (!
F.isDeclaration() &&
F.hasFnAttribute(
"use-sample-profile"))
1846 FunctionOrderList.push_back(&
F);
1847 return FunctionOrderList;
1900 std::unique_ptr<ProfiledCallGraph> ProfiledCG = buildProfiledCallGraph(M);
1909 for (
auto *
Node : Range) {
1911 if (
F && !
F->isDeclaration() &&
F->hasFnAttribute(
"use-sample-profile"))
1912 FunctionOrderList.push_back(
F);
1922 if (!
F.isDeclaration() &&
F.hasFnAttribute(
"use-sample-profile"))
1923 FunctionOrderList.push_back(&
F);
1929 std::reverse(FunctionOrderList.begin(), FunctionOrderList.end());
1932 dbgs() <<
"Function processing order:\n";
1933 for (
auto F : FunctionOrderList) {
1934 dbgs() <<
F->getName() <<
"\n";
1938 return FunctionOrderList;
1941bool SampleProfileLoader::doInitialization(
Module &M,
1943 auto &Ctx =
M.getContext();
1946 Filename, Ctx, *FS, FSDiscriminatorPass::Base, RemappingFilename);
1947 if (std::error_code EC = ReaderOrErr.getError()) {
1948 std::string
Msg =
"Could not open profile: " +
EC.message();
1952 Reader = std::move(ReaderOrErr.get());
1953 Reader->
setSkipFlatProf(LTOPhase == ThinOrFullLTOPhase::ThinLTOPostLink);
1957 if (std::error_code EC = Reader->
read()) {
1958 std::string
Msg =
"profile reading failed: " +
EC.message();
1966 ProfAccForSymsInList =
1968 if (ProfAccForSymsInList) {
1969 NamesInProfile.clear();
1971 NamesInProfile.insert(NameTable->begin(), NameTable->end());
1972 CoverageTracker.setProfAccForSymsInList(
true);
1977 M, *
FAM, Ctx,
nullptr,
1982 false,
InlineContext{LTOPhase, InlinePass::ReplaySampleProfileInliner});
2022 ContextTracker = std::make_unique<SampleContextTracker>(
2028 ProbeManager = std::make_unique<PseudoProbeManager>(M);
2031 "Pseudo-probe-based profile requires SampleProfileProbePass";
2041 std::make_unique<SampleProfileMatcher>(M, *Reader, ProbeManager.get());
2047void SampleProfileMatcher::countProfileMismatches(
2049 const std::unordered_set<LineLocation, LineLocationHash>
2050 &MatchedCallsiteLocs,
2053 auto isInvalidLineOffset = [](
uint32_t LineOffset) {
2054 return LineOffset & 0x8000;
2059 for (
auto &
I :
FS.getBodySamples()) {
2065 if (!
I.second.getCallTargets().empty()) {
2066 TotalCallsiteSamples += Count;
2067 FuncProfiledCallsites++;
2068 if (!MatchedCallsiteLocs.count(Loc)) {
2069 MismatchedCallsiteSamples += Count;
2070 FuncMismatchedCallsites++;
2075 for (
auto &
I :
FS.getCallsiteSamples()) {
2081 for (
auto &FM :
I.second) {
2082 Count += FM.second.getHeadSamplesEstimate();
2084 TotalCallsiteSamples += Count;
2085 FuncProfiledCallsites++;
2086 if (!MatchedCallsiteLocs.count(Loc)) {
2087 MismatchedCallsiteSamples += Count;
2088 FuncMismatchedCallsites++;
2094void SampleProfileMatcher::populateProfileCallsites(
2096 StringMap<std::set<LineLocation>> &CalleeToCallsitesMap) {
2097 for (
const auto &
I :
FS.getBodySamples()) {
2098 const auto &Loc =
I.first;
2099 const auto &CTM =
I.second.getCallTargets();
2101 if (CTM.size() == 1) {
2103 const auto &Candidates = CalleeToCallsitesMap.try_emplace(
2104 CalleeName, std::set<LineLocation>());
2105 Candidates.first->second.insert(Loc);
2109 for (
const auto &
I :
FS.getCallsiteSamples()) {
2111 const auto &CalleeMap =
I.second;
2113 if (CalleeMap.size() == 1) {
2115 const auto &Candidates = CalleeToCallsitesMap.try_emplace(
2116 CalleeName, std::set<LineLocation>());
2117 Candidates.first->second.insert(Loc);
2139void SampleProfileMatcher::runStaleProfileMatching(
2140 const std::map<LineLocation, StringRef> &IRLocations,
2141 StringMap<std::set<LineLocation>> &CalleeToCallsitesMap,
2143 assert(IRToProfileLocationMap.empty() &&
2144 "Run stale profile matching only once per function");
2149 IRToProfileLocationMap.insert({
From, To});
2153 int32_t LocationDelta = 0;
2156 for (
const auto &
IR : IRLocations) {
2157 const auto &Loc =
IR.first;
2159 bool IsMatchedAnchor =
false;
2161 if (!CalleeName.
empty()) {
2162 auto ProfileAnchors = CalleeToCallsitesMap.find(CalleeName);
2163 if (ProfileAnchors != CalleeToCallsitesMap.end() &&
2164 !ProfileAnchors->second.empty()) {
2165 auto CI = ProfileAnchors->second.begin();
2166 const auto Candidate = *CI;
2167 ProfileAnchors->second.erase(CI);
2168 InsertMatching(Loc, Candidate);
2170 <<
" is matched from " << Loc <<
" to " << Candidate
2172 LocationDelta = Candidate.LineOffset - Loc.
LineOffset;
2178 for (
size_t I = (LastMatchedNonAnchors.
size() + 1) / 2;
2179 I < LastMatchedNonAnchors.
size();
I++) {
2180 const auto &
L = LastMatchedNonAnchors[
I];
2181 uint32_t CandidateLineOffset =
L.LineOffset + LocationDelta;
2182 LineLocation Candidate(CandidateLineOffset,
L.Discriminator);
2183 InsertMatching(L, Candidate);
2184 LLVM_DEBUG(
dbgs() <<
"Location is rematched backwards from " << L
2185 <<
" to " << Candidate <<
"\n");
2188 IsMatchedAnchor =
true;
2189 LastMatchedNonAnchors.
clear();
2194 if (!IsMatchedAnchor) {
2197 InsertMatching(Loc, Candidate);
2198 LLVM_DEBUG(
dbgs() <<
"Location is matched from " << Loc <<
" to "
2199 << Candidate <<
"\n");
2205void SampleProfileMatcher::runOnFunction(
const Function &
F,
2207 bool IsFuncHashMismatch =
false;
2210 TotalFuncHashSamples += Count;
2211 TotalProfiledFunc++;
2213 MismatchedFuncHashSamples += Count;
2214 NumMismatchedFuncHash++;
2215 IsFuncHashMismatch =
true;
2219 std::unordered_set<LineLocation, LineLocationHash> MatchedCallsiteLocs;
2222 std::map<LineLocation, StringRef> IRLocations;
2225 for (
auto &BB :
F) {
2226 for (
auto &
I : BB) {
2233 if (!isa<CallBase>(&
I) || isa<IntrinsicInst>(&
I))
2236 const auto *CB = dyn_cast<CallBase>(&
I);
2237 if (
auto &DLoc =
I.getDebugLoc()) {
2246 auto R = IRLocations.emplace(IRCallsite, CalleeName);
2247 R.first->second = CalleeName;
2249 R.first->second == CalleeName) &&
2250 "Overwrite non-call or different callee name location for "
2251 "pseudo probe callsite");
2255 const auto CTM =
FS.findCallTargetMapAt(IRCallsite);
2256 const auto CallsiteFS =
FS.findFunctionSamplesMapAt(IRCallsite);
2259 if (CalleeName.
empty()) {
2264 if ((CTM && !CTM->empty()) || (CallsiteFS && !CallsiteFS->empty()))
2265 MatchedCallsiteLocs.insert(IRCallsite);
2268 if ((CTM && CTM->count(CalleeName)) ||
2269 (CallsiteFS && CallsiteFS->count(CalleeName)))
2270 MatchedCallsiteLocs.insert(IRCallsite);
2278 uint64_t FuncMismatchedCallsites = 0;
2279 uint64_t FuncProfiledCallsites = 0;
2280 countProfileMismatches(FS, MatchedCallsiteLocs, FuncMismatchedCallsites,
2281 FuncProfiledCallsites);
2282 TotalProfiledCallsites += FuncProfiledCallsites;
2283 NumMismatchedCallsites += FuncMismatchedCallsites;
2286 FuncMismatchedCallsites)
2287 dbgs() <<
"Function checksum is matched but there are "
2288 << FuncMismatchedCallsites <<
"/" << FuncProfiledCallsites
2289 <<
" mismatched callsites.\n";
2298 populateProfileCallsites(FS, CalleeToCallsitesMap);
2302 auto &IRToProfileLocationMap = getIRToProfileLocationMap(
F);
2304 runStaleProfileMatching(IRLocations, CalleeToCallsitesMap,
2305 IRToProfileLocationMap);
2309void SampleProfileMatcher::runOnModule() {
2311 if (
F.isDeclaration() || !
F.hasFnAttribute(
"use-sample-profile"))
2315 FS = getFlattenedSamplesFor(
F);
2323 distributeIRToProfileLocationMap();
2327 errs() <<
"(" << NumMismatchedFuncHash <<
"/" << TotalProfiledFunc <<
")"
2328 <<
" of functions' profile are invalid and "
2329 <<
" (" << MismatchedFuncHashSamples <<
"/" << TotalFuncHashSamples
2331 <<
" of samples are discarded due to function hash mismatch.\n";
2333 errs() <<
"(" << NumMismatchedCallsites <<
"/" << TotalProfiledCallsites
2335 <<
" of callsites' profile are invalid and "
2336 <<
"(" << MismatchedCallsiteSamples <<
"/" << TotalCallsiteSamples
2338 <<
" of samples are discarded due to callsite location mismatch.\n";
2347 ProfStatsVec.
emplace_back(
"NumMismatchedFuncHash", NumMismatchedFuncHash);
2348 ProfStatsVec.
emplace_back(
"TotalProfiledFunc", TotalProfiledFunc);
2350 MismatchedFuncHashSamples);
2351 ProfStatsVec.
emplace_back(
"TotalFuncHashSamples", TotalFuncHashSamples);
2354 ProfStatsVec.
emplace_back(
"NumMismatchedCallsites", NumMismatchedCallsites);
2355 ProfStatsVec.
emplace_back(
"TotalProfiledCallsites", TotalProfiledCallsites);
2357 MismatchedCallsiteSamples);
2358 ProfStatsVec.
emplace_back(
"TotalCallsiteSamples", TotalCallsiteSamples);
2360 auto *MD = MDB.createLLVMStats(ProfStatsVec);
2361 auto *NMD =
M.getOrInsertNamedMetadata(
"llvm.stats");
2362 NMD->addOperand(MD);
2366void SampleProfileMatcher::distributeIRToProfileLocationMap(
2368 const auto ProfileMappings = FuncMappings.
find(
FS.getName());
2369 if (ProfileMappings != FuncMappings.
end()) {
2370 FS.setIRToProfileLocationMap(&(ProfileMappings->second));
2373 for (
auto &Inlinees :
FS.getCallsiteSamples()) {
2374 for (
auto FS : Inlinees.second) {
2375 distributeIRToProfileLocationMap(
FS.second);
2382void SampleProfileMatcher::distributeIRToProfileLocationMap() {
2384 distributeIRToProfileLocationMap(
I.second);
2391 GUIDToFuncNameMapper Mapper(M, *Reader, GUIDToFuncNameMap);
2394 if (
M.getProfileSummary(
false) ==
nullptr) {
2401 TotalCollectedSamples +=
I.second.getTotalSamples();
2405 for (
const auto &N_F :
M.getValueSymbolTable()) {
2407 Function *
F = dyn_cast<Function>(N_F.getValue());
2408 if (
F ==
nullptr || OrigName.
empty())
2412 if (OrigName != NewName && !NewName.
empty()) {
2419 r.first->second =
nullptr;
2424 if (
auto MapName = Remapper->lookUpNameInProfile(OrigName)) {
2425 if (*MapName != OrigName && !MapName->empty())
2431 "No empty StringRef should be added in SymbolMap");
2435 MatchingManager->runOnModule();
2438 bool retval =
false;
2439 for (
auto *
F : buildFunctionOrder(M, CG)) {
2441 clearFunctionData();
2447 for (
const std::pair<Function *, NotInlinedProfileInfo> &pair :
2455 LLVM_DEBUG(
dbgs() <<
"\n\nProcessing Function " <<
F.getName() <<
"\n");
2456 DILocation2SampleMap.clear();
2467 initialEntryCount = 0;
2470 ProfAccForSymsInList =
false;
2472 CoverageTracker.setProfAccForSymsInList(ProfAccForSymsInList);
2478 if (ProfAccForSymsInList) {
2480 if (PSL->contains(
F.getName()))
2481 initialEntryCount = 0;
2494 if (NamesInProfile.count(CanonName))
2495 initialEntryCount = -1;
2500 if (!
F.getEntryCount())
2502 std::unique_ptr<OptimizationRemarkEmitter> OwnedORE;
2509 OwnedORE = std::make_unique<OptimizationRemarkEmitter>(&
F);
2510 ORE = OwnedORE.get();
2514 Samples = ContextTracker->getBaseSamplesFor(
F);
2518 if (Samples && !Samples->
empty())
2519 return emitAnnotations(
F);
2525 : ProfileFileName(File), ProfileRemappingFileName(RemappingFile),
2526 LTOPhase(LTOPhase), FS(
std::
move(FS)) {}
2546 SampleProfileLoader SampleLoader(
2549 : ProfileRemappingFileName,
2550 LTOPhase, FS, GetAssumptionCache, GetTTI, GetTLI);
2552 if (!SampleLoader.doInitialization(M, &
FAM))
2557 if (!SampleLoader.runOnModule(M, &AM, PSI, CG))
This file defines the StringMap class.
amdgpu Simplify well known AMD library false FunctionCallee Callee
BlockVerifier::State From
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
#define clEnumValN(ENUMVAL, FLAGNAME, DESC)
This file defines the DenseMap class.
This file defines the DenseSet and SmallDenseSet classes.
static bool runOnFunction(Function &F, bool PostInlining)
Provides ErrorOr<T> smart pointer.
static cl::opt< unsigned > SizeLimit("eif-limit", cl::init(6), cl::Hidden, cl::desc("Size limit in Hexagon early if-conversion"))
Implements a lazy call graph analysis and related passes for the new pass manager.
Legalize the Machine IR a function's Machine IR
This file implements a map that provides insertion order iteration.
static const Function * getCalledFunction(const Value *V, bool &IsNoBuiltin)
Module.h: This file contains the declarations for the Module class.
FunctionAnalysisManager FAM
This header defines various interfaces for pass management in LLVM.
This file defines the PriorityQueue class.
This builds on the llvm/ADT/GraphTraits.h file to find the strongly connected components (SCCs) of a ...
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
This file provides the interface for context-sensitive profile tracker used by CSSPGO.
This file provides the interface for the sampled PGO profile loader base implementation.
This file provides the utility functions for the sampled PGO loader base implementation.
This file provides the interface for the pseudo probe implementation for AutoFDO.
static cl::opt< std::string > SampleProfileFile("sample-profile-file", cl::init(""), cl::value_desc("filename"), cl::desc("Profile file loaded by -sample-profile"), cl::Hidden)
static cl::opt< bool > FlattenProfileForMatching("flatten-profile-for-matching", cl::Hidden, cl::init(true), cl::desc("Use flattened profile for stale profile detection and matching."))
static cl::opt< bool > ProfileSampleBlockAccurate("profile-sample-block-accurate", cl::Hidden, cl::init(false), cl::desc("If the sample profile is accurate, we will mark all un-sampled " "branches and calls as having 0 samples. Otherwise, treat " "them conservatively as unknown. "))
static cl::opt< unsigned > MaxNumPromotions("sample-profile-icp-max-prom", cl::init(3), cl::Hidden, cl::desc("Max number of promotions for a single indirect " "call callsite in sample profile loader"))
static cl::opt< ReplayInlinerSettings::Fallback > ProfileInlineReplayFallback("sample-profile-inline-replay-fallback", cl::init(ReplayInlinerSettings::Fallback::Original), cl::values(clEnumValN(ReplayInlinerSettings::Fallback::Original, "Original", "All decisions not in replay send to original advisor (default)"), clEnumValN(ReplayInlinerSettings::Fallback::AlwaysInline, "AlwaysInline", "All decisions not in replay are inlined"), clEnumValN(ReplayInlinerSettings::Fallback::NeverInline, "NeverInline", "All decisions not in replay are not inlined")), cl::desc("How sample profile inline replay treats sites that don't come " "from the replay. Original: defers to original advisor, " "AlwaysInline: inline all sites not in replay, NeverInline: " "inline no sites not in replay"), cl::Hidden)
static cl::opt< bool > OverwriteExistingWeights("overwrite-existing-weights", cl::Hidden, cl::init(false), cl::desc("Ignore existing branch weights on IR and always overwrite."))
static void updateIDTMetaData(Instruction &Inst, const SmallVectorImpl< InstrProfValueData > &CallTargets, uint64_t Sum)
Update indirect call target profile metadata for Inst.
static cl::opt< bool > AnnotateSampleProfileInlinePhase("annotate-sample-profile-inline-phase", cl::Hidden, cl::init(false), cl::desc("Annotate LTO phase (prelink / postlink), or main (no LTO) for " "sample-profile inline pass name."))
static cl::opt< std::string > ProfileInlineReplayFile("sample-profile-inline-replay", cl::init(""), cl::value_desc("filename"), cl::desc("Optimization remarks file containing inline remarks to be replayed " "by inlining from sample profile loader."), cl::Hidden)
static cl::opt< bool > ProfileMergeInlinee("sample-profile-merge-inlinee", cl::Hidden, cl::init(true), cl::desc("Merge past inlinee's profile to outline version if sample " "profile loader decided not to inline a call site. It will " "only be enabled when top-down order of profile loading is " "enabled. "))
static cl::opt< bool > PersistProfileStaleness("persist-profile-staleness", cl::Hidden, cl::init(false), cl::desc("Compute stale profile statistical metrics and write it into the " "native object file(.llvm_stats section)."))
static bool doesHistoryAllowICP(const Instruction &Inst, StringRef Candidate)
Check whether the indirect call promotion history of Inst allows the promotion for Candidate.
static SmallVector< InstrProfValueData, 2 > GetSortedValueDataFromCallTargets(const SampleRecord::CallTargetMap &M)
Returns the sorted CallTargetMap M by count in descending order.
static cl::opt< bool > UseProfiledCallGraph("use-profiled-call-graph", cl::init(true), cl::Hidden, cl::desc("Process functions in a top-down order " "defined by the profiled call graph when " "-sample-profile-top-down-load is on."))
static cl::opt< ReplayInlinerSettings::Scope > ProfileInlineReplayScope("sample-profile-inline-replay-scope", cl::init(ReplayInlinerSettings::Scope::Function), cl::values(clEnumValN(ReplayInlinerSettings::Scope::Function, "Function", "Replay on functions that have remarks associated " "with them (default)"), clEnumValN(ReplayInlinerSettings::Scope::Module, "Module", "Replay on the entire module")), cl::desc("Whether inline replay should be applied to the entire " "Module or just the Functions (default) that are present as " "callers in remarks during sample profile inlining."), cl::Hidden)
static cl::opt< unsigned > ProfileICPRelativeHotness("sample-profile-icp-relative-hotness", cl::Hidden, cl::init(25), cl::desc("Relative hotness percentage threshold for indirect " "call promotion in proirity-based sample profile loader inlining."))
Function::ProfileCount ProfileCount
static cl::opt< unsigned > ProfileICPRelativeHotnessSkip("sample-profile-icp-relative-hotness-skip", cl::Hidden, cl::init(1), cl::desc("Skip relative hotness check for ICP up to given number of targets."))
static cl::opt< bool > ReportProfileStaleness("report-profile-staleness", cl::Hidden, cl::init(false), cl::desc("Compute and report stale profile statistical metrics."))
static cl::opt< bool > UsePreInlinerDecision("sample-profile-use-preinliner", cl::Hidden, cl::desc("Use the preinliner decisions stored in profile context."))
static cl::opt< bool > ProfileAccurateForSymsInList("profile-accurate-for-symsinlist", cl::Hidden, cl::init(true), cl::desc("For symbols in profile symbol list, regard their profiles to " "be accurate. It may be overriden by profile-sample-accurate. "))
static cl::opt< bool > DisableSampleLoaderInlining("disable-sample-loader-inlining", cl::Hidden, cl::init(false), cl::desc("If true, artifically skip inline transformation in sample-loader " "pass, and merge (or scale) profiles (as configured by " "--sample-profile-merge-inlinee)."))
static cl::opt< bool > ProfileSizeInline("sample-profile-inline-size", cl::Hidden, cl::init(false), cl::desc("Inline cold call sites in profile loader if it's beneficial " "for code size."))
static cl::opt< bool > SalvageStaleProfile("salvage-stale-profile", cl::Hidden, cl::init(false), cl::desc("Salvage stale profile by fuzzy matching and use the remapped " "location for sample profile query."))
static cl::opt< bool > ProfileTopDownLoad("sample-profile-top-down-load", cl::Hidden, cl::init(true), cl::desc("Do profile annotation and inlining for functions in top-down " "order of call graph during sample profile loading. It only " "works for new pass manager. "))
static cl::opt< bool > ProfileSampleAccurate("profile-sample-accurate", cl::Hidden, cl::init(false), cl::desc("If the sample profile is accurate, we will mark all un-sampled " "callsite and function as having 0 samples. Otherwise, treat " "un-sampled callsites and functions conservatively as unknown. "))
static cl::opt< bool > AllowRecursiveInline("sample-profile-recursive-inline", cl::Hidden, cl::desc("Allow sample loader inliner to inline recursive calls."))
static cl::opt< CallSiteFormat::Format > ProfileInlineReplayFormat("sample-profile-inline-replay-format", cl::init(CallSiteFormat::Format::LineColumnDiscriminator), cl::values(clEnumValN(CallSiteFormat::Format::Line, "Line", "<Line Number>"), clEnumValN(CallSiteFormat::Format::LineColumn, "LineColumn", "<Line Number>:<Column Number>"), clEnumValN(CallSiteFormat::Format::LineDiscriminator, "LineDiscriminator", "<Line Number>.<Discriminator>"), clEnumValN(CallSiteFormat::Format::LineColumnDiscriminator, "LineColumnDiscriminator", "<Line Number>:<Column Number>.<Discriminator> (default)")), cl::desc("How sample profile inline replay file is formatted"), cl::Hidden)
static cl::opt< std::string > SampleProfileRemappingFile("sample-profile-remapping-file", cl::init(""), cl::value_desc("filename"), cl::desc("Profile remapping file loaded by -sample-profile"), cl::Hidden)
static cl::opt< bool > CallsitePrioritizedInline("sample-profile-prioritized-inline", cl::Hidden, cl::desc("Use call site prioritized inlining for sample profile loader." "Currently only CSSPGO is supported."))
This file provides the interface for the sampled PGO loader pass.
This file defines the SmallVector class.
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
Defines the virtual file system interface vfs::FileSystem.
A container for analyses that lazily runs them and caches their results.
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)
Get the result of an analysis pass for a given IR unit.
A function analysis which provides an AssumptionCache.
A cache of @llvm.assume calls within a function.
LLVM Basic Block Representation.
const Function * getParent() const
Return the enclosing method, or null if none.
Base class for all callable instructions (InvokeInst and CallInst) Holds everything related to callin...
Function * getCalledFunction() const
Returns the function called, or null if this is an indirect function invocation or the function signa...
This class represents a function call, abstracting a target machine's calling convention.
ValueT lookup(const_arg_type_t< KeyT > Val) const
lookup - Return the entry for the specified key, or a default constructed value if no such entry exis...
iterator find(const_arg_type_t< KeyT > Val)
std::pair< iterator, bool > try_emplace(KeyT &&Key, Ts &&... Args)
size_type count(const_arg_type_t< KeyT > Val) const
Return 1 if the specified key is in the map, 0 otherwise.
std::pair< iterator, bool > insert(const std::pair< KeyT, ValueT > &KV)
Implements a dense probed hash-table based set.
Diagnostic information for the sample profiler.
Represents either an error or a value T.
Class to represent profile counts.
DISubprogram * getSubprogram() const
Get the attached subprogram.
bool isDeclaration() const
Return true if the primary definition of this global value is outside of the current translation unit...
Module * getParent()
Get the module that this global value is contained inside of...
Represents the cost of inlining a function.
static InlineCost getNever(const char *Reason, std::optional< CostBenefitPair > CostBenefit=std::nullopt)
static InlineCost getAlways(const char *Reason, std::optional< CostBenefitPair > CostBenefit=std::nullopt)
static InlineCost get(int Cost, int Threshold, int StaticBonus=0)
This class captures the data input to the InlineFunction call, and records the auxiliary results prod...
InlineResult is basically true or false.
An analysis over an "outer" IR unit that provides access to an analysis manager over an "inner" IR un...
unsigned getNumSuccessors() const LLVM_READONLY
Return the number of successors that this instruction has.
const DebugLoc & getDebugLoc() const
Return the debug location for this node as a DebugLoc.
bool extractProfTotalWeight(uint64_t &TotalVal) const
Retrieve total raw weight values of a branch.
const BasicBlock * getParent() const
BasicBlock * getSuccessor(unsigned Idx) const LLVM_READONLY
Return the specified successor. This instruction must be a terminator.
void setMetadata(unsigned KindID, MDNode *Node)
Set the metadata of the specified kind to the specified node.
A smart pointer to a reference-counted object that inherits from RefCountedBase or ThreadSafeRefCount...
This is an important class for using LLVM in a threaded context.
void diagnose(const DiagnosticInfo &DI)
Report a message to the currently installed diagnostic handler.
An analysis pass which computes the call graph for a module.
A node in the call graph.
A RefSCC of the call graph.
An SCC of the call graph.
A lazily constructed view of the call graph of a module.
iterator_range< postorder_ref_scc_iterator > postorder_ref_sccs()
This class implements a map that also provides access to all stored values in a deterministic order.
VectorType::iterator erase(typename VectorType::iterator Iterator)
Remove the element given by Iterator.
std::pair< iterator, bool > insert(const std::pair< KeyT, ValueT > &KV)
ValueT lookup(const KeyT &Key) const
A Module instance is used to store all the information related to an LLVM module.
A set of analyses that are preserved following a run of a transformation pass.
static PreservedAnalyses none()
Convenience factory function for the empty preserved set.
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
PriorityQueue - This class behaves like std::priority_queue and provides a few additional convenience...
An analysis pass based on the new PM to deliver ProfileSummaryInfo.
Analysis providing profile information.
Metadata * getMD(LLVMContext &Context, bool AddPartialField=true, bool AddPartialProfileRatioField=true)
Return summary information as metadata.
bool moduleIsProbed(const Module &M) const
bool profileIsValid(const Function &F, const FunctionSamples &Samples) const
virtual ErrorOr< uint64_t > getInstWeight(const InstructionT &Inst)
Get the weight for an instruction.
virtual const FunctionSamples * findFunctionSamples(const InstructionT &I) const
Get the FunctionSamples for an instruction.
PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM)
SampleProfileLoaderPass(std::string File="", std::string RemappingFile="", ThinOrFullLTOPhase LTOPhase=ThinOrFullLTOPhase::None, IntrusiveRefCntPtr< vfs::FileSystem > FS=nullptr)
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
reference emplace_back(ArgTypes &&... Args)
iterator insert(iterator I, T &&Elt)
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
StringMap - This is an unconventional map that is specialized for handling keys that are "strings",...
iterator find(StringRef Key)
std::pair< iterator, bool > try_emplace(StringRef Key, ArgsTy &&...Args)
Emplace a new element for the specified key into the map if the key isn't already in the map.
StringRef - Represent a constant reference to a string, i.e.
constexpr bool empty() const
empty - Check if the string is empty.
StringSet - A wrapper for StringMap that provides set-like functionality.
Analysis pass providing the TargetTransformInfo.
Analysis pass providing the TargetLibraryInfo.
Provides information about what library functions are available for the current target.
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
LLVM Value Representation.
StringRef getName() const
Return a constant reference to the value's name.
std::pair< iterator, bool > insert(const ValueT &V)
Representation of the samples collected for a function.
static uint64_t getGUID(StringRef Name)
void findInlinedFunctions(DenseSet< GlobalValue::GUID > &S, const StringMap< Function * > &SymbolMap, uint64_t Threshold) const
Recursively traverses all children, if the total sample count of the corresponding function is no les...
static bool ProfileIsProbeBased
static StringRef getCanonicalFnName(const Function &F)
Return the canonical name for a function, taking into account suffix elision policy attributes.
StringRef getFuncName() const
Return the original function name.
SampleContext & getContext() const
sampleprof_error merge(const FunctionSamples &Other, uint64_t Weight=1)
Merge the samples in Other into this one.
void SetContextSynthetic()
static LineLocation getCallSiteIdentifier(const DILocation *DIL, bool ProfileIsFS=false)
Returns a unique call site identifier for a given debug location of a call instruction.
uint64_t getHeadSamplesEstimate() const
Return an estimate of the sample count of the function entry basic block.
StringRef getName() const
Return the function name.
const BodySampleMap & getBodySamples() const
Return all the samples collected in the body of the function.
static bool UseMD5
Whether the profile uses MD5 to represent string.
static void flattenProfile(SampleProfileMap &ProfileMap, bool ProfileIsCS=false)
bool hasAttribute(ContextAttributeMask A)
Sample-based profile reader.
SampleProfileMap & getProfiles()
Return all the profiles.
bool profileIsProbeBased() const
Whether input profile is based on pseudo probes.
FunctionSamples * getSamplesFor(const Function &F)
Return the samples collected for function F.
FunctionSamples * getOrCreateSamplesFor(const Function &F)
Return the samples collected for function F, create empty FunctionSamples if it doesn't exist.
bool profileIsPreInlined() const
Whether input profile contains ShouldBeInlined contexts.
std::error_code read()
The interface to read sample profiles from the associated file.
SampleProfileReaderItaniumRemapper * getRemapper()
void setModule(const Module *Mod)
virtual std::vector< StringRef > * getNameTable()
It includes all the names that have samples either in outline instance or inline instance.
ProfileSummary & getSummary() const
Return the profile summary.
bool profileIsCS() const
Whether input profile is fully context-sensitive.
virtual void setSkipFlatProf(bool Skip)
Don't read profile without context if the flag is set.
static ErrorOr< std::unique_ptr< SampleProfileReader > > create(const std::string Filename, LLVMContext &C, vfs::FileSystem &FS, FSDiscriminatorPass P=FSDiscriminatorPass::Base, const std::string RemapFilename="")
Create a sample profile reader appropriate to the file format.
virtual std::unique_ptr< ProfileSymbolList > getProfileSymbolList()
static const SortedCallTargetSet SortCallTargets(const CallTargetMap &Targets)
Sort call targets in descending order of call frequency.
static const CallTargetMap adjustCallTargets(const CallTargetMap &Targets, float DistributionFactor)
Prorate call targets by a distribution factor.
Enumerate the SCCs of a directed graph in reverse topological order of the SCC DAG.
bool isAtEnd() const
Direct loop termination test which is more efficient than comparison with end().
Sort the nodes of a directed SCC in the decreasing order of the edge weights.
const CustomOperand< const MCSubtargetInfo & > Msg[]
@ C
The default llvm calling convention, compatible with C.
ValuesClass values(OptsTy... Options)
Helper to build a ValuesClass by forwarding a variable number of arguments as an initializer list to ...
initializer< Ty > init(const Ty &Val)
void checkExpectAnnotations(Instruction &I, const ArrayRef< uint32_t > ExistingWeights, bool IsFrontend)
checkExpectAnnotations - compares PGO counters to the thresholds used for llvm.expect and warns if th...
DenseMap< SymbolStringPtr, ExecutorSymbolDef > SymbolMap
A map from symbol names (as SymbolStringPtrs) to JITSymbols (address/flags pairs).
DiagnosticInfoOptimizationBase::Argument NV
CallBase & promoteIndirectCall(CallBase &CB, Function *F, uint64_t Count, uint64_t TotalCount, bool AttachProfToDirectCall, OptimizationRemarkEmitter *ORE)
std::unordered_map< SampleContext, FunctionSamples, SampleContext::Hash > SampleProfileMap
std::unordered_map< LineLocation, LineLocation, LineLocationHash > LocToLocMap
@ ContextDuplicatedIntoBase
std::map< std::string, FunctionSamples, std::less<> > FunctionSamplesMap
bool callsiteIsHot(const FunctionSamples *CallsiteFS, ProfileSummaryInfo *PSI, bool ProfAccForSymsInList)
Return true if the given callsite is hot with respect to the hot cutoff threshold.
IntrusiveRefCntPtr< FileSystem > getRealFileSystem()
Gets a vfs::FileSystem for the 'real' file system, as seen by the operating system.
This is an optimization pass for GlobalISel generic memory operations.
static bool isIndirectCall(const MachineInstr &MI)
bool getValueProfDataFromInst(const Instruction &Inst, InstrProfValueKind ValueKind, uint32_t MaxNumValueData, InstrProfValueData ValueData[], uint32_t &ActualNumValueData, uint64_t &TotalC, bool GetNoICPValue=false)
Extract the value profile data from Inst which is annotated with value profile meta data.
bool isLegalToPromote(const CallBase &CB, Function *Callee, const char **FailureReason=nullptr)
Return true if the given indirect call site can be made to call Callee.
cl::opt< int > ProfileInlineLimitMin("sample-profile-inline-limit-min", cl::Hidden, cl::init(100), cl::desc("The lower bound of size growth limit for " "proirity-based sample profile loader inlining."))
cl::opt< int > ProfileInlineGrowthLimit("sample-profile-inline-growth-limit", cl::Hidden, cl::init(12), cl::desc("The size growth ratio limit for proirity-based sample profile " "loader inlining."))
scc_iterator< T > scc_begin(const T &G)
Construct the begin iterator for a deduced graph type T.
void setProbeDistributionFactor(Instruction &Inst, float Factor)
std::string AnnotateInlinePassName(InlineContext IC)
ThinOrFullLTOPhase
This enumerates the LLVM full LTO or ThinLTO optimization phases.
InlineCost getInlineCost(CallBase &Call, const InlineParams &Params, TargetTransformInfo &CalleeTTI, function_ref< AssumptionCache &(Function &)> GetAssumptionCache, function_ref< const TargetLibraryInfo &(Function &)> GetTLI, function_ref< BlockFrequencyInfo &(Function &)> GetBFI=nullptr, ProfileSummaryInfo *PSI=nullptr, OptimizationRemarkEmitter *ORE=nullptr)
Get an InlineCost object representing the cost of inlining this callsite.
cl::opt< bool > SampleProfileUseProfi
void annotateValueSite(Module &M, Instruction &Inst, const InstrProfRecord &InstrProfR, InstrProfValueKind ValueKind, uint32_t SiteIndx, uint32_t MaxMDCount=3)
Get the value profile data for value site SiteIdx from InstrProfR and annotate the instruction Inst w...
void sort(IteratorTy Start, IteratorTy End)
llvm::cl::opt< bool > UseIterativeBFIInference
std::optional< PseudoProbe > extractProbe(const Instruction &Inst)
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
void emitInlinedIntoBasedOnCost(OptimizationRemarkEmitter &ORE, DebugLoc DLoc, const BasicBlock *Block, const Function &Callee, const Function &Caller, const InlineCost &IC, bool ForProfileContext=false, const char *PassName=nullptr)
Emit ORE message based on cost (default heuristic).
cl::opt< bool > SortProfiledSCC("sort-profiled-scc-member", cl::init(true), cl::Hidden, cl::desc("Sort profiled recursion by edge weights."))
std::unique_ptr< InlineAdvisor > getReplayInlineAdvisor(Module &M, FunctionAnalysisManager &FAM, LLVMContext &Context, std::unique_ptr< InlineAdvisor > OriginalAdvisor, const ReplayInlinerSettings &ReplaySettings, bool EmitRemarks, InlineContext IC)
raw_fd_ostream & errs()
This returns a reference to a raw_ostream for standard error.
cl::opt< int > ProfileInlineLimitMax("sample-profile-inline-limit-max", cl::Hidden, cl::init(10000), cl::desc("The upper bound of size growth limit for " "proirity-based sample profile loader inlining."))
cl::opt< int > SampleHotCallSiteThreshold("sample-profile-hot-inline-threshold", cl::Hidden, cl::init(3000), cl::desc("Hot callsite threshold for proirity-based sample profile loader " "inlining."))
void updateProfileCallee(Function *Callee, int64_t EntryDelta, const ValueMap< const Value *, WeakTrackingVH > *VMap=nullptr)
Updates profile information by adjusting the entry count by adding EntryDelta then scaling callsite i...
InlineResult InlineFunction(CallBase &CB, InlineFunctionInfo &IFI, bool MergeAttributes=false, AAResults *CalleeAAR=nullptr, bool InsertLifetime=true, Function *ForwardVarArgsTo=nullptr)
This function inlines the called function into the basic block of the caller.
InlineParams getInlineParams()
Generate the parameters to tune the inline cost analysis based only on the commandline options.
OutputIt move(R &&Range, OutputIt Out)
Provide wrappers to std::move which take ranges instead of having to pass begin/end explicitly.
cl::opt< bool > EnableExtTspBlockPlacement
const uint64_t NOMORE_ICP_MAGICNUM
Magic number in the value profile metadata showing a target has been promoted for the instruction and...
cl::opt< int > SampleColdCallSiteThreshold("sample-profile-cold-inline-threshold", cl::Hidden, cl::init(45), cl::desc("Threshold for inlining cold callsites"))
Used in the streaming interface as the general argument type.
Provides context on when an inline advisor is constructed in the pipeline (e.g., link phase,...
Thresholds to tune inline cost analysis.
std::optional< bool > AllowRecursiveCall
Indicate whether we allow inlining for recursive call.
std::optional< bool > ComputeFullInlineCost
Compute inline cost even when the cost has exceeded the threshold.
Represents the relative location of an instruction.