90#include <system_error>
95using namespace sampleprof;
98#define DEBUG_TYPE "sample-profile"
99#define CSINLINE_DEBUG DEBUG_TYPE "-inline"
102 "Number of functions inlined with context sensitive profile");
104 "Number of functions not inlined with context sensitive profile");
106 "Number of functions with CFG mismatched profile");
107STATISTIC(NumMatchedProfile,
"Number of functions with CFG matched profile");
109 "Number of inlined callsites with a partial distribution factor");
112 "Number of functions with FDO inline stopped due to min size limit");
114 "Number of functions with FDO inline stopped due to max size limit");
116 NumCSInlinedHitGrowthLimit,
117 "Number of functions with FDO inline stopped due to growth size limit");
134 cl::desc(
"Salvage stale profile by fuzzy matching and use the remapped "
135 "location for sample profile query."));
138 cl::desc(
"Salvage unused profile by matching with new "
139 "functions on call graph."));
143 cl::desc(
"Compute and report stale profile statistical metrics."));
147 cl::desc(
"Compute stale profile statistical metrics and write it into the "
148 "native object file(.llvm_stats section)."));
152 cl::desc(
"If the sample profile is accurate, we will mark all un-sampled "
153 "callsite and function as having 0 samples. Otherwise, treat "
154 "un-sampled callsites and functions conservatively as unknown. "));
158 cl::desc(
"If the sample profile is accurate, we will mark all un-sampled "
159 "branches and calls as having 0 samples. Otherwise, treat "
160 "them conservatively as unknown. "));
164 cl::desc(
"For symbols in profile symbol list, regard their profiles to "
165 "be accurate. It may be overridden by profile-sample-accurate. "));
169 cl::desc(
"Merge past inlinee's profile to outline version if sample "
170 "profile loader decided not to inline a call site. It will "
171 "only be enabled when top-down order of profile loading is "
176 cl::desc(
"Do profile annotation and inlining for functions in top-down "
177 "order of call graph during sample profile loading. It only "
178 "works for new pass manager. "));
182 cl::desc(
"Process functions in a top-down order "
183 "defined by the profiled call graph when "
184 "-sample-profile-top-down-load is on."));
188 cl::desc(
"Inline cold call sites in profile loader if it's beneficial "
197 "If true, artificially skip inline transformation in sample-loader "
198 "pass, and merge (or scale) profiles (as configured by "
199 "--sample-profile-merge-inlinee)."));
204 cl::desc(
"Sort profiled recursion by edge weights."));
208 cl::desc(
"The size growth ratio limit for proirity-based sample profile "
209 "loader inlining."));
213 cl::desc(
"The lower bound of size growth limit for "
214 "proirity-based sample profile loader inlining."));
218 cl::desc(
"The upper bound of size growth limit for "
219 "proirity-based sample profile loader inlining."));
223 cl::desc(
"Hot callsite threshold for proirity-based sample profile loader "
228 cl::desc(
"Threshold for inlining cold callsites"));
234 "Relative hotness percentage threshold for indirect "
235 "call promotion in proirity-based sample profile loader inlining."));
240 "Skip relative hotness check for ICP up to given number of targets."));
244 cl::desc(
"A function is considered hot for staleness error check if its "
245 "total sample count is above the specified percentile"));
249 cl::desc(
"Skip the check if the number of hot functions is smaller than "
250 "the specified number."));
254 cl::desc(
"Reject the profile if the mismatch percent is higher than the "
258 "sample-profile-prioritized-inline",
cl::Hidden,
259 cl::desc(
"Use call site prioritized inlining for sample profile loader. "
260 "Currently only CSSPGO is supported."));
264 cl::desc(
"Use the preinliner decisions stored in profile context."));
267 "sample-profile-recursive-inline",
cl::Hidden,
268 cl::desc(
"Allow sample loader inliner to inline recursive calls."));
272 cl::desc(
"Remove pseudo-probe after sample profile annotation."));
277 "Optimization remarks file containing inline remarks to be replayed "
278 "by inlining from sample profile loader."),
282 "sample-profile-inline-replay-scope",
283 cl::init(ReplayInlinerSettings::Scope::Function),
285 "Replay on functions that have remarks associated "
286 "with them (default)"),
287 clEnumValN(ReplayInlinerSettings::Scope::Module,
"Module",
288 "Replay on the entire module")),
289 cl::desc(
"Whether inline replay should be applied to the entire "
290 "Module or just the Functions (default) that are present as "
291 "callers in remarks during sample profile inlining."),
295 "sample-profile-inline-replay-fallback",
296 cl::init(ReplayInlinerSettings::Fallback::Original),
299 ReplayInlinerSettings::Fallback::Original,
"Original",
300 "All decisions not in replay send to original advisor (default)"),
301 clEnumValN(ReplayInlinerSettings::Fallback::AlwaysInline,
302 "AlwaysInline",
"All decisions not in replay are inlined"),
303 clEnumValN(ReplayInlinerSettings::Fallback::NeverInline,
"NeverInline",
304 "All decisions not in replay are not inlined")),
305 cl::desc(
"How sample profile inline replay treats sites that don't come "
306 "from the replay. Original: defers to original advisor, "
307 "AlwaysInline: inline all sites not in replay, NeverInline: "
308 "inline no sites not in replay"),
312 "sample-profile-inline-replay-format",
313 cl::init(CallSiteFormat::Format::LineColumnDiscriminator),
315 clEnumValN(CallSiteFormat::Format::Line,
"Line",
"<Line Number>"),
316 clEnumValN(CallSiteFormat::Format::LineColumn,
"LineColumn",
317 "<Line Number>:<Column Number>"),
318 clEnumValN(CallSiteFormat::Format::LineDiscriminator,
319 "LineDiscriminator",
"<Line Number>.<Discriminator>"),
320 clEnumValN(CallSiteFormat::Format::LineColumnDiscriminator,
321 "LineColumnDiscriminator",
322 "<Line Number>:<Column Number>.<Discriminator> (default)")),
327 cl::desc(
"Max number of promotions for a single indirect "
328 "call callsite in sample profile loader"));
332 cl::desc(
"Ignore existing branch weights on IR and always overwrite."));
336 cl::desc(
"Annotate LTO phase (prelink / postlink), or main (no LTO) for "
337 "sample-profile inline pass name."));
347using Edge = std::pair<const BasicBlock *, const BasicBlock *>;
352class GUIDToFuncNameMapper {
357 CurrentGUIDToFuncNameMap(GUIDToFuncNameMap) {
361 for (
const auto &
F : CurrentModule) {
363 CurrentGUIDToFuncNameMap.insert(
364 {Function::getGUID(OrigName), OrigName});
374 if (CanonName != OrigName)
375 CurrentGUIDToFuncNameMap.insert(
376 {Function::getGUID(CanonName), CanonName});
380 SetGUIDToFuncNameMapForAll(&CurrentGUIDToFuncNameMap);
383 ~GUIDToFuncNameMapper() {
387 CurrentGUIDToFuncNameMap.clear();
391 SetGUIDToFuncNameMapForAll(
nullptr);
396 std::queue<FunctionSamples *> FSToUpdate;
398 FSToUpdate.push(&IFS.second);
401 while (!FSToUpdate.empty()) {
404 FS->GUIDToFuncNameMap = Map;
405 for (
const auto &ICS : FS->getCallsiteSamples()) {
407 for (
const auto &IFS : FSMap) {
409 FSToUpdate.push(&FS);
421struct InlineCandidate {
431 float CallsiteDistribution;
435struct CandidateComparer {
436 bool operator()(
const InlineCandidate &LHS,
const InlineCandidate &RHS) {
437 if (
LHS.CallsiteCount !=
RHS.CallsiteCount)
438 return LHS.CallsiteCount <
RHS.CallsiteCount;
456using CandidateQueue =
474 bool UseFlattenedProfile)
477 GetAC(
std::
move(GetAssumptionCache)),
478 GetTTI(
std::
move(GetTargetTransformInfo)), GetTLI(
std::
move(GetTLI)),
479 CG(CG), LTOPhase(LTOPhase),
484 DisableSampleProfileInlining(DisableSampleProfileInlining),
485 UseFlattenedProfile(UseFlattenedProfile) {}
498 std::vector<const FunctionSamples *>
504 bool tryPromoteAndInlineCandidate(
510 std::optional<InlineCost> getExternalInlineAdvisorCost(
CallBase &CB);
511 bool getExternalInlineAdvisorShouldInline(
CallBase &CB);
512 InlineCost shouldInlineCandidate(InlineCandidate &Candidate);
513 bool getInlineCandidate(InlineCandidate *NewCandidate,
CallBase *CB);
515 tryInlineCandidate(InlineCandidate &Candidate,
518 inlineHotFunctionsWithPriority(
Function &
F,
522 void emitOptimizationRemarksForInlineCandidates(
525 void promoteMergeNotInlinedContextSamples(
529 std::unique_ptr<ProfiledCallGraph> buildProfiledCallGraph(
Module &M);
530 void generateMDProfMetadata(
Function &
F);
533 void removePseudoProbeInstsDiscriminator(
Module &M);
551 std::unique_ptr<SampleContextTracker> ContextTracker;
559 const std::string AnnotatedPassName;
563 std::shared_ptr<ProfileSymbolList> PSL;
574 struct NotInlinedProfileInfo {
597 bool ProfAccForSymsInList;
599 bool DisableSampleProfileInlining;
601 bool UseFlattenedProfile;
604 std::unique_ptr<InlineAdvisor> ExternalInlineAdvisor;
607 std::unique_ptr<SampleProfileMatcher> MatchingManager;
610 const char *getAnnotatedRemarkPassName()
const {
611 return AnnotatedPassName.c_str();
624 const std::vector<const BasicBlockT *> &BasicBlocks,
626 for (
auto &Jump :
Func.Jumps) {
627 const auto *BB = BasicBlocks[Jump.Source];
628 const auto *Succ = BasicBlocks[Jump.Target];
632 if (Successors[BB].
size() == 2 && Successors[BB].back() == Succ) {
633 if (isa<InvokeInst>(TI)) {
634 Jump.IsUnlikely =
true;
640 if (isa<UnreachableInst>(SuccTI)) {
641 Jump.IsUnlikely =
true;
662 return getProbeWeight(Inst);
666 return std::error_code();
671 if (isa<BranchInst>(Inst) || isa<IntrinsicInst>(Inst) || isa<PHINode>(Inst))
672 return std::error_code();
681 if (
const auto *CB = dyn_cast<CallBase>(&Inst))
682 if (!CB->isIndirectCall() && findCalleeFunctionSamples(*CB))
685 return getInstWeightImpl(Inst);
701SampleProfileLoader::findCalleeFunctionSamples(
const CallBase &Inst)
const {
709 CalleeName =
Callee->getName();
712 return ContextTracker->getCalleeContextSamplesFor(Inst, CalleeName);
719 CalleeName, Reader->getRemapper(),
720 &FuncNameToProfNameMap);
726std::vector<const FunctionSamples *>
727SampleProfileLoader::findIndirectCallFunctionSamples(
730 std::vector<const FunctionSamples *>
R;
737 assert(L && R &&
"Expect non-null FunctionSamples");
738 if (
L->getHeadSamplesEstimate() !=
R->getHeadSamplesEstimate())
739 return L->getHeadSamplesEstimate() >
R->getHeadSamplesEstimate();
740 return L->getGUID() <
R->getGUID();
745 ContextTracker->getIndirectCalleeContextSamplesFor(DIL);
746 if (CalleeSamples.empty())
752 for (
const auto *
const FS : CalleeSamples) {
753 Sum +=
FS->getHeadSamplesEstimate();
766 if (
auto T =
FS->findCallTargetMapAt(CallSite))
767 for (
const auto &T_C : *
T)
772 for (
const auto &NameFS : *M) {
773 Sum += NameFS.second.getHeadSamplesEstimate();
774 R.push_back(&NameFS.second);
782SampleProfileLoader::findFunctionSamples(
const Instruction &Inst)
const {
793 auto it = DILocation2SampleMap.try_emplace(DIL,
nullptr);
796 it.first->second = ContextTracker->getContextSamplesFor(DIL);
798 it.first->second = Samples->findFunctionSamples(
799 DIL, Reader->getRemapper(), &FuncNameToProfNameMap);
801 return it.first->second;
817 if (ValueData.empty())
820 unsigned NumPromoted = 0;
821 for (
const auto &V : ValueData) {
828 if (V.Value == Function::getGUID(Candidate))
863 "If sum is 0, assume only one element in CallTargets "
864 "with count being NOMORE_ICP_MAGICNUM");
866 for (
const auto &V : ValueData)
867 ValueCountMap[V.Value] = V.Count;
873 OldSum -= Pair.first->second;
880 for (
const auto &V : ValueData) {
882 ValueCountMap[V.Value] = V.Count;
885 for (
const auto &Data : CallTargets) {
886 auto Pair = ValueCountMap.
try_emplace(Data.Value, Data.Count);
892 assert(Sum >= Data.Count &&
"Sum should never be less than Data.Count");
898 for (
const auto &ValueCount : ValueCountMap) {
900 InstrProfValueData{ValueCount.first, ValueCount.second});
904 [](
const InstrProfValueData &L,
const InstrProfValueData &R) {
905 if (L.Count != R.Count)
906 return L.Count > R.Count;
907 return L.Value > R.Value;
913 NewCallTargets, Sum, IPVK_IndirectCallTarget, MaxMDCount);
926bool SampleProfileLoader::tryPromoteAndInlineCandidate(
930 if (DisableSampleProfileInlining)
937 auto CalleeFunctionName = Candidate.CalleeSamples->getFunction();
942 auto &CI = *Candidate.CallInstr;
946 const char *Reason =
"Callee function not available";
953 if (!
R->second->isDeclaration() &&
R->second->getSubprogram() &&
954 R->second->hasFnAttribute(
"use-sample-profile") &&
963 CI,
R->second, Candidate.CallsiteCount, Sum,
false, ORE);
965 Sum -= Candidate.CallsiteCount;
978 Candidate.CallInstr = DI;
979 if (isa<CallInst>(DI) || isa<InvokeInst>(DI)) {
980 bool Inlined = tryInlineCandidate(Candidate, InlinedCallSite);
985 *DI,
static_cast<float>(Candidate.CallsiteCount) / SumOrigin);
993 Candidate.CallInstr->getName())<<
" because "
1004 if (Callee ==
nullptr)
1013 if (
Cost.isAlways())
1019void SampleProfileLoader::emitOptimizationRemarksForInlineCandidates(
1022 for (
auto *
I : Candidates) {
1023 Function *CalledFunction =
I->getCalledFunction();
1024 if (CalledFunction) {
1026 "InlineAttempt",
I->getDebugLoc(),
1028 <<
"previous inlining reattempted for "
1029 << (
Hot ?
"hotness: '" :
"size: '")
1030 <<
ore::NV(
"Callee", CalledFunction) <<
"' into '"
1036void SampleProfileLoader::findExternalInlineCandidate(
1042 if (CB && getExternalInlineAdvisorShouldInline(*CB)) {
1073 std::queue<ContextTrieNode *> CalleeList;
1074 CalleeList.push(Caller);
1075 while (!CalleeList.empty()) {
1094 if (!Func ||
Func->isDeclaration())
1100 for (
const auto &TS : BS.second.getCallTargets())
1101 if (TS.second > Threshold) {
1103 if (!Callee ||
Callee->isDeclaration())
1104 InlinedGUIDs.
insert(TS.first.getHashCode());
1111 for (
auto &Child :
Node->getAllChildContext()) {
1113 CalleeList.push(CalleeNode);
1140bool SampleProfileLoader::inlineHotFunctions(
1144 assert((!ProfAccForSymsInList ||
1146 !
F.hasFnAttribute(
"profile-sample-accurate"))) &&
1147 "ProfAccForSymsInList should be false when profile-sample-accurate "
1151 bool Changed =
false;
1152 bool LocalChanged =
true;
1153 while (LocalChanged) {
1154 LocalChanged =
false;
1156 for (
auto &BB :
F) {
1160 for (
auto &
I : BB) {
1162 if (
auto *CB = dyn_cast<CallBase>(&
I)) {
1163 if (!isa<IntrinsicInst>(
I)) {
1164 if ((FS = findCalleeFunctionSamples(*CB))) {
1166 "GUIDToFuncNameMap has to be populated");
1168 if (
FS->getHeadSamplesEstimate() > 0 ||
1170 LocalNotInlinedCallSites.
insert({CB,
FS});
1173 else if (shouldInlineColdCallee(*CB))
1175 }
else if (getExternalInlineAdvisorShouldInline(*CB)) {
1181 if (
Hot || ExternalInlineAdvisor) {
1183 emitOptimizationRemarksForInlineCandidates(AllCandidates,
F,
true);
1186 emitOptimizationRemarksForInlineCandidates(ColdCandidates,
F,
false);
1190 Function *CalledFunction =
I->getCalledFunction();
1191 InlineCandidate Candidate = {
I, LocalNotInlinedCallSites.
lookup(
I),
1195 if (CalledFunction == &
F)
1197 if (
I->isIndirectCall()) {
1199 for (
const auto *FS : findIndirectCallFunctionSamples(*
I, Sum)) {
1201 if (LTOPhase == ThinOrFullLTOPhase::ThinLTOPreLink) {
1202 findExternalInlineCandidate(
I, FS, InlinedGUIDs,
1203 PSI->getOrCompHotCountThreshold());
1209 Candidate = {
I,
FS,
FS->getHeadSamplesEstimate(), 1.0};
1210 if (tryPromoteAndInlineCandidate(
F, Candidate, SumOrigin, Sum)) {
1211 LocalNotInlinedCallSites.
erase(
I);
1212 LocalChanged =
true;
1215 }
else if (CalledFunction && CalledFunction->
getSubprogram() &&
1217 if (tryInlineCandidate(Candidate)) {
1218 LocalNotInlinedCallSites.
erase(
I);
1219 LocalChanged =
true;
1221 }
else if (LTOPhase == ThinOrFullLTOPhase::ThinLTOPreLink) {
1222 findExternalInlineCandidate(
I, findCalleeFunctionSamples(*
I),
1224 PSI->getOrCompHotCountThreshold());
1227 Changed |= LocalChanged;
1233 promoteMergeNotInlinedContextSamples(LocalNotInlinedCallSites,
F);
1237bool SampleProfileLoader::tryInlineCandidate(
1241 if (DisableSampleProfileInlining)
1244 CallBase &CB = *Candidate.CallInstr;
1246 assert(CalledFunction &&
"Expect a callee with definition");
1251 if (
Cost.isNever()) {
1253 "InlineFail", DLoc, BB)
1254 <<
"incompatible inlining");
1262 IFI.UpdateProfile =
false;
1265 if (!
IR.isSuccess())
1270 Cost,
true, getAnnotatedRemarkPassName());
1273 if (InlinedCallSites) {
1274 InlinedCallSites->
clear();
1275 for (
auto &
I : IFI.InlinedCallSites)
1280 ContextTracker->markContextSamplesInlined(Candidate.CalleeSamples);
1290 if (Candidate.CallsiteDistribution < 1) {
1291 for (
auto &
I : IFI.InlinedCallSites) {
1294 Candidate.CallsiteDistribution);
1296 NumDuplicatedInlinesite++;
1302bool SampleProfileLoader::getInlineCandidate(InlineCandidate *NewCandidate,
1304 assert(CB &&
"Expect non-null call instruction");
1306 if (isa<IntrinsicInst>(CB))
1310 const FunctionSamples *CalleeSamples = findCalleeFunctionSamples(*CB);
1313 if (!CalleeSamples && !getExternalInlineAdvisorShouldInline(*CB))
1317 if (std::optional<PseudoProbe> Probe =
extractProbe(*CB))
1318 Factor = Probe->Factor;
1322 *NewCandidate = {CB, CalleeSamples, CallsiteCount, Factor};
1326std::optional<InlineCost>
1327SampleProfileLoader::getExternalInlineAdvisorCost(
CallBase &CB) {
1328 std::unique_ptr<InlineAdvice> Advice =
nullptr;
1329 if (ExternalInlineAdvisor) {
1330 Advice = ExternalInlineAdvisor->getAdvice(CB);
1332 if (!Advice->isInliningRecommended()) {
1333 Advice->recordUnattemptedInlining();
1336 Advice->recordInlining();
1344bool SampleProfileLoader::getExternalInlineAdvisorShouldInline(
CallBase &CB) {
1345 std::optional<InlineCost>
Cost = getExternalInlineAdvisorCost(CB);
1350SampleProfileLoader::shouldInlineCandidate(InlineCandidate &Candidate) {
1351 if (std::optional<InlineCost> ReplayCost =
1352 getExternalInlineAdvisorCost(*Candidate.CallInstr))
1358 if (Candidate.CallsiteCount > PSI->getHotCountThreshold())
1365 assert(Callee &&
"Expect a definition for inline candidate of direct call");
1378 GetTTI(*Callee), GetAC, GetTLI);
1381 if (
Cost.isNever() ||
Cost.isAlways())
1397 SampleContext &Context = Candidate.CalleeSamples->getContext();
1415bool SampleProfileLoader::inlineHotFunctionsWithPriority(
1419 assert((!ProfAccForSymsInList ||
1421 !
F.hasFnAttribute(
"profile-sample-accurate"))) &&
1422 "ProfAccForSymsInList should be false when profile-sample-accurate "
1427 CandidateQueue CQueue;
1428 InlineCandidate NewCandidate;
1429 for (
auto &BB :
F) {
1430 for (
auto &
I : BB) {
1431 auto *CB = dyn_cast<CallBase>(&
I);
1434 if (getInlineCandidate(&NewCandidate, CB))
1435 CQueue.push(NewCandidate);
1444 "Max inline size limit should not be smaller than min inline size "
1449 if (ExternalInlineAdvisor)
1450 SizeLimit = std::numeric_limits<unsigned>::max();
1455 bool Changed =
false;
1456 while (!CQueue.empty() &&
F.getInstructionCount() <
SizeLimit) {
1457 InlineCandidate Candidate = CQueue.top();
1460 Function *CalledFunction =
I->getCalledFunction();
1462 if (CalledFunction == &
F)
1464 if (
I->isIndirectCall()) {
1466 auto CalleeSamples = findIndirectCallFunctionSamples(*
I, Sum);
1468 Sum *= Candidate.CallsiteDistribution;
1469 unsigned ICPCount = 0;
1470 for (
const auto *FS : CalleeSamples) {
1472 if (LTOPhase == ThinOrFullLTOPhase::ThinLTOPreLink) {
1473 findExternalInlineCandidate(
I, FS, InlinedGUIDs,
1474 PSI->getOrCompHotCountThreshold());
1478 FS->getHeadSamplesEstimate() * Candidate.CallsiteDistribution;
1493 if (!PSI->isHotCount(EntryCountDistributed))
1498 Candidate = {
I,
FS, EntryCountDistributed,
1499 Candidate.CallsiteDistribution};
1500 if (tryPromoteAndInlineCandidate(
F, Candidate, SumOrigin, Sum,
1501 &InlinedCallSites)) {
1502 for (
auto *CB : InlinedCallSites) {
1503 if (getInlineCandidate(&NewCandidate, CB))
1504 CQueue.emplace(NewCandidate);
1508 }
else if (!ContextTracker) {
1509 LocalNotInlinedCallSites.
insert({
I,
FS});
1512 }
else if (CalledFunction && CalledFunction->
getSubprogram() &&
1515 if (tryInlineCandidate(Candidate, &InlinedCallSites)) {
1516 for (
auto *CB : InlinedCallSites) {
1517 if (getInlineCandidate(&NewCandidate, CB))
1518 CQueue.emplace(NewCandidate);
1521 }
else if (!ContextTracker) {
1522 LocalNotInlinedCallSites.
insert({
I, Candidate.CalleeSamples});
1524 }
else if (LTOPhase == ThinOrFullLTOPhase::ThinLTOPreLink) {
1525 findExternalInlineCandidate(
I, findCalleeFunctionSamples(*
I),
1527 PSI->getOrCompHotCountThreshold());
1531 if (!CQueue.empty()) {
1533 ++NumCSInlinedHitMaxLimit;
1535 ++NumCSInlinedHitMinLimit;
1537 ++NumCSInlinedHitGrowthLimit;
1543 promoteMergeNotInlinedContextSamples(LocalNotInlinedCallSites,
F);
1547void SampleProfileLoader::promoteMergeNotInlinedContextSamples(
1551 for (
const auto &Pair : NonInlinedCallSites) {
1554 if (!Callee ||
Callee->isDeclaration())
1559 I->getDebugLoc(),
I->getParent())
1560 <<
"previous inlining not repeated: '" <<
ore::NV(
"Callee", Callee)
1561 <<
"' into '" <<
ore::NV(
"Caller", &
F) <<
"'");
1565 if (
FS->getTotalSamples() == 0 &&
FS->getHeadSamplesEstimate() == 0) {
1579 if (
FS->getHeadSamples() == 0) {
1583 FS->getHeadSamplesEstimate());
1592 OutlineFS = &OutlineFunctionSamples[
1594 OutlineFS->
merge(*FS, 1);
1600 notInlinedCallInfo.try_emplace(Callee, NotInlinedProfileInfo{0});
1601 pair.first->second.entryCount +=
FS->getHeadSamplesEstimate();
1612 InstrProfValueData{
I.first.getHashCode(),
I.second});
1619void SampleProfileLoader::generateMDProfMetadata(
Function &
F) {
1622 LLVM_DEBUG(
dbgs() <<
"\nPropagation complete. Setting branch weights\n");
1625 for (
auto &BI :
F) {
1628 if (BlockWeights[BB]) {
1629 for (
auto &
I : *BB) {
1630 if (!isa<CallInst>(
I) && !isa<InvokeInst>(
I))
1642 FS->findCallTargetMapAt(CallSite);
1643 if (!
T ||
T.get().empty())
1650 if (Probe->Factor < 1)
1657 for (
const auto &
C :
T.get())
1664 FS->findFunctionSamplesMapAt(CallSite)) {
1665 for (
const auto &NameFS : *M)
1666 Sum += NameFS.second.getHeadSamplesEstimate();
1672 I.setMetadata(LLVMContext::MD_prof,
nullptr);
1673 }
else if (!isa<IntrinsicInst>(&
I)) {
1681 for (
auto &
I : *BB) {
1682 if (isa<CallInst>(
I) || isa<InvokeInst>(
I)) {
1683 if (cast<CallBase>(
I).isIndirectCall()) {
1684 I.setMetadata(LLVMContext::MD_prof,
nullptr);
1695 if (!isa<BranchInst>(TI) && !isa<SwitchInst>(TI) &&
1696 !isa<IndirectBrInst>(TI))
1702 :
Twine(
"<UNKNOWN LOCATION>"))
1711 std::vector<uint64_t> EdgeIndex;
1716 EdgeIndex[
I] = EdgeMultiplicity[Succ];
1717 EdgeMultiplicity[Succ]++;
1722 Edge E = std::make_pair(BB, Succ);
1728 if (Weight > std::numeric_limits<uint32_t>::max()) {
1730 Weight = std::numeric_limits<uint32_t>::max();
1736 Weight == std::numeric_limits<uint32_t>::max() ? Weight
1741 uint64_t W = Weight / EdgeMultiplicity[Succ];
1743 if (EdgeIndex[
I] < Weight % EdgeMultiplicity[Succ])
1748 if (Weight > MaxWeight) {
1750 MaxDestInst = Succ->getFirstNonPHIOrDbgOrLifetime();
1766 if (MaxWeight > 0 &&
1772 <<
"most popular destination for conditional branches at "
1773 <<
ore::NV(
"CondBranchesLoc", BranchLoc);
1792bool SampleProfileLoader::emitAnnotations(
Function &
F) {
1793 bool Changed =
false;
1797 if (!ProbeManager->getDesc(
F))
1798 dbgs() <<
"Probe descriptor missing for Function " <<
F.getName()
1802 if (ProbeManager->profileIsValid(
F, *Samples)) {
1803 ++NumMatchedProfile;
1805 ++NumMismatchedProfile;
1807 dbgs() <<
"Profile is invalid due to CFG mismatch for Function "
1808 <<
F.getName() <<
"\n");
1813 if (getFunctionLoc(
F) == 0)
1817 <<
F.getName() <<
": " << getFunctionLoc(
F) <<
"\n");
1822 Changed |= inlineHotFunctionsWithPriority(
F, InlinedGUIDs);
1824 Changed |= inlineHotFunctions(
F, InlinedGUIDs);
1826 Changed |= computeAndPropagateWeights(
F, InlinedGUIDs);
1829 generateMDProfMetadata(
F);
1831 emitCoverageRemarks(
F);
1835std::unique_ptr<ProfiledCallGraph>
1836SampleProfileLoader::buildProfiledCallGraph(
Module &M) {
1837 std::unique_ptr<ProfiledCallGraph> ProfiledCG;
1839 ProfiledCG = std::make_unique<ProfiledCallGraph>(*ContextTracker);
1841 ProfiledCG = std::make_unique<ProfiledCallGraph>(Reader->getProfiles());
1849 ProfiledCG->addProfiledFunction(
1856std::vector<Function *>
1858 std::vector<Function *> FunctionOrderList;
1859 FunctionOrderList.reserve(
M.size());
1862 errs() <<
"WARNING: -use-profiled-call-graph ignored, should be used "
1863 "together with -sample-profile-top-down-load.\n";
1877 FunctionOrderList.push_back(&
F);
1878 return FunctionOrderList;
1931 std::unique_ptr<ProfiledCallGraph> ProfiledCG = buildProfiledCallGraph(M);
1943 FunctionOrderList.push_back(
F);
1947 std::reverse(FunctionOrderList.begin(), FunctionOrderList.end());
1952 dbgs() <<
"Function processing order:\n";
1953 for (
auto F : FunctionOrderList) {
1954 dbgs() <<
F->getName() <<
"\n";
1958 return FunctionOrderList;
1961bool SampleProfileLoader::doInitialization(
Module &M,
1963 auto &Ctx =
M.getContext();
1966 Filename, Ctx, *FS, FSDiscriminatorPass::Base, RemappingFilename);
1967 if (std::error_code EC = ReaderOrErr.getError()) {
1968 std::string Msg =
"Could not open profile: " +
EC.message();
1972 Reader = std::move(ReaderOrErr.get());
1973 Reader->setSkipFlatProf(LTOPhase == ThinOrFullLTOPhase::ThinLTOPostLink);
1976 Reader->setModule(&M);
1977 if (std::error_code EC = Reader->read()) {
1978 std::string Msg =
"profile reading failed: " +
EC.message();
1983 PSL = Reader->getProfileSymbolList();
1988 if (UseFlattenedProfile)
1990 Reader->profileIsCS());
1993 ProfAccForSymsInList =
1995 if (ProfAccForSymsInList) {
1996 NamesInProfile.clear();
1997 GUIDsInProfile.clear();
1998 if (
auto NameTable = Reader->getNameTable()) {
2000 for (
auto Name : *NameTable)
2001 GUIDsInProfile.insert(
Name.getHashCode());
2003 for (
auto Name : *NameTable)
2004 NamesInProfile.insert(
Name.stringRef());
2007 CoverageTracker.setProfAccForSymsInList(
true);
2012 M, *
FAM, Ctx,
nullptr,
2017 false,
InlineContext{LTOPhase, InlinePass::ReplaySampleProfileInliner});
2021 if (Reader->profileIsCS() || Reader->profileIsPreInlined() ||
2022 Reader->profileIsProbeBased()) {
2038 if (Reader->profileIsPreInlined()) {
2048 if (Reader->profileIsProbeBased() &&
2053 if (!Reader->profileIsCS()) {
2065 if (Reader->profileIsCS()) {
2067 ContextTracker = std::make_unique<SampleContextTracker>(
2068 Reader->getProfiles(), &GUIDToFuncNameMap);
2072 if (Reader->profileIsProbeBased()) {
2073 ProbeManager = std::make_unique<PseudoProbeManager>(M);
2074 if (!ProbeManager->moduleIsProbed(M)) {
2076 "Pseudo-probe-based profile requires SampleProfileProbePass";
2085 MatchingManager = std::make_unique<SampleProfileMatcher>(
2086 M, *Reader, CG, ProbeManager.get(), LTOPhase, SymbolMap, PSL,
2087 FuncNameToProfNameMap);
2103bool SampleProfileLoader::rejectHighStalenessProfile(
2106 "Only support for probe-based profile");
2109 for (
const auto &
I : Profiles) {
2110 const auto &
FS =
I.second;
2111 const auto *FuncDesc = ProbeManager->getDesc(
FS.getGUID());
2117 FS.getTotalSamples()))
2121 if (ProbeManager->profileIsHashMismatched(*FuncDesc, FS))
2122 NumMismatchedFunc++;
2130 if (NumMismatchedFunc * 100 >=
2132 auto &Ctx =
M.getContext();
2134 "The input profile significantly mismatches current source code. "
2135 "Please recollect profile to avoid performance regression.";
2142void SampleProfileLoader::removePseudoProbeInstsDiscriminator(
Module &M) {
2144 std::vector<Instruction *> InstsToDel;
2145 for (
auto &BB :
F) {
2146 for (
auto &
I : BB) {
2147 if (isa<PseudoProbeInst>(&
I))
2148 InstsToDel.push_back(&
I);
2149 else if (isa<CallBase>(&
I))
2150 if (
const DILocation *DIL =
I.getDebugLoc().get()) {
2154 std::optional<uint32_t> DwarfDiscriminator =
2158 DwarfDiscriminator ? *DwarfDiscriminator : 0));
2163 for (
auto *
I : InstsToDel)
2164 I->eraseFromParent();
2170 GUIDToFuncNameMapper Mapper(M, *Reader, GUIDToFuncNameMap);
2173 if (
M.getProfileSummary(
false) ==
nullptr) {
2174 M.setProfileSummary(Reader->getSummary().getMD(
M.getContext()),
2180 rejectHighStalenessProfile(M, PSI, Reader->getProfiles()))
2184 for (
const auto &
I : Reader->getProfiles())
2185 TotalCollectedSamples +=
I.second.getTotalSamples();
2187 auto Remapper = Reader->getRemapper();
2189 for (
const auto &N_F :
M.getValueSymbolTable()) {
2191 Function *
F = dyn_cast<Function>(N_F.getValue());
2192 if (
F ==
nullptr || OrigName.
empty())
2196 if (OrigName != NewName && !NewName.
empty()) {
2203 r.first->second =
nullptr;
2208 if (
auto MapName = Remapper->lookUpNameInProfile(OrigName)) {
2209 if (*MapName != OrigName && !MapName->empty())
2218 MatchingManager->runOnModule();
2219 MatchingManager->clearMatchingData();
2222 "No empty StringRef should be added in SymbolMap");
2224 "FuncNameToProfNameMap is not empty when --salvage-unused-profile is "
2227 bool retval =
false;
2228 for (
auto *
F : buildFunctionOrder(M, CG)) {
2230 clearFunctionData();
2236 for (
const std::pair<Function *, NotInlinedProfileInfo> &pair :
2242 removePseudoProbeInstsDiscriminator(M);
2244 M.eraseNamedMetadata(FuncInfo);
2251 LLVM_DEBUG(
dbgs() <<
"\n\nProcessing Function " <<
F.getName() <<
"\n");
2252 DILocation2SampleMap.clear();
2263 initialEntryCount = 0;
2266 ProfAccForSymsInList =
false;
2268 CoverageTracker.setProfAccForSymsInList(ProfAccForSymsInList);
2274 if (ProfAccForSymsInList) {
2276 if (PSL->contains(
F.getName()))
2277 initialEntryCount = 0;
2291 GUIDsInProfile.count(Function::getGUID(CanonName))) ||
2293 initialEntryCount = -1;
2298 if (!
F.getEntryCount())
2300 std::unique_ptr<OptimizationRemarkEmitter> OwnedORE;
2307 OwnedORE = std::make_unique<OptimizationRemarkEmitter>(&
F);
2308 ORE = OwnedORE.get();
2312 Samples = ContextTracker->getBaseSamplesFor(
F);
2314 Samples = Reader->getSamplesFor(
F);
2319 auto It = OutlineFunctionSamples.find(
FunctionId(CanonName));
2320 if (It != OutlineFunctionSamples.end()) {
2321 Samples = &It->second;
2322 }
else if (
auto Remapper = Reader->getRemapper()) {
2323 if (
auto RemppedName = Remapper->lookUpNameInProfile(CanonName)) {
2324 It = OutlineFunctionSamples.find(
FunctionId(*RemppedName));
2325 if (It != OutlineFunctionSamples.end())
2326 Samples = &It->second;
2332 if (Samples && !Samples->
empty())
2333 return emitAnnotations(
F);
2339 bool UseFlattenedProfile)
2340 : ProfileFileName(File), ProfileRemappingFileName(RemappingFile),
2341 LTOPhase(LTOPhase), FS(
std::
move(FS)),
2342 DisableSampleProfileInlining(DisableSampleProfileInlining),
2343 UseFlattenedProfile(UseFlattenedProfile) {}
2364 SampleProfileLoader SampleLoader(
2367 : ProfileRemappingFileName,
2368 LTOPhase, FS, GetAssumptionCache, GetTTI, GetTLI, CG,
2369 DisableSampleProfileInlining, UseFlattenedProfile);
2370 if (!SampleLoader.doInitialization(M, &
FAM))
2374 if (!SampleLoader.runOnModule(M, &AM, PSI))
#define clEnumValN(ENUMVAL, FLAGNAME, DESC)
This file defines the DenseMap class.
This file defines the DenseSet and SmallDenseSet classes.
static bool runOnFunction(Function &F, bool PostInlining)
Provides ErrorOr<T> smart pointer.
static cl::opt< unsigned > SizeLimit("eif-limit", cl::init(6), cl::Hidden, cl::desc("Size limit in Hexagon early if-conversion"))
Module.h This file contains the declarations for the Module class.
This header defines various interfaces for pass management in LLVM.
Implements a lazy call graph analysis and related passes for the new pass manager.
Legalize the Machine IR a function s Machine IR
This file implements a map that provides insertion order iteration.
static const Function * getCalledFunction(const Value *V)
ConstantRange Range(APInt(BitWidth, Low), APInt(BitWidth, High))
FunctionAnalysisManager FAM
This file defines the PriorityQueue class.
This file contains the declarations for profiling metadata utility functions.
This builds on the llvm/ADT/GraphTraits.h file to find the strongly connected components (SCCs) of a ...
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
This file provides the interface for context-sensitive profile tracker used by CSSPGO.
This file provides the interface for the sampled PGO profile loader base implementation.
This file provides the utility functions for the sampled PGO loader base implementation.
This file provides the interface for SampleProfileMatcher.
This file provides the interface for the pseudo probe implementation for AutoFDO.
static cl::opt< std::string > SampleProfileFile("sample-profile-file", cl::init(""), cl::value_desc("filename"), cl::desc("Profile file loaded by -sample-profile"), cl::Hidden)
static cl::opt< unsigned > MinfuncsForStalenessError("min-functions-for-staleness-error", cl::Hidden, cl::init(50), cl::desc("Skip the check if the number of hot functions is smaller than " "the specified number."))
cl::opt< bool > SalvageUnusedProfile("salvage-unused-profile", cl::Hidden, cl::init(false), cl::desc("Salvage unused profile by matching with new " "functions on call graph."))
static cl::opt< bool > ProfileSampleBlockAccurate("profile-sample-block-accurate", cl::Hidden, cl::init(false), cl::desc("If the sample profile is accurate, we will mark all un-sampled " "branches and calls as having 0 samples. Otherwise, treat " "them conservatively as unknown. "))
static cl::opt< unsigned > PrecentMismatchForStalenessError("precent-mismatch-for-staleness-error", cl::Hidden, cl::init(80), cl::desc("Reject the profile if the mismatch percent is higher than the " "given number."))
static cl::opt< bool > DisableSampleLoaderInlining("disable-sample-loader-inlining", cl::Hidden, cl::init(false), cl::desc("If true, artificially skip inline transformation in sample-loader " "pass, and merge (or scale) profiles (as configured by " "--sample-profile-merge-inlinee)."))
static cl::opt< bool > RemoveProbeAfterProfileAnnotation("sample-profile-remove-probe", cl::Hidden, cl::init(false), cl::desc("Remove pseudo-probe after sample profile annotation."))
static cl::opt< unsigned > MaxNumPromotions("sample-profile-icp-max-prom", cl::init(3), cl::Hidden, cl::desc("Max number of promotions for a single indirect " "call callsite in sample profile loader"))
static cl::opt< ReplayInlinerSettings::Fallback > ProfileInlineReplayFallback("sample-profile-inline-replay-fallback", cl::init(ReplayInlinerSettings::Fallback::Original), cl::values(clEnumValN(ReplayInlinerSettings::Fallback::Original, "Original", "All decisions not in replay send to original advisor (default)"), clEnumValN(ReplayInlinerSettings::Fallback::AlwaysInline, "AlwaysInline", "All decisions not in replay are inlined"), clEnumValN(ReplayInlinerSettings::Fallback::NeverInline, "NeverInline", "All decisions not in replay are not inlined")), cl::desc("How sample profile inline replay treats sites that don't come " "from the replay. Original: defers to original advisor, " "AlwaysInline: inline all sites not in replay, NeverInline: " "inline no sites not in replay"), cl::Hidden)
static cl::opt< bool > OverwriteExistingWeights("overwrite-existing-weights", cl::Hidden, cl::init(false), cl::desc("Ignore existing branch weights on IR and always overwrite."))
static void updateIDTMetaData(Instruction &Inst, const SmallVectorImpl< InstrProfValueData > &CallTargets, uint64_t Sum)
Update indirect call target profile metadata for Inst.
static cl::opt< bool > AnnotateSampleProfileInlinePhase("annotate-sample-profile-inline-phase", cl::Hidden, cl::init(false), cl::desc("Annotate LTO phase (prelink / postlink), or main (no LTO) for " "sample-profile inline pass name."))
static cl::opt< std::string > ProfileInlineReplayFile("sample-profile-inline-replay", cl::init(""), cl::value_desc("filename"), cl::desc("Optimization remarks file containing inline remarks to be replayed " "by inlining from sample profile loader."), cl::Hidden)
static cl::opt< bool > ProfileAccurateForSymsInList("profile-accurate-for-symsinlist", cl::Hidden, cl::init(true), cl::desc("For symbols in profile symbol list, regard their profiles to " "be accurate. It may be overridden by profile-sample-accurate. "))
static cl::opt< bool > ProfileMergeInlinee("sample-profile-merge-inlinee", cl::Hidden, cl::init(true), cl::desc("Merge past inlinee's profile to outline version if sample " "profile loader decided not to inline a call site. It will " "only be enabled when top-down order of profile loading is " "enabled. "))
cl::opt< bool > PersistProfileStaleness("persist-profile-staleness", cl::Hidden, cl::init(false), cl::desc("Compute stale profile statistical metrics and write it into the " "native object file(.llvm_stats section)."))
static cl::opt< bool > CallsitePrioritizedInline("sample-profile-prioritized-inline", cl::Hidden, cl::desc("Use call site prioritized inlining for sample profile loader. " "Currently only CSSPGO is supported."))
static bool doesHistoryAllowICP(const Instruction &Inst, StringRef Candidate)
Check whether the indirect call promotion history of Inst allows the promotion for Candidate.
static SmallVector< InstrProfValueData, 2 > GetSortedValueDataFromCallTargets(const SampleRecord::CallTargetMap &M)
Returns the sorted CallTargetMap M by count in descending order.
static cl::opt< bool > UseProfiledCallGraph("use-profiled-call-graph", cl::init(true), cl::Hidden, cl::desc("Process functions in a top-down order " "defined by the profiled call graph when " "-sample-profile-top-down-load is on."))
static cl::opt< ReplayInlinerSettings::Scope > ProfileInlineReplayScope("sample-profile-inline-replay-scope", cl::init(ReplayInlinerSettings::Scope::Function), cl::values(clEnumValN(ReplayInlinerSettings::Scope::Function, "Function", "Replay on functions that have remarks associated " "with them (default)"), clEnumValN(ReplayInlinerSettings::Scope::Module, "Module", "Replay on the entire module")), cl::desc("Whether inline replay should be applied to the entire " "Module or just the Functions (default) that are present as " "callers in remarks during sample profile inlining."), cl::Hidden)
static cl::opt< unsigned > ProfileICPRelativeHotness("sample-profile-icp-relative-hotness", cl::Hidden, cl::init(25), cl::desc("Relative hotness percentage threshold for indirect " "call promotion in proirity-based sample profile loader inlining."))
Function::ProfileCount ProfileCount
static cl::opt< unsigned > ProfileICPRelativeHotnessSkip("sample-profile-icp-relative-hotness-skip", cl::Hidden, cl::init(1), cl::desc("Skip relative hotness check for ICP up to given number of targets."))
cl::opt< bool > ReportProfileStaleness("report-profile-staleness", cl::Hidden, cl::init(false), cl::desc("Compute and report stale profile statistical metrics."))
static cl::opt< bool > UsePreInlinerDecision("sample-profile-use-preinliner", cl::Hidden, cl::desc("Use the preinliner decisions stored in profile context."))
static cl::opt< bool > ProfileSizeInline("sample-profile-inline-size", cl::Hidden, cl::init(false), cl::desc("Inline cold call sites in profile loader if it's beneficial " "for code size."))
cl::opt< bool > SalvageStaleProfile("salvage-stale-profile", cl::Hidden, cl::init(false), cl::desc("Salvage stale profile by fuzzy matching and use the remapped " "location for sample profile query."))
static cl::opt< bool > ProfileTopDownLoad("sample-profile-top-down-load", cl::Hidden, cl::init(true), cl::desc("Do profile annotation and inlining for functions in top-down " "order of call graph during sample profile loading. It only " "works for new pass manager. "))
static cl::opt< bool > ProfileSampleAccurate("profile-sample-accurate", cl::Hidden, cl::init(false), cl::desc("If the sample profile is accurate, we will mark all un-sampled " "callsite and function as having 0 samples. Otherwise, treat " "un-sampled callsites and functions conservatively as unknown. "))
static cl::opt< bool > AllowRecursiveInline("sample-profile-recursive-inline", cl::Hidden, cl::desc("Allow sample loader inliner to inline recursive calls."))
static cl::opt< CallSiteFormat::Format > ProfileInlineReplayFormat("sample-profile-inline-replay-format", cl::init(CallSiteFormat::Format::LineColumnDiscriminator), cl::values(clEnumValN(CallSiteFormat::Format::Line, "Line", "<Line Number>"), clEnumValN(CallSiteFormat::Format::LineColumn, "LineColumn", "<Line Number>:<Column Number>"), clEnumValN(CallSiteFormat::Format::LineDiscriminator, "LineDiscriminator", "<Line Number>.<Discriminator>"), clEnumValN(CallSiteFormat::Format::LineColumnDiscriminator, "LineColumnDiscriminator", "<Line Number>:<Column Number>.<Discriminator> (default)")), cl::desc("How sample profile inline replay file is formatted"), cl::Hidden)
static cl::opt< std::string > SampleProfileRemappingFile("sample-profile-remapping-file", cl::init(""), cl::value_desc("filename"), cl::desc("Profile remapping file loaded by -sample-profile"), cl::Hidden)
static cl::opt< unsigned > HotFuncCutoffForStalenessError("hot-func-cutoff-for-staleness-error", cl::Hidden, cl::init(800000), cl::desc("A function is considered hot for staleness error check if its " "total sample count is above the specified percentile"))
This file provides the interface for the sampled PGO loader pass.
This file defines the SmallVector class.
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
Defines the virtual file system interface vfs::FileSystem.
A container for analyses that lazily runs them and caches their results.
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)
Get the result of an analysis pass for a given IR unit.
A function analysis which provides an AssumptionCache.
A cache of @llvm.assume calls within a function.
LLVM Basic Block Representation.
const Function * getParent() const
Return the enclosing method, or null if none.
Base class for all callable instructions (InvokeInst and CallInst) Holds everything related to callin...
Function * getCalledFunction() const
Returns the function called, or null if this is an indirect function invocation or the function signa...
This class represents a function call, abstracting a target machine's calling convention.
static bool isPseudoProbeDiscriminator(unsigned Discriminator)
const DILocation * cloneWithDiscriminator(unsigned Discriminator) const
Returns a new DILocation with updated Discriminator.
ValueT lookup(const_arg_type_t< KeyT > Val) const
lookup - Return the entry for the specified key, or a default constructed value if no such entry exis...
iterator find(const_arg_type_t< KeyT > Val)
std::pair< iterator, bool > try_emplace(KeyT &&Key, Ts &&...Args)
size_type count(const_arg_type_t< KeyT > Val) const
Return 1 if the specified key is in the map, 0 otherwise.
Implements a dense probed hash-table based set.
Diagnostic information for the sample profiler.
void recalculate(ParentType &Func)
recalculate - compute a dominator tree for the given function
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
Represents either an error or a value T.
Class to represent profile counts.
DISubprogram * getSubprogram() const
Get the attached subprogram.
bool isDeclaration() const
Return true if the primary definition of this global value is outside of the current translation unit...
Represents the cost of inlining a function.
static InlineCost getNever(const char *Reason, std::optional< CostBenefitPair > CostBenefit=std::nullopt)
static InlineCost getAlways(const char *Reason, std::optional< CostBenefitPair > CostBenefit=std::nullopt)
static InlineCost get(int Cost, int Threshold, int StaticBonus=0)
This class captures the data input to the InlineFunction call, and records the auxiliary results prod...
InlineResult is basically true or false.
An analysis over an "outer" IR unit that provides access to an analysis manager over an "inner" IR un...
unsigned getNumSuccessors() const LLVM_READONLY
Return the number of successors that this instruction has.
const DebugLoc & getDebugLoc() const
Return the debug location for this node as a DebugLoc.
bool extractProfTotalWeight(uint64_t &TotalVal) const
Retrieve total raw weight values of a branch.
BasicBlock * getSuccessor(unsigned Idx) const LLVM_READONLY
Return the specified successor. This instruction must be a terminator.
void setMetadata(unsigned KindID, MDNode *Node)
Set the metadata of the specified kind to the specified node.
A smart pointer to a reference-counted object that inherits from RefCountedBase or ThreadSafeRefCount...
This is an important class for using LLVM in a threaded context.
void diagnose(const DiagnosticInfo &DI)
Report a message to the currently installed diagnostic handler.
An analysis pass which computes the call graph for a module.
A lazily constructed view of the call graph of a module.
This class implements a map that also provides access to all stored values in a deterministic order.
VectorType::iterator erase(typename VectorType::iterator Iterator)
Remove the element given by Iterator.
std::pair< iterator, bool > insert(const std::pair< KeyT, ValueT > &KV)
ValueT lookup(const KeyT &Key) const
A Module instance is used to store all the information related to an LLVM module.
PostDominatorTree Class - Concrete subclass of DominatorTree that is used to compute the post-dominat...
A set of analyses that are preserved following a run of a transformation pass.
static PreservedAnalyses none()
Convenience factory function for the empty preserved set.
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
PriorityQueue - This class behaves like std::priority_queue and provides a few additional convenience...
An analysis pass based on the new PM to deliver ProfileSummaryInfo.
Analysis providing profile information.
void refresh()
If no summary is present, attempt to refresh.
bool isHotCountNthPercentile(int PercentileCutoff, uint64_t C) const
Returns true if count C is considered hot with regard to a given hot percentile cutoff value.
Sample profile inference pass.
void computeDominanceAndLoopInfo(FunctionT &F)
virtual ErrorOr< uint64_t > getInstWeight(const InstructionT &Inst)
Get the weight for an instruction.
virtual const FunctionSamples * findFunctionSamples(const InstructionT &I) const
Get the FunctionSamples for an instruction.
PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM)
SampleProfileLoaderPass(std::string File="", std::string RemappingFile="", ThinOrFullLTOPhase LTOPhase=ThinOrFullLTOPhase::None, IntrusiveRefCntPtr< vfs::FileSystem > FS=nullptr, bool DisableSampleProfileInlining=false, bool UseFlattenedProfile=false)
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
reference emplace_back(ArgTypes &&... Args)
iterator insert(iterator I, T &&Elt)
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
StringRef - Represent a constant reference to a string, i.e. a character array and a length, which need not be null terminated.
constexpr bool empty() const
empty - Check if the string is empty.
StringSet - A wrapper for StringMap that provides set-like functionality.
Analysis pass providing the TargetTransformInfo.
Analysis pass providing the TargetLibraryInfo.
Provides information about what library functions are available for the current target.
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
LLVM Value Representation.
StringRef getName() const
Return a constant reference to the value's name.
int getNumOccurrences() const
std::pair< iterator, bool > insert(const ValueT &V)
const ParentTy * getParent() const
This class represents a function that is read from a sample profile.
Representation of the samples collected for a function.
void findInlinedFunctions(DenseSet< GlobalValue::GUID > &S, const HashKeyMap< std::unordered_map, FunctionId, Function * > &SymbolMap, uint64_t Threshold) const
Recursively traverses all children, if the total sample count of the corresponding function is no les...
FunctionId getFunction() const
Return the function name.
static bool ProfileIsProbeBased
static StringRef getCanonicalFnName(const Function &F)
Return the canonical name for a function, taking into account suffix elision policy attributes.
void setContextSynthetic()
SampleContext & getContext() const
sampleprof_error merge(const FunctionSamples &Other, uint64_t Weight=1)
Merge the samples in Other into this one.
static LineLocation getCallSiteIdentifier(const DILocation *DIL, bool ProfileIsFS=false)
Returns a unique call site identifier for a given debug location of a call instruction.
uint64_t getHeadSamplesEstimate() const
Return an estimate of the sample count of the function entry basic block.
uint64_t getGUID() const
Return the GUID of the context's name.
const BodySampleMap & getBodySamples() const
Return all the samples collected in the body of the function.
static bool UseMD5
Whether the profile uses MD5 to represent string.
This class is a wrapper to associative container MapT<KeyT, ValueT> using the hash value of the origi...
static void flattenProfile(SampleProfileMap &ProfileMap, bool ProfileIsCS=false)
bool hasState(ContextStateMask S)
bool hasAttribute(ContextAttributeMask A)
This class provides operator overloads to the map container using MD5 as the key type,...
Sample-based profile reader.
static ErrorOr< std::unique_ptr< SampleProfileReader > > create(StringRef Filename, LLVMContext &C, vfs::FileSystem &FS, FSDiscriminatorPass P=FSDiscriminatorPass::Base, StringRef RemapFilename="")
Create a sample profile reader appropriate to the file format.
std::unordered_map< FunctionId, uint64_t > CallTargetMap
static const SortedCallTargetSet sortCallTargets(const CallTargetMap &Targets)
Sort call targets in descending order of call frequency.
static const CallTargetMap adjustCallTargets(const CallTargetMap &Targets, float DistributionFactor)
Prorate call targets by a distribution factor.
Enumerate the SCCs of a directed graph in reverse topological order of the SCC DAG.
bool isAtEnd() const
Direct loop termination test which is more efficient than comparison with end().
Sort the nodes of a directed SCC in the decreasing order of the edge weights.
@ C
The default llvm calling convention, compatible with C.
ValuesClass values(OptsTy... Options)
Helper to build a ValuesClass by forwarding a variable number of arguments as an initializer list to ...
initializer< Ty > init(const Ty &Val)
void checkExpectAnnotations(Instruction &I, const ArrayRef< uint32_t > ExistingWeights, bool IsFrontend)
checkExpectAnnotations - compares PGO counters to the thresholds used for llvm.expect and warns if th...
DenseMap< SymbolStringPtr, ExecutorSymbolDef > SymbolMap
A map from symbol names (as SymbolStringPtrs) to JITSymbols (address/flags pairs).
DiagnosticInfoOptimizationBase::Argument NV
CallBase & promoteIndirectCall(CallBase &CB, Function *F, uint64_t Count, uint64_t TotalCount, bool AttachProfToDirectCall, OptimizationRemarkEmitter *ORE)
NodeAddr< FuncNode * > Func
static FunctionId getRepInFormat(StringRef Name)
Get the proper representation of a string according to whether the current Format uses MD5 to represe...
@ ContextDuplicatedIntoBase
std::map< FunctionId, FunctionSamples > FunctionSamplesMap
bool callsiteIsHot(const FunctionSamples *CallsiteFS, ProfileSummaryInfo *PSI, bool ProfAccForSymsInList)
Return true if the given callsite is hot with respect to the hot cutoff threshold.
IntrusiveRefCntPtr< FileSystem > getRealFileSystem()
Gets a vfs::FileSystem for the 'real' file system, as seen by the operating system.
This is an optimization pass for GlobalISel generic memory operations.
bool isLegalToPromote(const CallBase &CB, Function *Callee, const char **FailureReason=nullptr)
Return true if the given indirect call site can be made to call Callee.
auto size(R &&Range, std::enable_if_t< std::is_base_of< std::random_access_iterator_tag, typename std::iterator_traits< decltype(Range.begin())>::iterator_category >::value, void > *=nullptr)
Get the size of a range.
cl::opt< int > ProfileInlineLimitMin
bool succ_empty(const Instruction *I)
scc_iterator< T > scc_begin(const T &G)
Construct the begin iterator for a deduced graph type T.
static void buildTopDownFuncOrder(LazyCallGraph &CG, std::vector< Function * > &FunctionOrderList)
void setProbeDistributionFactor(Instruction &Inst, float Factor)
std::string AnnotateInlinePassName(InlineContext IC)
ThinOrFullLTOPhase
This enumerates the LLVM full LTO or ThinLTO optimization phases.
InlineCost getInlineCost(CallBase &Call, const InlineParams &Params, TargetTransformInfo &CalleeTTI, function_ref< AssumptionCache &(Function &)> GetAssumptionCache, function_ref< const TargetLibraryInfo &(Function &)> GetTLI, function_ref< BlockFrequencyInfo &(Function &)> GetBFI=nullptr, ProfileSummaryInfo *PSI=nullptr, OptimizationRemarkEmitter *ORE=nullptr)
Get an InlineCost object representing the cost of inlining this callsite.
cl::opt< bool > SampleProfileUseProfi
void annotateValueSite(Module &M, Instruction &Inst, const InstrProfRecord &InstrProfR, InstrProfValueKind ValueKind, uint32_t SiteIndx, uint32_t MaxMDCount=3)
Get the value profile data for value site SiteIdx from InstrProfR and annotate the instruction Inst w...
void setBranchWeights(Instruction &I, ArrayRef< uint32_t > Weights, bool IsExpected)
Create a new branch_weights metadata node and add or overwrite a prof metadata reference to instructi...
void sort(IteratorTy Start, IteratorTy End)
llvm::cl::opt< bool > UseIterativeBFIInference
std::optional< PseudoProbe > extractProbe(const Instruction &Inst)
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
void emitInlinedIntoBasedOnCost(OptimizationRemarkEmitter &ORE, DebugLoc DLoc, const BasicBlock *Block, const Function &Callee, const Function &Caller, const InlineCost &IC, bool ForProfileContext=false, const char *PassName=nullptr)
Emit ORE message based on cost (default heuristic).
SmallVector< InstrProfValueData, 4 > getValueProfDataFromInst(const Instruction &Inst, InstrProfValueKind ValueKind, uint32_t MaxNumValueData, uint64_t &TotalC, bool GetNoICPValue=false)
Extract the value profile data from Inst and return them if Inst is annotated with value profile dat...
std::unique_ptr< InlineAdvisor > getReplayInlineAdvisor(Module &M, FunctionAnalysisManager &FAM, LLVMContext &Context, std::unique_ptr< InlineAdvisor > OriginalAdvisor, const ReplayInlinerSettings &ReplaySettings, bool EmitRemarks, InlineContext IC)
cl::opt< int > SampleHotCallSiteThreshold
raw_fd_ostream & errs()
This returns a reference to a raw_ostream for standard error.
void updateProfileCallee(Function *Callee, int64_t EntryDelta, const ValueMap< const Value *, WeakTrackingVH > *VMap=nullptr)
Updates profile information by adjusting the entry count by adding EntryDelta then scaling callsite i...
cl::opt< int > SampleColdCallSiteThreshold
InlineResult InlineFunction(CallBase &CB, InlineFunctionInfo &IFI, bool MergeAttributes=false, AAResults *CalleeAAR=nullptr, bool InsertLifetime=true, Function *ForwardVarArgsTo=nullptr)
This function inlines the called function into the basic block of the caller.
InlineParams getInlineParams()
Generate the parameters to tune the inline cost analysis based only on the commandline options.
OutputIt move(R &&Range, OutputIt Out)
Provide wrappers to std::move which take ranges instead of having to pass begin/end explicitly.
static bool skipProfileForFunction(const Function &F)
cl::opt< bool > SortProfiledSCC
cl::opt< int > ProfileInlineLimitMax
cl::opt< bool > EnableExtTspBlockPlacement
const uint64_t NOMORE_ICP_MAGICNUM
Magic number in the value profile metadata showing a target has been promoted for the instruction and...
cl::opt< int > ProfileInlineGrowthLimit
constexpr const char * PseudoProbeDescMetadataName
Implement std::hash so that hash_code can be used in STL containers.
Used in the streaming interface as the general argument type.
A wrapper of binary function with basic blocks and jumps.
Provides context on when an inline advisor is constructed in the pipeline (e.g., link phase,...
Thresholds to tune inline cost analysis.
std::optional< bool > AllowRecursiveCall
Indicate whether we allow inlining for recursive call.
std::optional< bool > ComputeFullInlineCost
Compute inline cost even when the cost has exceeded the threshold.
static std::optional< uint32_t > extractDwarfBaseDiscriminator(uint32_t Value)