90#include <system_error>
95using namespace sampleprof;
98#define DEBUG_TYPE "sample-profile"
99#define CSINLINE_DEBUG DEBUG_TYPE "-inline"
102 "Number of functions inlined with context sensitive profile");
104 "Number of functions not inlined with context sensitive profile");
106 "Number of functions with CFG mismatched profile");
107STATISTIC(NumMatchedProfile,
"Number of functions with CFG matched profile");
109 "Number of inlined callsites with a partial distribution factor");
112 "Number of functions with FDO inline stopped due to min size limit");
114 "Number of functions with FDO inline stopped due to max size limit");
116 NumCSInlinedHitGrowthLimit,
117 "Number of functions with FDO inline stopped due to growth size limit");
134 cl::desc(
"Salvage stale profile by fuzzy matching and use the remapped "
135 "location for sample profile query."));
138 cl::desc(
"Salvage unused profile by matching with new "
139 "functions on call graph."));
143 cl::desc(
"Compute and report stale profile statistical metrics."));
147 cl::desc(
"Compute stale profile statistical metrics and write it into the "
148 "native object file(.llvm_stats section)."));
152 cl::desc(
"If the sample profile is accurate, we will mark all un-sampled "
153 "callsite and function as having 0 samples. Otherwise, treat "
154 "un-sampled callsites and functions conservatively as unknown. "));
158 cl::desc(
"If the sample profile is accurate, we will mark all un-sampled "
159 "branches and calls as having 0 samples. Otherwise, treat "
160 "them conservatively as unknown. "));
164 cl::desc(
"For symbols in profile symbol list, regard their profiles to "
165 "be accurate. It may be overriden by profile-sample-accurate. "));
169 cl::desc(
"Merge past inlinee's profile to outline version if sample "
170 "profile loader decided not to inline a call site. It will "
171 "only be enabled when top-down order of profile loading is "
176 cl::desc(
"Do profile annotation and inlining for functions in top-down "
177 "order of call graph during sample profile loading. It only "
178 "works for new pass manager. "));
182 cl::desc(
"Process functions in a top-down order "
183 "defined by the profiled call graph when "
184 "-sample-profile-top-down-load is on."));
188 cl::desc(
"Inline cold call sites in profile loader if it's beneficial "
196 cl::desc(
"If true, artifically skip inline transformation in sample-loader "
197 "pass, and merge (or scale) profiles (as configured by "
198 "--sample-profile-merge-inlinee)."));
203 cl::desc(
"Sort profiled recursion by edge weights."));
207 cl::desc(
"The size growth ratio limit for proirity-based sample profile "
208 "loader inlining."));
212 cl::desc(
"The lower bound of size growth limit for "
213 "proirity-based sample profile loader inlining."));
217 cl::desc(
"The upper bound of size growth limit for "
218 "proirity-based sample profile loader inlining."));
222 cl::desc(
"Hot callsite threshold for proirity-based sample profile loader "
227 cl::desc(
"Threshold for inlining cold callsites"));
233 "Relative hotness percentage threshold for indirect "
234 "call promotion in proirity-based sample profile loader inlining."));
239 "Skip relative hotness check for ICP up to given number of targets."));
243 cl::desc(
"A function is considered hot for staleness error check if its "
244 "total sample count is above the specified percentile"));
248 cl::desc(
"Skip the check if the number of hot functions is smaller than "
249 "the specified number."));
253 cl::desc(
"Reject the profile if the mismatch percent is higher than the "
257 "sample-profile-prioritized-inline",
cl::Hidden,
258 cl::desc(
"Use call site prioritized inlining for sample profile loader."
259 "Currently only CSSPGO is supported."));
263 cl::desc(
"Use the preinliner decisions stored in profile context."));
266 "sample-profile-recursive-inline",
cl::Hidden,
267 cl::desc(
"Allow sample loader inliner to inline recursive calls."));
271 cl::desc(
"Remove pseudo-probe after sample profile annotation."));
276 "Optimization remarks file containing inline remarks to be replayed "
277 "by inlining from sample profile loader."),
281 "sample-profile-inline-replay-scope",
282 cl::init(ReplayInlinerSettings::Scope::Function),
284 "Replay on functions that have remarks associated "
285 "with them (default)"),
286 clEnumValN(ReplayInlinerSettings::Scope::Module,
"Module",
287 "Replay on the entire module")),
288 cl::desc(
"Whether inline replay should be applied to the entire "
289 "Module or just the Functions (default) that are present as "
290 "callers in remarks during sample profile inlining."),
294 "sample-profile-inline-replay-fallback",
295 cl::init(ReplayInlinerSettings::Fallback::Original),
298 ReplayInlinerSettings::Fallback::Original,
"Original",
299 "All decisions not in replay send to original advisor (default)"),
300 clEnumValN(ReplayInlinerSettings::Fallback::AlwaysInline,
301 "AlwaysInline",
"All decisions not in replay are inlined"),
302 clEnumValN(ReplayInlinerSettings::Fallback::NeverInline,
"NeverInline",
303 "All decisions not in replay are not inlined")),
304 cl::desc(
"How sample profile inline replay treats sites that don't come "
305 "from the replay. Original: defers to original advisor, "
306 "AlwaysInline: inline all sites not in replay, NeverInline: "
307 "inline no sites not in replay"),
311 "sample-profile-inline-replay-format",
312 cl::init(CallSiteFormat::Format::LineColumnDiscriminator),
314 clEnumValN(CallSiteFormat::Format::Line,
"Line",
"<Line Number>"),
315 clEnumValN(CallSiteFormat::Format::LineColumn,
"LineColumn",
316 "<Line Number>:<Column Number>"),
317 clEnumValN(CallSiteFormat::Format::LineDiscriminator,
318 "LineDiscriminator",
"<Line Number>.<Discriminator>"),
319 clEnumValN(CallSiteFormat::Format::LineColumnDiscriminator,
320 "LineColumnDiscriminator",
321 "<Line Number>:<Column Number>.<Discriminator> (default)")),
326 cl::desc(
"Max number of promotions for a single indirect "
327 "call callsite in sample profile loader"));
331 cl::desc(
"Ignore existing branch weights on IR and always overwrite."));
335 cl::desc(
"Annotate LTO phase (prelink / postlink), or main (no LTO) for "
336 "sample-profile inline pass name."));
346using Edge = std::pair<const BasicBlock *, const BasicBlock *>;
351class GUIDToFuncNameMapper {
356 CurrentGUIDToFuncNameMap(GUIDToFuncNameMap) {
360 for (
const auto &
F : CurrentModule) {
362 CurrentGUIDToFuncNameMap.insert(
363 {Function::getGUID(OrigName), OrigName});
373 if (CanonName != OrigName)
374 CurrentGUIDToFuncNameMap.insert(
375 {Function::getGUID(CanonName), CanonName});
379 SetGUIDToFuncNameMapForAll(&CurrentGUIDToFuncNameMap);
382 ~GUIDToFuncNameMapper() {
386 CurrentGUIDToFuncNameMap.clear();
390 SetGUIDToFuncNameMapForAll(
nullptr);
395 std::queue<FunctionSamples *> FSToUpdate;
397 FSToUpdate.push(&IFS.second);
400 while (!FSToUpdate.empty()) {
403 FS->GUIDToFuncNameMap = Map;
404 for (
const auto &ICS : FS->getCallsiteSamples()) {
406 for (
const auto &IFS : FSMap) {
408 FSToUpdate.push(&FS);
420struct InlineCandidate {
430 float CallsiteDistribution;
434struct CandidateComparer {
435 bool operator()(
const InlineCandidate &LHS,
const InlineCandidate &RHS) {
436 if (
LHS.CallsiteCount !=
RHS.CallsiteCount)
437 return LHS.CallsiteCount <
RHS.CallsiteCount;
455using CandidateQueue =
473 bool UseFlattenedProfile)
476 GetAC(
std::
move(GetAssumptionCache)),
477 GetTTI(
std::
move(GetTargetTransformInfo)), GetTLI(
std::
move(GetTLI)),
478 CG(CG), LTOPhase(LTOPhase),
483 DisableSampleProfileInlining(DisableSampleProfileInlining),
484 UseFlattenedProfile(UseFlattenedProfile) {}
497 std::vector<const FunctionSamples *>
503 bool tryPromoteAndInlineCandidate(
509 std::optional<InlineCost> getExternalInlineAdvisorCost(
CallBase &CB);
510 bool getExternalInlineAdvisorShouldInline(
CallBase &CB);
511 InlineCost shouldInlineCandidate(InlineCandidate &Candidate);
512 bool getInlineCandidate(InlineCandidate *NewCandidate,
CallBase *CB);
514 tryInlineCandidate(InlineCandidate &Candidate,
517 inlineHotFunctionsWithPriority(
Function &
F,
521 void emitOptimizationRemarksForInlineCandidates(
524 void promoteMergeNotInlinedContextSamples(
528 std::unique_ptr<ProfiledCallGraph> buildProfiledCallGraph(
Module &M);
529 void generateMDProfMetadata(
Function &
F);
532 void removePseudoProbeInstsDiscriminator(
Module &M);
550 std::unique_ptr<SampleContextTracker> ContextTracker;
558 const std::string AnnotatedPassName;
562 std::shared_ptr<ProfileSymbolList> PSL;
573 struct NotInlinedProfileInfo {
596 bool ProfAccForSymsInList;
598 bool DisableSampleProfileInlining;
600 bool UseFlattenedProfile;
603 std::unique_ptr<InlineAdvisor> ExternalInlineAdvisor;
606 std::unique_ptr<SampleProfileMatcher> MatchingManager;
609 const char *getAnnotatedRemarkPassName()
const {
610 return AnnotatedPassName.c_str();
623 const std::vector<const BasicBlockT *> &BasicBlocks,
625 for (
auto &Jump :
Func.Jumps) {
626 const auto *BB = BasicBlocks[Jump.Source];
627 const auto *Succ = BasicBlocks[Jump.Target];
631 if (Successors[BB].
size() == 2 && Successors[BB].back() == Succ) {
632 if (isa<InvokeInst>(TI)) {
633 Jump.IsUnlikely =
true;
639 if (isa<UnreachableInst>(SuccTI)) {
640 Jump.IsUnlikely =
true;
661 return getProbeWeight(Inst);
665 return std::error_code();
670 if (isa<BranchInst>(Inst) || isa<IntrinsicInst>(Inst) || isa<PHINode>(Inst))
671 return std::error_code();
680 if (
const auto *CB = dyn_cast<CallBase>(&Inst))
681 if (!CB->isIndirectCall() && findCalleeFunctionSamples(*CB))
684 return getInstWeightImpl(Inst);
700SampleProfileLoader::findCalleeFunctionSamples(
const CallBase &Inst)
const {
708 CalleeName =
Callee->getName();
711 return ContextTracker->getCalleeContextSamplesFor(Inst, CalleeName);
718 CalleeName, Reader->getRemapper(),
719 &FuncNameToProfNameMap);
725std::vector<const FunctionSamples *>
726SampleProfileLoader::findIndirectCallFunctionSamples(
729 std::vector<const FunctionSamples *>
R;
736 assert(L && R &&
"Expect non-null FunctionSamples");
737 if (
L->getHeadSamplesEstimate() !=
R->getHeadSamplesEstimate())
738 return L->getHeadSamplesEstimate() >
R->getHeadSamplesEstimate();
739 return L->getGUID() <
R->getGUID();
744 ContextTracker->getIndirectCalleeContextSamplesFor(DIL);
745 if (CalleeSamples.empty())
751 for (
const auto *
const FS : CalleeSamples) {
752 Sum +=
FS->getHeadSamplesEstimate();
765 if (
auto T =
FS->findCallTargetMapAt(CallSite))
766 for (
const auto &T_C : *
T)
771 for (
const auto &NameFS : *M) {
772 Sum += NameFS.second.getHeadSamplesEstimate();
773 R.push_back(&NameFS.second);
781SampleProfileLoader::findFunctionSamples(
const Instruction &Inst)
const {
792 auto it = DILocation2SampleMap.try_emplace(DIL,
nullptr);
795 it.first->second = ContextTracker->getContextSamplesFor(DIL);
797 it.first->second = Samples->findFunctionSamples(
798 DIL, Reader->getRemapper(), &FuncNameToProfNameMap);
800 return it.first->second;
816 if (ValueData.empty())
819 unsigned NumPromoted = 0;
820 for (
const auto &V : ValueData) {
827 if (V.Value == Function::getGUID(Candidate))
862 "If sum is 0, assume only one element in CallTargets "
863 "with count being NOMORE_ICP_MAGICNUM");
865 for (
const auto &V : ValueData)
866 ValueCountMap[V.Value] = V.Count;
872 OldSum -= Pair.first->second;
879 for (
const auto &V : ValueData) {
881 ValueCountMap[V.Value] = V.Count;
884 for (
const auto &Data : CallTargets) {
885 auto Pair = ValueCountMap.
try_emplace(Data.Value, Data.Count);
891 assert(Sum >= Data.Count &&
"Sum should never be less than Data.Count");
897 for (
const auto &ValueCount : ValueCountMap) {
899 InstrProfValueData{ValueCount.first, ValueCount.second});
903 [](
const InstrProfValueData &L,
const InstrProfValueData &R) {
904 if (L.Count != R.Count)
905 return L.Count > R.Count;
906 return L.Value > R.Value;
912 NewCallTargets, Sum, IPVK_IndirectCallTarget, MaxMDCount);
925bool SampleProfileLoader::tryPromoteAndInlineCandidate(
929 if (DisableSampleProfileInlining)
936 auto CalleeFunctionName = Candidate.CalleeSamples->getFunction();
941 auto &CI = *Candidate.CallInstr;
945 const char *Reason =
"Callee function not available";
952 if (!
R->second->isDeclaration() &&
R->second->getSubprogram() &&
953 R->second->hasFnAttribute(
"use-sample-profile") &&
962 CI,
R->second, Candidate.CallsiteCount, Sum,
false, ORE);
964 Sum -= Candidate.CallsiteCount;
977 Candidate.CallInstr = DI;
978 if (isa<CallInst>(DI) || isa<InvokeInst>(DI)) {
979 bool Inlined = tryInlineCandidate(Candidate, InlinedCallSite);
984 *DI,
static_cast<float>(Candidate.CallsiteCount) / SumOrigin);
992 Candidate.CallInstr->getName())<<
" because "
1003 if (Callee ==
nullptr)
1012 if (
Cost.isAlways())
1018void SampleProfileLoader::emitOptimizationRemarksForInlineCandidates(
1021 for (
auto *
I : Candidates) {
1022 Function *CalledFunction =
I->getCalledFunction();
1023 if (CalledFunction) {
1025 "InlineAttempt",
I->getDebugLoc(),
1027 <<
"previous inlining reattempted for "
1028 << (
Hot ?
"hotness: '" :
"size: '")
1029 <<
ore::NV(
"Callee", CalledFunction) <<
"' into '"
1035void SampleProfileLoader::findExternalInlineCandidate(
1041 if (CB && getExternalInlineAdvisorShouldInline(*CB)) {
1072 std::queue<ContextTrieNode *> CalleeList;
1073 CalleeList.push(Caller);
1074 while (!CalleeList.empty()) {
1093 if (!Func ||
Func->isDeclaration())
1099 for (
const auto &TS : BS.second.getCallTargets())
1100 if (TS.second > Threshold) {
1102 if (!Callee ||
Callee->isDeclaration())
1103 InlinedGUIDs.
insert(TS.first.getHashCode());
1110 for (
auto &Child :
Node->getAllChildContext()) {
1112 CalleeList.push(CalleeNode);
1139bool SampleProfileLoader::inlineHotFunctions(
1143 assert((!ProfAccForSymsInList ||
1145 !
F.hasFnAttribute(
"profile-sample-accurate"))) &&
1146 "ProfAccForSymsInList should be false when profile-sample-accurate "
1150 bool Changed =
false;
1151 bool LocalChanged =
true;
1152 while (LocalChanged) {
1153 LocalChanged =
false;
1155 for (
auto &BB :
F) {
1159 for (
auto &
I : BB) {
1161 if (
auto *CB = dyn_cast<CallBase>(&
I)) {
1162 if (!isa<IntrinsicInst>(
I)) {
1163 if ((FS = findCalleeFunctionSamples(*CB))) {
1165 "GUIDToFuncNameMap has to be populated");
1167 if (
FS->getHeadSamplesEstimate() > 0 ||
1169 LocalNotInlinedCallSites.
insert({CB,
FS});
1172 else if (shouldInlineColdCallee(*CB))
1174 }
else if (getExternalInlineAdvisorShouldInline(*CB)) {
1180 if (
Hot || ExternalInlineAdvisor) {
1182 emitOptimizationRemarksForInlineCandidates(AllCandidates,
F,
true);
1185 emitOptimizationRemarksForInlineCandidates(ColdCandidates,
F,
false);
1189 Function *CalledFunction =
I->getCalledFunction();
1190 InlineCandidate Candidate = {
I, LocalNotInlinedCallSites.
lookup(
I),
1194 if (CalledFunction == &
F)
1196 if (
I->isIndirectCall()) {
1198 for (
const auto *FS : findIndirectCallFunctionSamples(*
I, Sum)) {
1200 if (LTOPhase == ThinOrFullLTOPhase::ThinLTOPreLink) {
1201 findExternalInlineCandidate(
I, FS, InlinedGUIDs,
1202 PSI->getOrCompHotCountThreshold());
1208 Candidate = {
I,
FS,
FS->getHeadSamplesEstimate(), 1.0};
1209 if (tryPromoteAndInlineCandidate(
F, Candidate, SumOrigin, Sum)) {
1210 LocalNotInlinedCallSites.
erase(
I);
1211 LocalChanged =
true;
1214 }
else if (CalledFunction && CalledFunction->
getSubprogram() &&
1216 if (tryInlineCandidate(Candidate)) {
1217 LocalNotInlinedCallSites.
erase(
I);
1218 LocalChanged =
true;
1220 }
else if (LTOPhase == ThinOrFullLTOPhase::ThinLTOPreLink) {
1221 findExternalInlineCandidate(
I, findCalleeFunctionSamples(*
I),
1223 PSI->getOrCompHotCountThreshold());
1226 Changed |= LocalChanged;
1232 promoteMergeNotInlinedContextSamples(LocalNotInlinedCallSites,
F);
1236bool SampleProfileLoader::tryInlineCandidate(
1240 if (DisableSampleProfileInlining)
1243 CallBase &CB = *Candidate.CallInstr;
1245 assert(CalledFunction &&
"Expect a callee with definition");
1250 if (
Cost.isNever()) {
1252 "InlineFail", DLoc, BB)
1253 <<
"incompatible inlining");
1261 IFI.UpdateProfile =
false;
1264 if (!
IR.isSuccess())
1269 Cost,
true, getAnnotatedRemarkPassName());
1272 if (InlinedCallSites) {
1273 InlinedCallSites->
clear();
1274 for (
auto &
I : IFI.InlinedCallSites)
1279 ContextTracker->markContextSamplesInlined(Candidate.CalleeSamples);
1289 if (Candidate.CallsiteDistribution < 1) {
1290 for (
auto &
I : IFI.InlinedCallSites) {
1293 Candidate.CallsiteDistribution);
1295 NumDuplicatedInlinesite++;
1301bool SampleProfileLoader::getInlineCandidate(InlineCandidate *NewCandidate,
1303 assert(CB &&
"Expect non-null call instruction");
1305 if (isa<IntrinsicInst>(CB))
1309 const FunctionSamples *CalleeSamples = findCalleeFunctionSamples(*CB);
1312 if (!CalleeSamples && !getExternalInlineAdvisorShouldInline(*CB))
1316 if (std::optional<PseudoProbe> Probe =
extractProbe(*CB))
1317 Factor = Probe->Factor;
1321 *NewCandidate = {CB, CalleeSamples, CallsiteCount, Factor};
1325std::optional<InlineCost>
1326SampleProfileLoader::getExternalInlineAdvisorCost(
CallBase &CB) {
1327 std::unique_ptr<InlineAdvice> Advice =
nullptr;
1328 if (ExternalInlineAdvisor) {
1329 Advice = ExternalInlineAdvisor->getAdvice(CB);
1331 if (!Advice->isInliningRecommended()) {
1332 Advice->recordUnattemptedInlining();
1335 Advice->recordInlining();
1343bool SampleProfileLoader::getExternalInlineAdvisorShouldInline(
CallBase &CB) {
1344 std::optional<InlineCost>
Cost = getExternalInlineAdvisorCost(CB);
1349SampleProfileLoader::shouldInlineCandidate(InlineCandidate &Candidate) {
1350 if (std::optional<InlineCost> ReplayCost =
1351 getExternalInlineAdvisorCost(*Candidate.CallInstr))
1357 if (Candidate.CallsiteCount > PSI->getHotCountThreshold())
1364 assert(Callee &&
"Expect a definition for inline candidate of direct call");
1377 GetTTI(*Callee), GetAC, GetTLI);
1380 if (
Cost.isNever() ||
Cost.isAlways())
1396 SampleContext &Context = Candidate.CalleeSamples->getContext();
1414bool SampleProfileLoader::inlineHotFunctionsWithPriority(
1418 assert((!ProfAccForSymsInList ||
1420 !
F.hasFnAttribute(
"profile-sample-accurate"))) &&
1421 "ProfAccForSymsInList should be false when profile-sample-accurate "
1426 CandidateQueue CQueue;
1427 InlineCandidate NewCandidate;
1428 for (
auto &BB :
F) {
1429 for (
auto &
I : BB) {
1430 auto *CB = dyn_cast<CallBase>(&
I);
1433 if (getInlineCandidate(&NewCandidate, CB))
1434 CQueue.push(NewCandidate);
1443 "Max inline size limit should not be smaller than min inline size "
1448 if (ExternalInlineAdvisor)
1449 SizeLimit = std::numeric_limits<unsigned>::max();
1454 bool Changed =
false;
1455 while (!CQueue.empty() &&
F.getInstructionCount() <
SizeLimit) {
1456 InlineCandidate Candidate = CQueue.top();
1459 Function *CalledFunction =
I->getCalledFunction();
1461 if (CalledFunction == &
F)
1463 if (
I->isIndirectCall()) {
1465 auto CalleeSamples = findIndirectCallFunctionSamples(*
I, Sum);
1467 Sum *= Candidate.CallsiteDistribution;
1468 unsigned ICPCount = 0;
1469 for (
const auto *FS : CalleeSamples) {
1471 if (LTOPhase == ThinOrFullLTOPhase::ThinLTOPreLink) {
1472 findExternalInlineCandidate(
I, FS, InlinedGUIDs,
1473 PSI->getOrCompHotCountThreshold());
1477 FS->getHeadSamplesEstimate() * Candidate.CallsiteDistribution;
1492 if (!PSI->isHotCount(EntryCountDistributed))
1497 Candidate = {
I,
FS, EntryCountDistributed,
1498 Candidate.CallsiteDistribution};
1499 if (tryPromoteAndInlineCandidate(
F, Candidate, SumOrigin, Sum,
1500 &InlinedCallSites)) {
1501 for (
auto *CB : InlinedCallSites) {
1502 if (getInlineCandidate(&NewCandidate, CB))
1503 CQueue.emplace(NewCandidate);
1507 }
else if (!ContextTracker) {
1508 LocalNotInlinedCallSites.
insert({
I,
FS});
1511 }
else if (CalledFunction && CalledFunction->
getSubprogram() &&
1514 if (tryInlineCandidate(Candidate, &InlinedCallSites)) {
1515 for (
auto *CB : InlinedCallSites) {
1516 if (getInlineCandidate(&NewCandidate, CB))
1517 CQueue.emplace(NewCandidate);
1520 }
else if (!ContextTracker) {
1521 LocalNotInlinedCallSites.
insert({
I, Candidate.CalleeSamples});
1523 }
else if (LTOPhase == ThinOrFullLTOPhase::ThinLTOPreLink) {
1524 findExternalInlineCandidate(
I, findCalleeFunctionSamples(*
I),
1526 PSI->getOrCompHotCountThreshold());
1530 if (!CQueue.empty()) {
1532 ++NumCSInlinedHitMaxLimit;
1534 ++NumCSInlinedHitMinLimit;
1536 ++NumCSInlinedHitGrowthLimit;
1542 promoteMergeNotInlinedContextSamples(LocalNotInlinedCallSites,
F);
1546void SampleProfileLoader::promoteMergeNotInlinedContextSamples(
1550 for (
const auto &Pair : NonInlinedCallSites) {
1553 if (!Callee ||
Callee->isDeclaration())
1558 I->getDebugLoc(),
I->getParent())
1559 <<
"previous inlining not repeated: '" <<
ore::NV(
"Callee", Callee)
1560 <<
"' into '" <<
ore::NV(
"Caller", &
F) <<
"'");
1564 if (
FS->getTotalSamples() == 0 &&
FS->getHeadSamplesEstimate() == 0) {
1578 if (
FS->getHeadSamples() == 0) {
1582 FS->getHeadSamplesEstimate());
1591 OutlineFS = &OutlineFunctionSamples[
1593 OutlineFS->
merge(*FS, 1);
1599 notInlinedCallInfo.try_emplace(Callee, NotInlinedProfileInfo{0});
1600 pair.first->second.entryCount +=
FS->getHeadSamplesEstimate();
1611 InstrProfValueData{
I.first.getHashCode(),
I.second});
1618void SampleProfileLoader::generateMDProfMetadata(
Function &
F) {
1621 LLVM_DEBUG(
dbgs() <<
"\nPropagation complete. Setting branch weights\n");
1624 for (
auto &BI :
F) {
1627 if (BlockWeights[BB]) {
1628 for (
auto &
I : *BB) {
1629 if (!isa<CallInst>(
I) && !isa<InvokeInst>(
I))
1641 FS->findCallTargetMapAt(CallSite);
1642 if (!
T ||
T.get().empty())
1649 if (Probe->Factor < 1)
1656 for (
const auto &
C :
T.get())
1663 FS->findFunctionSamplesMapAt(CallSite)) {
1664 for (
const auto &NameFS : *M)
1665 Sum += NameFS.second.getHeadSamplesEstimate();
1671 I.setMetadata(LLVMContext::MD_prof,
nullptr);
1672 }
else if (!isa<IntrinsicInst>(&
I)) {
1680 for (
auto &
I : *BB) {
1681 if (isa<CallInst>(
I) || isa<InvokeInst>(
I)) {
1682 if (cast<CallBase>(
I).isIndirectCall()) {
1683 I.setMetadata(LLVMContext::MD_prof,
nullptr);
1694 if (!isa<BranchInst>(TI) && !isa<SwitchInst>(TI) &&
1695 !isa<IndirectBrInst>(TI))
1701 :
Twine(
"<UNKNOWN LOCATION>"))
1710 std::vector<uint64_t> EdgeIndex;
1715 EdgeIndex[
I] = EdgeMultiplicity[Succ];
1716 EdgeMultiplicity[Succ]++;
1721 Edge E = std::make_pair(BB, Succ);
1727 if (Weight > std::numeric_limits<uint32_t>::max()) {
1729 Weight = std::numeric_limits<uint32_t>::max();
1735 Weight == std::numeric_limits<uint32_t>::max() ? Weight
1740 uint64_t W = Weight / EdgeMultiplicity[Succ];
1742 if (EdgeIndex[
I] < Weight % EdgeMultiplicity[Succ])
1747 if (Weight > MaxWeight) {
1749 MaxDestInst = Succ->getFirstNonPHIOrDbgOrLifetime();
1765 if (MaxWeight > 0 &&
1771 <<
"most popular destination for conditional branches at "
1772 <<
ore::NV(
"CondBranchesLoc", BranchLoc);
1791bool SampleProfileLoader::emitAnnotations(
Function &
F) {
1792 bool Changed =
false;
1796 if (!ProbeManager->getDesc(
F))
1797 dbgs() <<
"Probe descriptor missing for Function " <<
F.getName()
1801 if (ProbeManager->profileIsValid(
F, *Samples)) {
1802 ++NumMatchedProfile;
1804 ++NumMismatchedProfile;
1806 dbgs() <<
"Profile is invalid due to CFG mismatch for Function "
1807 <<
F.getName() <<
"\n");
1812 if (getFunctionLoc(
F) == 0)
1816 <<
F.getName() <<
": " << getFunctionLoc(
F) <<
"\n");
1821 Changed |= inlineHotFunctionsWithPriority(
F, InlinedGUIDs);
1823 Changed |= inlineHotFunctions(
F, InlinedGUIDs);
1825 Changed |= computeAndPropagateWeights(
F, InlinedGUIDs);
1828 generateMDProfMetadata(
F);
1830 emitCoverageRemarks(
F);
1834std::unique_ptr<ProfiledCallGraph>
1835SampleProfileLoader::buildProfiledCallGraph(
Module &M) {
1836 std::unique_ptr<ProfiledCallGraph> ProfiledCG;
1838 ProfiledCG = std::make_unique<ProfiledCallGraph>(*ContextTracker);
1840 ProfiledCG = std::make_unique<ProfiledCallGraph>(Reader->getProfiles());
1848 ProfiledCG->addProfiledFunction(
1855std::vector<Function *>
1857 std::vector<Function *> FunctionOrderList;
1858 FunctionOrderList.reserve(
M.size());
1861 errs() <<
"WARNING: -use-profiled-call-graph ignored, should be used "
1862 "together with -sample-profile-top-down-load.\n";
1876 FunctionOrderList.push_back(&
F);
1877 return FunctionOrderList;
1930 std::unique_ptr<ProfiledCallGraph> ProfiledCG = buildProfiledCallGraph(M);
1942 FunctionOrderList.push_back(
F);
1946 std::reverse(FunctionOrderList.begin(), FunctionOrderList.end());
1951 dbgs() <<
"Function processing order:\n";
1952 for (
auto F : FunctionOrderList) {
1953 dbgs() <<
F->getName() <<
"\n";
1957 return FunctionOrderList;
1960bool SampleProfileLoader::doInitialization(
Module &M,
1962 auto &Ctx =
M.getContext();
1965 Filename, Ctx, *FS, FSDiscriminatorPass::Base, RemappingFilename);
1966 if (std::error_code EC = ReaderOrErr.getError()) {
1967 std::string Msg =
"Could not open profile: " +
EC.message();
1971 Reader = std::move(ReaderOrErr.get());
1972 Reader->setSkipFlatProf(LTOPhase == ThinOrFullLTOPhase::ThinLTOPostLink);
1975 Reader->setModule(&M);
1976 if (std::error_code EC = Reader->read()) {
1977 std::string Msg =
"profile reading failed: " +
EC.message();
1982 PSL = Reader->getProfileSymbolList();
1987 if (UseFlattenedProfile)
1989 Reader->profileIsCS());
1992 ProfAccForSymsInList =
1994 if (ProfAccForSymsInList) {
1995 NamesInProfile.clear();
1996 GUIDsInProfile.clear();
1997 if (
auto NameTable = Reader->getNameTable()) {
1999 for (
auto Name : *NameTable)
2000 GUIDsInProfile.insert(
Name.getHashCode());
2002 for (
auto Name : *NameTable)
2003 NamesInProfile.insert(
Name.stringRef());
2006 CoverageTracker.setProfAccForSymsInList(
true);
2011 M, *
FAM, Ctx,
nullptr,
2016 false,
InlineContext{LTOPhase, InlinePass::ReplaySampleProfileInliner});
2020 if (Reader->profileIsCS() || Reader->profileIsPreInlined() ||
2021 Reader->profileIsProbeBased()) {
2037 if (Reader->profileIsPreInlined()) {
2047 if (Reader->profileIsProbeBased() &&
2052 if (!Reader->profileIsCS()) {
2064 if (Reader->profileIsCS()) {
2066 ContextTracker = std::make_unique<SampleContextTracker>(
2067 Reader->getProfiles(), &GUIDToFuncNameMap);
2071 if (Reader->profileIsProbeBased()) {
2072 ProbeManager = std::make_unique<PseudoProbeManager>(M);
2073 if (!ProbeManager->moduleIsProbed(M)) {
2075 "Pseudo-probe-based profile requires SampleProfileProbePass";
2084 MatchingManager = std::make_unique<SampleProfileMatcher>(
2085 M, *Reader, CG, ProbeManager.get(), LTOPhase, SymbolMap, PSL,
2086 FuncNameToProfNameMap);
2102bool SampleProfileLoader::rejectHighStalenessProfile(
2105 "Only support for probe-based profile");
2108 for (
const auto &
I : Profiles) {
2109 const auto &
FS =
I.second;
2110 const auto *FuncDesc = ProbeManager->getDesc(
FS.getGUID());
2116 FS.getTotalSamples()))
2120 if (ProbeManager->profileIsHashMismatched(*FuncDesc, FS))
2121 NumMismatchedFunc++;
2129 if (NumMismatchedFunc * 100 >=
2131 auto &Ctx =
M.getContext();
2133 "The input profile significantly mismatches current source code. "
2134 "Please recollect profile to avoid performance regression.";
2141void SampleProfileLoader::removePseudoProbeInstsDiscriminator(
Module &M) {
2143 std::vector<Instruction *> InstsToDel;
2144 for (
auto &BB :
F) {
2145 for (
auto &
I : BB) {
2146 if (isa<PseudoProbeInst>(&
I))
2147 InstsToDel.push_back(&
I);
2148 else if (isa<CallBase>(&
I))
2149 if (
const DILocation *DIL =
I.getDebugLoc().get()) {
2153 std::optional<uint32_t> DwarfDiscriminator =
2157 DwarfDiscriminator ? *DwarfDiscriminator : 0));
2162 for (
auto *
I : InstsToDel)
2163 I->eraseFromParent();
2169 GUIDToFuncNameMapper Mapper(M, *Reader, GUIDToFuncNameMap);
2172 if (
M.getProfileSummary(
false) ==
nullptr) {
2173 M.setProfileSummary(Reader->getSummary().getMD(
M.getContext()),
2179 rejectHighStalenessProfile(M, PSI, Reader->getProfiles()))
2183 for (
const auto &
I : Reader->getProfiles())
2184 TotalCollectedSamples +=
I.second.getTotalSamples();
2186 auto Remapper = Reader->getRemapper();
2188 for (
const auto &N_F :
M.getValueSymbolTable()) {
2190 Function *
F = dyn_cast<Function>(N_F.getValue());
2191 if (
F ==
nullptr || OrigName.
empty())
2195 if (OrigName != NewName && !NewName.
empty()) {
2202 r.first->second =
nullptr;
2207 if (
auto MapName = Remapper->lookUpNameInProfile(OrigName)) {
2208 if (*MapName != OrigName && !MapName->empty())
2217 MatchingManager->runOnModule();
2218 MatchingManager->clearMatchingData();
2221 "No empty StringRef should be added in SymbolMap");
2223 "FuncNameToProfNameMap is not empty when --salvage-unused-profile is "
2226 bool retval =
false;
2227 for (
auto *
F : buildFunctionOrder(M, CG)) {
2229 clearFunctionData();
2235 for (
const std::pair<Function *, NotInlinedProfileInfo> &pair :
2241 removePseudoProbeInstsDiscriminator(M);
2243 M.eraseNamedMetadata(FuncInfo);
2250 LLVM_DEBUG(
dbgs() <<
"\n\nProcessing Function " <<
F.getName() <<
"\n");
2251 DILocation2SampleMap.clear();
2262 initialEntryCount = 0;
2265 ProfAccForSymsInList =
false;
2267 CoverageTracker.setProfAccForSymsInList(ProfAccForSymsInList);
2273 if (ProfAccForSymsInList) {
2275 if (PSL->contains(
F.getName()))
2276 initialEntryCount = 0;
2290 GUIDsInProfile.count(Function::getGUID(CanonName))) ||
2292 initialEntryCount = -1;
2297 if (!
F.getEntryCount())
2299 std::unique_ptr<OptimizationRemarkEmitter> OwnedORE;
2306 OwnedORE = std::make_unique<OptimizationRemarkEmitter>(&
F);
2307 ORE = OwnedORE.get();
2311 Samples = ContextTracker->getBaseSamplesFor(
F);
2313 Samples = Reader->getSamplesFor(
F);
2318 auto It = OutlineFunctionSamples.find(
FunctionId(CanonName));
2319 if (It != OutlineFunctionSamples.end()) {
2320 Samples = &It->second;
2321 }
else if (
auto Remapper = Reader->getRemapper()) {
2322 if (
auto RemppedName = Remapper->lookUpNameInProfile(CanonName)) {
2323 It = OutlineFunctionSamples.find(
FunctionId(*RemppedName));
2324 if (It != OutlineFunctionSamples.end())
2325 Samples = &It->second;
2331 if (Samples && !Samples->
empty())
2332 return emitAnnotations(
F);
2338 bool UseFlattenedProfile)
2339 : ProfileFileName(File), ProfileRemappingFileName(RemappingFile),
2340 LTOPhase(LTOPhase), FS(
std::
move(FS)),
2341 DisableSampleProfileInlining(DisableSampleProfileInlining),
2342 UseFlattenedProfile(UseFlattenedProfile) {}
2363 SampleProfileLoader SampleLoader(
2366 : ProfileRemappingFileName,
2367 LTOPhase, FS, GetAssumptionCache, GetTTI, GetTLI, CG,
2368 DisableSampleProfileInlining, UseFlattenedProfile);
2369 if (!SampleLoader.doInitialization(M, &
FAM))
2373 if (!SampleLoader.runOnModule(M, &AM, PSI))
#define clEnumValN(ENUMVAL, FLAGNAME, DESC)
This file defines the DenseMap class.
This file defines the DenseSet and SmallDenseSet classes.
static bool runOnFunction(Function &F, bool PostInlining)
Provides ErrorOr<T> smart pointer.
static cl::opt< unsigned > SizeLimit("eif-limit", cl::init(6), cl::Hidden, cl::desc("Size limit in Hexagon early if-conversion"))
Module.h This file contains the declarations for the Module class.
This header defines various interfaces for pass management in LLVM.
Implements a lazy call graph analysis and related passes for the new pass manager.
Legalize the Machine IR a function s Machine IR
This file implements a map that provides insertion order iteration.
static const Function * getCalledFunction(const Value *V)
ConstantRange Range(APInt(BitWidth, Low), APInt(BitWidth, High))
FunctionAnalysisManager FAM
This file defines the PriorityQueue class.
This file contains the declarations for profiling metadata utility functions.
This builds on the llvm/ADT/GraphTraits.h file to find the strongly connected components (SCCs) of a ...
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
This file provides the interface for context-sensitive profile tracker used by CSSPGO.
This file provides the interface for the sampled PGO profile loader base implementation.
This file provides the utility functions for the sampled PGO loader base implementation.
This file provides the interface for SampleProfileMatcher.
This file provides the interface for the pseudo probe implementation for AutoFDO.
static cl::opt< std::string > SampleProfileFile("sample-profile-file", cl::init(""), cl::value_desc("filename"), cl::desc("Profile file loaded by -sample-profile"), cl::Hidden)
static cl::opt< unsigned > MinfuncsForStalenessError("min-functions-for-staleness-error", cl::Hidden, cl::init(50), cl::desc("Skip the check if the number of hot functions is smaller than " "the specified number."))
cl::opt< bool > SalvageUnusedProfile("salvage-unused-profile", cl::Hidden, cl::init(false), cl::desc("Salvage unused profile by matching with new " "functions on call graph."))
static cl::opt< bool > ProfileSampleBlockAccurate("profile-sample-block-accurate", cl::Hidden, cl::init(false), cl::desc("If the sample profile is accurate, we will mark all un-sampled " "branches and calls as having 0 samples. Otherwise, treat " "them conservatively as unknown. "))
static cl::opt< unsigned > PrecentMismatchForStalenessError("precent-mismatch-for-staleness-error", cl::Hidden, cl::init(80), cl::desc("Reject the profile if the mismatch percent is higher than the " "given number."))
static cl::opt< bool > RemoveProbeAfterProfileAnnotation("sample-profile-remove-probe", cl::Hidden, cl::init(false), cl::desc("Remove pseudo-probe after sample profile annotation."))
static cl::opt< unsigned > MaxNumPromotions("sample-profile-icp-max-prom", cl::init(3), cl::Hidden, cl::desc("Max number of promotions for a single indirect " "call callsite in sample profile loader"))
static cl::opt< ReplayInlinerSettings::Fallback > ProfileInlineReplayFallback("sample-profile-inline-replay-fallback", cl::init(ReplayInlinerSettings::Fallback::Original), cl::values(clEnumValN(ReplayInlinerSettings::Fallback::Original, "Original", "All decisions not in replay send to original advisor (default)"), clEnumValN(ReplayInlinerSettings::Fallback::AlwaysInline, "AlwaysInline", "All decisions not in replay are inlined"), clEnumValN(ReplayInlinerSettings::Fallback::NeverInline, "NeverInline", "All decisions not in replay are not inlined")), cl::desc("How sample profile inline replay treats sites that don't come " "from the replay. Original: defers to original advisor, " "AlwaysInline: inline all sites not in replay, NeverInline: " "inline no sites not in replay"), cl::Hidden)
static cl::opt< bool > OverwriteExistingWeights("overwrite-existing-weights", cl::Hidden, cl::init(false), cl::desc("Ignore existing branch weights on IR and always overwrite."))
static void updateIDTMetaData(Instruction &Inst, const SmallVectorImpl< InstrProfValueData > &CallTargets, uint64_t Sum)
Update indirect call target profile metadata for Inst.
static cl::opt< bool > AnnotateSampleProfileInlinePhase("annotate-sample-profile-inline-phase", cl::Hidden, cl::init(false), cl::desc("Annotate LTO phase (prelink / postlink), or main (no LTO) for " "sample-profile inline pass name."))
static cl::opt< std::string > ProfileInlineReplayFile("sample-profile-inline-replay", cl::init(""), cl::value_desc("filename"), cl::desc("Optimization remarks file containing inline remarks to be replayed " "by inlining from sample profile loader."), cl::Hidden)
static cl::opt< bool > ProfileMergeInlinee("sample-profile-merge-inlinee", cl::Hidden, cl::init(true), cl::desc("Merge past inlinee's profile to outline version if sample " "profile loader decided not to inline a call site. It will " "only be enabled when top-down order of profile loading is " "enabled. "))
cl::opt< bool > PersistProfileStaleness("persist-profile-staleness", cl::Hidden, cl::init(false), cl::desc("Compute stale profile statistical metrics and write it into the " "native object file(.llvm_stats section)."))
static bool doesHistoryAllowICP(const Instruction &Inst, StringRef Candidate)
Check whether the indirect call promotion history of Inst allows the promotion for Candidate.
static SmallVector< InstrProfValueData, 2 > GetSortedValueDataFromCallTargets(const SampleRecord::CallTargetMap &M)
Returns the sorted CallTargetMap M by count in descending order.
static cl::opt< bool > UseProfiledCallGraph("use-profiled-call-graph", cl::init(true), cl::Hidden, cl::desc("Process functions in a top-down order " "defined by the profiled call graph when " "-sample-profile-top-down-load is on."))
static cl::opt< ReplayInlinerSettings::Scope > ProfileInlineReplayScope("sample-profile-inline-replay-scope", cl::init(ReplayInlinerSettings::Scope::Function), cl::values(clEnumValN(ReplayInlinerSettings::Scope::Function, "Function", "Replay on functions that have remarks associated " "with them (default)"), clEnumValN(ReplayInlinerSettings::Scope::Module, "Module", "Replay on the entire module")), cl::desc("Whether inline replay should be applied to the entire " "Module or just the Functions (default) that are present as " "callers in remarks during sample profile inlining."), cl::Hidden)
static cl::opt< unsigned > ProfileICPRelativeHotness("sample-profile-icp-relative-hotness", cl::Hidden, cl::init(25), cl::desc("Relative hotness percentage threshold for indirect " "call promotion in proirity-based sample profile loader inlining."))
Function::ProfileCount ProfileCount
static cl::opt< unsigned > ProfileICPRelativeHotnessSkip("sample-profile-icp-relative-hotness-skip", cl::Hidden, cl::init(1), cl::desc("Skip relative hotness check for ICP up to given number of targets."))
cl::opt< bool > ReportProfileStaleness("report-profile-staleness", cl::Hidden, cl::init(false), cl::desc("Compute and report stale profile statistical metrics."))
static cl::opt< bool > UsePreInlinerDecision("sample-profile-use-preinliner", cl::Hidden, cl::desc("Use the preinliner decisions stored in profile context."))
static cl::opt< bool > ProfileAccurateForSymsInList("profile-accurate-for-symsinlist", cl::Hidden, cl::init(true), cl::desc("For symbols in profile symbol list, regard their profiles to " "be accurate. It may be overriden by profile-sample-accurate. "))
static cl::opt< bool > DisableSampleLoaderInlining("disable-sample-loader-inlining", cl::Hidden, cl::init(false), cl::desc("If true, artifically skip inline transformation in sample-loader " "pass, and merge (or scale) profiles (as configured by " "--sample-profile-merge-inlinee)."))
static cl::opt< bool > ProfileSizeInline("sample-profile-inline-size", cl::Hidden, cl::init(false), cl::desc("Inline cold call sites in profile loader if it's beneficial " "for code size."))
cl::opt< bool > SalvageStaleProfile("salvage-stale-profile", cl::Hidden, cl::init(false), cl::desc("Salvage stale profile by fuzzy matching and use the remapped " "location for sample profile query."))
static cl::opt< bool > ProfileTopDownLoad("sample-profile-top-down-load", cl::Hidden, cl::init(true), cl::desc("Do profile annotation and inlining for functions in top-down " "order of call graph during sample profile loading. It only " "works for new pass manager. "))
static cl::opt< bool > ProfileSampleAccurate("profile-sample-accurate", cl::Hidden, cl::init(false), cl::desc("If the sample profile is accurate, we will mark all un-sampled " "callsite and function as having 0 samples. Otherwise, treat " "un-sampled callsites and functions conservatively as unknown. "))
static cl::opt< bool > AllowRecursiveInline("sample-profile-recursive-inline", cl::Hidden, cl::desc("Allow sample loader inliner to inline recursive calls."))
static cl::opt< CallSiteFormat::Format > ProfileInlineReplayFormat("sample-profile-inline-replay-format", cl::init(CallSiteFormat::Format::LineColumnDiscriminator), cl::values(clEnumValN(CallSiteFormat::Format::Line, "Line", "<Line Number>"), clEnumValN(CallSiteFormat::Format::LineColumn, "LineColumn", "<Line Number>:<Column Number>"), clEnumValN(CallSiteFormat::Format::LineDiscriminator, "LineDiscriminator", "<Line Number>.<Discriminator>"), clEnumValN(CallSiteFormat::Format::LineColumnDiscriminator, "LineColumnDiscriminator", "<Line Number>:<Column Number>.<Discriminator> (default)")), cl::desc("How sample profile inline replay file is formatted"), cl::Hidden)
static cl::opt< std::string > SampleProfileRemappingFile("sample-profile-remapping-file", cl::init(""), cl::value_desc("filename"), cl::desc("Profile remapping file loaded by -sample-profile"), cl::Hidden)
static cl::opt< unsigned > HotFuncCutoffForStalenessError("hot-func-cutoff-for-staleness-error", cl::Hidden, cl::init(800000), cl::desc("A function is considered hot for staleness error check if its " "total sample count is above the specified percentile"))
static cl::opt< bool > CallsitePrioritizedInline("sample-profile-prioritized-inline", cl::Hidden, cl::desc("Use call site prioritized inlining for sample profile loader." "Currently only CSSPGO is supported."))
This file provides the interface for the sampled PGO loader pass.
This file defines the SmallVector class.
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
Defines the virtual file system interface vfs::FileSystem.
A container for analyses that lazily runs them and caches their results.
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)
Get the result of an analysis pass for a given IR unit.
A function analysis which provides an AssumptionCache.
A cache of @llvm.assume calls within a function.
LLVM Basic Block Representation.
const Function * getParent() const
Return the enclosing method, or null if none.
Base class for all callable instructions (InvokeInst and CallInst) Holds everything related to callin...
Function * getCalledFunction() const
Returns the function called, or null if this is an indirect function invocation or the function signa...
This class represents a function call, abstracting a target machine's calling convention.
static bool isPseudoProbeDiscriminator(unsigned Discriminator)
const DILocation * cloneWithDiscriminator(unsigned Discriminator) const
Returns a new DILocation with updated Discriminator.
ValueT lookup(const_arg_type_t< KeyT > Val) const
lookup - Return the entry for the specified key, or a default constructed value if no such entry exis...
iterator find(const_arg_type_t< KeyT > Val)
std::pair< iterator, bool > try_emplace(KeyT &&Key, Ts &&...Args)
size_type count(const_arg_type_t< KeyT > Val) const
Return 1 if the specified key is in the map, 0 otherwise.
Implements a dense probed hash-table based set.
Diagnostic information for the sample profiler.
void recalculate(ParentType &Func)
recalculate - compute a dominator tree for the given function
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
Represents either an error or a value T.
Class to represent profile counts.
DISubprogram * getSubprogram() const
Get the attached subprogram.
bool isDeclaration() const
Return true if the primary definition of this global value is outside of the current translation unit...
Represents the cost of inlining a function.
static InlineCost getNever(const char *Reason, std::optional< CostBenefitPair > CostBenefit=std::nullopt)
static InlineCost getAlways(const char *Reason, std::optional< CostBenefitPair > CostBenefit=std::nullopt)
static InlineCost get(int Cost, int Threshold, int StaticBonus=0)
This class captures the data input to the InlineFunction call, and records the auxiliary results prod...
InlineResult is basically true or false.
An analysis over an "outer" IR unit that provides access to an analysis manager over an "inner" IR un...
unsigned getNumSuccessors() const LLVM_READONLY
Return the number of successors that this instruction has.
const DebugLoc & getDebugLoc() const
Return the debug location for this node as a DebugLoc.
bool extractProfTotalWeight(uint64_t &TotalVal) const
Retrieve total raw weight values of a branch.
BasicBlock * getSuccessor(unsigned Idx) const LLVM_READONLY
Return the specified successor. This instruction must be a terminator.
void setMetadata(unsigned KindID, MDNode *Node)
Set the metadata of the specified kind to the specified node.
A smart pointer to a reference-counted object that inherits from RefCountedBase or ThreadSafeRefCount...
This is an important class for using LLVM in a threaded context.
void diagnose(const DiagnosticInfo &DI)
Report a message to the currently installed diagnostic handler.
An analysis pass which computes the call graph for a module.
A lazily constructed view of the call graph of a module.
This class implements a map that also provides access to all stored values in a deterministic order.
VectorType::iterator erase(typename VectorType::iterator Iterator)
Remove the element given by Iterator.
std::pair< iterator, bool > insert(const std::pair< KeyT, ValueT > &KV)
ValueT lookup(const KeyT &Key) const
A Module instance is used to store all the information related to an LLVM module.
PostDominatorTree Class - Concrete subclass of DominatorTree that is used to compute the post-dominat...
A set of analyses that are preserved following a run of a transformation pass.
static PreservedAnalyses none()
Convenience factory function for the empty preserved set.
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
PriorityQueue - This class behaves like std::priority_queue and provides a few additional convenience...
An analysis pass based on the new PM to deliver ProfileSummaryInfo.
Analysis providing profile information.
void refresh()
If no summary is present, attempt to refresh.
bool isHotCountNthPercentile(int PercentileCutoff, uint64_t C) const
Returns true if count C is considered hot with regard to a given hot percentile cutoff value.
Sample profile inference pass.
void computeDominanceAndLoopInfo(FunctionT &F)
virtual ErrorOr< uint64_t > getInstWeight(const InstructionT &Inst)
Get the weight for an instruction.
virtual const FunctionSamples * findFunctionSamples(const InstructionT &I) const
Get the FunctionSamples for an instruction.
PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM)
SampleProfileLoaderPass(std::string File="", std::string RemappingFile="", ThinOrFullLTOPhase LTOPhase=ThinOrFullLTOPhase::None, IntrusiveRefCntPtr< vfs::FileSystem > FS=nullptr, bool DisableSampleProfileInlining=false, bool UseFlattenedProfile=false)
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
reference emplace_back(ArgTypes &&... Args)
iterator insert(iterator I, T &&Elt)
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
StringRef - Represent a constant reference to a string, i.e. a character array and a length, which need not be null terminated.
constexpr bool empty() const
empty - Check if the string is empty.
StringSet - A wrapper for StringMap that provides set-like functionality.
Analysis pass providing the TargetTransformInfo.
Analysis pass providing the TargetLibraryInfo.
Provides information about what library functions are available for the current target.
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
LLVM Value Representation.
StringRef getName() const
Return a constant reference to the value's name.
std::pair< iterator, bool > insert(const ValueT &V)
const ParentTy * getParent() const
This class represents a function that is read from a sample profile.
Representation of the samples collected for a function.
void findInlinedFunctions(DenseSet< GlobalValue::GUID > &S, const HashKeyMap< std::unordered_map, FunctionId, Function * > &SymbolMap, uint64_t Threshold) const
Recursively traverses all children, if the total sample count of the corresponding function is no les...
FunctionId getFunction() const
Return the function name.
static bool ProfileIsProbeBased
static StringRef getCanonicalFnName(const Function &F)
Return the canonical name for a function, taking into account suffix elision policy attributes.
void setContextSynthetic()
SampleContext & getContext() const
sampleprof_error merge(const FunctionSamples &Other, uint64_t Weight=1)
Merge the samples in Other into this one.
static LineLocation getCallSiteIdentifier(const DILocation *DIL, bool ProfileIsFS=false)
Returns a unique call site identifier for a given debug location of a call instruction.
uint64_t getHeadSamplesEstimate() const
Return an estimate of the sample count of the function entry basic block.
uint64_t getGUID() const
Return the GUID of the context's name.
const BodySampleMap & getBodySamples() const
Return all the samples collected in the body of the function.
static bool UseMD5
Whether the profile uses MD5 to represent string.
This class is a wrapper to associative container MapT<KeyT, ValueT> using the hash value of the origi...
static void flattenProfile(SampleProfileMap &ProfileMap, bool ProfileIsCS=false)
bool hasState(ContextStateMask S)
bool hasAttribute(ContextAttributeMask A)
This class provides operator overloads to the map container using MD5 as the key type,...
Sample-based profile reader.
static ErrorOr< std::unique_ptr< SampleProfileReader > > create(StringRef Filename, LLVMContext &C, vfs::FileSystem &FS, FSDiscriminatorPass P=FSDiscriminatorPass::Base, StringRef RemapFilename="")
Create a sample profile reader appropriate to the file format.
std::unordered_map< FunctionId, uint64_t > CallTargetMap
static const SortedCallTargetSet sortCallTargets(const CallTargetMap &Targets)
Sort call targets in descending order of call frequency.
static const CallTargetMap adjustCallTargets(const CallTargetMap &Targets, float DistributionFactor)
Prorate call targets by a distribution factor.
Enumerate the SCCs of a directed graph in reverse topological order of the SCC DAG.
bool isAtEnd() const
Direct loop termination test which is more efficient than comparison with end().
Sort the nodes of a directed SCC in the decreasing order of the edge weights.
@ C
The default llvm calling convention, compatible with C.
ValuesClass values(OptsTy... Options)
Helper to build a ValuesClass by forwarding a variable number of arguments as an initializer list to ...
initializer< Ty > init(const Ty &Val)
void checkExpectAnnotations(Instruction &I, const ArrayRef< uint32_t > ExistingWeights, bool IsFrontend)
checkExpectAnnotations - compares PGO counters to the thresholds used for llvm.expect and warns if th...
DenseMap< SymbolStringPtr, ExecutorSymbolDef > SymbolMap
A map from symbol names (as SymbolStringPtrs) to JITSymbols (address/flags pairs).
DiagnosticInfoOptimizationBase::Argument NV
CallBase & promoteIndirectCall(CallBase &CB, Function *F, uint64_t Count, uint64_t TotalCount, bool AttachProfToDirectCall, OptimizationRemarkEmitter *ORE)
NodeAddr< FuncNode * > Func
static FunctionId getRepInFormat(StringRef Name)
Get the proper representation of a string according to whether the current Format uses MD5 to represe...
@ ContextDuplicatedIntoBase
std::map< FunctionId, FunctionSamples > FunctionSamplesMap
bool callsiteIsHot(const FunctionSamples *CallsiteFS, ProfileSummaryInfo *PSI, bool ProfAccForSymsInList)
Return true if the given callsite is hot with respect to the hot cutoff threshold.
IntrusiveRefCntPtr< FileSystem > getRealFileSystem()
Gets a vfs::FileSystem for the 'real' file system, as seen by the operating system.
This is an optimization pass for GlobalISel generic memory operations.
bool isLegalToPromote(const CallBase &CB, Function *Callee, const char **FailureReason=nullptr)
Return true if the given indirect call site can be made to call Callee.
auto size(R &&Range, std::enable_if_t< std::is_base_of< std::random_access_iterator_tag, typename std::iterator_traits< decltype(Range.begin())>::iterator_category >::value, void > *=nullptr)
Get the size of a range.
cl::opt< int > ProfileInlineLimitMin
bool succ_empty(const Instruction *I)
scc_iterator< T > scc_begin(const T &G)
Construct the begin iterator for a deduced graph type T.
static void buildTopDownFuncOrder(LazyCallGraph &CG, std::vector< Function * > &FunctionOrderList)
void setProbeDistributionFactor(Instruction &Inst, float Factor)
std::string AnnotateInlinePassName(InlineContext IC)
ThinOrFullLTOPhase
This enumerates the LLVM full LTO or ThinLTO optimization phases.
InlineCost getInlineCost(CallBase &Call, const InlineParams &Params, TargetTransformInfo &CalleeTTI, function_ref< AssumptionCache &(Function &)> GetAssumptionCache, function_ref< const TargetLibraryInfo &(Function &)> GetTLI, function_ref< BlockFrequencyInfo &(Function &)> GetBFI=nullptr, ProfileSummaryInfo *PSI=nullptr, OptimizationRemarkEmitter *ORE=nullptr)
Get an InlineCost object representing the cost of inlining this callsite.
cl::opt< bool > SampleProfileUseProfi
void annotateValueSite(Module &M, Instruction &Inst, const InstrProfRecord &InstrProfR, InstrProfValueKind ValueKind, uint32_t SiteIndx, uint32_t MaxMDCount=3)
Get the value profile data for value site SiteIdx from InstrProfR and annotate the instruction Inst w...
void setBranchWeights(Instruction &I, ArrayRef< uint32_t > Weights, bool IsExpected)
Create a new branch_weights metadata node and add or overwrite a prof metadata reference to instructi...
void sort(IteratorTy Start, IteratorTy End)
llvm::cl::opt< bool > UseIterativeBFIInference
std::optional< PseudoProbe > extractProbe(const Instruction &Inst)
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
void emitInlinedIntoBasedOnCost(OptimizationRemarkEmitter &ORE, DebugLoc DLoc, const BasicBlock *Block, const Function &Callee, const Function &Caller, const InlineCost &IC, bool ForProfileContext=false, const char *PassName=nullptr)
Emit ORE message based on cost (default heuristic).
SmallVector< InstrProfValueData, 4 > getValueProfDataFromInst(const Instruction &Inst, InstrProfValueKind ValueKind, uint32_t MaxNumValueData, uint64_t &TotalC, bool GetNoICPValue=false)
Extract the value profile data from Inst and returns them if Inst is annotated with value profile dat...
std::unique_ptr< InlineAdvisor > getReplayInlineAdvisor(Module &M, FunctionAnalysisManager &FAM, LLVMContext &Context, std::unique_ptr< InlineAdvisor > OriginalAdvisor, const ReplayInlinerSettings &ReplaySettings, bool EmitRemarks, InlineContext IC)
cl::opt< int > SampleHotCallSiteThreshold
raw_fd_ostream & errs()
This returns a reference to a raw_ostream for standard error.
void updateProfileCallee(Function *Callee, int64_t EntryDelta, const ValueMap< const Value *, WeakTrackingVH > *VMap=nullptr)
Updates profile information by adjusting the entry count by adding EntryDelta then scaling callsite i...
cl::opt< int > SampleColdCallSiteThreshold
InlineResult InlineFunction(CallBase &CB, InlineFunctionInfo &IFI, bool MergeAttributes=false, AAResults *CalleeAAR=nullptr, bool InsertLifetime=true, Function *ForwardVarArgsTo=nullptr)
This function inlines the called function into the basic block of the caller.
InlineParams getInlineParams()
Generate the parameters to tune the inline cost analysis based only on the commandline options.
OutputIt move(R &&Range, OutputIt Out)
Provide wrappers to std::move which take ranges instead of having to pass begin/end explicitly.
static bool skipProfileForFunction(const Function &F)
cl::opt< bool > SortProfiledSCC
cl::opt< int > ProfileInlineLimitMax
cl::opt< bool > EnableExtTspBlockPlacement
const uint64_t NOMORE_ICP_MAGICNUM
Magic number in the value profile metadata showing a target has been promoted for the instruction and...
cl::opt< int > ProfileInlineGrowthLimit
constexpr const char * PseudoProbeDescMetadataName
Implement std::hash so that hash_code can be used in STL containers.
Used in the streaming interface as the general argument type.
A wrapper of binary function with basic blocks and jumps.
Provides context on when an inline advisor is constructed in the pipeline (e.g., link phase,...
Thresholds to tune inline cost analysis.
std::optional< bool > AllowRecursiveCall
Indicate whether we allow inlining for recursive call.
std::optional< bool > ComputeFullInlineCost
Compute inline cost even when the cost has exceeded the threshold.
static std::optional< uint32_t > extractDwarfBaseDiscriminator(uint32_t Value)