89#include <system_error>
96#define DEBUG_TYPE "sample-profile"
97#define CSINLINE_DEBUG DEBUG_TYPE "-inline"
100 "Number of functions inlined with context sensitive profile");
102 "Number of functions not inlined with context sensitive profile");
104 "Number of functions with CFG mismatched profile");
105STATISTIC(NumMatchedProfile,
"Number of functions with CFG matched profile");
107 "Number of inlined callsites with a partial distribution factor");
110 "Number of functions with FDO inline stopped due to min size limit");
112 "Number of functions with FDO inline stopped due to max size limit");
114 NumCSInlinedHitGrowthLimit,
115 "Number of functions with FDO inline stopped due to growth size limit");
134 cl::desc(
"Salvage stale profile by fuzzy matching and use the remapped "
135 "location for sample profile query."));
138 cl::desc(
"Salvage unused profile by matching with new "
139 "functions on call graph."));
143 cl::desc(
"Compute and report stale profile statistical metrics."));
147 cl::desc(
"Compute stale profile statistical metrics and write it into the "
148 "native object file(.llvm_stats section)."));
152 cl::desc(
"If the sample profile is accurate, we will mark all un-sampled "
153 "callsite and function as having 0 samples. Otherwise, treat "
154 "un-sampled callsites and functions conservatively as unknown. "));
158 cl::desc(
"If the sample profile is accurate, we will mark all un-sampled "
159 "branches and calls as having 0 samples. Otherwise, treat "
160 "them conservatively as unknown. "));
164 cl::desc(
"For symbols in profile symbol list, regard their profiles to "
165 "be accurate. It may be overridden by profile-sample-accurate. "));
169 cl::desc(
"Merge past inlinee's profile to outline version if sample "
170 "profile loader decided not to inline a call site. It will "
171 "only be enabled when top-down order of profile loading is "
176 cl::desc(
"Do profile annotation and inlining for functions in top-down "
177 "order of call graph during sample profile loading. It only "
178 "works for new pass manager. "));
182 cl::desc(
"Process functions in a top-down order "
183 "defined by the profiled call graph when "
184 "-sample-profile-top-down-load is on."));
188 cl::desc(
"Inline cold call sites in profile loader if it's beneficial "
197 "If true, artificially skip inline transformation in sample-loader "
198 "pass, and merge (or scale) profiles (as configured by "
199 "--sample-profile-merge-inlinee)."));
203 cl::desc(
"Sort profiled recursion by edge weights."));
207 cl::desc(
"The size growth ratio limit for proirity-based sample profile "
208 "loader inlining."));
212 cl::desc(
"The lower bound of size growth limit for "
213 "proirity-based sample profile loader inlining."));
217 cl::desc(
"The upper bound of size growth limit for "
218 "proirity-based sample profile loader inlining."));
222 cl::desc(
"Hot callsite threshold for proirity-based sample profile loader "
227 cl::desc(
"Threshold for inlining cold callsites"));
233 "Relative hotness percentage threshold for indirect "
234 "call promotion in proirity-based sample profile loader inlining."));
239 "Skip relative hotness check for ICP up to given number of targets."));
243 cl::desc(
"A function is considered hot for staleness error check if its "
244 "total sample count is above the specified percentile"));
248 cl::desc(
"Skip the check if the number of hot functions is smaller than "
249 "the specified number."));
253 cl::desc(
"Reject the profile if the mismatch percent is higher than the "
257 "sample-profile-prioritized-inline",
cl::Hidden,
258 cl::desc(
"Use call site prioritized inlining for sample profile loader. "
259 "Currently only CSSPGO is supported."));
263 cl::desc(
"Use the preinliner decisions stored in profile context."));
266 "sample-profile-recursive-inline",
cl::Hidden,
267 cl::desc(
"Allow sample loader inliner to inline recursive calls."));
271 cl::desc(
"Remove pseudo-probe after sample profile annotation."));
276 "Optimization remarks file containing inline remarks to be replayed "
277 "by inlining from sample profile loader."),
281 "sample-profile-inline-replay-scope",
284 "Replay on functions that have remarks associated "
285 "with them (default)"),
287 "Replay on the entire module")),
288 cl::desc(
"Whether inline replay should be applied to the entire "
289 "Module or just the Functions (default) that are present as "
290 "callers in remarks during sample profile inlining."),
294 "sample-profile-inline-replay-fallback",
299 "All decisions not in replay send to original advisor (default)"),
301 "AlwaysInline",
"All decisions not in replay are inlined"),
303 "All decisions not in replay are not inlined")),
304 cl::desc(
"How sample profile inline replay treats sites that don't come "
305 "from the replay. Original: defers to original advisor, "
306 "AlwaysInline: inline all sites not in replay, NeverInline: "
307 "inline no sites not in replay"),
311 "sample-profile-inline-replay-format",
316 "<Line Number>:<Column Number>"),
318 "LineDiscriminator",
"<Line Number>.<Discriminator>"),
320 "LineColumnDiscriminator",
321 "<Line Number>:<Column Number>.<Discriminator> (default)")),
326 cl::desc(
"Max number of promotions for a single indirect "
327 "call callsite in sample profile loader"));
331 cl::desc(
"Ignore existing branch weights on IR and always overwrite."));
335 cl::desc(
"Annotate LTO phase (prelink / postlink), or main (no LTO) for "
336 "sample-profile inline pass name."));
// An Edge is an ordered pair of basic-block pointers. Later code in this file
// builds one as std::make_pair(BB, Succ) and uses it to index EdgeWeights.
346using Edge = std::pair<const BasicBlock *, const BasicBlock *>;
// Scoped helper around CurrentGUIDToFuncNameMap. From the visible lines: the
// constructor inserts entries while iterating the functions of CurrentModule
// and then publishes the map's address through SetGUIDToFuncNameMapForAll; the
// destructor clears the map and publishes nullptr instead.
// NOTE(review): several lines of this class are missing from this view (for
// example the arguments of the insert calls), so the exact keys/values stored
// cannot be confirmed here.
351class GUIDToFuncNameMapper {
356 CurrentGUIDToFuncNameMap(GUIDToFuncNameMap) {
360 for (
const auto &
F : CurrentModule) {
362 CurrentGUIDToFuncNameMap.insert(
// A second insert happens only when the canonical name differs from the
// original name.
373 if (CanonName != OrigName)
374 CurrentGUIDToFuncNameMap.insert(
379 SetGUIDToFuncNameMapForAll(&CurrentGUIDToFuncNameMap);
// Destructor: drop all entries, then un-publish the map.
382 ~GUIDToFuncNameMapper() {
386 CurrentGUIDToFuncNameMap.clear();
390 SetGUIDToFuncNameMapForAll(
nullptr);
// Worklist walk: each FunctionSamples taken from the queue has its
// GUIDToFuncNameMap member set to Map, and the samples reachable through its
// callsite samples are pushed so they receive the same assignment.
395 std::queue<FunctionSamples *> FSToUpdate;
397 FSToUpdate.push(&IFS.second);
400 while (!FSToUpdate.empty()) {
403 FS->GUIDToFuncNameMap = Map;
404 for (
const auto &ICS : FS->getCallsiteSamples()) {
406 for (
const auto &IFS : FSMap) {
408 FSToUpdate.push(&FS);
// Aggregate describing one call site considered for inlining. Elsewhere in this
// file it is brace-initialized as {call instruction, callee samples, count,
// distribution factor}.
// NOTE(review): the first member and the closing brace are not visible in this
// view.
420struct InlineCandidate {
// Profile samples for the callee at this call site.
422 const FunctionSamples *CalleeSamples;
// Count used as the primary ordering key by CandidateComparer.
427 uint64_t CallsiteCount;
// Scaling factor for the call site; getInlineCandidate fills it from the
// pseudo-probe's Factor when a probe is found, and values below 1 trigger the
// partial-distribution handling in tryInlineCandidate.
430 float CallsiteDistribution;
// Ordering functor for InlineCandidate objects. When the two CallsiteCount
// values differ, the result is `LHS.CallsiteCount < RHS.CallsiteCount`.
// Otherwise the callee samples of both sides are fetched for a tie-break.
// NOTE(review): the tie-break logic itself is not visible in this view.
434struct CandidateComparer {
435 bool operator()(
const InlineCandidate &
LHS,
const InlineCandidate &
RHS) {
436 if (
LHS.CallsiteCount !=
RHS.CallsiteCount)
437 return LHS.CallsiteCount <
RHS.CallsiteCount;
439 const FunctionSamples *LCS =
LHS.CalleeSamples;
440 const FunctionSamples *RCS =
RHS.CalleeSamples;
455using CandidateQueue =
468 IntrusiveRefCntPtr<vfs::FileSystem> FS,
469 std::function<AssumptionCache &(Function &)> GetAssumptionCache,
470 std::function<TargetTransformInfo &(Function &)> GetTargetTransformInfo,
471 std::function<
const TargetLibraryInfo &(Function &)> GetTLI,
472 LazyCallGraph &CG,
bool DisableSampleProfileInlining,
473 bool UseFlattenedProfile)
476 GetAC(std::
move(GetAssumptionCache)),
477 GetTTI(std::
move(GetTargetTransformInfo)), GetTLI(std::
move(GetTLI)),
478 CG(CG), LTOPhase(LTOPhase),
483 DisableSampleProfileInlining(DisableSampleProfileInlining),
484 UseFlattenedProfile(UseFlattenedProfile) {}
488 ProfileSummaryInfo *_PSI);
492 bool emitAnnotations(Function &
F);
493 ErrorOr<uint64_t> getInstWeight(
const Instruction &
I)
override;
494 const FunctionSamples *findCalleeFunctionSamples(
const CallBase &
I)
const;
495 const FunctionSamples *
496 findFunctionSamples(
const Instruction &
I)
const override;
497 std::vector<const FunctionSamples *>
498 findIndirectCallFunctionSamples(
const Instruction &
I, uint64_t &Sum)
const;
499 void findExternalInlineCandidate(CallBase *CB,
const FunctionSamples *Samples,
500 DenseSet<GlobalValue::GUID> &InlinedGUIDs,
503 bool tryPromoteAndInlineCandidate(
504 Function &
F, InlineCandidate &Candidate, uint64_t SumOrigin,
507 bool inlineHotFunctions(Function &
F,
508 DenseSet<GlobalValue::GUID> &InlinedGUIDs);
509 std::optional<InlineCost> getExternalInlineAdvisorCost(CallBase &CB);
510 bool getExternalInlineAdvisorShouldInline(CallBase &CB);
511 InlineCost shouldInlineCandidate(InlineCandidate &Candidate);
512 bool getInlineCandidate(InlineCandidate *NewCandidate, CallBase *CB);
514 tryInlineCandidate(InlineCandidate &Candidate,
517 inlineHotFunctionsWithPriority(Function &
F,
518 DenseSet<GlobalValue::GUID> &InlinedGUIDs);
520 bool shouldInlineColdCallee(CallBase &CallInst);
521 void emitOptimizationRemarksForInlineCandidates(
522 const SmallVectorImpl<CallBase *> &Candidates,
const Function &
F,
524 void promoteMergeNotInlinedContextSamples(
525 MapVector<CallBase *, const FunctionSamples *> NonInlinedCallSites,
527 std::vector<Function *> buildFunctionOrder(
Module &M, LazyCallGraph &CG);
528 std::unique_ptr<ProfiledCallGraph> buildProfiledCallGraph(
Module &M);
529 void generateMDProfMetadata(Function &
F);
530 bool rejectHighStalenessProfile(
Module &M, ProfileSummaryInfo *PSI,
531 const SampleProfileMap &Profiles);
532 void removePseudoProbeInstsDiscriminator(
Module &M);
538 HashKeyMap<DenseMap, FunctionId, Function *> SymbolMap;
542 HashKeyMap<DenseMap, FunctionId, FunctionId> FuncNameToProfNameMap;
544 std::function<AssumptionCache &(
Function &)> GetAC;
545 std::function<TargetTransformInfo &(
Function &)> GetTTI;
546 std::function<
const TargetLibraryInfo &(
Function &)> GetTLI;
550 std::unique_ptr<SampleContextTracker> ContextTracker;
558 const std::string AnnotatedPassName;
562 std::shared_ptr<ProfileSymbolList> PSL;
567 struct NotInlinedProfileInfo {
570 DenseMap<Function *, NotInlinedProfileInfo> notInlinedCallInfo;
574 DenseMap<uint64_t, StringRef> GUIDToFuncNameMap;
578 StringSet<> NamesInProfile;
583 llvm::DenseSet<uint64_t> GUIDsInProfile;
590 bool ProfAccForSymsInList;
592 bool DisableSampleProfileInlining;
594 bool UseFlattenedProfile;
597 std::unique_ptr<InlineAdvisor> ExternalInlineAdvisor;
600 std::unique_ptr<SampleProfileMatcher> MatchingManager;
// Returns the C-string view of AnnotatedPassName. The pointer is owned by that
// std::string member, so it is only valid while the member is alive.
603 const char *getAnnotatedRemarkPassName()
const {
604 return AnnotatedPassName.c_str();
611inline bool SampleProfileInference<Function>::isExit(
const BasicBlock *BB) {
// Walks every jump in Func.Jumps, resolves its source and target blocks through
// the BasicBlocks vector, and sets Jump.IsUnlikely on jumps that match one of
// two conditions.
// NOTE(review): only part of the first condition is visible (the source has
// exactly two successors and the target is the last one); the remaining checks
// are missing from this view.
616inline void SampleProfileInference<Function>::findUnlikelyJumps(
617 const std::vector<const BasicBlockT *> &BasicBlocks,
619 for (
auto &Jump :
Func.Jumps) {
620 const auto *BB = BasicBlocks[Jump.Source];
621 const auto *Succ = BasicBlocks[Jump.Target];
625 const auto &Succs = Successors[BB];
626 if (Succs.size() == 2 && Succs.back() == Succ) {
628 Jump.IsUnlikely =
true;
635 Jump.IsUnlikely =
true;
656 return getProbeWeight(Inst);
660 return std::error_code();
666 return std::error_code();
676 if (!CB->isIndirectCall() && findCalleeFunctionSamples(*CB))
679 return getInstWeightImpl(Inst);
// Looks up the profile samples for the callee of call instruction Inst.
// Two lookup paths are visible:
//   * via ContextTracker, keyed by (Inst, CalleeName);
//   * via the samples covering Inst (findFunctionSamples), followed by a lookup
//     that takes CalleeName, the reader's remapper and FuncNameToProfNameMap.
// NOTE(review): the conditions that select between the paths, and the early
// returns, are not visible in this view.
694const FunctionSamples *
695SampleProfileLoader::findCalleeFunctionSamples(
const CallBase &Inst)
const {
701 StringRef CalleeName;
703 CalleeName =
Callee->getName();
706 return ContextTracker->getCalleeContextSamplesFor(Inst, CalleeName);
708 const FunctionSamples *
FS = findFunctionSamples(Inst);
713 CalleeName, Reader->getRemapper(),
714 &FuncNameToProfNameMap);
// Collects candidate callee samples for an indirect call instruction and
// accumulates their head-sample estimates into the out-parameter Sum.
// NOTE(review): parts of this function are missing from this view, including
// where FSCompare is applied and the final return.
720std::vector<const FunctionSamples *>
721SampleProfileLoader::findIndirectCallFunctionSamples(
722 const Instruction &Inst, uint64_t &Sum)
const {
724 std::vector<const FunctionSamples *>
R;
// Comparator: larger head-sample estimate first; equal estimates are ordered by
// ascending GUID. Both arguments must be non-null.
730 auto FSCompare = [](
const FunctionSamples *
L,
const FunctionSamples *
R) {
731 assert(L && R &&
"Expect non-null FunctionSamples");
732 if (
L->getHeadSamplesEstimate() !=
R->getHeadSamplesEstimate())
733 return L->getHeadSamplesEstimate() >
R->getHeadSamplesEstimate();
734 return L->getGUID() <
R->getGUID();
// Context-tracker path: candidates come from the tracker for location DIL.
739 ContextTracker->getIndirectCalleeContextSamplesFor(DIL);
740 if (CalleeSamples.empty())
746 for (
const auto *
const FS : CalleeSamples) {
747 Sum +=
FS->getHeadSamplesEstimate();
// Other path: start from the samples covering Inst, consult the call-target map
// at the call site, and add every callsite FunctionSamples entry to R.
754 const FunctionSamples *
FS = findFunctionSamples(Inst);
760 if (
auto T =
FS->findCallTargetMapAt(CallSite))
761 for (
const auto &T_C : *
T)
766 for (
const auto &NameFS : *M) {
767 Sum += NameFS.second.getHeadSamplesEstimate();
768 R.push_back(&NameFS.second);
// Returns the FunctionSamples associated with the debug location of Inst,
// using DILocation2SampleMap as a per-location cache: try_emplace adds a
// nullptr placeholder for DIL, the slot is then filled either from
// ContextTracker or from Samples->findFunctionSamples, and the slot's value is
// returned.
// NOTE(review): the conditions guarding the fill (and how DIL is obtained) are
// not visible in this view.
775const FunctionSamples *
776SampleProfileLoader::findFunctionSamples(
const Instruction &Inst)
const {
787 auto it = DILocation2SampleMap.try_emplace(DIL,
nullptr);
790 it.first->second = ContextTracker->getContextSamplesFor(DIL);
792 it.first->second = Samples->findFunctionSamples(
793 DIL, Reader->getRemapper(), &FuncNameToProfNameMap);
795 return it.first->second;
811 if (ValueData.empty())
814 unsigned NumPromoted = 0;
815 for (
const auto &V : ValueData) {
857 "If sum is 0, assume only one element in CallTargets "
858 "with count being NOMORE_ICP_MAGICNUM");
860 for (
const auto &V : ValueData)
861 ValueCountMap[V.Value] = V.Count;
867 OldSum -= Pair.first->second;
874 for (
const auto &V : ValueData) {
876 ValueCountMap[V.Value] = V.Count;
879 for (
const auto &
Data : CallTargets) {
886 assert(Sum >=
Data.Count &&
"Sum should never be less than Data.Count");
892 for (
const auto &ValueCount : ValueCountMap) {
894 InstrProfValueData{ValueCount.first, ValueCount.second});
898 [](
const InstrProfValueData &L,
const InstrProfValueData &R) {
899 return std::tie(L.Count, L.Value) > std::tie(R.Count, R.Value);
905 NewCallTargets, Sum, IPVK_IndirectCallTarget, MaxMDCount);
// Attempts to promote the indirect call in Candidate to a direct call to the
// callee named by Candidate.CalleeSamples, and then to inline the promoted
// call. Sum is an in/out running total: Candidate.CallsiteCount is subtracted
// from it on the visible promotion path.
// NOTE(review): many lines (returns, the promotion call's name, parts of the
// eligibility condition) are missing from this view.
918bool SampleProfileLoader::tryPromoteAndInlineCandidate(
919 Function &
F, InlineCandidate &Candidate, uint64_t SumOrigin, uint64_t &Sum,
// Checked first: the loader-wide switch that disables sample-profile inlining.
922 if (DisableSampleProfileInlining)
// Resolve the callee through SymbolMap; a missing or null entry is handled by
// the check that follows.
929 auto CalleeFunctionName = Candidate.CalleeSamples->
getFunction();
930 auto R = SymbolMap.find(CalleeFunctionName);
931 if (R == SymbolMap.end() || !
R->second)
934 auto &CI = *Candidate.CallInstr;
938 const char *Reason =
"Callee function not available";
// Eligibility requires (at least) a definition with debug info that carries
// the "use-sample-profile" attribute.
945 if (!
R->second->isDeclaration() &&
R->second->getSubprogram() &&
946 R->second->hasFnAttribute(
"use-sample-profile") &&
951 Function::getGUIDAssumingExternalLinkage(
R->second->getName()),
956 CI,
R->second, Candidate.CallsiteCount, Sum,
false, ORE);
958 Sum -= Candidate.CallsiteCount;
// The candidate is retargeted at the new direct call DI before inlining.
971 Candidate.CallInstr = DI;
973 bool Inlined = tryInlineCandidate(Candidate, InlinedCallSite);
// The fraction CallsiteCount / SumOrigin is computed in float and passed along
// with *DI.
978 *DI,
static_cast<float>(Candidate.CallsiteCount) / SumOrigin);
986 Candidate.CallInstr->
getName())<<
" because "
// Decides whether a cold call site should still be inlined. Visible checks:
// a null Callee is special-cased, and a cost for which isAlways() holds is
// special-cased.
// NOTE(review): the return statements and the cost computation are not visible
// in this view.
992bool SampleProfileLoader::shouldInlineColdCallee(CallBase &CallInst) {
997 if (Callee ==
nullptr)
1006 if (
Cost.isAlways())
// Emits one "InlineAttempt" analysis remark for each candidate call that has a
// statically known callee. The message reads
// "previous inlining reattempted for hotness: '<callee>' into '..." when Hot
// is true and "... for size: '<callee>' into '..." otherwise.
// NOTE(review): the third parameter's declaration and the tail of the remark
// are not visible in this view.
1012void SampleProfileLoader::emitOptimizationRemarksForInlineCandidates(
1013 const SmallVectorImpl<CallBase *> &Candidates,
const Function &
F,
1015 for (
auto *
I : Candidates) {
1016 Function *CalledFunction =
I->getCalledFunction();
// Candidates without a resolved callee produce no remark.
1017 if (CalledFunction) {
1018 ORE->emit(OptimizationRemarkAnalysis(getAnnotatedRemarkPassName(),
1019 "InlineAttempt",
I->getDebugLoc(),
1021 <<
"previous inlining reattempted for "
1022 << (
Hot ?
"hotness: '" :
"size: '")
1023 <<
ore::NV(
"Callee", CalledFunction) <<
"' into '"
// Records, in InlinedGUIDs, GUIDs of functions related to call site CB /
// profile Samples. Visible behavior:
//   * if CB is non-null and the external inline advisor says to inline it, a
//     GUID is inserted directly;
//   * otherwise the context trie is walked breadth-first starting at the node
//     for Samples, and for call targets whose count exceeds Threshold the
//     target's hash code is inserted when the callee is absent or only a
//     declaration.
// NOTE(review): several conditions and early exits are missing from this view.
1029void SampleProfileLoader::findExternalInlineCandidate(
1030 CallBase *CB,
const FunctionSamples *Samples,
1031 DenseSet<GlobalValue::GUID> &InlinedGUIDs, uint64_t Threshold) {
1035 if (CB && getExternalInlineAdvisorShouldInline(*CB)) {
1039 InlinedGUIDs.
insert(Function::getGUIDAssumingExternalLinkage(
// Queue-driven traversal of the context trie rooted at the caller's node.
1065 ContextTrieNode *
Caller = ContextTracker->getContextNodeForProfile(Samples);
1066 std::queue<ContextTrieNode *> CalleeList;
1067 CalleeList.push(Caller);
1068 while (!CalleeList.empty()) {
1069 ContextTrieNode *
Node = CalleeList.front();
1071 FunctionSamples *CalleeSample =
Node->getFunctionSamples();
1087 if (!Func ||
Func->isDeclaration())
1093 for (
const auto &TS : BS.second.getCallTargets())
1094 if (TS.second > Threshold) {
1096 if (!Callee ||
Callee->isDeclaration())
1097 InlinedGUIDs.
insert(TS.first.getHashCode());
// Every child context is enqueued for a later iteration.
1104 for (
auto &Child :
Node->getAllChildContext()) {
1105 ContextTrieNode *CalleeNode = &Child.second;
1106 CalleeList.push(CalleeNode);
// Iteratively inlines call sites of F based on the sample profile. The outer
// loop repeats while the previous pass changed something (LocalChanged). Each
// pass scans the instructions of F to gather candidate calls, then tries to
// inline them; call sites with samples that do not end up inlined stay in
// LocalNotInlinedCallSites and are handed to
// promoteMergeNotInlinedContextSamples at the end.
// NOTE(review): many lines are missing from this view (candidate vectors,
// casts producing CB, continue/return statements), so this describes only what
// is visible.
1133bool SampleProfileLoader::inlineHotFunctions(
1134 Function &
F, DenseSet<GlobalValue::GUID> &InlinedGUIDs) {
1137 assert((!ProfAccForSymsInList ||
1139 !
F.hasFnAttribute(
"profile-sample-accurate"))) &&
1140 "ProfAccForSymsInList should be false when profile-sample-accurate "
1143 MapVector<CallBase *, const FunctionSamples *> LocalNotInlinedCallSites;
1145 bool LocalChanged =
true;
1146 while (LocalChanged) {
1147 LocalChanged =
false;
// Candidate collection over all instructions of all blocks.
1149 for (
auto &BB :
F) {
1153 for (
auto &
I : BB) {
1154 const FunctionSamples *
FS =
nullptr;
1157 if ((FS = findCalleeFunctionSamples(*CB))) {
1159 "GUIDToFuncNameMap has to be populated");
// Call sites with callee samples are remembered as "not inlined yet"; entries
// are erased again below once inlining succeeds.
1161 if (
FS->getHeadSamplesEstimate() > 0 ||
1163 LocalNotInlinedCallSites.
insert({CB,
FS});
1166 else if (shouldInlineColdCallee(*CB))
1168 }
else if (getExternalInlineAdvisorShouldInline(*CB)) {
// Remarks are emitted with Hot=true for AllCandidates and Hot=false for
// ColdCandidates.
1174 if (
Hot || ExternalInlineAdvisor) {
1176 emitOptimizationRemarksForInlineCandidates(AllCandidates,
F,
true);
1179 emitOptimizationRemarksForInlineCandidates(ColdCandidates,
F,
false);
// Inlining attempts over the selected call sites.
1182 for (CallBase *
I : CIS) {
1183 Function *CalledFunction =
I->getCalledFunction();
1184 InlineCandidate Candidate = {
I, LocalNotInlinedCallSites.
lookup(
I),
// Direct self-calls are special-cased.
1188 if (CalledFunction == &
F)
// Indirect calls: try promotion + inlining for each candidate target.
1190 if (
I->isIndirectCall()) {
1192 for (
const auto *FS : findIndirectCallFunctionSamples(*
I, Sum)) {
1193 uint64_t SumOrigin = Sum;
1194 if (LTOPhase == ThinOrFullLTOPhase::ThinLTOPreLink) {
1195 findExternalInlineCandidate(
I, FS, InlinedGUIDs,
1196 PSI->getOrCompHotCountThreshold());
1202 Candidate = {
I,
FS,
FS->getHeadSamplesEstimate(), 1.0};
1203 if (tryPromoteAndInlineCandidate(
F, Candidate, SumOrigin, Sum)) {
1204 LocalNotInlinedCallSites.
erase(
I);
1205 LocalChanged =
true;
// Direct calls whose callee has debug info: inline in place.
1208 }
else if (CalledFunction && CalledFunction->
getSubprogram() &&
1210 if (tryInlineCandidate(Candidate)) {
1211 LocalNotInlinedCallSites.
erase(
I);
1212 LocalChanged =
true;
// Remaining case, ThinLTO pre-link only: record external candidates instead.
1214 }
else if (LTOPhase == ThinOrFullLTOPhase::ThinLTOPreLink) {
1215 findExternalInlineCandidate(
I, findCalleeFunctionSamples(*
I),
1217 PSI->getOrCompHotCountThreshold());
1226 promoteMergeNotInlinedContextSamples(LocalNotInlinedCallSites,
F);
// Tries to inline the call in Candidate.CallInstr. Visible steps: bail-out
// check on DisableSampleProfileInlining, cost query via shouldInlineCandidate
// (a "never" cost emits an "InlineFail" remark with the text
// "incompatible inlining"), the inlining itself with IFI.UpdateProfile turned
// off, and post-processing on success.
// NOTE(review): the second parameter's declaration, the InlineFunction call
// and all return statements are missing from this view.
1230bool SampleProfileLoader::tryInlineCandidate(
1234 if (DisableSampleProfileInlining)
1237 CallBase &CB = *Candidate.CallInstr;
1239 assert(CalledFunction &&
"Expect a callee with definition");
1243 InlineCost
Cost = shouldInlineCandidate(Candidate);
1244 if (
Cost.isNever()) {
1245 ORE->emit(OptimizationRemarkAnalysis(getAnnotatedRemarkPassName(),
1246 "InlineFail", DLoc, BB)
1247 <<
"incompatible inlining");
1254 InlineFunctionInfo IFI(GetAC);
1255 IFI.UpdateProfile =
false;
1258 if (!
IR.isSuccess())
1263 Cost,
true, getAnnotatedRemarkPassName());
// When the caller supplied an output container it is reset before use.
1266 if (InlinedCallSites) {
1267 InlinedCallSites->
clear();
1272 ContextTracker->markContextSamplesInlined(Candidate.CalleeSamples);
// A distribution factor below 1 is applied to the call sites created by the
// inlining, and the NumDuplicatedInlinesite counter is incremented.
1282 if (Candidate.CallsiteDistribution < 1) {
1283 for (
auto &
I : IFI.InlinedCallSites) {
1286 Candidate.CallsiteDistribution);
1288 NumDuplicatedInlinesite++;
// Fills *NewCandidate for call CB as {CB, CalleeSamples, CallsiteCount,
// Factor}. CB must be non-null. A call with neither callee samples nor a
// positive answer from the external inline advisor is special-cased by the
// visible check. Factor is taken from the call's pseudo probe when one is
// found.
// NOTE(review): Factor's initial value, the CallsiteCount expression and the
// return statements are not visible in this view.
1294bool SampleProfileLoader::getInlineCandidate(InlineCandidate *NewCandidate,
1296 assert(CB &&
"Expect non-null call instruction");
1302 const FunctionSamples *CalleeSamples = findCalleeFunctionSamples(*CB);
1305 if (!CalleeSamples && !getExternalInlineAdvisorShouldInline(*CB))
1309 if (std::optional<PseudoProbe> Probe =
extractProbe(*CB))
1310 Factor = Probe->Factor;
1312 uint64_t CallsiteCount =
1314 *NewCandidate = {CB, CalleeSamples, CallsiteCount, Factor};
// Queries ExternalInlineAdvisor (when one is set) for advice on call CB and
// reports the outcome back to the advice object: recordUnattemptedInlining()
// when inlining is not recommended, recordInlining() on the other visible path.
// NOTE(review): the returned values, and any null check on Advice, are not
// visible in this view.
1318std::optional<InlineCost>
1319SampleProfileLoader::getExternalInlineAdvisorCost(CallBase &CB) {
1320 std::unique_ptr<InlineAdvice> Advice =
nullptr;
1321 if (ExternalInlineAdvisor) {
1322 Advice = ExternalInlineAdvisor->getAdvice(CB);
1324 if (!Advice->isInliningRecommended()) {
1325 Advice->recordUnattemptedInlining();
1328 Advice->recordInlining();
// Boolean convenience wrapper over getExternalInlineAdvisorCost(CB).
// NOTE(review): the expression that turns the optional cost into the returned
// bool is not visible in this view.
1336bool SampleProfileLoader::getExternalInlineAdvisorShouldInline(CallBase &CB) {
1337 std::optional<InlineCost>
Cost = getExternalInlineAdvisorCost(CB);
// Computes the inline cost for Candidate. The external advisor is consulted
// first; if it yields a cost, that branch is taken. Otherwise the call-site
// count is compared against PSI->getHotCountThreshold() and a cost is computed
// using the callee's TTI together with the GetAC / GetTLI callbacks. "Never"
// and "always" results are special-cased at the end.
// NOTE(review): the return type line, thresholds and returns are missing from
// this view.
1342SampleProfileLoader::shouldInlineCandidate(InlineCandidate &Candidate) {
1343 if (std::optional<InlineCost> ReplayCost =
1344 getExternalInlineAdvisorCost(*Candidate.CallInstr))
1350 if (Candidate.CallsiteCount > PSI->getHotCountThreshold())
1357 assert(Callee &&
"Expect a definition for inline candidate of direct call");
1370 GetTTI(*Callee), GetAC, GetTLI);
1373 if (
Cost.isNever() ||
Cost.isAlways())
// Priority-queue driven variant of hot-function inlining for F. All call sites
// that yield an InlineCandidate are pushed into CQueue; candidates are then
// popped while the queue is non-empty and F's instruction count stays below
// SizeLimit. Call sites exposed by a successful inline are turned into new
// candidates and queued. Call sites that were not inlined (and are not tracked
// by ContextTracker) are collected and passed to
// promoteMergeNotInlinedContextSamples at the end.
// NOTE(review): many lines are missing from this view (SizeLimit computation,
// queue pop, continue/return statements).
1407bool SampleProfileLoader::inlineHotFunctionsWithPriority(
1408 Function &
F, DenseSet<GlobalValue::GUID> &InlinedGUIDs) {
1411 assert((!ProfAccForSymsInList ||
1413 !
F.hasFnAttribute(
"profile-sample-accurate"))) &&
1414 "ProfAccForSymsInList should be false when profile-sample-accurate "
// Seed the queue from every instruction in F.
1419 CandidateQueue CQueue;
1420 InlineCandidate NewCandidate;
1421 for (
auto &BB :
F) {
1422 for (
auto &
I : BB) {
1426 if (getInlineCandidate(&NewCandidate, CB))
1427 CQueue.push(NewCandidate);
1436 "Max inline size limit should not be smaller than min inline size "
// With an external advisor the size limit is lifted to the maximum unsigned
// value.
1441 if (ExternalInlineAdvisor)
1442 SizeLimit = std::numeric_limits<unsigned>::max();
1444 MapVector<CallBase *, const FunctionSamples *> LocalNotInlinedCallSites;
1448 while (!CQueue.empty() &&
F.getInstructionCount() <
SizeLimit) {
1449 InlineCandidate Candidate = CQueue.top();
1451 CallBase *
I = Candidate.CallInstr;
1452 Function *CalledFunction =
I->getCalledFunction();
// Direct self-calls are special-cased.
1454 if (CalledFunction == &
F)
1456 if (
I->isIndirectCall()) {
1458 auto CalleeSamples = findIndirectCallFunctionSamples(*
I, Sum);
// SumOrigin keeps the unscaled total; Sum itself is scaled by the candidate's
// distribution factor (integer times float, converted back to uint64_t).
1459 uint64_t SumOrigin = Sum;
1460 Sum *= Candidate.CallsiteDistribution;
1461 unsigned ICPCount = 0;
1462 for (
const auto *FS : CalleeSamples) {
1464 if (LTOPhase == ThinOrFullLTOPhase::ThinLTOPreLink) {
1465 findExternalInlineCandidate(
I, FS, InlinedGUIDs,
1466 PSI->getOrCompHotCountThreshold());
// The target's head-sample estimate is scaled by the same factor and must be
// considered hot by PSI to proceed.
1469 uint64_t EntryCountDistributed =
1470 FS->getHeadSamplesEstimate() * Candidate.CallsiteDistribution;
1485 if (!PSI->isHotCount(EntryCountDistributed))
1490 Candidate = {
I,
FS, EntryCountDistributed,
1491 Candidate.CallsiteDistribution};
1492 if (tryPromoteAndInlineCandidate(
F, Candidate, SumOrigin, Sum,
1493 &InlinedCallSites)) {
1494 for (
auto *CB : InlinedCallSites) {
1495 if (getInlineCandidate(&NewCandidate, CB))
1496 CQueue.emplace(NewCandidate);
1500 }
else if (!ContextTracker) {
1501 LocalNotInlinedCallSites.
insert({
I,
FS});
// Direct calls whose callee has debug info.
1504 }
else if (CalledFunction && CalledFunction->
getSubprogram() &&
1507 if (tryInlineCandidate(Candidate, &InlinedCallSites)) {
1508 for (
auto *CB : InlinedCallSites) {
1509 if (getInlineCandidate(&NewCandidate, CB))
1510 CQueue.emplace(NewCandidate);
1513 }
else if (!ContextTracker) {
1514 LocalNotInlinedCallSites.
insert({
I, Candidate.CalleeSamples});
1516 }
else if (LTOPhase == ThinOrFullLTOPhase::ThinLTOPreLink) {
1517 findExternalInlineCandidate(
I, findCalleeFunctionSamples(*
I),
1519 PSI->getOrCompHotCountThreshold());
// Candidates left in the queue mean a size limit stopped the loop; one of the
// three limit statistics is bumped.
1523 if (!CQueue.empty()) {
1525 ++NumCSInlinedHitMaxLimit;
1527 ++NumCSInlinedHitMinLimit;
1529 ++NumCSInlinedHitGrowthLimit;
1535 promoteMergeNotInlinedContextSamples(LocalNotInlinedCallSites,
F);
// Post-processes call sites of F that had profile samples but were not
// inlined. For each (call, samples) pair with a defined callee it emits a
// "NotInline" remark, then folds the call-site samples into the callee's
// outlined profile (OutlineFS->merge) or accumulates the head-sample estimate
// into notInlinedCallInfo.
// NOTE(review): NonInlinedCallSites is taken by value, so the MapVector is
// copied on every call. The conditions choosing between the merge and the
// accumulate path are not visible in this view.
1539void SampleProfileLoader::promoteMergeNotInlinedContextSamples(
1540 MapVector<CallBase *, const FunctionSamples *> NonInlinedCallSites,
1541 const Function &
F) {
1543 for (
const auto &Pair : NonInlinedCallSites) {
1544 CallBase *
I = Pair.first;
// Missing callees and declarations are special-cased.
1546 if (!Callee ||
Callee->isDeclaration())
1550 OptimizationRemarkAnalysis(getAnnotatedRemarkPassName(),
"NotInline",
1551 I->getDebugLoc(),
I->getParent())
1552 <<
"previous inlining not repeated: '" <<
ore::NV(
"Callee", Callee)
1553 <<
"' into '" <<
ore::NV(
"Caller", &
F) <<
"'");
1556 const FunctionSamples *
FS = Pair.second;
// Samples with neither total samples nor a head-sample estimate are
// special-cased.
1557 if (
FS->getTotalSamples() == 0 &&
FS->getHeadSamplesEstimate() == 0) {
// When no head samples are recorded, the estimate is written back as the head
// sample count; this mutates the profile through a const_cast.
1571 if (
FS->getHeadSamples() == 0) {
1574 const_cast<FunctionSamples *
>(
FS)->addHeadSamples(
1575 FS->getHeadSamplesEstimate());
1580 FunctionSamples *OutlineFS = Reader->getSamplesFor(*Callee);
1584 OutlineFS = &OutlineFunctionSamples[
1586 OutlineFS->
merge(*FS, 1);
1592 notInlinedCallInfo.try_emplace(Callee, NotInlinedProfileInfo{0});
1593 pair.first->second.entryCount +=
FS->getHeadSamplesEstimate();
1604 InstrProfValueData{
I.first.getHashCode(),
I.second});
// Attaches profile metadata to F after weight propagation. Visible work:
//   * for blocks with a non-zero weight, call instructions get metadata based
//     on the call-target map at their location; on another path the existing
//     MD_prof metadata is cleared;
//   * for branch terminators, per-successor 32-bit weights are derived from
//     EdgeWeights, and the heaviest destination is tracked for a
//     "PopularDest" remark.
// NOTE(review): large parts of this function are missing from this view; the
// comments below only describe visible statements.
1611void SampleProfileLoader::generateMDProfMetadata(Function &
F) {
1614 LLVM_DEBUG(
dbgs() <<
"\nPropagation complete. Setting branch weights\n");
1615 LLVMContext &Ctx =
F.getContext();
1617 for (
auto &BI :
F) {
1620 if (BlockWeights[BB]) {
1621 for (
auto &
I : *BB) {
1628 const DILocation *DIL = DLoc;
1629 const FunctionSamples *
FS = findFunctionSamples(
I);
1633 ErrorOr<SampleRecord::CallTargetMap>
T =
1634 FS->findCallTargetMapAt(CallSite);
// A lookup error or an empty target map is special-cased.
1635 if (!
T ||
T.get().empty())
1642 if (Probe->Factor < 1)
1649 for (
const auto &
C :
T.get())
1656 FS->findFunctionSamplesMapAt(CallSite)) {
1657 for (
const auto &NameFS : *M)
1658 Sum += NameFS.second.getHeadSamplesEstimate();
1664 I.setMetadata(LLVMContext::MD_prof,
nullptr);
1667 I, ArrayRef<uint32_t>{
static_cast<uint32_t
>(BlockWeights[BB])},
1674 for (
auto &
I : *BB) {
1677 I.setMetadata(LLVMContext::MD_prof,
nullptr);
1695 << ((BranchLoc) ? Twine(BranchLoc.
getLine())
1696 : Twine(
"<UNKNOWN LOCATION>"))
1698 SmallVector<uint32_t, 4> Weights;
1699 uint32_t MaxWeight = 0;
// EdgeMultiplicity counts how many times each successor appears; EdgeIndex
// stores, per successor slot, its ordinal among duplicates of that successor.
1704 DenseMap<const BasicBlock *, uint64_t> EdgeMultiplicity;
1705 std::vector<uint64_t> EdgeIndex;
1710 EdgeIndex[
I] = EdgeMultiplicity[Succ];
1711 EdgeMultiplicity[Succ]++;
1716 Edge E = std::make_pair(BB, Succ);
1717 uint64_t Weight = EdgeWeights[
E];
// Weights that do not fit in 32 bits are clamped to the uint32_t maximum.
1722 if (Weight > std::numeric_limits<uint32_t>::max()) {
1724 Weight = std::numeric_limits<uint32_t>::max();
1729 Weights.
push_back(
static_cast<uint32_t
>(
1730 Weight == std::numeric_limits<uint32_t>::max() ? Weight
// The edge weight is divided among duplicate edges to the same successor; the
// comparison against the remainder decides which duplicates are adjusted.
1735 uint64_t
W = Weight / EdgeMultiplicity[Succ];
1737 if (EdgeIndex[
I] < Weight % EdgeMultiplicity[Succ])
1739 Weights.
push_back(
static_cast<uint32_t
>(W));
1742 if (Weight > MaxWeight) {
1744 MaxDestInst = &*Succ->getFirstNonPHIOrDbgOrLifetime();
1751 uint64_t TempWeight;
1760 if (MaxWeight > 0 &&
1765 return OptimizationRemark(
DEBUG_TYPE,
"PopularDest", MaxDestInst)
1766 <<
"most popular destination for conditional branches at "
1767 <<
ore::NV(
"CondBranchesLoc", BranchLoc);
// Annotates F with its sample profile. Visible sequence: a probe-descriptor /
// profile-validity check that updates the NumMatchedProfile and
// NumMismatchedProfile statistics, a check on getFunctionLoc(F) == 0, one of
// the two inlining drivers, weight computation and propagation, metadata
// generation, and coverage remarks. The results of the inlining and
// propagation steps are OR-ed into Changed.
// NOTE(review): the branch conditions selecting these steps and the return
// statements are not visible in this view.
1786bool SampleProfileLoader::emitAnnotations(Function &
F) {
1791 if (!ProbeManager->getDesc(
F))
1792 dbgs() <<
"Probe descriptor missing for Function " <<
F.getName()
1796 if (ProbeManager->profileIsValid(
F, *Samples)) {
1797 ++NumMatchedProfile;
1799 ++NumMismatchedProfile;
1801 dbgs() <<
"Profile is invalid due to CFG mismatch for Function "
1802 <<
F.getName() <<
"\n");
1807 if (getFunctionLoc(
F) == 0)
1811 <<
F.getName() <<
": " << getFunctionLoc(
F) <<
"\n");
// InlinedGUIDs is shared by the inlining step and the weight propagation step.
1814 DenseSet<GlobalValue::GUID> InlinedGUIDs;
1816 Changed |= inlineHotFunctionsWithPriority(
F, InlinedGUIDs);
1818 Changed |= inlineHotFunctions(
F, InlinedGUIDs);
1820 Changed |= computeAndPropagateWeights(
F, InlinedGUIDs);
1823 generateMDProfMetadata(
F);
1825 emitCoverageRemarks(
F);
// Builds a ProfiledCallGraph either from *ContextTracker or from the reader's
// profiles, then walks the functions of M and registers functions via
// addProfiledFunction.
// NOTE(review): the condition choosing the constructor, the per-function
// filter and the return statement are not visible in this view.
1829std::unique_ptr<ProfiledCallGraph>
1830SampleProfileLoader::buildProfiledCallGraph(
Module &M) {
1831 std::unique_ptr<ProfiledCallGraph> ProfiledCG;
1833 ProfiledCG = std::make_unique<ProfiledCallGraph>(*ContextTracker);
1835 ProfiledCG = std::make_unique<ProfiledCallGraph>(Reader->getProfiles());
1840 for (Function &
F : M) {
1843 ProfiledCG->addProfiledFunction(
// Produces the order in which the functions of M are processed. Two exits are
// visible: an early one that returns the functions in module iteration order,
// and a later one that collects functions while iterating the SCCs of the
// profiled call graph and then reverses the list. Capacity for M.size()
// entries is reserved up front.
// NOTE(review): the conditions selecting each path (and any path based on the
// CG parameter) are missing from this view.
1850std::vector<Function *>
1851SampleProfileLoader::buildFunctionOrder(
Module &M, LazyCallGraph &CG) {
1852 std::vector<Function *> FunctionOrderList;
1853 FunctionOrderList.reserve(
M.size());
1856 errs() <<
"WARNING: -use-profiled-call-graph ignored, should be used "
1857 "together with -sample-profile-top-down-load.\n";
1869 for (Function &
F : M)
1871 FunctionOrderList.push_back(&
F);
1872 return FunctionOrderList;
// SCC-based ordering over the profiled call graph.
1925 std::unique_ptr<ProfiledCallGraph> ProfiledCG = buildProfiledCallGraph(M);
1926 scc_iterator<ProfiledCallGraph *> CGI =
scc_begin(ProfiledCG.get());
1931 scc_member_iterator<ProfiledCallGraph *>
SI(*CGI);
1934 for (
auto *Node :
Range) {
1937 FunctionOrderList.push_back(
F);
// The collected order is reversed before use.
1941 std::reverse(FunctionOrderList.begin(), FunctionOrderList.end());
1946 dbgs() <<
"Function processing order:\n";
1947 for (
auto F : FunctionOrderList) {
1948 dbgs() <<
F->getName() <<
"\n";
1952 return FunctionOrderList;
// One-time setup for module M. Visible steps: create the profile reader from
// Filename / RemappingFilename (building a "Could not open profile: ..."
// message on failure), read the profile (building a
// "profile reading failed: ..." message on failure), fetch the profile symbol
// list, populate NamesInProfile / GUIDsInProfile when ProfAccForSymsInList is
// set, and create ContextTracker (context-sensitive profiles), ProbeManager
// (probe-based profiles) and MatchingManager.
// NOTE(review): the second parameter, how the error messages are reported, and
// the return statements are not visible in this view.
1955bool SampleProfileLoader::doInitialization(
Module &M,
1957 auto &Ctx =
M.getContext();
1960 Filename, Ctx, *FS, FSDiscriminatorPass::Base, RemappingFilename);
1961 if (std::error_code EC = ReaderOrErr.getError()) {
1962 std::string Msg =
"Could not open profile: " +
EC.message();
1966 Reader = std::move(ReaderOrErr.get());
// Flat profiles are skipped exactly when running in the ThinLTO post-link
// phase.
1967 Reader->setSkipFlatProf(LTOPhase == ThinOrFullLTOPhase::ThinLTOPostLink);
1970 Reader->setModule(&M);
1971 if (std::error_code EC = Reader->read()) {
1972 std::string Msg =
"profile reading failed: " +
EC.message();
1977 PSL = Reader->getProfileSymbolList();
1982 if (UseFlattenedProfile)
1984 Reader->profileIsCS());
1987 ProfAccForSymsInList =
// Both name sets are rebuilt from the reader's name table.
1989 if (ProfAccForSymsInList) {
1990 NamesInProfile.
clear();
1991 GUIDsInProfile.
clear();
1992 auto NameTable = Reader->getNameTable();
1994 for (FunctionId Name : NameTable)
1997 for (FunctionId Name : NameTable)
2000 CoverageTracker.setProfAccForSymsInList(
true);
2005 M, *
FAM, Ctx,
nullptr,
2010 false, InlineContext{LTOPhase, InlinePass::ReplaySampleProfileInliner});
// Profile-kind specific configuration.
2014 if (Reader->profileIsCS() || Reader->profileIsPreInlined() ||
2015 Reader->profileIsProbeBased()) {
2031 if (Reader->profileIsPreInlined()) {
2041 if (Reader->profileIsProbeBased()) {
2048 if (!Reader->profileIsCS()) {
2060 if (Reader->profileIsCS()) {
2062 ContextTracker = std::make_unique<SampleContextTracker>(
2063 Reader->getProfiles(), &GUIDToFuncNameMap);
// Probe-based profiles require the module itself to be probed; otherwise a
// diagnostic is emitted.
2067 if (Reader->profileIsProbeBased()) {
2068 ProbeManager = std::make_unique<PseudoProbeManager>(M);
2069 if (!ProbeManager->moduleIsProbed(M)) {
2071 "Pseudo-probe-based profile requires SampleProfileProbePass";
2072 Ctx.
diagnose(DiagnosticInfoSampleProfile(
M.getModuleIdentifier(), Msg,
2080 MatchingManager = std::make_unique<SampleProfileMatcher>(
2081 M, *Reader, CG, ProbeManager.get(), LTOPhase, SymbolMap, PSL,
2082 FuncNameToProfNameMap);
// Scans Profiles and counts functions whose profile hash mismatches the probe
// descriptor (excluding probes from weak symbols) in NumMismatchedFunc. When
// the mismatch percentage reaches the configured limit, a diagnostic is
// emitted on M's context asking the user to recollect the profile. Only
// meaningful for probe-based profiles, per the assertion text.
// NOTE(review): the hot-function filter, the threshold operands and the return
// statements are not visible in this view.
2098bool SampleProfileLoader::rejectHighStalenessProfile(
2099 Module &M, ProfileSummaryInfo *PSI,
const SampleProfileMap &Profiles) {
2101 "Only support for probe-based profile");
2102 uint64_t TotalHotFunc = 0;
2103 uint64_t NumMismatchedFunc = 0;
2104 for (
const auto &
I : Profiles) {
2105 const auto &
FS =
I.second;
2106 const auto *FuncDesc = ProbeManager->getDesc(
FS.getGUID());
2112 FS.getTotalSamples()))
2116 if (ProbeManager->profileIsHashMismatched(*FuncDesc, FS) &&
2117 !ProbeManager->probeFromWeakSymbol(
FS.getGUID()))
2118 NumMismatchedFunc++;
// The percentage comparison is done in integer arithmetic by scaling the
// mismatch count by 100.
2126 if (NumMismatchedFunc * 100 >=
2128 auto &Ctx =
M.getContext();
2130 "The input profile significantly mismatches current source code. "
2131 "Please recollect profile to avoid performance regression.";
2132 Ctx.
diagnose(DiagnosticInfoSampleProfile(
M.getModuleIdentifier(), Msg));
// Strips pseudo-probe artifacts from M. While iterating, instructions to be
// removed are only collected in InstsToDel; they are erased in a separate loop
// afterwards, so the instruction list is not modified during traversal.
// Instructions with a debug location get their discriminator handled through
// DwarfDiscriminator.
// NOTE(review): the function-level loop, the predicate selecting instructions
// for deletion and the discriminator rewrite are not visible in this view.
2138void SampleProfileLoader::removePseudoProbeInstsDiscriminator(
Module &M) {
2140 std::vector<Instruction *> InstsToDel;
2141 for (
auto &BB :
F) {
2142 for (
auto &
I : BB) {
2144 InstsToDel.push_back(&
I);
2146 if (
const DILocation *DIL =
I.getDebugLoc().get()) {
2150 std::optional<uint32_t> DwarfDiscriminator =
// Deferred erase of everything collected above.
2159 for (
auto *
I : InstsToDel)
2160 I->eraseFromParent();
2165 ProfileSummaryInfo *_PSI) {
2166 GUIDToFuncNameMapper Mapper(M, *Reader, GUIDToFuncNameMap);
2169 if (
M.getProfileSummary(
false) ==
nullptr) {
2170 M.setProfileSummary(Reader->getSummary().getMD(
M.getContext()),
2176 rejectHighStalenessProfile(M, PSI, Reader->getProfiles()))
2179 auto Remapper = Reader->getRemapper();
2181 for (
const auto &N_F :
M.getValueSymbolTable()) {
2182 StringRef OrigName = N_F.getKey();
2184 if (
F ==
nullptr || OrigName.
empty())
2186 SymbolMap[FunctionId(OrigName)] =
F;
2188 if (OrigName != NewName && !NewName.
empty()) {
2189 auto r = SymbolMap.emplace(FunctionId(NewName),
F);
2195 r.first->second =
nullptr;
2200 if (
auto MapName = Remapper->lookUpNameInProfile(OrigName)) {
2201 if (*MapName != OrigName && !MapName->empty())
2202 SymbolMap.emplace(FunctionId(*MapName),
F);
2210 MatchingManager->runOnModule();
2211 MatchingManager->clearMatchingData();
2213 assert(SymbolMap.count(FunctionId()) == 0 &&
2214 "No empty StringRef should be added in SymbolMap");
2216 "FuncNameToProfNameMap is not empty when --salvage-unused-profile is "
2219 bool retval =
false;
2220 for (
auto *
F : buildFunctionOrder(M, CG)) {
2222 clearFunctionData();
2228 for (
const std::pair<Function *, NotInlinedProfileInfo> &pair :
2234 removePseudoProbeInstsDiscriminator(M);
2236 M.eraseNamedMetadata(FuncInfo);
2242bool SampleProfileLoader::runOnFunction(Function &
F,
2244 LLVM_DEBUG(
dbgs() <<
"\n\nProcessing Function " <<
F.getName() <<
"\n");
2245 DILocation2SampleMap.clear();
2250 uint64_t initialEntryCount = -1;
2256 initialEntryCount = 0;
2259 ProfAccForSymsInList =
false;
2261 CoverageTracker.setProfAccForSymsInList(ProfAccForSymsInList);
2267 if (ProfAccForSymsInList) {
2269 if (PSL->contains(
F.getName()))
2270 initialEntryCount = 0;
2284 GUIDsInProfile.
count(
2285 Function::getGUIDAssumingExternalLinkage(CanonName))) ||
2287 initialEntryCount = -1;
2292 if (!
F.getEntryCount())
2293 F.setEntryCount(initialEntryCount);
2299 Samples = ContextTracker->getBaseSamplesFor(
F);
2301 Samples = Reader->getSamplesFor(
F);
2306 auto It = OutlineFunctionSamples.find(FunctionId(CanonName));
2307 if (It != OutlineFunctionSamples.end()) {
2308 Samples = &It->second;
2309 }
else if (
auto Remapper = Reader->getRemapper()) {
2310 if (
auto RemppedName = Remapper->lookUpNameInProfile(CanonName)) {
2311 It = OutlineFunctionSamples.find(FunctionId(*RemppedName));
2312 if (It != OutlineFunctionSamples.end())
2313 Samples = &It->second;
2319 if (Samples && !Samples->
empty())
2320 return emitAnnotations(
F);
2326 bool UseFlattenedProfile)
2327 : ProfileFileName(File), ProfileRemappingFileName(RemappingFile),
2328 LTOPhase(LTOPhase), FS(
std::
move(FS)),
2329 DisableSampleProfileInlining(DisableSampleProfileInlining),
2330 UseFlattenedProfile(UseFlattenedProfile) {}
2351 SampleProfileLoader SampleLoader(
2354 : ProfileRemappingFileName,
2355 LTOPhase, FS, GetAssumptionCache, GetTTI, GetTLI, CG,
2356 DisableSampleProfileInlining, UseFlattenedProfile);
2357 if (!SampleLoader.doInitialization(M, &
FAM))
2361 if (!SampleLoader.runOnModule(M, AM, PSI))
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
#define clEnumValN(ENUMVAL, FLAGNAME, DESC)
This file defines the DenseMap class.
This file defines the DenseSet and SmallDenseSet classes.
static bool runOnFunction(Function &F, bool PostInlining)
Provides ErrorOr<T> smart pointer.
static cl::opt< unsigned > SizeLimit("eif-limit", cl::init(6), cl::Hidden, cl::desc("Size limit in Hexagon early if-conversion"))
Module.h This file contains the declarations for the Module class.
This header defines various interfaces for pass management in LLVM.
static LVReader * CurrentReader
Implements a lazy call graph analysis and related passes for the new pass manager.
Legalize the Machine IR a function s Machine IR
Machine Check Debug Module
This file implements a map that provides insertion order iteration.
static cl::opt< bool > SalvageStaleProfile("memprof-salvage-stale-profile", cl::desc("Salvage stale MemProf profile"), cl::init(false), cl::Hidden)
static const Function * getCalledFunction(const Value *V)
ConstantRange Range(APInt(BitWidth, Low), APInt(BitWidth, High))
static constexpr StringLiteral Filename
FunctionAnalysisManager FAM
This file defines the PriorityQueue class.
This file contains the declarations for profiling metadata utility functions.
This builds on the llvm/ADT/GraphTraits.h file to find the strongly connected components (SCCs) of a ...
std::pair< BasicBlock *, BasicBlock * > Edge
This file provides the interface for context-sensitive profile tracker used by CSSPGO.
This file provides the interface for the sampled PGO profile loader base implementation.
This file provides the utility functions for the sampled PGO loader base implementation.
This file provides the interface for SampleProfileMatcher.
This file provides the interface for the pseudo probe implementation for AutoFDO.
static cl::opt< unsigned > MinfuncsForStalenessError("min-functions-for-staleness-error", cl::Hidden, cl::init(50), cl::desc("Skip the check if the number of hot functions is smaller than " "the specified number."))
static cl::opt< unsigned > PrecentMismatchForStalenessError("precent-mismatch-for-staleness-error", cl::Hidden, cl::init(80), cl::desc("Reject the profile if the mismatch percent is higher than the " "given number."))
static cl::opt< bool > RemoveProbeAfterProfileAnnotation("sample-profile-remove-probe", cl::Hidden, cl::init(false), cl::desc("Remove pseudo-probe after sample profile annotation."))
static cl::opt< ReplayInlinerSettings::Fallback > ProfileInlineReplayFallback("sample-profile-inline-replay-fallback", cl::init(ReplayInlinerSettings::Fallback::Original), cl::values(clEnumValN(ReplayInlinerSettings::Fallback::Original, "Original", "All decisions not in replay send to original advisor (default)"), clEnumValN(ReplayInlinerSettings::Fallback::AlwaysInline, "AlwaysInline", "All decisions not in replay are inlined"), clEnumValN(ReplayInlinerSettings::Fallback::NeverInline, "NeverInline", "All decisions not in replay are not inlined")), cl::desc("How sample profile inline replay treats sites that don't come " "from the replay. Original: defers to original advisor, " "AlwaysInline: inline all sites not in replay, NeverInline: " "inline no sites not in replay"), cl::Hidden)
static cl::opt< bool > OverwriteExistingWeights("overwrite-existing-weights", cl::Hidden, cl::init(false), cl::desc("Ignore existing branch weights on IR and always overwrite."))
static void updateIDTMetaData(Instruction &Inst, const SmallVectorImpl< InstrProfValueData > &CallTargets, uint64_t Sum)
Update indirect call target profile metadata for Inst.
static cl::opt< bool > AnnotateSampleProfileInlinePhase("annotate-sample-profile-inline-phase", cl::Hidden, cl::init(false), cl::desc("Annotate LTO phase (prelink / postlink), or main (no LTO) for " "sample-profile inline pass name."))
static cl::opt< std::string > ProfileInlineReplayFile("sample-profile-inline-replay", cl::init(""), cl::value_desc("filename"), cl::desc("Optimization remarks file containing inline remarks to be replayed " "by inlining from sample profile loader."), cl::Hidden)
static cl::opt< bool > CallsitePrioritizedInline("sample-profile-prioritized-inline", cl::Hidden, cl::desc("Use call site prioritized inlining for sample profile loader. " "Currently only CSSPGO is supported."))
static bool doesHistoryAllowICP(const Instruction &Inst, StringRef Candidate)
Check whether the indirect call promotion history of Inst allows the promotion for Candidate.
static SmallVector< InstrProfValueData, 2 > GetSortedValueDataFromCallTargets(const SampleRecord::CallTargetMap &M)
Returns the sorted CallTargetMap M by count in descending order.
static cl::opt< ReplayInlinerSettings::Scope > ProfileInlineReplayScope("sample-profile-inline-replay-scope", cl::init(ReplayInlinerSettings::Scope::Function), cl::values(clEnumValN(ReplayInlinerSettings::Scope::Function, "Function", "Replay on functions that have remarks associated " "with them (default)"), clEnumValN(ReplayInlinerSettings::Scope::Module, "Module", "Replay on the entire module")), cl::desc("Whether inline replay should be applied to the entire " "Module or just the Functions (default) that are present as " "callers in remarks during sample profile inlining."), cl::Hidden)
static cl::opt< unsigned > ProfileICPRelativeHotness("sample-profile-icp-relative-hotness", cl::Hidden, cl::init(25), cl::desc("Relative hotness percentage threshold for indirect " "call promotion in proirity-based sample profile loader inlining."))
static cl::opt< unsigned > ProfileICPRelativeHotnessSkip("sample-profile-icp-relative-hotness-skip", cl::Hidden, cl::init(1), cl::desc("Skip relative hotness check for ICP up to given number of targets."))
static cl::opt< bool > UsePreInlinerDecision("sample-profile-use-preinliner", cl::Hidden, cl::desc("Use the preinliner decisions stored in profile context."))
static cl::opt< bool > AllowRecursiveInline("sample-profile-recursive-inline", cl::Hidden, cl::desc("Allow sample loader inliner to inline recursive calls."))
static cl::opt< CallSiteFormat::Format > ProfileInlineReplayFormat("sample-profile-inline-replay-format", cl::init(CallSiteFormat::Format::LineColumnDiscriminator), cl::values(clEnumValN(CallSiteFormat::Format::Line, "Line", "<Line Number>"), clEnumValN(CallSiteFormat::Format::LineColumn, "LineColumn", "<Line Number>:<Column Number>"), clEnumValN(CallSiteFormat::Format::LineDiscriminator, "LineDiscriminator", "<Line Number>.<Discriminator>"), clEnumValN(CallSiteFormat::Format::LineColumnDiscriminator, "LineColumnDiscriminator", "<Line Number>:<Column Number>.<Discriminator> (default)")), cl::desc("How sample profile inline replay file is formatted"), cl::Hidden)
static cl::opt< unsigned > HotFuncCutoffForStalenessError("hot-func-cutoff-for-staleness-error", cl::Hidden, cl::init(800000), cl::desc("A function is considered hot for staleness error check if its " "total sample count is above the specified percentile"))
This file provides the interface for the sampled PGO loader pass.
This file defines the SmallVector class.
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
Defines the virtual file system interface vfs::FileSystem.
bool empty() const
Returns true if the analysis manager has an empty results cache.
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)
Get the result of an analysis pass for a given IR unit.
A function analysis which provides an AssumptionCache.
A cache of @llvm.assume calls within a function.
LLVM Basic Block Representation.
const Function * getParent() const
Return the enclosing method, or null if none.
Function * getCalledFunction() const
Returns the function called, or null if this is an indirect function invocation or the function signa...
static bool isPseudoProbeDiscriminator(unsigned Discriminator)
const DILocation * cloneWithDiscriminator(unsigned Discriminator) const
Returns a new DILocation with updated Discriminator.
LLVM_ABI unsigned getLine() const
std::pair< iterator, bool > try_emplace(KeyT &&Key, Ts &&...Args)
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
Represents either an error or a value T.
DISubprogram * getSubprogram() const
Get the attached subprogram.
static LLVM_ABI GUID getGUIDAssumingExternalLinkage(StringRef GlobalName)
Return a 64-bit global unique ID constructed from the name of a global symbol.
LLVM_ABI bool isDeclaration() const
Return true if the primary definition of this global value is outside of the current translation unit...
static InlineCost getNever(const char *Reason, std::optional< CostBenefitPair > CostBenefit=std::nullopt)
static InlineCost getAlways(const char *Reason, std::optional< CostBenefitPair > CostBenefit=std::nullopt)
static InlineCost get(int Cost, int Threshold, int StaticBonus=0)
LLVM_ABI unsigned getNumSuccessors() const LLVM_READONLY
Return the number of successors that this instruction has.
const DebugLoc & getDebugLoc() const
Return the debug location for this node as a DebugLoc.
LLVM_ABI bool extractProfTotalWeight(uint64_t &TotalVal) const
Retrieve total raw weight values of a branch.
LLVM_ABI BasicBlock * getSuccessor(unsigned Idx) const LLVM_READONLY
Return the specified successor. This instruction must be a terminator.
LLVM_ABI void setMetadata(unsigned KindID, MDNode *Node)
Set the metadata of the specified kind to the specified node.
A smart pointer to a reference-counted object that inherits from RefCountedBase or ThreadSafeRefCount...
LLVM_ABI void diagnose(const DiagnosticInfo &DI)
Report a message to the currently installed diagnostic handler.
An analysis pass which computes the call graph for a module.
A lazily constructed view of the call graph of a module.
ValueT lookup(const KeyT &Key) const
std::pair< iterator, bool > insert(const std::pair< KeyT, ValueT > &KV)
VectorType::iterator erase(typename VectorType::iterator Iterator)
Remove the element given by Iterator.
A Module instance is used to store all the information related to an LLVM module.
PostDominatorTree Class - Concrete subclass of DominatorTree that is used to compute the post-dominat...
A set of analyses that are preserved following a run of a transformation pass.
static PreservedAnalyses none()
Convenience factory function for the empty preserved set.
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
PriorityQueue - This class behaves like std::priority_queue and provides a few additional convenience...
An analysis pass based on the new PM to deliver ProfileSummaryInfo.
Analysis providing profile information.
LLVM_ABI void refresh(std::unique_ptr< ProfileSummary > &&Other=nullptr)
If a summary is provided as argument, use that.
LLVM_ABI bool isHotCountNthPercentile(int PercentileCutoff, uint64_t C) const
Returns true if count C is considered hot with regard to a given hot percentile cutoff value.
void computeDominanceAndLoopInfo(FunctionT &F)
PostDominatorTreePtrT PDT
LLVM_ABI PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM)
LLVM_ABI SampleProfileLoaderPass(std::string File="", std::string RemappingFile="", ThinOrFullLTOPhase LTOPhase=ThinOrFullLTOPhase::None, IntrusiveRefCntPtr< vfs::FileSystem > FS=nullptr, bool DisableSampleProfileInlining=false, bool UseFlattenedProfile=false)
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
reference emplace_back(ArgTypes &&... Args)
iterator insert(iterator I, T &&Elt)
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
size_type count(StringRef Key) const
count - Return 1 if the element is in the map, 0 otherwise.
Represent a constant reference to a string, i.e.
constexpr bool empty() const
Check if the string is empty.
std::pair< typename Base::iterator, bool > insert(StringRef key)
Analysis pass providing the TargetTransformInfo.
Analysis pass providing the TargetLibraryInfo.
Provides information about what library functions are available for the current target.
LLVM Value Representation.
LLVM_ABI StringRef getName() const
Return a constant reference to the value's name.
int getNumOccurrences() const
std::pair< iterator, bool > insert(const ValueT &V)
size_type count(const_arg_type_t< ValueT > V) const
Return 1 if the specified key is in the set, 0 otherwise.
const ParentTy * getParent() const
Representation of the samples collected for a function.
static LLVM_ABI bool ProfileIsCS
FunctionId getFunction() const
Return the function name.
static LLVM_ABI bool ProfileIsProbeBased
static StringRef getCanonicalFnName(const Function &F)
Return the canonical name for a function, taking into account suffix elision policy attributes.
void setContextSynthetic()
SampleContext & getContext() const
sampleprof_error merge(const FunctionSamples &Other, uint64_t Weight=1)
Merge the samples in Other into this one.
static LLVM_ABI LineLocation getCallSiteIdentifier(const DILocation *DIL, bool ProfileIsFS=false)
Returns a unique call site identifier for a given debug location of a call instruction.
void findInlinedFunctions(DenseSet< GlobalValue::GUID > &S, const HashKeyMap< DenseMap, FunctionId, Function * > &SymbolMap, uint64_t Threshold) const
Recursively traverses all children, if the total sample count of the corresponding function is no les...
uint64_t getHeadSamplesEstimate() const
Return an estimate of the sample count of the function entry basic block.
uint64_t getGUID() const
Return the GUID of the context's name.
const BodySampleMap & getBodySamples() const
Return all the samples collected in the body of the function.
static LLVM_ABI bool UseMD5
Whether the profile uses MD5 to represent string.
static void flattenProfile(SampleProfileMap &ProfileMap, bool ProfileIsCS=false)
bool hasAttribute(ContextAttributeMask A)
static LLVM_ABI ErrorOr< std::unique_ptr< SampleProfileReader > > create(StringRef Filename, LLVMContext &C, vfs::FileSystem &FS, FSDiscriminatorPass P=FSDiscriminatorPass::Base, StringRef RemapFilename="")
Create a sample profile reader appropriate to the file format.
DenseMap< FunctionId, uint64_t > CallTargetMap
static const SortedCallTargetSet sortCallTargets(const CallTargetMap &Targets)
Sort call targets in descending order of call frequency.
static const CallTargetMap adjustCallTargets(const CallTargetMap &Targets, float DistributionFactor)
Prorate call targets by a distribution factor.
bool isAtEnd() const
Direct loop termination test which is more efficient than comparison with end().
@ C
The default llvm calling convention, compatible with C.
@ BasicBlock
Various leaf nodes.
ValuesClass values(OptsTy... Options)
Helper to build a ValuesClass by forwarding a variable number of arguments as an initializer list to ...
initializer< Ty > init(const Ty &Val)
LLVM_ABI void checkExpectAnnotations(const Instruction &I, ArrayRef< uint32_t > ExistingWeights, bool IsFrontend)
checkExpectAnnotations - compares PGO counters to the thresholds used for llvm.expect and warns if th...
DiagnosticInfoOptimizationBase::Argument NV
LLVM_ABI CallBase & promoteIndirectCall(CallBase &CB, Function *F, uint64_t Count, uint64_t TotalCount, bool AttachProfToDirectCall, OptimizationRemarkEmitter *ORE)
NodeAddr< NodeBase * > Node
NodeAddr< FuncNode * > Func
static FunctionId getRepInFormat(StringRef Name)
Get the proper representation of a string according to whether the current Format uses MD5 to represe...
@ ContextDuplicatedIntoBase
std::map< FunctionId, FunctionSamples > FunctionSamplesMap
LLVM_ABI bool callsiteIsHot(const FunctionSamples *CallsiteFS, ProfileSummaryInfo *PSI, bool ProfAccForSymsInList)
Return true if the given callsite is hot wrt to hot cutoff threshold.
friend class Instruction
Iterator for Instructions in a `BasicBlock.
LLVM_ABI IntrusiveRefCntPtr< FileSystem > getRealFileSystem()
Gets an vfs::FileSystem for the 'real' file system, as seen by the operating system.
This is an optimization pass for GlobalISel generic memory operations.
static bool isIndirectCall(const MachineInstr &MI)
cl::opt< bool > ReportProfileStaleness("report-profile-staleness", cl::Hidden, cl::init(false), cl::desc("Compute and report stale profile statistical metrics."))
cl::opt< bool > PersistProfileStaleness("persist-profile-staleness", cl::Hidden, cl::init(false), cl::desc("Compute stale profile statistical metrics and write it into the " "native object file(.llvm_stats section)."))
LLVM_ABI bool isLegalToPromote(const CallBase &CB, Function *Callee, const char **FailureReason=nullptr)
Return true if the given indirect call site can be made to call Callee.
LLVM_ABI cl::opt< int > ProfileInlineLimitMin
bool succ_empty(const Instruction *I)
static cl::opt< bool > ProfileAccurateForSymsInList("profile-accurate-for-symsinlist", cl::Hidden, cl::init(true), cl::desc("For symbols in profile symbol list, regard their profiles to " "be accurate. It may be overridden by profile-sample-accurate. "))
static cl::opt< bool > ProfileMergeInlinee("sample-profile-merge-inlinee", cl::Hidden, cl::init(true), cl::desc("Merge past inlinee's profile to outline version if sample " "profile loader decided not to inline a call site. It will " "only be enabled when top-down order of profile loading is " "enabled. "))
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
static cl::opt< bool > DisableSampleLoaderInlining("disable-sample-loader-inlining", cl::Hidden, cl::init(false), cl::desc("If true, artificially skip inline transformation in sample-loader " "pass, and merge (or scale) profiles (as configured by " "--sample-profile-merge-inlinee)."))
void append_range(Container &C, Range &&R)
Wrapper function to append range R to container C.
scc_iterator< T > scc_begin(const T &G)
Construct the begin iterator for a deduced graph type T.
static cl::opt< bool > UseProfiledCallGraph("use-profiled-call-graph", cl::init(true), cl::Hidden, cl::desc("Process functions in a top-down order " "defined by the profiled call graph when " "-sample-profile-top-down-load is on."))
static cl::opt< bool > ProfileSampleAccurate("profile-sample-accurate", cl::Hidden, cl::init(false), cl::desc("If the sample profile is accurate, we will mark all un-sampled " "callsite and function as having 0 samples. Otherwise, treat " "un-sampled callsites and functions conservatively as unknown. "))
InnerAnalysisManagerProxy< FunctionAnalysisManager, Module > FunctionAnalysisManagerModuleProxy
Provide the FunctionAnalysisManager to Module proxy.
static void buildTopDownFuncOrder(LazyCallGraph &CG, std::vector< Function * > &FunctionOrderList)
LLVM_ABI InlineResult InlineFunction(CallBase &CB, InlineFunctionInfo &IFI, bool MergeAttributes=false, AAResults *CalleeAAR=nullptr, bool InsertLifetime=true, bool TrackInlineHistory=false, Function *ForwardVarArgsTo=nullptr, OptimizationRemarkEmitter *ORE=nullptr)
This function inlines the called function into the basic block of the caller.
LLVM_ABI void setProbeDistributionFactor(Instruction &Inst, float Factor)
LLVM_ABI void setBranchWeights(Instruction &I, ArrayRef< uint32_t > Weights, bool IsExpected, bool ElideAllZero=false)
Create a new branch_weights metadata node and add or overwrite a prof metadata reference to instructi...
static cl::opt< bool > ProfileSampleBlockAccurate("profile-sample-block-accurate", cl::Hidden, cl::init(false), cl::desc("If the sample profile is accurate, we will mark all un-sampled " "branches and calls as having 0 samples. Otherwise, treat " "them conservatively as unknown. "))
LLVM_ABI std::string AnnotateInlinePassName(InlineContext IC)
ThinOrFullLTOPhase
This enumerates the LLVM full LTO or ThinLTO optimization phases.
LLVM_ABI cl::opt< bool > SampleProfileUseProfi
LLVM_ABI void annotateValueSite(Module &M, Instruction &Inst, const InstrProfRecord &InstrProfR, InstrProfValueKind ValueKind, uint32_t SiteIndx, uint32_t MaxMDCount=3)
Get the value profile data for value site SiteIdx from InstrProfR and annotate the instruction Inst w...
void sort(IteratorTy Start, IteratorTy End)
LLVM_ABI llvm::cl::opt< bool > UseIterativeBFIInference
LLVM_ABI std::optional< PseudoProbe > extractProbe(const Instruction &Inst)
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
LLVM_ABI void emitInlinedIntoBasedOnCost(OptimizationRemarkEmitter &ORE, DebugLoc DLoc, const BasicBlock *Block, const Function &Callee, const Function &Caller, const InlineCost &IC, bool ForProfileContext=false, const char *PassName=nullptr)
Emit ORE message based in cost (default heuristic).
static cl::opt< std::string > SampleProfileRemappingFile("sample-profile-remapping-file", cl::init(""), cl::value_desc("filename"), cl::desc("Profile remapping file loaded by -sample-profile"), cl::Hidden)
LLVM_ABI SmallVector< InstrProfValueData, 4 > getValueProfDataFromInst(const Instruction &Inst, InstrProfValueKind ValueKind, uint32_t MaxNumValueData, uint64_t &TotalC, bool GetNoICPValue=false)
Extract the value profile data from Inst and returns them if Inst is annotated with value profile dat...
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
LLVM_ABI std::unique_ptr< InlineAdvisor > getReplayInlineAdvisor(Module &M, FunctionAnalysisManager &FAM, LLVMContext &Context, std::unique_ptr< InlineAdvisor > OriginalAdvisor, const ReplayInlinerSettings &ReplaySettings, bool EmitRemarks, InlineContext IC)
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
LLVM_ABI cl::opt< int > SampleHotCallSiteThreshold
LLVM_ABI raw_fd_ostream & errs()
This returns a reference to a raw_ostream for standard error.
LLVM_ABI InlineCost getInlineCost(CallBase &Call, const InlineParams &Params, TargetTransformInfo &CalleeTTI, function_ref< AssumptionCache &(Function &)> GetAssumptionCache, function_ref< const TargetLibraryInfo &(Function &)> GetTLI, function_ref< BlockFrequencyInfo &(Function &)> GetBFI=nullptr, ProfileSummaryInfo *PSI=nullptr, OptimizationRemarkEmitter *ORE=nullptr, function_ref< EphemeralValuesCache &(Function &)> GetEphValuesCache=nullptr)
Get an InlineCost object representing the cost of inlining this callsite.
LLVM_ABI void updateProfileCallee(Function *Callee, int64_t EntryDelta, const ValueMap< const Value *, WeakTrackingVH > *VMap=nullptr)
Updates profile information by adjusting the entry count by adding EntryDelta then scaling callsite i...
cl::opt< bool > SalvageStaleProfile("salvage-stale-profile", cl::Hidden, cl::init(false), cl::desc("Salvage stale profile by fuzzy matching and use the remapped " "location for sample profile query."))
RelativeUniformCounterPtr ValuesPtrExpr VTableAddr Count
LLVM_ABI cl::opt< int > SampleColdCallSiteThreshold
LLVM_ABI InlineParams getInlineParams()
Generate the parameters to tune the inline cost analysis based only on the commandline options.
OutputIt move(R &&Range, OutputIt Out)
Provide wrappers to std::move which take ranges instead of having to pass begin/end explicitly.
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
static bool skipProfileForFunction(const Function &F)
LLVM_ABI cl::opt< bool > SortProfiledSCC
cl::opt< bool > SalvageUnusedProfile("salvage-unused-profile", cl::Hidden, cl::init(false), cl::desc("Salvage unused profile by matching with new " "functions on call graph."))
static cl::opt< bool > ProfileTopDownLoad("sample-profile-top-down-load", cl::Hidden, cl::init(true), cl::desc("Do profile annotation and inlining for functions in top-down " "order of call graph during sample profile loading. It only " "works for new pass manager. "))
static cl::opt< unsigned > MaxNumPromotions("icp-max-prom", cl::init(3), cl::Hidden, cl::desc("Max number of promotions for a single indirect " "call callsite"))
LLVM_ABI cl::opt< int > ProfileInlineLimitMax
AnalysisManager< Function > FunctionAnalysisManager
Convenience typedef for the Function analysis manager.
cl::opt< bool > EnableExtTspBlockPlacement
const uint64_t NOMORE_ICP_MAGICNUM
Magic number in the value profile metadata showing a target has been promoted for the instruction and...
LLVM_ABI cl::opt< int > ProfileInlineGrowthLimit
static cl::opt< bool > ProfileSizeInline("sample-profile-inline-size", cl::Hidden, cl::init(false), cl::desc("Inline cold call sites in profile loader if it's beneficial " "for code size."))
AnalysisManager< Module > ModuleAnalysisManager
Convenience typedef for the Module analysis manager.
cl::opt< std::string > SampleProfileFile
constexpr const char * PseudoProbeDescMetadataName
Implement std::hash so that hash_code can be used in STL containers.
A wrapper of binary function with basic blocks and jumps.
std::optional< bool > AllowRecursiveCall
Indicate whether we allow inlining for recursive call.
std::optional< bool > ComputeFullInlineCost
Compute inline cost even when the cost has exceeded the threshold.
static std::optional< uint32_t > extractDwarfBaseDiscriminator(uint32_t Value)