91#include <system_error>
96using namespace sampleprof;
99#define DEBUG_TYPE "sample-profile"
100#define CSINLINE_DEBUG DEBUG_TYPE "-inline"
103 "Number of functions inlined with context sensitive profile");
105 "Number of functions not inlined with context sensitive profile");
107 "Number of functions with CFG mismatched profile");
108STATISTIC(NumMatchedProfile,
"Number of functions with CFG matched profile");
110 "Number of inlined callsites with a partial distribution factor");
113 "Number of functions with FDO inline stopped due to min size limit");
115 "Number of functions with FDO inline stopped due to max size limit");
117 NumCSInlinedHitGrowthLimit,
118 "Number of functions with FDO inline stopped due to growth size limit");
135 cl::desc(
"Salvage stale profile by fuzzy matching and use the remapped "
136 "location for sample profile query."));
140 cl::desc(
"Compute and report stale profile statistical metrics."));
144 cl::desc(
"Compute stale profile statistical metrics and write it into the "
145 "native object file(.llvm_stats section)."));
149 cl::desc(
"If the sample profile is accurate, we will mark all un-sampled "
150 "callsite and function as having 0 samples. Otherwise, treat "
151 "un-sampled callsites and functions conservatively as unknown. "));
155 cl::desc(
"If the sample profile is accurate, we will mark all un-sampled "
156 "branches and calls as having 0 samples. Otherwise, treat "
157 "them conservatively as unknown. "));
161 cl::desc(
"For symbols in profile symbol list, regard their profiles to "
162 "be accurate. It may be overriden by profile-sample-accurate. "));
166 cl::desc(
"Merge past inlinee's profile to outline version if sample "
167 "profile loader decided not to inline a call site. It will "
168 "only be enabled when top-down order of profile loading is "
173 cl::desc(
"Do profile annotation and inlining for functions in top-down "
174 "order of call graph during sample profile loading. It only "
175 "works for new pass manager. "));
179 cl::desc(
"Process functions in a top-down order "
180 "defined by the profiled call graph when "
181 "-sample-profile-top-down-load is on."));
185 cl::desc(
"Inline cold call sites in profile loader if it's beneficial "
193 cl::desc(
"If true, artifically skip inline transformation in sample-loader "
194 "pass, and merge (or scale) profiles (as configured by "
195 "--sample-profile-merge-inlinee)."));
200 cl::desc(
"Sort profiled recursion by edge weights."));
204 cl::desc(
"The size growth ratio limit for proirity-based sample profile "
205 "loader inlining."));
209 cl::desc(
"The lower bound of size growth limit for "
210 "proirity-based sample profile loader inlining."));
214 cl::desc(
"The upper bound of size growth limit for "
215 "proirity-based sample profile loader inlining."));
219 cl::desc(
"Hot callsite threshold for proirity-based sample profile loader "
224 cl::desc(
"Threshold for inlining cold callsites"));
230 "Relative hotness percentage threshold for indirect "
231 "call promotion in proirity-based sample profile loader inlining."));
236 "Skip relative hotness check for ICP up to given number of targets."));
240 cl::desc(
"A function is considered hot for staleness error check if its "
241 "total sample count is above the specified percentile"));
245 cl::desc(
"Skip the check if the number of hot functions is smaller than "
246 "the specified number."));
250 cl::desc(
"Reject the profile if the mismatch percent is higher than the "
254 "sample-profile-prioritized-inline",
cl::Hidden,
255 cl::desc(
"Use call site prioritized inlining for sample profile loader."
256 "Currently only CSSPGO is supported."));
260 cl::desc(
"Use the preinliner decisions stored in profile context."));
263 "sample-profile-recursive-inline",
cl::Hidden,
264 cl::desc(
"Allow sample loader inliner to inline recursive calls."));
268 cl::desc(
"Remove pseudo-probe after sample profile annotation."));
273 "Optimization remarks file containing inline remarks to be replayed "
274 "by inlining from sample profile loader."),
278 "sample-profile-inline-replay-scope",
279 cl::init(ReplayInlinerSettings::Scope::Function),
281 "Replay on functions that have remarks associated "
282 "with them (default)"),
283 clEnumValN(ReplayInlinerSettings::Scope::Module,
"Module",
284 "Replay on the entire module")),
285 cl::desc(
"Whether inline replay should be applied to the entire "
286 "Module or just the Functions (default) that are present as "
287 "callers in remarks during sample profile inlining."),
291 "sample-profile-inline-replay-fallback",
292 cl::init(ReplayInlinerSettings::Fallback::Original),
295 ReplayInlinerSettings::Fallback::Original,
"Original",
296 "All decisions not in replay send to original advisor (default)"),
297 clEnumValN(ReplayInlinerSettings::Fallback::AlwaysInline,
298 "AlwaysInline",
"All decisions not in replay are inlined"),
299 clEnumValN(ReplayInlinerSettings::Fallback::NeverInline,
"NeverInline",
300 "All decisions not in replay are not inlined")),
301 cl::desc(
"How sample profile inline replay treats sites that don't come "
302 "from the replay. Original: defers to original advisor, "
303 "AlwaysInline: inline all sites not in replay, NeverInline: "
304 "inline no sites not in replay"),
308 "sample-profile-inline-replay-format",
309 cl::init(CallSiteFormat::Format::LineColumnDiscriminator),
311 clEnumValN(CallSiteFormat::Format::Line,
"Line",
"<Line Number>"),
312 clEnumValN(CallSiteFormat::Format::LineColumn,
"LineColumn",
313 "<Line Number>:<Column Number>"),
314 clEnumValN(CallSiteFormat::Format::LineDiscriminator,
315 "LineDiscriminator",
"<Line Number>.<Discriminator>"),
316 clEnumValN(CallSiteFormat::Format::LineColumnDiscriminator,
317 "LineColumnDiscriminator",
318 "<Line Number>:<Column Number>.<Discriminator> (default)")),
323 cl::desc(
"Max number of promotions for a single indirect "
324 "call callsite in sample profile loader"));
328 cl::desc(
"Ignore existing branch weights on IR and always overwrite."));
332 cl::desc(
"Annotate LTO phase (prelink / postlink), or main (no LTO) for "
333 "sample-profile inline pass name."));
343using Edge = std::pair<const BasicBlock *, const BasicBlock *>;
348class GUIDToFuncNameMapper {
353 CurrentGUIDToFuncNameMap(GUIDToFuncNameMap) {
357 for (
const auto &
F : CurrentModule) {
359 CurrentGUIDToFuncNameMap.insert(
360 {Function::getGUID(OrigName), OrigName});
370 if (CanonName != OrigName)
371 CurrentGUIDToFuncNameMap.insert(
372 {Function::getGUID(CanonName), CanonName});
376 SetGUIDToFuncNameMapForAll(&CurrentGUIDToFuncNameMap);
379 ~GUIDToFuncNameMapper() {
383 CurrentGUIDToFuncNameMap.clear();
387 SetGUIDToFuncNameMapForAll(
nullptr);
392 std::queue<FunctionSamples *> FSToUpdate;
394 FSToUpdate.push(&IFS.second);
397 while (!FSToUpdate.empty()) {
400 FS->GUIDToFuncNameMap = Map;
401 for (
const auto &ICS : FS->getCallsiteSamples()) {
403 for (
const auto &IFS : FSMap) {
405 FSToUpdate.push(&FS);
417struct InlineCandidate {
427 float CallsiteDistribution;
431struct CandidateComparer {
432 bool operator()(
const InlineCandidate &LHS,
const InlineCandidate &RHS) {
433 if (
LHS.CallsiteCount !=
RHS.CallsiteCount)
434 return LHS.CallsiteCount <
RHS.CallsiteCount;
438 assert(LCS && RCS &&
"Expect non-null FunctionSamples");
449using CandidateQueue =
468 GetAC(
std::
move(GetAssumptionCache)),
469 GetTTI(
std::
move(GetTargetTransformInfo)), GetTLI(
std::
move(GetTLI)),
487 std::vector<const FunctionSamples *>
493 bool tryPromoteAndInlineCandidate(
499 std::optional<InlineCost> getExternalInlineAdvisorCost(
CallBase &CB);
500 bool getExternalInlineAdvisorShouldInline(
CallBase &CB);
501 InlineCost shouldInlineCandidate(InlineCandidate &Candidate);
502 bool getInlineCandidate(InlineCandidate *NewCandidate,
CallBase *CB);
504 tryInlineCandidate(InlineCandidate &Candidate,
507 inlineHotFunctionsWithPriority(
Function &
F,
511 void emitOptimizationRemarksForInlineCandidates(
514 void promoteMergeNotInlinedContextSamples(
518 std::unique_ptr<ProfiledCallGraph> buildProfiledCallGraph(
Module &M);
519 void generateMDProfMetadata(
Function &
F);
522 void removePseudoProbeInsts(
Module &M);
535 std::unique_ptr<SampleContextTracker> ContextTracker;
543 const std::string AnnotatedPassName;
547 std::unique_ptr<ProfileSymbolList> PSL;
558 struct NotInlinedProfileInfo {
581 bool ProfAccForSymsInList;
584 std::unique_ptr<InlineAdvisor> ExternalInlineAdvisor;
587 std::unique_ptr<SampleProfileMatcher> MatchingManager;
590 const char *getAnnotatedRemarkPassName()
const {
591 return AnnotatedPassName.c_str();
604 const std::vector<const BasicBlockT *> &BasicBlocks,
606 for (
auto &Jump :
Func.Jumps) {
607 const auto *BB = BasicBlocks[Jump.Source];
608 const auto *Succ = BasicBlocks[Jump.Target];
612 if (Successors[BB].
size() == 2 && Successors[BB].back() == Succ) {
613 if (isa<InvokeInst>(TI)) {
614 Jump.IsUnlikely =
true;
620 if (isa<UnreachableInst>(SuccTI)) {
621 Jump.IsUnlikely =
true;
642 return getProbeWeight(Inst);
646 return std::error_code();
651 if (isa<BranchInst>(Inst) || isa<IntrinsicInst>(Inst) || isa<PHINode>(Inst))
652 return std::error_code();
661 if (
const auto *CB = dyn_cast<CallBase>(&Inst))
662 if (!CB->isIndirectCall() && findCalleeFunctionSamples(*CB))
665 return getInstWeightImpl(Inst);
681SampleProfileLoader::findCalleeFunctionSamples(
const CallBase &Inst)
const {
689 CalleeName =
Callee->getName();
692 return ContextTracker->getCalleeContextSamplesFor(Inst, CalleeName);
699 CalleeName, Reader->getRemapper());
705std::vector<const FunctionSamples *>
706SampleProfileLoader::findIndirectCallFunctionSamples(
709 std::vector<const FunctionSamples *>
R;
716 assert(L && R &&
"Expect non-null FunctionSamples");
717 if (
L->getHeadSamplesEstimate() !=
R->getHeadSamplesEstimate())
718 return L->getHeadSamplesEstimate() >
R->getHeadSamplesEstimate();
719 return L->getGUID() <
R->getGUID();
724 ContextTracker->getIndirectCalleeContextSamplesFor(DIL);
725 if (CalleeSamples.empty())
731 for (
const auto *
const FS : CalleeSamples) {
732 Sum +=
FS->getHeadSamplesEstimate();
745 if (
auto T =
FS->findCallTargetMapAt(CallSite))
746 for (
const auto &T_C : *
T)
751 for (
const auto &NameFS : *M) {
752 Sum += NameFS.second.getHeadSamplesEstimate();
753 R.push_back(&NameFS.second);
761SampleProfileLoader::findFunctionSamples(
const Instruction &Inst)
const {
772 auto it = DILocation2SampleMap.try_emplace(DIL,
nullptr);
775 it.first->second = ContextTracker->getContextSamplesFor(DIL);
778 Samples->findFunctionSamples(DIL, Reader->getRemapper());
780 return it.first->second;
796 if (ValueData.empty())
799 unsigned NumPromoted = 0;
800 for (
const auto &V : ValueData) {
807 if (V.Value == Function::getGUID(Candidate))
842 "If sum is 0, assume only one element in CallTargets "
843 "with count being NOMORE_ICP_MAGICNUM");
845 for (
const auto &V : ValueData)
846 ValueCountMap[V.Value] = V.Count;
852 OldSum -= Pair.first->second;
859 for (
const auto &V : ValueData) {
861 ValueCountMap[V.Value] = V.Count;
864 for (
const auto &Data : CallTargets) {
865 auto Pair = ValueCountMap.
try_emplace(Data.Value, Data.Count);
871 assert(Sum >= Data.Count &&
"Sum should never be less than Data.Count");
877 for (
const auto &ValueCount : ValueCountMap) {
879 InstrProfValueData{ValueCount.first, ValueCount.second});
883 [](
const InstrProfValueData &L,
const InstrProfValueData &R) {
884 if (L.Count != R.Count)
885 return L.Count > R.Count;
886 return L.Value > R.Value;
892 NewCallTargets, Sum, IPVK_IndirectCallTarget, MaxMDCount);
905bool SampleProfileLoader::tryPromoteAndInlineCandidate(
916 auto CalleeFunctionName = Candidate.CalleeSamples->getFunction();
921 auto &CI = *Candidate.CallInstr;
925 const char *Reason =
"Callee function not available";
932 if (!
R->second->isDeclaration() &&
R->second->getSubprogram() &&
933 R->second->hasFnAttribute(
"use-sample-profile") &&
942 CI,
R->second, Candidate.CallsiteCount, Sum,
false, ORE);
944 Sum -= Candidate.CallsiteCount;
957 Candidate.CallInstr = DI;
958 if (isa<CallInst>(DI) || isa<InvokeInst>(DI)) {
959 bool Inlined = tryInlineCandidate(Candidate, InlinedCallSite);
964 *DI,
static_cast<float>(Candidate.CallsiteCount) / SumOrigin);
972 Candidate.CallInstr->getName())<<
" because "
983 if (Callee ==
nullptr)
998void SampleProfileLoader::emitOptimizationRemarksForInlineCandidates(
1001 for (
auto *
I : Candidates) {
1002 Function *CalledFunction =
I->getCalledFunction();
1003 if (CalledFunction) {
1005 "InlineAttempt",
I->getDebugLoc(),
1007 <<
"previous inlining reattempted for "
1008 << (
Hot ?
"hotness: '" :
"size: '")
1009 <<
ore::NV(
"Callee", CalledFunction) <<
"' into '"
1015void SampleProfileLoader::findExternalInlineCandidate(
1021 if (CB && getExternalInlineAdvisorShouldInline(*CB)) {
1052 std::queue<ContextTrieNode *> CalleeList;
1053 CalleeList.push(Caller);
1054 while (!CalleeList.empty()) {
1073 if (!Func ||
Func->isDeclaration())
1079 for (
const auto &TS : BS.second.getCallTargets())
1080 if (TS.second > Threshold) {
1082 if (!Callee ||
Callee->isDeclaration())
1083 InlinedGUIDs.
insert(TS.first.getHashCode());
1090 for (
auto &Child :
Node->getAllChildContext()) {
1092 CalleeList.push(CalleeNode);
1119bool SampleProfileLoader::inlineHotFunctions(
1123 assert((!ProfAccForSymsInList ||
1125 !
F.hasFnAttribute(
"profile-sample-accurate"))) &&
1126 "ProfAccForSymsInList should be false when profile-sample-accurate "
1130 bool Changed =
false;
1131 bool LocalChanged =
true;
1132 while (LocalChanged) {
1133 LocalChanged =
false;
1135 for (
auto &BB :
F) {
1139 for (
auto &
I : BB) {
1141 if (
auto *CB = dyn_cast<CallBase>(&
I)) {
1142 if (!isa<IntrinsicInst>(
I)) {
1143 if ((FS = findCalleeFunctionSamples(*CB))) {
1145 "GUIDToFuncNameMap has to be populated");
1147 if (
FS->getHeadSamplesEstimate() > 0 ||
1149 LocalNotInlinedCallSites.
insert({CB,
FS});
1152 else if (shouldInlineColdCallee(*CB))
1154 }
else if (getExternalInlineAdvisorShouldInline(*CB)) {
1160 if (
Hot || ExternalInlineAdvisor) {
1162 emitOptimizationRemarksForInlineCandidates(AllCandidates,
F,
true);
1165 emitOptimizationRemarksForInlineCandidates(ColdCandidates,
F,
false);
1169 Function *CalledFunction =
I->getCalledFunction();
1170 InlineCandidate Candidate = {
I, LocalNotInlinedCallSites.
lookup(
I),
1174 if (CalledFunction == &
F)
1176 if (
I->isIndirectCall()) {
1178 for (
const auto *FS : findIndirectCallFunctionSamples(*
I, Sum)) {
1180 if (LTOPhase == ThinOrFullLTOPhase::ThinLTOPreLink) {
1181 findExternalInlineCandidate(
I, FS, InlinedGUIDs,
1182 PSI->getOrCompHotCountThreshold());
1188 Candidate = {
I,
FS,
FS->getHeadSamplesEstimate(), 1.0};
1189 if (tryPromoteAndInlineCandidate(
F, Candidate, SumOrigin, Sum)) {
1190 LocalNotInlinedCallSites.
erase(
I);
1191 LocalChanged =
true;
1194 }
else if (CalledFunction && CalledFunction->
getSubprogram() &&
1196 if (tryInlineCandidate(Candidate)) {
1197 LocalNotInlinedCallSites.
erase(
I);
1198 LocalChanged =
true;
1200 }
else if (LTOPhase == ThinOrFullLTOPhase::ThinLTOPreLink) {
1201 findExternalInlineCandidate(
I, findCalleeFunctionSamples(*
I),
1203 PSI->getOrCompHotCountThreshold());
1206 Changed |= LocalChanged;
1212 promoteMergeNotInlinedContextSamples(LocalNotInlinedCallSites,
F);
1216bool SampleProfileLoader::tryInlineCandidate(
1223 CallBase &CB = *Candidate.CallInstr;
1225 assert(CalledFunction &&
"Expect a callee with definition");
1230 if (
Cost.isNever()) {
1232 "InlineFail", DLoc, BB)
1233 <<
"incompatible inlining");
1241 IFI.UpdateProfile =
false;
1244 if (!
IR.isSuccess())
1249 Cost,
true, getAnnotatedRemarkPassName());
1252 if (InlinedCallSites) {
1253 InlinedCallSites->
clear();
1254 for (
auto &
I : IFI.InlinedCallSites)
1259 ContextTracker->markContextSamplesInlined(Candidate.CalleeSamples);
1269 if (Candidate.CallsiteDistribution < 1) {
1270 for (
auto &
I : IFI.InlinedCallSites) {
1273 Candidate.CallsiteDistribution);
1275 NumDuplicatedInlinesite++;
1281bool SampleProfileLoader::getInlineCandidate(InlineCandidate *NewCandidate,
1283 assert(CB &&
"Expect non-null call instruction");
1285 if (isa<IntrinsicInst>(CB))
1289 const FunctionSamples *CalleeSamples = findCalleeFunctionSamples(*CB);
1292 if (!CalleeSamples && !getExternalInlineAdvisorShouldInline(*CB))
1296 if (std::optional<PseudoProbe> Probe =
extractProbe(*CB))
1297 Factor = Probe->Factor;
1301 *NewCandidate = {CB, CalleeSamples, CallsiteCount, Factor};
1305std::optional<InlineCost>
1306SampleProfileLoader::getExternalInlineAdvisorCost(
CallBase &CB) {
1307 std::unique_ptr<InlineAdvice> Advice =
nullptr;
1308 if (ExternalInlineAdvisor) {
1309 Advice = ExternalInlineAdvisor->getAdvice(CB);
1311 if (!Advice->isInliningRecommended()) {
1312 Advice->recordUnattemptedInlining();
1315 Advice->recordInlining();
1323bool SampleProfileLoader::getExternalInlineAdvisorShouldInline(
CallBase &CB) {
1324 std::optional<InlineCost>
Cost = getExternalInlineAdvisorCost(CB);
1329SampleProfileLoader::shouldInlineCandidate(InlineCandidate &Candidate) {
1330 if (std::optional<InlineCost> ReplayCost =
1331 getExternalInlineAdvisorCost(*Candidate.CallInstr))
1337 if (Candidate.CallsiteCount > PSI->getHotCountThreshold())
1344 assert(Callee &&
"Expect a definition for inline candidate of direct call");
1357 GetTTI(*Callee), GetAC, GetTLI);
1360 if (
Cost.isNever() ||
Cost.isAlways())
1376 SampleContext &Context = Candidate.CalleeSamples->getContext();
1394bool SampleProfileLoader::inlineHotFunctionsWithPriority(
1398 assert((!ProfAccForSymsInList ||
1400 !
F.hasFnAttribute(
"profile-sample-accurate"))) &&
1401 "ProfAccForSymsInList should be false when profile-sample-accurate "
1406 CandidateQueue CQueue;
1407 InlineCandidate NewCandidate;
1408 for (
auto &BB :
F) {
1409 for (
auto &
I : BB) {
1410 auto *CB = dyn_cast<CallBase>(&
I);
1413 if (getInlineCandidate(&NewCandidate, CB))
1414 CQueue.push(NewCandidate);
1423 "Max inline size limit should not be smaller than min inline size "
1428 if (ExternalInlineAdvisor)
1429 SizeLimit = std::numeric_limits<unsigned>::max();
1434 bool Changed =
false;
1435 while (!CQueue.empty() &&
F.getInstructionCount() <
SizeLimit) {
1436 InlineCandidate Candidate = CQueue.top();
1439 Function *CalledFunction =
I->getCalledFunction();
1441 if (CalledFunction == &
F)
1443 if (
I->isIndirectCall()) {
1445 auto CalleeSamples = findIndirectCallFunctionSamples(*
I, Sum);
1447 Sum *= Candidate.CallsiteDistribution;
1448 unsigned ICPCount = 0;
1449 for (
const auto *FS : CalleeSamples) {
1451 if (LTOPhase == ThinOrFullLTOPhase::ThinLTOPreLink) {
1452 findExternalInlineCandidate(
I, FS, InlinedGUIDs,
1453 PSI->getOrCompHotCountThreshold());
1457 FS->getHeadSamplesEstimate() * Candidate.CallsiteDistribution;
1472 if (!PSI->isHotCount(EntryCountDistributed))
1477 Candidate = {
I,
FS, EntryCountDistributed,
1478 Candidate.CallsiteDistribution};
1479 if (tryPromoteAndInlineCandidate(
F, Candidate, SumOrigin, Sum,
1480 &InlinedCallSites)) {
1481 for (
auto *CB : InlinedCallSites) {
1482 if (getInlineCandidate(&NewCandidate, CB))
1483 CQueue.emplace(NewCandidate);
1487 }
else if (!ContextTracker) {
1488 LocalNotInlinedCallSites.
insert({
I,
FS});
1491 }
else if (CalledFunction && CalledFunction->
getSubprogram() &&
1494 if (tryInlineCandidate(Candidate, &InlinedCallSites)) {
1495 for (
auto *CB : InlinedCallSites) {
1496 if (getInlineCandidate(&NewCandidate, CB))
1497 CQueue.emplace(NewCandidate);
1500 }
else if (!ContextTracker) {
1501 LocalNotInlinedCallSites.
insert({
I, Candidate.CalleeSamples});
1503 }
else if (LTOPhase == ThinOrFullLTOPhase::ThinLTOPreLink) {
1504 findExternalInlineCandidate(
I, findCalleeFunctionSamples(*
I),
1506 PSI->getOrCompHotCountThreshold());
1510 if (!CQueue.empty()) {
1512 ++NumCSInlinedHitMaxLimit;
1514 ++NumCSInlinedHitMinLimit;
1516 ++NumCSInlinedHitGrowthLimit;
1522 promoteMergeNotInlinedContextSamples(LocalNotInlinedCallSites,
F);
1526void SampleProfileLoader::promoteMergeNotInlinedContextSamples(
1530 for (
const auto &Pair : NonInlinedCallSites) {
1533 if (!Callee ||
Callee->isDeclaration())
1538 I->getDebugLoc(),
I->getParent())
1539 <<
"previous inlining not repeated: '" <<
ore::NV(
"Callee", Callee)
1540 <<
"' into '" <<
ore::NV(
"Caller", &
F) <<
"'");
1544 if (
FS->getTotalSamples() == 0 &&
FS->getHeadSamplesEstimate() == 0) {
1558 if (
FS->getHeadSamples() == 0) {
1562 FS->getHeadSamplesEstimate());
1571 OutlineFS = &OutlineFunctionSamples[
1573 OutlineFS->
merge(*FS, 1);
1579 notInlinedCallInfo.try_emplace(Callee, NotInlinedProfileInfo{0});
1580 pair.first->second.entryCount +=
FS->getHeadSamplesEstimate();
1591 InstrProfValueData{
I.first.getHashCode(),
I.second});
1598void SampleProfileLoader::generateMDProfMetadata(
Function &
F) {
1601 LLVM_DEBUG(
dbgs() <<
"\nPropagation complete. Setting branch weights\n");
1604 for (
auto &BI :
F) {
1607 if (BlockWeights[BB]) {
1608 for (
auto &
I : *BB) {
1609 if (!isa<CallInst>(
I) && !isa<InvokeInst>(
I))
1621 FS->findCallTargetMapAt(CallSite);
1622 if (!
T ||
T.get().empty())
1629 if (Probe->Factor < 1)
1636 for (
const auto &
C :
T.get())
1643 FS->findFunctionSamplesMapAt(CallSite)) {
1644 for (
const auto &NameFS : *M)
1645 Sum += NameFS.second.getHeadSamplesEstimate();
1651 I.setMetadata(LLVMContext::MD_prof,
nullptr);
1652 }
else if (!isa<IntrinsicInst>(&
I)) {
1660 for (
auto &
I : *BB) {
1661 if (isa<CallInst>(
I) || isa<InvokeInst>(
I)) {
1662 if (cast<CallBase>(
I).isIndirectCall()) {
1663 I.setMetadata(LLVMContext::MD_prof,
nullptr);
1674 if (!isa<BranchInst>(TI) && !isa<SwitchInst>(TI) &&
1675 !isa<IndirectBrInst>(TI))
1681 :
Twine(
"<UNKNOWN LOCATION>"))
1690 std::vector<uint64_t> EdgeIndex;
1695 EdgeIndex[
I] = EdgeMultiplicity[Succ];
1696 EdgeMultiplicity[Succ]++;
1701 Edge E = std::make_pair(BB, Succ);
1707 if (Weight > std::numeric_limits<uint32_t>::max()) {
1709 Weight = std::numeric_limits<uint32_t>::max();
1715 Weight == std::numeric_limits<uint32_t>::max() ? Weight
1720 uint64_t W = Weight / EdgeMultiplicity[Succ];
1722 if (EdgeIndex[
I] < Weight % EdgeMultiplicity[Succ])
1727 if (Weight > MaxWeight) {
1729 MaxDestInst = Succ->getFirstNonPHIOrDbgOrLifetime();
1745 if (MaxWeight > 0 &&
1751 <<
"most popular destination for conditional branches at "
1752 <<
ore::NV(
"CondBranchesLoc", BranchLoc);
1771bool SampleProfileLoader::emitAnnotations(
Function &
F) {
1772 bool Changed =
false;
1776 if (!ProbeManager->getDesc(
F))
1777 dbgs() <<
"Probe descriptor missing for Function " <<
F.getName()
1781 if (ProbeManager->profileIsValid(
F, *Samples)) {
1782 ++NumMatchedProfile;
1784 ++NumMismatchedProfile;
1786 dbgs() <<
"Profile is invalid due to CFG mismatch for Function "
1787 <<
F.getName() <<
"\n");
1792 if (getFunctionLoc(
F) == 0)
1796 <<
F.getName() <<
": " << getFunctionLoc(
F) <<
"\n");
1801 Changed |= inlineHotFunctionsWithPriority(
F, InlinedGUIDs);
1803 Changed |= inlineHotFunctions(
F, InlinedGUIDs);
1805 Changed |= computeAndPropagateWeights(
F, InlinedGUIDs);
1808 generateMDProfMetadata(
F);
1810 emitCoverageRemarks(
F);
1814std::unique_ptr<ProfiledCallGraph>
1815SampleProfileLoader::buildProfiledCallGraph(
Module &M) {
1816 std::unique_ptr<ProfiledCallGraph> ProfiledCG;
1818 ProfiledCG = std::make_unique<ProfiledCallGraph>(*ContextTracker);
1820 ProfiledCG = std::make_unique<ProfiledCallGraph>(Reader->getProfiles());
1828 ProfiledCG->addProfiledFunction(
1835std::vector<Function *>
1837 std::vector<Function *> FunctionOrderList;
1838 FunctionOrderList.reserve(
M.size());
1841 errs() <<
"WARNING: -use-profiled-call-graph ignored, should be used "
1842 "together with -sample-profile-top-down-load.\n";
1856 FunctionOrderList.push_back(&
F);
1857 return FunctionOrderList;
1910 std::unique_ptr<ProfiledCallGraph> ProfiledCG = buildProfiledCallGraph(M);
1922 FunctionOrderList.push_back(
F);
1933 FunctionOrderList.push_back(&
F);
1939 std::reverse(FunctionOrderList.begin(), FunctionOrderList.end());
1942 dbgs() <<
"Function processing order:\n";
1943 for (
auto F : FunctionOrderList) {
1944 dbgs() <<
F->getName() <<
"\n";
1948 return FunctionOrderList;
1951bool SampleProfileLoader::doInitialization(
Module &M,
1953 auto &Ctx =
M.getContext();
1956 Filename, Ctx, *FS, FSDiscriminatorPass::Base, RemappingFilename);
1957 if (std::error_code EC = ReaderOrErr.getError()) {
1958 std::string Msg =
"Could not open profile: " +
EC.message();
1962 Reader = std::move(ReaderOrErr.get());
1963 Reader->setSkipFlatProf(LTOPhase == ThinOrFullLTOPhase::ThinLTOPostLink);
1966 Reader->setModule(&M);
1967 if (std::error_code EC = Reader->read()) {
1968 std::string Msg =
"profile reading failed: " +
EC.message();
1973 PSL = Reader->getProfileSymbolList();
1976 ProfAccForSymsInList =
1978 if (ProfAccForSymsInList) {
1979 NamesInProfile.clear();
1980 GUIDsInProfile.clear();
1981 if (
auto NameTable = Reader->getNameTable()) {
1983 for (
auto Name : *NameTable)
1984 GUIDsInProfile.insert(
Name.getHashCode());
1986 for (
auto Name : *NameTable)
1987 NamesInProfile.insert(
Name.stringRef());
1990 CoverageTracker.setProfAccForSymsInList(
true);
1995 M, *
FAM, Ctx,
nullptr,
2000 false,
InlineContext{LTOPhase, InlinePass::ReplaySampleProfileInliner});
2004 if (Reader->profileIsCS() || Reader->profileIsPreInlined() ||
2005 Reader->profileIsProbeBased()) {
2021 if (Reader->profileIsPreInlined()) {
2031 if (Reader->profileIsProbeBased() &&
2036 if (!Reader->profileIsCS()) {
2048 if (Reader->profileIsCS()) {
2050 ContextTracker = std::make_unique<SampleContextTracker>(
2051 Reader->getProfiles(), &GUIDToFuncNameMap);
2055 if (Reader->profileIsProbeBased()) {
2056 ProbeManager = std::make_unique<PseudoProbeManager>(M);
2057 if (!ProbeManager->moduleIsProbed(M)) {
2059 "Pseudo-probe-based profile requires SampleProfileProbePass";
2068 MatchingManager = std::make_unique<SampleProfileMatcher>(
2069 M, *Reader, ProbeManager.get(), LTOPhase);
2085bool SampleProfileLoader::rejectHighStalenessProfile(
2088 "Only support for probe-based profile");
2091 for (
const auto &
I : Profiles) {
2092 const auto &
FS =
I.second;
2093 const auto *FuncDesc = ProbeManager->getDesc(
FS.getGUID());
2099 FS.getTotalSamples()))
2103 if (ProbeManager->profileIsHashMismatched(*FuncDesc, FS))
2104 NumMismatchedFunc++;
2112 if (NumMismatchedFunc * 100 >=
2114 auto &Ctx =
M.getContext();
2116 "The input profile significantly mismatches current source code. "
2117 "Please recollect profile to avoid performance regression.";
2124void SampleProfileLoader::removePseudoProbeInsts(
Module &M) {
2126 std::vector<Instruction *> InstsToDel;
2127 for (
auto &BB :
F) {
2128 for (
auto &
I : BB) {
2129 if (isa<PseudoProbeInst>(&
I))
2130 InstsToDel.push_back(&
I);
2133 for (
auto *
I : InstsToDel)
2134 I->eraseFromParent();
2141 GUIDToFuncNameMapper Mapper(M, *Reader, GUIDToFuncNameMap);
2144 if (
M.getProfileSummary(
false) ==
nullptr) {
2145 M.setProfileSummary(Reader->getSummary().getMD(
M.getContext()),
2151 rejectHighStalenessProfile(M, PSI, Reader->getProfiles()))
2155 for (
const auto &
I : Reader->getProfiles())
2156 TotalCollectedSamples +=
I.second.getTotalSamples();
2158 auto Remapper = Reader->getRemapper();
2160 for (
const auto &N_F :
M.getValueSymbolTable()) {
2162 Function *
F = dyn_cast<Function>(N_F.getValue());
2163 if (
F ==
nullptr || OrigName.
empty())
2167 if (OrigName != NewName && !NewName.
empty()) {
2174 r.first->second =
nullptr;
2179 if (
auto MapName = Remapper->lookUpNameInProfile(OrigName)) {
2180 if (*MapName != OrigName && !MapName->empty())
2186 "No empty StringRef should be added in SymbolMap");
2190 MatchingManager->runOnModule();
2191 MatchingManager->clearMatchingData();
2194 bool retval =
false;
2195 for (
auto *
F : buildFunctionOrder(M, CG)) {
2197 clearFunctionData();
2203 for (
const std::pair<Function *, NotInlinedProfileInfo> &pair :
2208 removePseudoProbeInsts(M);
2214 LLVM_DEBUG(
dbgs() <<
"\n\nProcessing Function " <<
F.getName() <<
"\n");
2215 DILocation2SampleMap.clear();
2226 initialEntryCount = 0;
2229 ProfAccForSymsInList =
false;
2231 CoverageTracker.setProfAccForSymsInList(ProfAccForSymsInList);
2237 if (ProfAccForSymsInList) {
2239 if (PSL->contains(
F.getName()))
2240 initialEntryCount = 0;
2254 GUIDsInProfile.count(Function::getGUID(CanonName))) ||
2256 initialEntryCount = -1;
2261 if (!
F.getEntryCount())
2263 std::unique_ptr<OptimizationRemarkEmitter> OwnedORE;
2270 OwnedORE = std::make_unique<OptimizationRemarkEmitter>(&
F);
2271 ORE = OwnedORE.get();
2275 Samples = ContextTracker->getBaseSamplesFor(
F);
2277 Samples = Reader->getSamplesFor(
F);
2282 auto It = OutlineFunctionSamples.find(
FunctionId(CanonName));
2283 if (It != OutlineFunctionSamples.end()) {
2284 Samples = &It->second;
2285 }
else if (
auto Remapper = Reader->getRemapper()) {
2286 if (
auto RemppedName = Remapper->lookUpNameInProfile(CanonName)) {
2287 It = OutlineFunctionSamples.find(
FunctionId(*RemppedName));
2288 if (It != OutlineFunctionSamples.end())
2289 Samples = &It->second;
2295 if (Samples && !Samples->
empty())
2296 return emitAnnotations(
F);
2302 : ProfileFileName(File), ProfileRemappingFileName(RemappingFile),
2303 LTOPhase(LTOPhase), FS(
std::
move(FS)) {}
2323 SampleProfileLoader SampleLoader(
2326 : ProfileRemappingFileName,
2327 LTOPhase, FS, GetAssumptionCache, GetTTI, GetTLI);
2329 if (!SampleLoader.doInitialization(M, &
FAM))
2334 if (!SampleLoader.runOnModule(M, &AM, PSI, CG))
This file defines the StringMap class.
#define clEnumValN(ENUMVAL, FLAGNAME, DESC)
This file defines the DenseMap class.
This file defines the DenseSet and SmallDenseSet classes.
static bool runOnFunction(Function &F, bool PostInlining)
Provides ErrorOr<T> smart pointer.
static cl::opt< unsigned > SizeLimit("eif-limit", cl::init(6), cl::Hidden, cl::desc("Size limit in Hexagon early if-conversion"))
Implements a lazy call graph analysis and related passes for the new pass manager.
Legalize a function's Machine IR
This file implements a map that provides insertion order iteration.
static const Function * getCalledFunction(const Value *V, bool &IsNoBuiltin)
Module.h This file contains the declarations for the Module class.
ConstantRange Range(APInt(BitWidth, Low), APInt(BitWidth, High))
FunctionAnalysisManager FAM
This header defines various interfaces for pass management in LLVM.
This file defines the PriorityQueue class.
This file contains the declarations for profiling metadata utility functions.
This builds on the llvm/ADT/GraphTraits.h file to find the strongly connected components (SCCs) of a ...
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
This file provides the interface for context-sensitive profile tracker used by CSSPGO.
This file provides the interface for the sampled PGO profile loader base implementation.
This file provides the utility functions for the sampled PGO loader base implementation.
This file provides the interface for SampleProfileMatcher.
This file provides the interface for the pseudo probe implementation for AutoFDO.
static cl::opt< std::string > SampleProfileFile("sample-profile-file", cl::init(""), cl::value_desc("filename"), cl::desc("Profile file loaded by -sample-profile"), cl::Hidden)
static cl::opt< unsigned > MinfuncsForStalenessError("min-functions-for-staleness-error", cl::Hidden, cl::init(50), cl::desc("Skip the check if the number of hot functions is smaller than " "the specified number."))
static cl::opt< bool > ProfileSampleBlockAccurate("profile-sample-block-accurate", cl::Hidden, cl::init(false), cl::desc("If the sample profile is accurate, we will mark all un-sampled " "branches and calls as having 0 samples. Otherwise, treat " "them conservatively as unknown. "))
static cl::opt< unsigned > PrecentMismatchForStalenessError("precent-mismatch-for-staleness-error", cl::Hidden, cl::init(80), cl::desc("Reject the profile if the mismatch percent is higher than the " "given number."))
static cl::opt< bool > RemoveProbeAfterProfileAnnotation("sample-profile-remove-probe", cl::Hidden, cl::init(false), cl::desc("Remove pseudo-probe after sample profile annotation."))
static cl::opt< unsigned > MaxNumPromotions("sample-profile-icp-max-prom", cl::init(3), cl::Hidden, cl::desc("Max number of promotions for a single indirect " "call callsite in sample profile loader"))
static cl::opt< ReplayInlinerSettings::Fallback > ProfileInlineReplayFallback("sample-profile-inline-replay-fallback", cl::init(ReplayInlinerSettings::Fallback::Original), cl::values(clEnumValN(ReplayInlinerSettings::Fallback::Original, "Original", "All decisions not in replay send to original advisor (default)"), clEnumValN(ReplayInlinerSettings::Fallback::AlwaysInline, "AlwaysInline", "All decisions not in replay are inlined"), clEnumValN(ReplayInlinerSettings::Fallback::NeverInline, "NeverInline", "All decisions not in replay are not inlined")), cl::desc("How sample profile inline replay treats sites that don't come " "from the replay. Original: defers to original advisor, " "AlwaysInline: inline all sites not in replay, NeverInline: " "inline no sites not in replay"), cl::Hidden)
static cl::opt< bool > OverwriteExistingWeights("overwrite-existing-weights", cl::Hidden, cl::init(false), cl::desc("Ignore existing branch weights on IR and always overwrite."))
static void updateIDTMetaData(Instruction &Inst, const SmallVectorImpl< InstrProfValueData > &CallTargets, uint64_t Sum)
Update indirect call target profile metadata for Inst.
static cl::opt< bool > AnnotateSampleProfileInlinePhase("annotate-sample-profile-inline-phase", cl::Hidden, cl::init(false), cl::desc("Annotate LTO phase (prelink / postlink), or main (no LTO) for " "sample-profile inline pass name."))
static cl::opt< std::string > ProfileInlineReplayFile("sample-profile-inline-replay", cl::init(""), cl::value_desc("filename"), cl::desc("Optimization remarks file containing inline remarks to be replayed " "by inlining from sample profile loader."), cl::Hidden)
static cl::opt< bool > ProfileMergeInlinee("sample-profile-merge-inlinee", cl::Hidden, cl::init(true), cl::desc("Merge past inlinee's profile to outline version if sample " "profile loader decided not to inline a call site. It will " "only be enabled when top-down order of profile loading is " "enabled. "))
cl::opt< bool > PersistProfileStaleness("persist-profile-staleness", cl::Hidden, cl::init(false), cl::desc("Compute stale profile statistical metrics and write it into the " "native object file(.llvm_stats section)."))
static bool doesHistoryAllowICP(const Instruction &Inst, StringRef Candidate)
Check whether the indirect call promotion history of Inst allows the promotion for Candidate.
static SmallVector< InstrProfValueData, 2 > GetSortedValueDataFromCallTargets(const SampleRecord::CallTargetMap &M)
Returns the sorted CallTargetMap M by count in descending order.
static cl::opt< bool > UseProfiledCallGraph("use-profiled-call-graph", cl::init(true), cl::Hidden, cl::desc("Process functions in a top-down order " "defined by the profiled call graph when " "-sample-profile-top-down-load is on."))
static cl::opt< ReplayInlinerSettings::Scope > ProfileInlineReplayScope("sample-profile-inline-replay-scope", cl::init(ReplayInlinerSettings::Scope::Function), cl::values(clEnumValN(ReplayInlinerSettings::Scope::Function, "Function", "Replay on functions that have remarks associated " "with them (default)"), clEnumValN(ReplayInlinerSettings::Scope::Module, "Module", "Replay on the entire module")), cl::desc("Whether inline replay should be applied to the entire " "Module or just the Functions (default) that are present as " "callers in remarks during sample profile inlining."), cl::Hidden)
static cl::opt< unsigned > ProfileICPRelativeHotness("sample-profile-icp-relative-hotness", cl::Hidden, cl::init(25), cl::desc("Relative hotness percentage threshold for indirect " "call promotion in proirity-based sample profile loader inlining."))
Function::ProfileCount ProfileCount
static cl::opt< unsigned > ProfileICPRelativeHotnessSkip("sample-profile-icp-relative-hotness-skip", cl::Hidden, cl::init(1), cl::desc("Skip relative hotness check for ICP up to given number of targets."))
cl::opt< bool > ReportProfileStaleness("report-profile-staleness", cl::Hidden, cl::init(false), cl::desc("Compute and report stale profile statistical metrics."))
static cl::opt< bool > UsePreInlinerDecision("sample-profile-use-preinliner", cl::Hidden, cl::desc("Use the preinliner decisions stored in profile context."))
static cl::opt< bool > ProfileAccurateForSymsInList("profile-accurate-for-symsinlist", cl::Hidden, cl::init(true), cl::desc("For symbols in profile symbol list, regard their profiles to " "be accurate. It may be overriden by profile-sample-accurate. "))
static cl::opt< bool > DisableSampleLoaderInlining("disable-sample-loader-inlining", cl::Hidden, cl::init(false), cl::desc("If true, artifically skip inline transformation in sample-loader " "pass, and merge (or scale) profiles (as configured by " "--sample-profile-merge-inlinee)."))
static cl::opt< bool > ProfileSizeInline("sample-profile-inline-size", cl::Hidden, cl::init(false), cl::desc("Inline cold call sites in profile loader if it's beneficial " "for code size."))
cl::opt< bool > SalvageStaleProfile("salvage-stale-profile", cl::Hidden, cl::init(false), cl::desc("Salvage stale profile by fuzzy matching and use the remapped " "location for sample profile query."))
static cl::opt< bool > ProfileTopDownLoad("sample-profile-top-down-load", cl::Hidden, cl::init(true), cl::desc("Do profile annotation and inlining for functions in top-down " "order of call graph during sample profile loading. It only " "works for new pass manager. "))
static cl::opt< bool > ProfileSampleAccurate("profile-sample-accurate", cl::Hidden, cl::init(false), cl::desc("If the sample profile is accurate, we will mark all un-sampled " "callsite and function as having 0 samples. Otherwise, treat " "un-sampled callsites and functions conservatively as unknown. "))
static cl::opt< bool > AllowRecursiveInline("sample-profile-recursive-inline", cl::Hidden, cl::desc("Allow sample loader inliner to inline recursive calls."))
static cl::opt< CallSiteFormat::Format > ProfileInlineReplayFormat("sample-profile-inline-replay-format", cl::init(CallSiteFormat::Format::LineColumnDiscriminator), cl::values(clEnumValN(CallSiteFormat::Format::Line, "Line", "<Line Number>"), clEnumValN(CallSiteFormat::Format::LineColumn, "LineColumn", "<Line Number>:<Column Number>"), clEnumValN(CallSiteFormat::Format::LineDiscriminator, "LineDiscriminator", "<Line Number>.<Discriminator>"), clEnumValN(CallSiteFormat::Format::LineColumnDiscriminator, "LineColumnDiscriminator", "<Line Number>:<Column Number>.<Discriminator> (default)")), cl::desc("How sample profile inline replay file is formatted"), cl::Hidden)
static cl::opt< std::string > SampleProfileRemappingFile("sample-profile-remapping-file", cl::init(""), cl::value_desc("filename"), cl::desc("Profile remapping file loaded by -sample-profile"), cl::Hidden)
static cl::opt< unsigned > HotFuncCutoffForStalenessError("hot-func-cutoff-for-staleness-error", cl::Hidden, cl::init(800000), cl::desc("A function is considered hot for staleness error check if its " "total sample count is above the specified percentile"))
static cl::opt< bool > CallsitePrioritizedInline("sample-profile-prioritized-inline", cl::Hidden, cl::desc("Use call site prioritized inlining for sample profile loader." "Currently only CSSPGO is supported."))
This file provides the interface for the sampled PGO loader pass.
This file defines the SmallVector class.
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
Defines the virtual file system interface vfs::FileSystem.
A container for analyses that lazily runs them and caches their results.
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)
Get the result of an analysis pass for a given IR unit.
A function analysis which provides an AssumptionCache.
A cache of @llvm.assume calls within a function.
LLVM Basic Block Representation.
const Function * getParent() const
Return the enclosing method, or null if none.
Base class for all callable instructions (InvokeInst and CallInst) Holds everything related to callin...
Function * getCalledFunction() const
Returns the function called, or null if this is an indirect function invocation or the function signa...
This class represents a function call, abstracting a target machine's calling convention.
ValueT lookup(const_arg_type_t< KeyT > Val) const
lookup - Return the entry for the specified key, or a default constructed value if no such entry exis...
iterator find(const_arg_type_t< KeyT > Val)
std::pair< iterator, bool > try_emplace(KeyT &&Key, Ts &&... Args)
size_type count(const_arg_type_t< KeyT > Val) const
Return 1 if the specified key is in the map, 0 otherwise.
Implements a dense probed hash-table based set.
Diagnostic information for the sample profiler.
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
Represents either an error or a value T.
Class to represent profile counts.
DISubprogram * getSubprogram() const
Get the attached subprogram.
bool isDeclaration() const
Return true if the primary definition of this global value is outside of the current translation unit...
Represents the cost of inlining a function.
static InlineCost getNever(const char *Reason, std::optional< CostBenefitPair > CostBenefit=std::nullopt)
static InlineCost getAlways(const char *Reason, std::optional< CostBenefitPair > CostBenefit=std::nullopt)
static InlineCost get(int Cost, int Threshold, int StaticBonus=0)
This class captures the data input to the InlineFunction call, and records the auxiliary results prod...
InlineResult is basically true or false.
An analysis over an "outer" IR unit that provides access to an analysis manager over an "inner" IR un...
unsigned getNumSuccessors() const LLVM_READONLY
Return the number of successors that this instruction has.
const DebugLoc & getDebugLoc() const
Return the debug location for this node as a DebugLoc.
bool extractProfTotalWeight(uint64_t &TotalVal) const
Retrieve total raw weight values of a branch.
BasicBlock * getSuccessor(unsigned Idx) const LLVM_READONLY
Return the specified successor. This instruction must be a terminator.
void setMetadata(unsigned KindID, MDNode *Node)
Set the metadata of the specified kind to the specified node.
A smart pointer to a reference-counted object that inherits from RefCountedBase or ThreadSafeRefCount...
This is an important class for using LLVM in a threaded context.
void diagnose(const DiagnosticInfo &DI)
Report a message to the currently installed diagnostic handler.
An analysis pass which computes the call graph for a module.
A node in the call graph.
A RefSCC of the call graph.
An SCC of the call graph.
A lazily constructed view of the call graph of a module.
iterator_range< postorder_ref_scc_iterator > postorder_ref_sccs()
This class implements a map that also provides access to all stored values in a deterministic order.
VectorType::iterator erase(typename VectorType::iterator Iterator)
Remove the element given by Iterator.
std::pair< iterator, bool > insert(const std::pair< KeyT, ValueT > &KV)
ValueT lookup(const KeyT &Key) const
A Module instance is used to store all the information related to an LLVM module.
PostDominatorTree Class - Concrete subclass of DominatorTree that is used to compute the post-dominat...
A set of analyses that are preserved following a run of a transformation pass.
static PreservedAnalyses none()
Convenience factory function for the empty preserved set.
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
PriorityQueue - This class behaves like std::priority_queue and provides a few additional convenience...
An analysis pass based on the new PM to deliver ProfileSummaryInfo.
Analysis providing profile information.
void refresh()
If no summary is present, attempt to refresh.
bool isHotCountNthPercentile(int PercentileCutoff, uint64_t C) const
Returns true if count C is considered hot with regard to a given hot percentile cutoff value.
Sample profile inference pass.
void computeDominanceAndLoopInfo(FunctionT &F)
virtual ErrorOr< uint64_t > getInstWeight(const InstructionT &Inst)
Get the weight for an instruction.
virtual const FunctionSamples * findFunctionSamples(const InstructionT &I) const
Get the FunctionSamples for an instruction.
PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM)
SampleProfileLoaderPass(std::string File="", std::string RemappingFile="", ThinOrFullLTOPhase LTOPhase=ThinOrFullLTOPhase::None, IntrusiveRefCntPtr< vfs::FileSystem > FS=nullptr)
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
reference emplace_back(ArgTypes &&... Args)
iterator insert(iterator I, T &&Elt)
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
StringRef - Represent a constant reference to a string, i.e. a character array and a length, which need not be null terminated.
constexpr bool empty() const
empty - Check if the string is empty.
StringSet - A wrapper for StringMap that provides set-like functionality.
Analysis pass providing the TargetTransformInfo.
Analysis pass providing the TargetLibraryInfo.
Provides information about what library functions are available for the current target.
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
LLVM Value Representation.
StringRef getName() const
Return a constant reference to the value's name.
std::pair< iterator, bool > insert(const ValueT &V)
const ParentTy * getParent() const
This class represents a function that is read from a sample profile.
Representation of the samples collected for a function.
void findInlinedFunctions(DenseSet< GlobalValue::GUID > &S, const HashKeyMap< std::unordered_map, FunctionId, Function * > &SymbolMap, uint64_t Threshold) const
Recursively traverses all children; if the total sample count of the corresponding function is no less than Threshold, adds its corresponding GUID to S.
FunctionId getFunction() const
Return the function name.
static bool ProfileIsProbeBased
static StringRef getCanonicalFnName(const Function &F)
Return the canonical name for a function, taking into account suffix elision policy attributes.
void setContextSynthetic()
SampleContext & getContext() const
sampleprof_error merge(const FunctionSamples &Other, uint64_t Weight=1)
Merge the samples in Other into this one.
static LineLocation getCallSiteIdentifier(const DILocation *DIL, bool ProfileIsFS=false)
Returns a unique call site identifier for a given debug location of a call instruction.
uint64_t getHeadSamplesEstimate() const
Return an estimate of the sample count of the function entry basic block.
uint64_t getGUID() const
Return the GUID of the context's name.
const BodySampleMap & getBodySamples() const
Return all the samples collected in the body of the function.
static bool UseMD5
Whether the profile uses MD5 to represent string.
This class is a wrapper to associative container MapT<KeyT, ValueT> using the hash value of the origi...
bool hasState(ContextStateMask S)
bool hasAttribute(ContextAttributeMask A)
This class provides operator overloads to the map container using MD5 as the key type,...
Sample-based profile reader.
static ErrorOr< std::unique_ptr< SampleProfileReader > > create(StringRef Filename, LLVMContext &C, vfs::FileSystem &FS, FSDiscriminatorPass P=FSDiscriminatorPass::Base, StringRef RemapFilename="")
Create a sample profile reader appropriate to the file format.
std::unordered_map< FunctionId, uint64_t > CallTargetMap
static const SortedCallTargetSet sortCallTargets(const CallTargetMap &Targets)
Sort call targets in descending order of call frequency.
static const CallTargetMap adjustCallTargets(const CallTargetMap &Targets, float DistributionFactor)
Prorate call targets by a distribution factor.
Enumerate the SCCs of a directed graph in reverse topological order of the SCC DAG.
bool isAtEnd() const
Direct loop termination test which is more efficient than comparison with end().
Sort the nodes of a directed SCC in the decreasing order of the edge weights.
@ C
The default llvm calling convention, compatible with C.
ValuesClass values(OptsTy... Options)
Helper to build a ValuesClass by forwarding a variable number of arguments as an initializer list to ...
initializer< Ty > init(const Ty &Val)
void checkExpectAnnotations(Instruction &I, const ArrayRef< uint32_t > ExistingWeights, bool IsFrontend)
checkExpectAnnotations - compares PGO counters to the thresholds used for llvm.expect and warns if th...
DenseMap< SymbolStringPtr, ExecutorSymbolDef > SymbolMap
A map from symbol names (as SymbolStringPtrs) to JITSymbols (address/flags pairs).
DiagnosticInfoOptimizationBase::Argument NV
CallBase & promoteIndirectCall(CallBase &CB, Function *F, uint64_t Count, uint64_t TotalCount, bool AttachProfToDirectCall, OptimizationRemarkEmitter *ORE)
NodeAddr< FuncNode * > Func
static FunctionId getRepInFormat(StringRef Name)
Get the proper representation of a string according to whether the current Format uses MD5 to represe...
@ ContextDuplicatedIntoBase
std::map< FunctionId, FunctionSamples > FunctionSamplesMap
bool callsiteIsHot(const FunctionSamples *CallsiteFS, ProfileSummaryInfo *PSI, bool ProfAccForSymsInList)
Return true if the given callsite is hot with respect to the hot cutoff threshold.
IntrusiveRefCntPtr< FileSystem > getRealFileSystem()
Gets a vfs::FileSystem for the 'real' file system, as seen by the operating system.
This is an optimization pass for GlobalISel generic memory operations.
bool isLegalToPromote(const CallBase &CB, Function *Callee, const char **FailureReason=nullptr)
Return true if the given indirect call site can be made to call Callee.
auto size(R &&Range, std::enable_if_t< std::is_base_of< std::random_access_iterator_tag, typename std::iterator_traits< decltype(Range.begin())>::iterator_category >::value, void > *=nullptr)
Get the size of a range.
cl::opt< int > ProfileInlineLimitMin
bool succ_empty(const Instruction *I)
scc_iterator< T > scc_begin(const T &G)
Construct the begin iterator for a deduced graph type T.
void setProbeDistributionFactor(Instruction &Inst, float Factor)
std::string AnnotateInlinePassName(InlineContext IC)
ThinOrFullLTOPhase
This enumerates the LLVM full LTO or ThinLTO optimization phases.
InlineCost getInlineCost(CallBase &Call, const InlineParams &Params, TargetTransformInfo &CalleeTTI, function_ref< AssumptionCache &(Function &)> GetAssumptionCache, function_ref< const TargetLibraryInfo &(Function &)> GetTLI, function_ref< BlockFrequencyInfo &(Function &)> GetBFI=nullptr, ProfileSummaryInfo *PSI=nullptr, OptimizationRemarkEmitter *ORE=nullptr)
Get an InlineCost object representing the cost of inlining this callsite.
cl::opt< bool > SampleProfileUseProfi
void annotateValueSite(Module &M, Instruction &Inst, const InstrProfRecord &InstrProfR, InstrProfValueKind ValueKind, uint32_t SiteIndx, uint32_t MaxMDCount=3)
Get the value profile data for value site SiteIdx from InstrProfR and annotate the instruction Inst w...
void setBranchWeights(Instruction &I, ArrayRef< uint32_t > Weights, bool IsExpected)
Create a new branch_weights metadata node and add or overwrite a prof metadata reference to instructi...
void sort(IteratorTy Start, IteratorTy End)
llvm::cl::opt< bool > UseIterativeBFIInference
std::optional< PseudoProbe > extractProbe(const Instruction &Inst)
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
void emitInlinedIntoBasedOnCost(OptimizationRemarkEmitter &ORE, DebugLoc DLoc, const BasicBlock *Block, const Function &Callee, const Function &Caller, const InlineCost &IC, bool ForProfileContext=false, const char *PassName=nullptr)
Emit ORE message based on cost (default heuristic).
SmallVector< InstrProfValueData, 4 > getValueProfDataFromInst(const Instruction &Inst, InstrProfValueKind ValueKind, uint32_t MaxNumValueData, uint64_t &TotalC, bool GetNoICPValue=false)
Extract the value profile data from Inst and returns them if Inst is annotated with value profile dat...
std::unique_ptr< InlineAdvisor > getReplayInlineAdvisor(Module &M, FunctionAnalysisManager &FAM, LLVMContext &Context, std::unique_ptr< InlineAdvisor > OriginalAdvisor, const ReplayInlinerSettings &ReplaySettings, bool EmitRemarks, InlineContext IC)
cl::opt< int > SampleHotCallSiteThreshold
raw_fd_ostream & errs()
This returns a reference to a raw_ostream for standard error.
void updateProfileCallee(Function *Callee, int64_t EntryDelta, const ValueMap< const Value *, WeakTrackingVH > *VMap=nullptr)
Updates profile information by adjusting the entry count by adding EntryDelta then scaling callsite i...
cl::opt< int > SampleColdCallSiteThreshold
InlineResult InlineFunction(CallBase &CB, InlineFunctionInfo &IFI, bool MergeAttributes=false, AAResults *CalleeAAR=nullptr, bool InsertLifetime=true, Function *ForwardVarArgsTo=nullptr)
This function inlines the called function into the basic block of the caller.
InlineParams getInlineParams()
Generate the parameters to tune the inline cost analysis based only on the commandline options.
OutputIt move(R &&Range, OutputIt Out)
Provide wrappers to std::move which take ranges instead of having to pass begin/end explicitly.
static bool skipProfileForFunction(const Function &F)
cl::opt< bool > SortProfiledSCC
cl::opt< int > ProfileInlineLimitMax
cl::opt< bool > EnableExtTspBlockPlacement
const uint64_t NOMORE_ICP_MAGICNUM
Magic number in the value profile metadata showing a target has been promoted for the instruction and...
cl::opt< int > ProfileInlineGrowthLimit
Implement std::hash so that hash_code can be used in STL containers.
Used in the streaming interface as the general argument type.
A wrapper of binary function with basic blocks and jumps.
Provides context on when an inline advisor is constructed in the pipeline (e.g., link phase,...
Thresholds to tune inline cost analysis.
std::optional< bool > AllowRecursiveCall
Indicate whether we allow inlining for recursive call.
std::optional< bool > ComputeFullInlineCost
Compute inline cost even when the cost has exceeded the threshold.