90#include <system_error>
95using namespace sampleprof;
// Tag string for this file: "sample-profile".
// NOTE(review): presumably consumed by the LLVM_DEBUG / STATISTIC macros used
// elsewhere in this file — confirm against the full source.
98#define DEBUG_TYPE "sample-profile"
// Adjacent string literals are concatenated by the compiler, so this expands
// to "sample-profile" "-inline", i.e. the single string "sample-profile-inline".
99#define CSINLINE_DEBUG DEBUG_TYPE "-inline"
102 "Number of functions inlined with context sensitive profile");
104 "Number of functions not inlined with context sensitive profile");
106 "Number of functions with CFG mismatched profile");
// Declares the NumMatchedProfile statistic with the description string below.
// It is incremented (++NumMatchedProfile) in
// SampleProfileLoader::emitAnnotations; its counterpart NumMismatchedProfile
// is incremented next to the "CFG mismatch" debug message in the same function.
107STATISTIC(NumMatchedProfile,
"Number of functions with CFG matched profile");
109 "Number of inlined callsites with a partial distribution factor");
112 "Number of functions with FDO inline stopped due to min size limit");
114 "Number of functions with FDO inline stopped due to max size limit");
116 NumCSInlinedHitGrowthLimit,
117 "Number of functions with FDO inline stopped due to growth size limit");
134 cl::desc(
"Salvage stale profile by fuzzy matching and use the remapped "
135 "location for sample profile query."));
139 cl::desc(
"Compute and report stale profile statistical metrics."));
143 cl::desc(
"Compute stale profile statistical metrics and write it into the "
144 "native object file(.llvm_stats section)."));
148 cl::desc(
"If the sample profile is accurate, we will mark all un-sampled "
149 "callsite and function as having 0 samples. Otherwise, treat "
150 "un-sampled callsites and functions conservatively as unknown. "));
154 cl::desc(
"If the sample profile is accurate, we will mark all un-sampled "
155 "branches and calls as having 0 samples. Otherwise, treat "
156 "them conservatively as unknown. "));
160 cl::desc(
"For symbols in profile symbol list, regard their profiles to "
161 "be accurate. It may be overriden by profile-sample-accurate. "));
165 cl::desc(
"Merge past inlinee's profile to outline version if sample "
166 "profile loader decided not to inline a call site. It will "
167 "only be enabled when top-down order of profile loading is "
172 cl::desc(
"Do profile annotation and inlining for functions in top-down "
173 "order of call graph during sample profile loading. It only "
174 "works for new pass manager. "));
178 cl::desc(
"Process functions in a top-down order "
179 "defined by the profiled call graph when "
180 "-sample-profile-top-down-load is on."));
184 cl::desc(
"Inline cold call sites in profile loader if it's beneficial "
192 cl::desc(
"If true, artifically skip inline transformation in sample-loader "
193 "pass, and merge (or scale) profiles (as configured by "
194 "--sample-profile-merge-inlinee)."));
199 cl::desc(
"Sort profiled recursion by edge weights."));
203 cl::desc(
"The size growth ratio limit for proirity-based sample profile "
204 "loader inlining."));
208 cl::desc(
"The lower bound of size growth limit for "
209 "proirity-based sample profile loader inlining."));
213 cl::desc(
"The upper bound of size growth limit for "
214 "proirity-based sample profile loader inlining."));
218 cl::desc(
"Hot callsite threshold for proirity-based sample profile loader "
223 cl::desc(
"Threshold for inlining cold callsites"));
229 "Relative hotness percentage threshold for indirect "
230 "call promotion in proirity-based sample profile loader inlining."));
235 "Skip relative hotness check for ICP up to given number of targets."));
238 "sample-profile-prioritized-inline",
cl::Hidden,
240 cl::desc(
"Use call site prioritized inlining for sample profile loader."
241 "Currently only CSSPGO is supported."));
246 cl::desc(
"Use the preinliner decisions stored in profile context."));
249 "sample-profile-recursive-inline",
cl::Hidden,
251 cl::desc(
"Allow sample loader inliner to inline recursive calls."));
256 "Optimization remarks file containing inline remarks to be replayed "
257 "by inlining from sample profile loader."),
261 "sample-profile-inline-replay-scope",
262 cl::init(ReplayInlinerSettings::Scope::Function),
264 "Replay on functions that have remarks associated "
265 "with them (default)"),
266 clEnumValN(ReplayInlinerSettings::Scope::Module,
"Module",
267 "Replay on the entire module")),
268 cl::desc(
"Whether inline replay should be applied to the entire "
269 "Module or just the Functions (default) that are present as "
270 "callers in remarks during sample profile inlining."),
274 "sample-profile-inline-replay-fallback",
275 cl::init(ReplayInlinerSettings::Fallback::Original),
278 ReplayInlinerSettings::Fallback::Original,
"Original",
279 "All decisions not in replay send to original advisor (default)"),
280 clEnumValN(ReplayInlinerSettings::Fallback::AlwaysInline,
281 "AlwaysInline",
"All decisions not in replay are inlined"),
282 clEnumValN(ReplayInlinerSettings::Fallback::NeverInline,
"NeverInline",
283 "All decisions not in replay are not inlined")),
284 cl::desc(
"How sample profile inline replay treats sites that don't come "
285 "from the replay. Original: defers to original advisor, "
286 "AlwaysInline: inline all sites not in replay, NeverInline: "
287 "inline no sites not in replay"),
291 "sample-profile-inline-replay-format",
292 cl::init(CallSiteFormat::Format::LineColumnDiscriminator),
294 clEnumValN(CallSiteFormat::Format::Line,
"Line",
"<Line Number>"),
295 clEnumValN(CallSiteFormat::Format::LineColumn,
"LineColumn",
296 "<Line Number>:<Column Number>"),
297 clEnumValN(CallSiteFormat::Format::LineDiscriminator,
298 "LineDiscriminator",
"<Line Number>.<Discriminator>"),
299 clEnumValN(CallSiteFormat::Format::LineColumnDiscriminator,
300 "LineColumnDiscriminator",
301 "<Line Number>:<Column Number>.<Discriminator> (default)")),
306 cl::desc(
"Max number of promotions for a single indirect "
307 "call callsite in sample profile loader"));
311 cl::desc(
"Ignore existing branch weights on IR and always overwrite."));
315 cl::desc(
"Annotate LTO phase (prelink / postlink), or main (no LTO) for "
316 "sample-profile inline pass name."));
// An Edge is an ordered pair of pointers to const BasicBlock.
// NOTE(review): presumably (source block, successor block) of a CFG edge, as
// in `Edge E = std::make_pair(BB, Succ);` later in this file — confirm.
326using Edge = std::pair<const BasicBlock *, const BasicBlock *>;
331class GUIDToFuncNameMapper {
336 CurrentGUIDToFuncNameMap(GUIDToFuncNameMap) {
340 for (
const auto &
F : CurrentModule) {
342 CurrentGUIDToFuncNameMap.insert(
343 {Function::getGUID(OrigName), OrigName});
353 if (CanonName != OrigName)
354 CurrentGUIDToFuncNameMap.insert(
355 {Function::getGUID(CanonName), CanonName});
359 SetGUIDToFuncNameMapForAll(&CurrentGUIDToFuncNameMap);
362 ~GUIDToFuncNameMapper() {
366 CurrentGUIDToFuncNameMap.clear();
370 SetGUIDToFuncNameMapForAll(
nullptr);
375 std::queue<FunctionSamples *> FSToUpdate;
377 FSToUpdate.push(&IFS.second);
380 while (!FSToUpdate.empty()) {
383 FS->GUIDToFuncNameMap = Map;
384 for (
const auto &ICS : FS->getCallsiteSamples()) {
386 for (
const auto &IFS : FSMap) {
388 FSToUpdate.push(&FS);
400struct InlineCandidate {
410 float CallsiteDistribution;
414struct CandidateComparer {
415 bool operator()(
const InlineCandidate &LHS,
const InlineCandidate &RHS) {
416 if (
LHS.CallsiteCount !=
RHS.CallsiteCount)
417 return LHS.CallsiteCount <
RHS.CallsiteCount;
421 assert(LCS && RCS &&
"Expect non-null FunctionSamples");
432using CandidateQueue =
437class SampleProfileMatcher {
// Zero-initialised accumulators for the stale-profile statistics. runOnModule
// prints them to errs() and also pushes them into ProfStatsVec.
// Sum of FuncProfiledCallsites, added in countProfileMismatches.
448 uint64_t TotalProfiledCallsites = 0;
// Sum of FuncMismatchedCallsites, added in countProfileMismatches.
449 uint64_t NumMismatchedCallsites = 0;
// Sum of CallsiteSamples for callsites where CallsiteIsMatched is false
// (countProfileCallsiteMismatches).
450 uint64_t MismatchedCallsiteSamples = 0;
// Sum of FS.getTotalSamples(), added in countMismatchedSamples.
// NOTE(review): the guarding condition is not visible in this view; presumably
// a function-hash/checksum mismatch — confirm.
454 uint64_t MismatchedFuncHashSamples = 0;
// Placeholder callee name. The GetCanonicalCalleeName lambda in findIRAnchors
// starts from this value, and countProfileCallsiteMismatches treats an IR
// callee name equal to it as a matched callsite.
// NOTE(review): presumably stands for indirect calls whose target is not known
// in the IR — confirm against the full lambda body.
459 static constexpr const char *UnknownIndirectCallee =
460 "unknown.indirect.callee";
465 :
M(
M), Reader(Reader), ProbeManager(ProbeManager){};
472 if (It != FlattenedProfiles.end())
478 std::map<LineLocation, StringRef> &IRAnchors);
479 void findProfileAnchors(
484 void countProfileMismatches(
486 const std::map<LineLocation, StringRef> &IRAnchors,
487 const std::map<
LineLocation, std::unordered_set<FunctionId>>
489 void countProfileCallsiteMismatches(
491 const std::map<LineLocation, StringRef> &IRAnchors,
492 const std::map<
LineLocation, std::unordered_set<FunctionId>>
498 return Ret.first->second;
500 void distributeIRToProfileLocationMap();
502 void runStaleProfileMatching(
503 const Function &
F,
const std::map<LineLocation, StringRef> &IRAnchors,
504 const std::map<
LineLocation, std::unordered_set<FunctionId>>
524 GetAC(
std::
move(GetAssumptionCache)),
525 GetTTI(
std::
move(GetTargetTransformInfo)), GetTLI(
std::
move(GetTLI)),
543 std::vector<const FunctionSamples *>
549 bool tryPromoteAndInlineCandidate(
555 std::optional<InlineCost> getExternalInlineAdvisorCost(
CallBase &CB);
556 bool getExternalInlineAdvisorShouldInline(
CallBase &CB);
557 InlineCost shouldInlineCandidate(InlineCandidate &Candidate);
558 bool getInlineCandidate(InlineCandidate *NewCandidate,
CallBase *CB);
560 tryInlineCandidate(InlineCandidate &Candidate,
563 inlineHotFunctionsWithPriority(
Function &
F,
567 void emitOptimizationRemarksForInlineCandidates(
570 void promoteMergeNotInlinedContextSamples(
574 std::unique_ptr<ProfiledCallGraph> buildProfiledCallGraph(
Module &M);
575 void generateMDProfMetadata(
Function &
F);
588 std::unique_ptr<SampleContextTracker> ContextTracker;
596 const std::string AnnotatedPassName;
600 std::unique_ptr<ProfileSymbolList> PSL;
611 struct NotInlinedProfileInfo {
634 bool ProfAccForSymsInList;
637 std::unique_ptr<InlineAdvisor> ExternalInlineAdvisor;
640 std::unique_ptr<SampleProfileMatcher> MatchingManager;
643 const char *getAnnotatedRemarkPassName()
const {
644 return AnnotatedPassName.c_str();
657 const std::vector<const BasicBlockT *> &BasicBlocks,
659 for (
auto &Jump :
Func.Jumps) {
660 const auto *BB = BasicBlocks[Jump.Source];
661 const auto *Succ = BasicBlocks[Jump.Target];
665 if (Successors[BB].
size() == 2 && Successors[BB].back() == Succ) {
666 if (isa<InvokeInst>(TI)) {
667 Jump.IsUnlikely =
true;
673 if (isa<UnreachableInst>(SuccTI)) {
674 Jump.IsUnlikely =
true;
695 return getProbeWeight(Inst);
699 return std::error_code();
704 if (isa<BranchInst>(Inst) || isa<IntrinsicInst>(Inst) || isa<PHINode>(Inst))
705 return std::error_code();
714 if (
const auto *CB = dyn_cast<CallBase>(&Inst))
715 if (!CB->isIndirectCall() && findCalleeFunctionSamples(*CB))
718 return getInstWeightImpl(Inst);
734SampleProfileLoader::findCalleeFunctionSamples(
const CallBase &Inst)
const {
742 CalleeName =
Callee->getName();
745 return ContextTracker->getCalleeContextSamplesFor(Inst, CalleeName);
758std::vector<const FunctionSamples *>
759SampleProfileLoader::findIndirectCallFunctionSamples(
762 std::vector<const FunctionSamples *>
R;
769 assert(L && R &&
"Expect non-null FunctionSamples");
770 if (
L->getHeadSamplesEstimate() !=
R->getHeadSamplesEstimate())
771 return L->getHeadSamplesEstimate() >
R->getHeadSamplesEstimate();
772 return L->getGUID() <
R->getGUID();
777 ContextTracker->getIndirectCalleeContextSamplesFor(DIL);
778 if (CalleeSamples.empty())
784 for (
const auto *
const FS : CalleeSamples) {
785 Sum +=
FS->getHeadSamplesEstimate();
797 auto T =
FS->findCallTargetMapAt(CallSite);
800 for (
const auto &T_C :
T.get())
805 for (
const auto &NameFS : *M) {
806 Sum += NameFS.second.getHeadSamplesEstimate();
807 R.push_back(&NameFS.second);
815SampleProfileLoader::findFunctionSamples(
const Instruction &Inst)
const {
826 auto it = DILocation2SampleMap.try_emplace(DIL,
nullptr);
829 it.first->second = ContextTracker->getContextSamplesFor(DIL);
832 Samples->findFunctionSamples(DIL, Reader->
getRemapper());
834 return it.first->second;
847 std::unique_ptr<InstrProfValueData[]> ValueData =
851 ValueData.get(), NumVals, TotalCount,
true);
857 unsigned NumPromoted = 0;
865 if (ValueData[
I].
Value == Function::getGUID(Candidate))
894 std::unique_ptr<InstrProfValueData[]> ValueData =
898 ValueData.get(), NumVals, OldSum,
true);
904 "If sum is 0, assume only one element in CallTargets "
905 "with count being NOMORE_ICP_MAGICNUM");
909 ValueCountMap[ValueData[
I].
Value] = ValueData[
I].Count;
916 OldSum -= Pair.first->second;
926 ValueCountMap[ValueData[
I].Value] = ValueData[
I].Count;
930 for (
const auto &Data : CallTargets) {
931 auto Pair = ValueCountMap.
try_emplace(Data.Value, Data.Count);
937 assert(Sum >= Data.Count &&
"Sum should never be less than Data.Count");
943 for (
const auto &ValueCount : ValueCountMap) {
945 InstrProfValueData{ValueCount.first, ValueCount.second});
949 [](
const InstrProfValueData &L,
const InstrProfValueData &R) {
950 if (L.Count != R.Count)
951 return L.Count > R.Count;
952 return L.Value > R.Value;
958 NewCallTargets, Sum, IPVK_IndirectCallTarget, MaxMDCount);
971bool SampleProfileLoader::tryPromoteAndInlineCandidate(
982 auto CalleeFunctionName = Candidate.CalleeSamples->getFunction();
987 auto &CI = *Candidate.CallInstr;
991 const char *Reason =
"Callee function not available";
998 if (!
R->second->isDeclaration() &&
R->second->getSubprogram() &&
999 R->second->hasFnAttribute(
"use-sample-profile") &&
1008 CI,
R->second, Candidate.CallsiteCount, Sum,
false, ORE);
1010 Sum -= Candidate.CallsiteCount;
1023 Candidate.CallInstr = DI;
1024 if (isa<CallInst>(DI) || isa<InvokeInst>(DI)) {
1025 bool Inlined = tryInlineCandidate(Candidate, InlinedCallSite);
1030 *DI,
static_cast<float>(Candidate.CallsiteCount) / SumOrigin);
1038 Candidate.CallInstr->getName())<<
" because "
1049 if (Callee ==
nullptr)
1058 if (
Cost.isAlways())
1064void SampleProfileLoader::emitOptimizationRemarksForInlineCandidates(
1067 for (
auto *
I : Candidates) {
1068 Function *CalledFunction =
I->getCalledFunction();
1069 if (CalledFunction) {
1071 "InlineAttempt",
I->getDebugLoc(),
1073 <<
"previous inlining reattempted for "
1074 << (
Hot ?
"hotness: '" :
"size: '")
1075 <<
ore::NV(
"Callee", CalledFunction) <<
"' into '"
1081void SampleProfileLoader::findExternalInlineCandidate(
1087 if (CB && getExternalInlineAdvisorShouldInline(*CB)) {
1115 std::queue<ContextTrieNode *> CalleeList;
1116 CalleeList.push(Caller);
1117 while (!CalleeList.empty()) {
1136 if (!Func ||
Func->isDeclaration())
1142 for (
const auto &TS : BS.second.getCallTargets())
1143 if (TS.second > Threshold) {
1145 if (!Callee ||
Callee->isDeclaration())
1146 InlinedGUIDs.
insert(TS.first.getHashCode());
1153 for (
auto &Child :
Node->getAllChildContext()) {
1155 CalleeList.push(CalleeNode);
1182bool SampleProfileLoader::inlineHotFunctions(
1186 assert((!ProfAccForSymsInList ||
1188 !
F.hasFnAttribute(
"profile-sample-accurate"))) &&
1189 "ProfAccForSymsInList should be false when profile-sample-accurate "
1193 bool Changed =
false;
1194 bool LocalChanged =
true;
1195 while (LocalChanged) {
1196 LocalChanged =
false;
1198 for (
auto &BB :
F) {
1202 for (
auto &
I : BB) {
1204 if (
auto *CB = dyn_cast<CallBase>(&
I)) {
1205 if (!isa<IntrinsicInst>(
I)) {
1206 if ((FS = findCalleeFunctionSamples(*CB))) {
1208 "GUIDToFuncNameMap has to be populated");
1210 if (
FS->getHeadSamplesEstimate() > 0 ||
1212 LocalNotInlinedCallSites.
insert({CB,
FS});
1215 else if (shouldInlineColdCallee(*CB))
1217 }
else if (getExternalInlineAdvisorShouldInline(*CB)) {
1223 if (
Hot || ExternalInlineAdvisor) {
1225 emitOptimizationRemarksForInlineCandidates(AllCandidates,
F,
true);
1228 emitOptimizationRemarksForInlineCandidates(ColdCandidates,
F,
false);
1232 Function *CalledFunction =
I->getCalledFunction();
1233 InlineCandidate Candidate = {
I, LocalNotInlinedCallSites.
lookup(
I),
1237 if (CalledFunction == &
F)
1239 if (
I->isIndirectCall()) {
1241 for (
const auto *FS : findIndirectCallFunctionSamples(*
I, Sum)) {
1243 if (LTOPhase == ThinOrFullLTOPhase::ThinLTOPreLink) {
1244 findExternalInlineCandidate(
I, FS, InlinedGUIDs,
1245 PSI->getOrCompHotCountThreshold());
1251 Candidate = {
I,
FS,
FS->getHeadSamplesEstimate(), 1.0};
1252 if (tryPromoteAndInlineCandidate(
F, Candidate, SumOrigin, Sum)) {
1253 LocalNotInlinedCallSites.
erase(
I);
1254 LocalChanged =
true;
1257 }
else if (CalledFunction && CalledFunction->
getSubprogram() &&
1259 if (tryInlineCandidate(Candidate)) {
1260 LocalNotInlinedCallSites.
erase(
I);
1261 LocalChanged =
true;
1263 }
else if (LTOPhase == ThinOrFullLTOPhase::ThinLTOPreLink) {
1264 findExternalInlineCandidate(
I, findCalleeFunctionSamples(*
I),
1266 PSI->getOrCompHotCountThreshold());
1269 Changed |= LocalChanged;
1275 promoteMergeNotInlinedContextSamples(LocalNotInlinedCallSites,
F);
1279bool SampleProfileLoader::tryInlineCandidate(
1286 CallBase &CB = *Candidate.CallInstr;
1288 assert(CalledFunction &&
"Expect a callee with definition");
1293 if (
Cost.isNever()) {
1295 "InlineFail", DLoc, BB)
1296 <<
"incompatible inlining");
1304 IFI.UpdateProfile =
false;
1307 if (!
IR.isSuccess())
1312 Cost,
true, getAnnotatedRemarkPassName());
1315 if (InlinedCallSites) {
1316 InlinedCallSites->
clear();
1317 for (
auto &
I : IFI.InlinedCallSites)
1322 ContextTracker->markContextSamplesInlined(Candidate.CalleeSamples);
1332 if (Candidate.CallsiteDistribution < 1) {
1333 for (
auto &
I : IFI.InlinedCallSites) {
1336 Candidate.CallsiteDistribution);
1338 NumDuplicatedInlinesite++;
1344bool SampleProfileLoader::getInlineCandidate(InlineCandidate *NewCandidate,
1346 assert(CB &&
"Expect non-null call instruction");
1348 if (isa<IntrinsicInst>(CB))
1352 const FunctionSamples *CalleeSamples = findCalleeFunctionSamples(*CB);
1355 if (!CalleeSamples && !getExternalInlineAdvisorShouldInline(*CB))
1359 if (std::optional<PseudoProbe> Probe =
extractProbe(*CB))
1360 Factor = Probe->Factor;
1364 *NewCandidate = {CB, CalleeSamples, CallsiteCount, Factor};
1368std::optional<InlineCost>
1369SampleProfileLoader::getExternalInlineAdvisorCost(
CallBase &CB) {
1370 std::unique_ptr<InlineAdvice> Advice =
nullptr;
1371 if (ExternalInlineAdvisor) {
1372 Advice = ExternalInlineAdvisor->getAdvice(CB);
1374 if (!Advice->isInliningRecommended()) {
1375 Advice->recordUnattemptedInlining();
1378 Advice->recordInlining();
1386bool SampleProfileLoader::getExternalInlineAdvisorShouldInline(
CallBase &CB) {
1387 std::optional<InlineCost>
Cost = getExternalInlineAdvisorCost(CB);
1392SampleProfileLoader::shouldInlineCandidate(InlineCandidate &Candidate) {
1393 if (std::optional<InlineCost> ReplayCost =
1394 getExternalInlineAdvisorCost(*Candidate.CallInstr))
1400 if (Candidate.CallsiteCount > PSI->getHotCountThreshold())
1407 assert(Callee &&
"Expect a definition for inline candidate of direct call");
1420 GetTTI(*Callee), GetAC, GetTLI);
1423 if (
Cost.isNever() ||
Cost.isAlways())
1456bool SampleProfileLoader::inlineHotFunctionsWithPriority(
1460 assert((!ProfAccForSymsInList ||
1462 !
F.hasFnAttribute(
"profile-sample-accurate"))) &&
1463 "ProfAccForSymsInList should be false when profile-sample-accurate "
1468 CandidateQueue CQueue;
1469 InlineCandidate NewCandidate;
1470 for (
auto &BB :
F) {
1471 for (
auto &
I : BB) {
1472 auto *CB = dyn_cast<CallBase>(&
I);
1475 if (getInlineCandidate(&NewCandidate, CB))
1476 CQueue.push(NewCandidate);
1485 "Max inline size limit should not be smaller than min inline size "
1490 if (ExternalInlineAdvisor)
1491 SizeLimit = std::numeric_limits<unsigned>::max();
1496 bool Changed =
false;
1497 while (!CQueue.empty() &&
F.getInstructionCount() <
SizeLimit) {
1498 InlineCandidate Candidate = CQueue.top();
1501 Function *CalledFunction =
I->getCalledFunction();
1503 if (CalledFunction == &
F)
1505 if (
I->isIndirectCall()) {
1507 auto CalleeSamples = findIndirectCallFunctionSamples(*
I, Sum);
1509 Sum *= Candidate.CallsiteDistribution;
1510 unsigned ICPCount = 0;
1511 for (
const auto *FS : CalleeSamples) {
1513 if (LTOPhase == ThinOrFullLTOPhase::ThinLTOPreLink) {
1514 findExternalInlineCandidate(
I, FS, InlinedGUIDs,
1515 PSI->getOrCompHotCountThreshold());
1519 FS->getHeadSamplesEstimate() * Candidate.CallsiteDistribution;
1534 if (!PSI->isHotCount(EntryCountDistributed))
1539 Candidate = {
I,
FS, EntryCountDistributed,
1540 Candidate.CallsiteDistribution};
1541 if (tryPromoteAndInlineCandidate(
F, Candidate, SumOrigin, Sum,
1542 &InlinedCallSites)) {
1543 for (
auto *CB : InlinedCallSites) {
1544 if (getInlineCandidate(&NewCandidate, CB))
1545 CQueue.emplace(NewCandidate);
1549 }
else if (!ContextTracker) {
1550 LocalNotInlinedCallSites.
insert({
I,
FS});
1553 }
else if (CalledFunction && CalledFunction->
getSubprogram() &&
1556 if (tryInlineCandidate(Candidate, &InlinedCallSites)) {
1557 for (
auto *CB : InlinedCallSites) {
1558 if (getInlineCandidate(&NewCandidate, CB))
1559 CQueue.emplace(NewCandidate);
1562 }
else if (!ContextTracker) {
1563 LocalNotInlinedCallSites.
insert({
I, Candidate.CalleeSamples});
1565 }
else if (LTOPhase == ThinOrFullLTOPhase::ThinLTOPreLink) {
1566 findExternalInlineCandidate(
I, findCalleeFunctionSamples(*
I),
1568 PSI->getOrCompHotCountThreshold());
1572 if (!CQueue.empty()) {
1574 ++NumCSInlinedHitMaxLimit;
1576 ++NumCSInlinedHitMinLimit;
1578 ++NumCSInlinedHitGrowthLimit;
1584 promoteMergeNotInlinedContextSamples(LocalNotInlinedCallSites,
F);
1588void SampleProfileLoader::promoteMergeNotInlinedContextSamples(
1592 for (
const auto &Pair : NonInlinedCallSites) {
1595 if (!Callee ||
Callee->isDeclaration())
1600 I->getDebugLoc(),
I->getParent())
1601 <<
"previous inlining not repeated: '" <<
ore::NV(
"Callee", Callee)
1602 <<
"' into '" <<
ore::NV(
"Caller", &
F) <<
"'");
1606 if (
FS->getTotalSamples() == 0 &&
FS->getHeadSamplesEstimate() == 0) {
1620 if (
FS->getHeadSamples() == 0) {
1624 FS->getHeadSamplesEstimate());
1633 OutlineFS = &OutlineFunctionSamples[
1635 OutlineFS->
merge(*FS, 1);
1641 notInlinedCallInfo.try_emplace(Callee, NotInlinedProfileInfo{0});
1642 pair.first->second.entryCount +=
FS->getHeadSamplesEstimate();
1653 InstrProfValueData{
I.first.getHashCode(),
I.second});
1660void SampleProfileLoader::generateMDProfMetadata(
Function &
F) {
1663 LLVM_DEBUG(
dbgs() <<
"\nPropagation complete. Setting branch weights\n");
1666 for (
auto &BI :
F) {
1669 if (BlockWeights[BB]) {
1670 for (
auto &
I : *BB) {
1671 if (!isa<CallInst>(
I) && !isa<InvokeInst>(
I))
1682 auto T =
FS->findCallTargetMapAt(CallSite);
1683 if (!
T ||
T.get().empty())
1690 if (Probe->Factor < 1)
1697 for (
const auto &
C :
T.get())
1704 FS->findFunctionSamplesMapAt(CallSite)) {
1705 for (
const auto &NameFS : *M)
1706 Sum += NameFS.second.getHeadSamplesEstimate();
1712 I.setMetadata(LLVMContext::MD_prof,
nullptr);
1713 }
else if (!isa<IntrinsicInst>(&
I)) {
1720 for (
auto &
I : *BB) {
1721 if (isa<CallInst>(
I) || isa<InvokeInst>(
I)) {
1722 if (cast<CallBase>(
I).isIndirectCall()) {
1723 I.setMetadata(LLVMContext::MD_prof,
nullptr);
1734 if (!isa<BranchInst>(TI) && !isa<SwitchInst>(TI) &&
1735 !isa<IndirectBrInst>(TI))
1741 :
Twine(
"<UNKNOWN LOCATION>"))
1750 std::vector<uint64_t> EdgeIndex;
1755 EdgeIndex[
I] = EdgeMultiplicity[Succ];
1756 EdgeMultiplicity[Succ]++;
1761 Edge
E = std::make_pair(BB, Succ);
1767 if (Weight > std::numeric_limits<uint32_t>::max()) {
1769 Weight = std::numeric_limits<uint32_t>::max();
1778 uint64_t W = Weight / EdgeMultiplicity[Succ];
1780 if (EdgeIndex[
I] < Weight % EdgeMultiplicity[Succ])
1785 if (Weight > MaxWeight) {
1787 MaxDestInst = Succ->getFirstNonPHIOrDbgOrLifetime();
1803 if (MaxWeight > 0 &&
1809 <<
"most popular destination for conditional branches at "
1810 <<
ore::NV(
"CondBranchesLoc", BranchLoc);
1829bool SampleProfileLoader::emitAnnotations(
Function &
F) {
1830 bool Changed =
false;
1835 dbgs() <<
"Profile is invalid due to CFG mismatch for Function "
1836 <<
F.getName() <<
"\n");
1837 ++NumMismatchedProfile;
1841 ++NumMatchedProfile;
1843 if (getFunctionLoc(
F) == 0)
1847 <<
F.getName() <<
": " << getFunctionLoc(
F) <<
"\n");
1852 Changed |= inlineHotFunctionsWithPriority(
F, InlinedGUIDs);
1854 Changed |= inlineHotFunctions(
F, InlinedGUIDs);
1856 Changed |= computeAndPropagateWeights(
F, InlinedGUIDs);
1859 generateMDProfMetadata(
F);
1861 emitCoverageRemarks(
F);
1865std::unique_ptr<ProfiledCallGraph>
1866SampleProfileLoader::buildProfiledCallGraph(
Module &M) {
1867 std::unique_ptr<ProfiledCallGraph> ProfiledCG;
1869 ProfiledCG = std::make_unique<ProfiledCallGraph>(*ContextTracker);
1871 ProfiledCG = std::make_unique<ProfiledCallGraph>(Reader->
getProfiles());
1877 if (
F.isDeclaration() || !
F.hasFnAttribute(
"use-sample-profile"))
1879 ProfiledCG->addProfiledFunction(
1886std::vector<Function *>
1888 std::vector<Function *> FunctionOrderList;
1889 FunctionOrderList.reserve(
M.size());
1892 errs() <<
"WARNING: -use-profiled-call-graph ignored, should be used "
1893 "together with -sample-profile-top-down-load.\n";
1906 if (!
F.isDeclaration() &&
F.hasFnAttribute(
"use-sample-profile"))
1907 FunctionOrderList.push_back(&
F);
1908 return FunctionOrderList;
1961 std::unique_ptr<ProfiledCallGraph> ProfiledCG = buildProfiledCallGraph(M);
1970 for (
auto *
Node : Range) {
1972 if (
F && !
F->isDeclaration() &&
F->hasFnAttribute(
"use-sample-profile"))
1973 FunctionOrderList.push_back(
F);
1983 if (!
F.isDeclaration() &&
F.hasFnAttribute(
"use-sample-profile"))
1984 FunctionOrderList.push_back(&
F);
1990 std::reverse(FunctionOrderList.begin(), FunctionOrderList.end());
1993 dbgs() <<
"Function processing order:\n";
1994 for (
auto F : FunctionOrderList) {
1995 dbgs() <<
F->getName() <<
"\n";
1999 return FunctionOrderList;
2002bool SampleProfileLoader::doInitialization(
Module &M,
2004 auto &Ctx =
M.getContext();
2007 Filename, Ctx, *FS, FSDiscriminatorPass::Base, RemappingFilename);
2008 if (std::error_code EC = ReaderOrErr.getError()) {
2009 std::string
Msg =
"Could not open profile: " +
EC.message();
2013 Reader = std::move(ReaderOrErr.get());
2014 Reader->
setSkipFlatProf(LTOPhase == ThinOrFullLTOPhase::ThinLTOPostLink);
2018 if (std::error_code EC = Reader->
read()) {
2019 std::string
Msg =
"profile reading failed: " +
EC.message();
2027 ProfAccForSymsInList =
2029 if (ProfAccForSymsInList) {
2030 NamesInProfile.clear();
2031 GUIDsInProfile.clear();
2034 for (
auto Name : *NameTable)
2035 GUIDsInProfile.insert(
Name.getHashCode());
2037 for (
auto Name : *NameTable)
2038 NamesInProfile.insert(
Name.stringRef());
2041 CoverageTracker.setProfAccForSymsInList(
true);
2046 M, *
FAM, Ctx,
nullptr,
2051 false,
InlineContext{LTOPhase, InlinePass::ReplaySampleProfileInliner});
2101 ContextTracker = std::make_unique<SampleContextTracker>(
2107 ProbeManager = std::make_unique<PseudoProbeManager>(M);
2110 "Pseudo-probe-based profile requires SampleProfileProbePass";
2120 std::make_unique<SampleProfileMatcher>(M, *Reader, ProbeManager.get());
2126void SampleProfileMatcher::findIRAnchors(
2127 const Function &
F, std::map<LineLocation, StringRef> &IRAnchors) {
2131 auto FindTopLevelInlinedCallsite = [](
const DILocation *DIL) {
2132 assert((DIL && DIL->getInlinedAt()) &&
"No inlined callsite");
2136 DIL = DIL->getInlinedAt();
2137 }
while (DIL->getInlinedAt());
2140 StringRef CalleeName = PrevDIL->getSubprogramLinkageName();
2141 return std::make_pair(Callsite, CalleeName);
2144 auto GetCanonicalCalleeName = [](
const CallBase *CB) {
2145 StringRef CalleeName = UnknownIndirectCallee;
2152 for (
auto &BB :
F) {
2153 for (
auto &
I : BB) {
2161 if (DIL->getInlinedAt()) {
2162 IRAnchors.emplace(FindTopLevelInlinedCallsite(DIL));
2166 if (
const auto *CB = dyn_cast<CallBase>(&
I)) {
2168 if (!isa<IntrinsicInst>(&
I))
2169 CalleeName = GetCanonicalCalleeName(CB);
2171 IRAnchors.emplace(
LineLocation(Probe->Id, 0), CalleeName);
2178 if (!isa<CallBase>(&
I) || isa<IntrinsicInst>(&
I))
2181 if (DIL->getInlinedAt()) {
2182 IRAnchors.emplace(FindTopLevelInlinedCallsite(DIL));
2185 StringRef CalleeName = GetCanonicalCalleeName(dyn_cast<CallBase>(&
I));
2186 IRAnchors.emplace(Callsite, CalleeName);
2193void SampleProfileMatcher::countMismatchedSamples(
const FunctionSamples &FS) {
2194 const auto *FuncDesc = ProbeManager->
getDesc(
FS.getGUID());
2200 MismatchedFuncHashSamples +=
FS.getTotalSamples();
2203 for (
const auto &
I :
FS.getCallsiteSamples())
2204 for (
const auto &CS :
I.second)
2205 countMismatchedSamples(CS.second);
2208void SampleProfileMatcher::countProfileMismatches(
2210 const std::map<LineLocation, StringRef> &IRAnchors,
2211 const std::map<
LineLocation, std::unordered_set<FunctionId>>
2213 [[maybe_unused]]
bool IsFuncHashMismatch =
false;
2215 TotalFuncHashSamples +=
FS.getTotalSamples();
2216 TotalProfiledFunc++;
2217 const auto *FuncDesc = ProbeManager->
getDesc(
F);
2220 NumMismatchedFuncHash++;
2221 IsFuncHashMismatch =
true;
2223 countMismatchedSamples(FS);
2227 uint64_t FuncMismatchedCallsites = 0;
2228 uint64_t FuncProfiledCallsites = 0;
2229 countProfileCallsiteMismatches(FS, IRAnchors, ProfileAnchors,
2230 FuncMismatchedCallsites,
2231 FuncProfiledCallsites);
2232 TotalProfiledCallsites += FuncProfiledCallsites;
2233 NumMismatchedCallsites += FuncMismatchedCallsites;
2236 FuncMismatchedCallsites)
2237 dbgs() <<
"Function checksum is matched but there are "
2238 << FuncMismatchedCallsites <<
"/" << FuncProfiledCallsites
2239 <<
" mismatched callsites.\n";
2243void SampleProfileMatcher::countProfileCallsiteMismatches(
2245 const std::map<LineLocation, StringRef> &IRAnchors,
2246 const std::map<
LineLocation, std::unordered_set<FunctionId>>
2252 for (
const auto &
I : ProfileAnchors) {
2253 const auto &Loc =
I.first;
2254 const auto &Callees =
I.second;
2255 assert(!Callees.empty() &&
"Callees should not be empty");
2258 const auto &
IR = IRAnchors.find(Loc);
2259 if (
IR != IRAnchors.end())
2260 IRCalleeName =
IR->second;
2264 auto CTM =
FS.findCallTargetMapAt(Loc);
2266 for (
const auto &
I : CTM.get())
2267 CallsiteSamples +=
I.second;
2269 const auto *FSMap =
FS.findFunctionSamplesMapAt(Loc);
2271 for (
const auto &
I : *FSMap)
2272 CallsiteSamples +=
I.second.getTotalSamples();
2275 bool CallsiteIsMatched =
false;
2280 if (IRCalleeName == UnknownIndirectCallee)
2281 CallsiteIsMatched =
true;
2282 else if (Callees.size() == 1 && Callees.count(
getRepInFormat(IRCalleeName)))
2283 CallsiteIsMatched =
true;
2285 FuncProfiledCallsites++;
2286 TotalCallsiteSamples += CallsiteSamples;
2287 if (!CallsiteIsMatched) {
2288 FuncMismatchedCallsites++;
2289 MismatchedCallsiteSamples += CallsiteSamples;
2294void SampleProfileMatcher::findProfileAnchors(
const FunctionSamples &FS,
2295 std::map<
LineLocation, std::unordered_set<FunctionId>> &ProfileAnchors) {
2296 auto isInvalidLineOffset = [](
uint32_t LineOffset) {
2297 return LineOffset & 0x8000;
2300 for (
const auto &
I :
FS.getBodySamples()) {
2304 for (
const auto &
I :
I.second.getCallTargets()) {
2305 auto Ret = ProfileAnchors.try_emplace(Loc,
2306 std::unordered_set<FunctionId>());
2307 Ret.first->second.insert(
I.first);
2311 for (
const auto &
I :
FS.getCallsiteSamples()) {
2315 const auto &CalleeMap =
I.second;
2316 for (
const auto &
I : CalleeMap) {
2317 auto Ret = ProfileAnchors.try_emplace(Loc,
2318 std::unordered_set<FunctionId>());
2319 Ret.first->second.insert(
I.first);
2341void SampleProfileMatcher::runStaleProfileMatching(
2343 const std::map<LineLocation, StringRef> &IRAnchors,
2344 const std::map<
LineLocation, std::unordered_set<FunctionId>>
2349 assert(IRToProfileLocationMap.empty() &&
2350 "Run stale profile matching only once per function");
2352 std::unordered_map<FunctionId, std::set<LineLocation>>
2353 CalleeToCallsitesMap;
2354 for (
const auto &
I : ProfileAnchors) {
2355 const auto &Loc =
I.first;
2356 const auto &Callees =
I.second;
2358 if (Callees.size() == 1) {
2360 const auto &Candidates = CalleeToCallsitesMap.try_emplace(
2361 CalleeName, std::set<LineLocation>());
2362 Candidates.first->second.insert(Loc);
2369 IRToProfileLocationMap.insert({
From, To});
2373 int32_t LocationDelta = 0;
2376 for (
const auto &
IR : IRAnchors) {
2377 const auto &Loc =
IR.first;
2378 auto CalleeName =
IR.second;
2379 bool IsMatchedAnchor =
false;
2381 if (!CalleeName.
empty()) {
2382 auto CandidateAnchors = CalleeToCallsitesMap.find(
2384 if (CandidateAnchors != CalleeToCallsitesMap.end() &&
2385 !CandidateAnchors->second.empty()) {
2386 auto CI = CandidateAnchors->second.begin();
2387 const auto Candidate = *CI;
2388 CandidateAnchors->second.erase(CI);
2389 InsertMatching(Loc, Candidate);
2391 <<
" is matched from " << Loc <<
" to " << Candidate
2393 LocationDelta = Candidate.LineOffset - Loc.
LineOffset;
2399 for (
size_t I = (LastMatchedNonAnchors.
size() + 1) / 2;
2400 I < LastMatchedNonAnchors.
size();
I++) {
2401 const auto &
L = LastMatchedNonAnchors[
I];
2402 uint32_t CandidateLineOffset =
L.LineOffset + LocationDelta;
2403 LineLocation Candidate(CandidateLineOffset,
L.Discriminator);
2404 InsertMatching(L, Candidate);
2405 LLVM_DEBUG(
dbgs() <<
"Location is rematched backwards from " << L
2406 <<
" to " << Candidate <<
"\n");
2409 IsMatchedAnchor =
true;
2410 LastMatchedNonAnchors.
clear();
2415 if (!IsMatchedAnchor) {
2418 InsertMatching(Loc, Candidate);
2419 LLVM_DEBUG(
dbgs() <<
"Location is matched from " << Loc <<
" to "
2420 << Candidate <<
"\n");
2426void SampleProfileMatcher::runOnFunction(
const Function &
F) {
2433 const auto *FSFlattened = getFlattenedSamplesFor(
F);
2440 std::map<LineLocation, StringRef> IRAnchors;
2441 findIRAnchors(
F, IRAnchors);
2444 std::map<LineLocation, std::unordered_set<FunctionId>> ProfileAnchors;
2445 findProfileAnchors(*FSFlattened, ProfileAnchors);
2455 countProfileMismatches(
F, *FS, IRAnchors, ProfileAnchors);
2464 runStaleProfileMatching(
F, IRAnchors, ProfileAnchors,
2465 getIRToProfileLocationMap(
F));
2469void SampleProfileMatcher::runOnModule() {
2473 if (
F.isDeclaration() || !
F.hasFnAttribute(
"use-sample-profile"))
2478 distributeIRToProfileLocationMap();
2482 errs() <<
"(" << NumMismatchedFuncHash <<
"/" << TotalProfiledFunc <<
")"
2483 <<
" of functions' profile are invalid and "
2484 <<
" (" << MismatchedFuncHashSamples <<
"/" << TotalFuncHashSamples
2486 <<
" of samples are discarded due to function hash mismatch.\n";
2488 errs() <<
"(" << NumMismatchedCallsites <<
"/" << TotalProfiledCallsites
2490 <<
" of callsites' profile are invalid and "
2491 <<
"(" << MismatchedCallsiteSamples <<
"/" << TotalCallsiteSamples
2493 <<
" of samples are discarded due to callsite location mismatch.\n";
2502 ProfStatsVec.
emplace_back(
"NumMismatchedFuncHash", NumMismatchedFuncHash);
2503 ProfStatsVec.
emplace_back(
"TotalProfiledFunc", TotalProfiledFunc);
2505 MismatchedFuncHashSamples);
2506 ProfStatsVec.
emplace_back(
"TotalFuncHashSamples", TotalFuncHashSamples);
2509 ProfStatsVec.
emplace_back(
"NumMismatchedCallsites", NumMismatchedCallsites);
2510 ProfStatsVec.
emplace_back(
"TotalProfiledCallsites", TotalProfiledCallsites);
2512 MismatchedCallsiteSamples);
2513 ProfStatsVec.
emplace_back(
"TotalCallsiteSamples", TotalCallsiteSamples);
2515 auto *MD = MDB.createLLVMStats(ProfStatsVec);
2516 auto *NMD =
M.getOrInsertNamedMetadata(
"llvm.stats");
2517 NMD->addOperand(MD);
2521void SampleProfileMatcher::distributeIRToProfileLocationMap(
2523 const auto ProfileMappings = FuncMappings.
find(
FS.getFuncName());
2524 if (ProfileMappings != FuncMappings.
end()) {
2525 FS.setIRToProfileLocationMap(&(ProfileMappings->second));
2528 for (
auto &Inlinees :
FS.getCallsiteSamples()) {
2529 for (
auto FS : Inlinees.second) {
2530 distributeIRToProfileLocationMap(
FS.second);
2537void SampleProfileMatcher::distributeIRToProfileLocationMap() {
2539 distributeIRToProfileLocationMap(
I.second);
2546 GUIDToFuncNameMapper Mapper(M, *Reader, GUIDToFuncNameMap);
2549 if (
M.getProfileSummary(
false) ==
nullptr) {
2556 TotalCollectedSamples +=
I.second.getTotalSamples();
2560 for (
const auto &N_F :
M.getValueSymbolTable()) {
2562 Function *
F = dyn_cast<Function>(N_F.getValue());
2563 if (
F ==
nullptr || OrigName.
empty())
2567 if (OrigName != NewName && !NewName.
empty()) {
2574 r.first->second =
nullptr;
2579 if (
auto MapName = Remapper->lookUpNameInProfile(OrigName)) {
2580 if (*MapName != OrigName && !MapName->empty())
2586 "No empty StringRef should be added in SymbolMap");
2590 MatchingManager->runOnModule();
2593 bool retval =
false;
2594 for (
auto *
F : buildFunctionOrder(M, CG)) {
2596 clearFunctionData();
2602 for (
const std::pair<Function *, NotInlinedProfileInfo> &pair :
2610 LLVM_DEBUG(
dbgs() <<
"\n\nProcessing Function " <<
F.getName() <<
"\n");
2611 DILocation2SampleMap.clear();
2622 initialEntryCount = 0;
2625 ProfAccForSymsInList =
false;
2627 CoverageTracker.setProfAccForSymsInList(ProfAccForSymsInList);
2633 if (ProfAccForSymsInList) {
2635 if (PSL->contains(
F.getName()))
2636 initialEntryCount = 0;
2650 GUIDsInProfile.count(Function::getGUID(CanonName))) ||
2652 initialEntryCount = -1;
2657 if (!
F.getEntryCount())
2659 std::unique_ptr<OptimizationRemarkEmitter> OwnedORE;
2666 OwnedORE = std::make_unique<OptimizationRemarkEmitter>(&
F);
2667 ORE = OwnedORE.get();
2671 Samples = ContextTracker->getBaseSamplesFor(
F);
2678 auto It = OutlineFunctionSamples.find(
FunctionId(CanonName));
2679 if (It != OutlineFunctionSamples.end()) {
2680 Samples = &It->second;
2681 }
else if (
auto Remapper = Reader->
getRemapper()) {
2682 if (
auto RemppedName = Remapper->lookUpNameInProfile(CanonName)) {
2683 It = OutlineFunctionSamples.find(
FunctionId(*RemppedName));
2684 if (It != OutlineFunctionSamples.end())
2685 Samples = &It->second;
2691 if (Samples && !Samples->
empty())
2692 return emitAnnotations(
F);
2698 : ProfileFileName(File), ProfileRemappingFileName(RemappingFile),
2699 LTOPhase(LTOPhase), FS(
std::
move(FS)) {}
2719 SampleProfileLoader SampleLoader(
2722 : ProfileRemappingFileName,
2723 LTOPhase, FS, GetAssumptionCache, GetTTI, GetTLI);
2725 if (!SampleLoader.doInitialization(M, &
FAM))
2730 if (!SampleLoader.runOnModule(M, &AM, PSI, CG))
This file defines the StringMap class.
BlockVerifier::State From
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
#define clEnumValN(ENUMVAL, FLAGNAME, DESC)
This file defines the DenseMap class.
This file defines the DenseSet and SmallDenseSet classes.
static bool runOnFunction(Function &F, bool PostInlining)
Provides ErrorOr<T> smart pointer.
static cl::opt< unsigned > SizeLimit("eif-limit", cl::init(6), cl::Hidden, cl::desc("Size limit in Hexagon early if-conversion"))
Implements a lazy call graph analysis and related passes for the new pass manager.
Legalize the Machine IR a function s Machine IR
This file implements a map that provides insertion order iteration.
static const Function * getCalledFunction(const Value *V, bool &IsNoBuiltin)
Module.h This file contains the declarations for the Module class.
FunctionAnalysisManager FAM
This header defines various interfaces for pass management in LLVM.
This file defines the PriorityQueue class.
This file contains the declarations for profiling metadata utility functions.
This builds on the llvm/ADT/GraphTraits.h file to find the strongly connected components (SCCs) of a ...
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
This file provides the interface for context-sensitive profile tracker used by CSSPGO.
This file provides the interface for the sampled PGO profile loader base implementation.
This file provides the utility functions for the sampled PGO loader base implementation.
This file provides the interface for the pseudo probe implementation for AutoFDO.
static cl::opt< std::string > SampleProfileFile("sample-profile-file", cl::init(""), cl::value_desc("filename"), cl::desc("Profile file loaded by -sample-profile"), cl::Hidden)
static cl::opt< bool > ProfileSampleBlockAccurate("profile-sample-block-accurate", cl::Hidden, cl::init(false), cl::desc("If the sample profile is accurate, we will mark all un-sampled " "branches and calls as having 0 samples. Otherwise, treat " "them conservatively as unknown. "))
static cl::opt< unsigned > MaxNumPromotions("sample-profile-icp-max-prom", cl::init(3), cl::Hidden, cl::desc("Max number of promotions for a single indirect " "call callsite in sample profile loader"))
static cl::opt< ReplayInlinerSettings::Fallback > ProfileInlineReplayFallback("sample-profile-inline-replay-fallback", cl::init(ReplayInlinerSettings::Fallback::Original), cl::values(clEnumValN(ReplayInlinerSettings::Fallback::Original, "Original", "All decisions not in replay send to original advisor (default)"), clEnumValN(ReplayInlinerSettings::Fallback::AlwaysInline, "AlwaysInline", "All decisions not in replay are inlined"), clEnumValN(ReplayInlinerSettings::Fallback::NeverInline, "NeverInline", "All decisions not in replay are not inlined")), cl::desc("How sample profile inline replay treats sites that don't come " "from the replay. Original: defers to original advisor, " "AlwaysInline: inline all sites not in replay, NeverInline: " "inline no sites not in replay"), cl::Hidden)
static cl::opt< bool > OverwriteExistingWeights("overwrite-existing-weights", cl::Hidden, cl::init(false), cl::desc("Ignore existing branch weights on IR and always overwrite."))
static void updateIDTMetaData(Instruction &Inst, const SmallVectorImpl< InstrProfValueData > &CallTargets, uint64_t Sum)
Update indirect call target profile metadata for Inst.
static cl::opt< bool > AnnotateSampleProfileInlinePhase("annotate-sample-profile-inline-phase", cl::Hidden, cl::init(false), cl::desc("Annotate LTO phase (prelink / postlink), or main (no LTO) for " "sample-profile inline pass name."))
static cl::opt< std::string > ProfileInlineReplayFile("sample-profile-inline-replay", cl::init(""), cl::value_desc("filename"), cl::desc("Optimization remarks file containing inline remarks to be replayed " "by inlining from sample profile loader."), cl::Hidden)
static cl::opt< bool > ProfileMergeInlinee("sample-profile-merge-inlinee", cl::Hidden, cl::init(true), cl::desc("Merge past inlinee's profile to outline version if sample " "profile loader decided not to inline a call site. It will " "only be enabled when top-down order of profile loading is " "enabled. "))
static cl::opt< bool > PersistProfileStaleness("persist-profile-staleness", cl::Hidden, cl::init(false), cl::desc("Compute stale profile statistical metrics and write it into the " "native object file(.llvm_stats section)."))
static bool doesHistoryAllowICP(const Instruction &Inst, StringRef Candidate)
Check whether the indirect call promotion history of Inst allows the promotion for Candidate.
static SmallVector< InstrProfValueData, 2 > GetSortedValueDataFromCallTargets(const SampleRecord::CallTargetMap &M)
Returns the sorted CallTargetMap M by count in descending order.
static cl::opt< bool > UseProfiledCallGraph("use-profiled-call-graph", cl::init(true), cl::Hidden, cl::desc("Process functions in a top-down order " "defined by the profiled call graph when " "-sample-profile-top-down-load is on."))
static cl::opt< ReplayInlinerSettings::Scope > ProfileInlineReplayScope("sample-profile-inline-replay-scope", cl::init(ReplayInlinerSettings::Scope::Function), cl::values(clEnumValN(ReplayInlinerSettings::Scope::Function, "Function", "Replay on functions that have remarks associated " "with them (default)"), clEnumValN(ReplayInlinerSettings::Scope::Module, "Module", "Replay on the entire module")), cl::desc("Whether inline replay should be applied to the entire " "Module or just the Functions (default) that are present as " "callers in remarks during sample profile inlining."), cl::Hidden)
static cl::opt< unsigned > ProfileICPRelativeHotness("sample-profile-icp-relative-hotness", cl::Hidden, cl::init(25), cl::desc("Relative hotness percentage threshold for indirect " "call promotion in proirity-based sample profile loader inlining."))
Function::ProfileCount ProfileCount
static cl::opt< unsigned > ProfileICPRelativeHotnessSkip("sample-profile-icp-relative-hotness-skip", cl::Hidden, cl::init(1), cl::desc("Skip relative hotness check for ICP up to given number of targets."))
static cl::opt< bool > ReportProfileStaleness("report-profile-staleness", cl::Hidden, cl::init(false), cl::desc("Compute and report stale profile statistical metrics."))
static cl::opt< bool > UsePreInlinerDecision("sample-profile-use-preinliner", cl::Hidden, cl::desc("Use the preinliner decisions stored in profile context."))
static cl::opt< bool > ProfileAccurateForSymsInList("profile-accurate-for-symsinlist", cl::Hidden, cl::init(true), cl::desc("For symbols in profile symbol list, regard their profiles to " "be accurate. It may be overriden by profile-sample-accurate. "))
static cl::opt< bool > DisableSampleLoaderInlining("disable-sample-loader-inlining", cl::Hidden, cl::init(false), cl::desc("If true, artifically skip inline transformation in sample-loader " "pass, and merge (or scale) profiles (as configured by " "--sample-profile-merge-inlinee)."))
static cl::opt< bool > ProfileSizeInline("sample-profile-inline-size", cl::Hidden, cl::init(false), cl::desc("Inline cold call sites in profile loader if it's beneficial " "for code size."))
static cl::opt< bool > SalvageStaleProfile("salvage-stale-profile", cl::Hidden, cl::init(false), cl::desc("Salvage stale profile by fuzzy matching and use the remapped " "location for sample profile query."))
static cl::opt< bool > ProfileTopDownLoad("sample-profile-top-down-load", cl::Hidden, cl::init(true), cl::desc("Do profile annotation and inlining for functions in top-down " "order of call graph during sample profile loading. It only " "works for new pass manager. "))
static cl::opt< bool > ProfileSampleAccurate("profile-sample-accurate", cl::Hidden, cl::init(false), cl::desc("If the sample profile is accurate, we will mark all un-sampled " "callsite and function as having 0 samples. Otherwise, treat " "un-sampled callsites and functions conservatively as unknown. "))
static cl::opt< bool > AllowRecursiveInline("sample-profile-recursive-inline", cl::Hidden, cl::desc("Allow sample loader inliner to inline recursive calls."))
static cl::opt< CallSiteFormat::Format > ProfileInlineReplayFormat("sample-profile-inline-replay-format", cl::init(CallSiteFormat::Format::LineColumnDiscriminator), cl::values(clEnumValN(CallSiteFormat::Format::Line, "Line", "<Line Number>"), clEnumValN(CallSiteFormat::Format::LineColumn, "LineColumn", "<Line Number>:<Column Number>"), clEnumValN(CallSiteFormat::Format::LineDiscriminator, "LineDiscriminator", "<Line Number>.<Discriminator>"), clEnumValN(CallSiteFormat::Format::LineColumnDiscriminator, "LineColumnDiscriminator", "<Line Number>:<Column Number>.<Discriminator> (default)")), cl::desc("How sample profile inline replay file is formatted"), cl::Hidden)
static cl::opt< std::string > SampleProfileRemappingFile("sample-profile-remapping-file", cl::init(""), cl::value_desc("filename"), cl::desc("Profile remapping file loaded by -sample-profile"), cl::Hidden)
static cl::opt< bool > CallsitePrioritizedInline("sample-profile-prioritized-inline", cl::Hidden, cl::desc("Use call site prioritized inlining for sample profile loader." "Currently only CSSPGO is supported."))
This file provides the interface for the sampled PGO loader pass.
This file defines the SmallVector class.
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
Defines the virtual file system interface vfs::FileSystem.
A container for analyses that lazily runs them and caches their results.
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)
Get the result of an analysis pass for a given IR unit.
A function analysis which provides an AssumptionCache.
A cache of @llvm.assume calls within a function.
LLVM Basic Block Representation.
const Function * getParent() const
Return the enclosing method, or null if none.
Base class for all callable instructions (InvokeInst and CallInst) Holds everything related to callin...
Function * getCalledFunction() const
Returns the function called, or null if this is an indirect function invocation or the function signa...
This class represents a function call, abstracting a target machine's calling convention.
ValueT lookup(const_arg_type_t< KeyT > Val) const
lookup - Return the entry for the specified key, or a default constructed value if no such entry exis...
iterator find(const_arg_type_t< KeyT > Val)
std::pair< iterator, bool > try_emplace(KeyT &&Key, Ts &&... Args)
size_type count(const_arg_type_t< KeyT > Val) const
Return 1 if the specified key is in the map, 0 otherwise.
Implements a dense probed hash-table based set.
Diagnostic information for the sample profiler.
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
Represents either an error or a value T.
Class to represent profile counts.
DISubprogram * getSubprogram() const
Get the attached subprogram.
bool isDeclaration() const
Return true if the primary definition of this global value is outside of the current translation unit...
static bool isAvailableExternallyLinkage(LinkageTypes Linkage)
Module * getParent()
Get the module that this global value is contained inside of...
Represents the cost of inlining a function.
static InlineCost getNever(const char *Reason, std::optional< CostBenefitPair > CostBenefit=std::nullopt)
static InlineCost getAlways(const char *Reason, std::optional< CostBenefitPair > CostBenefit=std::nullopt)
static InlineCost get(int Cost, int Threshold, int StaticBonus=0)
This class captures the data input to the InlineFunction call, and records the auxiliary results prod...
InlineResult is basically true or false.
An analysis over an "outer" IR unit that provides access to an analysis manager over an "inner" IR un...
unsigned getNumSuccessors() const LLVM_READONLY
Return the number of successors that this instruction has.
const DebugLoc & getDebugLoc() const
Return the debug location for this node as a DebugLoc.
bool extractProfTotalWeight(uint64_t &TotalVal) const
Retrieve total raw weight values of a branch.
const BasicBlock * getParent() const
BasicBlock * getSuccessor(unsigned Idx) const LLVM_READONLY
Return the specified successor. This instruction must be a terminator.
void setMetadata(unsigned KindID, MDNode *Node)
Set the metadata of the specified kind to the specified node.
A smart pointer to a reference-counted object that inherits from RefCountedBase or ThreadSafeRefCount...
This is an important class for using LLVM in a threaded context.
void diagnose(const DiagnosticInfo &DI)
Report a message to the currently installed diagnostic handler.
An analysis pass which computes the call graph for a module.
A node in the call graph.
A RefSCC of the call graph.
An SCC of the call graph.
A lazily constructed view of the call graph of a module.
iterator_range< postorder_ref_scc_iterator > postorder_ref_sccs()
This class implements a map that also provides access to all stored values in a deterministic order.
VectorType::iterator erase(typename VectorType::iterator Iterator)
Remove the element given by Iterator.
std::pair< iterator, bool > insert(const std::pair< KeyT, ValueT > &KV)
ValueT lookup(const KeyT &Key) const
A Module instance is used to store all the information related to an LLVM module.
PostDominatorTree Class - Concrete subclass of DominatorTree that is used to compute the post-dominat...
A set of analyses that are preserved following a run of a transformation pass.
static PreservedAnalyses none()
Convenience factory function for the empty preserved set.
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
PriorityQueue - This class behaves like std::priority_queue and provides a few additional convenience...
An analysis pass based on the new PM to deliver ProfileSummaryInfo.
Analysis providing profile information.
Metadata * getMD(LLVMContext &Context, bool AddPartialField=true, bool AddPartialProfileRatioField=true)
Return summary information as metadata.
bool profileIsHashMismatched(const PseudoProbeDescriptor &FuncDesc, const FunctionSamples &Samples) const
bool moduleIsProbed(const Module &M) const
bool profileIsValid(const Function &F, const FunctionSamples &Samples) const
const PseudoProbeDescriptor * getDesc(uint64_t GUID) const
Sample profile inference pass.
void computeDominanceAndLoopInfo(FunctionT &F)
virtual ErrorOr< uint64_t > getInstWeight(const InstructionT &Inst)
Get the weight for an instruction.
virtual const FunctionSamples * findFunctionSamples(const InstructionT &I) const
Get the FunctionSamples for an instruction.
PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM)
SampleProfileLoaderPass(std::string File="", std::string RemappingFile="", ThinOrFullLTOPhase LTOPhase=ThinOrFullLTOPhase::None, IntrusiveRefCntPtr< vfs::FileSystem > FS=nullptr)
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
reference emplace_back(ArgTypes &&... Args)
iterator insert(iterator I, T &&Elt)
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
StringMap - This is an unconventional map that is specialized for handling keys that are "strings",...
iterator find(StringRef Key)
std::pair< iterator, bool > try_emplace(StringRef Key, ArgsTy &&...Args)
Emplace a new element for the specified key into the map if the key isn't already in the map.
StringRef - Represent a constant reference to a string, i.e.
constexpr bool empty() const
empty - Check if the string is empty.
StringSet - A wrapper for StringMap that provides set-like functionality.
Analysis pass providing the TargetTransformInfo.
Analysis pass providing the TargetLibraryInfo.
Provides information about what library functions are available for the current target.
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
LLVM Value Representation.
StringRef getName() const
Return a constant reference to the value's name.
std::pair< iterator, bool > insert(const ValueT &V)
This class represents a function that is read from a sample profile.
Representation of the samples collected for a function.
void findInlinedFunctions(DenseSet< GlobalValue::GUID > &S, const HashKeyMap< std::unordered_map, FunctionId, Function * > &SymbolMap, uint64_t Threshold) const
Recursively traverses all children, if the total sample count of the corresponding function is no les...
FunctionId getFunction() const
Return the function name.
static bool ProfileIsProbeBased
static StringRef getCanonicalFnName(const Function &F)
Return the canonical name for a function, taking into account suffix elision policy attributes.
SampleContext & getContext() const
sampleprof_error merge(const FunctionSamples &Other, uint64_t Weight=1)
Merge the samples in Other into this one.
void SetContextSynthetic()
static LineLocation getCallSiteIdentifier(const DILocation *DIL, bool ProfileIsFS=false)
Returns a unique call site identifier for a given debug location of a call instruction.
uint64_t getHeadSamplesEstimate() const
Return an estimate of the sample count of the function entry basic block.
uint64_t getGUID() const
Return the GUID of the context's name.
const BodySampleMap & getBodySamples() const
Return all the samples collected in the body of the function.
static bool UseMD5
Whether the profile uses MD5 to represent string.
This class is a wrapper to associative container MapT<KeyT, ValueT> using the hash value of the origi...
static void flattenProfile(SampleProfileMap &ProfileMap, bool ProfileIsCS=false)
bool hasAttribute(ContextAttributeMask A)
This class provides operator overloads to the map container using MD5 as the key type,...
iterator find(const SampleContext &Ctx)
Sample-based profile reader.
SampleProfileMap & getProfiles()
Return all the profiles.
bool profileIsProbeBased() const
Whether input profile is based on pseudo probes.
FunctionSamples * getSamplesFor(const Function &F)
Return the samples collected for function F.
bool profileIsPreInlined() const
Whether input profile contains ShouldBeInlined contexts.
std::error_code read()
The interface to read sample profiles from the associated file.
SampleProfileReaderItaniumRemapper * getRemapper()
void setModule(const Module *Mod)
ProfileSummary & getSummary() const
Return the profile summary.
virtual std::vector< FunctionId > * getNameTable()
It includes all the names that have samples either in outline instance or inline instance.
bool profileIsCS() const
Whether input profile is fully context-sensitive.
virtual void setSkipFlatProf(bool Skip)
Don't read profile without context if the flag is set.
static ErrorOr< std::unique_ptr< SampleProfileReader > > create(const std::string Filename, LLVMContext &C, vfs::FileSystem &FS, FSDiscriminatorPass P=FSDiscriminatorPass::Base, const std::string RemapFilename="")
Create a sample profile reader appropriate to the file format.
virtual std::unique_ptr< ProfileSymbolList > getProfileSymbolList()
std::unordered_map< FunctionId, uint64_t > CallTargetMap
static const SortedCallTargetSet SortCallTargets(const CallTargetMap &Targets)
Sort call targets in descending order of call frequency.
static const CallTargetMap adjustCallTargets(const CallTargetMap &Targets, float DistributionFactor)
Prorate call targets by a distribution factor.
Enumerate the SCCs of a directed graph in reverse topological order of the SCC DAG.
bool isAtEnd() const
Direct loop termination test which is more efficient than comparison with end().
Sort the nodes of a directed SCC in the decreasing order of the edge weights.
const CustomOperand< const MCSubtargetInfo & > Msg[]
@ C
The default llvm calling convention, compatible with C.
ValuesClass values(OptsTy... Options)
Helper to build a ValuesClass by forwarding a variable number of arguments as an initializer list to ...
initializer< Ty > init(const Ty &Val)
void checkExpectAnnotations(Instruction &I, const ArrayRef< uint32_t > ExistingWeights, bool IsFrontend)
checkExpectAnnotations - compares PGO counters to the thresholds used for llvm.expect and warns if th...
DenseMap< SymbolStringPtr, ExecutorSymbolDef > SymbolMap
A map from symbol names (as SymbolStringPtrs) to JITSymbols (address/flags pairs).
DiagnosticInfoOptimizationBase::Argument NV
CallBase & promoteIndirectCall(CallBase &CB, Function *F, uint64_t Count, uint64_t TotalCount, bool AttachProfToDirectCall, OptimizationRemarkEmitter *ORE)
NodeAddr< FuncNode * > Func
static FunctionId getRepInFormat(StringRef Name)
Get the proper representation of a string according to whether the current Format uses MD5 to represe...
std::unordered_map< LineLocation, LineLocation, LineLocationHash > LocToLocMap
@ ContextDuplicatedIntoBase
std::map< FunctionId, FunctionSamples > FunctionSamplesMap
bool callsiteIsHot(const FunctionSamples *CallsiteFS, ProfileSummaryInfo *PSI, bool ProfAccForSymsInList)
Return true if the given callsite is hot with respect to the hot cutoff threshold.
IntrusiveRefCntPtr< FileSystem > getRealFileSystem()
Gets a vfs::FileSystem for the 'real' file system, as seen by the operating system.
This is an optimization pass for GlobalISel generic memory operations.
bool getValueProfDataFromInst(const Instruction &Inst, InstrProfValueKind ValueKind, uint32_t MaxNumValueData, InstrProfValueData ValueData[], uint32_t &ActualNumValueData, uint64_t &TotalC, bool GetNoICPValue=false)
Extract the value profile data from Inst which is annotated with value profile meta data.
bool isLegalToPromote(const CallBase &CB, Function *Callee, const char **FailureReason=nullptr)
Return true if the given indirect call site can be made to call Callee.
auto size(R &&Range, std::enable_if_t< std::is_base_of< std::random_access_iterator_tag, typename std::iterator_traits< decltype(Range.begin())>::iterator_category >::value, void > *=nullptr)
Get the size of a range.
cl::opt< int > ProfileInlineLimitMin
bool succ_empty(const Instruction *I)
scc_iterator< T > scc_begin(const T &G)
Construct the begin iterator for a deduced graph type T.
void setProbeDistributionFactor(Instruction &Inst, float Factor)
void setBranchWeights(Instruction &I, ArrayRef< uint32_t > Weights)
Create a new branch_weights metadata node and add or overwrite a prof metadata reference to instructi...
std::string AnnotateInlinePassName(InlineContext IC)
ThinOrFullLTOPhase
This enumerates the LLVM full LTO or ThinLTO optimization phases.
InlineCost getInlineCost(CallBase &Call, const InlineParams &Params, TargetTransformInfo &CalleeTTI, function_ref< AssumptionCache &(Function &)> GetAssumptionCache, function_ref< const TargetLibraryInfo &(Function &)> GetTLI, function_ref< BlockFrequencyInfo &(Function &)> GetBFI=nullptr, ProfileSummaryInfo *PSI=nullptr, OptimizationRemarkEmitter *ORE=nullptr)
Get an InlineCost object representing the cost of inlining this callsite.
cl::opt< bool > SampleProfileUseProfi
void annotateValueSite(Module &M, Instruction &Inst, const InstrProfRecord &InstrProfR, InstrProfValueKind ValueKind, uint32_t SiteIndx, uint32_t MaxMDCount=3)
Get the value profile data for value site SiteIdx from InstrProfR and annotate the instruction Inst w...
void sort(IteratorTy Start, IteratorTy End)
llvm::cl::opt< bool > UseIterativeBFIInference
std::optional< PseudoProbe > extractProbe(const Instruction &Inst)
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
void emitInlinedIntoBasedOnCost(OptimizationRemarkEmitter &ORE, DebugLoc DLoc, const BasicBlock *Block, const Function &Callee, const Function &Caller, const InlineCost &IC, bool ForProfileContext=false, const char *PassName=nullptr)
Emit an ORE message based on cost (default heuristic).
std::unique_ptr< InlineAdvisor > getReplayInlineAdvisor(Module &M, FunctionAnalysisManager &FAM, LLVMContext &Context, std::unique_ptr< InlineAdvisor > OriginalAdvisor, const ReplayInlinerSettings &ReplaySettings, bool EmitRemarks, InlineContext IC)
cl::opt< int > SampleHotCallSiteThreshold
raw_fd_ostream & errs()
This returns a reference to a raw_ostream for standard error.
void updateProfileCallee(Function *Callee, int64_t EntryDelta, const ValueMap< const Value *, WeakTrackingVH > *VMap=nullptr)
Updates profile information by adjusting the entry count by adding EntryDelta then scaling callsite i...
cl::opt< int > SampleColdCallSiteThreshold
InlineResult InlineFunction(CallBase &CB, InlineFunctionInfo &IFI, bool MergeAttributes=false, AAResults *CalleeAAR=nullptr, bool InsertLifetime=true, Function *ForwardVarArgsTo=nullptr)
This function inlines the called function into the basic block of the caller.
InlineParams getInlineParams()
Generate the parameters to tune the inline cost analysis based only on the commandline options.
OutputIt move(R &&Range, OutputIt Out)
Provide wrappers to std::move which take ranges instead of having to pass begin/end explicitly.
cl::opt< bool > SortProfiledSCC
cl::opt< int > ProfileInlineLimitMax
cl::opt< bool > EnableExtTspBlockPlacement
const uint64_t NOMORE_ICP_MAGICNUM
Magic number in the value profile metadata showing a target has been promoted for the instruction and...
cl::opt< int > ProfileInlineGrowthLimit
Implement std::hash so that hash_code can be used in STL containers.
Used in the streaming interface as the general argument type.
A wrapper of binary function with basic blocks and jumps.
Provides context on when an inline advisor is constructed in the pipeline (e.g., link phase,...
Thresholds to tune inline cost analysis.
std::optional< bool > AllowRecursiveCall
Indicate whether we allow inlining for recursive call.
std::optional< bool > ComputeFullInlineCost
Compute inline cost even when the cost has exceeded the threshold.
Represents the relative location of an instruction.