Go to the documentation of this file.
88 #include <system_error>
93 using namespace sampleprof;
96 #define DEBUG_TYPE "sample-profile"
97 #define CSINLINE_DEBUG DEBUG_TYPE "-inline"
100 "Number of functions inlined with context sensitive profile");
102 "Number of functions not inlined with context sensitive profile");
104 "Number of functions with CFG mismatched profile");
105 STATISTIC(NumMatchedProfile,
"Number of functions with CFG matched profile");
107 "Number of inlined callsites with a partial distribution factor");
110 "Number of functions with FDO inline stopped due to min size limit");
112 "Number of functions with FDO inline stopped due to max size limit");
114 NumCSInlinedHitGrowthLimit,
115 "Number of functions with FDO inline stopped due to growth size limit");
132 cl::desc(
"If the sample profile is accurate, we will mark all un-sampled "
133 "callsite and function as having 0 samples. Otherwise, treat "
134 "un-sampled callsites and functions conservatively as unknown. "));
138 cl::desc(
"If the sample profile is accurate, we will mark all un-sampled "
139 "branches and calls as having 0 samples. Otherwise, treat "
140 "them conservatively as unknown. "));
145 cl::desc(
"For symbols in profile symbol list, regard their profiles to "
146 "be accurate. It may be overriden by profile-sample-accurate. "));
150 cl::desc(
"Merge past inlinee's profile to outline version if sample "
151 "profile loader decided not to inline a call site. It will "
152 "only be enabled when top-down order of profile loading is "
157 cl::desc(
"Do profile annotation and inlining for functions in top-down "
158 "order of call graph during sample profile loading. It only "
159 "works for new pass manager. "));
163 cl::desc(
"Process functions in a top-down order "
164 "defined by the profiled call graph when "
165 "-sample-profile-top-down-load is on."));
168 cl::desc(
"Sort profiled recursion by edge weights."));
172 cl::desc(
"Inline cold call sites in profile loader if it's beneficial "
180 cl::desc(
"If true, artifically skip inline transformation in sample-loader "
181 "pass, and merge (or scale) profiles (as configured by "
182 "--sample-profile-merge-inlinee)."));
186 cl::desc(
"The size growth ratio limit for proirity-based sample profile "
187 "loader inlining."));
191 cl::desc(
"The lower bound of size growth limit for "
192 "proirity-based sample profile loader inlining."));
196 cl::desc(
"The upper bound of size growth limit for "
197 "proirity-based sample profile loader inlining."));
201 cl::desc(
"Hot callsite threshold for proirity-based sample profile loader "
206 cl::desc(
"Threshold for inlining cold callsites"));
211 "Relative hotness percentage threshold for indirect "
212 "call promotion in proirity-based sample profile loader inlining."));
217 "Skip relative hotness check for ICP up to given number of targets."));
222 cl::desc(
"Use call site prioritized inlining for sample profile loader."
223 "Currently only CSSPGO is supported."));
228 cl::desc(
"Use the preinliner decisions stored in profile context."));
233 cl::desc(
"Allow sample loader inliner to inline recursive calls."));
238 "Optimization remarks file containing inline remarks to be replayed "
239 "by inlining from sample profile loader."),
243 "sample-profile-inline-replay-scope",
246 "Replay on functions that have remarks associated "
247 "with them (default)"),
249 "Replay on the entire module")),
250 cl::desc(
"Whether inline replay should be applied to the entire "
251 "Module or just the Functions (default) that are present as "
252 "callers in remarks during sample profile inlining."),
256 "sample-profile-inline-replay-fallback",
261 "All decisions not in replay send to original advisor (default)"),
263 "AlwaysInline",
"All decisions not in replay are inlined"),
265 "All decisions not in replay are not inlined")),
266 cl::desc(
"How sample profile inline replay treats sites that don't come "
267 "from the replay. Original: defers to original advisor, "
268 "AlwaysInline: inline all sites not in replay, NeverInline: "
269 "inline no sites not in replay"),
273 "sample-profile-inline-replay-format",
278 "<Line Number>:<Column Number>"),
280 "LineDiscriminator",
"<Line Number>.<Discriminator>"),
282 "LineColumnDiscriminator",
283 "<Line Number>:<Column Number>.<Discriminator> (default)")),
289 cl::desc(
"Max number of promotions for a single indirect "
290 "call callsite in sample profile loader"));
294 cl::desc(
"Ignore existing branch weights on IR and always overwrite."));
302 using Edge = std::pair<const BasicBlock *, const BasicBlock *>;
307 class GUIDToFuncNameMapper {
311 : CurrentReader(Reader), CurrentModule(
M),
312 CurrentGUIDToFuncNameMap(GUIDToFuncNameMap) {
313 if (!CurrentReader.useMD5())
316 for (
const auto &
F : CurrentModule) {
318 CurrentGUIDToFuncNameMap.insert(
329 if (CanonName != OrigName)
330 CurrentGUIDToFuncNameMap.insert(
335 SetGUIDToFuncNameMapForAll(&CurrentGUIDToFuncNameMap);
338 ~GUIDToFuncNameMapper() {
339 if (!CurrentReader.useMD5())
342 CurrentGUIDToFuncNameMap.clear();
346 SetGUIDToFuncNameMapForAll(
nullptr);
351 std::queue<FunctionSamples *> FSToUpdate;
352 for (
auto &IFS : CurrentReader.getProfiles()) {
353 FSToUpdate.push(&IFS.second);
356 while (!FSToUpdate.empty()) {
359 FS->GUIDToFuncNameMap =
Map;
360 for (
const auto &ICS :
FS->getCallsiteSamples()) {
362 for (
auto &IFS : FSMap) {
364 FSToUpdate.push(&
FS);
376 struct InlineCandidate {
386 float CallsiteDistribution;
390 struct CandidateComparer {
391 bool operator()(
const InlineCandidate &
LHS,
const InlineCandidate &
RHS) {
392 if (
LHS.CallsiteCount !=
RHS.CallsiteCount)
393 return LHS.CallsiteCount <
RHS.CallsiteCount;
397 assert(LCS && RCS &&
"Expect non-null FunctionSamples");
408 using CandidateQueue =
417 class SampleProfileLoader final
426 GetAC(
std::
move(GetAssumptionCache)),
427 GetTTI(
std::
move(GetTargetTransformInfo)), GetTLI(
std::
move(GetTLI)),
428 LTOPhase(LTOPhase) {}
441 findFunctionSamples(
const Instruction &
I)
const override;
442 std::vector<const FunctionSamples *>
449 bool tryPromoteAndInlineCandidate(
456 bool getExternalInlineAdvisorShouldInline(
CallBase &CB);
457 InlineCost shouldInlineCandidate(InlineCandidate &Candidate);
458 bool getInlineCandidate(InlineCandidate *NewCandidate,
CallBase *CB);
460 tryInlineCandidate(InlineCandidate &Candidate,
463 inlineHotFunctionsWithPriority(
Function &
F,
467 void emitOptimizationRemarksForInlineCandidates(
470 void promoteMergeNotInlinedContextSamples(
474 std::unique_ptr<ProfiledCallGraph> buildProfiledCallGraph(
CallGraph &CG);
475 void generateMDProfMetadata(
Function &
F);
488 std::unique_ptr<SampleContextTracker> ContextTracker;
499 std::unique_ptr<ProfileSymbolList> PSL;
510 struct NotInlinedProfileInfo {
528 bool ProfAccForSymsInList;
531 std::unique_ptr<InlineAdvisor> ExternalInlineAdvisor;
534 std::unique_ptr<PseudoProbeManager> ProbeManager;
537 class SampleProfileLoaderLegacyPass :
public ModulePass {
542 SampleProfileLoaderLegacyPass(
548 return ACT->getAssumptionCache(
F);
551 return TTIWP->getTTI(
F);
554 return TLIWP->getTLI(
F);
560 void dump() { SampleLoader.dump(); }
562 bool doInitialization(
Module &M)
override {
563 return SampleLoader.doInitialization(M);
566 StringRef getPassName()
const override {
return "Sample profile pass"; }
567 bool runOnModule(
Module &M)
override;
577 SampleProfileLoader SampleLoader;
587 return getProbeWeight(Inst);
591 return std::error_code();
596 if (isa<BranchInst>(Inst) || isa<IntrinsicInst>(Inst) || isa<PHINode>(Inst))
597 return std::error_code();
606 if (
const auto *CB = dyn_cast<CallBase>(&Inst))
607 if (!CB->isIndirectCall() && findCalleeFunctionSamples(*CB))
610 return getInstWeightImpl(Inst);
618 "Profile is not pseudo probe based");
623 return std::error_code();
645 if (
const auto *CB = dyn_cast<CallBase>(&Inst))
646 if (!CB->isIndirectCall() && findCalleeFunctionSamples(*CB))
652 bool FirstMark = CoverageTracker.markSamplesUsed(
FS, Probe->
Id, 0, Samples);
657 Remark <<
" samples from profile (ProbeId=";
661 Remark <<
", OriginalSamples=";
668 <<
" - weight: " <<
R.get() <<
" - factor: "
688 SampleProfileLoader::findCalleeFunctionSamples(
const CallBase &Inst)
const {
696 CalleeName =
Callee->getName();
699 return ContextTracker->getCalleeContextSamplesFor(Inst, CalleeName);
712 std::vector<const FunctionSamples *>
713 SampleProfileLoader::findIndirectCallFunctionSamples(
716 std::vector<const FunctionSamples *>
R;
723 assert(L && R &&
"Expect non-null FunctionSamples");
732 ContextTracker->getIndirectCalleeContextSamplesFor(DIL);
733 if (CalleeSamples.empty())
739 for (
const auto *
const FS : CalleeSamples) {
740 Sum +=
FS->getEntrySamples();
752 auto T =
FS->findCallTargetMapAt(CallSite);
755 for (
const auto &T_C :
T.get())
760 for (
const auto &NameFS : *M) {
761 Sum += NameFS.second.getEntrySamples();
762 R.push_back(&NameFS.second);
770 SampleProfileLoader::findFunctionSamples(
const Instruction &Inst)
const {
781 auto it = DILocation2SampleMap.try_emplace(DIL,
nullptr);
784 it.first->second = ContextTracker->getContextSamplesFor(DIL);
787 Samples->findFunctionSamples(DIL, Reader->
getRemapper());
789 return it.first->second;
802 std::unique_ptr<InstrProfValueData[]> ValueData =
806 ValueData.get(), NumVals, TotalCount,
true);
812 unsigned NumPromoted = 0;
849 std::unique_ptr<InstrProfValueData[]> ValueData =
853 ValueData.get(), NumVals, OldSum,
true);
857 assert((CallTargets.size() == 1 &&
859 "If sum is 0, assume only one element in CallTargets "
860 "with count being NOMORE_ICP_MAGICNUM");
864 ValueCountMap[ValueData[
I].
Value] = ValueData[
I].Count;
871 OldSum -= Pair.first->second;
881 ValueCountMap[ValueData[
I].Value] = ValueData[
I].Count;
885 for (
const auto &
Data : CallTargets) {
892 assert(Sum >=
Data.Count &&
"Sum should never be less than Data.Count");
898 for (
const auto &ValueCount : ValueCountMap) {
900 InstrProfValueData{ValueCount.first, ValueCount.second});
904 [](
const InstrProfValueData &L,
const InstrProfValueData &R) {
905 if (L.Count != R.Count)
906 return L.Count > R.Count;
907 return L.Value > R.Value;
913 NewCallTargets, Sum, IPVK_IndirectCallTarget, MaxMDCount);
926 bool SampleProfileLoader::tryPromoteAndInlineCandidate(
937 auto CalleeFunctionName = Candidate.CalleeSamples->getFuncName();
942 auto &CI = *Candidate.CallInstr;
946 const char *Reason =
"Callee function not available";
953 if (!
R->getValue()->isDeclaration() &&
R->getValue()->getSubprogram() &&
954 R->getValue()->hasFnAttribute(
"use-sample-profile") &&
963 CI,
R->getValue(), Candidate.CallsiteCount, Sum,
false, ORE);
965 Sum -= Candidate.CallsiteCount;
978 Candidate.CallInstr = DI;
979 if (isa<CallInst>(DI) || isa<InvokeInst>(DI)) {
980 bool Inlined = tryInlineCandidate(Candidate, InlinedCallSite);
985 *DI,
static_cast<float>(Candidate.CallsiteCount) / SumOrigin);
992 << Candidate.CalleeSamples->getFuncName() <<
" because "
1003 if (Callee ==
nullptr)
1018 void SampleProfileLoader::emitOptimizationRemarksForInlineCandidates(
1021 for (
auto I : Candidates) {
1022 Function *CalledFunction =
I->getCalledFunction();
1023 if (CalledFunction) {
1025 I->getDebugLoc(),
I->getParent())
1026 <<
"previous inlining reattempted for "
1027 << (Hot ?
"hotness: '" :
"size: '")
1028 <<
ore::NV(
"Callee", CalledFunction) <<
"' into '"
1034 void SampleProfileLoader::findExternalInlineCandidate(
1041 if (CB && getExternalInlineAdvisorShouldInline(*CB)) {
1053 assert(Samples &&
"expect non-null caller profile");
1063 ContextTracker->getContextFor(Samples->
getContext());
1064 std::queue<ContextTrieNode *> CalleeList;
1065 CalleeList.push(Caller);
1066 while (!CalleeList.empty()) {
1086 if (!Func ||
Func->isDeclaration())
1092 for (
const auto &TS : BS.second.getCallTargets())
1093 if (TS.getValue() > Threshold) {
1096 if (!Callee ||
Callee->isDeclaration())
1104 for (
auto &Child : Node->getAllChildContext()) {
1106 CalleeList.push(CalleeNode);
1133 bool SampleProfileLoader::inlineHotFunctions(
1137 assert((!ProfAccForSymsInList ||
1139 !
F.hasFnAttribute(
"profile-sample-accurate"))) &&
1140 "ProfAccForSymsInList should be false when profile-sample-accurate "
1144 bool Changed =
false;
1145 bool LocalChanged =
true;
1146 while (LocalChanged) {
1147 LocalChanged =
false;
1149 for (
auto &
BB :
F) {
1153 for (
auto &
I :
BB.getInstList()) {
1155 if (
auto *CB = dyn_cast<CallBase>(&
I)) {
1156 if (!isa<IntrinsicInst>(
I)) {
1157 if ((
FS = findCalleeFunctionSamples(*CB))) {
1159 "GUIDToFuncNameMap has to be populated");
1160 AllCandidates.push_back(CB);
1165 else if (shouldInlineColdCallee(*CB))
1166 ColdCandidates.push_back(CB);
1167 }
else if (getExternalInlineAdvisorShouldInline(*CB)) {
1168 AllCandidates.push_back(CB);
1173 if (Hot || ExternalInlineAdvisor) {
1174 CIS.
insert(CIS.begin(), AllCandidates.begin(), AllCandidates.end());
1175 emitOptimizationRemarksForInlineCandidates(AllCandidates,
F,
true);
1177 CIS.
insert(CIS.begin(), ColdCandidates.begin(), ColdCandidates.end());
1178 emitOptimizationRemarksForInlineCandidates(ColdCandidates,
F,
false);
1182 Function *CalledFunction =
I->getCalledFunction();
1183 InlineCandidate Candidate = {
I, LocalNotInlinedCallSites.
lookup(
I),
1187 if (CalledFunction == &
F)
1189 if (
I->isIndirectCall()) {
1191 for (
const auto *
FS : findIndirectCallFunctionSamples(*
I, Sum)) {
1194 findExternalInlineCandidate(
I,
FS, InlinedGUIDs,
SymbolMap,
1195 PSI->getOrCompHotCountThreshold());
1201 Candidate = {
I,
FS,
FS->getEntrySamples(), 1.0};
1202 if (tryPromoteAndInlineCandidate(
F, Candidate, SumOrigin, Sum)) {
1203 LocalNotInlinedCallSites.
erase(
I);
1204 LocalChanged =
true;
1207 }
else if (CalledFunction && CalledFunction->
getSubprogram() &&
1209 if (tryInlineCandidate(Candidate)) {
1210 LocalNotInlinedCallSites.
erase(
I);
1211 LocalChanged =
true;
1214 findExternalInlineCandidate(
I, findCalleeFunctionSamples(*
I),
1216 PSI->getOrCompHotCountThreshold());
1219 Changed |= LocalChanged;
1225 promoteMergeNotInlinedContextSamples(LocalNotInlinedCallSites,
F);
1229 bool SampleProfileLoader::tryInlineCandidate(
1236 CallBase &CB = *Candidate.CallInstr;
1238 assert(CalledFunction &&
"Expect a callee with definition");
1242 InlineCost Cost = shouldInlineCandidate(Candidate);
1245 <<
"incompatible inlining");
1253 IFI.UpdateProfile =
false;
1266 if (InlinedCallSites) {
1267 InlinedCallSites->
clear();
1268 for (
auto &
I : IFI.InlinedCallSites)
1269 InlinedCallSites->push_back(
I);
1273 ContextTracker->markContextSamplesInlined(Candidate.CalleeSamples);
1283 if (Candidate.CallsiteDistribution < 1) {
1284 for (
auto &
I : IFI.InlinedCallSites) {
1287 Candidate.CallsiteDistribution);
1289 NumDuplicatedInlinesite++;
1295 bool SampleProfileLoader::getInlineCandidate(InlineCandidate *NewCandidate,
1297 assert(CB &&
"Expect non-null call instruction");
1299 if (isa<IntrinsicInst>(CB))
1303 const FunctionSamples *CalleeSamples = findCalleeFunctionSamples(*CB);
1306 if (!CalleeSamples && !getExternalInlineAdvisorShouldInline(*CB))
1315 *NewCandidate = {CB, CalleeSamples, CallsiteCount, Factor};
1320 SampleProfileLoader::getExternalInlineAdvisorCost(
CallBase &CB) {
1321 std::unique_ptr<InlineAdvice> Advice =
nullptr;
1322 if (ExternalInlineAdvisor) {
1323 Advice = ExternalInlineAdvisor->getAdvice(CB);
1325 if (!Advice->isInliningRecommended()) {
1326 Advice->recordUnattemptedInlining();
1329 Advice->recordInlining();
1337 bool SampleProfileLoader::getExternalInlineAdvisorShouldInline(
CallBase &CB) {
1339 return Cost ? !!Cost.
getValue() :
false;
1343 SampleProfileLoader::shouldInlineCandidate(InlineCandidate &Candidate) {
1345 getExternalInlineAdvisorCost(*Candidate.CallInstr))
1346 return ReplayCost.getValue();
1351 if (Candidate.CallsiteCount > PSI->getHotCountThreshold())
1358 assert(Callee &&
"Expect a definition for inline candidate of direct call");
1371 GetTTI(*Callee), GetAC, GetTLI);
1407 bool SampleProfileLoader::inlineHotFunctionsWithPriority(
1411 assert((!ProfAccForSymsInList ||
1413 !
F.hasFnAttribute(
"profile-sample-accurate"))) &&
1414 "ProfAccForSymsInList should be false when profile-sample-accurate "
1419 CandidateQueue CQueue;
1420 InlineCandidate NewCandidate;
1421 for (
auto &
BB :
F) {
1422 for (
auto &
I :
BB.getInstList()) {
1423 auto *CB = dyn_cast<CallBase>(&
I);
1426 if (getInlineCandidate(&NewCandidate, CB))
1427 CQueue.push(NewCandidate);
1436 "Max inline size limit should not be smaller than min inline size "
1441 if (ExternalInlineAdvisor)
1447 bool Changed =
false;
1448 while (!CQueue.empty() &&
F.getInstructionCount() <
SizeLimit) {
1449 InlineCandidate Candidate = CQueue.top();
1452 Function *CalledFunction =
I->getCalledFunction();
1454 if (CalledFunction == &
F)
1456 if (
I->isIndirectCall()) {
1458 auto CalleeSamples = findIndirectCallFunctionSamples(*
I, Sum);
1460 Sum *= Candidate.CallsiteDistribution;
1461 unsigned ICPCount = 0;
1462 for (
const auto *
FS : CalleeSamples) {
1465 findExternalInlineCandidate(
I,
FS, InlinedGUIDs,
SymbolMap,
1466 PSI->getOrCompHotCountThreshold());
1470 FS->getEntrySamples() * Candidate.CallsiteDistribution;
1485 if (!PSI->isHotCount(EntryCountDistributed))
1490 Candidate = {
I,
FS, EntryCountDistributed,
1491 Candidate.CallsiteDistribution};
1492 if (tryPromoteAndInlineCandidate(
F, Candidate, SumOrigin, Sum,
1493 &InlinedCallSites)) {
1494 for (
auto *CB : InlinedCallSites) {
1495 if (getInlineCandidate(&NewCandidate, CB))
1496 CQueue.emplace(NewCandidate);
1500 }
else if (!ContextTracker) {
1504 }
else if (CalledFunction && CalledFunction->
getSubprogram() &&
1507 if (tryInlineCandidate(Candidate, &InlinedCallSites)) {
1508 for (
auto *CB : InlinedCallSites) {
1509 if (getInlineCandidate(&NewCandidate, CB))
1510 CQueue.emplace(NewCandidate);
1513 }
else if (!ContextTracker) {
1514 LocalNotInlinedCallSites.
try_emplace(
I, Candidate.CalleeSamples);
1517 findExternalInlineCandidate(
I, findCalleeFunctionSamples(*
I),
1519 PSI->getOrCompHotCountThreshold());
1523 if (!CQueue.empty()) {
1525 ++NumCSInlinedHitMaxLimit;
1527 ++NumCSInlinedHitMinLimit;
1529 ++NumCSInlinedHitGrowthLimit;
1535 promoteMergeNotInlinedContextSamples(LocalNotInlinedCallSites,
F);
1539 void SampleProfileLoader::promoteMergeNotInlinedContextSamples(
1543 for (
const auto &Pair : NonInlinedCallSites) {
1546 if (!Callee ||
Callee->isDeclaration())
1550 I->getDebugLoc(),
I->getParent())
1551 <<
"previous inlining not repeated: '"
1552 <<
ore::NV(
"Callee", Callee) <<
"' into '"
1557 if (
FS->getTotalSamples() == 0 &&
FS->getEntrySamples() == 0) {
1571 if (
FS->getHeadSamples() == 0) {
1575 FS->getEntrySamples());
1587 notInlinedCallInfo.try_emplace(Callee, NotInlinedProfileInfo{0});
1588 pair.first->second.entryCount +=
FS->getEntrySamples();
1606 void SampleProfileLoader::generateMDProfMetadata(
Function &
F) {
1609 LLVM_DEBUG(
dbgs() <<
"\nPropagation complete. Setting branch weights\n");
1612 for (
auto &BI :
F) {
1615 if (BlockWeights[
BB]) {
1616 for (
auto &
I :
BB->getInstList()) {
1617 if (!isa<CallInst>(
I) && !isa<InvokeInst>(
I))
1628 auto T =
FS->findCallTargetMapAt(CallSite);
1629 if (!
T ||
T.get().empty())
1643 for (
const auto &
C :
T.get())
1650 FS->findFunctionSamplesMapAt(CallSite)) {
1651 for (
const auto &NameFS : *M)
1652 Sum += NameFS.second.getEntrySamples();
1658 I.setMetadata(LLVMContext::MD_prof,
nullptr);
1659 }
else if (!isa<IntrinsicInst>(&
I)) {
1660 I.setMetadata(LLVMContext::MD_prof,
1661 MDB.createBranchWeights(
1662 {static_cast<uint32_t>(BlockWeights[BB])}));
1668 for (
auto &
I :
BB->getInstList()) {
1669 if (isa<CallInst>(
I) || isa<InvokeInst>(
I)) {
1671 I.setMetadata(LLVMContext::MD_prof,
nullptr);
1673 I.setMetadata(LLVMContext::MD_prof, MDB.createBranchWeights(0));
1681 if (!isa<BranchInst>(TI) && !isa<SwitchInst>(TI) &&
1682 !isa<IndirectBrInst>(TI))
1688 :
Twine(
"<UNKNOWN LOCATION>"))
1697 std::vector<uint64_t> EdgeIndex;
1702 EdgeIndex[
I] = EdgeMultiplicity[Succ];
1703 EdgeMultiplicity[Succ]++;
1708 Edge
E = std::make_pair(
BB, Succ);
1721 Weights.push_back(
static_cast<uint32_t>(Weight + 1));
1725 uint64_t W = Weight / EdgeMultiplicity[Succ];
1727 if (EdgeIndex[
I] < Weight % EdgeMultiplicity[Succ])
1729 Weights.push_back(
static_cast<uint32_t>(
W));
1732 if (Weight > MaxWeight) {
1734 MaxDestInst = Succ->getFirstNonPHIOrDbgOrLifetime();
1753 if (MaxWeight > 0 &&
1756 TI->
setMetadata(LLVMContext::MD_prof, MDB.createBranchWeights(Weights));
1759 <<
"most popular destination for conditional branches at "
1760 <<
ore::NV(
"CondBranchesLoc", BranchLoc);
1779 bool SampleProfileLoader::emitAnnotations(
Function &
F) {
1780 bool Changed =
false;
1783 if (!ProbeManager->profileIsValid(
F, *Samples)) {
1785 dbgs() <<
"Profile is invalid due to CFG mismatch for Function "
1787 ++NumMismatchedProfile;
1790 ++NumMatchedProfile;
1792 if (getFunctionLoc(
F) == 0)
1796 <<
F.getName() <<
": " << getFunctionLoc(
F) <<
"\n");
1801 Changed |= inlineHotFunctionsWithPriority(
F, InlinedGUIDs);
1803 Changed |= inlineHotFunctions(
F, InlinedGUIDs);
1805 Changed |= computeAndPropagateWeights(
F, InlinedGUIDs);
1808 generateMDProfMetadata(
F);
1810 emitCoverageRemarks(
F);
1817 "Sample Profile loader",
false,
false)
1826 SampleProfileLoader::buildProfiledCallGraph(
CallGraph &CG) {
1827 std::unique_ptr<ProfiledCallGraph> ProfiledCG;
1829 ProfiledCG = std::make_unique<ProfiledCallGraph>(*ContextTracker);
1831 ProfiledCG = std::make_unique<ProfiledCallGraph>(Reader->
getProfiles());
1836 for (
auto &Node : CG) {
1837 const auto *
F = Node.first;
1838 if (!
F ||
F->isDeclaration() || !
F->hasFnAttribute(
"use-sample-profile"))
1846 std::vector<Function *>
1848 std::vector<Function *> FunctionOrderList;
1849 FunctionOrderList.reserve(
M.size());
1852 errs() <<
"WARNING: -use-profiled-call-graph ignored, should be used "
1853 "together with -sample-profile-top-down-load.\n";
1866 if (!
F.isDeclaration() &&
F.hasFnAttribute(
"use-sample-profile"))
1867 FunctionOrderList.push_back(&
F);
1868 return FunctionOrderList;
1923 std::unique_ptr<ProfiledCallGraph> ProfiledCG = buildProfiledCallGraph(*CG);
1932 for (
auto *Node : Range) {
1934 if (
F && !
F->isDeclaration() &&
F->hasFnAttribute(
"use-sample-profile"))
1935 FunctionOrderList.push_back(
F);
1943 auto *
F = Node->getFunction();
1944 if (
F && !
F->isDeclaration() &&
F->hasFnAttribute(
"use-sample-profile"))
1945 FunctionOrderList.push_back(
F);
1952 dbgs() <<
"Function processing order:\n";
1953 for (
auto F :
reverse(FunctionOrderList)) {
1954 dbgs() <<
F->getName() <<
"\n";
1958 std::reverse(FunctionOrderList.begin(), FunctionOrderList.end());
1959 return FunctionOrderList;
1962 bool SampleProfileLoader::doInitialization(
Module &M,
1964 auto &Ctx =
M.getContext();
1968 if (std::error_code EC = ReaderOrErr.getError()) {
1969 std::string
Msg =
"Could not open profile: " +
EC.message();
1978 if (std::error_code EC = Reader->
read()) {
1979 std::string
Msg =
"profile reading failed: " +
EC.message();
1987 ProfAccForSymsInList =
1989 if (ProfAccForSymsInList) {
1990 NamesInProfile.clear();
1992 NamesInProfile.insert(NameTable->begin(), NameTable->end());
1993 CoverageTracker.setProfAccForSymsInList(
true);
1998 M, *
FAM, Ctx,
nullptr,
2043 ContextTracker = std::make_unique<SampleContextTracker>(
2049 ProbeManager = std::make_unique<PseudoProbeManager>(M);
2050 if (!ProbeManager->moduleIsProbed(M)) {
2052 "Pseudo-probe-based profile requires SampleProfileProbePass";
2063 return new SampleProfileLoaderLegacyPass();
2067 return new SampleProfileLoaderLegacyPass(
Name);
2072 GUIDToFuncNameMapper Mapper(M, *Reader, GUIDToFuncNameMap);
2075 if (
M.getProfileSummary(
false) ==
nullptr) {
2082 TotalCollectedSamples +=
I.second.getTotalSamples();
2086 for (
const auto &N_F :
M.getValueSymbolTable()) {
2088 Function *
F = dyn_cast<Function>(N_F.getValue());
2089 if (
F ==
nullptr || OrigName.
empty())
2093 if (OrigName != NewName && !NewName.
empty()) {
2100 r.first->second =
nullptr;
2105 if (
auto MapName = Remapper->lookUpNameInProfile(OrigName)) {
2106 if (*MapName != OrigName && !MapName->empty())
2112 "No empty StringRef should be added in SymbolMap");
2114 bool retval =
false;
2115 for (
auto F : buildFunctionOrder(M, CG)) {
2117 clearFunctionData();
2123 for (
const std::pair<Function *, NotInlinedProfileInfo> &pair :
2130 bool SampleProfileLoaderLegacyPass::runOnModule(
Module &M) {
2131 ACT = &getAnalysis<AssumptionCacheTracker>();
2132 TTIWP = &getAnalysis<TargetTransformInfoWrapperPass>();
2133 TLIWP = &getAnalysis<TargetLibraryInfoWrapperPass>();
2135 &getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI();
2136 return SampleLoader.runOnModule(M,
nullptr, PSI,
nullptr);
2140 LLVM_DEBUG(
dbgs() <<
"\n\nProcessing Function " <<
F.getName() <<
"\n");
2141 DILocation2SampleMap.clear();
2152 initialEntryCount = 0;
2155 ProfAccForSymsInList =
false;
2157 CoverageTracker.setProfAccForSymsInList(ProfAccForSymsInList);
2163 if (ProfAccForSymsInList) {
2165 if (PSL->contains(
F.getName()))
2166 initialEntryCount = 0;
2179 if (NamesInProfile.count(CanonName))
2180 initialEntryCount = -1;
2185 if (!
F.getEntryCount().hasValue())
2187 std::unique_ptr<OptimizationRemarkEmitter> OwnedORE;
2194 OwnedORE = std::make_unique<OptimizationRemarkEmitter>(&
F);
2195 ORE = OwnedORE.get();
2199 Samples = ContextTracker->getBaseSamplesFor(
F);
2203 if (Samples && !Samples->
empty())
2204 return emitAnnotations(
F);
2223 SampleProfileLoader SampleLoader(
2226 : ProfileRemappingFileName,
2227 LTOPhase, GetAssumptionCache, GetTTI, GetTLI);
2229 if (!SampleLoader.doInitialization(
M, &
FAM))
2234 if (!SampleLoader.runOnModule(
M, &AM, PSI, &CG))
A set of analyses that are preserved following a run of a transformation pass.
const BodySampleMap & getBodySamples() const
Return all the samples collected in the body of the function.
std::unique_ptr< InlineAdvisor > getReplayInlineAdvisor(Module &M, FunctionAnalysisManager &FAM, LLVMContext &Context, std::unique_ptr< InlineAdvisor > OriginalAdvisor, const ReplayInlinerSettings &ReplaySettings, bool EmitRemarks)
int getCost() const
Get the inline cost estimate.
Analysis pass providing the TargetTransformInfo.
PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM)
cl::opt< bool > EnableExtTspBlockPlacement
This is an optimization pass for GlobalISel generic memory operations.
We currently emits eax Perhaps this is what we really should generate is Is imull three or four cycles eax eax The current instruction priority is based on pattern complexity The former is more complex because it folds a load so the latter will not be emitted Perhaps we should use AddedComplexity to give LEA32r a higher priority We should always try to match LEA first since the LEA matching code does some estimate to determine whether the match is profitable if we care more about code then imull is better It s two bytes shorter than movl leal On a Pentium M
into xmm2 addss xmm2 xmm1 xmm3 addss xmm3 movaps xmm0 unpcklps xmm0 ret seems silly when it could just be one addps Expand libm rounding functions main should enable SSE DAZ mode and other fast SSE modes Think about doing i64 math in SSE regs on x86 This testcase should have no SSE instructions in it
cl::opt< int > ProfileInlineGrowthLimit("sample-profile-inline-growth-limit", cl::Hidden, cl::init(12), cl::desc("The size growth ratio limit for proirity-based sample profile " "loader inlining."))
@ ContextDuplicatedIntoBase
cl::opt< int > ProfileInlineLimitMax("sample-profile-inline-limit-max", cl::Hidden, cl::init(10000), cl::desc("The upper bound of size growth limit for " "proirity-based sample profile loader inlining."))
static bool ProfileIsProbeBased
An analysis pass to compute the CallGraph for a Module.
ModulePass class - This class is used to implement unstructured interprocedural optimizations and analyses.
const Function * getParent() const
Return the enclosing method, or null if none.
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)
Get the result of an analysis pass for a given IR unit.
@ ThinLTOPostLink
ThinLTO postlink (backend compile) phase.
bool profileIsProbeBased() const
Whether input profile is based on pseudo probes.
bool hasAttribute(ContextAttributeMask A)
ValueT lookup(const_arg_type_t< KeyT > Val) const
lookup - Return the entry for the specified key, or a default constructed value if no such entry exists.
static cl::opt< unsigned > SizeLimit("eif-limit", cl::init(6), cl::Hidden, cl::desc("Size limit in Hexagon early if-conversion"))
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
DISubprogram * getSubprogram() const
Get the attached subprogram.
static cl::opt< std::string > SampleProfileRemappingFile("sample-profile-remapping-file", cl::init(""), cl::value_desc("filename"), cl::desc("Profile remapping file loaded by -sample-profile"), cl::Hidden)
The basic data container for the call graph of a Module of IR.
FunctionAnalysisManager FAM
static cl::opt< unsigned > ProfileICPRelativeHotnessSkip("sample-profile-icp-relative-hotness-skip", cl::Hidden, cl::init(1), cl::desc("Skip relative hotness check for ICP up to given number of targets."))
bool erase(const KeyT &Val)
void emitInlinedIntoBasedOnCost(OptimizationRemarkEmitter &ORE, DebugLoc DLoc, const BasicBlock *Block, const Function &Callee, const Function &Caller, const InlineCost &IC, bool ForProfileContext=false, const char *PassName=nullptr)
Emit ORE message based on cost (default heuristic).
ModulePass * createSampleProfileLoaderPass()
cl::opt< int > ProfileInlineLimitMin("sample-profile-inline-limit-min", cl::Hidden, cl::init(100), cl::desc("The lower bound of size growth limit for " "proirity-based sample profile loader inlining."))
static PreservedAnalyses none()
Convenience factory function for the empty preserved set.
static void updateIDTMetaData(Instruction &Inst, const SmallVectorImpl< InstrProfValueData > &CallTargets, uint64_t Sum)
Update indirect call target profile metadata for Inst.
auto reverse(ContainerTy &&C, std::enable_if_t< has_rbegin< ContainerTy >::value > *=nullptr)
INITIALIZE_PASS_BEGIN(SampleProfileLoaderLegacyPass, "sample-profile", "Sample Profile loader", false, false) INITIALIZE_PASS_END(SampleProfileLoaderLegacyPass
static InlineCost getAlways(const char *Reason, Optional< CostBenefitPair > CostBenefit=None)
static cl::opt< bool > ProfileMergeInlinee("sample-profile-merge-inlinee", cl::Hidden, cl::init(true), cl::desc("Merge past inlinee's profile to outline version if sample " "profile loader decided not to inline a call site. It will " "only be enabled when top-down order of profile loading is " "enabled. "))
Thresholds to tune inline cost analysis.
size_type count(const_arg_type_t< KeyT > Val) const
Return 1 if the specified key is in the map, 0 otherwise.
DiagnosticInfoOptimizationBase::Argument NV
@ ThinLTOPreLink
ThinLTO prelink (summary) phase.
raw_fd_ostream & errs()
This returns a reference to a raw_ostream for standard error.
void findInlinedFunctions(DenseSet< GlobalValue::GUID > &S, const StringMap< Function * > &SymbolMap, uint64_t Threshold) const
Recursively traverses all children; if the total sample count of the corresponding function is no less than Threshold, adds its corresponding GUID to S.
void dump(const SparseBitVector< ElementSize > &LHS, raw_ostream &out)
StringRef getName() const
Return the function name.
std::map< std::string, FunctionSamples, std::less<> > FunctionSamplesMap
void initializeSampleProfileLoaderLegacyPassPass(PassRegistry &)
SampleProfileReaderItaniumRemapper * getRemapper()
Sort the nodes of a directed SCC in the decreasing order of the edge weights.
std::pair< iterator, bool > insert(const ValueT &V)
static cl::opt< ReplayInlinerSettings::Fallback > ProfileInlineReplayFallback("sample-profile-inline-replay-fallback", cl::init(ReplayInlinerSettings::Fallback::Original), cl::values(clEnumValN(ReplayInlinerSettings::Fallback::Original, "Original", "All decisions not in replay send to original advisor (default)"), clEnumValN(ReplayInlinerSettings::Fallback::AlwaysInline, "AlwaysInline", "All decisions not in replay are inlined"), clEnumValN(ReplayInlinerSettings::Fallback::NeverInline, "NeverInline", "All decisions not in replay are not inlined")), cl::desc("How sample profile inline replay treats sites that don't come " "from the replay. Original: defers to original advisor, " "AlwaysInline: inline all sites not in replay, NeverInline: " "inline no sites not in replay"), cl::Hidden)
static cl::opt< bool > ProfileSampleBlockAccurate("profile-sample-block-accurate", cl::Hidden, cl::init(false), cl::desc("If the sample profile is accurate, we will mark all un-sampled " "branches and calls as having 0 samples. Otherwise, treat " "them conservatively as unknown. "))
void setMetadata(unsigned KindID, MDNode *Node)
Set the metadata of the specified kind to the specified node.
Optional< bool > ComputeFullInlineCost
Compute inline cost even when the cost has exceeded the threshold.
LLVM Basic Block Representation.
void SetContextSynthetic()
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
static cl::opt< bool > AllowRecursiveInline("sample-profile-recursive-inline", cl::Hidden, cl::ZeroOrMore, cl::init(false), cl::desc("Allow sample loader inliner to inline recursive calls."))
ThinOrFullLTOPhase
This enumerates the LLVM full LTO or ThinLTO optimization phases.
unsigned getNumSuccessors() const
Return the number of successors that this instruction has.
cl::opt< bool > SampleProfileUseProfi
StringRef getFuncName() const
Return the original function name.
bool extractProfTotalWeight(uint64_t &TotalVal) const
Retrieve total raw weight values of a branch.
static cl::opt< bool > DisableSampleLoaderInlining("disable-sample-loader-inlining", cl::Hidden, cl::init(false), cl::desc("If true, artifically skip inline transformation in sample-loader " "pass, and merge (or scale) profiles (as configured by " "--sample-profile-merge-inlinee)."))
static PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
bool isDeclaration() const
Return true if the primary definition of this global value is outside of the current translation unit...
bool profileIsCS() const
Whether input profile is fully context-sensitive.
cl::opt< bool > SortProfiledSCC("sort-profiled-scc-member", cl::init(true), cl::Hidden, cl::desc("Sort profiled recursion by edge weights."))
InlineCost getInlineCost(CallBase &Call, const InlineParams &Params, TargetTransformInfo &CalleeTTI, function_ref< AssumptionCache &(Function &)> GetAssumptionCache, function_ref< const TargetLibraryInfo &(Function &)> GetTLI, function_ref< BlockFrequencyInfo &(Function &)> GetBFI=nullptr, ProfileSummaryInfo *PSI=nullptr, OptimizationRemarkEmitter *ORE=nullptr)
Get an InlineCost object representing the cost of inlining this callsite.
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
std::error_code read()
The interface to read sample profiles from the associated file.
(vector float) vec_cmpeq(*A, *B) C
Metadata * getMD(LLVMContext &Context, bool AddPartialField=true, bool AddPartialProfileRatioField=true)
Return summary information as metadata.
Function * getCalledFunction() const
Returns the function called, or null if this is an indirect function invocation or the function signa...
static cl::opt< bool > UsePreInlinerDecision("sample-profile-use-preinliner", cl::Hidden, cl::ZeroOrMore, cl::init(false), cl::desc("Use the preinliner decisions stored in profile context."))
Represent the analysis usage information of a pass.
Represents the cost of inlining a function.
void updateProfileCallee(Function *Callee, int64_t EntryDelta, const ValueMap< const Value *, WeakTrackingVH > *VMap=nullptr)
Updates profile information by adjusting the entry count by adding EntryDelta then scaling callsite i...
DenseMap< SymbolStringPtr, JITEvaluatedSymbol > SymbolMap
A map from symbol names (as SymbolStringPtrs) to JITSymbols (address/flags pairs).
static uint64_t getGUID(StringRef Name)
static cl::opt< CallSiteFormat::Format > ProfileInlineReplayFormat("sample-profile-inline-replay-format", cl::init(CallSiteFormat::Format::LineColumnDiscriminator), cl::values(clEnumValN(CallSiteFormat::Format::Line, "Line", "<Line Number>"), clEnumValN(CallSiteFormat::Format::LineColumn, "LineColumn", "<Line Number>:<Column Number>"), clEnumValN(CallSiteFormat::Format::LineDiscriminator, "LineDiscriminator", "<Line Number>.<Discriminator>"), clEnumValN(CallSiteFormat::Format::LineColumnDiscriminator, "LineColumnDiscriminator", "<Line Number>:<Column Number>.<Discriminator> (default)")), cl::desc("How sample profile inline replay file is formatted"), cl::Hidden)
Function::ProfileCount ProfileCount
STATISTIC(NumFunctions, "Total number of functions")
int getNumOccurrences() const
void setProbeDistributionFactor(Instruction &Inst, float Factor)
A node in the call graph for a module.
BasicBlock * getSuccessor(unsigned Idx) const
Return the specified successor. This instruction must be a terminator.
static InlineCost get(int Cost, int Threshold)
InlineParams getInlineParams()
Generate the parameters to tune the inline cost analysis based only on the commandline options.
bool isLegalToPromote(const CallBase &CB, Function *Callee, const char **FailureReason=nullptr)
Return true if the given indirect call site can be made to call Callee.
virtual std::vector< StringRef > * getNameTable()
It includes all the names that have samples either in outline instance or inline instance.
unsigned ID
LLVM IR allows the use of arbitrary numbers as calling convention identifiers.
StringMap - This is an unconventional map that is specialized for handling keys that are "strings",...
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
PriorityQueue - This class behaves like std::priority_queue and provides a few additional convenience...
scc_iterator< T > scc_begin(const T &G)
Construct the begin iterator for a deduced graph type T.
Analysis providing profile information.
Implements a dense probed hash-table based set.
Function::ProfileCount ProfileCount
Used in the streaming interface as the general argument type.
ValuesClass values(OptsTy... Options)
Helper to build a ValuesClass by forwarding a variable number of arguments as an initializer list to ...
constexpr LLVM_NODISCARD bool empty() const
empty - Check if the string is empty.
static cl::opt< unsigned > MaxNumPromotions("sample-profile-icp-max-prom", cl::init(3), cl::Hidden, cl::ZeroOrMore, cl::desc("Max number of promotions for a single indirect " "call callsite in sample profile loader"))
Module * getParent()
Get the module that this global value is contained inside of...
uint64_t getEntrySamples() const
Return the sample count of the first instruction of the function.
FunctionSamples * getSamplesFor(const Function &F)
Return the samples collected for function F.
static cl::opt< std::string > SampleProfileFile("sample-profile-file", cl::init(""), cl::value_desc("filename"), cl::desc("Profile file loaded by -sample-profile"), cl::Hidden)
A function analysis which provides an AssumptionCache.
Enumerate the SCCs of a directed graph in reverse topological order of the SCC DAG.
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
Representation of the samples collected for a function.
compiles ldr LCPI1_0 ldr ldr mov lsr tst moveq r1 ldr LCPI1_1 and r0 bx lr It would be better to do something like to fold the shift into the conditional move
This is an important class for using LLVM in a threaded context.
static const Function * getCalledFunction(const Value *V, bool &IsNoBuiltin)
initializer< Ty > init(const Ty &Val)
An analysis pass based on legacy pass manager to deliver ProfileSummaryInfo.
static const CallTargetMap adjustCallTargets(const CallTargetMap &Targets, float DistributionFactor)
Prorate call targets by a distribution factor.
iterator find(const_arg_type_t< KeyT > Val)
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
OutputIt move(R &&Range, OutputIt Out)
Provide wrappers to std::move which take ranges instead of having to pass begin/end explicitly.
constexpr const T & getValue() const &
StandardInstrumentations SI(Debug, VerifyEach)
static bool UseMD5
Whether the profile uses MD5 to represent string.
print Print MemDeps of function
static ErrorOr< std::unique_ptr< SampleProfileReader > > create(const std::string Filename, LLVMContext &C, FSDiscriminatorPass P=FSDiscriminatorPass::Base, const std::string RemapFilename="")
Create a sample profile reader appropriate to the file format.
A Module instance is used to store all the information related to an LLVM module.
static InlineCost getNever(const char *Reason, Optional< CostBenefitPair > CostBenefit=None)
virtual void setSkipFlatProf(bool Skip)
Don't read profile without context if the flag is set.
cl::opt< int > SampleHotCallSiteThreshold("sample-profile-hot-inline-threshold", cl::Hidden, cl::init(3000), cl::desc("Hot callsite threshold for proirity-based sample profile loader " "inlining."))
Diagnostic information for the sample profiler.
An analysis pass based on the new PM to deliver ProfileSummaryInfo.
StringSet - A wrapper for StringMap that provides set-like functionality.
An immutable pass that tracks lazily created AssumptionCache objects.
Expected< ExpressionValue > min(const ExpressionValue &Lhs, const ExpressionValue &Rhs)
StringRef - Represent a constant reference to a string, i.e.
A cache of @llvm.assume calls within a function.
virtual std::unique_ptr< ProfileSymbolList > getProfileSymbolList()
#define clEnumValN(ENUMVAL, FLAGNAME, DESC)
format_object< Ts... > format(const char *Fmt, const Ts &... Vals)
These are helper functions used to produce formatted output.
StringRef getName() const
Return a constant reference to the value's name.
std::pair< iterator, bool > insert(const std::pair< KeyT, ValueT > &KV)
static bool isIndirectCall(const MachineInstr &MI)
void annotateValueSite(Module &M, Instruction &Inst, const InstrProfRecord &InstrProfR, InstrProfValueKind ValueKind, uint32_t SiteIndx, uint32_t MaxMDCount=3)
Get the value profile data for value site SiteIdx from InstrProfR and annotate the instruction Inst w...
ProfileSummary & getSummary() const
Return the profile summary.
const CustomOperand< const MCSubtargetInfo & > Msg[]
amdgpu Simplify well known AMD library false FunctionCallee Callee
static bool runOnFunction(Function &F, bool PostInlining)
SampleProfileMap & getProfiles()
Return all the profiles.
void diagnose(const DiagnosticInfo &DI)
Report a message to the currently installed diagnostic handler.
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
FunctionSamples * getOrCreateSamplesFor(const Function &F)
Return the samples collected for function F, create empty FunctionSamples if it doesn't exist.
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
static cl::opt< ReplayInlinerSettings::Scope > ProfileInlineReplayScope("sample-profile-inline-replay-scope", cl::init(ReplayInlinerSettings::Scope::Function), cl::values(clEnumValN(ReplayInlinerSettings::Scope::Function, "Function", "Replay on functions that have remarks associated " "with them (default)"), clEnumValN(ReplayInlinerSettings::Scope::Module, "Module", "Replay on the entire module")), cl::desc("Whether inline replay should be applied to the entire " "Module or just the Functions (default) that are present as " "callers in remarks during sample profile inlining."), cl::Hidden)
GUID getGUID() const
Return a 64-bit global unique ID constructed from global value name (i.e.
bool callsiteIsHot(const FunctionSamples *CallsiteFS, ProfileSummaryInfo *PSI, bool ProfAccForSymsInList)
Return true if the given callsite is hot with respect to the hot cutoff threshold.
void sort(IteratorTy Start, IteratorTy End)
Provides information about what library functions are available for the current target.
This class captures the data input to the InlineFunction call, and records the auxiliary results prod...
static cl::opt< bool > UseProfiledCallGraph("use-profiled-call-graph", cl::init(true), cl::Hidden, cl::desc("Process functions in a top-down order " "defined by the profiled call graph when " "-sample-profile-top-down-load is on."))
Sample-based profile reader.
@ None
No LTO/ThinLTO behavior needed.
sampleprof_error merge(const FunctionSamples &Other, uint64_t Weight=1)
Merge the samples in Other into this one.
const uint64_t NOMORE_ICP_MAGICNUM
Magic number in the value profile metadata showing a target has been promoted for the instruction and...
void setModule(const Module *Mod)
cl::opt< int > SampleColdCallSiteThreshold("sample-profile-cold-inline-threshold", cl::Hidden, cl::init(45), cl::desc("Threshold for inlining cold callsites"))
Module & getModule() const
Returns the module the call graph corresponds to.
bool profileIsPreInlined() const
Whether input profile contains ShouldBeInlined contexts.
Optional< PseudoProbe > extractProbe(const Instruction &Inst)
static cl::opt< bool > ProfileAccurateForSymsInList("profile-accurate-for-symsinlist", cl::Hidden, cl::ZeroOrMore, cl::init(true), cl::desc("For symbols in profile symbol list, regard their profiles to " "be accurate. It may be overriden by profile-sample-accurate. "))
SampleContext & getContext() const
static cl::opt< bool > ProfileSampleAccurate("profile-sample-accurate", cl::Hidden, cl::init(false), cl::desc("If the sample profile is accurate, we will mark all un-sampled " "callsite and function as having 0 samples. Otherwise, treat " "un-sampled callsites and functions conservatively as unknown. "))
CallBase & promoteIndirectCall(CallBase &CB, Function *F, uint64_t Count, uint64_t TotalCount, bool AttachProfToDirectCall, OptimizationRemarkEmitter *ORE)
bool isAtEnd() const
Direct loop termination test which is more efficient than comparison with end().
static StringRef getCanonicalFnName(const Function &F)
Return the canonical name for a function, taking into account suffix elision policy attributes.
sample Sample Profile loader
static const SortedCallTargetSet SortCallTargets(const CallTargetMap &Targets)
Sort call targets in descending order of call frequency.
const DebugLoc & getDebugLoc() const
Return the debug location for this node as a DebugLoc.
static SmallVector< InstrProfValueData, 2 > GetSortedValueDataFromCallTargets(const SampleRecord::CallTargetMap &M)
Returns the entries of CallTargetMap M sorted by count in descending order.
static cl::opt< bool > OverwriteExistingWeights("overwrite-existing-weights", cl::Hidden, cl::init(false), cl::desc("Ignore existing branch weights on IR and always overwrite."))
static cl::opt< bool > ProfileTopDownLoad("sample-profile-top-down-load", cl::Hidden, cl::init(true), cl::desc("Do profile annotation and inlining for functions in top-down " "order of call graph during sample profile loading. It only " "works for new pass manager. "))
std::pair< iterator, bool > try_emplace(KeyT &&Key, Ts &&... Args)
static cl::opt< bool > CallsitePrioritizedInline("sample-profile-prioritized-inline", cl::Hidden, cl::ZeroOrMore, cl::init(false), cl::desc("Use call site prioritized inlining for sample profile loader." "Currently only CSSPGO is supported."))
const BasicBlock * getParent() const
Represents either an error or a value T.
Align max(MaybeAlign Lhs, Align Rhs)
static cl::opt< std::string > ProfileInlineReplayFile("sample-profile-inline-replay", cl::init(""), cl::value_desc("filename"), cl::desc("Optimization remarks file containing inline remarks to be replayed " "by inlining from sample profile loader."), cl::Hidden)
static cl::opt< bool > ProfileSizeInline("sample-profile-inline-size", cl::Hidden, cl::init(false), cl::desc("Inline cold call sites in profile loader if it's beneficial " "for code size."))
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
InlineResult InlineFunction(CallBase &CB, InlineFunctionInfo &IFI, AAResults *CalleeAAR=nullptr, bool InsertLifetime=true, Function *ForwardVarArgsTo=nullptr)
This function inlines the called function into the basic block of the caller.
Base class for all callable instructions (InvokeInst and CallInst) Holds everything related to callin...
A container for analyses that lazily runs them and caches their results.
An analysis over an "outer" IR unit that provides access to an analysis manager over an "inner" IR un...
This class represents a function call, abstracting a target machine's calling convention.
Common register allocation spilling lr str ldr sxth r3 ldr mla r4 can lr mov lr str ldr sxth r3 mla r4 and then merge mul and lr str ldr sxth r3 mla r4 It also increase the likelihood the store may become dead bb27 Successors according to LLVM BB
bool getValueProfDataFromInst(const Instruction &Inst, InstrProfValueKind ValueKind, uint32_t MaxNumValueData, InstrProfValueData ValueData[], uint32_t &ActualNumValueData, uint64_t &TotalC, bool GetNoICPValue=false)
Extract the value profile data from Inst which is annotated with value profile meta data.
AnalysisUsage & addRequired()
llvm::cl::opt< bool > UseIterativeBFIInference
void mergeAttributesForInlining(Function &Caller, const Function &Callee)
Merge caller's and callee's attributes.
Class to represent profile counts.
Optional< bool > AllowRecursiveCall
Indicate whether we allow inlining for recursive call.
LLVM Value Representation.
Analysis pass providing the TargetLibraryInfo.
static cl::opt< unsigned > ProfileICPRelativeHotness("sample-profile-icp-relative-hotness", cl::Hidden, cl::init(25), cl::desc("Relative hotness percentage threshold for indirect " "call promotion in proirity-based sample profile loader inlining."))
static bool doesHistoryAllowICP(const Instruction &Inst, StringRef Candidate)
Check whether the indirect call promotion history of Inst allows the promotion for Candidate.
static LineLocation getCallSiteIdentifier(const DILocation *DIL, bool ProfileIsFS=false)
Returns a unique call site identifier for a given debug location of a call instruction.
reference emplace_back(ArgTypes &&... Args)
iterator insert(iterator I, T &&Elt)