66 #define DEBUG_TYPE "partial-inlining" 69 "Number of callsites functions partially inlined into.");
70 STATISTIC(NumColdOutlinePartialInlined,
"Number of times functions with " 71 "cold outlined regions were partially " 72 "inlined into its caller(s).");
74 "Number of cold single entry/exit regions found.");
76 "Number of cold single entry/exit regions outlined.");
86 cl::desc(
"Disable multi-region partial inlining"));
92 cl::desc(
"Force outline regions with live exits"));
98 cl::desc(
"Mark outline function calls with ColdCC"));
104 cl::desc(
"Trace partial inlining."));
118 cl::desc(
"Minimum ratio comparing relative sizes of each " 119 "outline candidate and original function"));
124 cl::desc(
"Minimum block executions to consider " 125 "its BranchProbabilityInfo valid"));
130 cl::desc(
"Minimum BranchProbability to consider a region cold."));
134 cl::desc(
"Max number of blocks to be partially inlined"));
140 cl::desc(
"Max number of partial inlining. The default is unlimited"));
148 cl::desc(
"Relative frequency of outline region to " 153 cl::desc(
"A debug option to add additional penalty to the computed one."));
157 struct FunctionOutliningInfo {
158 FunctionOutliningInfo() =
default;
162 unsigned GetNumInlinedBlocks()
const {
return Entries.size() + 1; }
178 struct FunctionOutliningMultiRegionInfo {
179 FunctionOutliningMultiRegionInfo()
183 struct OutlineRegionInfo {
187 : Region(Region.
begin(), Region.
end()), EntryBlock(EntryBlock),
188 ExitBlock(ExitBlock), ReturnBlock(ReturnBlock) {}
198 struct PartialInlinerImpl {
206 : GetAssumptionCache(GetAC), LookupAssumptionCache(LookupAC),
207 GetTTI(GTTI), GetBFI(GBFI), PSI(ProfSI) {}
217 std::pair<bool, Function *> unswitchFunction(
Function *
F);
223 struct FunctionCloner {
226 FunctionCloner(
Function *
F, FunctionOutliningInfo *OI,
229 FunctionCloner(
Function *F, FunctionOutliningMultiRegionInfo *OMRI,
237 void NormalizeReturnBlock();
240 bool doMultiRegionFunctionOutlining();
247 Function *doSingleRegionFunctionOutlining();
252 typedef std::pair<Function *, BasicBlock *> FuncBodyCallerPair;
258 bool IsFunctionInlined =
false;
260 int OutlinedRegionCost = 0;
262 std::unique_ptr<FunctionOutliningInfo> ClonedOI =
nullptr;
264 std::unique_ptr<FunctionOutliningMultiRegionInfo> ClonedOMRI =
nullptr;
265 std::unique_ptr<BlockFrequencyInfo> ClonedFuncBFI =
nullptr;
271 int NumPartialInlining = 0;
272 std::function<AssumptionCache &(Function &)> *GetAssumptionCache;
274 std::function<TargetTransformInfo &(Function &)> *GetTTI;
286 bool shouldPartialInline(
CallSite CS, FunctionCloner &Cloner,
293 bool tryPartialInline(FunctionCloner &Cloner);
297 void computeCallsiteToProfCountMap(
Function *DuplicateFunction,
300 bool IsLimitReached() {
307 if (
CallInst *CI = dyn_cast<CallInst>(U))
309 else if (
InvokeInst *II = dyn_cast<InvokeInst>(U))
318 return getCallSite(User);
321 std::tuple<DebugLoc, BasicBlock *> getOneDebugLoc(
Function *F) {
325 return std::make_tuple(DLoc, Block);
334 std::tuple<int, int> computeOutliningCosts(FunctionCloner &Cloner);
339 static int computeBBInlineCost(
BasicBlock *BB);
341 std::unique_ptr<FunctionOutliningInfo> computeOutliningInfo(
Function *F);
342 std::unique_ptr<FunctionOutliningMultiRegionInfo>
346 struct PartialInlinerLegacyPass :
public ModulePass {
359 bool runOnModule(
Module &M)
override {
365 &getAnalysis<TargetTransformInfoWrapperPass>();
367 &getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI();
369 std::function<AssumptionCache &(Function &)> GetAssumptionCache =
378 std::function<TargetTransformInfo &(Function &)> GetTTI =
380 return TTIWP->getTTI(F);
383 return PartialInlinerImpl(&GetAssumptionCache, LookupAssumptionCache,
391 std::unique_ptr<FunctionOutliningMultiRegionInfo>
392 PartialInlinerImpl::computeOutliningColdRegionsInfo(
Function *F,
399 std::unique_ptr<BlockFrequencyInfo> ScopedBFI;
403 BFI = ScopedBFI.get();
405 BFI = &(*GetBFI)(*F);
408 if (!PSI->hasInstrumentationProfile())
409 return std::unique_ptr<FunctionOutliningMultiRegionInfo>();
411 std::unique_ptr<FunctionOutliningMultiRegionInfo> OutliningInfo =
412 std::make_unique<FunctionOutliningMultiRegionInfo>();
422 for (
auto *Block : BlockList) {
429 <<
"Region dominated by " 430 <<
ore::NV(
"Block", BlockList.front()->getName())
431 <<
" has more than one region exit edge.";
450 int OverallFunctionCost = 0;
452 OverallFunctionCost += computeBBInlineCost(&BB);
456 dbgs() <<
"OverallFunctionCost = " << OverallFunctionCost <<
"\n";
458 int MinOutlineRegionCost =
462 MinBlockCounterExecution);
463 bool ColdCandidateFound =
false;
465 std::vector<BasicBlock *>
DFS;
467 DFS.push_back(CurrEntry);
468 VisitedMap[CurrEntry] =
true;
475 while (!DFS.empty()) {
476 auto *thisBB = DFS.back();
481 if (PSI->isColdBlock(thisBB, BFI) ||
487 VisitedMap[*
SI] =
true;
491 if (SuccProb > MinBranchProbability)
495 dbgs() <<
"Found cold edge: " << thisBB->getName() <<
"->" 496 << (*SI)->getName() <<
"\nBranch Probability = " << SuccProb
501 DT.getDescendants(*SI, DominateVector);
503 if (!IsSingleEntry(DominateVector))
507 if (!(ExitBlock = IsSingleExit(DominateVector)))
509 int OutlineRegionCost = 0;
510 for (
auto *BB : DominateVector)
511 OutlineRegionCost += computeBBInlineCost(BB);
515 dbgs() <<
"OutlineRegionCost = " << OutlineRegionCost <<
"\n";
518 if (OutlineRegionCost < MinOutlineRegionCost) {
522 <<
ore::NV(
"Callee", F) <<
" inline cost-savings smaller than " 523 <<
ore::NV(
"Cost", MinOutlineRegionCost);
531 for (
auto *BB : DominateVector)
532 VisitedMap[BB] =
true;
536 FunctionOutliningMultiRegionInfo::OutlineRegionInfo RegInfo(
537 DominateVector, DominateVector.front(), ExitBlock, ReturnBlock);
538 OutliningInfo->ORI.push_back(RegInfo);
541 dbgs() <<
"Found Cold Candidate starting at block: " 542 << DominateVector.front()->getName() <<
"\n";
545 ColdCandidateFound =
true;
546 NumColdRegionsFound++;
549 if (ColdCandidateFound)
550 return OutliningInfo;
552 return std::unique_ptr<FunctionOutliningMultiRegionInfo>();
555 std::unique_ptr<FunctionOutliningInfo>
556 PartialInlinerImpl::computeOutliningInfo(
Function *F) {
560 return std::unique_ptr<FunctionOutliningInfo>();
569 return isa<ReturnInst>(TI);
573 if (IsReturnBlock(Succ1))
574 return std::make_tuple(Succ1, Succ2);
575 if (IsReturnBlock(Succ2))
576 return std::make_tuple(Succ2, Succ1);
578 return std::make_tuple<BasicBlock *, BasicBlock *>(
nullptr,
nullptr);
583 if (IsSuccessor(Succ1, Succ2))
584 return std::make_tuple(Succ1, Succ2);
585 if (IsSuccessor(Succ2, Succ1))
586 return std::make_tuple(Succ2, Succ1);
588 return std::make_tuple<BasicBlock *, BasicBlock *>(
nullptr,
nullptr);
591 std::unique_ptr<FunctionOutliningInfo> OutliningInfo =
592 std::make_unique<FunctionOutliningInfo>();
595 bool CandidateFound =
false;
610 std::tie(ReturnBlock, NonReturnBlock) = GetReturnBlock(Succ1, Succ2);
613 OutliningInfo->Entries.push_back(CurrEntry);
614 OutliningInfo->ReturnBlock = ReturnBlock;
615 OutliningInfo->NonReturnBlock = NonReturnBlock;
616 CandidateFound =
true;
622 std::tie(CommSucc, OtherSucc) = GetCommonSucc(Succ1, Succ2);
627 OutliningInfo->Entries.push_back(CurrEntry);
632 return std::unique_ptr<FunctionOutliningInfo>();
638 "Function Entry must be the first in Entries vector");
645 auto HasNonEntryPred = [Entries](
BasicBlock *BB) {
647 if (!Entries.count(Pred))
652 auto CheckAndNormalizeCandidate =
653 [Entries, HasNonEntryPred](FunctionOutliningInfo *OutliningInfo) {
654 for (
BasicBlock *E : OutliningInfo->Entries) {
656 if (Entries.count(Succ))
658 if (Succ == OutliningInfo->ReturnBlock)
659 OutliningInfo->ReturnBlockPreds.push_back(E);
660 else if (Succ != OutliningInfo->NonReturnBlock)
664 if (HasNonEntryPred(E))
670 if (!CheckAndNormalizeCandidate(OutliningInfo.get()))
671 return std::unique_ptr<FunctionOutliningInfo>();
676 BasicBlock *Cand = OutliningInfo->NonReturnBlock;
680 if (HasNonEntryPred(Cand))
687 std::tie(ReturnBlock, NonReturnBlock) = GetReturnBlock(Succ1, Succ2);
688 if (!ReturnBlock || ReturnBlock != OutliningInfo->ReturnBlock)
695 OutliningInfo->Entries.push_back(Cand);
696 OutliningInfo->NonReturnBlock = NonReturnBlock;
697 OutliningInfo->ReturnBlockPreds.push_back(Cand);
698 Entries.insert(Cand);
701 return OutliningInfo;
709 for (
auto *E : OI->Entries) {
721 PartialInlinerImpl::getOutliningCallBBRelativeFreq(FunctionCloner &Cloner) {
722 BasicBlock *OutliningCallBB = Cloner.OutlinedFunctions.back().second;
724 Cloner.ClonedFuncBFI->getBlockFreq(&Cloner.ClonedFunc->getEntryBlock());
725 auto OutliningCallFreq =
726 Cloner.ClonedFuncBFI->getBlockFreq(OutliningCallBB);
730 if (OutliningCallFreq.getFrequency() > EntryFreq.getFrequency()) {
731 OutliningCallFreq = EntryFreq;
734 OutliningCallFreq.getFrequency(), EntryFreq.getFrequency());
737 return OutlineRegionRelFreq;
752 return OutlineRegionRelFreq;
757 return OutlineRegionRelFreq;
760 bool PartialInlinerImpl::shouldPartialInline(
761 CallSite CS, FunctionCloner &Cloner,
768 assert(Callee == Cloner.ClonedFunc);
774 auto &CalleeTTI = (*GetTTI)(*Callee);
775 bool RemarksEnabled =
778 assert(Call &&
"invalid callsite for partial inline");
780 CalleeTTI, *GetAssumptionCache, GetBFI, PSI,
781 RemarksEnabled ? &ORE :
nullptr);
786 <<
NV(
"Callee", Cloner.OrigFunc)
787 <<
" should always be fully inlined, not partially";
795 <<
NV(
"Callee", Cloner.OrigFunc) <<
" not partially inlined into " 796 <<
NV(
"Caller", Caller)
797 <<
" because it should never be inlined (cost=never)";
805 <<
NV(
"Callee", Cloner.OrigFunc) <<
" not partially inlined into " 806 <<
NV(
"Caller", Caller) <<
" because too costly to inline (cost=" 807 <<
NV(
"Cost", IC.
getCost()) <<
", threshold=" 819 if (NormWeightedSavings < WeightedOutliningRcost) {
823 <<
NV(
"Callee", Cloner.OrigFunc) <<
" not partially inlined into " 824 <<
NV(
"Caller", Caller) <<
" runtime overhead (overhead=" 825 <<
NV(
"Overhead", (
unsigned)WeightedOutliningRcost.
getFrequency())
829 <<
" of making the outlined call is too high";
837 <<
NV(
"Callee", Cloner.OrigFunc) <<
" can be partially inlined into " 838 <<
NV(
"Caller", Caller) <<
" with cost=" <<
NV(
"Cost", IC.
getCost())
848 int PartialInlinerImpl::computeBBInlineCost(
BasicBlock *BB) {
853 switch (
I.getOpcode()) {
854 case Instruction::BitCast:
855 case Instruction::PtrToInt:
856 case Instruction::IntToPtr:
857 case Instruction::Alloca:
858 case Instruction::PHI:
860 case Instruction::GetElementPtr:
861 if (cast<GetElementPtrInst>(&
I)->hasAllZeroIndices())
868 if (
I.isLifetimeStartOrEnd())
871 if (
CallInst *CI = dyn_cast<CallInst>(&
I)) {
891 PartialInlinerImpl::computeOutliningCosts(FunctionCloner &Cloner) {
892 int OutliningFuncCallCost = 0, OutlinedFunctionCost = 0;
893 for (
auto FuncBBPair : Cloner.OutlinedFunctions) {
894 Function *OutlinedFunc = FuncBBPair.first;
895 BasicBlock* OutliningCallBB = FuncBBPair.second;
898 OutliningFuncCallCost += computeBBInlineCost(OutliningCallBB);
902 OutlinedFunctionCost += computeBBInlineCost(&BB);
904 assert(OutlinedFunctionCost >= Cloner.OutlinedRegionCost &&
905 "Outlined function cost should be no less than the outlined region");
910 OutlinedFunctionCost -=
913 int OutliningRuntimeOverhead =
914 OutliningFuncCallCost +
915 (OutlinedFunctionCost - Cloner.OutlinedRegionCost) +
918 return std::make_tuple(OutliningFuncCallCost, OutliningRuntimeOverhead);
924 void PartialInlinerImpl::computeCallsiteToProfCountMap(
930 std::unique_ptr<BlockFrequencyInfo> TempBFI;
933 auto ComputeCurrBFI = [&,
this](
Function *Caller) {
940 CurrentCallerBFI = TempBFI.get();
943 CurrentCallerBFI = &(*GetBFI)(*Caller);
947 for (User *User :
Users) {
950 if (CurrentCaller != Caller) {
951 CurrentCaller = Caller;
952 ComputeCurrBFI(Caller);
954 assert(CurrentCallerBFI &&
"CallerBFI is not set");
959 CallSiteToProfCountMap[User] = *Count;
961 CallSiteToProfCountMap[User] = 0;
965 PartialInlinerImpl::FunctionCloner::FunctionCloner(
968 : OrigFunc(F), ORE(ORE), LookupAC(LookupAC) {
969 ClonedOI = std::make_unique<FunctionOutliningInfo>();
975 ClonedOI->ReturnBlock = cast<BasicBlock>(VMap[OI->ReturnBlock]);
976 ClonedOI->NonReturnBlock = cast<BasicBlock>(VMap[OI->NonReturnBlock]);
978 ClonedOI->Entries.push_back(cast<BasicBlock>(VMap[BB]));
982 ClonedOI->ReturnBlockPreds.push_back(NewE);
989 PartialInlinerImpl::FunctionCloner::FunctionCloner(
990 Function *F, FunctionOutliningMultiRegionInfo *OI,
993 : OrigFunc(F), ORE(ORE), LookupAC(LookupAC) {
994 ClonedOMRI = std::make_unique<FunctionOutliningMultiRegionInfo>();
1002 for (FunctionOutliningMultiRegionInfo::OutlineRegionInfo
RegionInfo :
1006 Region.
push_back(cast<BasicBlock>(VMap[BB]));
1008 BasicBlock *NewEntryBlock = cast<BasicBlock>(VMap[RegionInfo.EntryBlock]);
1009 BasicBlock *NewExitBlock = cast<BasicBlock>(VMap[RegionInfo.ExitBlock]);
1011 if (RegionInfo.ReturnBlock)
1012 NewReturnBlock = cast<BasicBlock>(VMap[RegionInfo.ReturnBlock]);
1013 FunctionOutliningMultiRegionInfo::OutlineRegionInfo MappedRegionInfo(
1014 Region, NewEntryBlock, NewExitBlock, NewReturnBlock);
1015 ClonedOMRI->ORI.push_back(MappedRegionInfo);
1022 void PartialInlinerImpl::FunctionCloner::NormalizeReturnBlock() {
1026 while (I != BB->
end()) {
1047 BasicBlock *PreReturn = ClonedOI->ReturnBlock;
1049 PHINode *FirstPhi = getFirstPHI(PreReturn);
1050 unsigned NumPredsFromEntries = ClonedOI->ReturnBlockPreds.size();
1056 Value *CommonValue = PN->getIncomingValue(0);
1057 if (
all_of(PN->incoming_values(),
1058 [&](
Value *V) {
return V == CommonValue; }))
1063 ClonedOI->ReturnBlock = ClonedOI->ReturnBlock->splitBasicBlock(
1064 ClonedOI->ReturnBlock->getFirstNonPHI()->getIterator());
1068 while (I != PreReturn->
end()) {
1076 Ins = ClonedOI->ReturnBlock->getFirstNonPHI();
1079 for (
BasicBlock *E : ClonedOI->ReturnBlockPreds) {
1088 if (
auto *OldPhiVal = IsTrivialPhi(OldPhi)) {
1094 for (
auto *DP : DeadPhis)
1095 DP->eraseFromParent();
1097 for (
auto E : ClonedOI->ReturnBlockPreds) {
1102 bool PartialInlinerImpl::FunctionCloner::doMultiRegionFunctionOutlining() {
1107 Cost += computeBBInlineCost(BB);
1111 assert(ClonedOMRI &&
"Expecting OutlineInfo for multi region outline");
1113 if (ClonedOMRI->ORI.empty())
1129 for (FunctionOutliningMultiRegionInfo::OutlineRegionInfo
RegionInfo :
1131 int CurrentOutlinedRegionCost = ComputeRegionCost(RegionInfo.Region);
1134 ClonedFuncBFI.get(), &BPI,
1135 LookupAC(*RegionInfo.EntryBlock->getParent()),
1142 dbgs() <<
"inputs: " << Inputs.
size() <<
"\n";
1143 dbgs() <<
"outputs: " << Outputs.
size() <<
"\n";
1144 for (
Value *value : Inputs)
1145 dbgs() <<
"value used in func: " << *value <<
"\n";
1146 for (
Value *output : Outputs)
1147 dbgs() <<
"instr used in func: " << *output <<
"\n";
1157 CallSite OCS = PartialInlinerImpl::getOneCallSiteTo(OutlinedFunc);
1160 OutlinedFunctions.push_back(std::make_pair(OutlinedFunc,OutliningCallBB));
1161 NumColdRegionsOutlined++;
1162 OutlinedRegionCost += CurrentOutlinedRegionCost;
1171 &RegionInfo.Region.front()->front())
1172 <<
"Failed to extract region at block " 1173 <<
ore::NV(
"Block", RegionInfo.Region.front());
1177 return !OutlinedFunctions.empty();
1181 PartialInlinerImpl::FunctionCloner::doSingleRegionFunctionOutlining() {
1184 auto ToBeInlined = [&,
this](
BasicBlock *BB) {
1185 return BB == ClonedOI->ReturnBlock ||
1186 (
std::find(ClonedOI->Entries.begin(), ClonedOI->Entries.end(), BB) !=
1187 ClonedOI->Entries.end());
1190 assert(ClonedOI &&
"Expecting OutlineInfo for single region outline");
1201 std::vector<BasicBlock *> ToExtract;
1202 ToExtract.push_back(ClonedOI->NonReturnBlock);
1203 OutlinedRegionCost +=
1204 PartialInlinerImpl::computeBBInlineCost(ClonedOI->NonReturnBlock);
1206 if (!ToBeInlined(&BB) && &BB != ClonedOI->NonReturnBlock) {
1207 ToExtract.push_back(&BB);
1212 OutlinedRegionCost += computeBBInlineCost(&BB);
1219 ClonedFuncBFI.get(), &BPI, LookupAC(*ClonedFunc),
1221 .extractCodeRegion(CEAC);
1225 PartialInlinerImpl::getOneCallSiteTo(OutlinedFunc)
1229 OutlinedFunctions.push_back(std::make_pair(OutlinedFunc, OutliningCallBB));
1233 &ToExtract.front()->front())
1234 <<
"Failed to extract region at block " 1235 <<
ore::NV(
"Block", ToExtract.front());
1238 return OutlinedFunc;
1241 PartialInlinerImpl::FunctionCloner::~FunctionCloner() {
1245 ClonedFunc->eraseFromParent();
1246 if (!IsFunctionInlined) {
1249 for (
auto FuncBBPair : OutlinedFunctions) {
1256 std::pair<bool, Function *> PartialInlinerImpl::unswitchFunction(
Function *F) {
1259 return {
false,
nullptr};
1263 return {
false,
nullptr};
1266 return {
false,
nullptr};
1268 if (PSI->isFunctionEntryCold(F))
1269 return {
false,
nullptr};
1271 if (F->
users().empty())
1272 return {
false,
nullptr};
1280 std::unique_ptr<FunctionOutliningMultiRegionInfo> OMRI =
1281 computeOutliningColdRegionsInfo(F, ORE);
1283 FunctionCloner Cloner(F, OMRI.get(), ORE, LookupAssumptionCache);
1287 dbgs() <<
"HotCountThreshold = " << PSI->getHotCountThreshold() <<
"\n";
1288 dbgs() <<
"ColdCountThreshold = " << PSI->getColdCountThreshold()
1292 bool DidOutline = Cloner.doMultiRegionFunctionOutlining();
1297 dbgs() <<
">>>>>> Outlined (Cloned) Function >>>>>>\n";
1298 Cloner.ClonedFunc->print(
dbgs());
1299 dbgs() <<
"<<<<<< Outlined (Cloned) Function <<<<<<\n";
1303 if (tryPartialInline(Cloner))
1304 return {
true,
nullptr};
1312 std::unique_ptr<FunctionOutliningInfo> OI = computeOutliningInfo(F);
1314 return {
false,
nullptr};
1316 FunctionCloner Cloner(F, OI.get(), ORE, LookupAssumptionCache);
1317 Cloner.NormalizeReturnBlock();
1319 Function *OutlinedFunction = Cloner.doSingleRegionFunctionOutlining();
1321 if (!OutlinedFunction)
1322 return {
false,
nullptr};
1324 bool AnyInline = tryPartialInline(Cloner);
1327 return {
true, OutlinedFunction};
1329 return {
false,
nullptr};
1332 bool PartialInlinerImpl::tryPartialInline(FunctionCloner &Cloner) {
1333 if (Cloner.OutlinedFunctions.empty())
1338 int NonWeightedRcost;
1339 std::tie(SizeCost, NonWeightedRcost) = computeOutliningCosts(Cloner);
1344 if (Cloner.ClonedOI) {
1345 RelativeToEntryFreq = getOutliningCallBBRelativeFreq(Cloner);
1354 WeightedRcost =
BlockFrequency(NonWeightedRcost) * RelativeToEntryFreq;
1364 std::tie(DLoc, Block) = getOneDebugLoc(Cloner.ClonedFunc);
1365 OrigFuncORE.
emit([&]() {
1368 <<
ore::NV(
"Function", Cloner.OrigFunc)
1369 <<
" not partially inlined into callers (Original Size = " 1370 <<
ore::NV(
"OutlinedRegionOriginalSize", Cloner.OutlinedRegionCost)
1371 <<
", Size of call sequence to outlined function = " 1372 <<
ore::NV(
"NewSize", SizeCost) <<
")";
1377 assert(Cloner.OrigFunc->users().empty() &&
1378 "F's users should all be replaced!");
1380 std::vector<User *>
Users(Cloner.ClonedFunc->user_begin(),
1381 Cloner.ClonedFunc->user_end());
1384 auto CalleeEntryCount = Cloner.OrigFunc->getEntryCount();
1385 if (CalleeEntryCount)
1386 computeCallsiteToProfCountMap(Cloner.ClonedFunc, CallSiteToProfCountMap);
1388 uint64_t CalleeEntryCountV =
1389 (CalleeEntryCount ? CalleeEntryCount.getCount() : 0);
1391 bool AnyInline =
false;
1392 for (User *User :
Users) {
1395 if (IsLimitReached())
1399 if (!shouldPartialInline(CS, Cloner, WeightedRcost, CallerORE))
1405 OR <<
ore::NV(
"Callee", Cloner.OrigFunc) <<
" partially inlined into " 1412 (Cloner.ClonedOI ? Cloner.OutlinedFunctions.back().first
1419 if (CalleeEntryCountV && CallSiteToProfCountMap.count(User)) {
1420 uint64_t CallSiteCount = CallSiteToProfCountMap[User];
1421 CalleeEntryCountV -= std::min(CalleeEntryCountV, CallSiteCount);
1425 NumPartialInlining++;
1427 if (Cloner.ClonedOI)
1428 NumPartialInlined++;
1430 NumColdOutlinePartialInlined++;
1435 Cloner.IsFunctionInlined =
true;
1436 if (CalleeEntryCount)
1437 Cloner.OrigFunc->setEntryCount(
1438 CalleeEntryCount.setCount(CalleeEntryCountV));
1440 OrigFuncORE.
emit([&]() {
1442 <<
"Partially inlined into at least one caller";
1450 bool PartialInlinerImpl::run(
Module &M) {
1454 std::vector<Function *> Worklist;
1455 Worklist.reserve(M.
size());
1458 Worklist.push_back(&F);
1460 bool Changed =
false;
1461 while (!Worklist.empty()) {
1463 Worklist.pop_back();
1468 bool Recursive =
false;
1469 for (User *U : CurrFunc->
users())
1471 if (
I->getParent()->getParent() == CurrFunc) {
1478 std::pair<bool, Function * > Result = unswitchFunction(CurrFunc);
1480 Worklist.push_back(Result.second);
1481 Changed |= Result.first;
1490 "Partial Inliner",
false,
false)
1498 return new PartialInlinerLegacyPass();
1505 std::function<AssumptionCache &(Function &)> GetAssumptionCache =
1514 std::function<BlockFrequencyInfo &(Function &)> GetBFI =
1519 std::function<TargetTransformInfo &(Function &)> GetTTI =
1526 if (PartialInlinerImpl(&GetAssumptionCache, LookupAssumptionCache, &GetTTI,
Optional< uint64_t > getBlockProfileCount(const BasicBlock *BB, bool AllowSynthetic=false) const
Returns the estimated profile count of BB.
A parsed version of the target data layout string in and methods for querying it. ...
const_iterator end(StringRef path)
Get end iterator over path.
bool hasNPredecessors(unsigned N) const
Return true if this block has exactly N predecessors.
InlineCost getInlineCost(CallBase &Call, const InlineParams &Params, TargetTransformInfo &CalleeTTI, std::function< AssumptionCache &(Function &)> &GetAssumptionCache, Optional< function_ref< BlockFrequencyInfo &(Function &)>> GetBFI, ProfileSummaryInfo *PSI, OptimizationRemarkEmitter *ORE=nullptr)
Get an InlineCost object representing the cost of inlining this callsite.
void addIncoming(Value *V, BasicBlock *BB)
Add an incoming value to the end of the PHI list.
static PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
const_iterator begin(StringRef path, Style style=Style::native)
Get begin iterator over path.
DiagnosticInfoOptimizationBase::Argument NV
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)
Get the result of an analysis pass for a given IR unit.
This class represents lattice values for constants.
size_type size() const
Determine the number of elements in the SetVector.
void initializePartialInlinerLegacyPassPass(PassRegistry &)
A Module instance is used to store all the information related to an LLVM module. ...
static cl::opt< int > MaxNumPartialInlining("max-partial-inlining", cl::init(-1), cl::Hidden, cl::ZeroOrMore, cl::desc("Max number of partial inlining. The default is unlimited"))
Implements a dense probed hash-table based set.
void push_back(const T &Elt)
void recalculate(ParentType &Func)
recalculate - compute a dominator tree for the given function
Analysis providing profile information.
This class represents a function call, abstracting a target machine's calling convention.
Function * CloneFunction(Function *F, ValueToValueMapTy &VMap, ClonedCodeInfo *CodeInfo=nullptr)
Return a copy of the specified function and add it to that function's module.
An immutable pass that tracks lazily created AssumptionCache objects.
An efficient, type-erasing, non-owning reference to a callable.
A cache of @llvm.assume calls within a function.
Analysis pass providing the TargetTransformInfo.
uint64_t getFrequency() const
Returns the frequency as a fixpoint number scaled by the entry frequency.
void setCallingConv(CallingConv::ID CC)
Set the calling convention of the call.
const BasicBlock & back() const
bool hasFnAttribute(Attribute::AttrKind Kind) const
Return true if the function has the attribute.
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly...
STATISTIC(NumFunctions, "Total number of functions")
FunTy * getCalledFunction() const
Return the function being called if this is a direct call, otherwise return null (if it's an indirect...
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction if the block is well formed or null if the block is not well forme...
iv Induction Variable Users
This class captures the data input to the InlineFunction call, and records the auxiliary results prod...
Represents the cost of inlining a function.
static cl::opt< bool > ForceLiveExit("pi-force-live-exit-outline", cl::init(false), cl::Hidden, cl::desc("Force outline regions with live exits"))
iterator begin()
Instruction iterator methods.
AnalysisUsage & addRequired()
ModulePass * createPartialInliningPass()
createPartialInliningPass - This pass inlines parts of functions.
#define INITIALIZE_PASS_DEPENDENCY(depName)
Value * removeIncomingValue(unsigned Idx, bool DeletePHIIfEmpty=true)
Remove an incoming value.
const DataLayout & getDataLayout() const
Get the data layout for the module's target platform.
An analysis pass based on legacy pass manager to deliver ProfileSummaryInfo.
static cl::opt< int > OutlineRegionFreqPercent("outline-region-freq-percent", cl::init(75), cl::Hidden, cl::ZeroOrMore, cl::desc("Relative frequency of outline region to " "the entry block"))
This file contains the simple types necessary to represent the attributes associated with functions a...
Interval::succ_iterator succ_begin(Interval *I)
succ_begin/succ_end - define methods so that Intervals may be used just like BasicBlocks can with the...
InstrTy * getInstruction() const
Type * getType() const
All values are typed, get the type of this value.
static cl::opt< bool > SkipCostAnalysis("skip-partial-inlining-cost-analysis", cl::init(false), cl::ZeroOrMore, cl::ReallyHidden, cl::desc("Skip Cost Analysis"))
const T & getValue() const LLVM_LVALUE_FUNCTION
const BasicBlock * getSingleSuccessor() const
Return the successor of this block if it has a single successor.
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory)...
void replaceAllUsesWith(Value *V)
Change all uses of this to point to a new Value.
static cl::opt< unsigned > MinBlockCounterExecution("min-block-execution", cl::init(100), cl::Hidden, cl::desc("Minimum block executions to consider " "its BranchProbabilityInfo valid"))
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree...
static PreservedAnalyses none()
Convenience factory function for the empty preserved set.
Interval::succ_iterator succ_end(Interval *I)
void replaceUsesOfWith(Value *From, Value *To)
Replace uses of one Value with another.
PreservedAnalyses run(Module &M, ModuleAnalysisManager &)
static cl::opt< unsigned > ExtraOutliningPenalty("partial-inlining-extra-penalty", cl::init(0), cl::Hidden, cl::desc("A debug option to add additional penalty to the computed one."))
BlockFrequencyInfo pass uses BlockFrequencyInfoImpl implementation to estimate IR basic block frequen...
void setCallingConv(CallingConv::ID CC)
static cl::opt< bool > MarkOutlinedColdCC("pi-mark-coldcc", cl::init(false), cl::Hidden, cl::desc("Mark outline function calls with ColdCC"))
initializer< Ty > init(const Ty &Val)
Control flow instructions. These all have token chains.
static cl::opt< bool > DisablePartialInlining("disable-partial-inlining", cl::init(false), cl::Hidden, cl::desc("Disable partial inlining"))
A set of analyses that are preserved following a run of a transformation pass.
const BasicBlock * getSinglePredecessor() const
Return the predecessor of this block if it has a single predecessor block.
LLVM Basic Block Representation.
Conditional or Unconditional Branch instruction.
An analysis pass based on the new PM to deliver ProfileSummaryInfo.
static cl::opt< float > ColdBranchRatio("cold-branch-ratio", cl::init(0.1), cl::Hidden, cl::desc("Minimum BranchProbability to consider a region cold."))
iterator_range< filter_iterator< BasicBlock::const_iterator, std::function< bool(const Instruction &)> > > instructionsWithoutDebug() const
Return a const iterator range over the instructions in the block, skipping any debug instructions...
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
Value * getIncomingValueForBlock(const BasicBlock *BB) const
const Instruction & front() const
std::pair< iterator, bool > insert(const ValueT &V)
static ManagedStatic< OptionRegistry > OR
Represent the analysis usage information of a pass.
InlineResult isInlineViable(Function &Callee)
Minimal filter to detect invalid constructs for inlining.
bool hasProfileData(bool IncludeSynthetic=false) const
Return true if the function is annotated with profile data.
LLVMContext & getContext() const
getContext - Return a reference to the LLVMContext associated with this function. ...
InlineResult InlineFunction(CallBase *CB, InlineFunctionInfo &IFI, AAResults *CalleeAAR=nullptr, bool InsertLifetime=true)
This function inlines the called function into the basic block of the caller.
Used in the streaming interface as the general argument type.
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
virtual bool isMissedOptRemarkEnabled(StringRef PassName) const
Return true if missed optimization remarks are enabled, override to provide different implementation...
auto find(R &&Range, const T &Val) -> decltype(adl_begin(Range))
Provide wrappers to std::find which take ranges instead of having to pass begin/end explicitly...
INITIALIZE_PASS_END(RegBankSelect, DEBUG_TYPE, "Assign register bank of generic virtual registers", false, false) RegBankSelect
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
A function analysis which provides an AssumptionCache.
Analysis pass which computes BlockFrequencyInfo.
Iterator for intrusive lists based on ilist_node.
Align max(MaybeAlign Lhs, Align Rhs)
InlineParams getInlineParams()
Generate the parameters to tune the inline cost analysis based only on the commandline options...
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small...
Module.h This file contains the declarations for the Module class.
AssumptionCache * lookupAssumptionCache(Function &F)
Return the cached assumptions for a function if it has already been scanned.
static BranchProbability getBranchProbability(uint64_t Numerator, uint64_t Denominator)
static PHINode * Create(Type *Ty, unsigned NumReservedValues, const Twine &NameStr="", Instruction *InsertBefore=nullptr)
Constructors - NumReservedValues is a hint for the number of incoming edges that this phi node will h...
pred_range predecessors(BasicBlock *BB)
unsigned getNumIncomingValues() const
Return the number of incoming edges.
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
int getCallsiteCost(CallBase &Call, const DataLayout &DL)
Return the cost associated with a callsite, including parameter passing and the call/return instructi...
static cl::opt< unsigned > MaxNumInlineBlocks("max-num-inline-blocks", cl::init(5), cl::Hidden, cl::desc("Max number of blocks to be partially inlined"))
amdgpu Simplify well known AMD library false FunctionCallee Callee
BBTy * getParent() const
Get the basic block containing the call site.
iterator_range< user_iterator > users()
int getCost() const
Get the inline cost estimate.
int getCostDelta() const
Get the cost delta from the threshold for inlining.
static cl::opt< bool > DisableMultiRegionPartialInline("disable-mr-partial-inlining", cl::init(false), cl::Hidden, cl::desc("Disable multi-region partial inlining"))
INITIALIZE_PASS_BEGIN(PartialInlinerLegacyPass, "partial-inliner", "Partial Inliner", false, false) INITIALIZE_PASS_END(PartialInlinerLegacyPass
unsigned succ_size(const Instruction *I)
This file provides various utilities for inspecting and working with the control flow graph in LLVM I...
Analysis providing branch probability information.
const DebugLoc & getDebugLoc() const
Return the debug location for this node as a DebugLoc.
static cl::opt< bool > TracePartialInlining("trace-partial-inlining", cl::init(false), cl::Hidden, cl::desc("Trace partial inlining."))
const Function * getParent() const
Return the enclosing method, or null if none.
static bool hasProfileData(Function *F, FunctionOutliningInfo *OI)
ModulePass class - This class is used to implement unstructured interprocedural optimizations and ana...
LLVM_NODISCARD std::enable_if<!is_simple_type< Y >::value, typename cast_retty< X, const Y >::ret_type >::type dyn_cast(const Y &Val)
void eraseFromParent()
eraseFromParent - This method unlinks 'this' from the containing module and deletes it...
bool isUnconditional() const
bool isDeclaration() const
Return true if the primary definition of this global value is outside of the current translation unit...
const DiagnosticHandler * getDiagHandlerPtr() const
getDiagHandlerPtr - Returns const raw pointer of DiagnosticHandler set by setDiagnosticHandler.
bool hasAddressTaken(const User **=nullptr) const
hasAddressTaken - returns true if there are any uses of this function other than direct calls or invo...
static cl::opt< float > MinRegionSizeRatio("min-region-size-ratio", cl::init(0.1), cl::Hidden, cl::desc("Minimum ratio comparing relative sizes of each " "outline candidate and original function"))
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
user_iterator user_begin()
const BasicBlock & front() const
Module * getParent()
Get the module that this global value is contained inside of...
LLVM Value Representation.
succ_range successors(Instruction *I)
AssumptionCache & getAssumptionCache(Function &F)
Get the cached assumptions for a function.
print Print MemDeps of function
A container for analyses that lazily runs them and caches their results.
static MachineBasicBlock * OtherSucc(MachineBasicBlock *MBB, MachineBasicBlock *Succ)
bool extractProfMetadata(uint64_t &TrueVal, uint64_t &FalseVal) const
Retrieve the raw weight values of a conditional branch or select.
const BasicBlock * getParent() const
An analysis over an "outer" IR unit that provides access to an analysis manager over an "inner" IR un...
bool is_contained(R &&Range, const E &Element)
Wrapper function around std::find to detect if an element exists in a container.
FunTy * getCaller() const
Return the caller function for this call site.