63#define DEBUG_TYPE "partial-inlining"
66 "Number of callsites functions partially inlined into.");
67STATISTIC(NumColdOutlinePartialInlined,
"Number of times functions with "
68 "cold outlined regions were partially "
69 "inlined into its caller(s).");
71 "Number of cold single entry/exit regions found.");
73 "Number of cold single entry/exit regions outlined.");
83 cl::desc(
"Disable multi-region partial inlining"));
89 cl::desc(
"Force outline regions with live exits"));
95 cl::desc(
"Mark outline function calls with ColdCC"));
108 cl::desc(
"Minimum ratio comparing relative sizes of each "
109 "outline candidate and original function"));
114 cl::desc(
"Minimum block executions to consider "
115 "its BranchProbabilityInfo valid"));
120 cl::desc(
"Minimum BranchProbability to consider a region cold."));
124 cl::desc(
"Max number of blocks to be partially inlined"));
130 cl::desc(
"Max number of partial inlining. The default is unlimited"));
138 cl::desc(
"Relative frequency of outline region to "
143 cl::desc(
"A debug option to add additional penalty to the computed one."));
147struct FunctionOutliningInfo {
148 FunctionOutliningInfo() =
default;
152 unsigned getNumInlinedBlocks()
const {
return Entries.size() + 1; }
168struct FunctionOutliningMultiRegionInfo {
169 FunctionOutliningMultiRegionInfo() =
default;
172 struct OutlineRegionInfo {
177 ExitBlock(ExitBlock), ReturnBlock(ReturnBlock) {}
187struct PartialInlinerImpl {
196 : GetAssumptionCache(GetAC), LookupAssumptionCache(LookupAC),
197 GetTTI(GTTI), GetBFI(GBFI), GetTLI(GTLI), PSI(ProfSI) {}
207 std::pair<bool, Function *> unswitchFunction(
Function &
F);
213 struct FunctionCloner {
216 FunctionCloner(
Function *
F, FunctionOutliningInfo *OI,
220 FunctionCloner(
Function *
F, FunctionOutliningMultiRegionInfo *OMRI,
230 void normalizeReturnBlock()
const;
233 bool doMultiRegionFunctionOutlining();
240 Function *doSingleRegionFunctionOutlining();
245 typedef std::pair<Function *, BasicBlock *> FuncBodyCallerPair;
251 bool IsFunctionInlined =
false;
255 std::unique_ptr<FunctionOutliningInfo> ClonedOI =
nullptr;
257 std::unique_ptr<FunctionOutliningMultiRegionInfo> ClonedOMRI =
nullptr;
258 std::unique_ptr<BlockFrequencyInfo> ClonedFuncBFI =
nullptr;
265 int NumPartialInlining = 0;
278 getOutliningCallBBRelativeFreq(FunctionCloner &Cloner)
const;
282 bool shouldPartialInline(
CallBase &CB, FunctionCloner &Cloner,
289 bool tryPartialInline(FunctionCloner &Cloner);
294 computeCallsiteToProfCountMap(
Function *DuplicateFunction,
297 bool isLimitReached()
const {
303 if (isa<CallInst>(U) || isa<InvokeInst>(U))
304 return cast<CallBase>(U);
311 return getSupportedCallBase(
User);
314 std::tuple<DebugLoc, BasicBlock *> getOneDebugLoc(
Function &
F)
const {
318 return std::make_tuple(DLoc,
Block);
327 std::tuple<InstructionCost, InstructionCost>
328 computeOutliningCosts(FunctionCloner &Cloner)
const;
336 std::unique_ptr<FunctionOutliningInfo>
339 std::unique_ptr<FunctionOutliningMultiRegionInfo>
340 computeOutliningColdRegionsInfo(
Function &
F,
346std::unique_ptr<FunctionOutliningMultiRegionInfo>
347PartialInlinerImpl::computeOutliningColdRegionsInfo(
354 std::unique_ptr<BlockFrequencyInfo> ScopedBFI;
358 BFI = ScopedBFI.get();
363 if (!PSI.hasInstrumentationProfile())
364 return std::unique_ptr<FunctionOutliningMultiRegionInfo>();
366 std::unique_ptr<FunctionOutliningMultiRegionInfo> OutliningInfo =
367 std::make_unique<FunctionOutliningMultiRegionInfo>();
372 for (
auto *
Block : BlockList) {
379 <<
"Region dominated by "
380 <<
ore::NV(
"Block", BlockList.front()->getName())
381 <<
" has more than one region exit edge.";
394 return BFI->getBlockProfileCount(BB).value_or(0);
402 OverallFunctionCost += computeBBInlineCost(&BB, FTTI);
404 LLVM_DEBUG(
dbgs() <<
"OverallFunctionCost = " << OverallFunctionCost
413 bool ColdCandidateFound =
false;
415 std::vector<BasicBlock *> DFS;
417 DFS.push_back(CurrEntry);
418 VisitedMap[CurrEntry] =
true;
426 while (!DFS.empty()) {
427 auto *ThisBB = DFS.back();
432 if (PSI.isColdBlock(ThisBB, BFI) ||
438 VisitedMap[*
SI] =
true;
442 if (SuccProb > MinBranchProbability)
445 LLVM_DEBUG(
dbgs() <<
"Found cold edge: " << ThisBB->getName() <<
"->"
447 <<
"\nBranch Probability = " << SuccProb <<
"\n";);
450 DT.getDescendants(*SI, DominateVector);
452 "SI should be reachable and have at least itself as descendant");
455 if (!DominateVector.
front()->hasNPredecessors(1)) {
457 <<
" doesn't have a single predecessor in the "
458 "dominator tree\n";);
464 if (!(ExitBlock = IsSingleExit(DominateVector))) {
466 <<
" doesn't have a unique successor\n";);
471 for (
auto *BB : DominateVector)
472 OutlineRegionCost += computeBBInlineCost(BB, &GetTTI(*BB->getParent()));
482 <<
" inline cost-savings smaller than "
483 <<
ore::NV(
"Cost", MinOutlineRegionCost);
486 LLVM_DEBUG(
dbgs() <<
"ABORT: Outline region cost is smaller than "
487 << MinOutlineRegionCost <<
"\n";);
495 for (
auto *BB : DominateVector)
496 VisitedMap[BB] =
true;
500 FunctionOutliningMultiRegionInfo::OutlineRegionInfo
RegInfo(
501 DominateVector, DominateVector.front(), ExitBlock, ReturnBlock);
502 OutliningInfo->ORI.push_back(
RegInfo);
504 << DominateVector.front()->getName() <<
"\n";);
505 ColdCandidateFound =
true;
506 NumColdRegionsFound++;
510 if (ColdCandidateFound)
511 return OutliningInfo;
513 return std::unique_ptr<FunctionOutliningMultiRegionInfo>();
516std::unique_ptr<FunctionOutliningInfo>
517PartialInlinerImpl::computeOutliningInfo(
Function &
F)
const {
520 if (!BR ||
BR->isUnconditional())
521 return std::unique_ptr<FunctionOutliningInfo>();
530 return isa<ReturnInst>(TI);
534 if (IsReturnBlock(Succ1))
535 return std::make_tuple(Succ1, Succ2);
536 if (IsReturnBlock(Succ2))
537 return std::make_tuple(Succ2, Succ1);
539 return std::make_tuple<BasicBlock *, BasicBlock *>(
nullptr,
nullptr);
544 if (IsSuccessor(Succ1, Succ2))
545 return std::make_tuple(Succ1, Succ2);
546 if (IsSuccessor(Succ2, Succ1))
547 return std::make_tuple(Succ2, Succ1);
549 return std::make_tuple<BasicBlock *, BasicBlock *>(
nullptr,
nullptr);
552 std::unique_ptr<FunctionOutliningInfo> OutliningInfo =
553 std::make_unique<FunctionOutliningInfo>();
556 bool CandidateFound =
false;
571 std::tie(ReturnBlock, NonReturnBlock) = GetReturnBlock(Succ1, Succ2);
574 OutliningInfo->Entries.push_back(CurrEntry);
575 OutliningInfo->ReturnBlock = ReturnBlock;
576 OutliningInfo->NonReturnBlock = NonReturnBlock;
577 CandidateFound =
true;
582 std::tie(CommSucc,
OtherSucc) = GetCommonSucc(Succ1, Succ2);
587 OutliningInfo->Entries.push_back(CurrEntry);
592 return std::unique_ptr<FunctionOutliningInfo>();
596 assert(OutliningInfo->Entries[0] == &
F.front() &&
597 "Function Entry must be the first in Entries vector");
604 auto HasNonEntryPred = [Entries](
BasicBlock *BB) {
606 if (!Entries.count(Pred))
611 auto CheckAndNormalizeCandidate =
612 [Entries, HasNonEntryPred](FunctionOutliningInfo *OutliningInfo) {
615 if (Entries.count(Succ))
617 if (Succ == OutliningInfo->ReturnBlock)
618 OutliningInfo->ReturnBlockPreds.push_back(
E);
619 else if (Succ != OutliningInfo->NonReturnBlock)
623 if (HasNonEntryPred(
E))
629 if (!CheckAndNormalizeCandidate(OutliningInfo.get()))
630 return std::unique_ptr<FunctionOutliningInfo>();
635 BasicBlock *Cand = OutliningInfo->NonReturnBlock;
639 if (HasNonEntryPred(Cand))
646 std::tie(ReturnBlock, NonReturnBlock) = GetReturnBlock(Succ1, Succ2);
647 if (!ReturnBlock || ReturnBlock != OutliningInfo->ReturnBlock)
654 OutliningInfo->Entries.push_back(Cand);
655 OutliningInfo->NonReturnBlock = NonReturnBlock;
656 OutliningInfo->ReturnBlockPreds.push_back(Cand);
657 Entries.insert(Cand);
660 return OutliningInfo;
665 if (
F.hasProfileData())
668 for (
auto *
E : OI.Entries) {
669 BranchInst *BR = dyn_cast<BranchInst>(
E->getTerminator());
670 if (!BR || BR->isUnconditional())
679 FunctionCloner &Cloner)
const {
680 BasicBlock *OutliningCallBB = Cloner.OutlinedFunctions.
back().second;
682 Cloner.ClonedFuncBFI->getBlockFreq(&Cloner.ClonedFunc->getEntryBlock());
683 auto OutliningCallFreq =
684 Cloner.ClonedFuncBFI->getBlockFreq(OutliningCallBB);
688 if (OutliningCallFreq.getFrequency() > EntryFreq.getFrequency())
689 OutliningCallFreq = EntryFreq;
692 OutliningCallFreq.getFrequency(), EntryFreq.getFrequency());
695 return OutlineRegionRelFreq;
710 return OutlineRegionRelFreq;
712 OutlineRegionRelFreq = std::max(
715 return OutlineRegionRelFreq;
718bool PartialInlinerImpl::shouldPartialInline(
724 assert(Callee == Cloner.ClonedFunc);
730 auto &CalleeTTI = GetTTI(*Callee);
731 bool RemarksEnabled =
732 Callee->getContext().getDiagHandlerPtr()->isMissedOptRemarkEnabled(
736 GetTLI, GetBFI, &PSI, RemarksEnabled ? &ORE :
nullptr);
741 <<
NV(
"Callee", Cloner.OrigFunc)
742 <<
" should always be fully inlined, not partially";
750 <<
NV(
"Callee", Cloner.OrigFunc) <<
" not partially inlined into "
751 <<
NV(
"Caller", Caller)
752 <<
" because it should never be inlined (cost=never)";
760 <<
NV(
"Callee", Cloner.OrigFunc) <<
" not partially inlined into "
761 <<
NV(
"Caller", Caller) <<
" because too costly to inline (cost="
762 <<
NV(
"Cost", IC.
getCost()) <<
", threshold="
774 if (NormWeightedSavings < WeightedOutliningRcost) {
778 <<
NV(
"Callee", Cloner.OrigFunc) <<
" not partially inlined into "
779 <<
NV(
"Caller", Caller) <<
" runtime overhead (overhead="
780 <<
NV(
"Overhead", (
unsigned)WeightedOutliningRcost.
getFrequency())
782 <<
NV(
"Savings", (
unsigned)NormWeightedSavings.getFrequency())
784 <<
" of making the outlined call is too high";
792 <<
NV(
"Callee", Cloner.OrigFunc) <<
" can be partially inlined into "
793 <<
NV(
"Caller", Caller) <<
" with cost=" <<
NV(
"Cost", IC.
getCost())
804PartialInlinerImpl::computeBBInlineCost(
BasicBlock *BB,
811 switch (
I.getOpcode()) {
812 case Instruction::BitCast:
813 case Instruction::PtrToInt:
814 case Instruction::IntToPtr:
815 case Instruction::Alloca:
816 case Instruction::PHI:
818 case Instruction::GetElementPtr:
819 if (cast<GetElementPtrInst>(&
I)->hasAllZeroIndices())
826 if (
I.isLifetimeStartOrEnd())
829 if (
auto *II = dyn_cast<IntrinsicInst>(&
I)) {
833 for (
Value *Val : II->args())
836 if (
auto *FPMO = dyn_cast<FPMathOperator>(II))
837 FMF = FPMO->getFastMathFlags();
844 if (
CallInst *CI = dyn_cast<CallInst>(&
I)) {
864std::tuple<InstructionCost, InstructionCost>
865PartialInlinerImpl::computeOutliningCosts(FunctionCloner &Cloner)
const {
867 for (
auto FuncBBPair : Cloner.OutlinedFunctions) {
868 Function *OutlinedFunc = FuncBBPair.first;
869 BasicBlock* OutliningCallBB = FuncBBPair.second;
872 auto *OutlinedFuncTTI = &GetTTI(*OutlinedFunc);
873 OutliningFuncCallCost +=
874 computeBBInlineCost(OutliningCallBB, OutlinedFuncTTI);
878 OutlinedFunctionCost += computeBBInlineCost(&BB, OutlinedFuncTTI);
880 assert(OutlinedFunctionCost >= Cloner.OutlinedRegionCost &&
881 "Outlined function cost should be no less than the outlined region");
886 OutlinedFunctionCost -=
890 OutliningFuncCallCost +
891 (OutlinedFunctionCost - Cloner.OutlinedRegionCost) +
894 return std::make_tuple(OutliningFuncCallCost, OutliningRuntimeOverhead);
900void PartialInlinerImpl::computeCallsiteToProfCountMap(
906 std::unique_ptr<BlockFrequencyInfo> TempBFI;
916 CurrentCallerBFI = TempBFI.get();
919 CurrentCallerBFI = &(GetBFI(*Caller));
925 if (isa<BlockAddress>(
User))
929 if (CurrentCaller != Caller) {
931 ComputeCurrBFI(Caller);
933 assert(CurrentCallerBFI &&
"CallerBFI is not set");
938 CallSiteToProfCountMap[
User] = *Count;
940 CallSiteToProfCountMap[
User] = 0;
944PartialInlinerImpl::FunctionCloner::FunctionCloner(
948 : OrigFunc(
F), ORE(ORE), LookupAC(LookupAC), GetTTI(GetTTI) {
949 ClonedOI = std::make_unique<FunctionOutliningInfo>();
955 ClonedOI->ReturnBlock = cast<BasicBlock>(VMap[OI->ReturnBlock]);
956 ClonedOI->NonReturnBlock = cast<BasicBlock>(VMap[OI->NonReturnBlock]);
958 ClonedOI->Entries.push_back(cast<BasicBlock>(VMap[BB]));
962 ClonedOI->ReturnBlockPreds.push_back(NewE);
966 F->replaceAllUsesWith(ClonedFunc);
969PartialInlinerImpl::FunctionCloner::FunctionCloner(
970 Function *
F, FunctionOutliningMultiRegionInfo *OI,
974 : OrigFunc(
F), ORE(ORE), LookupAC(LookupAC), GetTTI(GetTTI) {
975 ClonedOMRI = std::make_unique<FunctionOutliningMultiRegionInfo>();
983 for (
const FunctionOutliningMultiRegionInfo::OutlineRegionInfo &
RegionInfo :
987 Region.push_back(cast<BasicBlock>(VMap[BB]));
993 NewReturnBlock = cast<BasicBlock>(VMap[
RegionInfo.ReturnBlock]);
994 FunctionOutliningMultiRegionInfo::OutlineRegionInfo MappedRegionInfo(
995 Region, NewEntryBlock, NewExitBlock, NewReturnBlock);
996 ClonedOMRI->ORI.push_back(MappedRegionInfo);
1000 F->replaceAllUsesWith(ClonedFunc);
1003void PartialInlinerImpl::FunctionCloner::normalizeReturnBlock()
const {
1007 while (
I != BB->end()) {
1028 BasicBlock *PreReturn = ClonedOI->ReturnBlock;
1030 PHINode *FirstPhi = GetFirstPHI(PreReturn);
1031 unsigned NumPredsFromEntries = ClonedOI->ReturnBlockPreds.size();
1038 return PN->getIncomingValue(0);
1042 ClonedOI->ReturnBlock = ClonedOI->ReturnBlock->splitBasicBlock(
1043 ClonedOI->ReturnBlock->getFirstNonPHI()->getIterator());
1047 while (
I != PreReturn->
end()) {
1048 PHINode *OldPhi = dyn_cast<PHINode>(
I);
1056 Ins = ClonedOI->ReturnBlock->getFirstNonPHIIt();
1059 for (
BasicBlock *
E : ClonedOI->ReturnBlockPreds) {
1068 if (
auto *OldPhiVal = IsTrivialPhi(OldPhi)) {
1074 for (
auto *DP : DeadPhis)
1075 DP->eraseFromParent();
1077 for (
auto *
E : ClonedOI->ReturnBlockPreds)
1078 E->getTerminator()->replaceUsesOfWith(PreReturn, ClonedOI->ReturnBlock);
1081bool PartialInlinerImpl::FunctionCloner::doMultiRegionFunctionOutlining() {
1083 auto ComputeRegionCost =
1087 Cost += computeBBInlineCost(BB, &GetTTI(*BB->getParent()));
1091 assert(ClonedOMRI &&
"Expecting OutlineInfo for multi region outline");
1093 if (ClonedOMRI->ORI.empty())
1109 for (FunctionOutliningMultiRegionInfo::OutlineRegionInfo
RegionInfo :
1115 ClonedFuncBFI.get(), &BPI,
1116 LookupAC(*
RegionInfo.EntryBlock->getParent()),
1119 CE.findInputsOutputs(Inputs, Outputs, Sinks);
1122 dbgs() <<
"inputs: " << Inputs.
size() <<
"\n";
1123 dbgs() <<
"outputs: " << Outputs.
size() <<
"\n";
1125 dbgs() <<
"value used in func: " << *
value <<
"\n";
1126 for (
Value *output : Outputs)
1127 dbgs() <<
"instr used in func: " << *output <<
"\n";
1134 if (
Function *OutlinedFunc =
CE.extractCodeRegion(CEAC)) {
1135 CallBase *OCS = PartialInlinerImpl::getOneCallSiteTo(*OutlinedFunc);
1138 OutlinedFunctions.push_back(std::make_pair(OutlinedFunc,OutliningCallBB));
1139 NumColdRegionsOutlined++;
1140 OutlinedRegionCost += CurrentOutlinedRegionCost;
1150 <<
"Failed to extract region at block "
1155 return !OutlinedFunctions.empty();
1159PartialInlinerImpl::FunctionCloner::doSingleRegionFunctionOutlining() {
1162 auto ToBeInlined = [&,
this](
BasicBlock *BB) {
1163 return BB == ClonedOI->ReturnBlock ||
1167 assert(ClonedOI &&
"Expecting OutlineInfo for single region outline");
1178 std::vector<BasicBlock *> ToExtract;
1179 auto *ClonedFuncTTI = &GetTTI(*ClonedFunc);
1180 ToExtract.push_back(ClonedOI->NonReturnBlock);
1181 OutlinedRegionCost += PartialInlinerImpl::computeBBInlineCost(
1182 ClonedOI->NonReturnBlock, ClonedFuncTTI);
1184 if (!ToBeInlined(BB) && BB != ClonedOI->NonReturnBlock) {
1185 ToExtract.push_back(BB);
1190 OutlinedRegionCost += computeBBInlineCost(BB, ClonedFuncTTI);
1197 ClonedFuncBFI.get(), &BPI, LookupAC(*ClonedFunc),
1203 PartialInlinerImpl::getOneCallSiteTo(*OutlinedFunc)->
getParent();
1205 OutlinedFunctions.push_back(std::make_pair(OutlinedFunc, OutliningCallBB));
1209 &ToExtract.front()->front())
1210 <<
"Failed to extract region at block "
1211 <<
ore::NV(
"Block", ToExtract.front());
1214 return OutlinedFunc;
1217PartialInlinerImpl::FunctionCloner::~FunctionCloner() {
1221 ClonedFunc->eraseFromParent();
1222 if (!IsFunctionInlined) {
1225 for (
auto FuncBBPair : OutlinedFunctions) {
1227 Func->eraseFromParent();
1232std::pair<bool, Function *> PartialInlinerImpl::unswitchFunction(
Function &
F) {
1233 if (
F.hasAddressTaken())
1234 return {
false,
nullptr};
1237 if (
F.hasFnAttribute(Attribute::AlwaysInline))
1238 return {
false,
nullptr};
1240 if (
F.hasFnAttribute(Attribute::NoInline))
1241 return {
false,
nullptr};
1243 if (PSI.isFunctionEntryCold(&
F))
1244 return {
false,
nullptr};
1246 if (
F.users().empty())
1247 return {
false,
nullptr};
1253 if (PSI.hasProfileSummary() &&
F.hasProfileData() &&
1255 std::unique_ptr<FunctionOutliningMultiRegionInfo> OMRI =
1256 computeOutliningColdRegionsInfo(
F, ORE);
1258 FunctionCloner Cloner(&
F, OMRI.get(), ORE, LookupAssumptionCache, GetTTI);
1261 dbgs() <<
"HotCountThreshold = " << PSI.getHotCountThreshold() <<
"\n";
1262 dbgs() <<
"ColdCountThreshold = " << PSI.getColdCountThreshold()
1266 bool DidOutline = Cloner.doMultiRegionFunctionOutlining();
1270 dbgs() <<
">>>>>> Outlined (Cloned) Function >>>>>>\n";
1271 Cloner.ClonedFunc->print(
dbgs());
1272 dbgs() <<
"<<<<<< Outlined (Cloned) Function <<<<<<\n";
1275 if (tryPartialInline(Cloner))
1276 return {
true,
nullptr};
1284 std::unique_ptr<FunctionOutliningInfo> OI = computeOutliningInfo(
F);
1286 return {
false,
nullptr};
1288 FunctionCloner Cloner(&
F, OI.get(), ORE, LookupAssumptionCache, GetTTI);
1289 Cloner.normalizeReturnBlock();
1291 Function *OutlinedFunction = Cloner.doSingleRegionFunctionOutlining();
1293 if (!OutlinedFunction)
1294 return {
false,
nullptr};
1296 if (tryPartialInline(Cloner))
1297 return {
true, OutlinedFunction};
1299 return {
false,
nullptr};
1302bool PartialInlinerImpl::tryPartialInline(FunctionCloner &Cloner) {
1303 if (Cloner.OutlinedFunctions.empty())
1306 auto OutliningCosts = computeOutliningCosts(Cloner);
1312 "Expected valid costs");
1317 if (Cloner.ClonedOI)
1318 RelativeToEntryFreq = getOutliningCallBBRelativeFreq(Cloner);
1338 std::tie(DLoc,
Block) = getOneDebugLoc(*Cloner.ClonedFunc);
1339 OrigFuncORE.emit([&]() {
1342 <<
ore::NV(
"Function", Cloner.OrigFunc)
1343 <<
" not partially inlined into callers (Original Size = "
1344 <<
ore::NV(
"OutlinedRegionOriginalSize", Cloner.OutlinedRegionCost)
1345 <<
", Size of call sequence to outlined function = "
1346 <<
ore::NV(
"NewSize", SizeCost) <<
")";
1351 assert(Cloner.OrigFunc->users().empty() &&
1352 "F's users should all be replaced!");
1354 std::vector<User *>
Users(Cloner.ClonedFunc->user_begin(),
1355 Cloner.ClonedFunc->user_end());
1358 auto CalleeEntryCount = Cloner.OrigFunc->getEntryCount();
1359 if (CalleeEntryCount)
1360 computeCallsiteToProfCountMap(Cloner.ClonedFunc, CallSiteToProfCountMap);
1363 (CalleeEntryCount ? CalleeEntryCount->getCount() : 0);
1365 bool AnyInline =
false;
1368 if (isa<BlockAddress>(
User))
1373 if (isLimitReached())
1377 if (!shouldPartialInline(*CB, Cloner, WeightedRcost, CallerORE))
1383 OR <<
ore::NV(
"Callee", Cloner.OrigFunc) <<
" partially inlined into "
1390 (Cloner.ClonedOI ? Cloner.OutlinedFunctions.back().first
1398 if (CalleeEntryCountV && CallSiteToProfCountMap.
count(
User)) {
1400 CalleeEntryCountV -= std::min(CalleeEntryCountV, CallSiteCount);
1404 NumPartialInlining++;
1406 if (Cloner.ClonedOI)
1407 NumPartialInlined++;
1409 NumColdOutlinePartialInlined++;
1413 Cloner.IsFunctionInlined =
true;
1414 if (CalleeEntryCount)
1416 CalleeEntryCountV, CalleeEntryCount->getType()));
1418 OrigFuncORE.emit([&]() {
1420 <<
"Partially inlined into at least one caller";
1427bool PartialInlinerImpl::run(
Module &M) {
1431 std::vector<Function *> Worklist;
1432 Worklist.reserve(
M.size());
1434 if (!
F.use_empty() && !
F.isDeclaration())
1435 Worklist.push_back(&
F);
1437 bool Changed =
false;
1438 while (!Worklist.empty()) {
1440 Worklist.pop_back();
1445 std::pair<bool, Function *>
Result = unswitchFunction(*CurrFunc);
1447 Worklist.push_back(
Result.second);
1480 if (PartialInlinerImpl(GetAssumptionCache, LookupAssumptionCache, GetTTI,
1481 GetTLI, PSI, GetBFI)
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
static MachineBasicBlock * OtherSucc(MachineBasicBlock *MBB, MachineBasicBlock *Succ)
This file contains the simple types necessary to represent the attributes associated with functions a...
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
Given that RA is a live value
This file defines the DenseMap class.
This file defines the DenseSet and SmallDenseSet classes.
This file builds on the ADT/GraphTraits.h file to build generic depth first graph iterator.
This file provides various utilities for inspecting and working with the control flow graph in LLVM I...
iv Induction Variable Users
static cl::opt< int > InstrCost("inline-instr-cost", cl::Hidden, cl::init(5), cl::desc("Cost of a single instruction when inlining"))
Module.h This file contains the declarations for the Module class.
static cl::opt< unsigned > MaxNumInlineBlocks("max-num-inline-blocks", cl::init(5), cl::Hidden, cl::desc("Max number of blocks to be partially inlined"))
static cl::opt< int > OutlineRegionFreqPercent("outline-region-freq-percent", cl::init(75), cl::Hidden, cl::desc("Relative frequency of outline region to " "the entry block"))
static cl::opt< bool > MarkOutlinedColdCC("pi-mark-coldcc", cl::init(false), cl::Hidden, cl::desc("Mark outline function calls with ColdCC"))
static cl::opt< float > MinRegionSizeRatio("min-region-size-ratio", cl::init(0.1), cl::Hidden, cl::desc("Minimum ratio comparing relative sizes of each " "outline candidate and original function"))
static cl::opt< bool > DisableMultiRegionPartialInline("disable-mr-partial-inlining", cl::init(false), cl::Hidden, cl::desc("Disable multi-region partial inlining"))
static cl::opt< unsigned > MinBlockCounterExecution("min-block-execution", cl::init(100), cl::Hidden, cl::desc("Minimum block executions to consider " "its BranchProbabilityInfo valid"))
static cl::opt< int > MaxNumPartialInlining("max-partial-inlining", cl::init(-1), cl::Hidden, cl::desc("Max number of partial inlining. The default is unlimited"))
static cl::opt< bool > DisablePartialInlining("disable-partial-inlining", cl::init(false), cl::Hidden, cl::desc("Disable partial inlining"))
static bool hasProfileData(const Function &F, const FunctionOutliningInfo &OI)
static cl::opt< float > ColdBranchRatio("cold-branch-ratio", cl::init(0.1), cl::Hidden, cl::desc("Minimum BranchProbability to consider a region cold."))
static cl::opt< bool > ForceLiveExit("pi-force-live-exit-outline", cl::init(false), cl::Hidden, cl::desc("Force outline regions with live exits"))
static cl::opt< unsigned > ExtraOutliningPenalty("partial-inlining-extra-penalty", cl::init(0), cl::Hidden, cl::desc("A debug option to add additional penalty to the computed one."))
static cl::opt< bool > SkipCostAnalysis("skip-partial-inlining-cost-analysis", cl::ReallyHidden, cl::desc("Skip Cost Analysis"))
FunctionAnalysisManager FAM
This file contains the declarations for profiling metadata utility functions.
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
This file defines the SmallVector class.
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
A container for analyses that lazily runs them and caches their results.
PassT::Result * getCachedResult(IRUnitT &IR) const
Get the cached result of an analysis pass for a given IR unit.
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)
Get the result of an analysis pass for a given IR unit.
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
A function analysis which provides an AssumptionCache.
A cache of @llvm.assume calls within a function.
LLVM Basic Block Representation.
iterator begin()
Instruction iterator methods.
iterator_range< filter_iterator< BasicBlock::const_iterator, std::function< bool(const Instruction &)> > > instructionsWithoutDebug(bool SkipPseudoOp=true) const
Return a const iterator range over the instructions in the block, skipping any debug instructions.
const BasicBlock * getSinglePredecessor() const
Return the predecessor of this block if it has a single predecessor block.
const BasicBlock * getSingleSuccessor() const
Return the successor of this block if it has a single successor.
const Function * getParent() const
Return the enclosing method, or null if none.
InstListType::iterator iterator
Instruction iterators...
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction if the block is well formed or null if the block is not well formed.
const Instruction & back() const
Analysis pass which computes BlockFrequencyInfo.
BlockFrequencyInfo pass uses BlockFrequencyInfoImpl implementation to estimate IR basic block frequen...
std::optional< uint64_t > getBlockProfileCount(const BasicBlock *BB, bool AllowSynthetic=false) const
Returns the estimated profile count of BB.
uint64_t getFrequency() const
Returns the frequency as a fixpoint number scaled by the entry frequency.
Conditional or Unconditional Branch instruction.
Analysis providing branch probability information.
static BranchProbability getBranchProbability(uint64_t Numerator, uint64_t Denominator)
Base class for all callable instructions (InvokeInst and CallInst) Holds everything related to callin...
void setCallingConv(CallingConv::ID CC)
Function * getCalledFunction() const
Returns the function called, or null if this is an indirect function invocation or the function signa...
Function * getCaller()
Helper to get the caller (the parent function).
This class represents a function call, abstracting a target machine's calling convention.
A parsed version of the target data layout string, and methods for querying it.
size_type count(const_arg_type_t< KeyT > Val) const
Return 1 if the specified key is in the map, 0 otherwise.
Implements a dense probed hash-table based set.
void recalculate(ParentType &Func)
recalculate - compute a dominator tree for the given function
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
Convenience struct for specifying and reasoning about fast-math flags.
Class to represent profile counts.
const BasicBlock & back() const
void setCallingConv(CallingConv::ID CC)
Module * getParent()
Get the module that this global value is contained inside of...
Represents the cost of inlining a function.
int getCost() const
Get the inline cost estimate.
int getCostDelta() const
Get the cost delta from the threshold for inlining.
This class captures the data input to the InlineFunction call, and records the auxiliary results prod...
An analysis over an "outer" IR unit that provides access to an analysis manager over an "inner" IR un...
auto map(const Function &F) const -> InstructionCost
std::optional< CostType > getValue() const
This function is intended to be used as sparingly as possible, since the class provides the full rang...
void insertBefore(Instruction *InsertPos)
Insert an unlinked instruction into a basic block immediately before the specified instruction.
const DebugLoc & getDebugLoc() const
Return the debug location for this node as a DebugLoc.
const BasicBlock * getParent() const
A Module instance is used to store all the information related to an LLVM module.
const DataLayout & getDataLayout() const
Get the data layout for the module's target platform.
void addIncoming(Value *V, BasicBlock *BB)
Add an incoming value to the end of the PHI list.
Value * removeIncomingValue(unsigned Idx, bool DeletePHIIfEmpty=true)
Remove an incoming value.
static PHINode * Create(Type *Ty, unsigned NumReservedValues, const Twine &NameStr="", Instruction *InsertBefore=nullptr)
Constructors - NumReservedValues is a hint for the number of incoming edges that this phi node will h...
Value * getIncomingValueForBlock(const BasicBlock *BB) const
unsigned getNumIncomingValues() const
Return the number of incoming edges.
PreservedAnalyses run(Module &M, ModuleAnalysisManager &)
A set of analyses that are preserved following a run of a transformation pass.
static PreservedAnalyses none()
Convenience factory function for the empty preserved set.
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
An analysis pass based on the new PM to deliver ProfileSummaryInfo.
Analysis providing profile information.
A vector that has set insertion semantics.
size_type size() const
Determine the number of elements in the SetVector.
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Analysis pass providing the TargetTransformInfo.
Analysis pass providing the TargetLibraryInfo.
Provides information about what library functions are available for the current target.
LLVM Value Representation.
Type * getType() const
All values are typed, get the type of this value.
user_iterator user_begin()
void replaceAllUsesWith(Value *V)
Change all uses of this to point to a new Value.
An efficient, type-erasing, non-owning reference to a callable.
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
@ Cold
Attempts to make code in the caller as efficient as possible under the assumption that the call is no...
@ BR
Control flow instructions. These all have token chains.
@ CE
Windows NT (Windows on ARM)
initializer< Ty > init(const Ty &Val)
PointerTypeMap run(const Module &M)
Compute the PointerTypeMap for the module M.
DiagnosticInfoOptimizationBase::Argument NV
NodeAddr< PhiNode * > Phi
NodeAddr< FuncNode * > Func
const_iterator begin(StringRef path, Style style=Style::native)
Get begin iterator over path.
const_iterator end(StringRef path)
Get end iterator over path.
This is an optimization pass for GlobalISel generic memory operations.
Interval::succ_iterator succ_end(Interval *I)
auto successors(const MachineBasicBlock *BB)
int getCallsiteCost(const CallBase &Call, const DataLayout &DL)
Return the cost associated with a callsite, including parameter passing and the call/return instructi...
Interval::succ_iterator succ_begin(Interval *I)
succ_begin/succ_end - define methods so that Intervals may be used just like BasicBlocks can with the...
InlineResult isInlineViable(Function &Callee)
Minimal filter to detect invalid constructs for inlining.
InlineCost getInlineCost(CallBase &Call, const InlineParams &Params, TargetTransformInfo &CalleeTTI, function_ref< AssumptionCache &(Function &)> GetAssumptionCache, function_ref< const TargetLibraryInfo &(Function &)> GetTLI, function_ref< BlockFrequencyInfo &(Function &)> GetBFI=nullptr, ProfileSummaryInfo *PSI=nullptr, OptimizationRemarkEmitter *ORE=nullptr)
Get an InlineCost object representing the cost of inlining this callsite.
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
InlineResult InlineFunction(CallBase &CB, InlineFunctionInfo &IFI, bool MergeAttributes=false, AAResults *CalleeAAR=nullptr, bool InsertLifetime=true, Function *ForwardVarArgsTo=nullptr)
This function inlines the called function into the basic block of the caller.
InlineParams getInlineParams()
Generate the parameters to tune the inline cost analysis based only on the commandline options.
auto predecessors(const MachineBasicBlock *BB)
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
iterator_range< df_iterator< T > > depth_first(const T &G)
bool all_equal(std::initializer_list< T > Values)
Returns true if all Values in the initializer list are equal or the list is empty.
unsigned succ_size(const MachineBasicBlock *BB)
bool hasBranchWeightMD(const Instruction &I)
Checks if an instruction has Branch Weight Metadata.
Function * CloneFunction(Function *F, ValueToValueMapTy &VMap, ClonedCodeInfo *CodeInfo=nullptr)
Return a copy of the specified function and add it to that function's module.