64#define DEBUG_TYPE "partial-inlining"
67 "Number of callsites functions partially inlined into.");
68STATISTIC(NumColdOutlinePartialInlined,
"Number of times functions with "
69 "cold outlined regions were partially "
70 "inlined into its caller(s).");
72 "Number of cold single entry/exit regions found.");
74 "Number of cold single entry/exit regions outlined.");
84 cl::desc(
"Disable multi-region partial inlining"));
90 cl::desc(
"Force outline regions with live exits"));
96 cl::desc(
"Mark outline function calls with ColdCC"));
109 cl::desc(
"Minimum ratio comparing relative sizes of each "
110 "outline candidate and original function"));
115 cl::desc(
"Minimum block executions to consider "
116 "its BranchProbabilityInfo valid"));
121 cl::desc(
"Minimum BranchProbability to consider a region cold."));
125 cl::desc(
"Max number of blocks to be partially inlined"));
131 cl::desc(
"Max number of partial inlining. The default is unlimited"));
139 cl::desc(
"Relative frequency of outline region to "
144 cl::desc(
"A debug option to add additional penalty to the computed one."));
148struct FunctionOutliningInfo {
149 FunctionOutliningInfo() =
default;
153 unsigned getNumInlinedBlocks()
const {
return Entries.size() + 1; }
169struct FunctionOutliningMultiRegionInfo {
170 FunctionOutliningMultiRegionInfo() =
default;
173 struct OutlineRegionInfo {
178 ExitBlock(ExitBlock), ReturnBlock(ReturnBlock) {}
188struct PartialInlinerImpl {
197 : GetAssumptionCache(GetAC), LookupAssumptionCache(LookupAC),
198 GetTTI(GTTI), GetBFI(GBFI), GetTLI(GTLI), PSI(ProfSI) {}
208 std::pair<bool, Function *> unswitchFunction(
Function &
F);
214 struct FunctionCloner {
217 FunctionCloner(
Function *
F, FunctionOutliningInfo *OI,
221 FunctionCloner(
Function *
F, FunctionOutliningMultiRegionInfo *OMRI,
231 void normalizeReturnBlock()
const;
234 bool doMultiRegionFunctionOutlining();
241 Function *doSingleRegionFunctionOutlining();
246 typedef std::pair<Function *, BasicBlock *> FuncBodyCallerPair;
252 bool IsFunctionInlined =
false;
256 std::unique_ptr<FunctionOutliningInfo> ClonedOI =
nullptr;
258 std::unique_ptr<FunctionOutliningMultiRegionInfo> ClonedOMRI =
nullptr;
259 std::unique_ptr<BlockFrequencyInfo> ClonedFuncBFI =
nullptr;
266 int NumPartialInlining = 0;
279 getOutliningCallBBRelativeFreq(FunctionCloner &Cloner)
const;
283 bool shouldPartialInline(
CallBase &CB, FunctionCloner &Cloner,
290 bool tryPartialInline(FunctionCloner &Cloner);
295 computeCallsiteToProfCountMap(
Function *DuplicateFunction,
298 bool isLimitReached()
const {
304 if (isa<CallInst>(U) || isa<InvokeInst>(U))
305 return cast<CallBase>(U);
312 return getSupportedCallBase(
User);
315 std::tuple<DebugLoc, BasicBlock *> getOneDebugLoc(
Function &
F)
const {
319 return std::make_tuple(DLoc, Block);
328 std::tuple<InstructionCost, InstructionCost>
329 computeOutliningCosts(FunctionCloner &Cloner)
const;
337 std::unique_ptr<FunctionOutliningInfo>
340 std::unique_ptr<FunctionOutliningMultiRegionInfo>
341 computeOutliningColdRegionsInfo(
Function &
F,
347std::unique_ptr<FunctionOutliningMultiRegionInfo>
348PartialInlinerImpl::computeOutliningColdRegionsInfo(
355 std::unique_ptr<BlockFrequencyInfo> ScopedBFI;
359 BFI = ScopedBFI.get();
364 if (!PSI.hasInstrumentationProfile())
365 return std::unique_ptr<FunctionOutliningMultiRegionInfo>();
367 std::unique_ptr<FunctionOutliningMultiRegionInfo> OutliningInfo =
368 std::make_unique<FunctionOutliningMultiRegionInfo>();
373 for (
auto *Block : BlockList) {
380 <<
"Region dominated by "
381 <<
ore::NV(
"Block", BlockList.front()->getName())
382 <<
" has more than one region exit edge.";
395 return BFI->getBlockProfileCount(BB).value_or(0);
403 OverallFunctionCost += computeBBInlineCost(&BB, FTTI);
405 LLVM_DEBUG(
dbgs() <<
"OverallFunctionCost = " << OverallFunctionCost
414 bool ColdCandidateFound =
false;
416 std::vector<BasicBlock *> DFS;
418 DFS.push_back(CurrEntry);
419 VisitedMap[CurrEntry] =
true;
427 while (!DFS.empty()) {
428 auto *ThisBB = DFS.back();
433 if (PSI.isColdBlock(ThisBB, BFI) ||
439 VisitedMap[*
SI] =
true;
443 if (SuccProb > MinBranchProbability)
446 LLVM_DEBUG(
dbgs() <<
"Found cold edge: " << ThisBB->getName() <<
"->"
448 <<
"\nBranch Probability = " << SuccProb <<
"\n";);
451 DT.getDescendants(*SI, DominateVector);
453 "SI should be reachable and have at least itself as descendant");
456 if (!DominateVector.
front()->hasNPredecessors(1)) {
458 <<
" doesn't have a single predecessor in the "
459 "dominator tree\n";);
465 if (!(ExitBlock = IsSingleExit(DominateVector))) {
467 <<
" doesn't have a unique successor\n";);
472 for (
auto *BB : DominateVector)
473 OutlineRegionCost += computeBBInlineCost(BB, &GetTTI(*BB->getParent()));
483 <<
" inline cost-savings smaller than "
484 <<
ore::NV(
"Cost", MinOutlineRegionCost);
487 LLVM_DEBUG(
dbgs() <<
"ABORT: Outline region cost is smaller than "
488 << MinOutlineRegionCost <<
"\n";);
496 for (
auto *BB : DominateVector)
497 VisitedMap[BB] =
true;
501 FunctionOutliningMultiRegionInfo::OutlineRegionInfo
RegInfo(
502 DominateVector, DominateVector.front(), ExitBlock, ReturnBlock);
503 OutliningInfo->ORI.push_back(
RegInfo);
505 << DominateVector.front()->getName() <<
"\n";);
506 ColdCandidateFound =
true;
507 NumColdRegionsFound++;
511 if (ColdCandidateFound)
512 return OutliningInfo;
514 return std::unique_ptr<FunctionOutliningMultiRegionInfo>();
517std::unique_ptr<FunctionOutliningInfo>
518PartialInlinerImpl::computeOutliningInfo(
Function &
F)
const {
521 if (!BR ||
BR->isUnconditional())
522 return std::unique_ptr<FunctionOutliningInfo>();
531 return isa<ReturnInst>(TI);
535 if (IsReturnBlock(Succ1))
536 return std::make_tuple(Succ1, Succ2);
537 if (IsReturnBlock(Succ2))
538 return std::make_tuple(Succ2, Succ1);
540 return std::make_tuple<BasicBlock *, BasicBlock *>(
nullptr,
nullptr);
545 if (IsSuccessor(Succ1, Succ2))
546 return std::make_tuple(Succ1, Succ2);
547 if (IsSuccessor(Succ2, Succ1))
548 return std::make_tuple(Succ2, Succ1);
550 return std::make_tuple<BasicBlock *, BasicBlock *>(
nullptr,
nullptr);
553 std::unique_ptr<FunctionOutliningInfo> OutliningInfo =
554 std::make_unique<FunctionOutliningInfo>();
557 bool CandidateFound =
false;
572 std::tie(ReturnBlock, NonReturnBlock) = GetReturnBlock(Succ1, Succ2);
575 OutliningInfo->Entries.push_back(CurrEntry);
576 OutliningInfo->ReturnBlock = ReturnBlock;
577 OutliningInfo->NonReturnBlock = NonReturnBlock;
578 CandidateFound =
true;
583 std::tie(CommSucc,
OtherSucc) = GetCommonSucc(Succ1, Succ2);
588 OutliningInfo->Entries.push_back(CurrEntry);
593 return std::unique_ptr<FunctionOutliningInfo>();
597 assert(OutliningInfo->Entries[0] == &
F.front() &&
598 "Function Entry must be the first in Entries vector");
605 auto HasNonEntryPred = [Entries](
BasicBlock *BB) {
607 if (!Entries.count(Pred))
612 auto CheckAndNormalizeCandidate =
613 [Entries, HasNonEntryPred](FunctionOutliningInfo *OutliningInfo) {
616 if (Entries.count(Succ))
618 if (Succ == OutliningInfo->ReturnBlock)
619 OutliningInfo->ReturnBlockPreds.push_back(
E);
620 else if (Succ != OutliningInfo->NonReturnBlock)
624 if (HasNonEntryPred(
E))
630 if (!CheckAndNormalizeCandidate(OutliningInfo.get()))
631 return std::unique_ptr<FunctionOutliningInfo>();
636 BasicBlock *Cand = OutliningInfo->NonReturnBlock;
640 if (HasNonEntryPred(Cand))
647 std::tie(ReturnBlock, NonReturnBlock) = GetReturnBlock(Succ1, Succ2);
648 if (!ReturnBlock || ReturnBlock != OutliningInfo->ReturnBlock)
655 OutliningInfo->Entries.push_back(Cand);
656 OutliningInfo->NonReturnBlock = NonReturnBlock;
657 OutliningInfo->ReturnBlockPreds.push_back(Cand);
658 Entries.insert(Cand);
661 return OutliningInfo;
666 if (
F.hasProfileData())
669 for (
auto *
E : OI.Entries) {
670 BranchInst *BR = dyn_cast<BranchInst>(
E->getTerminator());
671 if (!BR || BR->isUnconditional())
680 FunctionCloner &Cloner)
const {
681 BasicBlock *OutliningCallBB = Cloner.OutlinedFunctions.
back().second;
683 Cloner.ClonedFuncBFI->getBlockFreq(&Cloner.ClonedFunc->getEntryBlock());
684 auto OutliningCallFreq =
685 Cloner.ClonedFuncBFI->getBlockFreq(OutliningCallBB);
689 if (OutliningCallFreq.getFrequency() > EntryFreq.getFrequency())
690 OutliningCallFreq = EntryFreq;
693 OutliningCallFreq.getFrequency(), EntryFreq.getFrequency());
696 return OutlineRegionRelFreq;
711 return OutlineRegionRelFreq;
713 OutlineRegionRelFreq = std::max(
716 return OutlineRegionRelFreq;
719bool PartialInlinerImpl::shouldPartialInline(
731 auto &CalleeTTI = GetTTI(*
Callee);
732 bool RemarksEnabled =
733 Callee->getContext().getDiagHandlerPtr()->isMissedOptRemarkEnabled(
737 GetTLI, GetBFI, &PSI, RemarksEnabled ? &ORE :
nullptr);
742 <<
NV(
"Callee", Cloner.OrigFunc)
743 <<
" should always be fully inlined, not partially";
751 <<
NV(
"Callee", Cloner.OrigFunc) <<
" not partially inlined into "
752 <<
NV(
"Caller", Caller)
753 <<
" because it should never be inlined (cost=never)";
761 <<
NV(
"Callee", Cloner.OrigFunc) <<
" not partially inlined into "
762 <<
NV(
"Caller", Caller) <<
" because too costly to inline (cost="
763 <<
NV(
"Cost", IC.
getCost()) <<
", threshold="
775 if (NormWeightedSavings < WeightedOutliningRcost) {
779 <<
NV(
"Callee", Cloner.OrigFunc) <<
" not partially inlined into "
780 <<
NV(
"Caller", Caller) <<
" runtime overhead (overhead="
781 <<
NV(
"Overhead", (
unsigned)WeightedOutliningRcost.
getFrequency())
783 <<
NV(
"Savings", (
unsigned)NormWeightedSavings.getFrequency())
785 <<
" of making the outlined call is too high";
793 <<
NV(
"Callee", Cloner.OrigFunc) <<
" can be partially inlined into "
794 <<
NV(
"Caller", Caller) <<
" with cost=" <<
NV(
"Cost", IC.
getCost())
805PartialInlinerImpl::computeBBInlineCost(
BasicBlock *BB,
812 switch (
I.getOpcode()) {
813 case Instruction::BitCast:
814 case Instruction::PtrToInt:
815 case Instruction::IntToPtr:
816 case Instruction::Alloca:
817 case Instruction::PHI:
819 case Instruction::GetElementPtr:
820 if (cast<GetElementPtrInst>(&
I)->hasAllZeroIndices())
827 if (
I.isLifetimeStartOrEnd())
830 if (
auto *II = dyn_cast<IntrinsicInst>(&
I)) {
834 for (
Value *Val : II->args())
837 if (
auto *FPMO = dyn_cast<FPMathOperator>(II))
838 FMF = FPMO->getFastMathFlags();
845 if (
CallInst *CI = dyn_cast<CallInst>(&
I)) {
865std::tuple<InstructionCost, InstructionCost>
866PartialInlinerImpl::computeOutliningCosts(FunctionCloner &Cloner)
const {
868 for (
auto FuncBBPair : Cloner.OutlinedFunctions) {
869 Function *OutlinedFunc = FuncBBPair.first;
870 BasicBlock* OutliningCallBB = FuncBBPair.second;
873 auto *OutlinedFuncTTI = &GetTTI(*OutlinedFunc);
874 OutliningFuncCallCost +=
875 computeBBInlineCost(OutliningCallBB, OutlinedFuncTTI);
879 OutlinedFunctionCost += computeBBInlineCost(&BB, OutlinedFuncTTI);
881 assert(OutlinedFunctionCost >= Cloner.OutlinedRegionCost &&
882 "Outlined function cost should be no less than the outlined region");
887 OutlinedFunctionCost -=
891 OutliningFuncCallCost +
892 (OutlinedFunctionCost - Cloner.OutlinedRegionCost) +
895 return std::make_tuple(OutliningFuncCallCost, OutliningRuntimeOverhead);
901void PartialInlinerImpl::computeCallsiteToProfCountMap(
907 std::unique_ptr<BlockFrequencyInfo> TempBFI;
917 CurrentCallerBFI = TempBFI.get();
920 CurrentCallerBFI = &(GetBFI(*Caller));
926 if (isa<BlockAddress>(
User))
930 if (CurrentCaller != Caller) {
932 ComputeCurrBFI(Caller);
934 assert(CurrentCallerBFI &&
"CallerBFI is not set");
939 CallSiteToProfCountMap[
User] = *Count;
941 CallSiteToProfCountMap[
User] = 0;
945PartialInlinerImpl::FunctionCloner::FunctionCloner(
949 : OrigFunc(
F), ORE(ORE), LookupAC(LookupAC), GetTTI(GetTTI) {
950 ClonedOI = std::make_unique<FunctionOutliningInfo>();
956 ClonedOI->ReturnBlock = cast<BasicBlock>(VMap[OI->ReturnBlock]);
957 ClonedOI->NonReturnBlock = cast<BasicBlock>(VMap[OI->NonReturnBlock]);
959 ClonedOI->Entries.push_back(cast<BasicBlock>(VMap[BB]));
963 ClonedOI->ReturnBlockPreds.push_back(NewE);
967 F->replaceAllUsesWith(ClonedFunc);
970PartialInlinerImpl::FunctionCloner::FunctionCloner(
971 Function *
F, FunctionOutliningMultiRegionInfo *OI,
975 : OrigFunc(
F), ORE(ORE), LookupAC(LookupAC), GetTTI(GetTTI) {
976 ClonedOMRI = std::make_unique<FunctionOutliningMultiRegionInfo>();
984 for (FunctionOutliningMultiRegionInfo::OutlineRegionInfo
RegionInfo :
988 Region.push_back(cast<BasicBlock>(VMap[BB]));
994 NewReturnBlock = cast<BasicBlock>(VMap[
RegionInfo.ReturnBlock]);
995 FunctionOutliningMultiRegionInfo::OutlineRegionInfo MappedRegionInfo(
996 Region, NewEntryBlock, NewExitBlock, NewReturnBlock);
997 ClonedOMRI->ORI.push_back(MappedRegionInfo);
1001 F->replaceAllUsesWith(ClonedFunc);
1004void PartialInlinerImpl::FunctionCloner::normalizeReturnBlock()
const {
1008 while (
I != BB->end()) {
1009 PHINode *Phi = dyn_cast<PHINode>(
I);
1029 BasicBlock *PreReturn = ClonedOI->ReturnBlock;
1031 PHINode *FirstPhi = GetFirstPHI(PreReturn);
1032 unsigned NumPredsFromEntries = ClonedOI->ReturnBlockPreds.size();
1039 return PN->getIncomingValue(0);
1043 ClonedOI->ReturnBlock = ClonedOI->ReturnBlock->splitBasicBlock(
1044 ClonedOI->ReturnBlock->getFirstNonPHI()->getIterator());
1048 while (
I != PreReturn->
end()) {
1049 PHINode *OldPhi = dyn_cast<PHINode>(
I);
1056 Ins = ClonedOI->ReturnBlock->getFirstNonPHI();
1059 for (
BasicBlock *
E : ClonedOI->ReturnBlockPreds) {
1068 if (
auto *OldPhiVal = IsTrivialPhi(OldPhi)) {
1074 for (
auto *DP : DeadPhis)
1075 DP->eraseFromParent();
1077 for (
auto *
E : ClonedOI->ReturnBlockPreds)
1078 E->getTerminator()->replaceUsesOfWith(PreReturn, ClonedOI->ReturnBlock);
1081bool PartialInlinerImpl::FunctionCloner::doMultiRegionFunctionOutlining() {
1083 auto ComputeRegionCost =
1087 Cost += computeBBInlineCost(BB, &GetTTI(*BB->getParent()));
1091 assert(ClonedOMRI &&
"Expecting OutlineInfo for multi region outline");
1093 if (ClonedOMRI->ORI.empty())
1109 for (FunctionOutliningMultiRegionInfo::OutlineRegionInfo
RegionInfo :
1115 ClonedFuncBFI.get(), &BPI,
1116 LookupAC(*
RegionInfo.EntryBlock->getParent()),
1119 CE.findInputsOutputs(Inputs, Outputs, Sinks);
1122 dbgs() <<
"inputs: " << Inputs.
size() <<
"\n";
1123 dbgs() <<
"outputs: " << Outputs.
size() <<
"\n";
1125 dbgs() <<
"value used in func: " << *
value <<
"\n";
1126 for (
Value *output : Outputs)
1127 dbgs() <<
"instr used in func: " << *output <<
"\n";
1134 if (
Function *OutlinedFunc =
CE.extractCodeRegion(CEAC)) {
1135 CallBase *OCS = PartialInlinerImpl::getOneCallSiteTo(*OutlinedFunc);
1138 OutlinedFunctions.push_back(std::make_pair(OutlinedFunc,OutliningCallBB));
1139 NumColdRegionsOutlined++;
1140 OutlinedRegionCost += CurrentOutlinedRegionCost;
1150 <<
"Failed to extract region at block "
1155 return !OutlinedFunctions.empty();
1159PartialInlinerImpl::FunctionCloner::doSingleRegionFunctionOutlining() {
1162 auto ToBeInlined = [&,
this](
BasicBlock *BB) {
1163 return BB == ClonedOI->ReturnBlock ||
1167 assert(ClonedOI &&
"Expecting OutlineInfo for single region outline");
1178 std::vector<BasicBlock *> ToExtract;
1179 auto *ClonedFuncTTI = &GetTTI(*ClonedFunc);
1180 ToExtract.push_back(ClonedOI->NonReturnBlock);
1181 OutlinedRegionCost += PartialInlinerImpl::computeBBInlineCost(
1182 ClonedOI->NonReturnBlock, ClonedFuncTTI);
1184 if (!ToBeInlined(&BB) && &BB != ClonedOI->NonReturnBlock) {
1185 ToExtract.push_back(&BB);
1190 OutlinedRegionCost += computeBBInlineCost(&BB, ClonedFuncTTI);
1197 ClonedFuncBFI.get(), &BPI, LookupAC(*ClonedFunc),
1203 PartialInlinerImpl::getOneCallSiteTo(*OutlinedFunc)->
getParent();
1205 OutlinedFunctions.push_back(std::make_pair(OutlinedFunc, OutliningCallBB));
1209 &ToExtract.front()->front())
1210 <<
"Failed to extract region at block "
1211 <<
ore::NV(
"Block", ToExtract.front());
1214 return OutlinedFunc;
1217PartialInlinerImpl::FunctionCloner::~FunctionCloner() {
1221 ClonedFunc->eraseFromParent();
1222 if (!IsFunctionInlined) {
1225 for (
auto FuncBBPair : OutlinedFunctions) {
1227 Func->eraseFromParent();
1232std::pair<bool, Function *> PartialInlinerImpl::unswitchFunction(
Function &
F) {
1233 if (
F.hasAddressTaken())
1234 return {
false,
nullptr};
1237 if (
F.hasFnAttribute(Attribute::AlwaysInline))
1238 return {
false,
nullptr};
1240 if (
F.hasFnAttribute(Attribute::NoInline))
1241 return {
false,
nullptr};
1243 if (PSI.isFunctionEntryCold(&
F))
1244 return {
false,
nullptr};
1246 if (
F.users().empty())
1247 return {
false,
nullptr};
1253 if (PSI.hasProfileSummary() &&
F.hasProfileData() &&
1255 std::unique_ptr<FunctionOutliningMultiRegionInfo> OMRI =
1256 computeOutliningColdRegionsInfo(
F, ORE);
1258 FunctionCloner Cloner(&
F, OMRI.get(), ORE, LookupAssumptionCache, GetTTI);
1261 dbgs() <<
"HotCountThreshold = " << PSI.getHotCountThreshold() <<
"\n";
1262 dbgs() <<
"ColdCountThreshold = " << PSI.getColdCountThreshold()
1266 bool DidOutline = Cloner.doMultiRegionFunctionOutlining();
1270 dbgs() <<
">>>>>> Outlined (Cloned) Function >>>>>>\n";
1271 Cloner.ClonedFunc->print(
dbgs());
1272 dbgs() <<
"<<<<<< Outlined (Cloned) Function <<<<<<\n";
1275 if (tryPartialInline(Cloner))
1276 return {
true,
nullptr};
1284 std::unique_ptr<FunctionOutliningInfo> OI = computeOutliningInfo(
F);
1286 return {
false,
nullptr};
1288 FunctionCloner Cloner(&
F, OI.get(), ORE, LookupAssumptionCache, GetTTI);
1289 Cloner.normalizeReturnBlock();
1291 Function *OutlinedFunction = Cloner.doSingleRegionFunctionOutlining();
1293 if (!OutlinedFunction)
1294 return {
false,
nullptr};
1296 if (tryPartialInline(Cloner))
1297 return {
true, OutlinedFunction};
1299 return {
false,
nullptr};
1302bool PartialInlinerImpl::tryPartialInline(FunctionCloner &Cloner) {
1303 if (Cloner.OutlinedFunctions.empty())
1306 auto OutliningCosts = computeOutliningCosts(Cloner);
1312 "Expected valid costs");
1317 if (Cloner.ClonedOI)
1318 RelativeToEntryFreq = getOutliningCallBBRelativeFreq(Cloner);
1338 std::tie(DLoc, Block) = getOneDebugLoc(*Cloner.ClonedFunc);
1339 OrigFuncORE.emit([&]() {
1342 <<
ore::NV(
"Function", Cloner.OrigFunc)
1343 <<
" not partially inlined into callers (Original Size = "
1344 <<
ore::NV(
"OutlinedRegionOriginalSize", Cloner.OutlinedRegionCost)
1345 <<
", Size of call sequence to outlined function = "
1346 <<
ore::NV(
"NewSize", SizeCost) <<
")";
1351 assert(Cloner.OrigFunc->users().empty() &&
1352 "F's users should all be replaced!");
1354 std::vector<User *>
Users(Cloner.ClonedFunc->user_begin(),
1355 Cloner.ClonedFunc->user_end());
1358 auto CalleeEntryCount = Cloner.OrigFunc->getEntryCount();
1359 if (CalleeEntryCount)
1360 computeCallsiteToProfCountMap(Cloner.ClonedFunc, CallSiteToProfCountMap);
1363 (CalleeEntryCount ? CalleeEntryCount->getCount() : 0);
1365 bool AnyInline =
false;
1368 if (isa<BlockAddress>(
User))
1373 if (isLimitReached())
1377 if (!shouldPartialInline(*CB, Cloner, WeightedRcost, CallerORE))
1383 OR <<
ore::NV(
"Callee", Cloner.OrigFunc) <<
" partially inlined into "
1390 (Cloner.ClonedOI ? Cloner.OutlinedFunctions.back().first
1398 if (CalleeEntryCountV && CallSiteToProfCountMap.
count(
User)) {
1400 CalleeEntryCountV -= std::min(CalleeEntryCountV, CallSiteCount);
1404 NumPartialInlining++;
1406 if (Cloner.ClonedOI)
1407 NumPartialInlined++;
1409 NumColdOutlinePartialInlined++;
1413 Cloner.IsFunctionInlined =
true;
1414 if (CalleeEntryCount)
1416 CalleeEntryCountV, CalleeEntryCount->getType()));
1418 OrigFuncORE.emit([&]() {
1420 <<
"Partially inlined into at least one caller";
1427bool PartialInlinerImpl::run(
Module &M) {
1431 std::vector<Function *> Worklist;
1432 Worklist.reserve(
M.size());
1434 if (!
F.use_empty() && !
F.isDeclaration())
1435 Worklist.push_back(&
F);
1437 bool Changed =
false;
1438 while (!Worklist.empty()) {
1440 Worklist.pop_back();
1445 std::pair<bool, Function *>
Result = unswitchFunction(*CurrFunc);
1447 Worklist.push_back(
Result.second);
1480 if (PartialInlinerImpl(GetAssumptionCache, LookupAssumptionCache, GetTTI,
1481 GetTLI, PSI, GetBFI)
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
amdgpu Simplify well known AMD library false FunctionCallee Callee
static MachineBasicBlock * OtherSucc(MachineBasicBlock *MBB, MachineBasicBlock *Succ)
This file contains the simple types necessary to represent the attributes associated with functions a...
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
Given that RA is a live value
This file defines the DenseMap class.
This file defines the DenseSet and SmallDenseSet classes.
This file provides various utilities for inspecting and working with the control flow graph in LLVM I...
iv Induction Variable Users
static cl::opt< int > InstrCost("inline-instr-cost", cl::Hidden, cl::init(5), cl::desc("Cost of a single instruction when inlining"))
Module.h This file contains the declarations for the Module class.
static cl::opt< unsigned > MaxNumInlineBlocks("max-num-inline-blocks", cl::init(5), cl::Hidden, cl::desc("Max number of blocks to be partially inlined"))
static cl::opt< int > OutlineRegionFreqPercent("outline-region-freq-percent", cl::init(75), cl::Hidden, cl::desc("Relative frequency of outline region to " "the entry block"))
static cl::opt< bool > MarkOutlinedColdCC("pi-mark-coldcc", cl::init(false), cl::Hidden, cl::desc("Mark outline function calls with ColdCC"))
static cl::opt< float > MinRegionSizeRatio("min-region-size-ratio", cl::init(0.1), cl::Hidden, cl::desc("Minimum ratio comparing relative sizes of each " "outline candidate and original function"))
static cl::opt< bool > DisableMultiRegionPartialInline("disable-mr-partial-inlining", cl::init(false), cl::Hidden, cl::desc("Disable multi-region partial inlining"))
static cl::opt< unsigned > MinBlockCounterExecution("min-block-execution", cl::init(100), cl::Hidden, cl::desc("Minimum block executions to consider " "its BranchProbabilityInfo valid"))
static cl::opt< int > MaxNumPartialInlining("max-partial-inlining", cl::init(-1), cl::Hidden, cl::desc("Max number of partial inlining. The default is unlimited"))
static cl::opt< bool > DisablePartialInlining("disable-partial-inlining", cl::init(false), cl::Hidden, cl::desc("Disable partial inlining"))
static bool hasProfileData(const Function &F, const FunctionOutliningInfo &OI)
static cl::opt< float > ColdBranchRatio("cold-branch-ratio", cl::init(0.1), cl::Hidden, cl::desc("Minimum BranchProbability to consider a region cold."))
static cl::opt< bool > ForceLiveExit("pi-force-live-exit-outline", cl::init(false), cl::Hidden, cl::desc("Force outline regions with live exits"))
static cl::opt< unsigned > ExtraOutliningPenalty("partial-inlining-extra-penalty", cl::init(0), cl::Hidden, cl::desc("A debug option to add additional penalty to the computed one."))
static cl::opt< bool > SkipCostAnalysis("skip-partial-inlining-cost-analysis", cl::ReallyHidden, cl::desc("Skip Cost Analysis"))
FunctionAnalysisManager FAM
This file contains the declarations for profiling metadata utility functions.
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
This file defines the SmallVector class.
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
A container for analyses that lazily runs them and caches their results.
PassT::Result * getCachedResult(IRUnitT &IR) const
Get the cached result of an analysis pass for a given IR unit.
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)
Get the result of an analysis pass for a given IR unit.
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
A function analysis which provides an AssumptionCache.
A cache of @llvm.assume calls within a function.
LLVM Basic Block Representation.
iterator begin()
Instruction iterator methods.
iterator_range< filter_iterator< BasicBlock::const_iterator, std::function< bool(const Instruction &)> > > instructionsWithoutDebug(bool SkipPseudoOp=true) const
Return a const iterator range over the instructions in the block, skipping any debug instructions.
const BasicBlock * getSinglePredecessor() const
Return the predecessor of this block if it has a single predecessor block.
const BasicBlock * getSingleSuccessor() const
Return the successor of this block if it has a single successor.
const Function * getParent() const
Return the enclosing method, or null if none.
InstListType::iterator iterator
Instruction iterators...
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction if the block is well formed or null if the block is not well forme...
const Instruction & back() const
Analysis pass which computes BlockFrequencyInfo.
BlockFrequencyInfo pass uses BlockFrequencyInfoImpl implementation to estimate IR basic block frequen...
std::optional< uint64_t > getBlockProfileCount(const BasicBlock *BB, bool AllowSynthetic=false) const
Returns the estimated profile count of BB.
uint64_t getFrequency() const
Returns the frequency as a fixpoint number scaled by the entry frequency.
Conditional or Unconditional Branch instruction.
Analysis providing branch probability information.
static BranchProbability getBranchProbability(uint64_t Numerator, uint64_t Denominator)
Base class for all callable instructions (InvokeInst and CallInst). Holds everything related to callin...
void setCallingConv(CallingConv::ID CC)
Function * getCalledFunction() const
Returns the function called, or null if this is an indirect function invocation or the function signa...
Function * getCaller()
Helper to get the caller (the parent function).
This class represents a function call, abstracting a target machine's calling convention.
A parsed version of the target data layout string, and methods for querying it.
size_type count(const_arg_type_t< KeyT > Val) const
Return 1 if the specified key is in the map, 0 otherwise.
Implements a dense probed hash-table based set.
void recalculate(ParentType &Func)
recalculate - compute a dominator tree for the given function
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
Convenience struct for specifying and reasoning about fast-math flags.
Class to represent profile counts.
const BasicBlock & back() const
void setCallingConv(CallingConv::ID CC)
Module * getParent()
Get the module that this global value is contained inside of...
Represents the cost of inlining a function.
int getCost() const
Get the inline cost estimate.
int getCostDelta() const
Get the cost delta from the threshold for inlining.
This class captures the data input to the InlineFunction call, and records the auxiliary results prod...
An analysis over an "outer" IR unit that provides access to an analysis manager over an "inner" IR un...
auto map(const Function &F) const -> InstructionCost
std::optional< CostType > getValue() const
This function is intended to be used as sparingly as possible, since the class provides the full rang...
const DebugLoc & getDebugLoc() const
Return the debug location for this node as a DebugLoc.
const BasicBlock * getParent() const
A Module instance is used to store all the information related to an LLVM module.
const DataLayout & getDataLayout() const
Get the data layout for the module's target platform.
void addIncoming(Value *V, BasicBlock *BB)
Add an incoming value to the end of the PHI list.
Value * removeIncomingValue(unsigned Idx, bool DeletePHIIfEmpty=true)
Remove an incoming value.
static PHINode * Create(Type *Ty, unsigned NumReservedValues, const Twine &NameStr="", Instruction *InsertBefore=nullptr)
Constructors - NumReservedValues is a hint for the number of incoming edges that this phi node will h...
Value * getIncomingValueForBlock(const BasicBlock *BB) const
unsigned getNumIncomingValues() const
Return the number of incoming edges.
PreservedAnalyses run(Module &M, ModuleAnalysisManager &)
A set of analyses that are preserved following a run of a transformation pass.
static PreservedAnalyses none()
Convenience factory function for the empty preserved set.
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
An analysis pass based on the new PM to deliver ProfileSummaryInfo.
Analysis providing profile information.
A vector that has set insertion semantics.
size_type size() const
Determine the number of elements in the SetVector.
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Analysis pass providing the TargetTransformInfo.
Analysis pass providing the TargetLibraryInfo.
Provides information about what library functions are available for the current target.
LLVM Value Representation.
Type * getType() const
All values are typed, get the type of this value.
user_iterator user_begin()
void replaceAllUsesWith(Value *V)
Change all uses of this to point to a new Value.
An efficient, type-erasing, non-owning reference to a callable.
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
@ Cold
Attempts to make code in the caller as efficient as possible under the assumption that the call is no...
@ BR
Control flow instructions. These all have token chains.
@ CE
Windows NT (Windows on ARM)
initializer< Ty > init(const Ty &Val)
PointerTypeMap run(const Module &M)
Compute the PointerTypeMap for the module M.
DiagnosticInfoOptimizationBase::Argument NV
const_iterator begin(StringRef path, Style style=Style::native)
Get begin iterator over path.
const_iterator end(StringRef path)
Get end iterator over path.
This is an optimization pass for GlobalISel generic memory operations.
Interval::succ_iterator succ_end(Interval *I)
auto successors(const MachineBasicBlock *BB)
int getCallsiteCost(const CallBase &Call, const DataLayout &DL)
Return the cost associated with a callsite, including parameter passing and the call/return instructi...
Interval::succ_iterator succ_begin(Interval *I)
succ_begin/succ_end - define methods so that Intervals may be used just like BasicBlocks can with the...
InlineResult isInlineViable(Function &Callee)
Minimal filter to detect invalid constructs for inlining.
InlineCost getInlineCost(CallBase &Call, const InlineParams &Params, TargetTransformInfo &CalleeTTI, function_ref< AssumptionCache &(Function &)> GetAssumptionCache, function_ref< const TargetLibraryInfo &(Function &)> GetTLI, function_ref< BlockFrequencyInfo &(Function &)> GetBFI=nullptr, ProfileSummaryInfo *PSI=nullptr, OptimizationRemarkEmitter *ORE=nullptr)
Get an InlineCost object representing the cost of inlining this callsite.
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
InlineResult InlineFunction(CallBase &CB, InlineFunctionInfo &IFI, bool MergeAttributes=false, AAResults *CalleeAAR=nullptr, bool InsertLifetime=true, Function *ForwardVarArgsTo=nullptr)
This function inlines the called function into the basic block of the caller.
InlineParams getInlineParams()
Generate the parameters to tune the inline cost analysis based only on the commandline options.
auto predecessors(const MachineBasicBlock *BB)
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
bool all_equal(std::initializer_list< T > Values)
Returns true if all Values in the initializer list are equal or the list is empty.
unsigned succ_size(const MachineBasicBlock *BB)
bool hasBranchWeightMD(const Instruction &I)
Checks if an instruction has Branch Weight Metadata.
Function * CloneFunction(Function *F, ValueToValueMapTy &VMap, ClonedCodeInfo *CodeInfo=nullptr)
Return a copy of the specified function and add it to that function's module.