63#define DEBUG_TYPE "partial-inlining"
66 "Number of callsites functions partially inlined into.");
67STATISTIC(NumColdOutlinePartialInlined,
"Number of times functions with "
68 "cold outlined regions were partially "
69 "inlined into its caller(s).");
71 "Number of cold single entry/exit regions found.");
73 "Number of cold single entry/exit regions outlined.");
83 cl::desc(
"Disable multi-region partial inlining"));
89 cl::desc(
"Force outline regions with live exits"));
95 cl::desc(
"Mark outline function calls with ColdCC"));
108 cl::desc(
"Minimum ratio comparing relative sizes of each "
109 "outline candidate and original function"));
114 cl::desc(
"Minimum block executions to consider "
115 "its BranchProbabilityInfo valid"));
120 cl::desc(
"Minimum BranchProbability to consider a region cold."));
124 cl::desc(
"Max number of blocks to be partially inlined"));
130 cl::desc(
"Max number of partial inlining. The default is unlimited"));
138 cl::desc(
"Relative frequency of outline region to "
143 cl::desc(
"A debug option to add additional penalty to the computed one."));
147struct FunctionOutliningInfo {
148 FunctionOutliningInfo() =
default;
152 unsigned getNumInlinedBlocks()
const {
return Entries.size() + 1; }
168struct FunctionOutliningMultiRegionInfo {
169 FunctionOutliningMultiRegionInfo() =
default;
172 struct OutlineRegionInfo {
175 :
Region(
Region), EntryBlock(EntryBlock), ExitBlock(ExitBlock),
176 ReturnBlock(ReturnBlock) {}
186struct PartialInlinerImpl {
195 : GetAssumptionCache(GetAC), LookupAssumptionCache(LookupAC),
196 GetTTI(GTTI), GetBFI(GBFI), GetTLI(GTLI), PSI(ProfSI) {}
206 std::pair<bool, Function *> unswitchFunction(
Function &
F);
212 struct FunctionCloner {
215 FunctionCloner(
Function *
F, FunctionOutliningInfo *OI,
219 FunctionCloner(
Function *
F, FunctionOutliningMultiRegionInfo *OMRI,
229 void normalizeReturnBlock()
const;
232 bool doMultiRegionFunctionOutlining();
239 Function *doSingleRegionFunctionOutlining();
244 typedef std::pair<Function *, BasicBlock *> FuncBodyCallerPair;
250 bool IsFunctionInlined =
false;
254 std::unique_ptr<FunctionOutliningInfo> ClonedOI =
nullptr;
256 std::unique_ptr<FunctionOutliningMultiRegionInfo> ClonedOMRI =
nullptr;
257 std::unique_ptr<BlockFrequencyInfo> ClonedFuncBFI =
nullptr;
264 int NumPartialInlining = 0;
277 getOutliningCallBBRelativeFreq(FunctionCloner &Cloner)
const;
281 bool shouldPartialInline(
CallBase &CB, FunctionCloner &Cloner,
288 bool tryPartialInline(FunctionCloner &Cloner);
293 computeCallsiteToProfCountMap(
Function *DuplicateFunction,
296 bool isLimitReached()
const {
302 if (isa<CallInst>(U) || isa<InvokeInst>(U))
303 return cast<CallBase>(U);
310 return getSupportedCallBase(
User);
313 std::tuple<DebugLoc, BasicBlock *> getOneDebugLoc(
Function &
F)
const {
317 return std::make_tuple(DLoc,
Block);
326 std::tuple<InstructionCost, InstructionCost>
327 computeOutliningCosts(FunctionCloner &Cloner)
const;
335 std::unique_ptr<FunctionOutliningInfo>
338 std::unique_ptr<FunctionOutliningMultiRegionInfo>
339 computeOutliningColdRegionsInfo(
Function &
F,
345std::unique_ptr<FunctionOutliningMultiRegionInfo>
346PartialInlinerImpl::computeOutliningColdRegionsInfo(
353 std::unique_ptr<BlockFrequencyInfo> ScopedBFI;
357 BFI = ScopedBFI.get();
362 if (!PSI.hasInstrumentationProfile())
363 return std::unique_ptr<FunctionOutliningMultiRegionInfo>();
365 std::unique_ptr<FunctionOutliningMultiRegionInfo> OutliningInfo =
366 std::make_unique<FunctionOutliningMultiRegionInfo>();
371 for (
auto *
Block : BlockList) {
378 <<
"Region dominated by "
379 <<
ore::NV(
"Block", BlockList.front()->getName())
380 <<
" has more than one region exit edge.";
393 return BFI->getBlockProfileCount(BB).value_or(0);
401 OverallFunctionCost += computeBBInlineCost(&BB, FTTI);
403 LLVM_DEBUG(
dbgs() <<
"OverallFunctionCost = " << OverallFunctionCost
412 bool ColdCandidateFound =
false;
414 std::vector<BasicBlock *> DFS;
416 DFS.push_back(CurrEntry);
417 VisitedMap[CurrEntry] =
true;
425 while (!DFS.empty()) {
426 auto *ThisBB = DFS.back();
431 if (PSI.isColdBlock(ThisBB, BFI) ||
437 VisitedMap[*
SI] =
true;
441 if (SuccProb > MinBranchProbability)
444 LLVM_DEBUG(
dbgs() <<
"Found cold edge: " << ThisBB->getName() <<
"->"
446 <<
"\nBranch Probability = " << SuccProb <<
"\n";);
449 DT.getDescendants(*SI, DominateVector);
451 "SI should be reachable and have at least itself as descendant");
454 if (!DominateVector.
front()->hasNPredecessors(1)) {
456 <<
" doesn't have a single predecessor in the "
457 "dominator tree\n";);
463 if (!(ExitBlock = IsSingleExit(DominateVector))) {
465 <<
" doesn't have a unique successor\n";);
470 for (
auto *BB : DominateVector)
471 OutlineRegionCost += computeBBInlineCost(BB, &GetTTI(*BB->getParent()));
481 <<
" inline cost-savings smaller than "
482 <<
ore::NV(
"Cost", MinOutlineRegionCost);
485 LLVM_DEBUG(
dbgs() <<
"ABORT: Outline region cost is smaller than "
486 << MinOutlineRegionCost <<
"\n";);
494 for (
auto *BB : DominateVector)
495 VisitedMap[BB] =
true;
499 FunctionOutliningMultiRegionInfo::OutlineRegionInfo
RegInfo(
500 DominateVector, DominateVector.front(), ExitBlock, ReturnBlock);
501 OutliningInfo->ORI.push_back(
RegInfo);
503 << DominateVector.front()->getName() <<
"\n";);
504 ColdCandidateFound =
true;
505 NumColdRegionsFound++;
509 if (ColdCandidateFound)
510 return OutliningInfo;
512 return std::unique_ptr<FunctionOutliningMultiRegionInfo>();
515std::unique_ptr<FunctionOutliningInfo>
516PartialInlinerImpl::computeOutliningInfo(
Function &
F)
const {
519 if (!BR ||
BR->isUnconditional())
520 return std::unique_ptr<FunctionOutliningInfo>();
529 return isa<ReturnInst>(TI);
533 if (IsReturnBlock(Succ1))
534 return std::make_tuple(Succ1, Succ2);
535 if (IsReturnBlock(Succ2))
536 return std::make_tuple(Succ2, Succ1);
538 return std::make_tuple<BasicBlock *, BasicBlock *>(
nullptr,
nullptr);
543 if (IsSuccessor(Succ1, Succ2))
544 return std::make_tuple(Succ1, Succ2);
545 if (IsSuccessor(Succ2, Succ1))
546 return std::make_tuple(Succ2, Succ1);
548 return std::make_tuple<BasicBlock *, BasicBlock *>(
nullptr,
nullptr);
551 std::unique_ptr<FunctionOutliningInfo> OutliningInfo =
552 std::make_unique<FunctionOutliningInfo>();
555 bool CandidateFound =
false;
570 std::tie(ReturnBlock, NonReturnBlock) = GetReturnBlock(Succ1, Succ2);
573 OutliningInfo->Entries.push_back(CurrEntry);
574 OutliningInfo->ReturnBlock = ReturnBlock;
575 OutliningInfo->NonReturnBlock = NonReturnBlock;
576 CandidateFound =
true;
581 std::tie(CommSucc,
OtherSucc) = GetCommonSucc(Succ1, Succ2);
586 OutliningInfo->Entries.push_back(CurrEntry);
591 return std::unique_ptr<FunctionOutliningInfo>();
595 assert(OutliningInfo->Entries[0] == &
F.front() &&
596 "Function Entry must be the first in Entries vector");
603 auto HasNonEntryPred = [Entries](
BasicBlock *BB) {
605 if (!Entries.count(Pred))
610 auto CheckAndNormalizeCandidate =
611 [Entries, HasNonEntryPred](FunctionOutliningInfo *OutliningInfo) {
612 for (
BasicBlock *E : OutliningInfo->Entries) {
614 if (Entries.count(Succ))
616 if (Succ == OutliningInfo->ReturnBlock)
617 OutliningInfo->ReturnBlockPreds.push_back(E);
618 else if (Succ != OutliningInfo->NonReturnBlock)
622 if (HasNonEntryPred(E))
628 if (!CheckAndNormalizeCandidate(OutliningInfo.get()))
629 return std::unique_ptr<FunctionOutliningInfo>();
634 BasicBlock *Cand = OutliningInfo->NonReturnBlock;
638 if (HasNonEntryPred(Cand))
645 std::tie(ReturnBlock, NonReturnBlock) = GetReturnBlock(Succ1, Succ2);
646 if (!ReturnBlock || ReturnBlock != OutliningInfo->ReturnBlock)
653 OutliningInfo->Entries.push_back(Cand);
654 OutliningInfo->NonReturnBlock = NonReturnBlock;
655 OutliningInfo->ReturnBlockPreds.push_back(Cand);
656 Entries.insert(Cand);
659 return OutliningInfo;
664 if (
F.hasProfileData())
667 for (
auto *E : OI.Entries) {
669 if (!BR || BR->isUnconditional())
678 FunctionCloner &Cloner)
const {
679 BasicBlock *OutliningCallBB = Cloner.OutlinedFunctions.
back().second;
681 Cloner.ClonedFuncBFI->getBlockFreq(&Cloner.ClonedFunc->getEntryBlock());
682 auto OutliningCallFreq =
683 Cloner.ClonedFuncBFI->getBlockFreq(OutliningCallBB);
687 if (OutliningCallFreq.getFrequency() > EntryFreq.getFrequency())
688 OutliningCallFreq = EntryFreq;
691 OutliningCallFreq.getFrequency(), EntryFreq.getFrequency());
694 return OutlineRegionRelFreq;
709 return OutlineRegionRelFreq;
711 OutlineRegionRelFreq = std::max(
714 return OutlineRegionRelFreq;
717bool PartialInlinerImpl::shouldPartialInline(
723 assert(Callee == Cloner.ClonedFunc);
729 auto &CalleeTTI = GetTTI(*Callee);
730 bool RemarksEnabled =
731 Callee->getContext().getDiagHandlerPtr()->isMissedOptRemarkEnabled(
735 GetTLI, GetBFI, &PSI, RemarksEnabled ? &ORE :
nullptr);
740 <<
NV(
"Callee", Cloner.OrigFunc)
741 <<
" should always be fully inlined, not partially";
749 <<
NV(
"Callee", Cloner.OrigFunc) <<
" not partially inlined into "
750 <<
NV(
"Caller", Caller)
751 <<
" because it should never be inlined (cost=never)";
759 <<
NV(
"Callee", Cloner.OrigFunc) <<
" not partially inlined into "
760 <<
NV(
"Caller", Caller) <<
" because too costly to inline (cost="
761 <<
NV(
"Cost", IC.
getCost()) <<
", threshold="
773 if (NormWeightedSavings < WeightedOutliningRcost) {
777 <<
NV(
"Callee", Cloner.OrigFunc) <<
" not partially inlined into "
778 <<
NV(
"Caller", Caller) <<
" runtime overhead (overhead="
779 <<
NV(
"Overhead", (
unsigned)WeightedOutliningRcost.
getFrequency())
781 <<
NV(
"Savings", (
unsigned)NormWeightedSavings.getFrequency())
783 <<
" of making the outlined call is too high";
791 <<
NV(
"Callee", Cloner.OrigFunc) <<
" can be partially inlined into "
792 <<
NV(
"Caller", Caller) <<
" with cost=" <<
NV(
"Cost", IC.
getCost())
803PartialInlinerImpl::computeBBInlineCost(
BasicBlock *BB,
810 switch (
I.getOpcode()) {
811 case Instruction::BitCast:
812 case Instruction::PtrToInt:
813 case Instruction::IntToPtr:
814 case Instruction::Alloca:
815 case Instruction::PHI:
817 case Instruction::GetElementPtr:
818 if (cast<GetElementPtrInst>(&
I)->hasAllZeroIndices())
825 if (
I.isLifetimeStartOrEnd())
828 if (
auto *
II = dyn_cast<IntrinsicInst>(&
I)) {
835 if (
auto *FPMO = dyn_cast<FPMathOperator>(
II))
836 FMF = FPMO->getFastMathFlags();
843 if (
CallInst *CI = dyn_cast<CallInst>(&
I)) {
863std::tuple<InstructionCost, InstructionCost>
864PartialInlinerImpl::computeOutliningCosts(FunctionCloner &Cloner)
const {
866 for (
auto FuncBBPair : Cloner.OutlinedFunctions) {
867 Function *OutlinedFunc = FuncBBPair.first;
868 BasicBlock* OutliningCallBB = FuncBBPair.second;
871 auto *OutlinedFuncTTI = &GetTTI(*OutlinedFunc);
872 OutliningFuncCallCost +=
873 computeBBInlineCost(OutliningCallBB, OutlinedFuncTTI);
877 OutlinedFunctionCost += computeBBInlineCost(&BB, OutlinedFuncTTI);
879 assert(OutlinedFunctionCost >= Cloner.OutlinedRegionCost &&
880 "Outlined function cost should be no less than the outlined region");
885 OutlinedFunctionCost -=
889 OutliningFuncCallCost +
890 (OutlinedFunctionCost - Cloner.OutlinedRegionCost) +
893 return std::make_tuple(OutliningFuncCallCost, OutliningRuntimeOverhead);
899void PartialInlinerImpl::computeCallsiteToProfCountMap(
905 std::unique_ptr<BlockFrequencyInfo> TempBFI;
915 CurrentCallerBFI = TempBFI.get();
918 CurrentCallerBFI = &(GetBFI(*Caller));
924 if (isa<BlockAddress>(
User))
928 if (CurrentCaller != Caller) {
930 ComputeCurrBFI(Caller);
932 assert(CurrentCallerBFI &&
"CallerBFI is not set");
937 CallSiteToProfCountMap[
User] = *Count;
939 CallSiteToProfCountMap[
User] = 0;
943PartialInlinerImpl::FunctionCloner::FunctionCloner(
947 : OrigFunc(
F), ORE(ORE), LookupAC(LookupAC), GetTTI(GetTTI) {
948 ClonedOI = std::make_unique<FunctionOutliningInfo>();
954 ClonedOI->ReturnBlock = cast<BasicBlock>(VMap[OI->ReturnBlock]);
955 ClonedOI->NonReturnBlock = cast<BasicBlock>(VMap[OI->NonReturnBlock]);
957 ClonedOI->Entries.push_back(cast<BasicBlock>(VMap[BB]));
961 ClonedOI->ReturnBlockPreds.push_back(NewE);
965 F->replaceAllUsesWith(ClonedFunc);
968PartialInlinerImpl::FunctionCloner::FunctionCloner(
969 Function *
F, FunctionOutliningMultiRegionInfo *OI,
973 : OrigFunc(
F), ORE(ORE), LookupAC(LookupAC), GetTTI(GetTTI) {
974 ClonedOMRI = std::make_unique<FunctionOutliningMultiRegionInfo>();
982 for (
const FunctionOutliningMultiRegionInfo::OutlineRegionInfo &
RegionInfo :
986 Region.push_back(cast<BasicBlock>(VMap[BB]));
992 NewReturnBlock = cast<BasicBlock>(VMap[
RegionInfo.ReturnBlock]);
993 FunctionOutliningMultiRegionInfo::OutlineRegionInfo MappedRegionInfo(
994 Region, NewEntryBlock, NewExitBlock, NewReturnBlock);
995 ClonedOMRI->ORI.push_back(MappedRegionInfo);
999 F->replaceAllUsesWith(ClonedFunc);
1002void PartialInlinerImpl::FunctionCloner::normalizeReturnBlock()
const {
1006 while (
I != BB->end()) {
1027 BasicBlock *PreReturn = ClonedOI->ReturnBlock;
1029 PHINode *FirstPhi = GetFirstPHI(PreReturn);
1030 unsigned NumPredsFromEntries = ClonedOI->ReturnBlockPreds.size();
1037 return PN->getIncomingValue(0);
1041 ClonedOI->ReturnBlock = ClonedOI->ReturnBlock->splitBasicBlock(
1042 ClonedOI->ReturnBlock->getFirstNonPHI()->getIterator());
1046 while (
I != PreReturn->
end()) {
1047 PHINode *OldPhi = dyn_cast<PHINode>(
I);
1055 Ins = ClonedOI->ReturnBlock->getFirstNonPHIIt();
1058 for (
BasicBlock *E : ClonedOI->ReturnBlockPreds) {
1067 if (
auto *OldPhiVal = IsTrivialPhi(OldPhi)) {
1073 for (
auto *DP : DeadPhis)
1074 DP->eraseFromParent();
1076 for (
auto *E : ClonedOI->ReturnBlockPreds)
1080bool PartialInlinerImpl::FunctionCloner::doMultiRegionFunctionOutlining() {
1082 auto ComputeRegionCost =
1086 Cost += computeBBInlineCost(BB, &GetTTI(*BB->getParent()));
1090 assert(ClonedOMRI &&
"Expecting OutlineInfo for multi region outline");
1092 if (ClonedOMRI->ORI.empty())
1108 for (FunctionOutliningMultiRegionInfo::OutlineRegionInfo
RegionInfo :
1114 ClonedFuncBFI.get(), &BPI,
1115 LookupAC(*
RegionInfo.EntryBlock->getParent()),
1118 CE.findInputsOutputs(Inputs, Outputs, Sinks);
1121 dbgs() <<
"inputs: " << Inputs.
size() <<
"\n";
1122 dbgs() <<
"outputs: " << Outputs.
size() <<
"\n";
1124 dbgs() <<
"value used in func: " << *
value <<
"\n";
1125 for (
Value *output : Outputs)
1126 dbgs() <<
"instr used in func: " << *output <<
"\n";
1133 if (
Function *OutlinedFunc =
CE.extractCodeRegion(CEAC)) {
1134 CallBase *OCS = PartialInlinerImpl::getOneCallSiteTo(*OutlinedFunc);
1137 OutlinedFunctions.push_back(std::make_pair(OutlinedFunc,OutliningCallBB));
1138 NumColdRegionsOutlined++;
1139 OutlinedRegionCost += CurrentOutlinedRegionCost;
1149 <<
"Failed to extract region at block "
1154 return !OutlinedFunctions.empty();
1158PartialInlinerImpl::FunctionCloner::doSingleRegionFunctionOutlining() {
1161 auto ToBeInlined = [&,
this](
BasicBlock *BB) {
1162 return BB == ClonedOI->ReturnBlock ||
1166 assert(ClonedOI &&
"Expecting OutlineInfo for single region outline");
1177 std::vector<BasicBlock *> ToExtract;
1178 auto *ClonedFuncTTI = &GetTTI(*ClonedFunc);
1179 ToExtract.push_back(ClonedOI->NonReturnBlock);
1180 OutlinedRegionCost += PartialInlinerImpl::computeBBInlineCost(
1181 ClonedOI->NonReturnBlock, ClonedFuncTTI);
1183 if (!ToBeInlined(BB) && BB != ClonedOI->NonReturnBlock) {
1184 ToExtract.push_back(BB);
1189 OutlinedRegionCost += computeBBInlineCost(BB, ClonedFuncTTI);
1196 ClonedFuncBFI.get(), &BPI, LookupAC(*ClonedFunc),
1202 PartialInlinerImpl::getOneCallSiteTo(*OutlinedFunc)->
getParent();
1204 OutlinedFunctions.push_back(std::make_pair(OutlinedFunc, OutliningCallBB));
1208 &ToExtract.front()->front())
1209 <<
"Failed to extract region at block "
1210 <<
ore::NV(
"Block", ToExtract.front());
1213 return OutlinedFunc;
1216PartialInlinerImpl::FunctionCloner::~FunctionCloner() {
1220 ClonedFunc->eraseFromParent();
1221 if (!IsFunctionInlined) {
1224 for (
auto FuncBBPair : OutlinedFunctions) {
1226 Func->eraseFromParent();
1231std::pair<bool, Function *> PartialInlinerImpl::unswitchFunction(
Function &
F) {
1232 if (
F.hasAddressTaken())
1233 return {
false,
nullptr};
1236 if (
F.hasFnAttribute(Attribute::AlwaysInline))
1237 return {
false,
nullptr};
1239 if (
F.hasFnAttribute(Attribute::NoInline))
1240 return {
false,
nullptr};
1242 if (PSI.isFunctionEntryCold(&
F))
1243 return {
false,
nullptr};
1245 if (
F.users().empty())
1246 return {
false,
nullptr};
1252 if (PSI.hasProfileSummary() &&
F.hasProfileData() &&
1254 std::unique_ptr<FunctionOutliningMultiRegionInfo> OMRI =
1255 computeOutliningColdRegionsInfo(
F, ORE);
1257 FunctionCloner Cloner(&
F, OMRI.get(), ORE, LookupAssumptionCache, GetTTI);
1260 dbgs() <<
"HotCountThreshold = " << PSI.getHotCountThreshold() <<
"\n";
1261 dbgs() <<
"ColdCountThreshold = " << PSI.getColdCountThreshold()
1265 bool DidOutline = Cloner.doMultiRegionFunctionOutlining();
1269 dbgs() <<
">>>>>> Outlined (Cloned) Function >>>>>>\n";
1270 Cloner.ClonedFunc->print(
dbgs());
1271 dbgs() <<
"<<<<<< Outlined (Cloned) Function <<<<<<\n";
1274 if (tryPartialInline(Cloner))
1275 return {
true,
nullptr};
1283 std::unique_ptr<FunctionOutliningInfo> OI = computeOutliningInfo(
F);
1285 return {
false,
nullptr};
1287 FunctionCloner Cloner(&
F, OI.get(), ORE, LookupAssumptionCache, GetTTI);
1288 Cloner.normalizeReturnBlock();
1290 Function *OutlinedFunction = Cloner.doSingleRegionFunctionOutlining();
1292 if (!OutlinedFunction)
1293 return {
false,
nullptr};
1295 if (tryPartialInline(Cloner))
1296 return {
true, OutlinedFunction};
1298 return {
false,
nullptr};
1301bool PartialInlinerImpl::tryPartialInline(FunctionCloner &Cloner) {
1302 if (Cloner.OutlinedFunctions.empty())
1305 auto OutliningCosts = computeOutliningCosts(Cloner);
1311 "Expected valid costs");
1316 if (Cloner.ClonedOI)
1317 RelativeToEntryFreq = getOutliningCallBBRelativeFreq(Cloner);
1337 std::tie(DLoc,
Block) = getOneDebugLoc(*Cloner.ClonedFunc);
1338 OrigFuncORE.emit([&]() {
1341 <<
ore::NV(
"Function", Cloner.OrigFunc)
1342 <<
" not partially inlined into callers (Original Size = "
1343 <<
ore::NV(
"OutlinedRegionOriginalSize", Cloner.OutlinedRegionCost)
1344 <<
", Size of call sequence to outlined function = "
1345 <<
ore::NV(
"NewSize", SizeCost) <<
")";
1350 assert(Cloner.OrigFunc->users().empty() &&
1351 "F's users should all be replaced!");
1353 std::vector<User *>
Users(Cloner.ClonedFunc->user_begin(),
1354 Cloner.ClonedFunc->user_end());
1357 auto CalleeEntryCount = Cloner.OrigFunc->getEntryCount();
1358 if (CalleeEntryCount)
1359 computeCallsiteToProfCountMap(Cloner.ClonedFunc, CallSiteToProfCountMap);
1362 (CalleeEntryCount ? CalleeEntryCount->getCount() : 0);
1364 bool AnyInline =
false;
1367 if (isa<BlockAddress>(
User))
1372 if (isLimitReached())
1376 if (!shouldPartialInline(*CB, Cloner, WeightedRcost, CallerORE))
1382 OR <<
ore::NV(
"Callee", Cloner.OrigFunc) <<
" partially inlined into "
1389 (Cloner.ClonedOI ? Cloner.OutlinedFunctions.back().first
1397 if (CalleeEntryCountV && CallSiteToProfCountMap.
count(
User)) {
1399 CalleeEntryCountV -= std::min(CalleeEntryCountV, CallSiteCount);
1403 NumPartialInlining++;
1405 if (Cloner.ClonedOI)
1406 NumPartialInlined++;
1408 NumColdOutlinePartialInlined++;
1412 Cloner.IsFunctionInlined =
true;
1413 if (CalleeEntryCount)
1415 CalleeEntryCountV, CalleeEntryCount->getType()));
1417 OrigFuncORE.emit([&]() {
1419 <<
"Partially inlined into at least one caller";
1426bool PartialInlinerImpl::run(
Module &M) {
1430 std::vector<Function *> Worklist;
1431 Worklist.reserve(
M.size());
1433 if (!
F.use_empty() && !
F.isDeclaration())
1434 Worklist.push_back(&
F);
1436 bool Changed =
false;
1437 while (!Worklist.empty()) {
1439 Worklist.pop_back();
1444 std::pair<bool, Function *>
Result = unswitchFunction(*CurrFunc);
1446 Worklist.push_back(
Result.second);
1479 if (PartialInlinerImpl(GetAssumptionCache, LookupAssumptionCache, GetTTI,
1480 GetTLI, PSI, GetBFI)
static MachineBasicBlock * OtherSucc(MachineBasicBlock *MBB, MachineBasicBlock *Succ)
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
This file contains the simple types necessary to represent the attributes associated with functions a...
Given that RA is a live value
This file defines the DenseMap class.
This file defines the DenseSet and SmallDenseSet classes.
This file builds on the ADT/GraphTraits.h file to build generic depth first graph iterator.
This file provides various utilities for inspecting and working with the control flow graph in LLVM I...
iv Induction Variable Users
static cl::opt< int > InstrCost("inline-instr-cost", cl::Hidden, cl::init(5), cl::desc("Cost of a single instruction when inlining"))
Module.h This file contains the declarations for the Module class.
uint64_t IntrinsicInst * II
static cl::opt< unsigned > MaxNumInlineBlocks("max-num-inline-blocks", cl::init(5), cl::Hidden, cl::desc("Max number of blocks to be partially inlined"))
static cl::opt< int > OutlineRegionFreqPercent("outline-region-freq-percent", cl::init(75), cl::Hidden, cl::desc("Relative frequency of outline region to " "the entry block"))
static cl::opt< bool > MarkOutlinedColdCC("pi-mark-coldcc", cl::init(false), cl::Hidden, cl::desc("Mark outline function calls with ColdCC"))
static cl::opt< float > MinRegionSizeRatio("min-region-size-ratio", cl::init(0.1), cl::Hidden, cl::desc("Minimum ratio comparing relative sizes of each " "outline candidate and original function"))
static cl::opt< bool > DisableMultiRegionPartialInline("disable-mr-partial-inlining", cl::init(false), cl::Hidden, cl::desc("Disable multi-region partial inlining"))
static cl::opt< unsigned > MinBlockCounterExecution("min-block-execution", cl::init(100), cl::Hidden, cl::desc("Minimum block executions to consider " "its BranchProbabilityInfo valid"))
static cl::opt< int > MaxNumPartialInlining("max-partial-inlining", cl::init(-1), cl::Hidden, cl::desc("Max number of partial inlining. The default is unlimited"))
static cl::opt< bool > DisablePartialInlining("disable-partial-inlining", cl::init(false), cl::Hidden, cl::desc("Disable partial inlining"))
static bool hasProfileData(const Function &F, const FunctionOutliningInfo &OI)
static cl::opt< float > ColdBranchRatio("cold-branch-ratio", cl::init(0.1), cl::Hidden, cl::desc("Minimum BranchProbability to consider a region cold."))
static cl::opt< bool > ForceLiveExit("pi-force-live-exit-outline", cl::init(false), cl::Hidden, cl::desc("Force outline regions with live exits"))
static cl::opt< unsigned > ExtraOutliningPenalty("partial-inlining-extra-penalty", cl::init(0), cl::Hidden, cl::desc("A debug option to add additional penalty to the computed one."))
static cl::opt< bool > SkipCostAnalysis("skip-partial-inlining-cost-analysis", cl::ReallyHidden, cl::desc("Skip Cost Analysis"))
FunctionAnalysisManager FAM
This file contains the declarations for profiling metadata utility functions.
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
This file defines the SmallVector class.
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
A container for analyses that lazily runs them and caches their results.
PassT::Result * getCachedResult(IRUnitT &IR) const
Get the cached result of an analysis pass for a given IR unit.
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)
Get the result of an analysis pass for a given IR unit.
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
A function analysis which provides an AssumptionCache.
A cache of @llvm.assume calls within a function.
LLVM Basic Block Representation.
iterator begin()
Instruction iterator methods.
iterator_range< filter_iterator< BasicBlock::const_iterator, std::function< bool(const Instruction &)> > > instructionsWithoutDebug(bool SkipPseudoOp=true) const
Return a const iterator range over the instructions in the block, skipping any debug instructions.
const BasicBlock * getSinglePredecessor() const
Return the predecessor of this block if it has a single predecessor block.
const BasicBlock * getSingleSuccessor() const
Return the successor of this block if it has a single successor.
const Function * getParent() const
Return the enclosing method, or null if none.
const DataLayout & getDataLayout() const
Get the data layout of the module this basic block belongs to.
InstListType::iterator iterator
Instruction iterators...
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction if the block is well formed or null if the block is not well forme...
const Instruction & back() const
Analysis pass which computes BlockFrequencyInfo.
BlockFrequencyInfo pass uses BlockFrequencyInfoImpl implementation to estimate IR basic block frequen...
std::optional< uint64_t > getBlockProfileCount(const BasicBlock *BB, bool AllowSynthetic=false) const
Returns the estimated profile count of BB.
uint64_t getFrequency() const
Returns the frequency as a fixpoint number scaled by the entry frequency.
Conditional or Unconditional Branch instruction.
Analysis providing branch probability information.
static BranchProbability getBranchProbability(uint64_t Numerator, uint64_t Denominator)
Base class for all callable instructions (InvokeInst and CallInst) Holds everything related to callin...
void setCallingConv(CallingConv::ID CC)
Function * getCalledFunction() const
Returns the function called, or null if this is an indirect function invocation or the function signa...
Function * getCaller()
Helper to get the caller (the parent function).
This class represents a function call, abstracting a target machine's calling convention.
A parsed version of the target data layout string in and methods for querying it.
size_type count(const_arg_type_t< KeyT > Val) const
Return 1 if the specified key is in the map, 0 otherwise.
Implements a dense probed hash-table based set.
void recalculate(ParentType &Func)
recalculate - compute a dominator tree for the given function
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
Convenience struct for specifying and reasoning about fast-math flags.
Class to represent profile counts.
const BasicBlock & back() const
void setCallingConv(CallingConv::ID CC)
Represents the cost of inlining a function.
int getCost() const
Get the inline cost estimate.
int getCostDelta() const
Get the cost delta from the threshold for inlining.
This class captures the data input to the InlineFunction call, and records the auxiliary results prod...
An analysis over an "outer" IR unit that provides access to an analysis manager over an "inner" IR un...
auto map(const Function &F) const -> InstructionCost
std::optional< CostType > getValue() const
This function is intended to be used as sparingly as possible, since the class provides the full rang...
void insertBefore(Instruction *InsertPos)
Insert an unlinked instruction into a basic block immediately before the specified instruction.
const DebugLoc & getDebugLoc() const
Return the debug location for this node as a DebugLoc.
A Module instance is used to store all the information related to an LLVM module.
void addIncoming(Value *V, BasicBlock *BB)
Add an incoming value to the end of the PHI list.
Value * removeIncomingValue(unsigned Idx, bool DeletePHIIfEmpty=true)
Remove an incoming value.
Value * getIncomingValueForBlock(const BasicBlock *BB) const
unsigned getNumIncomingValues() const
Return the number of incoming edges.
static PHINode * Create(Type *Ty, unsigned NumReservedValues, const Twine &NameStr="", InsertPosition InsertBefore=nullptr)
Constructors - NumReservedValues is a hint for the number of incoming edges that this phi node will h...
PreservedAnalyses run(Module &M, ModuleAnalysisManager &)
A set of analyses that are preserved following a run of a transformation pass.
static PreservedAnalyses none()
Convenience factory function for the empty preserved set.
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
An analysis pass based on the new PM to deliver ProfileSummaryInfo.
Analysis providing profile information.
A vector that has set insertion semantics.
size_type size() const
Determine the number of elements in the SetVector.
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Analysis pass providing the TargetTransformInfo.
Analysis pass providing the TargetLibraryInfo.
Provides information about what library functions are available for the current target.
bool replaceUsesOfWith(Value *From, Value *To)
Replace uses of one Value with another.
LLVM Value Representation.
Type * getType() const
All values are typed, get the type of this value.
user_iterator user_begin()
void replaceAllUsesWith(Value *V)
Change all uses of this to point to a new Value.
An efficient, type-erasing, non-owning reference to a callable.
const ParentTy * getParent() const
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
@ Cold
Attempts to make code in the caller as efficient as possible under the assumption that the call is no...
@ BR
Control flow instructions. These all have token chains.
@ CE
Windows NT (Windows on ARM)
initializer< Ty > init(const Ty &Val)
PointerTypeMap run(const Module &M)
Compute the PointerTypeMap for the module M.
DiagnosticInfoOptimizationBase::Argument NV
NodeAddr< PhiNode * > Phi
NodeAddr< FuncNode * > Func
This is an optimization pass for GlobalISel generic memory operations.
auto successors(const MachineBasicBlock *BB)
InlineResult isInlineViable(Function &Callee)
Minimal filter to detect invalid constructs for inlining.
InlineCost getInlineCost(CallBase &Call, const InlineParams &Params, TargetTransformInfo &CalleeTTI, function_ref< AssumptionCache &(Function &)> GetAssumptionCache, function_ref< const TargetLibraryInfo &(Function &)> GetTLI, function_ref< BlockFrequencyInfo &(Function &)> GetBFI=nullptr, ProfileSummaryInfo *PSI=nullptr, OptimizationRemarkEmitter *ORE=nullptr)
Get an InlineCost object representing the cost of inlining this callsite.
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
RNSuccIterator< NodeRef, BlockT, RegionT > succ_begin(NodeRef Node)
RNSuccIterator< NodeRef, BlockT, RegionT > succ_end(NodeRef Node)
InlineResult InlineFunction(CallBase &CB, InlineFunctionInfo &IFI, bool MergeAttributes=false, AAResults *CalleeAAR=nullptr, bool InsertLifetime=true, Function *ForwardVarArgsTo=nullptr)
This function inlines the called function into the basic block of the caller.
InlineParams getInlineParams()
Generate the parameters to tune the inline cost analysis based only on the commandline options.
int getCallsiteCost(const TargetTransformInfo &TTI, const CallBase &Call, const DataLayout &DL)
Return the cost associated with a callsite, including parameter passing and the call/return instructi...
auto predecessors(const MachineBasicBlock *BB)
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
iterator_range< df_iterator< T > > depth_first(const T &G)
bool all_equal(std::initializer_list< T > Values)
Returns true if all Values in the initializer lists are equal or the list.
unsigned succ_size(const MachineBasicBlock *BB)
bool hasBranchWeightMD(const Instruction &I)
Checks if an instructions has Branch Weight Metadata.
Function * CloneFunction(Function *F, ValueToValueMapTy &VMap, ClonedCodeInfo *CodeInfo=nullptr)
Return a copy of the specified function and add it to that function's module.