63#define DEBUG_TYPE "partial-inlining"
66 "Number of callsites functions partially inlined into.");
67STATISTIC(NumColdOutlinePartialInlined,
"Number of times functions with "
68 "cold outlined regions were partially "
69 "inlined into its caller(s).");
71 "Number of cold single entry/exit regions found.");
73 "Number of cold single entry/exit regions outlined.");
83 cl::desc(
"Disable multi-region partial inlining"));
89 cl::desc(
"Force outline regions with live exits"));
95 cl::desc(
"Mark outline function calls with ColdCC"));
108 cl::desc(
"Minimum ratio comparing relative sizes of each "
109 "outline candidate and original function"));
114 cl::desc(
"Minimum block executions to consider "
115 "its BranchProbabilityInfo valid"));
120 cl::desc(
"Minimum BranchProbability to consider a region cold."));
124 cl::desc(
"Max number of blocks to be partially inlined"));
130 cl::desc(
"Max number of partial inlining. The default is unlimited"));
138 cl::desc(
"Relative frequency of outline region to "
143 cl::desc(
"A debug option to add additional penalty to the computed one."));
147struct FunctionOutliningInfo {
148 FunctionOutliningInfo() =
default;
152 unsigned getNumInlinedBlocks()
const {
return Entries.size() + 1; }
168struct FunctionOutliningMultiRegionInfo {
169 FunctionOutliningMultiRegionInfo() =
default;
172 struct OutlineRegionInfo {
174 BasicBlock *ExitBlock, BasicBlock *ReturnBlock)
175 : Region(Region), EntryBlock(EntryBlock), ExitBlock(ExitBlock),
176 ReturnBlock(ReturnBlock) {}
177 SmallVector<BasicBlock *, 8> Region;
186struct PartialInlinerImpl {
189 function_ref<AssumptionCache &(Function &)> GetAC,
190 function_ref<AssumptionCache *(Function &)> LookupAC,
191 function_ref<TargetTransformInfo &(Function &)> GTTI,
192 function_ref<
const TargetLibraryInfo &(Function &)> GTLI,
193 ProfileSummaryInfo &ProfSI,
194 function_ref<BlockFrequencyInfo &(Function &)> GBFI =
nullptr)
195 : GetAssumptionCache(GetAC), LookupAssumptionCache(LookupAC),
196 GetTTI(GTTI), GetBFI(GBFI), GetTLI(GTLI), PSI(ProfSI) {}
206 std::pair<bool, Function *> unswitchFunction(Function &
F);
212 struct FunctionCloner {
215 FunctionCloner(Function *
F, FunctionOutliningInfo *OI,
216 OptimizationRemarkEmitter &ORE,
217 function_ref<AssumptionCache *(Function &)> LookupAC,
218 function_ref<TargetTransformInfo &(Function &)> GetTTI);
219 FunctionCloner(Function *
F, FunctionOutliningMultiRegionInfo *OMRI,
220 OptimizationRemarkEmitter &ORE,
221 function_ref<AssumptionCache *(Function &)> LookupAC,
222 function_ref<TargetTransformInfo &(Function &)> GetTTI);
229 void normalizeReturnBlock()
const;
232 bool doMultiRegionFunctionOutlining();
239 Function *doSingleRegionFunctionOutlining();
244 typedef std::pair<Function *, BasicBlock *> FuncBodyCallerPair;
250 bool IsFunctionInlined =
false;
254 std::unique_ptr<FunctionOutliningInfo> ClonedOI =
nullptr;
256 std::unique_ptr<FunctionOutliningMultiRegionInfo> ClonedOMRI =
nullptr;
257 std::unique_ptr<BlockFrequencyInfo> ClonedFuncBFI =
nullptr;
258 OptimizationRemarkEmitter &ORE;
259 function_ref<AssumptionCache *(
Function &)> LookupAC;
260 function_ref<TargetTransformInfo &(
Function &)> GetTTI;
264 int NumPartialInlining = 0;
265 function_ref<AssumptionCache &(
Function &)> GetAssumptionCache;
266 function_ref<AssumptionCache *(
Function &)> LookupAssumptionCache;
267 function_ref<TargetTransformInfo &(
Function &)> GetTTI;
268 function_ref<BlockFrequencyInfo &(
Function &)> GetBFI;
269 function_ref<
const TargetLibraryInfo &(
Function &)> GetTLI;
270 ProfileSummaryInfo &PSI;
277 getOutliningCallBBRelativeFreq(FunctionCloner &Cloner)
const;
281 bool shouldPartialInline(CallBase &CB, FunctionCloner &Cloner,
282 BlockFrequency WeightedOutliningRcost,
283 OptimizationRemarkEmitter &ORE)
const;
288 bool tryPartialInline(FunctionCloner &Cloner);
293 computeCallsiteToProfCountMap(Function *DuplicateFunction,
294 DenseMap<User *, uint64_t> &SiteCountMap)
const;
296 bool isLimitReached()
const {
301 static CallBase *getSupportedCallBase(User *U) {
308 static CallBase *getOneCallSiteTo(Function &
F) {
310 return getSupportedCallBase(User);
313 std::tuple<DebugLoc, BasicBlock *> getOneDebugLoc(Function &
F)
const {
314 CallBase *CB = getOneCallSiteTo(
F);
317 return std::make_tuple(DLoc,
Block);
326 std::tuple<InstructionCost, InstructionCost>
327 computeOutliningCosts(FunctionCloner &Cloner)
const;
333 TargetTransformInfo *
TTI);
335 std::unique_ptr<FunctionOutliningInfo>
336 computeOutliningInfo(Function &
F)
const;
338 std::unique_ptr<FunctionOutliningMultiRegionInfo>
339 computeOutliningColdRegionsInfo(Function &
F,
340 OptimizationRemarkEmitter &ORE)
const;
345std::unique_ptr<FunctionOutliningMultiRegionInfo>
346PartialInlinerImpl::computeOutliningColdRegionsInfo(
352 BranchProbabilityInfo BPI(
F, LI);
353 std::unique_ptr<BlockFrequencyInfo> ScopedBFI;
354 BlockFrequencyInfo *BFI;
356 ScopedBFI.reset(
new BlockFrequencyInfo(
F, BPI, LI));
357 BFI = ScopedBFI.get();
362 if (!PSI.hasInstrumentationProfile())
363 return std::unique_ptr<FunctionOutliningMultiRegionInfo>();
365 std::unique_ptr<FunctionOutliningMultiRegionInfo> OutliningInfo =
366 std::make_unique<FunctionOutliningMultiRegionInfo>();
369 [&ORE](SmallVectorImpl<BasicBlock *> &BlockList) -> BasicBlock * {
371 for (
auto *
Block : BlockList) {
376 return OptimizationRemarkMissed(
DEBUG_TYPE,
"MultiExitRegion",
378 <<
"Region dominated by "
379 <<
ore::NV(
"Block", BlockList.front()->getName())
380 <<
" has more than one region exit edge.";
398 TargetTransformInfo *FTTI = &GetTTI(
F);
401 OverallFunctionCost += computeBBInlineCost(&BB, FTTI);
403 LLVM_DEBUG(
dbgs() <<
"OverallFunctionCost = " << OverallFunctionCost
409 BranchProbability MinBranchProbability(
412 bool ColdCandidateFound =
false;
414 std::vector<BasicBlock *> DFS;
415 SmallPtrSet<BasicBlock *, 8> VisitedSet;
416 DFS.push_back(CurrEntry);
417 VisitedSet.
insert(CurrEntry);
425 while (!DFS.empty()) {
426 auto *ThisBB = DFS.back();
431 if (PSI.isColdBlock(ThisBB, BFI) ||
435 if (!VisitedSet.
insert(*SI).second)
439 BranchProbability SuccProb = BPI.getEdgeProbability(ThisBB, *SI);
440 if (SuccProb > MinBranchProbability)
443 LLVM_DEBUG(
dbgs() <<
"Found cold edge: " << ThisBB->getName() <<
"->"
445 <<
"\nBranch Probability = " << SuccProb <<
"\n";);
447 SmallVector<BasicBlock *, 8> DominateVector;
448 DT.getDescendants(*SI, DominateVector);
450 "SI should be reachable and have at least itself as descendant");
453 if (!DominateVector.
front()->hasNPredecessors(1)) {
455 <<
" doesn't have a single predecessor in the "
456 "dominator tree\n";);
462 if (!(ExitBlock = IsSingleExit(DominateVector))) {
464 <<
" doesn't have a unique successor\n";);
469 for (
auto *BB : DominateVector)
470 OutlineRegionCost += computeBBInlineCost(BB, &GetTTI(*BB->getParent()));
477 return OptimizationRemarkAnalysis(
DEBUG_TYPE,
"TooCostly",
480 <<
" inline cost-savings smaller than "
481 <<
ore::NV(
"Cost", MinOutlineRegionCost);
484 LLVM_DEBUG(
dbgs() <<
"ABORT: Outline region cost is smaller than "
485 << MinOutlineRegionCost <<
"\n";);
497 FunctionOutliningMultiRegionInfo::OutlineRegionInfo RegInfo(
498 DominateVector, DominateVector.front(), ExitBlock, ReturnBlock);
499 OutliningInfo->ORI.push_back(RegInfo);
501 << DominateVector.front()->getName() <<
"\n";);
502 ColdCandidateFound =
true;
503 NumColdRegionsFound++;
507 if (ColdCandidateFound)
508 return OutliningInfo;
510 return std::unique_ptr<FunctionOutliningMultiRegionInfo>();
513std::unique_ptr<FunctionOutliningInfo>
514PartialInlinerImpl::computeOutliningInfo(Function &
F)
const {
518 return std::unique_ptr<FunctionOutliningInfo>();
531 if (IsReturnBlock(Succ1))
532 return std::make_tuple(Succ1, Succ2);
533 if (IsReturnBlock(Succ2))
534 return std::make_tuple(Succ2, Succ1);
536 return std::make_tuple<BasicBlock *, BasicBlock *>(
nullptr,
nullptr);
541 if (IsSuccessor(Succ1, Succ2))
542 return std::make_tuple(Succ1, Succ2);
543 if (IsSuccessor(Succ2, Succ1))
544 return std::make_tuple(Succ2, Succ1);
546 return std::make_tuple<BasicBlock *, BasicBlock *>(
nullptr,
nullptr);
549 std::unique_ptr<FunctionOutliningInfo> OutliningInfo =
550 std::make_unique<FunctionOutliningInfo>();
553 bool CandidateFound =
false;
568 std::tie(ReturnBlock, NonReturnBlock) = GetReturnBlock(Succ1, Succ2);
571 OutliningInfo->Entries.push_back(CurrEntry);
572 OutliningInfo->ReturnBlock = ReturnBlock;
573 OutliningInfo->NonReturnBlock = NonReturnBlock;
574 CandidateFound =
true;
579 std::tie(CommSucc,
OtherSucc) = GetCommonSucc(Succ1, Succ2);
584 OutliningInfo->Entries.push_back(CurrEntry);
589 return std::unique_ptr<FunctionOutliningInfo>();
593 assert(OutliningInfo->Entries[0] == &
F.front() &&
594 "Function Entry must be the first in Entries vector");
599 auto HasNonEntryPred = [Entries](
BasicBlock *BB) {
601 if (!Entries.count(Pred))
606 auto CheckAndNormalizeCandidate =
607 [Entries, HasNonEntryPred](FunctionOutliningInfo *OutliningInfo) {
608 for (BasicBlock *
E : OutliningInfo->Entries) {
610 if (Entries.count(Succ))
612 if (Succ == OutliningInfo->ReturnBlock)
613 OutliningInfo->ReturnBlockPreds.push_back(
E);
614 else if (Succ != OutliningInfo->NonReturnBlock)
618 if (HasNonEntryPred(
E))
624 if (!CheckAndNormalizeCandidate(OutliningInfo.get()))
625 return std::unique_ptr<FunctionOutliningInfo>();
630 BasicBlock *Cand = OutliningInfo->NonReturnBlock;
634 if (HasNonEntryPred(Cand))
641 std::tie(ReturnBlock, NonReturnBlock) = GetReturnBlock(Succ1, Succ2);
642 if (!ReturnBlock || ReturnBlock != OutliningInfo->ReturnBlock)
649 OutliningInfo->Entries.push_back(Cand);
650 OutliningInfo->NonReturnBlock = NonReturnBlock;
651 OutliningInfo->ReturnBlockPreds.push_back(Cand);
652 Entries.insert(Cand);
655 return OutliningInfo;
660 if (
F.hasProfileData())
663 for (
auto *
E : OI.Entries) {
671BranchProbability PartialInlinerImpl::getOutliningCallBBRelativeFreq(
672 FunctionCloner &Cloner)
const {
673 BasicBlock *OutliningCallBB = Cloner.OutlinedFunctions.back().second;
675 Cloner.ClonedFuncBFI->getBlockFreq(&Cloner.ClonedFunc->getEntryBlock());
676 auto OutliningCallFreq =
677 Cloner.ClonedFuncBFI->getBlockFreq(OutliningCallBB);
681 if (OutliningCallFreq.getFrequency() > EntryFreq.getFrequency())
682 OutliningCallFreq = EntryFreq;
685 OutliningCallFreq.getFrequency(), EntryFreq.getFrequency());
688 return OutlineRegionRelFreq;
702 if (OutlineRegionRelFreq < BranchProbability(45, 100))
703 return OutlineRegionRelFreq;
705 OutlineRegionRelFreq = std::max(
708 return OutlineRegionRelFreq;
711bool PartialInlinerImpl::shouldPartialInline(
712 CallBase &CB, FunctionCloner &Cloner, BlockFrequency WeightedOutliningRcost,
713 OptimizationRemarkEmitter &ORE)
const {
717 assert(Callee == Cloner.ClonedFunc);
723 auto &CalleeTTI = GetTTI(*Callee);
724 bool RemarksEnabled =
725 Callee->getContext().getDiagHandlerPtr()->isMissedOptRemarkEnabled(
729 GetTLI, GetBFI, &PSI, RemarksEnabled ? &ORE :
nullptr);
733 return OptimizationRemarkAnalysis(
DEBUG_TYPE,
"AlwaysInline", &CB)
734 <<
NV(
"Callee", Cloner.OrigFunc)
735 <<
" should always be fully inlined, not partially";
742 return OptimizationRemarkMissed(
DEBUG_TYPE,
"NeverInline", &CB)
743 <<
NV(
"Callee", Cloner.OrigFunc) <<
" not partially inlined into "
744 <<
NV(
"Caller", Caller)
745 <<
" because it should never be inlined (cost=never)";
752 return OptimizationRemarkAnalysis(
DEBUG_TYPE,
"TooCostly", &CB)
753 <<
NV(
"Callee", Cloner.OrigFunc) <<
" not partially inlined into "
754 <<
NV(
"Caller", Caller) <<
" because too costly to inline (cost="
755 <<
NV(
"Cost", IC.
getCost()) <<
", threshold="
760 const DataLayout &
DL =
Caller->getDataLayout();
764 BlockFrequency NormWeightedSavings(NonWeightedSavings);
767 if (NormWeightedSavings < WeightedOutliningRcost) {
769 return OptimizationRemarkAnalysis(
DEBUG_TYPE,
"OutliningCallcostTooHigh",
771 <<
NV(
"Callee", Cloner.OrigFunc) <<
" not partially inlined into "
772 <<
NV(
"Caller", Caller) <<
" runtime overhead (overhead="
773 <<
NV(
"Overhead", (
unsigned)WeightedOutliningRcost.
getFrequency())
775 <<
NV(
"Savings", (
unsigned)NormWeightedSavings.getFrequency())
777 <<
" of making the outlined call is too high";
784 return OptimizationRemarkAnalysis(
DEBUG_TYPE,
"CanBePartiallyInlined", &CB)
785 <<
NV(
"Callee", Cloner.OrigFunc) <<
" can be partially inlined into "
786 <<
NV(
"Caller", Caller) <<
" with cost=" <<
NV(
"Cost", IC.
getCost())
797PartialInlinerImpl::computeBBInlineCost(BasicBlock *BB,
798 TargetTransformInfo *
TTI) {
802 for (Instruction &
I : *BB) {
804 switch (
I.getOpcode()) {
805 case Instruction::BitCast:
806 case Instruction::PtrToInt:
807 case Instruction::IntToPtr:
808 case Instruction::Alloca:
809 case Instruction::PHI:
811 case Instruction::GetElementPtr:
819 if (
I.isLifetimeStartOrEnd())
830 FMF = FPMO->getFastMathFlags();
832 IntrinsicCostAttributes ICA(IID,
II->getType(), Tys, FMF);
857std::tuple<InstructionCost, InstructionCost>
858PartialInlinerImpl::computeOutliningCosts(FunctionCloner &Cloner)
const {
860 for (
auto FuncBBPair : Cloner.OutlinedFunctions) {
861 Function *OutlinedFunc = FuncBBPair.first;
862 BasicBlock* OutliningCallBB = FuncBBPair.second;
865 auto *OutlinedFuncTTI = &GetTTI(*OutlinedFunc);
866 OutliningFuncCallCost +=
867 computeBBInlineCost(OutliningCallBB, OutlinedFuncTTI);
870 for (BasicBlock &BB : *OutlinedFunc)
871 OutlinedFunctionCost += computeBBInlineCost(&BB, OutlinedFuncTTI);
873 assert(OutlinedFunctionCost >= Cloner.OutlinedRegionCost &&
874 "Outlined function cost should be no less than the outlined region");
879 OutlinedFunctionCost -=
883 OutliningFuncCallCost +
884 (OutlinedFunctionCost - Cloner.OutlinedRegionCost) +
887 return std::make_tuple(OutliningFuncCallCost, OutliningRuntimeOverhead);
893void PartialInlinerImpl::computeCallsiteToProfCountMap(
894 Function *DuplicateFunction,
895 DenseMap<User *, uint64_t> &CallSiteToProfCountMap)
const {
899 std::unique_ptr<BlockFrequencyInfo> TempBFI;
900 BlockFrequencyInfo *CurrentCallerBFI =
nullptr;
905 DominatorTree DT(*Caller);
907 BranchProbabilityInfo BPI(*Caller, LI);
908 TempBFI.reset(
new BlockFrequencyInfo(*Caller, BPI, LI));
909 CurrentCallerBFI = TempBFI.get();
912 CurrentCallerBFI = &(GetBFI(*Caller));
916 for (User *User :
Users) {
917 CallBase *CB = getSupportedCallBase(User);
919 if (CurrentCaller != Caller) {
921 ComputeCurrBFI(Caller);
923 assert(CurrentCallerBFI &&
"CallerBFI is not set");
930 CallSiteToProfCountMap[
User] = 0;
934PartialInlinerImpl::FunctionCloner::FunctionCloner(
935 Function *
F, FunctionOutliningInfo *OI, OptimizationRemarkEmitter &ORE,
936 function_ref<AssumptionCache *(Function &)> LookupAC,
937 function_ref<TargetTransformInfo &(Function &)> GetTTI)
938 : OrigFunc(
F), ORE(ORE), LookupAC(LookupAC), GetTTI(GetTTI) {
939 ClonedOI = std::make_unique<FunctionOutliningInfo>();
952 ClonedOI->ReturnBlockPreds.push_back(NewE);
956 F->replaceAllUsesWith(ClonedFunc);
959PartialInlinerImpl::FunctionCloner::FunctionCloner(
960 Function *
F, FunctionOutliningMultiRegionInfo *OI,
964 : OrigFunc(
F), ORE(ORE), LookupAC(LookupAC), GetTTI(GetTTI) {
965 ClonedOMRI = std::make_unique<FunctionOutliningMultiRegionInfo>();
973 for (
const FunctionOutliningMultiRegionInfo::OutlineRegionInfo &
RegionInfo :
984 FunctionOutliningMultiRegionInfo::OutlineRegionInfo MappedRegionInfo(
985 Region, NewEntryBlock, NewExitBlock, NewReturnBlock);
986 ClonedOMRI->ORI.push_back(MappedRegionInfo);
990 F->replaceAllUsesWith(ClonedFunc);
993void PartialInlinerImpl::FunctionCloner::normalizeReturnBlock()
const {
996 PHINode *FirstPhi =
nullptr;
997 while (
I != BB->end()) {
1018 BasicBlock *PreReturn = ClonedOI->ReturnBlock;
1020 PHINode *FirstPhi = GetFirstPHI(PreReturn);
1021 unsigned NumPredsFromEntries = ClonedOI->ReturnBlockPreds.size();
1026 auto IsTrivialPhi = [](PHINode *PN) ->
Value * {
1028 return PN->getIncomingValue(0);
1032 ClonedOI->ReturnBlock = ClonedOI->ReturnBlock->splitBasicBlock(
1033 ClonedOI->ReturnBlock->getFirstNonPHIIt());
1036 SmallVector<Instruction *, 4> DeadPhis;
1037 while (
I != PreReturn->
end()) {
1046 Ins = ClonedOI->ReturnBlock->getFirstNonPHIIt();
1049 for (BasicBlock *
E : ClonedOI->ReturnBlockPreds) {
1058 if (
auto *OldPhiVal = IsTrivialPhi(OldPhi)) {
1064 for (
auto *DP : DeadPhis)
1065 DP->eraseFromParent();
1067 for (
auto *
E : ClonedOI->ReturnBlockPreds)
1068 E->getTerminator()->replaceUsesOfWith(PreReturn, ClonedOI->ReturnBlock);
1071bool PartialInlinerImpl::FunctionCloner::doMultiRegionFunctionOutlining() {
1073 auto ComputeRegionCost =
1076 for (BasicBlock* BB : Region)
1077 Cost += computeBBInlineCost(BB, &GetTTI(*BB->getParent()));
1081 assert(ClonedOMRI &&
"Expecting OutlineInfo for multi region outline");
1083 if (ClonedOMRI->ORI.empty())
1092 BranchProbabilityInfo BPI(*ClonedFunc, LI);
1093 ClonedFuncBFI.reset(
new BlockFrequencyInfo(*ClonedFunc, BPI, LI));
1096 CodeExtractorAnalysisCache CEAC(*ClonedFunc);
1098 SetVector<Value *> Inputs, Outputs, Sinks;
1099 for (FunctionOutliningMultiRegionInfo::OutlineRegionInfo RegionInfo :
1102 ComputeRegionCost(RegionInfo.Region);
1104 CodeExtractor
CE(RegionInfo.Region, &DT,
false,
1105 ClonedFuncBFI.get(), &BPI,
1106 LookupAC(*RegionInfo.EntryBlock->
getParent()),
1109 CE.findInputsOutputs(Inputs, Outputs, Sinks);
1112 dbgs() <<
"inputs: " << Inputs.
size() <<
"\n";
1113 dbgs() <<
"outputs: " << Outputs.
size() <<
"\n";
1114 for (
Value *value : Inputs)
1115 dbgs() <<
"value used in func: " << *value <<
"\n";
1116 for (
Value *output : Outputs)
1117 dbgs() <<
"instr used in func: " << *output <<
"\n";
1124 if (Function *OutlinedFunc =
CE.extractCodeRegion(CEAC)) {
1125 CallBase *OCS = PartialInlinerImpl::getOneCallSiteTo(*OutlinedFunc);
1128 OutlinedFunctions.push_back(std::make_pair(OutlinedFunc,OutliningCallBB));
1129 NumColdRegionsOutlined++;
1130 OutlinedRegionCost += CurrentOutlinedRegionCost;
1138 return OptimizationRemarkMissed(
DEBUG_TYPE,
"ExtractFailed",
1139 &RegionInfo.Region.
front()->front())
1140 <<
"Failed to extract region at block "
1145 return !OutlinedFunctions.empty();
1149PartialInlinerImpl::FunctionCloner::doSingleRegionFunctionOutlining() {
1152 auto ToBeInlined = [&,
this](
BasicBlock *BB) {
1153 return BB == ClonedOI->ReturnBlock ||
1157 assert(ClonedOI &&
"Expecting OutlineInfo for single region outline");
1164 BranchProbabilityInfo BPI(*ClonedFunc, LI);
1165 ClonedFuncBFI.reset(
new BlockFrequencyInfo(*ClonedFunc, BPI, LI));
1168 std::vector<BasicBlock *> ToExtract;
1169 auto *ClonedFuncTTI = &GetTTI(*ClonedFunc);
1170 ToExtract.push_back(ClonedOI->NonReturnBlock);
1171 OutlinedRegionCost += PartialInlinerImpl::computeBBInlineCost(
1172 ClonedOI->NonReturnBlock, ClonedFuncTTI);
1173 for (BasicBlock *BB :
depth_first(&ClonedFunc->getEntryBlock()))
1174 if (!ToBeInlined(BB) && BB != ClonedOI->NonReturnBlock) {
1175 ToExtract.push_back(BB);
1180 OutlinedRegionCost += computeBBInlineCost(BB, ClonedFuncTTI);
1184 CodeExtractorAnalysisCache CEAC(*ClonedFunc);
1186 CodeExtractor(ToExtract, &DT,
false,
1187 ClonedFuncBFI.get(), &BPI, LookupAC(*ClonedFunc),
1189 .extractCodeRegion(CEAC);
1193 PartialInlinerImpl::getOneCallSiteTo(*OutlinedFunc)->
getParent();
1195 OutlinedFunctions.push_back(std::make_pair(OutlinedFunc, OutliningCallBB));
1198 return OptimizationRemarkMissed(
DEBUG_TYPE,
"ExtractFailed",
1199 &ToExtract.front()->front())
1200 <<
"Failed to extract region at block "
1201 <<
ore::NV(
"Block", ToExtract.front());
1204 return OutlinedFunc;
1207PartialInlinerImpl::FunctionCloner::~FunctionCloner() {
1211 ClonedFunc->eraseFromParent();
1212 if (!IsFunctionInlined) {
1215 for (
auto FuncBBPair : OutlinedFunctions) {
1217 Func->eraseFromParent();
1222std::pair<bool, Function *> PartialInlinerImpl::unswitchFunction(Function &
F) {
1223 if (
F.hasAddressTaken())
1224 return {
false,
nullptr};
1227 if (
F.hasFnAttribute(Attribute::AlwaysInline))
1228 return {
false,
nullptr};
1230 if (
F.hasFnAttribute(Attribute::NoInline))
1231 return {
false,
nullptr};
1233 if (PSI.isFunctionEntryCold(&
F))
1234 return {
false,
nullptr};
1236 if (
F.users().empty())
1237 return {
false,
nullptr};
1239 OptimizationRemarkEmitter ORE(&
F);
1243 if (PSI.hasProfileSummary() &&
F.hasProfileData() &&
1245 std::unique_ptr<FunctionOutliningMultiRegionInfo> OMRI =
1246 computeOutliningColdRegionsInfo(
F, ORE);
1248 FunctionCloner Cloner(&
F, OMRI.get(), ORE, LookupAssumptionCache, GetTTI);
1251 dbgs() <<
"HotCountThreshold = " << PSI.getHotCountThreshold() <<
"\n";
1252 dbgs() <<
"ColdCountThreshold = " << PSI.getColdCountThreshold()
1256 bool DidOutline = Cloner.doMultiRegionFunctionOutlining();
1260 dbgs() <<
">>>>>> Outlined (Cloned) Function >>>>>>\n";
1261 Cloner.ClonedFunc->print(
dbgs());
1262 dbgs() <<
"<<<<<< Outlined (Cloned) Function <<<<<<\n";
1265 if (tryPartialInline(Cloner))
1266 return {
true,
nullptr};
1274 std::unique_ptr<FunctionOutliningInfo> OI = computeOutliningInfo(
F);
1276 return {
false,
nullptr};
1278 FunctionCloner Cloner(&
F, OI.get(), ORE, LookupAssumptionCache, GetTTI);
1279 Cloner.normalizeReturnBlock();
1281 Function *OutlinedFunction = Cloner.doSingleRegionFunctionOutlining();
1283 if (!OutlinedFunction)
1284 return {
false,
nullptr};
1286 if (tryPartialInline(Cloner))
1287 return {
true, OutlinedFunction};
1289 return {
false,
nullptr};
1292bool PartialInlinerImpl::tryPartialInline(FunctionCloner &Cloner) {
1293 if (Cloner.OutlinedFunctions.empty())
1296 auto OutliningCosts = computeOutliningCosts(Cloner);
1302 "Expected valid costs");
1306 BranchProbability RelativeToEntryFreq;
1307 if (Cloner.ClonedOI)
1308 RelativeToEntryFreq = getOutliningCallBBRelativeFreq(Cloner);
1315 RelativeToEntryFreq = BranchProbability(0, 1);
1317 BlockFrequency WeightedRcost =
1318 BlockFrequency(NonWeightedRcost.
getValue()) * RelativeToEntryFreq;
1325 OptimizationRemarkEmitter OrigFuncORE(Cloner.OrigFunc);
1328 std::tie(DLoc,
Block) = getOneDebugLoc(*Cloner.ClonedFunc);
1329 OrigFuncORE.emit([&]() {
1330 return OptimizationRemarkAnalysis(
DEBUG_TYPE,
"OutlineRegionTooSmall",
1332 <<
ore::NV(
"Function", Cloner.OrigFunc)
1333 <<
" not partially inlined into callers (Original Size = "
1334 <<
ore::NV(
"OutlinedRegionOriginalSize", Cloner.OutlinedRegionCost)
1335 <<
", Size of call sequence to outlined function = "
1336 <<
ore::NV(
"NewSize", SizeCost) <<
")";
1341 assert(Cloner.OrigFunc->users().empty() &&
1342 "F's users should all be replaced!");
1344 std::vector<User *>
Users(Cloner.ClonedFunc->user_begin(),
1345 Cloner.ClonedFunc->user_end());
1347 DenseMap<User *, uint64_t> CallSiteToProfCountMap;
1348 auto CalleeEntryCount = Cloner.OrigFunc->getEntryCount();
1349 if (CalleeEntryCount)
1350 computeCallsiteToProfCountMap(Cloner.ClonedFunc, CallSiteToProfCountMap);
1352 uint64_t CalleeEntryCountV =
1353 (CalleeEntryCount ? CalleeEntryCount->getCount() : 0);
1355 bool AnyInline =
false;
1356 for (User *User :
Users) {
1357 CallBase *CB = getSupportedCallBase(User);
1359 if (isLimitReached())
1362 OptimizationRemarkEmitter CallerORE(CB->
getCaller());
1363 if (!shouldPartialInline(*CB, Cloner, WeightedRcost, CallerORE))
1368 OptimizationRemark
OR(
DEBUG_TYPE,
"PartiallyInlined", CB);
1369 OR <<
ore::NV(
"Callee", Cloner.OrigFunc) <<
" partially inlined into "
1372 InlineFunctionInfo IFI(GetAssumptionCache, &PSI);
1376 (Cloner.ClonedOI ? Cloner.OutlinedFunctions.back().first
1384 if (CalleeEntryCountV) {
1385 if (
auto It = CallSiteToProfCountMap.
find(User);
1386 It != CallSiteToProfCountMap.
end()) {
1387 uint64_t CallSiteCount = It->second;
1388 CalleeEntryCountV -= std::min(CalleeEntryCountV, CallSiteCount);
1393 NumPartialInlining++;
1395 if (Cloner.ClonedOI)
1396 NumPartialInlined++;
1398 NumColdOutlinePartialInlined++;
1402 Cloner.IsFunctionInlined =
true;
1403 if (CalleeEntryCount)
1404 Cloner.OrigFunc->setEntryCount(Function::ProfileCount(
1405 CalleeEntryCountV, CalleeEntryCount->getType()));
1406 OptimizationRemarkEmitter OrigFuncORE(Cloner.OrigFunc);
1407 OrigFuncORE.emit([&]() {
1408 return OptimizationRemark(
DEBUG_TYPE,
"PartiallyInlined", Cloner.OrigFunc)
1409 <<
"Partially inlined into at least one caller";
1416bool PartialInlinerImpl::run(
Module &M) {
1420 std::vector<Function *> Worklist;
1421 Worklist.reserve(
M.size());
1422 for (Function &
F : M)
1423 if (!
F.use_empty() && !
F.isDeclaration())
1424 Worklist.push_back(&
F);
1427 while (!Worklist.empty()) {
1428 Function *CurrFunc = Worklist.back();
1429 Worklist.pop_back();
1434 std::pair<bool, Function *>
Result = unswitchFunction(*CurrFunc);
1436 Worklist.push_back(
Result.second);
1469 if (PartialInlinerImpl(GetAssumptionCache, LookupAssumptionCache, GetTTI,
1470 GetTLI, PSI, GetBFI)
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
static MachineBasicBlock * OtherSucc(MachineBasicBlock *MBB, MachineBasicBlock *Succ)
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
This file contains the simple types necessary to represent the attributes associated with functions a...
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
cl::opt< unsigned > MinBlockCounterExecution
This file defines the DenseMap class.
This file defines the DenseSet and SmallDenseSet classes.
This file builds on the ADT/GraphTraits.h file to build generic depth first graph iterator.
This file provides various utilities for inspecting and working with the control flow graph in LLVM I...
Module.h This file contains the declarations for the Module class.
iv Induction Variable Users
static cl::opt< int > InstrCost("inline-instr-cost", cl::Hidden, cl::init(5), cl::desc("Cost of a single instruction when inlining"))
Machine Check Debug Module
uint64_t IntrinsicInst * II
static cl::opt< unsigned > MaxNumInlineBlocks("max-num-inline-blocks", cl::init(5), cl::Hidden, cl::desc("Max number of blocks to be partially inlined"))
static cl::opt< int > OutlineRegionFreqPercent("outline-region-freq-percent", cl::init(75), cl::Hidden, cl::desc("Relative frequency of outline region to " "the entry block"))
static cl::opt< bool > MarkOutlinedColdCC("pi-mark-coldcc", cl::init(false), cl::Hidden, cl::desc("Mark outline function calls with ColdCC"))
static cl::opt< float > MinRegionSizeRatio("min-region-size-ratio", cl::init(0.1), cl::Hidden, cl::desc("Minimum ratio comparing relative sizes of each " "outline candidate and original function"))
static cl::opt< bool > DisableMultiRegionPartialInline("disable-mr-partial-inlining", cl::init(false), cl::Hidden, cl::desc("Disable multi-region partial inlining"))
cl::opt< unsigned > MinBlockCounterExecution("min-block-execution", cl::init(100), cl::Hidden, cl::desc("Minimum block executions to consider " "its BranchProbabilityInfo valid"))
static cl::opt< int > MaxNumPartialInlining("max-partial-inlining", cl::init(-1), cl::Hidden, cl::desc("Max number of partial inlining. The default is unlimited"))
static cl::opt< bool > DisablePartialInlining("disable-partial-inlining", cl::init(false), cl::Hidden, cl::desc("Disable partial inlining"))
static bool hasProfileData(const Function &F, const FunctionOutliningInfo &OI)
static cl::opt< float > ColdBranchRatio("cold-branch-ratio", cl::init(0.1), cl::Hidden, cl::desc("Minimum BranchProbability to consider a region cold."))
static cl::opt< bool > ForceLiveExit("pi-force-live-exit-outline", cl::init(false), cl::Hidden, cl::desc("Force outline regions with live exits"))
static cl::opt< unsigned > ExtraOutliningPenalty("partial-inlining-extra-penalty", cl::init(0), cl::Hidden, cl::desc("A debug option to add additional penalty to the computed one."))
static cl::opt< bool > SkipCostAnalysis("skip-partial-inlining-cost-analysis", cl::ReallyHidden, cl::desc("Skip Cost Analysis"))
FunctionAnalysisManager FAM
This file contains the declarations for profiling metadata utility functions.
This file defines the SmallVector class.
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)
Get the result of an analysis pass for a given IR unit.
A function analysis which provides an AssumptionCache.
A cache of @llvm.assume calls within a function.
LLVM Basic Block Representation.
iterator begin()
Instruction iterator methods.
const Function * getParent() const
Return the enclosing method, or null if none.
LLVM_ABI const BasicBlock * getSinglePredecessor() const
Return the predecessor of this block if it has a single predecessor block.
const Instruction & front() const
LLVM_ABI const BasicBlock * getSingleSuccessor() const
Return the successor of this block if it has a single successor.
LLVM_ABI const DataLayout & getDataLayout() const
Get the data layout of the module this basic block belongs to.
InstListType::iterator iterator
Instruction iterators...
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction if the block is well formed or null if the block is not well forme...
Analysis pass which computes BlockFrequencyInfo.
BlockFrequencyInfo pass uses BlockFrequencyInfoImpl implementation to estimate IR basic block frequen...
LLVM_ABI std::optional< uint64_t > getBlockProfileCount(const BasicBlock *BB, bool AllowSynthetic=false) const
Returns the estimated profile count of BB.
uint64_t getFrequency() const
Returns the frequency as a fixpoint number scaled by the entry frequency.
static LLVM_ABI BranchProbability getBranchProbability(uint64_t Numerator, uint64_t Denominator)
void setCallingConv(CallingConv::ID CC)
Function * getCalledFunction() const
Returns the function called, or null if this is an indirect function invocation or the function signa...
LLVM_ABI Function * getCaller()
Helper to get the caller (the parent function).
Conditional Branch instruction.
iterator find(const_arg_type_t< KeyT > Val)
void recalculate(ParentType &Func)
recalculate - compute a dominator tree for the given function
void setCallingConv(CallingConv::ID CC)
int getCost() const
Get the inline cost estimate.
int getCostDelta() const
Get the cost delta from the threshold for inlining.
auto map(const Function &F) const -> InstructionCost
CostType getValue() const
This function is intended to be used as sparingly as possible, since the class provides the full rang...
const DebugLoc & getDebugLoc() const
Return the debug location for this node as a DebugLoc.
LLVM_ABI void insertBefore(InstListType::iterator InsertPos)
Insert an unlinked instruction into a basic block immediately before the specified position.
A Module instance is used to store all the information related to an LLVM module.
void addIncoming(Value *V, BasicBlock *BB)
Add an incoming value to the end of the PHI list.
LLVM_ABI Value * removeIncomingValue(unsigned Idx, bool DeletePHIIfEmpty=true)
Remove an incoming value.
Value * getIncomingValueForBlock(const BasicBlock *BB) const
unsigned getNumIncomingValues() const
Return the number of incoming edges.
static PHINode * Create(Type *Ty, unsigned NumReservedValues, const Twine &NameStr="", InsertPosition InsertBefore=nullptr)
Constructors - NumReservedValues is a hint for the number of incoming edges that this phi node will h...
PreservedAnalyses run(Module &M, ModuleAnalysisManager &)
A set of analyses that are preserved following a run of a transformation pass.
static PreservedAnalyses none()
Convenience factory function for the empty preserved set.
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
An analysis pass based on the new PM to deliver ProfileSummaryInfo.
Analysis providing profile information.
size_type size() const
Determine the number of elements in the SetVector.
void insert_range(Range &&R)
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Analysis pass providing the TargetTransformInfo.
Analysis pass providing the TargetLibraryInfo.
Provides information about what library functions are available for the current target.
Type * getType() const
All values are typed, get the type of this value.
user_iterator user_begin()
LLVM_ABI void replaceAllUsesWith(Value *V)
Change all uses of this to point to a new Value.
An efficient, type-erasing, non-owning reference to a callable.
const ParentTy * getParent() const
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
@ BR
Control flow instructions. These all have token chains.
@ BasicBlock
Various leaf nodes.
LLVM_ABI int getInstrCost()
@ CE
Windows NT (Windows on ARM)
initializer< Ty > init(const Ty &Val)
PointerTypeMap run(const Module &M)
Compute the PointerTypeMap for the module M.
@ User
could "use" a pointer
DiagnosticInfoOptimizationBase::Argument NV
NodeAddr< PhiNode * > Phi
NodeAddr< FuncNode * > Func
friend class Instruction
Iterator for Instructions in a `BasicBlock.
This is an optimization pass for GlobalISel generic memory operations.
FunctionAddr VTableAddr Value
LLVM_ABI InlineResult InlineFunction(CallBase &CB, InlineFunctionInfo &IFI, bool MergeAttributes=false, AAResults *CalleeAAR=nullptr, bool InsertLifetime=true, Function *ForwardVarArgsTo=nullptr, OptimizationRemarkEmitter *ORE=nullptr)
This function inlines the called function into the basic block of the caller.
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
auto successors(const MachineBasicBlock *BB)
constexpr from_range_t from_range
InnerAnalysisManagerProxy< FunctionAnalysisManager, Module > FunctionAnalysisManagerModuleProxy
Provide the FunctionAnalysisManager to Module proxy.
LLVM_ABI InlineResult isInlineViable(Function &Callee)
Check if it is mechanically possible to inline the function Callee, based on the contents of the func...
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
FunctionAddr VTableAddr Count
auto succ_size(const MachineBasicBlock *BB)
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
RNSuccIterator< NodeRef, BlockT, RegionT > succ_begin(NodeRef Node)
LLVM_ABI InlineCost getInlineCost(CallBase &Call, const InlineParams &Params, TargetTransformInfo &CalleeTTI, function_ref< AssumptionCache &(Function &)> GetAssumptionCache, function_ref< const TargetLibraryInfo &(Function &)> GetTLI, function_ref< BlockFrequencyInfo &(Function &)> GetBFI=nullptr, ProfileSummaryInfo *PSI=nullptr, OptimizationRemarkEmitter *ORE=nullptr, function_ref< EphemeralValuesCache &(Function &)> GetEphValuesCache=nullptr)
Get an InlineCost object representing the cost of inlining this callsite.
RNSuccIterator< NodeRef, BlockT, RegionT > succ_end(NodeRef Node)
ArrayRef(const T &OneElt) -> ArrayRef< T >
LLVM_ABI InlineParams getInlineParams()
Generate the parameters to tune the inline cost analysis based only on the commandline options.
LLVM_ABI int getCallsiteCost(const TargetTransformInfo &TTI, const CallBase &Call, const DataLayout &DL)
Return the cost associated with a callsite, including parameter passing and the call/return instructi...
ValueMap< const Value *, WeakTrackingVH > ValueToValueMapTy
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
auto predecessors(const MachineBasicBlock *BB)
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
iterator_range< df_iterator< T > > depth_first(const T &G)
bool all_equal(std::initializer_list< T > Values)
Returns true if all Values in the initializer lists are equal or the list.
LLVM_ABI bool hasBranchWeightMD(const Instruction &I)
Checks if an instructions has Branch Weight Metadata.
LLVM_ABI Function * CloneFunction(Function *F, ValueToValueMapTy &VMap, ClonedCodeInfo *CodeInfo=nullptr)
Return a copy of the specified function and add it to that function's module.
AnalysisManager< Module > ModuleAnalysisManager
Convenience typedef for the Module analysis manager.