PartialInlining.cpp (LLVM 17.0.0git)
1//===- PartialInlining.cpp - Inline parts of functions --------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This pass performs partial inlining, typically by inlining an if statement
10// that surrounds the body of the function.
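//
// Illustrative sketch (added commentary, not from the original sources): a
// function of the form
//
//   void foo(int *P) {
//     if (!P)                  // cheap guard that callers usually take
//       return;
//     ...large, rarely executed body...
//   }
//
// is split so that only the guard is inlined into each caller, while the
// large body is extracted into an outlined helper (the code extractor picks
// a name such as foo.if.end) that is called only when the guard passes.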
11//
12//===----------------------------------------------------------------------===//
13
14#include "llvm/Transforms/IPO/PartialInlining.h"
15#include "llvm/ADT/DenseMap.h"
16#include "llvm/ADT/DenseSet.h"
17#include "llvm/ADT/STLExtras.h"
18#include "llvm/ADT/SmallVector.h"
19#include "llvm/ADT/Statistic.h"
20#include "llvm/Analysis/BlockFrequencyInfo.h"
21#include "llvm/Analysis/BranchProbabilityInfo.h"
22#include "llvm/Analysis/InlineCost.h"
23#include "llvm/Analysis/LoopInfo.h"
24#include "llvm/Analysis/OptimizationRemarkEmitter.h"
25#include "llvm/Analysis/ProfileSummaryInfo.h"
26#include "llvm/Analysis/TargetLibraryInfo.h"
27#include "llvm/Analysis/TargetTransformInfo.h"
28#include "llvm/IR/Attributes.h"
29#include "llvm/IR/BasicBlock.h"
30#include "llvm/IR/CFG.h"
31#include "llvm/IR/DebugLoc.h"
32#include "llvm/IR/DiagnosticInfo.h"
33#include "llvm/IR/Dominators.h"
34#include "llvm/IR/Function.h"
35#include "llvm/IR/InstrTypes.h"
36#include "llvm/IR/Instruction.h"
36#include "llvm/IR/Instruction.h"
37#include "llvm/IR/Instructions.h"
38#include "llvm/IR/IntrinsicInst.h"
39#include "llvm/IR/Intrinsics.h"
40#include "llvm/IR/Module.h"
41#include "llvm/IR/Operator.h"
42#include "llvm/IR/ProfDataUtils.h"
43#include "llvm/IR/User.h"
44#include "llvm/IR/Value.h"
45#include "llvm/Pass.h"
46#include "llvm/Support/BlockFrequency.h"
47#include "llvm/Support/BranchProbability.h"
48#include "llvm/Support/Casting.h"
49#include "llvm/Support/CommandLine.h"
50#include "llvm/Support/ErrorHandling.h"
51#include "llvm/Transforms/IPO.h"
52#include "llvm/Transforms/Utils/Cloning.h"
53#include "llvm/Transforms/Utils/CodeExtractor.h"
54#include "llvm/Transforms/Utils/ValueMapper.h"
55#include <algorithm>
56#include <cassert>
57#include <cstdint>
58#include <memory>
59#include <tuple>
60#include <vector>
61
62using namespace llvm;
63
64#define DEBUG_TYPE "partial-inlining"
65
66STATISTIC(NumPartialInlined,
67 "Number of callsites functions partially inlined into.");
68STATISTIC(NumColdOutlinePartialInlined, "Number of times functions with "
69 "cold outlined regions were partially "
70 "inlined into its caller(s).");
71STATISTIC(NumColdRegionsFound,
72 "Number of cold single entry/exit regions found.");
73STATISTIC(NumColdRegionsOutlined,
74 "Number of cold single entry/exit regions outlined.");
75
76// Command line option to disable partial-inlining. The default is false:
77static cl::opt<bool>
78 DisablePartialInlining("disable-partial-inlining", cl::init(false),
79 cl::Hidden, cl::desc("Disable partial inlining"));
80// Command line option to disable multi-region partial-inlining. The default is
81// false:
83 "disable-mr-partial-inlining", cl::init(false), cl::Hidden,
84 cl::desc("Disable multi-region partial inlining"));
85
86// Command line option to force outlining in regions with live exit variables.
87// The default is false:
88static cl::opt<bool>
89 ForceLiveExit("pi-force-live-exit-outline", cl::init(false), cl::Hidden,
90 cl::desc("Force outline regions with live exits"));
91
92// Command line option to enable marking outline functions with Cold Calling
93// Convention. The default is false:
94static cl::opt<bool>
95 MarkOutlinedColdCC("pi-mark-coldcc", cl::init(false), cl::Hidden,
96 cl::desc("Mark outline function calls with ColdCC"));
97
98// This is an option used by testing:
99static cl::opt<bool> SkipCostAnalysis("skip-partial-inlining-cost-analysis",
100                                      cl::ReallyHidden,
102                                      cl::desc("Skip Cost Analysis"));
103// Used to determine if a cold region is worth outlining based on
104// its inlining cost compared to the original function. Default is set at 10%.
105// ie. if the cold region reduces the inlining cost of the original function by
106// at least 10%.
108 "min-region-size-ratio", cl::init(0.1), cl::Hidden,
109 cl::desc("Minimum ratio comparing relative sizes of each "
110 "outline candidate and original function"));
111// Used to tune the minimum number of execution counts needed in the predecessor
112// block to the cold edge. ie. confidence interval.
113static cl::opt<unsigned>
114    MinBlockCounterExecution("min-block-execution", cl::init(100), cl::Hidden,
115 cl::desc("Minimum block executions to consider "
116 "its BranchProbabilityInfo valid"));
117// Used to determine when an edge is considered cold. Default is set to 10%. ie.
118// if the branch probability is 10% or less, then it is deemed as 'cold'.
120 "cold-branch-ratio", cl::init(0.1), cl::Hidden,
121 cl::desc("Minimum BranchProbability to consider a region cold."));
122
124 "max-num-inline-blocks", cl::init(5), cl::Hidden,
125 cl::desc("Max number of blocks to be partially inlined"));
126
127// Command line option to set the maximum number of partial inlining allowed
128// for the module. The default value of -1 means no limit.
130 "max-partial-inlining", cl::init(-1), cl::Hidden,
131 cl::desc("Max number of partial inlining. The default is unlimited"));
132
133// Used only when PGO or user annotated branch data is absent. It is
134// the least value that is used to weigh the outline region. If BFI
135// produces larger value, the BFI value will be used.
136static cl::opt<int>
137 OutlineRegionFreqPercent("outline-region-freq-percent", cl::init(75),
138                             cl::Hidden,
139                             cl::desc("Relative frequency of outline region to "
140 "the entry block"));
141
143 "partial-inlining-extra-penalty", cl::init(0), cl::Hidden,
144 cl::desc("A debug option to add additional penalty to the computed one."));
145
146namespace {
147
148struct FunctionOutliningInfo {
149 FunctionOutliningInfo() = default;
150
151 // Returns the number of blocks to be inlined including all blocks
152 // in Entries and one return block.
153 unsigned getNumInlinedBlocks() const { return Entries.size() + 1; }
154
155 // A set of blocks including the function entry that guard
156 // the region to be outlined.
157  SmallVector<BasicBlock *, 4> Entries;
158
159 // The return block that is not included in the outlined region.
160 BasicBlock *ReturnBlock = nullptr;
161
162 // The dominating block of the region to be outlined.
163 BasicBlock *NonReturnBlock = nullptr;
164
165  // The set of blocks in Entries that are predecessors to ReturnBlock
166 SmallVector<BasicBlock *, 4> ReturnBlockPreds;
167};
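// Informal sketch of the shape captured above (added commentary, not from the
// original sources): the Entries blocks form a chain of guards starting at
// the function entry. Every successor of an entry is either another entry,
// the shared ReturnBlock (the early-return path that stays inlined into
// callers), or NonReturnBlock, which dominates the region handed to the code
// extractor; ReturnBlockPreds records which entries branch directly to
// ReturnBlock.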
168
169struct FunctionOutliningMultiRegionInfo {
170 FunctionOutliningMultiRegionInfo() = default;
171
172 // Container for outline regions
173 struct OutlineRegionInfo {
174 OutlineRegionInfo(ArrayRef<BasicBlock *> Region,
175 BasicBlock *EntryBlock, BasicBlock *ExitBlock,
176 BasicBlock *ReturnBlock)
177 : Region(Region.begin(), Region.end()), EntryBlock(EntryBlock),
178 ExitBlock(ExitBlock), ReturnBlock(ReturnBlock) {}
179    SmallVector<BasicBlock *, 8> Region;
180    BasicBlock *EntryBlock;
181 BasicBlock *ExitBlock;
182 BasicBlock *ReturnBlock;
183 };
184
185  SmallVector<OutlineRegionInfo, 4> ORI;
186};
187
188struct PartialInlinerImpl {
189
190  PartialInlinerImpl(
191      function_ref<AssumptionCache &(Function &)> GetAC,
192      function_ref<AssumptionCache *(Function &)> LookupAC,
193      function_ref<TargetTransformInfo &(Function &)> GTTI,
194      function_ref<const TargetLibraryInfo &(Function &)> GTLI,
195 ProfileSummaryInfo &ProfSI,
196 function_ref<BlockFrequencyInfo &(Function &)> GBFI = nullptr)
197 : GetAssumptionCache(GetAC), LookupAssumptionCache(LookupAC),
198 GetTTI(GTTI), GetBFI(GBFI), GetTLI(GTLI), PSI(ProfSI) {}
199
200 bool run(Module &M);
201 // Main part of the transformation that calls helper functions to find
202 // outlining candidates, clone & outline the function, and attempt to
203 // partially inline the resulting function. Returns true if
204 // inlining was successful, false otherwise. Also returns the outline
205 // function (only if we partially inlined early returns) as there is a
206 // possibility to further "peel" early return statements that were left in the
207 // outline function due to code size.
208 std::pair<bool, Function *> unswitchFunction(Function &F);
209
210 // This class speculatively clones the function to be partial inlined.
211 // At the end of partial inlining, the remaining callsites to the cloned
212 // function that are not partially inlined will be fixed up to reference
213 // the original function, and the cloned function will be erased.
214 struct FunctionCloner {
215 // Two constructors, one for single region outlining, the other for
216 // multi-region outlining.
217    FunctionCloner(Function *F, FunctionOutliningInfo *OI,
218                   OptimizationRemarkEmitter &ORE,
219                   function_ref<AssumptionCache *(Function &)> LookupAC,
220                   function_ref<TargetTransformInfo &(Function &)> GetTTI);
221    FunctionCloner(Function *F, FunctionOutliningMultiRegionInfo *OMRI,
222                   OptimizationRemarkEmitter &ORE,
223                   function_ref<AssumptionCache *(Function &)> LookupAC,
224                   function_ref<TargetTransformInfo &(Function &)> GetTTI);
225
226 ~FunctionCloner();
227
228 // Prepare for function outlining: making sure there is only
229 // one incoming edge from the extracted/outlined region to
230 // the return block.
231 void normalizeReturnBlock() const;
232
233 // Do function outlining for cold regions.
234 bool doMultiRegionFunctionOutlining();
235 // Do function outlining for region after early return block(s).
236 // NOTE: For vararg functions that do the vararg handling in the outlined
237 // function, we temporarily generate IR that does not properly
238 // forward varargs to the outlined function. Calling InlineFunction
239 // will update calls to the outlined functions to properly forward
240 // the varargs.
241 Function *doSingleRegionFunctionOutlining();
242
243 Function *OrigFunc = nullptr;
244 Function *ClonedFunc = nullptr;
245
246 typedef std::pair<Function *, BasicBlock *> FuncBodyCallerPair;
247 // Keep track of Outlined Functions and the basic block they're called from.
248 SmallVector<FuncBodyCallerPair, 4> OutlinedFunctions;
249
250 // ClonedFunc is inlined in one of its callers after function
251 // outlining.
252 bool IsFunctionInlined = false;
253 // The cost of the region to be outlined.
254 InstructionCost OutlinedRegionCost = 0;
255 // ClonedOI is specific to outlining non-early return blocks.
256 std::unique_ptr<FunctionOutliningInfo> ClonedOI = nullptr;
257 // ClonedOMRI is specific to outlining cold regions.
258 std::unique_ptr<FunctionOutliningMultiRegionInfo> ClonedOMRI = nullptr;
259 std::unique_ptr<BlockFrequencyInfo> ClonedFuncBFI = nullptr;
260    OptimizationRemarkEmitter &ORE;
261    function_ref<AssumptionCache *(Function &)> LookupAC;
262    function_ref<TargetTransformInfo &(Function &)> GetTTI;
263  };
264
265private:
266 int NumPartialInlining = 0;
267 function_ref<AssumptionCache &(Function &)> GetAssumptionCache;
268 function_ref<AssumptionCache *(Function &)> LookupAssumptionCache;
269  function_ref<TargetTransformInfo &(Function &)> GetTTI;
270  function_ref<BlockFrequencyInfo &(Function &)> GetBFI;
271  function_ref<const TargetLibraryInfo &(Function &)> GetTLI;
272  ProfileSummaryInfo &PSI;
273
274  // Return the frequency of the OutliningCallBB relative to F's entry point.
275 // The result is no larger than 1 and is represented using BP.
276 // (Note that the outlined region's 'head' block can only have incoming
277 // edges from the guarding entry blocks).
278  BranchProbability
279  getOutliningCallBBRelativeFreq(FunctionCloner &Cloner) const;
280
281 // Return true if the callee of CB should be partially inlined with
282 // profit.
283 bool shouldPartialInline(CallBase &CB, FunctionCloner &Cloner,
284 BlockFrequency WeightedOutliningRcost,
285 OptimizationRemarkEmitter &ORE) const;
286
287 // Try to inline DuplicateFunction (cloned from F with call to
288 // the OutlinedFunction into its callers. Return true
289 // if there is any successful inlining.
290 bool tryPartialInline(FunctionCloner &Cloner);
291
292  // Compute the mapping from use sites of DuplicateFunction to the enclosing
293 // BB's profile count.
294 void
295 computeCallsiteToProfCountMap(Function *DuplicateFunction,
296 DenseMap<User *, uint64_t> &SiteCountMap) const;
297
298 bool isLimitReached() const {
299 return (MaxNumPartialInlining != -1 &&
300 NumPartialInlining >= MaxNumPartialInlining);
301 }
302
303 static CallBase *getSupportedCallBase(User *U) {
304 if (isa<CallInst>(U) || isa<InvokeInst>(U))
305 return cast<CallBase>(U);
306 llvm_unreachable("All uses must be calls");
307 return nullptr;
308 }
309
310 static CallBase *getOneCallSiteTo(Function &F) {
311 User *User = *F.user_begin();
312 return getSupportedCallBase(User);
313 }
314
315 std::tuple<DebugLoc, BasicBlock *> getOneDebugLoc(Function &F) const {
316 CallBase *CB = getOneCallSiteTo(F);
317 DebugLoc DLoc = CB->getDebugLoc();
318 BasicBlock *Block = CB->getParent();
319 return std::make_tuple(DLoc, Block);
320 }
321
322 // Returns the costs associated with function outlining:
323 // - The first value is the non-weighted runtime cost for making the call
324  //   to the outlined function, including the additional setup cost in the
325 // outlined function itself;
326 // - The second value is the estimated size of the new call sequence in
327 // basic block Cloner.OutliningCallBB;
328 std::tuple<InstructionCost, InstructionCost>
329 computeOutliningCosts(FunctionCloner &Cloner) const;
330
331 // Compute the 'InlineCost' of block BB. InlineCost is a proxy used to
332 // approximate both the size and runtime cost (Note that in the current
333 // inline cost analysis, there is no clear distinction there either).
334 static InstructionCost computeBBInlineCost(BasicBlock *BB,
335                                              TargetTransformInfo *TTI);
336
337 std::unique_ptr<FunctionOutliningInfo>
338 computeOutliningInfo(Function &F) const;
339
340 std::unique_ptr<FunctionOutliningMultiRegionInfo>
341 computeOutliningColdRegionsInfo(Function &F,
342 OptimizationRemarkEmitter &ORE) const;
343};
344
345} // end anonymous namespace
346
347std::unique_ptr<FunctionOutliningMultiRegionInfo>
348PartialInlinerImpl::computeOutliningColdRegionsInfo(
349 Function &F, OptimizationRemarkEmitter &ORE) const {
350 BasicBlock *EntryBlock = &F.front();
351
352 DominatorTree DT(F);
353 LoopInfo LI(DT);
354 BranchProbabilityInfo BPI(F, LI);
355 std::unique_ptr<BlockFrequencyInfo> ScopedBFI;
356  BlockFrequencyInfo *BFI;
357  if (!GetBFI) {
358 ScopedBFI.reset(new BlockFrequencyInfo(F, BPI, LI));
359 BFI = ScopedBFI.get();
360 } else
361 BFI = &(GetBFI(F));
362
363 // Return if we don't have profiling information.
364 if (!PSI.hasInstrumentationProfile())
365 return std::unique_ptr<FunctionOutliningMultiRegionInfo>();
366
367 std::unique_ptr<FunctionOutliningMultiRegionInfo> OutliningInfo =
368 std::make_unique<FunctionOutliningMultiRegionInfo>();
369
370 auto IsSingleExit =
371 [&ORE](SmallVectorImpl<BasicBlock *> &BlockList) -> BasicBlock * {
372 BasicBlock *ExitBlock = nullptr;
373 for (auto *Block : BlockList) {
374 for (BasicBlock *Succ : successors(Block)) {
375 if (!is_contained(BlockList, Succ)) {
376 if (ExitBlock) {
377 ORE.emit([&]() {
378 return OptimizationRemarkMissed(DEBUG_TYPE, "MultiExitRegion",
379 &Succ->front())
380 << "Region dominated by "
381 << ore::NV("Block", BlockList.front()->getName())
382 << " has more than one region exit edge.";
383 });
384 return nullptr;
385 }
386
387 ExitBlock = Block;
388 }
389 }
390 }
391 return ExitBlock;
392 };
393
394 auto BBProfileCount = [BFI](BasicBlock *BB) {
395 return BFI->getBlockProfileCount(BB).value_or(0);
396 };
397
398 // Use the same computeBBInlineCost function to compute the cost savings of
399// outlining the candidate region.
400 TargetTransformInfo *FTTI = &GetTTI(F);
401 InstructionCost OverallFunctionCost = 0;
402 for (auto &BB : F)
403 OverallFunctionCost += computeBBInlineCost(&BB, FTTI);
404
405 LLVM_DEBUG(dbgs() << "OverallFunctionCost = " << OverallFunctionCost
406 << "\n";);
407
408 InstructionCost MinOutlineRegionCost = OverallFunctionCost.map(
409 [&](auto Cost) { return Cost * MinRegionSizeRatio; });
410
411 BranchProbability MinBranchProbability(
412 static_cast<int>(ColdBranchRatio * MinBlockCounterExecution),
413      MinBlockCounterExecution);
414  bool ColdCandidateFound = false;
415 BasicBlock *CurrEntry = EntryBlock;
416 std::vector<BasicBlock *> DFS;
417  DenseMap<BasicBlock *, bool> VisitedMap;
418  DFS.push_back(CurrEntry);
419 VisitedMap[CurrEntry] = true;
420
421 // Use Depth First Search on the basic blocks to find CFG edges that are
422 // considered cold.
423// Candidate cold regions must also have their inline cost compared to the
424// overall inline cost of the original function. A region is outlined only
425// if it reduces the inline cost of the function by 'MinOutlineRegionCost' or
426// more.
427 while (!DFS.empty()) {
428 auto *ThisBB = DFS.back();
429 DFS.pop_back();
430 // Only consider regions with predecessor blocks that are considered
431 // not-cold (default: part of the top 99.99% of all block counters)
432 // AND greater than our minimum block execution count (default: 100).
433 if (PSI.isColdBlock(ThisBB, BFI) ||
434 BBProfileCount(ThisBB) < MinBlockCounterExecution)
435 continue;
436 for (auto SI = succ_begin(ThisBB); SI != succ_end(ThisBB); ++SI) {
437 if (VisitedMap[*SI])
438 continue;
439 VisitedMap[*SI] = true;
440 DFS.push_back(*SI);
441 // If branch isn't cold, we skip to the next one.
442 BranchProbability SuccProb = BPI.getEdgeProbability(ThisBB, *SI);
443 if (SuccProb > MinBranchProbability)
444 continue;
445
446 LLVM_DEBUG(dbgs() << "Found cold edge: " << ThisBB->getName() << "->"
447 << SI->getName()
448 << "\nBranch Probability = " << SuccProb << "\n";);
449
450 SmallVector<BasicBlock *, 8> DominateVector;
451 DT.getDescendants(*SI, DominateVector);
452 assert(!DominateVector.empty() &&
453 "SI should be reachable and have at least itself as descendant");
454
455 // We can only outline single entry regions (for now).
456 if (!DominateVector.front()->hasNPredecessors(1)) {
457 LLVM_DEBUG(dbgs() << "ABORT: Block " << SI->getName()
458 << " doesn't have a single predecessor in the "
459 "dominator tree\n";);
460 continue;
461 }
462
463 BasicBlock *ExitBlock = nullptr;
464 // We can only outline single exit regions (for now).
465 if (!(ExitBlock = IsSingleExit(DominateVector))) {
466 LLVM_DEBUG(dbgs() << "ABORT: Block " << SI->getName()
467 << " doesn't have a unique successor\n";);
468 continue;
469 }
470
471 InstructionCost OutlineRegionCost = 0;
472 for (auto *BB : DominateVector)
473 OutlineRegionCost += computeBBInlineCost(BB, &GetTTI(*BB->getParent()));
474
475 LLVM_DEBUG(dbgs() << "OutlineRegionCost = " << OutlineRegionCost
476 << "\n";);
477
478 if (!SkipCostAnalysis && OutlineRegionCost < MinOutlineRegionCost) {
479 ORE.emit([&]() {
480 return OptimizationRemarkAnalysis(DEBUG_TYPE, "TooCostly",
481 &SI->front())
482 << ore::NV("Callee", &F)
483 << " inline cost-savings smaller than "
484 << ore::NV("Cost", MinOutlineRegionCost);
485 });
486
487 LLVM_DEBUG(dbgs() << "ABORT: Outline region cost is smaller than "
488 << MinOutlineRegionCost << "\n";);
489 continue;
490 }
491
492 // For now, ignore blocks that belong to a SISE region that is a
493 // candidate for outlining. In the future, we may want to look
494 // at inner regions because the outer region may have live-exit
495 // variables.
496 for (auto *BB : DominateVector)
497 VisitedMap[BB] = true;
498
499 // ReturnBlock here means the block after the outline call
500 BasicBlock *ReturnBlock = ExitBlock->getSingleSuccessor();
501 FunctionOutliningMultiRegionInfo::OutlineRegionInfo RegInfo(
502 DominateVector, DominateVector.front(), ExitBlock, ReturnBlock);
503 OutliningInfo->ORI.push_back(RegInfo);
504 LLVM_DEBUG(dbgs() << "Found Cold Candidate starting at block: "
505 << DominateVector.front()->getName() << "\n";);
506 ColdCandidateFound = true;
507 NumColdRegionsFound++;
508 }
509 }
510
511 if (ColdCandidateFound)
512 return OutliningInfo;
513
514 return std::unique_ptr<FunctionOutliningMultiRegionInfo>();
515}
516
517std::unique_ptr<FunctionOutliningInfo>
518PartialInlinerImpl::computeOutliningInfo(Function &F) const {
519 BasicBlock *EntryBlock = &F.front();
520 BranchInst *BR = dyn_cast<BranchInst>(EntryBlock->getTerminator());
521 if (!BR || BR->isUnconditional())
522 return std::unique_ptr<FunctionOutliningInfo>();
523
524 // Returns true if Succ is BB's successor
525 auto IsSuccessor = [](BasicBlock *Succ, BasicBlock *BB) {
526 return is_contained(successors(BB), Succ);
527 };
528
529 auto IsReturnBlock = [](BasicBlock *BB) {
530 Instruction *TI = BB->getTerminator();
531 return isa<ReturnInst>(TI);
532 };
533
534 auto GetReturnBlock = [&](BasicBlock *Succ1, BasicBlock *Succ2) {
535 if (IsReturnBlock(Succ1))
536 return std::make_tuple(Succ1, Succ2);
537 if (IsReturnBlock(Succ2))
538 return std::make_tuple(Succ2, Succ1);
539
540 return std::make_tuple<BasicBlock *, BasicBlock *>(nullptr, nullptr);
541 };
542
543 // Detect a triangular shape:
544 auto GetCommonSucc = [&](BasicBlock *Succ1, BasicBlock *Succ2) {
545 if (IsSuccessor(Succ1, Succ2))
546 return std::make_tuple(Succ1, Succ2);
547 if (IsSuccessor(Succ2, Succ1))
548 return std::make_tuple(Succ2, Succ1);
549
550 return std::make_tuple<BasicBlock *, BasicBlock *>(nullptr, nullptr);
551 };
552
553 std::unique_ptr<FunctionOutliningInfo> OutliningInfo =
554 std::make_unique<FunctionOutliningInfo>();
555
556 BasicBlock *CurrEntry = EntryBlock;
557 bool CandidateFound = false;
558 do {
559 // The number of blocks to be inlined has already reached
560 // the limit. When MaxNumInlineBlocks is set to 0 or 1, this
561 // disables partial inlining for the function.
562 if (OutliningInfo->getNumInlinedBlocks() >= MaxNumInlineBlocks)
563 break;
564
565 if (succ_size(CurrEntry) != 2)
566 break;
567
568 BasicBlock *Succ1 = *succ_begin(CurrEntry);
569 BasicBlock *Succ2 = *(succ_begin(CurrEntry) + 1);
570
571 BasicBlock *ReturnBlock, *NonReturnBlock;
572 std::tie(ReturnBlock, NonReturnBlock) = GetReturnBlock(Succ1, Succ2);
573
574 if (ReturnBlock) {
575 OutliningInfo->Entries.push_back(CurrEntry);
576 OutliningInfo->ReturnBlock = ReturnBlock;
577 OutliningInfo->NonReturnBlock = NonReturnBlock;
578 CandidateFound = true;
579 break;
580 }
581
582 BasicBlock *CommSucc, *OtherSucc;
583 std::tie(CommSucc, OtherSucc) = GetCommonSucc(Succ1, Succ2);
584
585 if (!CommSucc)
586 break;
587
588 OutliningInfo->Entries.push_back(CurrEntry);
589 CurrEntry = OtherSucc;
590 } while (true);
591
592 if (!CandidateFound)
593 return std::unique_ptr<FunctionOutliningInfo>();
594
595 // There should not be any successors (not in the entry set) other than
596 // {ReturnBlock, NonReturnBlock}
597 assert(OutliningInfo->Entries[0] == &F.front() &&
598 "Function Entry must be the first in Entries vector");
599  DenseSet<BasicBlock *> Entries;
600  for (BasicBlock *E : OutliningInfo->Entries)
601 Entries.insert(E);
602
603  // Returns true if BB has a predecessor that is not
604  // in the Entries set.
605 auto HasNonEntryPred = [Entries](BasicBlock *BB) {
606 for (auto *Pred : predecessors(BB)) {
607 if (!Entries.count(Pred))
608 return true;
609 }
610 return false;
611 };
612 auto CheckAndNormalizeCandidate =
613 [Entries, HasNonEntryPred](FunctionOutliningInfo *OutliningInfo) {
614 for (BasicBlock *E : OutliningInfo->Entries) {
615 for (auto *Succ : successors(E)) {
616 if (Entries.count(Succ))
617 continue;
618 if (Succ == OutliningInfo->ReturnBlock)
619 OutliningInfo->ReturnBlockPreds.push_back(E);
620 else if (Succ != OutliningInfo->NonReturnBlock)
621 return false;
622 }
623 // There should not be any outside incoming edges either:
624 if (HasNonEntryPred(E))
625 return false;
626 }
627 return true;
628 };
629
630 if (!CheckAndNormalizeCandidate(OutliningInfo.get()))
631 return std::unique_ptr<FunctionOutliningInfo>();
632
633 // Now further growing the candidate's inlining region by
634 // peeling off dominating blocks from the outlining region:
635 while (OutliningInfo->getNumInlinedBlocks() < MaxNumInlineBlocks) {
636 BasicBlock *Cand = OutliningInfo->NonReturnBlock;
637 if (succ_size(Cand) != 2)
638 break;
639
640 if (HasNonEntryPred(Cand))
641 break;
642
643 BasicBlock *Succ1 = *succ_begin(Cand);
644 BasicBlock *Succ2 = *(succ_begin(Cand) + 1);
645
646 BasicBlock *ReturnBlock, *NonReturnBlock;
647 std::tie(ReturnBlock, NonReturnBlock) = GetReturnBlock(Succ1, Succ2);
648 if (!ReturnBlock || ReturnBlock != OutliningInfo->ReturnBlock)
649 break;
650
651 if (NonReturnBlock->getSinglePredecessor() != Cand)
652 break;
653
654    // Now grow and update OutliningInfo:
655 OutliningInfo->Entries.push_back(Cand);
656 OutliningInfo->NonReturnBlock = NonReturnBlock;
657 OutliningInfo->ReturnBlockPreds.push_back(Cand);
658 Entries.insert(Cand);
659 }
660
661 return OutliningInfo;
662}
663
664// Check if there is PGO data or user annotated branch data:
665static bool hasProfileData(const Function &F, const FunctionOutliningInfo &OI) {
666 if (F.hasProfileData())
667 return true;
668 // Now check if any of the entry block has MD_prof data:
669 for (auto *E : OI.Entries) {
670 BranchInst *BR = dyn_cast<BranchInst>(E->getTerminator());
671 if (!BR || BR->isUnconditional())
672 continue;
673 if (hasBranchWeightMD(*BR))
674 return true;
675 }
676 return false;
677}
678
679BranchProbability PartialInlinerImpl::getOutliningCallBBRelativeFreq(
680 FunctionCloner &Cloner) const {
681 BasicBlock *OutliningCallBB = Cloner.OutlinedFunctions.back().second;
682 auto EntryFreq =
683 Cloner.ClonedFuncBFI->getBlockFreq(&Cloner.ClonedFunc->getEntryBlock());
684 auto OutliningCallFreq =
685 Cloner.ClonedFuncBFI->getBlockFreq(OutliningCallBB);
686 // FIXME Hackery needed because ClonedFuncBFI is based on the function BEFORE
687 // we outlined any regions, so we may encounter situations where the
688 // OutliningCallFreq is *slightly* bigger than the EntryFreq.
689 if (OutliningCallFreq.getFrequency() > EntryFreq.getFrequency())
690 OutliningCallFreq = EntryFreq;
691
692 auto OutlineRegionRelFreq = BranchProbability::getBranchProbability(
693 OutliningCallFreq.getFrequency(), EntryFreq.getFrequency());
694
695 if (hasProfileData(*Cloner.OrigFunc, *Cloner.ClonedOI))
696 return OutlineRegionRelFreq;
697
698 // When profile data is not available, we need to be conservative in
699 // estimating the overall savings. Static branch prediction can usually
700 // guess the branch direction right (taken/non-taken), but the guessed
701 // branch probability is usually not biased enough. In case when the
702 // outlined region is predicted to be likely, its probability needs
703 // to be made higher (more biased) to not under-estimate the cost of
704 // function outlining. On the other hand, if the outlined region
705  // is predicted to be less likely, the predicted probability is usually
706  // higher than the actual. For instance, the actual probability of the
707  // less likely target is only 5%, but the guessed probability can be
708 // 40%. In the latter case, there is no need for further adjustment.
709 // FIXME: add an option for this.
710 if (OutlineRegionRelFreq < BranchProbability(45, 100))
711 return OutlineRegionRelFreq;
712
713 OutlineRegionRelFreq = std::max(
714 OutlineRegionRelFreq, BranchProbability(OutlineRegionFreqPercent, 100));
715
716 return OutlineRegionRelFreq;
717}
718
719bool PartialInlinerImpl::shouldPartialInline(
720 CallBase &CB, FunctionCloner &Cloner, BlockFrequency WeightedOutliningRcost,
721 OptimizationRemarkEmitter &ORE) const {
722 using namespace ore;
723
724  Function *Callee = CB.getCalledFunction();
725  assert(Callee == Cloner.ClonedFunc);
726
727  if (SkipCostAnalysis)
728    return isInlineViable(*Callee).isSuccess();
729
730 Function *Caller = CB.getCaller();
731 auto &CalleeTTI = GetTTI(*Callee);
732 bool RemarksEnabled =
733 Callee->getContext().getDiagHandlerPtr()->isMissedOptRemarkEnabled(
734 DEBUG_TYPE);
735 InlineCost IC =
736 getInlineCost(CB, getInlineParams(), CalleeTTI, GetAssumptionCache,
737 GetTLI, GetBFI, &PSI, RemarksEnabled ? &ORE : nullptr);
738
739 if (IC.isAlways()) {
740 ORE.emit([&]() {
741 return OptimizationRemarkAnalysis(DEBUG_TYPE, "AlwaysInline", &CB)
742 << NV("Callee", Cloner.OrigFunc)
743 << " should always be fully inlined, not partially";
744 });
745 return false;
746 }
747
748 if (IC.isNever()) {
749 ORE.emit([&]() {
750 return OptimizationRemarkMissed(DEBUG_TYPE, "NeverInline", &CB)
751 << NV("Callee", Cloner.OrigFunc) << " not partially inlined into "
752 << NV("Caller", Caller)
753 << " because it should never be inlined (cost=never)";
754 });
755 return false;
756 }
757
758 if (!IC) {
759 ORE.emit([&]() {
760 return OptimizationRemarkAnalysis(DEBUG_TYPE, "TooCostly", &CB)
761 << NV("Callee", Cloner.OrigFunc) << " not partially inlined into "
762 << NV("Caller", Caller) << " because too costly to inline (cost="
763 << NV("Cost", IC.getCost()) << ", threshold="
764 << NV("Threshold", IC.getCostDelta() + IC.getCost()) << ")";
765 });
766 return false;
767 }
768 const DataLayout &DL = Caller->getParent()->getDataLayout();
769
770 // The savings of eliminating the call:
771 int NonWeightedSavings = getCallsiteCost(CB, DL);
772 BlockFrequency NormWeightedSavings(NonWeightedSavings);
773
774 // Weighted saving is smaller than weighted cost, return false
775 if (NormWeightedSavings < WeightedOutliningRcost) {
776 ORE.emit([&]() {
777 return OptimizationRemarkAnalysis(DEBUG_TYPE, "OutliningCallcostTooHigh",
778 &CB)
779 << NV("Callee", Cloner.OrigFunc) << " not partially inlined into "
780 << NV("Caller", Caller) << " runtime overhead (overhead="
781 << NV("Overhead", (unsigned)WeightedOutliningRcost.getFrequency())
782 << ", savings="
783 << NV("Savings", (unsigned)NormWeightedSavings.getFrequency())
784 << ")"
785 << " of making the outlined call is too high";
786 });
787
788 return false;
789 }
790
791 ORE.emit([&]() {
792 return OptimizationRemarkAnalysis(DEBUG_TYPE, "CanBePartiallyInlined", &CB)
793 << NV("Callee", Cloner.OrigFunc) << " can be partially inlined into "
794 << NV("Caller", Caller) << " with cost=" << NV("Cost", IC.getCost())
795 << " (threshold="
796 << NV("Threshold", IC.getCostDelta() + IC.getCost()) << ")";
797 });
798 return true;
799}
800
801// TODO: Ideally we should share Inliner's InlineCost Analysis code.
802// For now use a simplified version. The returned 'InlineCost' will be used
803// to estimate the size cost as well as runtime cost of the BB.
804InstructionCost
805PartialInlinerImpl::computeBBInlineCost(BasicBlock *BB,
806                                        TargetTransformInfo *TTI) {
807  InstructionCost InlineCost = 0;
808  const DataLayout &DL = BB->getParent()->getParent()->getDataLayout();
809  int InstrCost = InlineConstants::getInstrCost();
810  for (Instruction &I : BB->instructionsWithoutDebug()) {
811 // Skip free instructions.
812 switch (I.getOpcode()) {
813 case Instruction::BitCast:
814 case Instruction::PtrToInt:
815 case Instruction::IntToPtr:
816 case Instruction::Alloca:
817 case Instruction::PHI:
818 continue;
819 case Instruction::GetElementPtr:
820 if (cast<GetElementPtrInst>(&I)->hasAllZeroIndices())
821 continue;
822 break;
823 default:
824 break;
825 }
826
827 if (I.isLifetimeStartOrEnd())
828 continue;
829
830 if (auto *II = dyn_cast<IntrinsicInst>(&I)) {
831 Intrinsic::ID IID = II->getIntrinsicID();
832      SmallVector<Type *, 4> Tys;
833      FastMathFlags FMF;
834 for (Value *Val : II->args())
835 Tys.push_back(Val->getType());
836
837 if (auto *FPMO = dyn_cast<FPMathOperator>(II))
838 FMF = FPMO->getFastMathFlags();
839
840 IntrinsicCostAttributes ICA(IID, II->getType(), Tys, FMF);
841      InlineCost += TTI->getIntrinsicInstrCost(
842          ICA, TargetTransformInfo::TCK_SizeAndLatency);
843      continue;
843 }
844
845    if (CallInst *CI = dyn_cast<CallInst>(&I)) {
846      InlineCost += getCallsiteCost(*CI, DL);
847      continue;
848    }
849
850    if (InvokeInst *II = dyn_cast<InvokeInst>(&I)) {
851      InlineCost += getCallsiteCost(*II, DL);
852      continue;
853    }
854
855 if (SwitchInst *SI = dyn_cast<SwitchInst>(&I)) {
856 InlineCost += (SI->getNumCases() + 1) * InstrCost;
857 continue;
858 }
859    InlineCost += InstrCost;
860  }
861
862 return InlineCost;
863}
864
865std::tuple<InstructionCost, InstructionCost>
866PartialInlinerImpl::computeOutliningCosts(FunctionCloner &Cloner) const {
867 InstructionCost OutliningFuncCallCost = 0, OutlinedFunctionCost = 0;
868 for (auto FuncBBPair : Cloner.OutlinedFunctions) {
869 Function *OutlinedFunc = FuncBBPair.first;
870 BasicBlock* OutliningCallBB = FuncBBPair.second;
871 // Now compute the cost of the call sequence to the outlined function
872 // 'OutlinedFunction' in BB 'OutliningCallBB':
873 auto *OutlinedFuncTTI = &GetTTI(*OutlinedFunc);
874 OutliningFuncCallCost +=
875 computeBBInlineCost(OutliningCallBB, OutlinedFuncTTI);
876
877 // Now compute the cost of the extracted/outlined function itself:
878 for (BasicBlock &BB : *OutlinedFunc)
879 OutlinedFunctionCost += computeBBInlineCost(&BB, OutlinedFuncTTI);
880 }
881 assert(OutlinedFunctionCost >= Cloner.OutlinedRegionCost &&
882 "Outlined function cost should be no less than the outlined region");
883
884 // The code extractor introduces a new root and exit stub blocks with
885 // additional unconditional branches. Those branches will be eliminated
886 // later with bb layout. The cost should be adjusted accordingly:
887 OutlinedFunctionCost -=
888 2 * InlineConstants::getInstrCost() * Cloner.OutlinedFunctions.size();
889
890 InstructionCost OutliningRuntimeOverhead =
891 OutliningFuncCallCost +
892 (OutlinedFunctionCost - Cloner.OutlinedRegionCost) +
893 ExtraOutliningPenalty.getValue();
894
895 return std::make_tuple(OutliningFuncCallCost, OutliningRuntimeOverhead);
896}
897
898// Create the callsite to profile count map which is
899// used to update the original function's entry count,
900// after the function is partially inlined into the callsite.
901void PartialInlinerImpl::computeCallsiteToProfCountMap(
902 Function *DuplicateFunction,
903 DenseMap<User *, uint64_t> &CallSiteToProfCountMap) const {
904 std::vector<User *> Users(DuplicateFunction->user_begin(),
905 DuplicateFunction->user_end());
906 Function *CurrentCaller = nullptr;
907 std::unique_ptr<BlockFrequencyInfo> TempBFI;
908 BlockFrequencyInfo *CurrentCallerBFI = nullptr;
909
910 auto ComputeCurrBFI = [&,this](Function *Caller) {
911 // For the old pass manager:
912 if (!GetBFI) {
913 DominatorTree DT(*Caller);
914 LoopInfo LI(DT);
915 BranchProbabilityInfo BPI(*Caller, LI);
916 TempBFI.reset(new BlockFrequencyInfo(*Caller, BPI, LI));
917 CurrentCallerBFI = TempBFI.get();
918 } else {
919 // New pass manager:
920 CurrentCallerBFI = &(GetBFI(*Caller));
921 }
922 };
923
924 for (User *User : Users) {
925 // Don't bother with BlockAddress used by CallBr for asm goto.
926 if (isa<BlockAddress>(User))
927 continue;
928 CallBase *CB = getSupportedCallBase(User);
929 Function *Caller = CB->getCaller();
930 if (CurrentCaller != Caller) {
931 CurrentCaller = Caller;
932 ComputeCurrBFI(Caller);
933 } else {
934 assert(CurrentCallerBFI && "CallerBFI is not set");
935 }
936 BasicBlock *CallBB = CB->getParent();
937 auto Count = CurrentCallerBFI->getBlockProfileCount(CallBB);
938 if (Count)
939 CallSiteToProfCountMap[User] = *Count;
940 else
941 CallSiteToProfCountMap[User] = 0;
942 }
943}
944
945PartialInlinerImpl::FunctionCloner::FunctionCloner(
946 Function *F, FunctionOutliningInfo *OI, OptimizationRemarkEmitter &ORE,
947    function_ref<AssumptionCache *(Function &)> LookupAC,
948    function_ref<TargetTransformInfo &(Function &)> GetTTI)
949    : OrigFunc(F), ORE(ORE), LookupAC(LookupAC), GetTTI(GetTTI) {
950 ClonedOI = std::make_unique<FunctionOutliningInfo>();
951
952 // Clone the function, so that we can hack away on it.
953  ValueToValueMapTy VMap;
954  ClonedFunc = CloneFunction(F, VMap);
955
956 ClonedOI->ReturnBlock = cast<BasicBlock>(VMap[OI->ReturnBlock]);
957 ClonedOI->NonReturnBlock = cast<BasicBlock>(VMap[OI->NonReturnBlock]);
958 for (BasicBlock *BB : OI->Entries)
959 ClonedOI->Entries.push_back(cast<BasicBlock>(VMap[BB]));
960
961 for (BasicBlock *E : OI->ReturnBlockPreds) {
962 BasicBlock *NewE = cast<BasicBlock>(VMap[E]);
963 ClonedOI->ReturnBlockPreds.push_back(NewE);
964 }
965 // Go ahead and update all uses to the duplicate, so that we can just
966 // use the inliner functionality when we're done hacking.
967 F->replaceAllUsesWith(ClonedFunc);
968}
969
970PartialInlinerImpl::FunctionCloner::FunctionCloner(
971 Function *F, FunctionOutliningMultiRegionInfo *OI,
972    OptimizationRemarkEmitter &ORE,
973    function_ref<AssumptionCache *(Function &)> LookupAC,
974    function_ref<TargetTransformInfo &(Function &)> GetTTI)
975    : OrigFunc(F), ORE(ORE), LookupAC(LookupAC), GetTTI(GetTTI) {
976 ClonedOMRI = std::make_unique<FunctionOutliningMultiRegionInfo>();
977
978 // Clone the function, so that we can hack away on it.
979  ValueToValueMapTy VMap;
980  ClonedFunc = CloneFunction(F, VMap);
981
982 // Go through all Outline Candidate Regions and update all BasicBlock
983 // information.
984 for (FunctionOutliningMultiRegionInfo::OutlineRegionInfo RegionInfo :
985 OI->ORI) {
986    SmallVector<BasicBlock *, 8> Region;
987    for (BasicBlock *BB : RegionInfo.Region)
988 Region.push_back(cast<BasicBlock>(VMap[BB]));
989
990 BasicBlock *NewEntryBlock = cast<BasicBlock>(VMap[RegionInfo.EntryBlock]);
991 BasicBlock *NewExitBlock = cast<BasicBlock>(VMap[RegionInfo.ExitBlock]);
992 BasicBlock *NewReturnBlock = nullptr;
993 if (RegionInfo.ReturnBlock)
994 NewReturnBlock = cast<BasicBlock>(VMap[RegionInfo.ReturnBlock]);
995 FunctionOutliningMultiRegionInfo::OutlineRegionInfo MappedRegionInfo(
996 Region, NewEntryBlock, NewExitBlock, NewReturnBlock);
997 ClonedOMRI->ORI.push_back(MappedRegionInfo);
998 }
999 // Go ahead and update all uses to the duplicate, so that we can just
1000 // use the inliner functionality when we're done hacking.
1001 F->replaceAllUsesWith(ClonedFunc);
1002}
1003
1004void PartialInlinerImpl::FunctionCloner::normalizeReturnBlock() const {
1005 auto GetFirstPHI = [](BasicBlock *BB) {
1006 BasicBlock::iterator I = BB->begin();
1007 PHINode *FirstPhi = nullptr;
1008 while (I != BB->end()) {
1009 PHINode *Phi = dyn_cast<PHINode>(I);
1010 if (!Phi)
1011 break;
1012 if (!FirstPhi) {
1013 FirstPhi = Phi;
1014 break;
1015 }
1016 }
1017 return FirstPhi;
1018 };
1019
1020 // Shouldn't need to normalize PHIs if we're not outlining non-early return
1021 // blocks.
1022 if (!ClonedOI)
1023 return;
1024
1025 // Special hackery is needed with PHI nodes that have inputs from more than
1026 // one extracted block. For simplicity, just split the PHIs into a two-level
1027 // sequence of PHIs, some of which will go in the extracted region, and some
1028 // of which will go outside.
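  // As a hypothetical illustration of the split (names are made up): a PHI in
  // the original return block such as
  //   %r = phi i32 [ %a, %guard1 ], [ %b, %guard2 ], [ %c, %cold1 ], [ %d, %cold2 ]
  // becomes an inner PHI over the extracted predecessors (%cold1/%cold2) that
  // stays with the outlined region, plus an outer PHI in the split-off return
  // block that merges the inner PHI's value with the %guard1/%guard2 inputs.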
1029 BasicBlock *PreReturn = ClonedOI->ReturnBlock;
1030 // only split block when necessary:
1031 PHINode *FirstPhi = GetFirstPHI(PreReturn);
1032 unsigned NumPredsFromEntries = ClonedOI->ReturnBlockPreds.size();
1033
1034 if (!FirstPhi || FirstPhi->getNumIncomingValues() <= NumPredsFromEntries + 1)
1035 return;
1036
1037 auto IsTrivialPhi = [](PHINode *PN) -> Value * {
1038 if (llvm::all_equal(PN->incoming_values()))
1039 return PN->getIncomingValue(0);
1040 return nullptr;
1041 };
1042
1043 ClonedOI->ReturnBlock = ClonedOI->ReturnBlock->splitBasicBlock(
1044 ClonedOI->ReturnBlock->getFirstNonPHI()->getIterator());
1045 BasicBlock::iterator I = PreReturn->begin();
1046 Instruction *Ins = &ClonedOI->ReturnBlock->front();
1047  SmallVector<PHINode *, 4> DeadPhis;
1048  while (I != PreReturn->end()) {
1049 PHINode *OldPhi = dyn_cast<PHINode>(I);
1050 if (!OldPhi)
1051 break;
1052
1053 PHINode *RetPhi =
1054 PHINode::Create(OldPhi->getType(), NumPredsFromEntries + 1, "", Ins);
1055 OldPhi->replaceAllUsesWith(RetPhi);
1056 Ins = ClonedOI->ReturnBlock->getFirstNonPHI();
1057
1058 RetPhi->addIncoming(&*I, PreReturn);
1059 for (BasicBlock *E : ClonedOI->ReturnBlockPreds) {
1060 RetPhi->addIncoming(OldPhi->getIncomingValueForBlock(E), E);
1061 OldPhi->removeIncomingValue(E);
1062 }
1063
1064 // After incoming values splitting, the old phi may become trivial.
1065 // Keeping the trivial phi can introduce definition inside the outline
1066 // region which is live-out, causing necessary overhead (load, store
1067 // arg passing etc).
1068 if (auto *OldPhiVal = IsTrivialPhi(OldPhi)) {
1069 OldPhi->replaceAllUsesWith(OldPhiVal);
1070 DeadPhis.push_back(OldPhi);
1071 }
1072 ++I;
1073 }
1074 for (auto *DP : DeadPhis)
1075 DP->eraseFromParent();
1076
1077 for (auto *E : ClonedOI->ReturnBlockPreds)
1078 E->getTerminator()->replaceUsesOfWith(PreReturn, ClonedOI->ReturnBlock);
1079}
1080
1081bool PartialInlinerImpl::FunctionCloner::doMultiRegionFunctionOutlining() {
1082
1083 auto ComputeRegionCost =
1084      [&](SmallVectorImpl<BasicBlock *> &Region) -> InstructionCost {
1085    InstructionCost Cost = 0;
1086    for (BasicBlock* BB : Region)
1087 Cost += computeBBInlineCost(BB, &GetTTI(*BB->getParent()));
1088 return Cost;
1089 };
1090
1091 assert(ClonedOMRI && "Expecting OutlineInfo for multi region outline");
1092
1093 if (ClonedOMRI->ORI.empty())
1094 return false;
1095
1096 // The CodeExtractor needs a dominator tree.
1097 DominatorTree DT;
1098 DT.recalculate(*ClonedFunc);
1099
1100 // Manually calculate a BlockFrequencyInfo and BranchProbabilityInfo.
1101 LoopInfo LI(DT);
1102 BranchProbabilityInfo BPI(*ClonedFunc, LI);
1103 ClonedFuncBFI.reset(new BlockFrequencyInfo(*ClonedFunc, BPI, LI));
1104
1105 // Cache and recycle the CodeExtractor analysis to avoid O(n^2) compile-time.
1106 CodeExtractorAnalysisCache CEAC(*ClonedFunc);
1107
1108 SetVector<Value *> Inputs, Outputs, Sinks;
1109 for (FunctionOutliningMultiRegionInfo::OutlineRegionInfo RegionInfo :
1110 ClonedOMRI->ORI) {
1111 InstructionCost CurrentOutlinedRegionCost =
1112 ComputeRegionCost(RegionInfo.Region);
1113
1114 CodeExtractor CE(RegionInfo.Region, &DT, /*AggregateArgs*/ false,
1115 ClonedFuncBFI.get(), &BPI,
1116 LookupAC(*RegionInfo.EntryBlock->getParent()),
1117 /* AllowVarargs */ false);
1118
1119 CE.findInputsOutputs(Inputs, Outputs, Sinks);
1120
1121 LLVM_DEBUG({
1122 dbgs() << "inputs: " << Inputs.size() << "\n";
1123 dbgs() << "outputs: " << Outputs.size() << "\n";
1124 for (Value *value : Inputs)
1125 dbgs() << "value used in func: " << *value << "\n";
1126 for (Value *output : Outputs)
1127 dbgs() << "instr used in func: " << *output << "\n";
1128 });
1129
1130 // Do not extract regions that have live exit variables.
1131 if (Outputs.size() > 0 && !ForceLiveExit)
1132 continue;
1133
1134 if (Function *OutlinedFunc = CE.extractCodeRegion(CEAC)) {
1135 CallBase *OCS = PartialInlinerImpl::getOneCallSiteTo(*OutlinedFunc);
1136 BasicBlock *OutliningCallBB = OCS->getParent();
1137 assert(OutliningCallBB->getParent() == ClonedFunc);
1138 OutlinedFunctions.push_back(std::make_pair(OutlinedFunc,OutliningCallBB));
1139 NumColdRegionsOutlined++;
1140 OutlinedRegionCost += CurrentOutlinedRegionCost;
1141
1142 if (MarkOutlinedColdCC) {
1143 OutlinedFunc->setCallingConv(CallingConv::Cold);
1144        OCS->setCallingConv(CallingConv::Cold);
1145      }
1146 } else
1147 ORE.emit([&]() {
1148 return OptimizationRemarkMissed(DEBUG_TYPE, "ExtractFailed",
1149 &RegionInfo.Region.front()->front())
1150 << "Failed to extract region at block "
1151 << ore::NV("Block", RegionInfo.Region.front());
1152 });
1153 }
1154
1155 return !OutlinedFunctions.empty();
1156}
1157
1158Function *
1159PartialInlinerImpl::FunctionCloner::doSingleRegionFunctionOutlining() {
1160  // Returns true if the block is to be partially inlined into the caller
1161 // (i.e. not to be extracted to the out of line function)
1162 auto ToBeInlined = [&, this](BasicBlock *BB) {
1163 return BB == ClonedOI->ReturnBlock ||
1164 llvm::is_contained(ClonedOI->Entries, BB);
1165 };
1166
1167 assert(ClonedOI && "Expecting OutlineInfo for single region outline");
1168 // The CodeExtractor needs a dominator tree.
1169 DominatorTree DT;
1170 DT.recalculate(*ClonedFunc);
1171
1172 // Manually calculate a BlockFrequencyInfo and BranchProbabilityInfo.
1173 LoopInfo LI(DT);
1174 BranchProbabilityInfo BPI(*ClonedFunc, LI);
1175 ClonedFuncBFI.reset(new BlockFrequencyInfo(*ClonedFunc, BPI, LI));
1176
1177 // Gather up the blocks that we're going to extract.
1178 std::vector<BasicBlock *> ToExtract;
1179 auto *ClonedFuncTTI = &GetTTI(*ClonedFunc);
1180 ToExtract.push_back(ClonedOI->NonReturnBlock);
1181 OutlinedRegionCost += PartialInlinerImpl::computeBBInlineCost(
1182 ClonedOI->NonReturnBlock, ClonedFuncTTI);
1183 for (BasicBlock &BB : *ClonedFunc)
1184 if (!ToBeInlined(&BB) && &BB != ClonedOI->NonReturnBlock) {
1185 ToExtract.push_back(&BB);
1186 // FIXME: the code extractor may hoist/sink more code
1187 // into the outlined function which may make the outlining
1188 // overhead (the difference of the outlined function cost
1189 // and OutliningRegionCost) look larger.
1190 OutlinedRegionCost += computeBBInlineCost(&BB, ClonedFuncTTI);
1191 }
1192
1193 // Extract the body of the if.
1194 CodeExtractorAnalysisCache CEAC(*ClonedFunc);
1195 Function *OutlinedFunc =
1196 CodeExtractor(ToExtract, &DT, /*AggregateArgs*/ false,
1197 ClonedFuncBFI.get(), &BPI, LookupAC(*ClonedFunc),
1198 /* AllowVarargs */ true)
1199 .extractCodeRegion(CEAC);
1200
1201 if (OutlinedFunc) {
1202 BasicBlock *OutliningCallBB =
1203 PartialInlinerImpl::getOneCallSiteTo(*OutlinedFunc)->getParent();
1204 assert(OutliningCallBB->getParent() == ClonedFunc);
1205 OutlinedFunctions.push_back(std::make_pair(OutlinedFunc, OutliningCallBB));
1206 } else
1207 ORE.emit([&]() {
1208 return OptimizationRemarkMissed(DEBUG_TYPE, "ExtractFailed",
1209 &ToExtract.front()->front())
1210 << "Failed to extract region at block "
1211 << ore::NV("Block", ToExtract.front());
1212 });
1213
1214 return OutlinedFunc;
1215}
1216
1217PartialInlinerImpl::FunctionCloner::~FunctionCloner() {
1218 // Ditch the duplicate, since we're done with it, and rewrite all remaining
1219 // users (function pointers, etc.) back to the original function.
1220 ClonedFunc->replaceAllUsesWith(OrigFunc);
1221 ClonedFunc->eraseFromParent();
1222 if (!IsFunctionInlined) {
1223 // Remove each function that was speculatively created if there is no
1224 // reference.
1225 for (auto FuncBBPair : OutlinedFunctions) {
1226 Function *Func = FuncBBPair.first;
1227 Func->eraseFromParent();
1228 }
1229 }
1230}
1231
1232std::pair<bool, Function *> PartialInlinerImpl::unswitchFunction(Function &F) {
1233 if (F.hasAddressTaken())
1234 return {false, nullptr};
1235
1236 // Let inliner handle it
1237 if (F.hasFnAttribute(Attribute::AlwaysInline))
1238 return {false, nullptr};
1239
1240 if (F.hasFnAttribute(Attribute::NoInline))
1241 return {false, nullptr};
1242
1243 if (PSI.isFunctionEntryCold(&F))
1244 return {false, nullptr};
1245
1246 if (F.users().empty())
1247 return {false, nullptr};
1248
1249  OptimizationRemarkEmitter ORE(&F);
1250
1251 // Only try to outline cold regions if we have a profile summary, which
1252 // implies we have profiling information.
1253 if (PSI.hasProfileSummary() && F.hasProfileData() &&
1254      !DisableMultiRegionPartialInline) {
1255    std::unique_ptr<FunctionOutliningMultiRegionInfo> OMRI =
1256 computeOutliningColdRegionsInfo(F, ORE);
1257 if (OMRI) {
1258 FunctionCloner Cloner(&F, OMRI.get(), ORE, LookupAssumptionCache, GetTTI);
1259
1260 LLVM_DEBUG({
1261 dbgs() << "HotCountThreshold = " << PSI.getHotCountThreshold() << "\n";
1262 dbgs() << "ColdCountThreshold = " << PSI.getColdCountThreshold()
1263 << "\n";
1264 });
1265
1266 bool DidOutline = Cloner.doMultiRegionFunctionOutlining();
1267
1268 if (DidOutline) {
1269 LLVM_DEBUG({
1270 dbgs() << ">>>>>> Outlined (Cloned) Function >>>>>>\n";
1271 Cloner.ClonedFunc->print(dbgs());
1272 dbgs() << "<<<<<< Outlined (Cloned) Function <<<<<<\n";
1273 });
1274
1275 if (tryPartialInline(Cloner))
1276 return {true, nullptr};
1277 }
1278 }
1279 }
1280
1281 // Fall-thru to regular partial inlining if we:
1282 // i) can't find any cold regions to outline, or
1283 // ii) can't inline the outlined function anywhere.
1284 std::unique_ptr<FunctionOutliningInfo> OI = computeOutliningInfo(F);
1285 if (!OI)
1286 return {false, nullptr};
1287
1288 FunctionCloner Cloner(&F, OI.get(), ORE, LookupAssumptionCache, GetTTI);
1289 Cloner.normalizeReturnBlock();
1290
1291 Function *OutlinedFunction = Cloner.doSingleRegionFunctionOutlining();
1292
1293 if (!OutlinedFunction)
1294 return {false, nullptr};
1295
1296 if (tryPartialInline(Cloner))
1297 return {true, OutlinedFunction};
1298
1299 return {false, nullptr};
1300}
1301
1302bool PartialInlinerImpl::tryPartialInline(FunctionCloner &Cloner) {
1303 if (Cloner.OutlinedFunctions.empty())
1304 return false;
1305
1306 auto OutliningCosts = computeOutliningCosts(Cloner);
1307
1308 InstructionCost SizeCost = std::get<0>(OutliningCosts);
1309 InstructionCost NonWeightedRcost = std::get<1>(OutliningCosts);
1310
1311 assert(SizeCost.isValid() && NonWeightedRcost.isValid() &&
1312 "Expected valid costs");
1313
1314 // Only calculate RelativeToEntryFreq when we are doing single region
1315 // outlining.
1316 BranchProbability RelativeToEntryFreq;
1317 if (Cloner.ClonedOI)
1318 RelativeToEntryFreq = getOutliningCallBBRelativeFreq(Cloner);
1319 else
1320 // RelativeToEntryFreq doesn't make sense when we have more than one
1321 // outlined call because each call will have a different relative frequency
1322 // to the entry block. We can consider using the average, but the
1323 // usefulness of that information is questionable. For now, assume we never
1324 // execute the calls to outlined functions.
1325 RelativeToEntryFreq = BranchProbability(0, 1);
1326
1327 BlockFrequency WeightedRcost =
1328 BlockFrequency(*NonWeightedRcost.getValue()) * RelativeToEntryFreq;
1329
1330  // If the call sequence(s) to the outlined function(s) are larger than the
1331  // sum of the original outlined region size(s), outlining does not increase
1332  // the chances of inlining the function (the inliner uses the size increase
1333  // to model the cost of inlining a callee).
1334 if (!SkipCostAnalysis && Cloner.OutlinedRegionCost < SizeCost) {
1335 OptimizationRemarkEmitter OrigFuncORE(Cloner.OrigFunc);
1336 DebugLoc DLoc;
1337  BasicBlock *Block;
1338  std::tie(DLoc, Block) = getOneDebugLoc(*Cloner.ClonedFunc);
1339 OrigFuncORE.emit([&]() {
1340 return OptimizationRemarkAnalysis(DEBUG_TYPE, "OutlineRegionTooSmall",
1341 DLoc, Block)
1342 << ore::NV("Function", Cloner.OrigFunc)
1343 << " not partially inlined into callers (Original Size = "
1344 << ore::NV("OutlinedRegionOriginalSize", Cloner.OutlinedRegionCost)
1345 << ", Size of call sequence to outlined function = "
1346 << ore::NV("NewSize", SizeCost) << ")";
1347 });
1348 return false;
1349 }
1350
1351 assert(Cloner.OrigFunc->users().empty() &&
1352 "F's users should all be replaced!");
1353
1354 std::vector<User *> Users(Cloner.ClonedFunc->user_begin(),
1355 Cloner.ClonedFunc->user_end());
1356
1357 DenseMap<User *, uint64_t> CallSiteToProfCountMap;
1358 auto CalleeEntryCount = Cloner.OrigFunc->getEntryCount();
1359 if (CalleeEntryCount)
1360 computeCallsiteToProfCountMap(Cloner.ClonedFunc, CallSiteToProfCountMap);
1361
1362 uint64_t CalleeEntryCountV =
1363 (CalleeEntryCount ? CalleeEntryCount->getCount() : 0);
1364
1365 bool AnyInline = false;
1366 for (User *User : Users) {
1367 // Don't bother with BlockAddress used by CallBr for asm goto.
1368 if (isa<BlockAddress>(User))
1369 continue;
1370
1371 CallBase *CB = getSupportedCallBase(User);
1372
1373 if (isLimitReached())
1374 continue;
1375
1376 OptimizationRemarkEmitter CallerORE(CB->getCaller());
1377 if (!shouldPartialInline(*CB, Cloner, WeightedRcost, CallerORE))
1378 continue;
1379
1380 // Construct remark before doing the inlining, as after successful inlining
1381 // the callsite is removed.
1382 OptimizationRemark OR(DEBUG_TYPE, "PartiallyInlined", CB);
1383 OR << ore::NV("Callee", Cloner.OrigFunc) << " partially inlined into "
1384 << ore::NV("Caller", CB->getCaller());
1385
1386 InlineFunctionInfo IFI(GetAssumptionCache, &PSI);
1387 // We can only forward varargs when we outlined a single region, else we
1388 // bail on vararg functions.
1389 if (!InlineFunction(*CB, IFI, /*MergeAttributes=*/false, nullptr, true,
1390 (Cloner.ClonedOI ? Cloner.OutlinedFunctions.back().first
1391 : nullptr))
1392 .isSuccess())
1393 continue;
1394
1395 CallerORE.emit(OR);
1396
1397 // Now update the entry count:
1398 if (CalleeEntryCountV && CallSiteToProfCountMap.count(User)) {
1399 uint64_t CallSiteCount = CallSiteToProfCountMap[User];
1400 CalleeEntryCountV -= std::min(CalleeEntryCountV, CallSiteCount);
1401 }
1402
1403 AnyInline = true;
1404 NumPartialInlining++;
1405 // Update the stats
1406 if (Cloner.ClonedOI)
1407 NumPartialInlined++;
1408 else
1409 NumColdOutlinePartialInlined++;
1410 }
1411
1412 if (AnyInline) {
1413 Cloner.IsFunctionInlined = true;
1414 if (CalleeEntryCount)
1415 Cloner.OrigFunc->setEntryCount(Function::ProfileCount(
1416 CalleeEntryCountV, CalleeEntryCount->getType()));
1417 OptimizationRemarkEmitter OrigFuncORE(Cloner.OrigFunc);
1418 OrigFuncORE.emit([&]() {
1419 return OptimizationRemark(DEBUG_TYPE, "PartiallyInlined", Cloner.OrigFunc)
1420 << "Partially inlined into at least one caller";
1421 });
1422 }
1423
1424 return AnyInline;
1425}
1426
1427bool PartialInlinerImpl::run(Module &M) {
1428  if (DisablePartialInlining)
1429    return false;
1430
1431 std::vector<Function *> Worklist;
1432 Worklist.reserve(M.size());
1433 for (Function &F : M)
1434 if (!F.use_empty() && !F.isDeclaration())
1435 Worklist.push_back(&F);
1436
1437 bool Changed = false;
1438 while (!Worklist.empty()) {
1439 Function *CurrFunc = Worklist.back();
1440 Worklist.pop_back();
1441
1442 if (CurrFunc->use_empty())
1443 continue;
1444
1445 std::pair<bool, Function *> Result = unswitchFunction(*CurrFunc);
1446 if (Result.second)
1447 Worklist.push_back(Result.second);
1448 Changed |= Result.first;
1449 }
1450
1451 return Changed;
1452}
1453
1454PreservedAnalyses PartialInlinerPass::run(Module &M,
1455                                          ModuleAnalysisManager &AM) {
1456  auto &FAM = AM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager();
1457
1458 auto GetAssumptionCache = [&FAM](Function &F) -> AssumptionCache & {
1459    return FAM.getResult<AssumptionAnalysis>(F);
1460  };
1461
1462 auto LookupAssumptionCache = [&FAM](Function &F) -> AssumptionCache * {
1463    return FAM.getCachedResult<AssumptionAnalysis>(F);
1464  };
1465
1466 auto GetBFI = [&FAM](Function &F) -> BlockFrequencyInfo & {
1467    return FAM.getResult<BlockFrequencyAnalysis>(F);
1468  };
1469
1470 auto GetTTI = [&FAM](Function &F) -> TargetTransformInfo & {
1471    return FAM.getResult<TargetIRAnalysis>(F);
1472  };
1473
1474 auto GetTLI = [&FAM](Function &F) -> TargetLibraryInfo & {
1475    return FAM.getResult<TargetLibraryAnalysis>(F);
1476  };
1477
1478  ProfileSummaryInfo &PSI = AM.getResult<ProfileSummaryAnalysis>(M);
1479
1480 if (PartialInlinerImpl(GetAssumptionCache, LookupAssumptionCache, GetTTI,
1481 GetTLI, PSI, GetBFI)
1482 .run(M))
1483 return PreservedAnalyses::none();
1484 return PreservedAnalyses::all();
1485}
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
amdgpu Simplify well known AMD library false FunctionCallee Callee
static MachineBasicBlock * OtherSucc(MachineBasicBlock *MBB, MachineBasicBlock *Succ)
This file contains the simple types necessary to represent the attributes associated with functions a...
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
Given that RA is a live value
#define LLVM_DEBUG(X)
Definition: Debug.h:101
This file defines the DenseMap class.
This file defines the DenseSet and SmallDenseSet classes.
This file provides various utilities for inspecting and working with the control flow graph in LLVM I...
iv Induction Variable Users
Definition: IVUsers.cpp:48
static cl::opt< int > InstrCost("inline-instr-cost", cl::Hidden, cl::init(5), cl::desc("Cost of a single instruction when inlining"))
#define F(x, y, z)
Definition: MD5.cpp:55
#define I(x, y, z)
Definition: MD5.cpp:58
Module.h This file contains the declarations for the Module class.
static cl::opt< unsigned > MaxNumInlineBlocks("max-num-inline-blocks", cl::init(5), cl::Hidden, cl::desc("Max number of blocks to be partially inlined"))
static cl::opt< int > OutlineRegionFreqPercent("outline-region-freq-percent", cl::init(75), cl::Hidden, cl::desc("Relative frequency of outline region to " "the entry block"))
static cl::opt< bool > MarkOutlinedColdCC("pi-mark-coldcc", cl::init(false), cl::Hidden, cl::desc("Mark outline function calls with ColdCC"))
static cl::opt< float > MinRegionSizeRatio("min-region-size-ratio", cl::init(0.1), cl::Hidden, cl::desc("Minimum ratio comparing relative sizes of each " "outline candidate and original function"))
static cl::opt< bool > DisableMultiRegionPartialInline("disable-mr-partial-inlining", cl::init(false), cl::Hidden, cl::desc("Disable multi-region partial inlining"))
static cl::opt< unsigned > MinBlockCounterExecution("min-block-execution", cl::init(100), cl::Hidden, cl::desc("Minimum block executions to consider " "its BranchProbabilityInfo valid"))
static cl::opt< int > MaxNumPartialInlining("max-partial-inlining", cl::init(-1), cl::Hidden, cl::desc("Max number of partial inlining. The default is unlimited"))
static cl::opt< bool > DisablePartialInlining("disable-partial-inlining", cl::init(false), cl::Hidden, cl::desc("Disable partial inlining"))
static bool hasProfileData(const Function &F, const FunctionOutliningInfo &OI)
static cl::opt< float > ColdBranchRatio("cold-branch-ratio", cl::init(0.1), cl::Hidden, cl::desc("Minimum BranchProbability to consider a region cold."))
#define DEBUG_TYPE
static cl::opt< bool > ForceLiveExit("pi-force-live-exit-outline", cl::init(false), cl::Hidden, cl::desc("Force outline regions with live exits"))
static cl::opt< unsigned > ExtraOutliningPenalty("partial-inlining-extra-penalty", cl::init(0), cl::Hidden, cl::desc("A debug option to add additional penalty to the computed one."))
static cl::opt< bool > SkipCostAnalysis("skip-partial-inlining-cost-analysis", cl::ReallyHidden, cl::desc("Skip Cost Analysis"))
FunctionAnalysisManager FAM
This file contains the declarations for profiling metadata utility functions.
@ SI
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
This file contains some templates that are useful if you are working with the STL at all.
This file defines the SmallVector class.
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
Definition: Statistic.h:167
This pass exposes codegen information to IR-level passes.
A container for analyses that lazily runs them and caches their results.
Definition: PassManager.h:620
PassT::Result * getCachedResult(IRUnitT &IR) const
Get the cached result of an analysis pass for a given IR unit.
Definition: PassManager.h:793
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)
Get the result of an analysis pass for a given IR unit.
Definition: PassManager.h:774
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition: ArrayRef.h:41
A function analysis which provides an AssumptionCache.
A cache of @llvm.assume calls within a function.
LLVM Basic Block Representation.
Definition: BasicBlock.h:56
iterator end()
Definition: BasicBlock.h:316
iterator begin()
Instruction iterator methods.
Definition: BasicBlock.h:314
iterator_range< filter_iterator< BasicBlock::const_iterator, std::function< bool(const Instruction &)> > > instructionsWithoutDebug(bool SkipPseudoOp=true) const
Return a const iterator range over the instructions in the block, skipping any debug instructions.
Definition: BasicBlock.cpp:103
const BasicBlock * getSinglePredecessor() const
Return the predecessor of this block if it has a single predecessor block.
Definition: BasicBlock.cpp:284
const BasicBlock * getSingleSuccessor() const
Return the successor of this block if it has a single successor.
Definition: BasicBlock.cpp:314
const Function * getParent() const
Return the enclosing method, or null if none.
Definition: BasicBlock.h:112
InstListType::iterator iterator
Instruction iterators...
Definition: BasicBlock.h:87
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction if the block is well formed or null if the block is not well forme...
Definition: BasicBlock.h:127
const Instruction & back() const
Definition: BasicBlock.h:328
Analysis pass which computes BlockFrequencyInfo.
BlockFrequencyInfo pass uses BlockFrequencyInfoImpl implementation to estimate IR basic block frequen...
std::optional< uint64_t > getBlockProfileCount(const BasicBlock *BB, bool AllowSynthetic=false) const
Returns the estimated profile count of BB.
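For illustration only (not code from PartialInlining.cpp): a minimal sketch of querying a block's profile count and comparing it against a minimum execution threshold in the spirit of the min-block-execution option above; the helper name executesAtLeast is hypothetical.

#include "llvm/Analysis/BlockFrequencyInfo.h"
#include "llvm/IR/BasicBlock.h"
#include <cstdint>
#include <optional>
using namespace llvm;

// Returns true only when profile data exists and meets the threshold.
static bool executesAtLeast(const BasicBlock *BB,
                            const BlockFrequencyInfo &BFI,
                            uint64_t MinCount) {
  std::optional<uint64_t> Count = BFI.getBlockProfileCount(BB);
  // Missing profile data is treated conservatively as "not enough executions".
  return Count && *Count >= MinCount;
}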
uint64_t getFrequency() const
Returns the frequency as a fixpoint number scaled by the entry frequency.
Conditional or Unconditional Branch instruction.
Analysis providing branch probability information.
static BranchProbability getBranchProbability(uint64_t Numerator, uint64_t Denominator)
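For illustration only (not code from PartialInlining.cpp): a minimal sketch of building an edge probability from raw profile counts and comparing it against a cold threshold in the spirit of the cold-branch-ratio option; the helper name isColdEdge and the 10% threshold are illustrative assumptions.

#include "llvm/Support/BranchProbability.h"
#include <cstdint>
using namespace llvm;

static bool isColdEdge(uint64_t TakenCount, uint64_t TotalCount) {
  // getBranchProbability expects Numerator <= Denominator.
  if (TotalCount == 0 || TakenCount > TotalCount)
    return false;
  BranchProbability EdgeProb =
      BranchProbability::getBranchProbability(TakenCount, TotalCount);
  // Treat edges taken less than 10% of the time as cold (illustrative value).
  return EdgeProb < BranchProbability(1, 10);
}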
Base class for all callable instructions (InvokeInst and CallInst) Holds everything related to callin...
Definition: InstrTypes.h:1186
void setCallingConv(CallingConv::ID CC)
Definition: InstrTypes.h:1471
Function * getCalledFunction() const
Returns the function called, or null if this is an indirect function invocation or the function signa...
Definition: InstrTypes.h:1408
Function * getCaller()
Helper to get the caller (the parent function).
This class represents a function call, abstracting a target machine's calling convention.
A cache for the CodeExtractor analysis.
Definition: CodeExtractor.h:46
Utility class for extracting code into a new function.
Definition: CodeExtractor.h:85
Function * extractCodeRegion(const CodeExtractorAnalysisCache &CEAC)
Perform the extraction, returning the new function.
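For illustration only (not code from PartialInlining.cpp): a minimal sketch of outlining a region with CodeExtractor, assuming Region is a single-entry set of blocks inside F; the helper name outlineRegion is hypothetical.

#include "llvm/ADT/ArrayRef.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Function.h"
#include "llvm/Transforms/Utils/CodeExtractor.h"
using namespace llvm;

static Function *outlineRegion(Function &F, ArrayRef<BasicBlock *> Region,
                               DominatorTree &DT) {
  CodeExtractorAnalysisCache CEAC(F);
  CodeExtractor Extractor(Region, &DT, /*AggregateArgs=*/false);
  if (!Extractor.isEligible())
    return nullptr; // The region cannot be extracted safely.
  // Returns the newly created outlined function (or null on failure).
  return Extractor.extractCodeRegion(CEAC);
}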
A parsed version of the target data layout string and methods for querying it.
Definition: DataLayout.h:110
A debug info location.
Definition: DebugLoc.h:33
size_type count(const_arg_type_t< KeyT > Val) const
Return 1 if the specified key is in the map, 0 otherwise.
Definition: DenseMap.h:151
Implements a dense probed hash-table based set.
Definition: DenseSet.h:271
void recalculate(ParentType &Func)
recalculate - compute a dominator tree for the given function
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
Definition: Dominators.h:166
Convenience struct for specifying and reasoning about fast-math flags.
Definition: FMF.h:21
Class to represent profile counts.
Definition: Function.h:252
const BasicBlock & back() const
Definition: Function.h:765
void setCallingConv(CallingConv::ID CC)
Definition: Function.h:241
Module * getParent()
Get the module that this global value is contained inside of...
Definition: GlobalValue.h:652
Represents the cost of inlining a function.
Definition: InlineCost.h:89
int getCost() const
Get the inline cost estimate.
Definition: InlineCost.h:144
bool isAlways() const
Definition: InlineCost.h:138
int getCostDelta() const
Get the cost delta from the threshold for inlining.
Definition: InlineCost.h:174
bool isNever() const
Definition: InlineCost.h:139
This class captures the data input to the InlineFunction call, and records the auxiliary results prod...
Definition: Cloning.h:203
bool isSuccess() const
Definition: InlineCost.h:188
An analysis over an "outer" IR unit that provides access to an analysis manager over an "inner" IR un...
Definition: PassManager.h:933
auto map(const Function &F) const -> InstructionCost
std::optional< CostType > getValue() const
This function is intended to be used as sparingly as possible, since the class provides the full rang...
const DebugLoc & getDebugLoc() const
Return the debug location for this node as a DebugLoc.
Definition: Instruction.h:358
const BasicBlock * getParent() const
Definition: Instruction.h:90
Invoke instruction.
A Module instance is used to store all the information related to an LLVM module.
Definition: Module.h:65
const DataLayout & getDataLayout() const
Get the data layout for the module's target platform.
Definition: Module.cpp:398
Diagnostic information for optimization analysis remarks.
The optimization diagnostic interface.
void emit(DiagnosticInfoOptimizationBase &OptDiag)
Output the remark via the diagnostic handler and to the optimization record file.
Diagnostic information for missed-optimization remarks.
Diagnostic information for applied optimization remarks.
void addIncoming(Value *V, BasicBlock *BB)
Add an incoming value to the end of the PHI list.
Value * removeIncomingValue(unsigned Idx, bool DeletePHIIfEmpty=true)
Remove an incoming value.
static PHINode * Create(Type *Ty, unsigned NumReservedValues, const Twine &NameStr="", Instruction *InsertBefore=nullptr)
Constructors - NumReservedValues is a hint for the number of incoming edges that this phi node will h...
Value * getIncomingValueForBlock(const BasicBlock *BB) const
unsigned getNumIncomingValues() const
Return the number of incoming edges.
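For illustration only (not code from PartialInlining.cpp): a minimal sketch of creating a PHI node that merges one value from each of two predecessors, the kind of fix-up needed when blocks are split; the helper name mergeValues is hypothetical and MergeBB is assumed non-empty.

#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/Value.h"
using namespace llvm;

static PHINode *mergeValues(BasicBlock *MergeBB, Value *FromA, BasicBlock *A,
                            Value *FromB, BasicBlock *B) {
  // Reserve space for two incoming edges and insert at the top of MergeBB.
  PHINode *Phi = PHINode::Create(FromA->getType(), /*NumReservedValues=*/2,
                                 "merged", &MergeBB->front());
  Phi->addIncoming(FromA, A);
  Phi->addIncoming(FromB, B);
  return Phi;
}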
PreservedAnalyses run(Module &M, ModuleAnalysisManager &)
A set of analyses that are preserved following a run of a transformation pass.
Definition: PassManager.h:152
static PreservedAnalyses none()
Convenience factory function for the empty preserved set.
Definition: PassManager.h:155
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
Definition: PassManager.h:158
An analysis pass based on the new PM to deliver ProfileSummaryInfo.
Analysis providing profile information.
A vector that has set insertion semantics.
Definition: SetVector.h:40
size_type size() const
Determine the number of elements in the SetVector.
Definition: SetVector.h:77
bool empty() const
Definition: SmallVector.h:94
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
Definition: SmallVector.h:577
void push_back(const T &Elt)
Definition: SmallVector.h:416
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1200
Multiway switch.
Analysis pass providing the TargetTransformInfo.
Analysis pass providing the TargetLibraryInfo.
Provides information about what library functions are available for the current target.
This pass provides access to the codegen interfaces that are needed for IR-level transformations.
InstructionCost getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA, TTI::TargetCostKind CostKind) const
@ TCK_SizeAndLatency
The weighted sum of size and latency.
LLVM Value Representation.
Definition: Value.h:74
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:255
user_iterator user_begin()
Definition: Value.h:397
void replaceAllUsesWith(Value *V)
Change all uses of this to point to a new Value.
Definition: Value.cpp:532
bool use_empty() const
Definition: Value.h:344
user_iterator user_end()
Definition: Value.h:405
An efficient, type-erasing, non-owning reference to a callable.
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
@ Cold
Attempts to make code in the caller as efficient as possible under the assumption that the call is no...
Definition: CallingConv.h:47
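For illustration only (not code from PartialInlining.cpp): a minimal sketch of marking an outlined function and one of its call sites with the cold calling convention, as the pi-mark-coldcc option described above enables; the helper name markCallColdCC is hypothetical.

#include "llvm/IR/CallingConv.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/InstrTypes.h"
using namespace llvm;

static void markCallColdCC(Function *OutlinedFn, CallBase *CallToOutlined) {
  // Both the callee and the call site must agree on the calling convention.
  OutlinedFn->setCallingConv(CallingConv::Cold);
  CallToOutlined->setCallingConv(CallingConv::Cold);
}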
@ ReallyHidden
Definition: CommandLine.h:139
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:445
DiagnosticInfoOptimizationBase::Argument NV
Interval::succ_iterator succ_end(Interval *I)
Definition: Interval.h:102
auto successors(const MachineBasicBlock *BB)
int getCallsiteCost(const CallBase &Call, const DataLayout &DL)
Return the cost associated with a callsite, including parameter passing and the call/return instructi...
Interval::succ_iterator succ_begin(Interval *I)
succ_begin/succ_end - define methods so that Intervals may be used just like BasicBlocks can with the...
Definition: Interval.h:99
InlineResult isInlineViable(Function &Callee)
Minimal filter to detect invalid constructs for inlining.
InlineCost getInlineCost(CallBase &Call, const InlineParams &Params, TargetTransformInfo &CalleeTTI, function_ref< AssumptionCache &(Function &)> GetAssumptionCache, function_ref< const TargetLibraryInfo &(Function &)> GetTLI, function_ref< BlockFrequencyInfo &(Function &)> GetBFI=nullptr, ProfileSummaryInfo *PSI=nullptr, OptimizationRemarkEmitter *ORE=nullptr)
Get an InlineCost object representing the cost of inlining this callsite.
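For illustration only (not code from PartialInlining.cpp): a minimal sketch of computing an inline cost for a direct call site with default parameters, assuming FAM is a FunctionAnalysisManager with the standard analyses registered; the helper name computeCost is hypothetical.

#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Analysis/InlineCost.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/PassManager.h"
using namespace llvm;

static InlineCost computeCost(CallBase &CB, FunctionAnalysisManager &FAM) {
  Function &Callee = *CB.getCalledFunction(); // Assumes a direct call.
  auto GetAC = [&](Function &F) -> AssumptionCache & {
    return FAM.getResult<AssumptionAnalysis>(F);
  };
  auto GetTLI = [&](Function &F) -> const TargetLibraryInfo & {
    return FAM.getResult<TargetLibraryAnalysis>(F);
  };
  TargetTransformInfo &CalleeTTI = FAM.getResult<TargetIRAnalysis>(Callee);
  // Use command-line-derived parameters; callers can inspect isAlways(),
  // isNever(), or getCost() on the returned InlineCost.
  return getInlineCost(CB, getInlineParams(), CalleeTTI, GetAC, GetTLI);
}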
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:163
InlineResult InlineFunction(CallBase &CB, InlineFunctionInfo &IFI, bool MergeAttributes=false, AAResults *CalleeAAR=nullptr, bool InsertLifetime=true, Function *ForwardVarArgsTo=nullptr)
This function inlines the called function into the basic block of the caller.
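For illustration only (not code from PartialInlining.cpp): a minimal sketch of inlining a single call site and checking the result; the helper name inlineCallSite is hypothetical.

#include "llvm/Analysis/InlineCost.h"
#include "llvm/IR/InstrTypes.h"
#include "llvm/Transforms/Utils/Cloning.h"
using namespace llvm;

static bool inlineCallSite(CallBase &CB) {
  InlineFunctionInfo IFI;
  // InlineFunction splices the callee's body into the caller at CB.
  InlineResult Result = InlineFunction(CB, IFI, /*MergeAttributes=*/false);
  return Result.isSuccess();
}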
InlineParams getInlineParams()
Generate the parameters to tune the inline cost analysis based only on the commandline options.
auto predecessors(const MachineBasicBlock *BB)
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
Definition: STLExtras.h:1976
bool all_equal(std::initializer_list< T > Values)
Returns true if all Values in the initializer lists are equal or the list is empty.
Definition: STLExtras.h:2101
unsigned succ_size(const MachineBasicBlock *BB)
bool hasBranchWeightMD(const Instruction &I)
Checks if an instructions has Branch Weight Metadata.
Function * CloneFunction(Function *F, ValueToValueMapTy &VMap, ClonedCodeInfo *CodeInfo=nullptr)
Return a copy of the specified function and add it to that function's module.
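For illustration only (not code from PartialInlining.cpp): a minimal sketch of cloning a function so the copy can be rewritten (for example, by removing regions before outlining) without disturbing the original; the helper name cloneForRewriting is hypothetical.

#include "llvm/IR/Function.h"
#include "llvm/Transforms/Utils/Cloning.h"
#include "llvm/Transforms/Utils/ValueMapper.h"
using namespace llvm;

static Function *cloneForRewriting(Function *F) {
  ValueToValueMapTy VMap;
  // The clone is created in F's module; VMap records old-to-new value mappings.
  return CloneFunction(F, VMap);
}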