LLVM  8.0.0svn
PartialInlining.cpp
Go to the documentation of this file.
1 //===- PartialInlining.cpp - Inline parts of functions --------------------===//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This pass performs partial inlining, typically by inlining an if statement
11 // that surrounds the body of the function.
12 //
13 //===----------------------------------------------------------------------===//
14 
16 #include "llvm/ADT/DenseMap.h"
17 #include "llvm/ADT/DenseSet.h"
18 #include "llvm/ADT/None.h"
19 #include "llvm/ADT/Optional.h"
20 #include "llvm/ADT/STLExtras.h"
21 #include "llvm/ADT/SmallVector.h"
22 #include "llvm/ADT/Statistic.h"
26 #include "llvm/Analysis/LoopInfo.h"
31 #include "llvm/IR/Attributes.h"
32 #include "llvm/IR/BasicBlock.h"
33 #include "llvm/IR/CFG.h"
34 #include "llvm/IR/CallSite.h"
35 #include "llvm/IR/DebugLoc.h"
36 #include "llvm/IR/DiagnosticInfo.h"
37 #include "llvm/IR/Dominators.h"
38 #include "llvm/IR/Function.h"
39 #include "llvm/IR/InstrTypes.h"
40 #include "llvm/IR/Instruction.h"
41 #include "llvm/IR/Instructions.h"
42 #include "llvm/IR/IntrinsicInst.h"
43 #include "llvm/IR/Intrinsics.h"
44 #include "llvm/IR/Module.h"
45 #include "llvm/IR/User.h"
46 #include "llvm/Pass.h"
49 #include "llvm/Support/Casting.h"
52 #include "llvm/Transforms/IPO.h"
56 #include <algorithm>
57 #include <cassert>
58 #include <cstdint>
59 #include <functional>
60 #include <iterator>
61 #include <memory>
62 #include <tuple>
63 #include <vector>
64 
65 using namespace llvm;
66 
67 #define DEBUG_TYPE "partial-inlining"
68 
69 STATISTIC(NumPartialInlined,
70  "Number of callsites functions partially inlined into.");
71 STATISTIC(NumColdOutlinePartialInlined, "Number of times functions with "
72  "cold outlined regions were partially "
73  "inlined into its caller(s).");
74 STATISTIC(NumColdRegionsFound,
75  "Number of cold single entry/exit regions found.");
76 STATISTIC(NumColdRegionsOutlined,
77  "Number of cold single entry/exit regions outlined.");
78 
79 // Command line option to disable partial-inlining. The default is false:
80 static cl::opt<bool>
81  DisablePartialInlining("disable-partial-inlining", cl::init(false),
82  cl::Hidden, cl::desc("Disable partial inlining"));
83 // Command line option to disable multi-region partial-inlining. The default is
84 // false:
86  "disable-mr-partial-inlining", cl::init(false), cl::Hidden,
87  cl::desc("Disable multi-region partial inlining"));
88 
89 // Command line option to force outlining in regions with live exit variables.
90 // The default is false:
91 static cl::opt<bool>
92  ForceLiveExit("pi-force-live-exit-outline", cl::init(false), cl::Hidden,
93  cl::desc("Force outline regions with live exits"));
94 
95 // Command line option to enable marking outline functions with Cold Calling
96 // Convention. The default is false:
97 static cl::opt<bool>
98  MarkOutlinedColdCC("pi-mark-coldcc", cl::init(false), cl::Hidden,
99  cl::desc("Mark outline function calls with ColdCC"));
100 
101 #ifndef NDEBUG
102 // Command line option to debug partial-inlining. The default is none:
103 static cl::opt<bool> TracePartialInlining("trace-partial-inlining",
104  cl::init(false), cl::Hidden,
105  cl::desc("Trace partial inlining."));
106 #endif
107 
108 // This is an option used by testing:
109 static cl::opt<bool> SkipCostAnalysis("skip-partial-inlining-cost-analysis",
110  cl::init(false), cl::ZeroOrMore,
112  cl::desc("Skip Cost Analysis"));
113 // Used to determine if a cold region is worth outlining based on
114 // its inlining cost compared to the original function. Default is set at 10%.
115 // ie. if the cold region reduces the inlining cost of the original function by
116 // at least 10%.
118  "min-region-size-ratio", cl::init(0.1), cl::Hidden,
119  cl::desc("Minimum ratio comparing relative sizes of each "
120  "outline candidate and original function"));
121 // Used to tune the minimum number of execution counts needed in the predecessor
122 // block to the cold edge. ie. confidence interval.
123 static cl::opt<unsigned>
124  MinBlockCounterExecution("min-block-execution", cl::init(100), cl::Hidden,
125  cl::desc("Minimum block executions to consider "
126  "its BranchProbabilityInfo valid"));
127 // Used to determine when an edge is considered cold. Default is set to 10%. ie.
128 // if the branch probability is 10% or less, then it is deemed as 'cold'.
130  "cold-branch-ratio", cl::init(0.1), cl::Hidden,
131  cl::desc("Minimum BranchProbability to consider a region cold."));
132 
134  "max-num-inline-blocks", cl::init(5), cl::Hidden,
135  cl::desc("Max number of blocks to be partially inlined"));
136 
137 // Command line option to set the maximum number of partial inlining allowed
138 // for the module. The default value of -1 means no limit.
140  "max-partial-inlining", cl::init(-1), cl::Hidden, cl::ZeroOrMore,
141  cl::desc("Max number of partial inlining. The default is unlimited"));
142 
143 // Used only when PGO or user annotated branch data is absent. It is
144 // the least value that is used to weigh the outline region. If BFI
145 // produces larger value, the BFI value will be used.
146 static cl::opt<int>
147  OutlineRegionFreqPercent("outline-region-freq-percent", cl::init(75),
149  cl::desc("Relative frequency of outline region to "
150  "the entry block"));
151 
153  "partial-inlining-extra-penalty", cl::init(0), cl::Hidden,
154  cl::desc("A debug option to add additional penalty to the computed one."));
155 
156 namespace {
157 
158 struct FunctionOutliningInfo {
159  FunctionOutliningInfo() = default;
160 
161  // Returns the number of blocks to be inlined including all blocks
162  // in Entries and one return block.
163  unsigned GetNumInlinedBlocks() const { return Entries.size() + 1; }
164 
165  // A set of blocks including the function entry that guard
166  // the region to be outlined.
168 
169  // The return block that is not included in the outlined region.
170  BasicBlock *ReturnBlock = nullptr;
171 
172  // The dominating block of the region to be outlined.
173  BasicBlock *NonReturnBlock = nullptr;
174 
175  // The set of blocks in Entries that that are predecessors to ReturnBlock
176  SmallVector<BasicBlock *, 4> ReturnBlockPreds;
177 };
178 
179 struct FunctionOutliningMultiRegionInfo {
180  FunctionOutliningMultiRegionInfo()
181  : ORI() {}
182 
183  // Container for outline regions
184  struct OutlineRegionInfo {
185  OutlineRegionInfo(SmallVector<BasicBlock *, 8> Region,
186  BasicBlock *EntryBlock, BasicBlock *ExitBlock,
187  BasicBlock *ReturnBlock)
188  : Region(Region), EntryBlock(EntryBlock), ExitBlock(ExitBlock),
189  ReturnBlock(ReturnBlock) {}
191  BasicBlock *EntryBlock;
192  BasicBlock *ExitBlock;
193  BasicBlock *ReturnBlock;
194  };
195 
197 };
198 
199 struct PartialInlinerImpl {
200 
201  PartialInlinerImpl(
205  ProfileSummaryInfo *ProfSI)
206  : GetAssumptionCache(GetAC), GetTTI(GTTI), GetBFI(GBFI), PSI(ProfSI) {}
207 
208  bool run(Module &M);
209  // Main part of the transformation that calls helper functions to find
210  // outlining candidates, clone & outline the function, and attempt to
211  // partially inline the resulting function. Returns true if
212  // inlining was successful, false otherwise. Also returns the outline
213  // function (only if we partially inlined early returns) as there is a
214  // possibility to further "peel" early return statements that were left in the
215  // outline function due to code size.
216  std::pair<bool, Function *> unswitchFunction(Function *F);
217 
218  // This class speculatively clones the function to be partial inlined.
219  // At the end of partial inlining, the remaining callsites to the cloned
220  // function that are not partially inlined will be fixed up to reference
221  // the original function, and the cloned function will be erased.
222  struct FunctionCloner {
223  // Two constructors, one for single region outlining, the other for
224  // multi-region outlining.
225  FunctionCloner(Function *F, FunctionOutliningInfo *OI,
227  FunctionCloner(Function *F, FunctionOutliningMultiRegionInfo *OMRI,
229  ~FunctionCloner();
230 
231  // Prepare for function outlining: making sure there is only
232  // one incoming edge from the extracted/outlined region to
233  // the return block.
234  void NormalizeReturnBlock();
235 
236  // Do function outlining for cold regions.
237  bool doMultiRegionFunctionOutlining();
238  // Do function outlining for region after early return block(s).
239  // NOTE: For vararg functions that do the vararg handling in the outlined
240  // function, we temporarily generate IR that does not properly
241  // forward varargs to the outlined function. Calling InlineFunction
242  // will update calls to the outlined functions to properly forward
243  // the varargs.
244  Function *doSingleRegionFunctionOutlining();
245 
246  Function *OrigFunc = nullptr;
247  Function *ClonedFunc = nullptr;
248 
249  typedef std::pair<Function *, BasicBlock *> FuncBodyCallerPair;
250  // Keep track of Outlined Functions and the basic block they're called from.
251  SmallVector<FuncBodyCallerPair, 4> OutlinedFunctions;
252 
253  // ClonedFunc is inlined in one of its callers after function
254  // outlining.
255  bool IsFunctionInlined = false;
256  // The cost of the region to be outlined.
257  int OutlinedRegionCost = 0;
258  // ClonedOI is specific to outlining non-early return blocks.
259  std::unique_ptr<FunctionOutliningInfo> ClonedOI = nullptr;
260  // ClonedOMRI is specific to outlining cold regions.
261  std::unique_ptr<FunctionOutliningMultiRegionInfo> ClonedOMRI = nullptr;
262  std::unique_ptr<BlockFrequencyInfo> ClonedFuncBFI = nullptr;
264  };
265 
266 private:
267  int NumPartialInlining = 0;
268  std::function<AssumptionCache &(Function &)> *GetAssumptionCache;
269  std::function<TargetTransformInfo &(Function &)> *GetTTI;
271  ProfileSummaryInfo *PSI;
272 
273  // Return the frequency of the OutlininingBB relative to F's entry point.
274  // The result is no larger than 1 and is represented using BP.
275  // (Note that the outlined region's 'head' block can only have incoming
276  // edges from the guarding entry blocks).
277  BranchProbability getOutliningCallBBRelativeFreq(FunctionCloner &Cloner);
278 
279  // Return true if the callee of CS should be partially inlined with
280  // profit.
281  bool shouldPartialInline(CallSite CS, FunctionCloner &Cloner,
282  BlockFrequency WeightedOutliningRcost,
284 
285  // Try to inline DuplicateFunction (cloned from F with call to
286  // the OutlinedFunction into its callers. Return true
287  // if there is any successful inlining.
288  bool tryPartialInline(FunctionCloner &Cloner);
289 
290  // Compute the mapping from use site of DuplicationFunction to the enclosing
291  // BB's profile count.
292  void computeCallsiteToProfCountMap(Function *DuplicateFunction,
293  DenseMap<User *, uint64_t> &SiteCountMap);
294 
295  bool IsLimitReached() {
296  return (MaxNumPartialInlining != -1 &&
297  NumPartialInlining >= MaxNumPartialInlining);
298  }
299 
300  static CallSite getCallSite(User *U) {
301  CallSite CS;
302  if (CallInst *CI = dyn_cast<CallInst>(U))
303  CS = CallSite(CI);
304  else if (InvokeInst *II = dyn_cast<InvokeInst>(U))
305  CS = CallSite(II);
306  else
307  llvm_unreachable("All uses must be calls");
308  return CS;
309  }
310 
311  static CallSite getOneCallSiteTo(Function *F) {
312  User *User = *F->user_begin();
313  return getCallSite(User);
314  }
315 
316  std::tuple<DebugLoc, BasicBlock *> getOneDebugLoc(Function *F) {
317  CallSite CS = getOneCallSiteTo(F);
318  DebugLoc DLoc = CS.getInstruction()->getDebugLoc();
319  BasicBlock *Block = CS.getParent();
320  return std::make_tuple(DLoc, Block);
321  }
322 
323  // Returns the costs associated with function outlining:
324  // - The first value is the non-weighted runtime cost for making the call
325  // to the outlined function, including the addtional setup cost in the
326  // outlined function itself;
327  // - The second value is the estimated size of the new call sequence in
328  // basic block Cloner.OutliningCallBB;
329  std::tuple<int, int> computeOutliningCosts(FunctionCloner &Cloner);
330 
331  // Compute the 'InlineCost' of block BB. InlineCost is a proxy used to
332  // approximate both the size and runtime cost (Note that in the current
333  // inline cost analysis, there is no clear distinction there either).
334  static int computeBBInlineCost(BasicBlock *BB);
335 
336  std::unique_ptr<FunctionOutliningInfo> computeOutliningInfo(Function *F);
337  std::unique_ptr<FunctionOutliningMultiRegionInfo>
338  computeOutliningColdRegionsInfo(Function *F, OptimizationRemarkEmitter &ORE);
339 };
340 
341 struct PartialInlinerLegacyPass : public ModulePass {
342  static char ID; // Pass identification, replacement for typeid
343 
344  PartialInlinerLegacyPass() : ModulePass(ID) {
346  }
347 
348  void getAnalysisUsage(AnalysisUsage &AU) const override {
352  }
353 
354  bool runOnModule(Module &M) override {
355  if (skipModule(M))
356  return false;
357 
358  AssumptionCacheTracker *ACT = &getAnalysis<AssumptionCacheTracker>();
360  &getAnalysis<TargetTransformInfoWrapperPass>();
361  ProfileSummaryInfo *PSI =
362  getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI();
363 
364  std::function<AssumptionCache &(Function &)> GetAssumptionCache =
365  [&ACT](Function &F) -> AssumptionCache & {
366  return ACT->getAssumptionCache(F);
367  };
368 
369  std::function<TargetTransformInfo &(Function &)> GetTTI =
370  [&TTIWP](Function &F) -> TargetTransformInfo & {
371  return TTIWP->getTTI(F);
372  };
373 
374  return PartialInlinerImpl(&GetAssumptionCache, &GetTTI, NoneType::None, PSI)
375  .run(M);
376  }
377 };
378 
379 } // end anonymous namespace
380 
381 std::unique_ptr<FunctionOutliningMultiRegionInfo>
382 PartialInlinerImpl::computeOutliningColdRegionsInfo(Function *F,
384  BasicBlock *EntryBlock = &F->front();
385 
386  DominatorTree DT(*F);
387  LoopInfo LI(DT);
388  BranchProbabilityInfo BPI(*F, LI);
389  std::unique_ptr<BlockFrequencyInfo> ScopedBFI;
391  if (!GetBFI) {
392  ScopedBFI.reset(new BlockFrequencyInfo(*F, BPI, LI));
393  BFI = ScopedBFI.get();
394  } else
395  BFI = &(*GetBFI)(*F);
396 
397  // Return if we don't have profiling information.
398  if (!PSI->hasInstrumentationProfile())
399  return std::unique_ptr<FunctionOutliningMultiRegionInfo>();
400 
401  std::unique_ptr<FunctionOutliningMultiRegionInfo> OutliningInfo =
402  llvm::make_unique<FunctionOutliningMultiRegionInfo>();
403 
404  auto IsSingleEntry = [](SmallVectorImpl<BasicBlock *> &BlockList) {
405  BasicBlock *Dom = BlockList.front();
406  return BlockList.size() > 1 && pred_size(Dom) == 1;
407  };
408 
409  auto IsSingleExit =
410  [&ORE](SmallVectorImpl<BasicBlock *> &BlockList) -> BasicBlock * {
411  BasicBlock *ExitBlock = nullptr;
412  for (auto *Block : BlockList) {
413  for (auto SI = succ_begin(Block); SI != succ_end(Block); ++SI) {
414  if (!is_contained(BlockList, *SI)) {
415  if (ExitBlock) {
416  ORE.emit([&]() {
417  return OptimizationRemarkMissed(DEBUG_TYPE, "MultiExitRegion",
418  &SI->front())
419  << "Region dominated by "
420  << ore::NV("Block", BlockList.front()->getName())
421  << " has more than one region exit edge.";
422  });
423  return nullptr;
424  } else
425  ExitBlock = Block;
426  }
427  }
428  }
429  return ExitBlock;
430  };
431 
432  auto BBProfileCount = [BFI](BasicBlock *BB) {
433  return BFI->getBlockProfileCount(BB)
434  ? BFI->getBlockProfileCount(BB).getValue()
435  : 0;
436  };
437 
438  // Use the same computeBBInlineCost function to compute the cost savings of
439  // the outlining the candidate region.
440  int OverallFunctionCost = 0;
441  for (auto &BB : *F)
442  OverallFunctionCost += computeBBInlineCost(&BB);
443 
444 #ifndef NDEBUG
446  dbgs() << "OverallFunctionCost = " << OverallFunctionCost << "\n";
447 #endif
448  int MinOutlineRegionCost =
449  static_cast<int>(OverallFunctionCost * MinRegionSizeRatio);
450  BranchProbability MinBranchProbability(
451  static_cast<int>(ColdBranchRatio * MinBlockCounterExecution),
452  MinBlockCounterExecution);
453  bool ColdCandidateFound = false;
454  BasicBlock *CurrEntry = EntryBlock;
455  std::vector<BasicBlock *> DFS;
456  DenseMap<BasicBlock *, bool> VisitedMap;
457  DFS.push_back(CurrEntry);
458  VisitedMap[CurrEntry] = true;
459  // Use Depth First Search on the basic blocks to find CFG edges that are
460  // considered cold.
461  // Cold regions considered must also have its inline cost compared to the
462  // overall inline cost of the original function. The region is outlined only
463  // if it reduced the inline cost of the function by 'MinOutlineRegionCost' or
464  // more.
465  while (!DFS.empty()) {
466  auto *thisBB = DFS.back();
467  DFS.pop_back();
468  // Only consider regions with predecessor blocks that are considered
469  // not-cold (default: part of the top 99.99% of all block counters)
470  // AND greater than our minimum block execution count (default: 100).
471  if (PSI->isColdBB(thisBB, BFI) ||
472  BBProfileCount(thisBB) < MinBlockCounterExecution)
473  continue;
474  for (auto SI = succ_begin(thisBB); SI != succ_end(thisBB); ++SI) {
475  if (VisitedMap[*SI])
476  continue;
477  VisitedMap[*SI] = true;
478  DFS.push_back(*SI);
479  // If branch isn't cold, we skip to the next one.
480  BranchProbability SuccProb = BPI.getEdgeProbability(thisBB, *SI);
481  if (SuccProb > MinBranchProbability)
482  continue;
483 #ifndef NDEBUG
484  if (TracePartialInlining) {
485  dbgs() << "Found cold edge: " << thisBB->getName() << "->"
486  << (*SI)->getName() << "\nBranch Probability = " << SuccProb
487  << "\n";
488  }
489 #endif
490  SmallVector<BasicBlock *, 8> DominateVector;
491  DT.getDescendants(*SI, DominateVector);
492  // We can only outline single entry regions (for now).
493  if (!IsSingleEntry(DominateVector))
494  continue;
495  BasicBlock *ExitBlock = nullptr;
496  // We can only outline single exit regions (for now).
497  if (!(ExitBlock = IsSingleExit(DominateVector)))
498  continue;
499  int OutlineRegionCost = 0;
500  for (auto *BB : DominateVector)
501  OutlineRegionCost += computeBBInlineCost(BB);
502 
503 #ifndef NDEBUG
505  dbgs() << "OutlineRegionCost = " << OutlineRegionCost << "\n";
506 #endif
507 
508  if (OutlineRegionCost < MinOutlineRegionCost) {
509  ORE.emit([&]() {
510  return OptimizationRemarkAnalysis(DEBUG_TYPE, "TooCostly",
511  &SI->front())
512  << ore::NV("Callee", F) << " inline cost-savings smaller than "
513  << ore::NV("Cost", MinOutlineRegionCost);
514  });
515  continue;
516  }
517  // For now, ignore blocks that belong to a SISE region that is a
518  // candidate for outlining. In the future, we may want to look
519  // at inner regions because the outer region may have live-exit
520  // variables.
521  for (auto *BB : DominateVector)
522  VisitedMap[BB] = true;
523  // ReturnBlock here means the block after the outline call
524  BasicBlock *ReturnBlock = ExitBlock->getSingleSuccessor();
525  // assert(ReturnBlock && "ReturnBlock is NULL somehow!");
526  FunctionOutliningMultiRegionInfo::OutlineRegionInfo RegInfo(
527  DominateVector, DominateVector.front(), ExitBlock, ReturnBlock);
528  RegInfo.Region = DominateVector;
529  OutliningInfo->ORI.push_back(RegInfo);
530 #ifndef NDEBUG
531  if (TracePartialInlining) {
532  dbgs() << "Found Cold Candidate starting at block: "
533  << DominateVector.front()->getName() << "\n";
534  }
535 #endif
536  ColdCandidateFound = true;
537  NumColdRegionsFound++;
538  }
539  }
540  if (ColdCandidateFound)
541  return OutliningInfo;
542  else
543  return std::unique_ptr<FunctionOutliningMultiRegionInfo>();
544 }
545 
546 std::unique_ptr<FunctionOutliningInfo>
547 PartialInlinerImpl::computeOutliningInfo(Function *F) {
548  BasicBlock *EntryBlock = &F->front();
549  BranchInst *BR = dyn_cast<BranchInst>(EntryBlock->getTerminator());
550  if (!BR || BR->isUnconditional())
551  return std::unique_ptr<FunctionOutliningInfo>();
552 
553  // Returns true if Succ is BB's successor
554  auto IsSuccessor = [](BasicBlock *Succ, BasicBlock *BB) {
555  return is_contained(successors(BB), Succ);
556  };
557 
558  auto IsReturnBlock = [](BasicBlock *BB) {
559  Instruction *TI = BB->getTerminator();
560  return isa<ReturnInst>(TI);
561  };
562 
563  auto GetReturnBlock = [&](BasicBlock *Succ1, BasicBlock *Succ2) {
564  if (IsReturnBlock(Succ1))
565  return std::make_tuple(Succ1, Succ2);
566  if (IsReturnBlock(Succ2))
567  return std::make_tuple(Succ2, Succ1);
568 
569  return std::make_tuple<BasicBlock *, BasicBlock *>(nullptr, nullptr);
570  };
571 
572  // Detect a triangular shape:
573  auto GetCommonSucc = [&](BasicBlock *Succ1, BasicBlock *Succ2) {
574  if (IsSuccessor(Succ1, Succ2))
575  return std::make_tuple(Succ1, Succ2);
576  if (IsSuccessor(Succ2, Succ1))
577  return std::make_tuple(Succ2, Succ1);
578 
579  return std::make_tuple<BasicBlock *, BasicBlock *>(nullptr, nullptr);
580  };
581 
582  std::unique_ptr<FunctionOutliningInfo> OutliningInfo =
583  llvm::make_unique<FunctionOutliningInfo>();
584 
585  BasicBlock *CurrEntry = EntryBlock;
586  bool CandidateFound = false;
587  do {
588  // The number of blocks to be inlined has already reached
589  // the limit. When MaxNumInlineBlocks is set to 0 or 1, this
590  // disables partial inlining for the function.
591  if (OutliningInfo->GetNumInlinedBlocks() >= MaxNumInlineBlocks)
592  break;
593 
594  if (succ_size(CurrEntry) != 2)
595  break;
596 
597  BasicBlock *Succ1 = *succ_begin(CurrEntry);
598  BasicBlock *Succ2 = *(succ_begin(CurrEntry) + 1);
599 
600  BasicBlock *ReturnBlock, *NonReturnBlock;
601  std::tie(ReturnBlock, NonReturnBlock) = GetReturnBlock(Succ1, Succ2);
602 
603  if (ReturnBlock) {
604  OutliningInfo->Entries.push_back(CurrEntry);
605  OutliningInfo->ReturnBlock = ReturnBlock;
606  OutliningInfo->NonReturnBlock = NonReturnBlock;
607  CandidateFound = true;
608  break;
609  }
610 
611  BasicBlock *CommSucc;
613  std::tie(CommSucc, OtherSucc) = GetCommonSucc(Succ1, Succ2);
614 
615  if (!CommSucc)
616  break;
617 
618  OutliningInfo->Entries.push_back(CurrEntry);
619  CurrEntry = OtherSucc;
620  } while (true);
621 
622  if (!CandidateFound)
623  return std::unique_ptr<FunctionOutliningInfo>();
624 
625  // Do sanity check of the entries: threre should not
626  // be any successors (not in the entry set) other than
627  // {ReturnBlock, NonReturnBlock}
628  assert(OutliningInfo->Entries[0] == &F->front() &&
629  "Function Entry must be the first in Entries vector");
630  DenseSet<BasicBlock *> Entries;
631  for (BasicBlock *E : OutliningInfo->Entries)
632  Entries.insert(E);
633 
634  // Returns true of BB has Predecessor which is not
635  // in Entries set.
636  auto HasNonEntryPred = [Entries](BasicBlock *BB) {
637  for (auto Pred : predecessors(BB)) {
638  if (!Entries.count(Pred))
639  return true;
640  }
641  return false;
642  };
643  auto CheckAndNormalizeCandidate =
644  [Entries, HasNonEntryPred](FunctionOutliningInfo *OutliningInfo) {
645  for (BasicBlock *E : OutliningInfo->Entries) {
646  for (auto Succ : successors(E)) {
647  if (Entries.count(Succ))
648  continue;
649  if (Succ == OutliningInfo->ReturnBlock)
650  OutliningInfo->ReturnBlockPreds.push_back(E);
651  else if (Succ != OutliningInfo->NonReturnBlock)
652  return false;
653  }
654  // There should not be any outside incoming edges either:
655  if (HasNonEntryPred(E))
656  return false;
657  }
658  return true;
659  };
660 
661  if (!CheckAndNormalizeCandidate(OutliningInfo.get()))
662  return std::unique_ptr<FunctionOutliningInfo>();
663 
664  // Now further growing the candidate's inlining region by
665  // peeling off dominating blocks from the outlining region:
666  while (OutliningInfo->GetNumInlinedBlocks() < MaxNumInlineBlocks) {
667  BasicBlock *Cand = OutliningInfo->NonReturnBlock;
668  if (succ_size(Cand) != 2)
669  break;
670 
671  if (HasNonEntryPred(Cand))
672  break;
673 
674  BasicBlock *Succ1 = *succ_begin(Cand);
675  BasicBlock *Succ2 = *(succ_begin(Cand) + 1);
676 
677  BasicBlock *ReturnBlock, *NonReturnBlock;
678  std::tie(ReturnBlock, NonReturnBlock) = GetReturnBlock(Succ1, Succ2);
679  if (!ReturnBlock || ReturnBlock != OutliningInfo->ReturnBlock)
680  break;
681 
682  if (NonReturnBlock->getSinglePredecessor() != Cand)
683  break;
684 
685  // Now grow and update OutlininigInfo:
686  OutliningInfo->Entries.push_back(Cand);
687  OutliningInfo->NonReturnBlock = NonReturnBlock;
688  OutliningInfo->ReturnBlockPreds.push_back(Cand);
689  Entries.insert(Cand);
690  }
691 
692  return OutliningInfo;
693 }
694 
695 // Check if there is PGO data or user annoated branch data:
696 static bool hasProfileData(Function *F, FunctionOutliningInfo *OI) {
697  if (F->hasProfileData())
698  return true;
699  // Now check if any of the entry block has MD_prof data:
700  for (auto *E : OI->Entries) {
702  if (!BR || BR->isUnconditional())
703  continue;
704  uint64_t T, F;
705  if (BR->extractProfMetadata(T, F))
706  return true;
707  }
708  return false;
709 }
710 
712 PartialInlinerImpl::getOutliningCallBBRelativeFreq(FunctionCloner &Cloner) {
713  BasicBlock *OutliningCallBB = Cloner.OutlinedFunctions.back().second;
714  auto EntryFreq =
715  Cloner.ClonedFuncBFI->getBlockFreq(&Cloner.ClonedFunc->getEntryBlock());
716  auto OutliningCallFreq =
717  Cloner.ClonedFuncBFI->getBlockFreq(OutliningCallBB);
718  // FIXME Hackery needed because ClonedFuncBFI is based on the function BEFORE
719  // we outlined any regions, so we may encounter situations where the
720  // OutliningCallFreq is *slightly* bigger than the EntryFreq.
721  if (OutliningCallFreq.getFrequency() > EntryFreq.getFrequency()) {
722  OutliningCallFreq = EntryFreq;
723  }
724  auto OutlineRegionRelFreq = BranchProbability::getBranchProbability(
725  OutliningCallFreq.getFrequency(), EntryFreq.getFrequency());
726 
727  if (hasProfileData(Cloner.OrigFunc, Cloner.ClonedOI.get()))
728  return OutlineRegionRelFreq;
729 
730  // When profile data is not available, we need to be conservative in
731  // estimating the overall savings. Static branch prediction can usually
732  // guess the branch direction right (taken/non-taken), but the guessed
733  // branch probability is usually not biased enough. In case when the
734  // outlined region is predicted to be likely, its probability needs
735  // to be made higher (more biased) to not under-estimate the cost of
736  // function outlining. On the other hand, if the outlined region
737  // is predicted to be less likely, the predicted probablity is usually
738  // higher than the actual. For instance, the actual probability of the
739  // less likely target is only 5%, but the guessed probablity can be
740  // 40%. In the latter case, there is no need for further adjustement.
741  // FIXME: add an option for this.
742  if (OutlineRegionRelFreq < BranchProbability(45, 100))
743  return OutlineRegionRelFreq;
744 
745  OutlineRegionRelFreq = std::max(
746  OutlineRegionRelFreq, BranchProbability(OutlineRegionFreqPercent, 100));
747 
748  return OutlineRegionRelFreq;
749 }
750 
751 bool PartialInlinerImpl::shouldPartialInline(
752  CallSite CS, FunctionCloner &Cloner,
753  BlockFrequency WeightedOutliningRcost,
755  using namespace ore;
756 
757  Instruction *Call = CS.getInstruction();
759  assert(Callee == Cloner.ClonedFunc);
760 
761  if (SkipCostAnalysis)
762  return isInlineViable(*Callee);
763 
764  Function *Caller = CS.getCaller();
765  auto &CalleeTTI = (*GetTTI)(*Callee);
766  InlineCost IC = getInlineCost(CS, getInlineParams(), CalleeTTI,
767  *GetAssumptionCache, GetBFI, PSI, &ORE);
768 
769  if (IC.isAlways()) {
770  ORE.emit([&]() {
771  return OptimizationRemarkAnalysis(DEBUG_TYPE, "AlwaysInline", Call)
772  << NV("Callee", Cloner.OrigFunc)
773  << " should always be fully inlined, not partially";
774  });
775  return false;
776  }
777 
778  if (IC.isNever()) {
779  ORE.emit([&]() {
780  return OptimizationRemarkMissed(DEBUG_TYPE, "NeverInline", Call)
781  << NV("Callee", Cloner.OrigFunc) << " not partially inlined into "
782  << NV("Caller", Caller)
783  << " because it should never be inlined (cost=never)";
784  });
785  return false;
786  }
787 
788  if (!IC) {
789  ORE.emit([&]() {
790  return OptimizationRemarkAnalysis(DEBUG_TYPE, "TooCostly", Call)
791  << NV("Callee", Cloner.OrigFunc) << " not partially inlined into "
792  << NV("Caller", Caller) << " because too costly to inline (cost="
793  << NV("Cost", IC.getCost()) << ", threshold="
794  << NV("Threshold", IC.getCostDelta() + IC.getCost()) << ")";
795  });
796  return false;
797  }
798  const DataLayout &DL = Caller->getParent()->getDataLayout();
799 
800  // The savings of eliminating the call:
801  int NonWeightedSavings = getCallsiteCost(CS, DL);
802  BlockFrequency NormWeightedSavings(NonWeightedSavings);
803 
804  // Weighted saving is smaller than weighted cost, return false
805  if (NormWeightedSavings < WeightedOutliningRcost) {
806  ORE.emit([&]() {
807  return OptimizationRemarkAnalysis(DEBUG_TYPE, "OutliningCallcostTooHigh",
808  Call)
809  << NV("Callee", Cloner.OrigFunc) << " not partially inlined into "
810  << NV("Caller", Caller) << " runtime overhead (overhead="
811  << NV("Overhead", (unsigned)WeightedOutliningRcost.getFrequency())
812  << ", savings="
813  << NV("Savings", (unsigned)NormWeightedSavings.getFrequency())
814  << ")"
815  << " of making the outlined call is too high";
816  });
817 
818  return false;
819  }
820 
821  ORE.emit([&]() {
822  return OptimizationRemarkAnalysis(DEBUG_TYPE, "CanBePartiallyInlined", Call)
823  << NV("Callee", Cloner.OrigFunc) << " can be partially inlined into "
824  << NV("Caller", Caller) << " with cost=" << NV("Cost", IC.getCost())
825  << " (threshold="
826  << NV("Threshold", IC.getCostDelta() + IC.getCost()) << ")";
827  });
828  return true;
829 }
830 
831 // TODO: Ideally we should share Inliner's InlineCost Analysis code.
832 // For now use a simplified version. The returned 'InlineCost' will be used
833 // to esimate the size cost as well as runtime cost of the BB.
834 int PartialInlinerImpl::computeBBInlineCost(BasicBlock *BB) {
835  int InlineCost = 0;
836  const DataLayout &DL = BB->getParent()->getParent()->getDataLayout();
837  for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I) {
838  if (isa<DbgInfoIntrinsic>(I))
839  continue;
840 
841  switch (I->getOpcode()) {
842  case Instruction::BitCast:
843  case Instruction::PtrToInt:
844  case Instruction::IntToPtr:
845  case Instruction::Alloca:
846  continue;
847  case Instruction::GetElementPtr:
848  if (cast<GetElementPtrInst>(I)->hasAllZeroIndices())
849  continue;
850  break;
851  default:
852  break;
853  }
854 
855  IntrinsicInst *IntrInst = dyn_cast<IntrinsicInst>(I);
856  if (IntrInst) {
857  if (IntrInst->getIntrinsicID() == Intrinsic::lifetime_start ||
858  IntrInst->getIntrinsicID() == Intrinsic::lifetime_end)
859  continue;
860  }
861 
862  if (CallInst *CI = dyn_cast<CallInst>(I)) {
863  InlineCost += getCallsiteCost(CallSite(CI), DL);
864  continue;
865  }
866 
867  if (InvokeInst *II = dyn_cast<InvokeInst>(I)) {
868  InlineCost += getCallsiteCost(CallSite(II), DL);
869  continue;
870  }
871 
872  if (SwitchInst *SI = dyn_cast<SwitchInst>(I)) {
873  InlineCost += (SI->getNumCases() + 1) * InlineConstants::InstrCost;
874  continue;
875  }
876  InlineCost += InlineConstants::InstrCost;
877  }
878  return InlineCost;
879 }
880 
881 std::tuple<int, int>
882 PartialInlinerImpl::computeOutliningCosts(FunctionCloner &Cloner) {
883  int OutliningFuncCallCost = 0, OutlinedFunctionCost = 0;
884  for (auto FuncBBPair : Cloner.OutlinedFunctions) {
885  Function *OutlinedFunc = FuncBBPair.first;
886  BasicBlock* OutliningCallBB = FuncBBPair.second;
887  // Now compute the cost of the call sequence to the outlined function
888  // 'OutlinedFunction' in BB 'OutliningCallBB':
889  OutliningFuncCallCost += computeBBInlineCost(OutliningCallBB);
890 
891  // Now compute the cost of the extracted/outlined function itself:
892  for (BasicBlock &BB : *OutlinedFunc)
893  OutlinedFunctionCost += computeBBInlineCost(&BB);
894  }
895  assert(OutlinedFunctionCost >= Cloner.OutlinedRegionCost &&
896  "Outlined function cost should be no less than the outlined region");
897 
898  // The code extractor introduces a new root and exit stub blocks with
899  // additional unconditional branches. Those branches will be eliminated
900  // later with bb layout. The cost should be adjusted accordingly:
901  OutlinedFunctionCost -=
902  2 * InlineConstants::InstrCost * Cloner.OutlinedFunctions.size();
903 
904  int OutliningRuntimeOverhead =
905  OutliningFuncCallCost +
906  (OutlinedFunctionCost - Cloner.OutlinedRegionCost) +
908 
909  return std::make_tuple(OutliningFuncCallCost, OutliningRuntimeOverhead);
910 }
911 
912 // Create the callsite to profile count map which is
913 // used to update the original function's entry count,
914 // after the function is partially inlined into the callsite.
915 void PartialInlinerImpl::computeCallsiteToProfCountMap(
916  Function *DuplicateFunction,
917  DenseMap<User *, uint64_t> &CallSiteToProfCountMap) {
918  std::vector<User *> Users(DuplicateFunction->user_begin(),
919  DuplicateFunction->user_end());
920  Function *CurrentCaller = nullptr;
921  std::unique_ptr<BlockFrequencyInfo> TempBFI;
922  BlockFrequencyInfo *CurrentCallerBFI = nullptr;
923 
924  auto ComputeCurrBFI = [&,this](Function *Caller) {
925  // For the old pass manager:
926  if (!GetBFI) {
927  DominatorTree DT(*Caller);
928  LoopInfo LI(DT);
929  BranchProbabilityInfo BPI(*Caller, LI);
930  TempBFI.reset(new BlockFrequencyInfo(*Caller, BPI, LI));
931  CurrentCallerBFI = TempBFI.get();
932  } else {
933  // New pass manager:
934  CurrentCallerBFI = &(*GetBFI)(*Caller);
935  }
936  };
937 
938  for (User *User : Users) {
939  CallSite CS = getCallSite(User);
940  Function *Caller = CS.getCaller();
941  if (CurrentCaller != Caller) {
942  CurrentCaller = Caller;
943  ComputeCurrBFI(Caller);
944  } else {
945  assert(CurrentCallerBFI && "CallerBFI is not set");
946  }
947  BasicBlock *CallBB = CS.getInstruction()->getParent();
948  auto Count = CurrentCallerBFI->getBlockProfileCount(CallBB);
949  if (Count)
950  CallSiteToProfCountMap[User] = *Count;
951  else
952  CallSiteToProfCountMap[User] = 0;
953  }
954 }
955 
956 PartialInlinerImpl::FunctionCloner::FunctionCloner(
957  Function *F, FunctionOutliningInfo *OI, OptimizationRemarkEmitter &ORE)
958  : OrigFunc(F), ORE(ORE) {
959  ClonedOI = llvm::make_unique<FunctionOutliningInfo>();
960 
961  // Clone the function, so that we can hack away on it.
962  ValueToValueMapTy VMap;
963  ClonedFunc = CloneFunction(F, VMap);
964 
965  ClonedOI->ReturnBlock = cast<BasicBlock>(VMap[OI->ReturnBlock]);
966  ClonedOI->NonReturnBlock = cast<BasicBlock>(VMap[OI->NonReturnBlock]);
967  for (BasicBlock *BB : OI->Entries) {
968  ClonedOI->Entries.push_back(cast<BasicBlock>(VMap[BB]));
969  }
970  for (BasicBlock *E : OI->ReturnBlockPreds) {
971  BasicBlock *NewE = cast<BasicBlock>(VMap[E]);
972  ClonedOI->ReturnBlockPreds.push_back(NewE);
973  }
974  // Go ahead and update all uses to the duplicate, so that we can just
975  // use the inliner functionality when we're done hacking.
976  F->replaceAllUsesWith(ClonedFunc);
977 }
978 
979 PartialInlinerImpl::FunctionCloner::FunctionCloner(
980  Function *F, FunctionOutliningMultiRegionInfo *OI,
982  : OrigFunc(F), ORE(ORE) {
983  ClonedOMRI = llvm::make_unique<FunctionOutliningMultiRegionInfo>();
984 
985  // Clone the function, so that we can hack away on it.
986  ValueToValueMapTy VMap;
987  ClonedFunc = CloneFunction(F, VMap);
988 
989  // Go through all Outline Candidate Regions and update all BasicBlock
990  // information.
991  for (FunctionOutliningMultiRegionInfo::OutlineRegionInfo RegionInfo :
992  OI->ORI) {
994  for (BasicBlock *BB : RegionInfo.Region) {
995  Region.push_back(cast<BasicBlock>(VMap[BB]));
996  }
997  BasicBlock *NewEntryBlock = cast<BasicBlock>(VMap[RegionInfo.EntryBlock]);
998  BasicBlock *NewExitBlock = cast<BasicBlock>(VMap[RegionInfo.ExitBlock]);
999  BasicBlock *NewReturnBlock = nullptr;
1000  if (RegionInfo.ReturnBlock)
1001  NewReturnBlock = cast<BasicBlock>(VMap[RegionInfo.ReturnBlock]);
1002  FunctionOutliningMultiRegionInfo::OutlineRegionInfo MappedRegionInfo(
1003  Region, NewEntryBlock, NewExitBlock, NewReturnBlock);
1004  ClonedOMRI->ORI.push_back(MappedRegionInfo);
1005  }
1006  // Go ahead and update all uses to the duplicate, so that we can just
1007  // use the inliner functionality when we're done hacking.
1008  F->replaceAllUsesWith(ClonedFunc);
1009 }
1010 
1011 void PartialInlinerImpl::FunctionCloner::NormalizeReturnBlock() {
1012  auto getFirstPHI = [](BasicBlock *BB) {
1013  BasicBlock::iterator I = BB->begin();
1014  PHINode *FirstPhi = nullptr;
1015  while (I != BB->end()) {
1016  PHINode *Phi = dyn_cast<PHINode>(I);
1017  if (!Phi)
1018  break;
1019  if (!FirstPhi) {
1020  FirstPhi = Phi;
1021  break;
1022  }
1023  }
1024  return FirstPhi;
1025  };
1026 
1027  // Shouldn't need to normalize PHIs if we're not outlining non-early return
1028  // blocks.
1029  if (!ClonedOI)
1030  return;
1031 
1032  // Special hackery is needed with PHI nodes that have inputs from more than
1033  // one extracted block. For simplicity, just split the PHIs into a two-level
1034  // sequence of PHIs, some of which will go in the extracted region, and some
1035  // of which will go outside.
1036  BasicBlock *PreReturn = ClonedOI->ReturnBlock;
1037  // only split block when necessary:
1038  PHINode *FirstPhi = getFirstPHI(PreReturn);
1039  unsigned NumPredsFromEntries = ClonedOI->ReturnBlockPreds.size();
1040 
1041  if (!FirstPhi || FirstPhi->getNumIncomingValues() <= NumPredsFromEntries + 1)
1042  return;
1043 
1044  auto IsTrivialPhi = [](PHINode *PN) -> Value * {
1045  Value *CommonValue = PN->getIncomingValue(0);
1046  if (all_of(PN->incoming_values(),
1047  [&](Value *V) { return V == CommonValue; }))
1048  return CommonValue;
1049  return nullptr;
1050  };
1051 
1052  ClonedOI->ReturnBlock = ClonedOI->ReturnBlock->splitBasicBlock(
1053  ClonedOI->ReturnBlock->getFirstNonPHI()->getIterator());
1054  BasicBlock::iterator I = PreReturn->begin();
1055  Instruction *Ins = &ClonedOI->ReturnBlock->front();
1057  while (I != PreReturn->end()) {
1058  PHINode *OldPhi = dyn_cast<PHINode>(I);
1059  if (!OldPhi)
1060  break;
1061 
1062  PHINode *RetPhi =
1063  PHINode::Create(OldPhi->getType(), NumPredsFromEntries + 1, "", Ins);
1064  OldPhi->replaceAllUsesWith(RetPhi);
1065  Ins = ClonedOI->ReturnBlock->getFirstNonPHI();
1066 
1067  RetPhi->addIncoming(&*I, PreReturn);
1068  for (BasicBlock *E : ClonedOI->ReturnBlockPreds) {
1069  RetPhi->addIncoming(OldPhi->getIncomingValueForBlock(E), E);
1070  OldPhi->removeIncomingValue(E);
1071  }
1072 
1073  // After incoming values splitting, the old phi may become trivial.
1074  // Keeping the trivial phi can introduce definition inside the outline
1075  // region which is live-out, causing necessary overhead (load, store
1076  // arg passing etc).
1077  if (auto *OldPhiVal = IsTrivialPhi(OldPhi)) {
1078  OldPhi->replaceAllUsesWith(OldPhiVal);
1079  DeadPhis.push_back(OldPhi);
1080  }
1081  ++I;
1082  }
1083  for (auto *DP : DeadPhis)
1084  DP->eraseFromParent();
1085 
1086  for (auto E : ClonedOI->ReturnBlockPreds) {
1087  E->getTerminator()->replaceUsesOfWith(PreReturn, ClonedOI->ReturnBlock);
1088  }
1089 }
1090 
1091 bool PartialInlinerImpl::FunctionCloner::doMultiRegionFunctionOutlining() {
1092 
1093  auto ComputeRegionCost = [](SmallVectorImpl<BasicBlock *> &Region) {
1094  int Cost = 0;
1095  for (BasicBlock* BB : Region)
1096  Cost += computeBBInlineCost(BB);
1097  return Cost;
1098  };
1099 
1100  assert(ClonedOMRI && "Expecting OutlineInfo for multi region outline");
1101 
1102  if (ClonedOMRI->ORI.empty())
1103  return false;
1104 
1105  // The CodeExtractor needs a dominator tree.
1106  DominatorTree DT;
1107  DT.recalculate(*ClonedFunc);
1108 
1109  // Manually calculate a BlockFrequencyInfo and BranchProbabilityInfo.
1110  LoopInfo LI(DT);
1111  BranchProbabilityInfo BPI(*ClonedFunc, LI);
1112  ClonedFuncBFI.reset(new BlockFrequencyInfo(*ClonedFunc, BPI, LI));
1113 
1114  SetVector<Value *> Inputs, Outputs, Sinks;
1115  for (FunctionOutliningMultiRegionInfo::OutlineRegionInfo RegionInfo :
1116  ClonedOMRI->ORI) {
1117  int CurrentOutlinedRegionCost = ComputeRegionCost(RegionInfo.Region);
1118 
1119  CodeExtractor CE(RegionInfo.Region, &DT, /*AggregateArgs*/ false,
1120  ClonedFuncBFI.get(), &BPI, /* AllowVarargs */ false);
1121 
1122  CE.findInputsOutputs(Inputs, Outputs, Sinks);
1123 
1124 #ifndef NDEBUG
1125  if (TracePartialInlining) {
1126  dbgs() << "inputs: " << Inputs.size() << "\n";
1127  dbgs() << "outputs: " << Outputs.size() << "\n";
1128  for (Value *value : Inputs)
1129  dbgs() << "value used in func: " << *value << "\n";
1130  for (Value *output : Outputs)
1131  dbgs() << "instr used in func: " << *output << "\n";
1132  }
1133 #endif
1134  // Do not extract regions that have live exit variables.
1135  if (Outputs.size() > 0 && !ForceLiveExit)
1136  continue;
1137 
1138  Function *OutlinedFunc = CE.extractCodeRegion();
1139 
1140  if (OutlinedFunc) {
1141  CallSite OCS = PartialInlinerImpl::getOneCallSiteTo(OutlinedFunc);
1142  BasicBlock *OutliningCallBB = OCS.getInstruction()->getParent();
1143  assert(OutliningCallBB->getParent() == ClonedFunc);
1144  OutlinedFunctions.push_back(std::make_pair(OutlinedFunc,OutliningCallBB));
1145  NumColdRegionsOutlined++;
1146  OutlinedRegionCost += CurrentOutlinedRegionCost;
1147 
1148  if (MarkOutlinedColdCC) {
1149  OutlinedFunc->setCallingConv(CallingConv::Cold);
1151  }
1152  } else
1153  ORE.emit([&]() {
1154  return OptimizationRemarkMissed(DEBUG_TYPE, "ExtractFailed",
1155  &RegionInfo.Region.front()->front())
1156  << "Failed to extract region at block "
1157  << ore::NV("Block", RegionInfo.Region.front());
1158  });
1159  }
1160 
1161  return !OutlinedFunctions.empty();
1162 }
1163 
1164 Function *
1165 PartialInlinerImpl::FunctionCloner::doSingleRegionFunctionOutlining() {
1166  // Returns true if the block is to be partial inlined into the caller
1167  // (i.e. not to be extracted to the out of line function)
1168  auto ToBeInlined = [&, this](BasicBlock *BB) {
1169  return BB == ClonedOI->ReturnBlock ||
1170  (std::find(ClonedOI->Entries.begin(), ClonedOI->Entries.end(), BB) !=
1171  ClonedOI->Entries.end());
1172  };
1173 
1174  assert(ClonedOI && "Expecting OutlineInfo for single region outline");
1175  // The CodeExtractor needs a dominator tree.
1176  DominatorTree DT;
1177  DT.recalculate(*ClonedFunc);
1178 
1179  // Manually calculate a BlockFrequencyInfo and BranchProbabilityInfo.
1180  LoopInfo LI(DT);
1181  BranchProbabilityInfo BPI(*ClonedFunc, LI);
1182  ClonedFuncBFI.reset(new BlockFrequencyInfo(*ClonedFunc, BPI, LI));
1183 
1184  // Gather up the blocks that we're going to extract.
1185  std::vector<BasicBlock *> ToExtract;
1186  ToExtract.push_back(ClonedOI->NonReturnBlock);
1187  OutlinedRegionCost +=
1188  PartialInlinerImpl::computeBBInlineCost(ClonedOI->NonReturnBlock);
1189  for (BasicBlock &BB : *ClonedFunc)
1190  if (!ToBeInlined(&BB) && &BB != ClonedOI->NonReturnBlock) {
1191  ToExtract.push_back(&BB);
1192  // FIXME: the code extractor may hoist/sink more code
1193  // into the outlined function which may make the outlining
1194  // overhead (the difference of the outlined function cost
1195  // and OutliningRegionCost) look larger.
1196  OutlinedRegionCost += computeBBInlineCost(&BB);
1197  }
1198 
1199  // Extract the body of the if.
1200  Function *OutlinedFunc =
1201  CodeExtractor(ToExtract, &DT, /*AggregateArgs*/ false,
1202  ClonedFuncBFI.get(), &BPI,
1203  /* AllowVarargs */ true)
1204  .extractCodeRegion();
1205 
1206  if (OutlinedFunc) {
1207  BasicBlock *OutliningCallBB =
1208  PartialInlinerImpl::getOneCallSiteTo(OutlinedFunc)
1209  .getInstruction()
1210  ->getParent();
1211  assert(OutliningCallBB->getParent() == ClonedFunc);
1212  OutlinedFunctions.push_back(std::make_pair(OutlinedFunc, OutliningCallBB));
1213  } else
1214  ORE.emit([&]() {
1215  return OptimizationRemarkMissed(DEBUG_TYPE, "ExtractFailed",
1216  &ToExtract.front()->front())
1217  << "Failed to extract region at block "
1218  << ore::NV("Block", ToExtract.front());
1219  });
1220 
1221  return OutlinedFunc;
1222 }
1223 
1224 PartialInlinerImpl::FunctionCloner::~FunctionCloner() {
1225  // Ditch the duplicate, since we're done with it, and rewrite all remaining
1226  // users (function pointers, etc.) back to the original function.
1227  ClonedFunc->replaceAllUsesWith(OrigFunc);
1228  ClonedFunc->eraseFromParent();
1229  if (!IsFunctionInlined) {
1230  // Remove each function that was speculatively created if there is no
1231  // reference.
1232  for (auto FuncBBPair : OutlinedFunctions) {
1233  Function *Func = FuncBBPair.first;
1234  Func->eraseFromParent();
1235  }
1236  }
1237 }
1238 
1239 std::pair<bool, Function *> PartialInlinerImpl::unswitchFunction(Function *F) {
1240 
1241  if (F->hasAddressTaken())
1242  return {false, nullptr};
1243 
1244  // Let inliner handle it
1245  if (F->hasFnAttribute(Attribute::AlwaysInline))
1246  return {false, nullptr};
1247 
1248  if (F->hasFnAttribute(Attribute::NoInline))
1249  return {false, nullptr};
1250 
1251  if (PSI->isFunctionEntryCold(F))
1252  return {false, nullptr};
1253 
1254  if (F->user_begin() == F->user_end())
1255  return {false, nullptr};
1256 
1258 
1259  // Only try to outline cold regions if we have a profile summary, which
1260  // implies we have profiling information.
1261  if (PSI->hasProfileSummary() && F->hasProfileData() &&
1263  std::unique_ptr<FunctionOutliningMultiRegionInfo> OMRI =
1264  computeOutliningColdRegionsInfo(F, ORE);
1265  if (OMRI) {
1266  FunctionCloner Cloner(F, OMRI.get(), ORE);
1267 
1268 #ifndef NDEBUG
1269  if (TracePartialInlining) {
1270  dbgs() << "HotCountThreshold = " << PSI->getHotCountThreshold() << "\n";
1271  dbgs() << "ColdCountThreshold = " << PSI->getColdCountThreshold()
1272  << "\n";
1273  }
1274 #endif
1275  bool DidOutline = Cloner.doMultiRegionFunctionOutlining();
1276 
1277  if (DidOutline) {
1278 #ifndef NDEBUG
1279  if (TracePartialInlining) {
1280  dbgs() << ">>>>>> Outlined (Cloned) Function >>>>>>\n";
1281  Cloner.ClonedFunc->print(dbgs());
1282  dbgs() << "<<<<<< Outlined (Cloned) Function <<<<<<\n";
1283  }
1284 #endif
1285 
1286  if (tryPartialInline(Cloner))
1287  return {true, nullptr};
1288  }
1289  }
1290  }
1291 
1292  // Fall-thru to regular partial inlining if we:
1293  // i) can't find any cold regions to outline, or
1294  // ii) can't inline the outlined function anywhere.
1295  std::unique_ptr<FunctionOutliningInfo> OI = computeOutliningInfo(F);
1296  if (!OI)
1297  return {false, nullptr};
1298 
1299  FunctionCloner Cloner(F, OI.get(), ORE);
1300  Cloner.NormalizeReturnBlock();
1301 
1302  Function *OutlinedFunction = Cloner.doSingleRegionFunctionOutlining();
1303 
1304  if (!OutlinedFunction)
1305  return {false, nullptr};
1306 
1307  bool AnyInline = tryPartialInline(Cloner);
1308 
1309  if (AnyInline)
1310  return {true, OutlinedFunction};
1311 
1312  return {false, nullptr};
1313 }
1314 
1315 bool PartialInlinerImpl::tryPartialInline(FunctionCloner &Cloner) {
1316  if (Cloner.OutlinedFunctions.empty())
1317  return false;
1318 
1319  int SizeCost = 0;
1320  BlockFrequency WeightedRcost;
1321  int NonWeightedRcost;
1322  std::tie(SizeCost, NonWeightedRcost) = computeOutliningCosts(Cloner);
1323 
1324  // Only calculate RelativeToEntryFreq when we are doing single region
1325  // outlining.
1326  BranchProbability RelativeToEntryFreq;
1327  if (Cloner.ClonedOI) {
1328  RelativeToEntryFreq = getOutliningCallBBRelativeFreq(Cloner);
1329  } else
1330  // RelativeToEntryFreq doesn't make sense when we have more than one
1331  // outlined call because each call will have a different relative frequency
1332  // to the entry block. We can consider using the average, but the
1333  // usefulness of that information is questionable. For now, assume we never
1334  // execute the calls to outlined functions.
1335  RelativeToEntryFreq = BranchProbability(0, 1);
1336 
1337  WeightedRcost = BlockFrequency(NonWeightedRcost) * RelativeToEntryFreq;
1338 
1339  // The call sequence(s) to the outlined function(s) are larger than the sum of
1340  // the original outlined region size(s), it does not increase the chances of
1341  // inlining the function with outlining (The inliner uses the size increase to
1342  // model the cost of inlining a callee).
1343  if (!SkipCostAnalysis && Cloner.OutlinedRegionCost < SizeCost) {
1344  OptimizationRemarkEmitter OrigFuncORE(Cloner.OrigFunc);
1345  DebugLoc DLoc;
1346  BasicBlock *Block;
1347  std::tie(DLoc, Block) = getOneDebugLoc(Cloner.ClonedFunc);
1348  OrigFuncORE.emit([&]() {
1349  return OptimizationRemarkAnalysis(DEBUG_TYPE, "OutlineRegionTooSmall",
1350  DLoc, Block)
1351  << ore::NV("Function", Cloner.OrigFunc)
1352  << " not partially inlined into callers (Original Size = "
1353  << ore::NV("OutlinedRegionOriginalSize", Cloner.OutlinedRegionCost)
1354  << ", Size of call sequence to outlined function = "
1355  << ore::NV("NewSize", SizeCost) << ")";
1356  });
1357  return false;
1358  }
1359 
1360  assert(Cloner.OrigFunc->user_begin() == Cloner.OrigFunc->user_end() &&
1361  "F's users should all be replaced!");
1362 
1363  std::vector<User *> Users(Cloner.ClonedFunc->user_begin(),
1364  Cloner.ClonedFunc->user_end());
1365 
1366  DenseMap<User *, uint64_t> CallSiteToProfCountMap;
1367  auto CalleeEntryCount = Cloner.OrigFunc->getEntryCount();
1368  if (CalleeEntryCount)
1369  computeCallsiteToProfCountMap(Cloner.ClonedFunc, CallSiteToProfCountMap);
1370 
1371  uint64_t CalleeEntryCountV =
1372  (CalleeEntryCount ? CalleeEntryCount.getCount() : 0);
1373 
1374  bool AnyInline = false;
1375  for (User *User : Users) {
1376  CallSite CS = getCallSite(User);
1377 
1378  if (IsLimitReached())
1379  continue;
1380 
1381  OptimizationRemarkEmitter CallerORE(CS.getCaller());
1382  if (!shouldPartialInline(CS, Cloner, WeightedRcost, CallerORE))
1383  continue;
1384 
1385  // Construct remark before doing the inlining, as after successful inlining
1386  // the callsite is removed.
1387  OptimizationRemark OR(DEBUG_TYPE, "PartiallyInlined", CS.getInstruction());
1388  OR << ore::NV("Callee", Cloner.OrigFunc) << " partially inlined into "
1389  << ore::NV("Caller", CS.getCaller());
1390 
1391  InlineFunctionInfo IFI(nullptr, GetAssumptionCache, PSI);
1392  // We can only forward varargs when we outlined a single region, else we
1393  // bail on vararg functions.
1394  if (!InlineFunction(CS, IFI, nullptr, true,
1395  (Cloner.ClonedOI ? Cloner.OutlinedFunctions.back().first
1396  : nullptr)))
1397  continue;
1398 
1399  CallerORE.emit(OR);
1400 
1401  // Now update the entry count:
1402  if (CalleeEntryCountV && CallSiteToProfCountMap.count(User)) {
1403  uint64_t CallSiteCount = CallSiteToProfCountMap[User];
1404  CalleeEntryCountV -= std::min(CalleeEntryCountV, CallSiteCount);
1405  }
1406 
1407  AnyInline = true;
1408  NumPartialInlining++;
1409  // Update the stats
1410  if (Cloner.ClonedOI)
1411  NumPartialInlined++;
1412  else
1413  NumColdOutlinePartialInlined++;
1414 
1415  }
1416 
1417  if (AnyInline) {
1418  Cloner.IsFunctionInlined = true;
1419  if (CalleeEntryCount)
1420  Cloner.OrigFunc->setEntryCount(
1421  CalleeEntryCount.setCount(CalleeEntryCountV));
1422  OptimizationRemarkEmitter OrigFuncORE(Cloner.OrigFunc);
1423  OrigFuncORE.emit([&]() {
1424  return OptimizationRemark(DEBUG_TYPE, "PartiallyInlined", Cloner.OrigFunc)
1425  << "Partially inlined into at least one caller";
1426  });
1427 
1428  }
1429 
1430  return AnyInline;
1431 }
1432 
1433 bool PartialInlinerImpl::run(Module &M) {
1435  return false;
1436 
1437  std::vector<Function *> Worklist;
1438  Worklist.reserve(M.size());
1439  for (Function &F : M)
1440  if (!F.use_empty() && !F.isDeclaration())
1441  Worklist.push_back(&F);
1442 
1443  bool Changed = false;
1444  while (!Worklist.empty()) {
1445  Function *CurrFunc = Worklist.back();
1446  Worklist.pop_back();
1447 
1448  if (CurrFunc->use_empty())
1449  continue;
1450 
1451  bool Recursive = false;
1452  for (User *U : CurrFunc->users())
1453  if (Instruction *I = dyn_cast<Instruction>(U))
1454  if (I->getParent()->getParent() == CurrFunc) {
1455  Recursive = true;
1456  break;
1457  }
1458  if (Recursive)
1459  continue;
1460 
1461  std::pair<bool, Function * > Result = unswitchFunction(CurrFunc);
1462  if (Result.second)
1463  Worklist.push_back(Result.second);
1464  Changed |= Result.first;
1465  }
1466 
1467  return Changed;
1468 }
1469 
1471 
1472 INITIALIZE_PASS_BEGIN(PartialInlinerLegacyPass, "partial-inliner",
1473  "Partial Inliner", false, false)
1477 INITIALIZE_PASS_END(PartialInlinerLegacyPass, "partial-inliner",
1478  "Partial Inliner", false, false)
1479 
1481  return new PartialInlinerLegacyPass();
1482 }
1483 
1485  ModuleAnalysisManager &AM) {
1486  auto &FAM = AM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager();
1487 
1488  std::function<AssumptionCache &(Function &)> GetAssumptionCache =
1489  [&FAM](Function &F) -> AssumptionCache & {
1490  return FAM.getResult<AssumptionAnalysis>(F);
1491  };
1492 
1493  std::function<BlockFrequencyInfo &(Function &)> GetBFI =
1494  [&FAM](Function &F) -> BlockFrequencyInfo & {
1495  return FAM.getResult<BlockFrequencyAnalysis>(F);
1496  };
1497 
1498  std::function<TargetTransformInfo &(Function &)> GetTTI =
1499  [&FAM](Function &F) -> TargetTransformInfo & {
1500  return FAM.getResult<TargetIRAnalysis>(F);
1501  };
1502 
1504 
1505  if (PartialInlinerImpl(&GetAssumptionCache, &GetTTI, {GetBFI}, PSI)
1506  .run(M))
1507  return PreservedAnalyses::none();
1508  return PreservedAnalyses::all();
1509 }
A parsed version of the target data layout string in and methods for querying it. ...
Definition: DataLayout.h:111
Utility class for extracting code into a new function.
Definition: CodeExtractor.h:50
Diagnostic information for missed-optimization remarks.
void addIncoming(Value *V, BasicBlock *BB)
Add an incoming value to the end of the PHI list.
static PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
GCNRegPressure max(const GCNRegPressure &P1, const GCNRegPressure &P2)
DiagnosticInfoOptimizationBase::Argument NV
bool isNever() const
Definition: InlineCost.h:106
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)
Get the result of an analysis pass for a given IR unit.
Definition: PassManager.h:770
partial Partial Inliner
Compute iterated dominance frontiers using a linear time algorithm.
Definition: AllocatorList.h:24
size_type size() const
Determine the number of elements in the SetVector.
Definition: SetVector.h:78
void initializePartialInlinerLegacyPassPass(PassRegistry &)
A Module instance is used to store all the information related to an LLVM module. ...
Definition: Module.h:64
static cl::opt< int > MaxNumPartialInlining("max-partial-inlining", cl::init(-1), cl::Hidden, cl::ZeroOrMore, cl::desc("Max number of partial inlining. The default is unlimited"))
Implements a dense probed hash-table based set.
Definition: DenseSet.h:250
void recalculate(ParentType &Func)
recalculate - compute a dominator tree for the given function
Analysis providing profile information.
This class represents a function call, abstracting a target machine&#39;s calling convention.
Function * CloneFunction(Function *F, ValueToValueMapTy &VMap, ClonedCodeInfo *CodeInfo=nullptr)
CloneFunction - Return a copy of the specified function and add it to that function&#39;s module...
An immutable pass that tracks lazily created AssumptionCache objects.
An efficient, type-erasing, non-owning reference to a callable.
Definition: STLExtras.h:107
A cache of @llvm.assume calls within a function.
Analysis pass providing the TargetTransformInfo.
uint64_t getFrequency() const
Returns the frequency as a fixpoint number scaled by the entry frequency.
const BasicBlock & back() const
Definition: Function.h:665
bool hasFnAttribute(Attribute::AttrKind Kind) const
Return true if the function has the attribute.
Definition: Function.h:321
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly...
Definition: STLExtras.h:1042
STATISTIC(NumFunctions, "Total number of functions")
A debug info location.
Definition: DebugLoc.h:34
F(f)
InlineResult InlineFunction(CallInst *C, InlineFunctionInfo &IFI, AAResults *CalleeAAR=nullptr, bool InsertLifetime=true)
InlineFunction - This function inlines the called function into the basic block of the caller...
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction if the block is well formed or null if the block is not well forme...
Definition: BasicBlock.cpp:138
iv Induction Variable Users
Definition: IVUsers.cpp:52
InlineFunctionInfo - This class captures the data input to the InlineFunction call, and records the auxiliary results produced by it.
Definition: Cloning.h:177
Represents the cost of inlining a function.
Definition: InlineCost.h:65
static cl::opt< bool > ForceLiveExit("pi-force-live-exit-outline", cl::init(false), cl::Hidden, cl::desc("Force outline regions with live exits"))
iterator begin()
Instruction iterator methods.
Definition: BasicBlock.h:263
AnalysisUsage & addRequired()
ModulePass * createPartialInliningPass()
createPartialInliningPass - This pass inlines parts of functions.
#define INITIALIZE_PASS_DEPENDENCY(depName)
Definition: PassSupport.h:51
Value * removeIncomingValue(unsigned Idx, bool DeletePHIIfEmpty=true)
Remove an incoming value.
const DataLayout & getDataLayout() const
Get the data layout for the module&#39;s target platform.
Definition: Module.cpp:364
An analysis pass based on legacy pass manager to deliver ProfileSummaryInfo.
bool isInlineViable(Function &Callee)
Minimal filter to detect invalid constructs for inlining.
Diagnostic information for optimization analysis remarks.
static cl::opt< int > OutlineRegionFreqPercent("outline-region-freq-percent", cl::init(75), cl::Hidden, cl::ZeroOrMore, cl::desc("Relative frequency of outline region to " "the entry block"))
This file contains the simple types necessary to represent the attributes associated with functions a...
InstrTy * getInstruction() const
Definition: CallSite.h:92
partial inliner
Interval::succ_iterator succ_begin(Interval *I)
succ_begin/succ_end - define methods so that Intervals may be used just like BasicBlocks can with the...
Definition: Interval.h:103
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:245
static cl::opt< bool > SkipCostAnalysis("skip-partial-inlining-cost-analysis", cl::init(false), cl::ZeroOrMore, cl::ReallyHidden, cl::desc("Skip Cost Analysis"))
#define T
const T & getValue() const LLVM_LVALUE_FUNCTION
Definition: Optional.h:179
const BasicBlock * getSingleSuccessor() const
Return the successor of this block if it has a single successor.
Definition: BasicBlock.cpp:263
void replaceAllUsesWith(Value *V)
Change all uses of this to point to a new Value.
Definition: Value.cpp:439
static cl::opt< unsigned > MinBlockCounterExecution("min-block-execution", cl::init(100), cl::Hidden, cl::desc("Minimum block executions to consider " "its BranchProbabilityInfo valid"))
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree...
Definition: Dominators.h:145
amdgpu Simplify well known AMD library false Value * Callee
static PreservedAnalyses none()
Convenience factory function for the empty preserved set.
Definition: PassManager.h:157
Interval::succ_iterator succ_end(Interval *I)
Definition: Interval.h:106
void replaceUsesOfWith(Value *From, Value *To)
Replace uses of one Value with another.
Definition: User.cpp:21
PreservedAnalyses run(Module &M, ModuleAnalysisManager &)
static cl::opt< unsigned > ExtraOutliningPenalty("partial-inlining-extra-penalty", cl::init(0), cl::Hidden, cl::desc("A debug option to add additional penalty to the computed one."))
BlockFrequencyInfo pass uses BlockFrequencyInfoImpl implementation to estimate IR basic block frequen...
void setCallingConv(CallingConv::ID CC)
Definition: Function.h:217
static cl::opt< bool > MarkOutlinedColdCC("pi-mark-coldcc", cl::init(false), cl::Hidden, cl::desc("Mark outline function calls with ColdCC"))
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:410
bool isAlways() const
Definition: InlineCost.h:105
Control flow instructions. These all have token chains.
Definition: ISDOpcodes.h:598
static cl::opt< bool > DisablePartialInlining("disable-partial-inlining", cl::init(false), cl::Hidden, cl::desc("Disable partial inlining"))
Wrapper pass for TargetTransformInfo.
A set of analyses that are preserved following a run of a transformation pass.
Definition: PassManager.h:154
const BasicBlock * getSinglePredecessor() const
Return the predecessor of this block if it has a single predecessor block.
Definition: BasicBlock.cpp:236
LLVM Basic Block Representation.
Definition: BasicBlock.h:58
Conditional or Unconditional Branch instruction.
An analysis pass based on the new PM to deliver ProfileSummaryInfo.
static cl::opt< float > ColdBranchRatio("cold-branch-ratio", cl::init(0.1), cl::Hidden, cl::desc("Minimum BranchProbability to consider a region cold."))
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
Value * getIncomingValueForBlock(const BasicBlock *BB) const
const Instruction & front() const
Definition: BasicBlock.h:275
std::pair< iterator, bool > insert(const ValueT &V)
Definition: DenseSet.h:188
static ManagedStatic< OptionRegistry > OR
Definition: Options.cpp:31
Diagnostic information for applied optimization remarks.
Represent the analysis usage information of a pass.
void setCallingConv(CallingConv::ID CC)
Set the calling convention of the call.
Definition: CallSite.h:316
Used in the streaming interface as the general argument type.
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
Definition: PassManager.h:160
auto find(R &&Range, const T &Val) -> decltype(adl_begin(Range))
Provide wrappers to std::find which take ranges instead of having to pass begin/end explicitly...
Definition: STLExtras.h:1063
INITIALIZE_PASS_END(RegBankSelect, DEBUG_TYPE, "Assign register bank of generic virtual registers", false, false) RegBankSelect
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
InlineCost getInlineCost(CallSite CS, const InlineParams &Params, TargetTransformInfo &CalleeTTI, std::function< AssumptionCache &(Function &)> &GetAssumptionCache, Optional< function_ref< BlockFrequencyInfo &(Function &)>> GetBFI, ProfileSummaryInfo *PSI, OptimizationRemarkEmitter *ORE=nullptr)
Get an InlineCost object representing the cost of inlining this callsite.
Intrinsic::ID getIntrinsicID() const
Return the intrinsic ID of this intrinsic.
Definition: IntrinsicInst.h:51
A function analysis which provides an AssumptionCache.
Analysis pass which computes BlockFrequencyInfo.
Iterator for intrusive lists based on ilist_node.
void emit(DiagnosticInfoOptimizationBase &OptDiag)
Output the remark via the diagnostic handler and to the optimization record file. ...
This pass provides access to the codegen interfaces that are needed for IR-level transformations.
iterator end()
Definition: BasicBlock.h:265
InlineParams getInlineParams()
Generate the parameters to tune the inline cost analysis based only on the commandline options...
This is a &#39;vector&#39; (really, a variable-sized array), optimized for the case when the array is small...
Definition: SmallVector.h:847
Module.h This file contains the declarations for the Module class.
static BranchProbability getBranchProbability(uint64_t Numerator, uint64_t Denominator)
static PHINode * Create(Type *Ty, unsigned NumReservedValues, const Twine &NameStr="", Instruction *InsertBefore=nullptr)
Constructors - NumReservedValues is a hint for the number of incoming edges that this phi node will h...
pred_range predecessors(BasicBlock *BB)
Definition: CFG.h:123
unsigned getNumIncomingValues() const
Return the number of incoming edges.
#define DEBUG_TYPE
BBTy * getParent() const
Get the basic block containing the call site.
Definition: CallSite.h:97
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:133
static cl::opt< unsigned > MaxNumInlineBlocks("max-num-inline-blocks", cl::init(5), cl::Hidden, cl::desc("Max number of blocks to be partially inlined"))
iterator_range< user_iterator > users()
Definition: Value.h:400
int getCost() const
Get the inline cost estimate.
Definition: InlineCost.h:111
static void DFS(BasicBlock *Root, SetVector< BasicBlock *> &Set)
int getCostDelta() const
Get the cost delta from the threshold for inlining.
Definition: InlineCost.h:132
static cl::opt< bool > DisableMultiRegionPartialInline("disable-mr-partial-inlining", cl::init(false), cl::Hidden, cl::desc("Disable multi-region partial inlining"))
INITIALIZE_PASS_BEGIN(PartialInlinerLegacyPass, "partial-inliner", "Partial Inliner", false, false) INITIALIZE_PASS_END(PartialInlinerLegacyPass
unsigned succ_size(const Instruction *I)
Definition: CFG.h:259
This file provides various utilities for inspecting and working with the control flow graph in LLVM I...
FunTy * getCaller() const
Return the caller function for this call site.
Definition: CallSite.h:267
Analysis providing branch probability information.
const DebugLoc & getDebugLoc() const
Return the debug location for this node as a DebugLoc.
Definition: Instruction.h:307
int getCallsiteCost(CallSite CS, const DataLayout &DL)
Return the cost associated with a callsite, including parameter passing and the call/return instructi...
static cl::opt< bool > TracePartialInlining("trace-partial-inlining", cl::init(false), cl::Hidden, cl::desc("Trace partial inlining."))
size_t size() const
Definition: Module.h:593
const Function * getParent() const
Return the enclosing method, or null if none.
Definition: BasicBlock.h:107
unsigned pred_size(const BasicBlock *BB)
Definition: CFG.h:120
#define I(x, y, z)
Definition: MD5.cpp:58
static bool hasProfileData(Function *F, FunctionOutliningInfo *OI)
ModulePass class - This class is used to implement unstructured interprocedural optimizations and ana...
Definition: Pass.h:225
LLVM_NODISCARD std::enable_if<!is_simple_type< Y >::value, typename cast_retty< X, const Y >::ret_type >::type dyn_cast(const Y &Val)
Definition: Casting.h:323
void eraseFromParent()
eraseFromParent - This method unlinks &#39;this&#39; from the containing module and deletes it...
Definition: Function.cpp:215
bool isUnconditional() const
bool isDeclaration() const
Return true if the primary definition of this global value is outside of the current translation unit...
Definition: Globals.cpp:206
FunTy * getCalledFunction() const
Return the function being called if this is a direct call, otherwise return null (if it&#39;s an indirect...
Definition: CallSite.h:107
Multiway switch.
bool hasAddressTaken(const User **=nullptr) const
hasAddressTaken - returns true if there are any uses of this function other than direct calls or invo...
Definition: Function.cpp:1255
static cl::opt< float > MinRegionSizeRatio("min-region-size-ratio", cl::init(0.1), cl::Hidden, cl::desc("Minimum ratio comparing relative sizes of each " "outline candidate and original function"))
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
user_iterator user_begin()
Definition: Value.h:376
const BasicBlock & front() const
Definition: Function.h:663
Module * getParent()
Get the module that this global value is contained inside of...
Definition: GlobalValue.h:566
LLVM Value Representation.
Definition: Value.h:73
succ_range successors(Instruction *I)
Definition: CFG.h:262
Invoke instruction.
AssumptionCache & getAssumptionCache(Function &F)
Get the cached assumptions for a function.
print Print MemDeps of function
A container for analyses that lazily runs them and caches their results.
static MachineBasicBlock * OtherSucc(MachineBasicBlock *MBB, MachineBasicBlock *Succ)
This pass exposes codegen information to IR-level passes.
bool extractProfMetadata(uint64_t &TrueVal, uint64_t &FalseVal) const
Retrieve the raw weight values of a conditional branch or select.
Definition: Metadata.cpp:1315
The optimization diagnostic interface.
bool hasProfileData() const
Return true if the function is annotated with profile data.
Definition: Function.h:308
bool use_empty() const
Definition: Value.h:323
Optional< uint64_t > getBlockProfileCount(const BasicBlock *BB) const
Returns the estimated profile count of BB.
A wrapper class for inspecting calls to intrinsic functions.
Definition: IntrinsicInst.h:44
const BasicBlock * getParent() const
Definition: Instruction.h:67
An analysis over an "outer" IR unit that provides access to an analysis manager over an "inner" IR un...
Definition: PassManager.h:1038
user_iterator user_end()
Definition: Value.h:384
bool is_contained(R &&Range, const E &Element)
Wrapper function around std::find to detect if an element exists in a container.
Definition: STLExtras.h:1101