LLVM  10.0.0svn
PartialInlining.cpp
Go to the documentation of this file.
1 //===- PartialInlining.cpp - Inline parts of functions --------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This pass performs partial inlining, typically by inlining an if statement
10 // that surrounds the body of the function.
11 //
12 //===----------------------------------------------------------------------===//
13 
15 #include "llvm/ADT/DenseMap.h"
16 #include "llvm/ADT/DenseSet.h"
17 #include "llvm/ADT/None.h"
18 #include "llvm/ADT/Optional.h"
19 #include "llvm/ADT/STLExtras.h"
20 #include "llvm/ADT/SmallVector.h"
21 #include "llvm/ADT/Statistic.h"
25 #include "llvm/Analysis/LoopInfo.h"
30 #include "llvm/IR/Attributes.h"
31 #include "llvm/IR/BasicBlock.h"
32 #include "llvm/IR/CFG.h"
33 #include "llvm/IR/CallSite.h"
34 #include "llvm/IR/DebugLoc.h"
35 #include "llvm/IR/DiagnosticInfo.h"
36 #include "llvm/IR/Dominators.h"
37 #include "llvm/IR/Function.h"
38 #include "llvm/IR/InstrTypes.h"
39 #include "llvm/IR/Instruction.h"
40 #include "llvm/IR/Instructions.h"
41 #include "llvm/IR/IntrinsicInst.h"
42 #include "llvm/IR/Intrinsics.h"
43 #include "llvm/IR/Module.h"
44 #include "llvm/IR/User.h"
45 #include "llvm/Pass.h"
48 #include "llvm/Support/Casting.h"
51 #include "llvm/Transforms/IPO.h"
55 #include <algorithm>
56 #include <cassert>
57 #include <cstdint>
58 #include <functional>
59 #include <iterator>
60 #include <memory>
61 #include <tuple>
62 #include <vector>
63 
64 using namespace llvm;
65 
66 #define DEBUG_TYPE "partial-inlining"
67 
// Counters reported by this pass. Within the visible part of the file only
// NumColdRegionsFound is updated (once per cold candidate region recorded by
// computeOutliningColdRegionsInfo).
68 STATISTIC(NumPartialInlined,
69  "Number of callsites functions partially inlined into.");
70 STATISTIC(NumColdOutlinePartialInlined, "Number of times functions with "
71  "cold outlined regions were partially "
72  "inlined into its caller(s).");
73 STATISTIC(NumColdRegionsFound,
74  "Number of cold single entry/exit regions found.");
75 STATISTIC(NumColdRegionsOutlined,
76  "Number of cold single entry/exit regions outlined.");
77 
// Command line knobs of the partial inliner.
// NOTE(review): the embedded listing numbers skip 84, 110, 116, 128, 132, 138,
// 147 and 151 in this span, so several option declarations below appear
// without their head line (type and variable name) -- confirm against the
// real source before relying on this text.
78 // Command line option to disable partial-inlining. The default is false:
79 static cl::opt<bool>
80  DisablePartialInlining("disable-partial-inlining", cl::init(false),
81  cl::Hidden, cl::desc("Disable partial inlining"));
82 // Command line option to disable multi-region partial-inlining. The default is
83 // false:
85  "disable-mr-partial-inlining", cl::init(false), cl::Hidden,
86  cl::desc("Disable multi-region partial inlining"));
87 
88 // Command line option to force outlining in regions with live exit variables.
89 // The default is false:
90 static cl::opt<bool>
91  ForceLiveExit("pi-force-live-exit-outline", cl::init(false), cl::Hidden,
92  cl::desc("Force outline regions with live exits"));
93 
94 // Command line option to enable marking outline functions with Cold Calling
95 // Convention. The default is false:
96 static cl::opt<bool>
97  MarkOutlinedColdCC("pi-mark-coldcc", cl::init(false), cl::Hidden,
98  cl::desc("Mark outline function calls with ColdCC"));
99 
100 #ifndef NDEBUG
101 // Command line option to debug partial-inlining. The default is false:
102 static cl::opt<bool> TracePartialInlining("trace-partial-inlining",
103  cl::init(false), cl::Hidden,
104  cl::desc("Trace partial inlining."));
105 #endif
106 
107 // This is an option used by testing:
108 static cl::opt<bool> SkipCostAnalysis("skip-partial-inlining-cost-analysis",
109  cl::init(false), cl::ZeroOrMore,
111  cl::desc("Skip Cost Analysis"));
112 // Used to determine if a cold region is worth outlining based on
113 // its inlining cost compared to the original function. Default is set at 10%,
114 // i.e. the cold region must reduce the inlining cost of the original function
115 // by at least 10%.
117  "min-region-size-ratio", cl::init(0.1), cl::Hidden,
118  cl::desc("Minimum ratio comparing relative sizes of each "
119  "outline candidate and original function"));
120 // Used to tune the minimum number of execution counts needed in the predecessor
121 // block to the cold edge, i.e. the confidence interval.
122 static cl::opt<unsigned>
123  MinBlockCounterExecution("min-block-execution", cl::init(100), cl::Hidden,
124  cl::desc("Minimum block executions to consider "
125  "its BranchProbabilityInfo valid"));
126 // Used to determine when an edge is considered cold. Default is set to 10%,
127 // i.e. if the branch probability is 10% or less, the edge is deemed 'cold'.
129  "cold-branch-ratio", cl::init(0.1), cl::Hidden,
130  cl::desc("Minimum BranchProbability to consider a region cold."));
131 
// Upper bound on the number of blocks that stay in the partially inlined part
// (compared against FunctionOutliningInfo::GetNumInlinedBlocks()).
133  "max-num-inline-blocks", cl::init(5), cl::Hidden,
134  cl::desc("Max number of blocks to be partially inlined"));
135 
136 // Command line option to set the maximum number of partial inlining allowed
137 // for the module. The default value of -1 means no limit.
139  "max-partial-inlining", cl::init(-1), cl::Hidden, cl::ZeroOrMore,
140  cl::desc("Max number of partial inlining. The default is unlimited"));
141 
142 // Used only when PGO or user annotated branch data is absent. It is
143 // the least value that is used to weigh the outline region. If BFI
144 // produces a larger value, the BFI value will be used.
145 static cl::opt<int>
146  OutlineRegionFreqPercent("outline-region-freq-percent", cl::init(75),
148  cl::desc("Relative frequency of outline region to "
149  "the entry block"));
150 
152  "partial-inlining-extra-penalty", cl::init(0), cl::Hidden,
153  cl::desc("A debug option to add additional penalty to the computed one."));
154 
155 namespace {
156 
// Describes a single-region outlining candidate: the guarding entry blocks
// that stay in the caller-visible part, the return block they may branch to,
// and the head of the region that is to be outlined.
// NOTE(review): listing line 166 is absent here; the `Entries` member used by
// GetNumInlinedBlocks() is presumably declared on it -- confirm.
157 struct FunctionOutliningInfo {
158  FunctionOutliningInfo() = default;
159 
160  // Returns the number of blocks to be inlined including all blocks
161  // in Entries and one return block.
162  unsigned GetNumInlinedBlocks() const { return Entries.size() + 1; }
163 
164  // A set of blocks including the function entry that guard
165  // the region to be outlined.
167 
168  // The return block that is not included in the outlined region.
169  BasicBlock *ReturnBlock = nullptr;
170 
171  // The dominating block of the region to be outlined.
172  BasicBlock *NonReturnBlock = nullptr;
173 
174  // The set of blocks in Entries that are predecessors to ReturnBlock
175  SmallVector<BasicBlock *, 4> ReturnBlockPreds;
176 };
177 
// Collects the cold regions of one function that are candidates for
// outlining; computeOutliningColdRegionsInfo appends one OutlineRegionInfo to
// ORI per candidate.
// NOTE(review): listing lines 189 and 195 are absent here; the `Region` and
// `ORI` members initialised below are presumably declared on them -- confirm.
178 struct FunctionOutliningMultiRegionInfo {
179  FunctionOutliningMultiRegionInfo()
180  : ORI() {}
181 
182  // Container for outline regions
183  struct OutlineRegionInfo {
 // Copies the blocks of Region and records the region's entry block, the
 // block holding its exit edge, and the block control continues at afterwards.
184  OutlineRegionInfo(ArrayRef<BasicBlock *> Region,
185  BasicBlock *EntryBlock, BasicBlock *ExitBlock,
186  BasicBlock *ReturnBlock)
187  : Region(Region.begin(), Region.end()), EntryBlock(EntryBlock),
188  ExitBlock(ExitBlock), ReturnBlock(ReturnBlock) {}
190  BasicBlock *EntryBlock;
191  BasicBlock *ExitBlock;
192  BasicBlock *ReturnBlock;
193  };
194 
196 };
197 
// Implementation of the partial inliner; the legacy pass wrapper constructs
// one instance per module and calls run().
// NOTE(review): the embedded listing numbers skip 201, 203, 204, 227, 230,
// 266, 267, 275 and 288 in this struct, so some constructor parameters,
// members (e.g. GetBFI) and one shouldPartialInline parameter are not visible.
198 struct PartialInlinerImpl {
199 
200  PartialInlinerImpl(
202  function_ref<AssumptionCache *(Function &)> LookupAC,
205  ProfileSummaryInfo *ProfSI)
206  : GetAssumptionCache(GetAC), LookupAssumptionCache(LookupAC),
207  GetTTI(GTTI), GetBFI(GBFI), PSI(ProfSI) {}
208 
209  bool run(Module &M);
210  // Main part of the transformation that calls helper functions to find
211  // outlining candidates, clone & outline the function, and attempt to
212  // partially inline the resulting function. Returns true if
213  // inlining was successful, false otherwise. Also returns the outline
214  // function (only if we partially inlined early returns) as there is a
215  // possibility to further "peel" early return statements that were left in the
216  // outline function due to code size.
217  std::pair<bool, Function *> unswitchFunction(Function *F);
218 
219  // This class speculatively clones the function to be partial inlined.
220  // At the end of partial inlining, the remaining callsites to the cloned
221  // function that are not partially inlined will be fixed up to reference
222  // the original function, and the cloned function will be erased.
223  struct FunctionCloner {
224  // Two constructors, one for single region outlining, the other for
225  // multi-region outlining.
226  FunctionCloner(Function *F, FunctionOutliningInfo *OI,
228  function_ref<AssumptionCache *(Function &)> LookupAC);
229  FunctionCloner(Function *F, FunctionOutliningMultiRegionInfo *OMRI,
231  function_ref<AssumptionCache *(Function &)> LookupAC);
232  ~FunctionCloner();
233 
234  // Prepare for function outlining: making sure there is only
235  // one incoming edge from the extracted/outlined region to
236  // the return block.
237  void NormalizeReturnBlock();
238 
239  // Do function outlining for cold regions.
240  bool doMultiRegionFunctionOutlining();
241  // Do function outlining for region after early return block(s).
242  // NOTE: For vararg functions that do the vararg handling in the outlined
243  // function, we temporarily generate IR that does not properly
244  // forward varargs to the outlined function. Calling InlineFunction
245  // will update calls to the outlined functions to properly forward
246  // the varargs.
247  Function *doSingleRegionFunctionOutlining();
248 
249  Function *OrigFunc = nullptr;
250  Function *ClonedFunc = nullptr;
251 
252  typedef std::pair<Function *, BasicBlock *> FuncBodyCallerPair;
253  // Keep track of Outlined Functions and the basic block they're called from.
254  SmallVector<FuncBodyCallerPair, 4> OutlinedFunctions;
255 
256  // ClonedFunc is inlined in one of its callers after function
257  // outlining.
258  bool IsFunctionInlined = false;
259  // The cost of the region to be outlined.
260  int OutlinedRegionCost = 0;
261  // ClonedOI is specific to outlining non-early return blocks.
262  std::unique_ptr<FunctionOutliningInfo> ClonedOI = nullptr;
263  // ClonedOMRI is specific to outlining cold regions.
264  std::unique_ptr<FunctionOutliningMultiRegionInfo> ClonedOMRI = nullptr;
 // Block frequencies of ClonedFunc; read by getOutliningCallBBRelativeFreq.
265  std::unique_ptr<BlockFrequencyInfo> ClonedFuncBFI = nullptr;
268  };
269 
270 private:
 // Number of partial inlinings so far; checked against MaxNumPartialInlining
 // in IsLimitReached().
271  int NumPartialInlining = 0;
272  std::function<AssumptionCache &(Function &)> *GetAssumptionCache;
273  function_ref<AssumptionCache *(Function &)> LookupAssumptionCache;
274  std::function<TargetTransformInfo &(Function &)> *GetTTI;
276  ProfileSummaryInfo *PSI;
277 
278  // Return the frequency of the outlining call BB relative to F's entry point.
279  // The result is no larger than 1 and is represented using BP.
280  // (Note that the outlined region's 'head' block can only have incoming
281  // edges from the guarding entry blocks).
282  BranchProbability getOutliningCallBBRelativeFreq(FunctionCloner &Cloner);
283 
284  // Return true if the callee of CS should be partially inlined with
285  // profit.
286  bool shouldPartialInline(CallSite CS, FunctionCloner &Cloner,
287  BlockFrequency WeightedOutliningRcost,
289 
290  // Try to inline DuplicateFunction (cloned from F with call to
291  // the OutlinedFunction) into its callers. Return true
292  // if there is any successful inlining.
293  bool tryPartialInline(FunctionCloner &Cloner);
294 
295  // Compute the mapping from use site of DuplicateFunction to the enclosing
296  // BB's profile count.
297  void computeCallsiteToProfCountMap(Function *DuplicateFunction,
298  DenseMap<User *, uint64_t> &SiteCountMap);
299 
 // True once a limit is configured (MaxNumPartialInlining != -1) and the
 // number of partial inlinings performed has reached it.
300  bool IsLimitReached() {
301  return (MaxNumPartialInlining != -1 &&
302  NumPartialInlining >= MaxNumPartialInlining);
303  }
304 
 // Wraps a call or invoke user in a CallSite; any other kind of user hits
 // llvm_unreachable.
305  static CallSite getCallSite(User *U) {
306  CallSite CS;
307  if (CallInst *CI = dyn_cast<CallInst>(U))
308  CS = CallSite(CI);
309  else if (InvokeInst *II = dyn_cast<InvokeInst>(U))
310  CS = CallSite(II);
311  else
312  llvm_unreachable("All uses must be calls");
313  return CS;
314  }
315 
 // Returns the call site of the first user of F.
 // NOTE(review): dereferences user_begin() without an emptiness check, so this
 // assumes F has at least one user -- confirm at the call sites.
316  static CallSite getOneCallSiteTo(Function *F) {
317  User *User = *F->user_begin();
318  return getCallSite(User);
319  }
320 
 // Returns the debug location and parent block of one call site of F (the
 // one picked by getOneCallSiteTo).
321  std::tuple<DebugLoc, BasicBlock *> getOneDebugLoc(Function *F) {
322  CallSite CS = getOneCallSiteTo(F);
323  DebugLoc DLoc = CS.getInstruction()->getDebugLoc();
324  BasicBlock *Block = CS.getParent();
325  return std::make_tuple(DLoc, Block);
326  }
327 
328  // Returns the costs associated with function outlining:
329  // - The first value is the estimated size of the new call sequence in
330  // the basic block(s) that call the outlined function(s);
331  // - The second value is the non-weighted runtime cost for making the call
332  // to the outlined function, including the additional setup cost in the
333  // outlined function itself.
334  std::tuple<int, int> computeOutliningCosts(FunctionCloner &Cloner);
335 
336  // Compute the 'InlineCost' of block BB. InlineCost is a proxy used to
337  // approximate both the size and runtime cost (Note that in the current
338  // inline cost analysis, there is no clear distinction there either).
339  static int computeBBInlineCost(BasicBlock *BB);
340 
 // Candidate discovery: the first finds a single-region candidate, the second
 // finds cold regions using profile data. Both return a null pointer when no
 // candidate exists.
341  std::unique_ptr<FunctionOutliningInfo> computeOutliningInfo(Function *F);
342  std::unique_ptr<FunctionOutliningMultiRegionInfo>
343  computeOutliningColdRegionsInfo(Function *F, OptimizationRemarkEmitter &ORE);
344 };
345 
// Legacy pass manager wrapper: gathers the analyses needed by
// PartialInlinerImpl and forwards the module to it.
// NOTE(review): the embedded listing numbers skip 350, 354-356 and 364, so the
// constructor body, the getAnalysisUsage body and the head of the TTIWP
// declaration are not visible here.
346 struct PartialInlinerLegacyPass : public ModulePass {
347  static char ID; // Pass identification, replacement for typeid
348 
349  PartialInlinerLegacyPass() : ModulePass(ID) {
351  }
352 
353  void getAnalysisUsage(AnalysisUsage &AU) const override {
357  }
358 
 // Returns false without doing anything when skipModule(M) says so; otherwise
 // returns the result of PartialInlinerImpl::run.
359  bool runOnModule(Module &M) override {
360  if (skipModule(M))
361  return false;
362 
363  AssumptionCacheTracker *ACT = &getAnalysis<AssumptionCacheTracker>();
365  &getAnalysis<TargetTransformInfoWrapperPass>();
366  ProfileSummaryInfo *PSI =
367  &getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI();
368 
 // The two std::function objects below are passed by address, so they must
 // stay alive for the whole run(M) call at the end of this function.
369  std::function<AssumptionCache &(Function &)> GetAssumptionCache =
370  [&ACT](Function &F) -> AssumptionCache & {
371  return ACT->getAssumptionCache(F);
372  };
373 
374  auto LookupAssumptionCache = [ACT](Function &F) -> AssumptionCache * {
375  return ACT->lookupAssumptionCache(F);
376  };
377 
378  std::function<TargetTransformInfo &(Function &)> GetTTI =
379  [&TTIWP](Function &F) -> TargetTransformInfo & {
380  return TTIWP->getTTI(F);
381  };
382 
 // No BFI callback is supplied here (NoneType::None); the implementation
 // builds a function-local BlockFrequencyInfo when GetBFI is unset.
383  return PartialInlinerImpl(&GetAssumptionCache, LookupAssumptionCache,
384  &GetTTI, NoneType::None, PSI)
385  .run(M);
386  }
387 };
388 
389 } // end anonymous namespace
390 
// Searches F for cold single-entry/single-exit regions that are worth
// outlining and returns them, or a null pointer when there is no
// instrumentation profile or no candidate was found. Remarks about rejected
// regions are emitted through ORE.
// NOTE(review): the embedded listing numbers skip 393, 400, 455 and 514, so
// the end of the parameter list, the declaration of BFI and two lines inside
// the NDEBUG-only tracing are not visible here.
391 std::unique_ptr<FunctionOutliningMultiRegionInfo>
392 PartialInlinerImpl::computeOutliningColdRegionsInfo(Function *F,
394  BasicBlock *EntryBlock = &F->front();
395 
396  DominatorTree DT(*F);
397  LoopInfo LI(DT);
398  BranchProbabilityInfo BPI(*F, LI);
399  std::unique_ptr<BlockFrequencyInfo> ScopedBFI;
 // Without a GetBFI callback, compute a BFI that lives only for this call.
401  if (!GetBFI) {
402  ScopedBFI.reset(new BlockFrequencyInfo(*F, BPI, LI));
403  BFI = ScopedBFI.get();
404  } else
405  BFI = &(*GetBFI)(*F);
406 
407  // Return if we don't have profiling information.
 // NOTE(review): this check does not use any of the analyses built above, so
 // it could run first and avoid constructing them for unprofiled modules.
408  if (!PSI->hasInstrumentationProfile())
409  return std::unique_ptr<FunctionOutliningMultiRegionInfo>();
410 
411  std::unique_ptr<FunctionOutliningMultiRegionInfo> OutliningInfo =
412  std::make_unique<FunctionOutliningMultiRegionInfo>();
413 
 // A region qualifies as single-entry when it has more than one block and its
 // first block has exactly one predecessor.
414  auto IsSingleEntry = [](SmallVectorImpl<BasicBlock *> &BlockList) {
415  BasicBlock *Dom = BlockList.front();
416  return BlockList.size() > 1 && Dom->hasNPredecessors(1);
417  };
418 
 // Returns the block of the region that owns the only edge leaving the
 // region. Returns nullptr when no edge leaves the region, or (after emitting
 // a missed-optimization remark) when a second leaving edge is found.
419  auto IsSingleExit =
420  [&ORE](SmallVectorImpl<BasicBlock *> &BlockList) -> BasicBlock * {
421  BasicBlock *ExitBlock = nullptr;
422  for (auto *Block : BlockList) {
423  for (auto SI = succ_begin(Block); SI != succ_end(Block); ++SI) {
424  if (!is_contained(BlockList, *SI)) {
425  if (ExitBlock) {
426  ORE.emit([&]() {
427  return OptimizationRemarkMissed(DEBUG_TYPE, "MultiExitRegion",
428  &SI->front())
429  << "Region dominated by "
430  << ore::NV("Block", BlockList.front()->getName())
431  << " has more than one region exit edge.";
432  });
433  return nullptr;
434  } else
435  ExitBlock = Block;
436  }
437  }
438  }
439  return ExitBlock;
440  };
441 
 // Profile count of BB, or 0 when BFI has no count for it.
442  auto BBProfileCount = [BFI](BasicBlock *BB) {
443  return BFI->getBlockProfileCount(BB)
444  ? BFI->getBlockProfileCount(BB).getValue()
445  : 0;
446  };
447 
448  // Use the same computeBBInlineCost function to compute the cost savings of
449  // outlining the candidate region.
450  int OverallFunctionCost = 0;
451  for (auto &BB : *F)
452  OverallFunctionCost += computeBBInlineCost(&BB);
453 
454 #ifndef NDEBUG
456  dbgs() << "OverallFunctionCost = " << OverallFunctionCost << "\n";
457 #endif
 // A region must cost at least this much to be worth outlining.
458  int MinOutlineRegionCost =
459  static_cast<int>(OverallFunctionCost * MinRegionSizeRatio);
 // ColdBranchRatio expressed as a fraction whose denominator is
 // MinBlockCounterExecution.
460  BranchProbability MinBranchProbability(
461  static_cast<int>(ColdBranchRatio * MinBlockCounterExecution),
462  MinBlockCounterExecution);
463  bool ColdCandidateFound = false;
464  BasicBlock *CurrEntry = EntryBlock;
465  std::vector<BasicBlock *> DFS;
466  DenseMap<BasicBlock *, bool> VisitedMap;
467  DFS.push_back(CurrEntry);
468  VisitedMap[CurrEntry] = true;
469  // Use Depth First Search on the basic blocks to find CFG edges that are
470  // considered cold.
471  // Cold regions considered must also have its inline cost compared to the
472  // overall inline cost of the original function. The region is outlined only
473  // if it reduced the inline cost of the function by 'MinOutlineRegionCost' or
474  // more.
475  while (!DFS.empty()) {
476  auto *thisBB = DFS.back();
477  DFS.pop_back();
478  // Only consider regions with predecessor blocks that are considered
479  // not-cold (default: part of the top 99.99% of all block counters)
480  // AND greater than our minimum block execution count (default: 100).
481  if (PSI->isColdBlock(thisBB, BFI) ||
482  BBProfileCount(thisBB) < MinBlockCounterExecution)
483  continue;
484  for (auto SI = succ_begin(thisBB); SI != succ_end(thisBB); ++SI) {
485  if (VisitedMap[*SI])
486  continue;
 // The successor is marked and queued before the cold-edge test, so it is
 // still explored later even if this edge is not cold.
487  VisitedMap[*SI] = true;
488  DFS.push_back(*SI);
489  // If branch isn't cold, we skip to the next one.
490  BranchProbability SuccProb = BPI.getEdgeProbability(thisBB, *SI);
491  if (SuccProb > MinBranchProbability)
492  continue;
493 #ifndef NDEBUG
494  if (TracePartialInlining) {
495  dbgs() << "Found cold edge: " << thisBB->getName() << "->"
496  << (*SI)->getName() << "\nBranch Probability = " << SuccProb
497  << "\n";
498  }
499 #endif
 // The candidate region is everything dominated by the cold successor.
500  SmallVector<BasicBlock *, 8> DominateVector;
501  DT.getDescendants(*SI, DominateVector);
502  // We can only outline single entry regions (for now).
503  if (!IsSingleEntry(DominateVector))
504  continue;
505  BasicBlock *ExitBlock = nullptr;
506  // We can only outline single exit regions (for now).
507  if (!(ExitBlock = IsSingleExit(DominateVector)))
508  continue;
509  int OutlineRegionCost = 0;
510  for (auto *BB : DominateVector)
511  OutlineRegionCost += computeBBInlineCost(BB);
512 
513 #ifndef NDEBUG
515  dbgs() << "OutlineRegionCost = " << OutlineRegionCost << "\n";
516 #endif
517 
 // Reject regions too small to pay for the outlining.
518  if (OutlineRegionCost < MinOutlineRegionCost) {
519  ORE.emit([&]() {
520  return OptimizationRemarkAnalysis(DEBUG_TYPE, "TooCostly",
521  &SI->front())
522  << ore::NV("Callee", F) << " inline cost-savings smaller than "
523  << ore::NV("Cost", MinOutlineRegionCost);
524  });
525  continue;
526  }
527  // For now, ignore blocks that belong to a SISE region that is a
528  // candidate for outlining. In the future, we may want to look
529  // at inner regions because the outer region may have live-exit
530  // variables.
531  for (auto *BB : DominateVector)
532  VisitedMap[BB] = true;
533  // ReturnBlock here means the block after the outline call
 // NOTE(review): presumably null when ExitBlock does not have exactly one
 // successor; the assert below is disabled, so consumers of ReturnBlock must
 // tolerate null -- confirm.
534  BasicBlock *ReturnBlock = ExitBlock->getSingleSuccessor();
535  // assert(ReturnBlock && "ReturnBlock is NULL somehow!");
536  FunctionOutliningMultiRegionInfo::OutlineRegionInfo RegInfo(
537  DominateVector, DominateVector.front(), ExitBlock, ReturnBlock);
538  OutliningInfo->ORI.push_back(RegInfo);
539 #ifndef NDEBUG
540  if (TracePartialInlining) {
541  dbgs() << "Found Cold Candidate starting at block: "
542  << DominateVector.front()->getName() << "\n";
543  }
544 #endif
545  ColdCandidateFound = true;
546  NumColdRegionsFound++;
547  }
548  }
549  if (ColdCandidateFound)
550  return OutliningInfo;
551  else
552  return std::unique_ptr<FunctionOutliningMultiRegionInfo>();
553 }
554 
555 std::unique_ptr<FunctionOutliningInfo>
556 PartialInlinerImpl::computeOutliningInfo(Function *F) {
557  BasicBlock *EntryBlock = &F->front();
558  BranchInst *BR = dyn_cast<BranchInst>(EntryBlock->getTerminator());
559  if (!BR || BR->isUnconditional())
560  return std::unique_ptr<FunctionOutliningInfo>();
561 
562  // Returns true if Succ is BB's successor
563  auto IsSuccessor = [](BasicBlock *Succ, BasicBlock *BB) {
564  return is_contained(successors(BB), Succ);
565  };
566 
567  auto IsReturnBlock = [](BasicBlock *BB) {
568  Instruction *TI = BB->getTerminator();
569  return isa<ReturnInst>(TI);
570  };
571 
572  auto GetReturnBlock = [&](BasicBlock *Succ1, BasicBlock *Succ2) {
573  if (IsReturnBlock(Succ1))
574  return std::make_tuple(Succ1, Succ2);
575  if (IsReturnBlock(Succ2))
576  return std::make_tuple(Succ2, Succ1);
577 
578  return std::make_tuple<BasicBlock *, BasicBlock *>(nullptr, nullptr);
579  };
580 
581  // Detect a triangular shape:
582  auto GetCommonSucc = [&](BasicBlock *Succ1, BasicBlock *Succ2) {
583  if (IsSuccessor(Succ1, Succ2))
584  return std::make_tuple(Succ1, Succ2);
585  if (IsSuccessor(Succ2, Succ1))
586  return std::make_tuple(Succ2, Succ1);
587 
588  return std::make_tuple<BasicBlock *, BasicBlock *>(nullptr, nullptr);
589  };
590 
591  std::unique_ptr<FunctionOutliningInfo> OutliningInfo =
592  std::make_unique<FunctionOutliningInfo>();
593 
594  BasicBlock *CurrEntry = EntryBlock;
595  bool CandidateFound = false;
596  do {
597  // The number of blocks to be inlined has already reached
598  // the limit. When MaxNumInlineBlocks is set to 0 or 1, this
599  // disables partial inlining for the function.
600  if (OutliningInfo->GetNumInlinedBlocks() >= MaxNumInlineBlocks)
601  break;
602 
603  if (succ_size(CurrEntry) != 2)
604  break;
605 
606  BasicBlock *Succ1 = *succ_begin(CurrEntry);
607  BasicBlock *Succ2 = *(succ_begin(CurrEntry) + 1);
608 
609  BasicBlock *ReturnBlock, *NonReturnBlock;
610  std::tie(ReturnBlock, NonReturnBlock) = GetReturnBlock(Succ1, Succ2);
611 
612  if (ReturnBlock) {
613  OutliningInfo->Entries.push_back(CurrEntry);
614  OutliningInfo->ReturnBlock = ReturnBlock;
615  OutliningInfo->NonReturnBlock = NonReturnBlock;
616  CandidateFound = true;
617  break;
618  }
619 
620  BasicBlock *CommSucc;
622  std::tie(CommSucc, OtherSucc) = GetCommonSucc(Succ1, Succ2);
623 
624  if (!CommSucc)
625  break;
626 
627  OutliningInfo->Entries.push_back(CurrEntry);
628  CurrEntry = OtherSucc;
629  } while (true);
630 
631  if (!CandidateFound)
632  return std::unique_ptr<FunctionOutliningInfo>();
633 
634  // Do sanity check of the entries: threre should not
635  // be any successors (not in the entry set) other than
636  // {ReturnBlock, NonReturnBlock}
637  assert(OutliningInfo->Entries[0] == &F->front() &&
638  "Function Entry must be the first in Entries vector");
639  DenseSet<BasicBlock *> Entries;
640  for (BasicBlock *E : OutliningInfo->Entries)
641  Entries.insert(E);
642 
643  // Returns true of BB has Predecessor which is not
644  // in Entries set.
645  auto HasNonEntryPred = [Entries](BasicBlock *BB) {
646  for (auto Pred : predecessors(BB)) {
647  if (!Entries.count(Pred))
648  return true;
649  }
650  return false;
651  };
652  auto CheckAndNormalizeCandidate =
653  [Entries, HasNonEntryPred](FunctionOutliningInfo *OutliningInfo) {
654  for (BasicBlock *E : OutliningInfo->Entries) {
655  for (auto Succ : successors(E)) {
656  if (Entries.count(Succ))
657  continue;
658  if (Succ == OutliningInfo->ReturnBlock)
659  OutliningInfo->ReturnBlockPreds.push_back(E);
660  else if (Succ != OutliningInfo->NonReturnBlock)
661  return false;
662  }
663  // There should not be any outside incoming edges either:
664  if (HasNonEntryPred(E))
665  return false;
666  }
667  return true;
668  };
669 
670  if (!CheckAndNormalizeCandidate(OutliningInfo.get()))
671  return std::unique_ptr<FunctionOutliningInfo>();
672 
673  // Now further growing the candidate's inlining region by
674  // peeling off dominating blocks from the outlining region:
675  while (OutliningInfo->GetNumInlinedBlocks() < MaxNumInlineBlocks) {
676  BasicBlock *Cand = OutliningInfo->NonReturnBlock;
677  if (succ_size(Cand) != 2)
678  break;
679 
680  if (HasNonEntryPred(Cand))
681  break;
682 
683  BasicBlock *Succ1 = *succ_begin(Cand);
684  BasicBlock *Succ2 = *(succ_begin(Cand) + 1);
685 
686  BasicBlock *ReturnBlock, *NonReturnBlock;
687  std::tie(ReturnBlock, NonReturnBlock) = GetReturnBlock(Succ1, Succ2);
688  if (!ReturnBlock || ReturnBlock != OutliningInfo->ReturnBlock)
689  break;
690 
691  if (NonReturnBlock->getSinglePredecessor() != Cand)
692  break;
693 
694  // Now grow and update OutlininigInfo:
695  OutliningInfo->Entries.push_back(Cand);
696  OutliningInfo->NonReturnBlock = NonReturnBlock;
697  OutliningInfo->ReturnBlockPreds.push_back(Cand);
698  Entries.insert(Cand);
699  }
700 
701  return OutliningInfo;
702 }
703 
704 // Check if there is PGO data or user annotated branch data:
// Returns true when F itself has profile data, or when the terminator of any
// block in OI->Entries carries branch-weight metadata that can be extracted.
// NOTE(review): listing line 710 is absent here; BR is presumably declared on
// it from the terminator of E -- confirm.
705 static bool hasProfileData(Function *F, FunctionOutliningInfo *OI) {
706  if (F->hasProfileData())
707  return true;
708  // Now check if any of the entry block has MD_prof data:
709  for (auto *E : OI->Entries) {
711  if (!BR || BR->isUnconditional())
712  continue;
 // The local F below shadows the Function parameter of the same name; both
 // locals only receive the extracted weights and are otherwise unused.
713  uint64_t T, F;
714  if (BR->extractProfMetadata(T, F))
715  return true;
716  }
717  return false;
718 }
719 
// Returns the frequency of the block calling the most recently outlined
// function (Cloner.OutlinedFunctions.back()) relative to the entry block of
// the cloned function, as a BranchProbability. Without profile data, an
// estimate of 45% or more is raised to at least OutlineRegionFreqPercent%.
// NOTE(review): listing line 720 (the return type) is absent here.
721 PartialInlinerImpl::getOutliningCallBBRelativeFreq(FunctionCloner &Cloner) {
722  BasicBlock *OutliningCallBB = Cloner.OutlinedFunctions.back().second;
723  auto EntryFreq =
724  Cloner.ClonedFuncBFI->getBlockFreq(&Cloner.ClonedFunc->getEntryBlock());
725  auto OutliningCallFreq =
726  Cloner.ClonedFuncBFI->getBlockFreq(OutliningCallBB);
727  // FIXME Hackery needed because ClonedFuncBFI is based on the function BEFORE
728  // we outlined any regions, so we may encounter situations where the
729  // OutliningCallFreq is *slightly* bigger than the EntryFreq.
730  if (OutliningCallFreq.getFrequency() > EntryFreq.getFrequency()) {
731  OutliningCallFreq = EntryFreq;
732  }
 // After the clamp above the ratio cannot exceed 1.
733  auto OutlineRegionRelFreq = BranchProbability::getBranchProbability(
734  OutliningCallFreq.getFrequency(), EntryFreq.getFrequency());
735 
 // With PGO or user-annotated branch data the computed ratio is trusted.
 // NOTE(review): ClonedOI is described as specific to single-region
 // outlining; hasProfileData dereferences it unless the function itself has
 // profile data -- confirm it is non-null on every path reaching here.
736  if (hasProfileData(Cloner.OrigFunc, Cloner.ClonedOI.get()))
737  return OutlineRegionRelFreq;
738 
739  // When profile data is not available, we need to be conservative in
740  // estimating the overall savings. Static branch prediction can usually
741  // guess the branch direction right (taken/non-taken), but the guessed
742  // branch probability is usually not biased enough. In case when the
743  // outlined region is predicted to be likely, its probability needs
744  // to be made higher (more biased) to not under-estimate the cost of
745  // function outlining. On the other hand, if the outlined region
746  // is predicted to be less likely, the predicted probability is usually
747  // higher than the actual. For instance, the actual probability of the
748  // less likely target is only 5%, but the guessed probability can be
749  // 40%. In the latter case, there is no need for further adjustment.
750  // FIXME: add an option for this.
751  if (OutlineRegionRelFreq < BranchProbability(45, 100))
752  return OutlineRegionRelFreq;
753 
754  OutlineRegionRelFreq = std::max(
755  OutlineRegionRelFreq, BranchProbability(OutlineRegionFreqPercent, 100));
756 
757  return OutlineRegionRelFreq;
758 }
759 
// Decides whether the cloned function called at CS should be partially
// inlined. Returns false (after emitting a remark) when the inline cost is
// "always" or "never", when the cost analysis rejects the call, or when the
// savings of removing the call are below WeightedOutliningRcost; returns true
// otherwise. With SkipCostAnalysis set, only isInlineViable is consulted.
// NOTE(review): the embedded listing numbers skip 763, 767 and 776, so the
// last parameter (presumably ORE), the definition of Callee and part of the
// RemarksEnabled initialiser are not visible here.
760 bool PartialInlinerImpl::shouldPartialInline(
761  CallSite CS, FunctionCloner &Cloner,
762  BlockFrequency WeightedOutliningRcost,
764  using namespace ore;
765 
766  Instruction *Call = CS.getInstruction();
768  assert(Callee == Cloner.ClonedFunc);
769 
770  if (SkipCostAnalysis)
771  return isInlineViable(*Callee);
772 
773  Function *Caller = CS.getCaller();
774  auto &CalleeTTI = (*GetTTI)(*Callee);
775  bool RemarksEnabled =
777  DEBUG_TYPE);
778  assert(Call && "invalid callsite for partial inline");
 // ORE is handed to the cost analysis only when remarks are enabled.
779  InlineCost IC = getInlineCost(cast<CallBase>(*Call), getInlineParams(),
780  CalleeTTI, *GetAssumptionCache, GetBFI, PSI,
781  RemarksEnabled ? &ORE : nullptr);
782 
 // A callee that is always inlined is left to the regular inliner.
783  if (IC.isAlways()) {
784  ORE.emit([&]() {
785  return OptimizationRemarkAnalysis(DEBUG_TYPE, "AlwaysInline", Call)
786  << NV("Callee", Cloner.OrigFunc)
787  << " should always be fully inlined, not partially";
788  });
789  return false;
790  }
791 
792  if (IC.isNever()) {
793  ORE.emit([&]() {
794  return OptimizationRemarkMissed(DEBUG_TYPE, "NeverInline", Call)
795  << NV("Callee", Cloner.OrigFunc) << " not partially inlined into "
796  << NV("Caller", Caller)
797  << " because it should never be inlined (cost=never)";
798  });
799  return false;
800  }
801 
 // The cost analysis rejected the call; the remark reports the threshold as
 // cost delta plus cost.
802  if (!IC) {
803  ORE.emit([&]() {
804  return OptimizationRemarkAnalysis(DEBUG_TYPE, "TooCostly", Call)
805  << NV("Callee", Cloner.OrigFunc) << " not partially inlined into "
806  << NV("Caller", Caller) << " because too costly to inline (cost="
807  << NV("Cost", IC.getCost()) << ", threshold="
808  << NV("Threshold", IC.getCostDelta() + IC.getCost()) << ")";
809  });
810  return false;
811  }
812  const DataLayout &DL = Caller->getParent()->getDataLayout();
813 
814  // The savings of eliminating the call:
815  int NonWeightedSavings = getCallsiteCost(cast<CallBase>(*Call), DL);
 // Wrapped in a BlockFrequency so it can be compared with the weighted cost.
816  BlockFrequency NormWeightedSavings(NonWeightedSavings);
817 
818  // Weighted saving is smaller than weighted cost, return false
819  if (NormWeightedSavings < WeightedOutliningRcost) {
820  ORE.emit([&]() {
821  return OptimizationRemarkAnalysis(DEBUG_TYPE, "OutliningCallcostTooHigh",
822  Call)
823  << NV("Callee", Cloner.OrigFunc) << " not partially inlined into "
824  << NV("Caller", Caller) << " runtime overhead (overhead="
825  << NV("Overhead", (unsigned)WeightedOutliningRcost.getFrequency())
826  << ", savings="
827  << NV("Savings", (unsigned)NormWeightedSavings.getFrequency())
828  << ")"
829  << " of making the outlined call is too high";
830  });
831 
832  return false;
833  }
834 
835  ORE.emit([&]() {
836  return OptimizationRemarkAnalysis(DEBUG_TYPE, "CanBePartiallyInlined", Call)
837  << NV("Callee", Cloner.OrigFunc) << " can be partially inlined into "
838  << NV("Caller", Caller) << " with cost=" << NV("Cost", IC.getCost())
839  << " (threshold="
840  << NV("Threshold", IC.getCostDelta() + IC.getCost()) << ")";
841  });
842  return true;
843 }
844 
845 // TODO: Ideally we should share Inliner's InlineCost Analysis code.
846 // For now use a simplified version. The returned 'InlineCost' will be used
847 // to esimate the size cost as well as runtime cost of the BB.
848 int PartialInlinerImpl::computeBBInlineCost(BasicBlock *BB) {
849  int InlineCost = 0;
850  const DataLayout &DL = BB->getParent()->getParent()->getDataLayout();
851  for (Instruction &I : BB->instructionsWithoutDebug()) {
852  // Skip free instructions.
853  switch (I.getOpcode()) {
854  case Instruction::BitCast:
855  case Instruction::PtrToInt:
856  case Instruction::IntToPtr:
857  case Instruction::Alloca:
858  case Instruction::PHI:
859  continue;
860  case Instruction::GetElementPtr:
861  if (cast<GetElementPtrInst>(&I)->hasAllZeroIndices())
862  continue;
863  break;
864  default:
865  break;
866  }
867 
868  if (I.isLifetimeStartOrEnd())
869  continue;
870 
871  if (CallInst *CI = dyn_cast<CallInst>(&I)) {
872  InlineCost += getCallsiteCost(*CI, DL);
873  continue;
874  }
875 
876  if (InvokeInst *II = dyn_cast<InvokeInst>(&I)) {
877  InlineCost += getCallsiteCost(*II, DL);
878  continue;
879  }
880 
881  if (SwitchInst *SI = dyn_cast<SwitchInst>(&I)) {
882  InlineCost += (SI->getNumCases() + 1) * InlineConstants::InstrCost;
883  continue;
884  }
885  InlineCost += InlineConstants::InstrCost;
886  }
887  return InlineCost;
888 }
889 
890 std::tuple<int, int>
891 PartialInlinerImpl::computeOutliningCosts(FunctionCloner &Cloner) {
892  int OutliningFuncCallCost = 0, OutlinedFunctionCost = 0;
893  for (auto FuncBBPair : Cloner.OutlinedFunctions) {
894  Function *OutlinedFunc = FuncBBPair.first;
895  BasicBlock* OutliningCallBB = FuncBBPair.second;
896  // Now compute the cost of the call sequence to the outlined function
897  // 'OutlinedFunction' in BB 'OutliningCallBB':
898  OutliningFuncCallCost += computeBBInlineCost(OutliningCallBB);
899 
900  // Now compute the cost of the extracted/outlined function itself:
901  for (BasicBlock &BB : *OutlinedFunc)
902  OutlinedFunctionCost += computeBBInlineCost(&BB);
903  }
904  assert(OutlinedFunctionCost >= Cloner.OutlinedRegionCost &&
905  "Outlined function cost should be no less than the outlined region");
906 
907  // The code extractor introduces a new root and exit stub blocks with
908  // additional unconditional branches. Those branches will be eliminated
909  // later with bb layout. The cost should be adjusted accordingly:
910  OutlinedFunctionCost -=
911  2 * InlineConstants::InstrCost * Cloner.OutlinedFunctions.size();
912 
913  int OutliningRuntimeOverhead =
914  OutliningFuncCallCost +
915  (OutlinedFunctionCost - Cloner.OutlinedRegionCost) +
917 
918  return std::make_tuple(OutliningFuncCallCost, OutliningRuntimeOverhead);
919 }
920 
921 // Create the callsite to profile count map which is
922 // used to update the original function's entry count,
923 // after the function is partially inlined into the callsite.
924 void PartialInlinerImpl::computeCallsiteToProfCountMap(
925  Function *DuplicateFunction,
926  DenseMap<User *, uint64_t> &CallSiteToProfCountMap) {
927  std::vector<User *> Users(DuplicateFunction->user_begin(),
928  DuplicateFunction->user_end());
929  Function *CurrentCaller = nullptr;
930  std::unique_ptr<BlockFrequencyInfo> TempBFI;
931  BlockFrequencyInfo *CurrentCallerBFI = nullptr;
932 
933  auto ComputeCurrBFI = [&,this](Function *Caller) {
934  // For the old pass manager:
935  if (!GetBFI) {
936  DominatorTree DT(*Caller);
937  LoopInfo LI(DT);
938  BranchProbabilityInfo BPI(*Caller, LI);
939  TempBFI.reset(new BlockFrequencyInfo(*Caller, BPI, LI));
940  CurrentCallerBFI = TempBFI.get();
941  } else {
942  // New pass manager:
943  CurrentCallerBFI = &(*GetBFI)(*Caller);
944  }
945  };
946 
947  for (User *User : Users) {
948  CallSite CS = getCallSite(User);
949  Function *Caller = CS.getCaller();
950  if (CurrentCaller != Caller) {
951  CurrentCaller = Caller;
952  ComputeCurrBFI(Caller);
953  } else {
954  assert(CurrentCallerBFI && "CallerBFI is not set");
955  }
956  BasicBlock *CallBB = CS.getInstruction()->getParent();
957  auto Count = CurrentCallerBFI->getBlockProfileCount(CallBB);
958  if (Count)
959  CallSiteToProfCountMap[User] = *Count;
960  else
961  CallSiteToProfCountMap[User] = 0;
962  }
963 }
964 
965 PartialInlinerImpl::FunctionCloner::FunctionCloner(
966  Function *F, FunctionOutliningInfo *OI, OptimizationRemarkEmitter &ORE,
967  function_ref<AssumptionCache *(Function &)> LookupAC)
968  : OrigFunc(F), ORE(ORE), LookupAC(LookupAC) {
969  ClonedOI = std::make_unique<FunctionOutliningInfo>();
970 
971  // Clone the function, so that we can hack away on it.
972  ValueToValueMapTy VMap;
973  ClonedFunc = CloneFunction(F, VMap);
974 
975  ClonedOI->ReturnBlock = cast<BasicBlock>(VMap[OI->ReturnBlock]);
976  ClonedOI->NonReturnBlock = cast<BasicBlock>(VMap[OI->NonReturnBlock]);
977  for (BasicBlock *BB : OI->Entries) {
978  ClonedOI->Entries.push_back(cast<BasicBlock>(VMap[BB]));
979  }
980  for (BasicBlock *E : OI->ReturnBlockPreds) {
981  BasicBlock *NewE = cast<BasicBlock>(VMap[E]);
982  ClonedOI->ReturnBlockPreds.push_back(NewE);
983  }
984  // Go ahead and update all uses to the duplicate, so that we can just
985  // use the inliner functionality when we're done hacking.
986  F->replaceAllUsesWith(ClonedFunc);
987 }
988 
989 PartialInlinerImpl::FunctionCloner::FunctionCloner(
990  Function *F, FunctionOutliningMultiRegionInfo *OI,
992  function_ref<AssumptionCache *(Function &)> LookupAC)
993  : OrigFunc(F), ORE(ORE), LookupAC(LookupAC) {
994  ClonedOMRI = std::make_unique<FunctionOutliningMultiRegionInfo>();
995 
996  // Clone the function, so that we can hack away on it.
997  ValueToValueMapTy VMap;
998  ClonedFunc = CloneFunction(F, VMap);
999 
1000  // Go through all Outline Candidate Regions and update all BasicBlock
1001  // information.
1002  for (FunctionOutliningMultiRegionInfo::OutlineRegionInfo RegionInfo :
1003  OI->ORI) {
1005  for (BasicBlock *BB : RegionInfo.Region) {
1006  Region.push_back(cast<BasicBlock>(VMap[BB]));
1007  }
1008  BasicBlock *NewEntryBlock = cast<BasicBlock>(VMap[RegionInfo.EntryBlock]);
1009  BasicBlock *NewExitBlock = cast<BasicBlock>(VMap[RegionInfo.ExitBlock]);
1010  BasicBlock *NewReturnBlock = nullptr;
1011  if (RegionInfo.ReturnBlock)
1012  NewReturnBlock = cast<BasicBlock>(VMap[RegionInfo.ReturnBlock]);
1013  FunctionOutliningMultiRegionInfo::OutlineRegionInfo MappedRegionInfo(
1014  Region, NewEntryBlock, NewExitBlock, NewReturnBlock);
1015  ClonedOMRI->ORI.push_back(MappedRegionInfo);
1016  }
1017  // Go ahead and update all uses to the duplicate, so that we can just
1018  // use the inliner functionality when we're done hacking.
1019  F->replaceAllUsesWith(ClonedFunc);
1020 }
1021 
1022 void PartialInlinerImpl::FunctionCloner::NormalizeReturnBlock() {
1023  auto getFirstPHI = [](BasicBlock *BB) {
1024  BasicBlock::iterator I = BB->begin();
1025  PHINode *FirstPhi = nullptr;
1026  while (I != BB->end()) {
1027  PHINode *Phi = dyn_cast<PHINode>(I);
1028  if (!Phi)
1029  break;
1030  if (!FirstPhi) {
1031  FirstPhi = Phi;
1032  break;
1033  }
1034  }
1035  return FirstPhi;
1036  };
1037 
1038  // Shouldn't need to normalize PHIs if we're not outlining non-early return
1039  // blocks.
1040  if (!ClonedOI)
1041  return;
1042 
1043  // Special hackery is needed with PHI nodes that have inputs from more than
1044  // one extracted block. For simplicity, just split the PHIs into a two-level
1045  // sequence of PHIs, some of which will go in the extracted region, and some
1046  // of which will go outside.
1047  BasicBlock *PreReturn = ClonedOI->ReturnBlock;
1048  // only split block when necessary:
1049  PHINode *FirstPhi = getFirstPHI(PreReturn);
1050  unsigned NumPredsFromEntries = ClonedOI->ReturnBlockPreds.size();
1051 
1052  if (!FirstPhi || FirstPhi->getNumIncomingValues() <= NumPredsFromEntries + 1)
1053  return;
1054 
1055  auto IsTrivialPhi = [](PHINode *PN) -> Value * {
1056  Value *CommonValue = PN->getIncomingValue(0);
1057  if (all_of(PN->incoming_values(),
1058  [&](Value *V) { return V == CommonValue; }))
1059  return CommonValue;
1060  return nullptr;
1061  };
1062 
1063  ClonedOI->ReturnBlock = ClonedOI->ReturnBlock->splitBasicBlock(
1064  ClonedOI->ReturnBlock->getFirstNonPHI()->getIterator());
1065  BasicBlock::iterator I = PreReturn->begin();
1066  Instruction *Ins = &ClonedOI->ReturnBlock->front();
1068  while (I != PreReturn->end()) {
1069  PHINode *OldPhi = dyn_cast<PHINode>(I);
1070  if (!OldPhi)
1071  break;
1072 
1073  PHINode *RetPhi =
1074  PHINode::Create(OldPhi->getType(), NumPredsFromEntries + 1, "", Ins);
1075  OldPhi->replaceAllUsesWith(RetPhi);
1076  Ins = ClonedOI->ReturnBlock->getFirstNonPHI();
1077 
1078  RetPhi->addIncoming(&*I, PreReturn);
1079  for (BasicBlock *E : ClonedOI->ReturnBlockPreds) {
1080  RetPhi->addIncoming(OldPhi->getIncomingValueForBlock(E), E);
1081  OldPhi->removeIncomingValue(E);
1082  }
1083 
1084  // After incoming values splitting, the old phi may become trivial.
1085  // Keeping the trivial phi can introduce definition inside the outline
1086  // region which is live-out, causing necessary overhead (load, store
1087  // arg passing etc).
1088  if (auto *OldPhiVal = IsTrivialPhi(OldPhi)) {
1089  OldPhi->replaceAllUsesWith(OldPhiVal);
1090  DeadPhis.push_back(OldPhi);
1091  }
1092  ++I;
1093  }
1094  for (auto *DP : DeadPhis)
1095  DP->eraseFromParent();
1096 
1097  for (auto E : ClonedOI->ReturnBlockPreds) {
1098  E->getTerminator()->replaceUsesOfWith(PreReturn, ClonedOI->ReturnBlock);
1099  }
1100 }
1101 
1102 bool PartialInlinerImpl::FunctionCloner::doMultiRegionFunctionOutlining() {
1103 
1104  auto ComputeRegionCost = [](SmallVectorImpl<BasicBlock *> &Region) {
1105  int Cost = 0;
1106  for (BasicBlock* BB : Region)
1107  Cost += computeBBInlineCost(BB);
1108  return Cost;
1109  };
1110 
1111  assert(ClonedOMRI && "Expecting OutlineInfo for multi region outline");
1112 
1113  if (ClonedOMRI->ORI.empty())
1114  return false;
1115 
1116  // The CodeExtractor needs a dominator tree.
1117  DominatorTree DT;
1118  DT.recalculate(*ClonedFunc);
1119 
1120  // Manually calculate a BlockFrequencyInfo and BranchProbabilityInfo.
1121  LoopInfo LI(DT);
1122  BranchProbabilityInfo BPI(*ClonedFunc, LI);
1123  ClonedFuncBFI.reset(new BlockFrequencyInfo(*ClonedFunc, BPI, LI));
1124 
1125  // Cache and recycle the CodeExtractor analysis to avoid O(n^2) compile-time.
1126  CodeExtractorAnalysisCache CEAC(*ClonedFunc);
1127 
1128  SetVector<Value *> Inputs, Outputs, Sinks;
1129  for (FunctionOutliningMultiRegionInfo::OutlineRegionInfo RegionInfo :
1130  ClonedOMRI->ORI) {
1131  int CurrentOutlinedRegionCost = ComputeRegionCost(RegionInfo.Region);
1132 
1133  CodeExtractor CE(RegionInfo.Region, &DT, /*AggregateArgs*/ false,
1134  ClonedFuncBFI.get(), &BPI,
1135  LookupAC(*RegionInfo.EntryBlock->getParent()),
1136  /* AllowVarargs */ false);
1137 
1138  CE.findInputsOutputs(Inputs, Outputs, Sinks);
1139 
1140 #ifndef NDEBUG
1141  if (TracePartialInlining) {
1142  dbgs() << "inputs: " << Inputs.size() << "\n";
1143  dbgs() << "outputs: " << Outputs.size() << "\n";
1144  for (Value *value : Inputs)
1145  dbgs() << "value used in func: " << *value << "\n";
1146  for (Value *output : Outputs)
1147  dbgs() << "instr used in func: " << *output << "\n";
1148  }
1149 #endif
1150  // Do not extract regions that have live exit variables.
1151  if (Outputs.size() > 0 && !ForceLiveExit)
1152  continue;
1153 
1154  Function *OutlinedFunc = CE.extractCodeRegion(CEAC);
1155 
1156  if (OutlinedFunc) {
1157  CallSite OCS = PartialInlinerImpl::getOneCallSiteTo(OutlinedFunc);
1158  BasicBlock *OutliningCallBB = OCS.getInstruction()->getParent();
1159  assert(OutliningCallBB->getParent() == ClonedFunc);
1160  OutlinedFunctions.push_back(std::make_pair(OutlinedFunc,OutliningCallBB));
1161  NumColdRegionsOutlined++;
1162  OutlinedRegionCost += CurrentOutlinedRegionCost;
1163 
1164  if (MarkOutlinedColdCC) {
1165  OutlinedFunc->setCallingConv(CallingConv::Cold);
1167  }
1168  } else
1169  ORE.emit([&]() {
1170  return OptimizationRemarkMissed(DEBUG_TYPE, "ExtractFailed",
1171  &RegionInfo.Region.front()->front())
1172  << "Failed to extract region at block "
1173  << ore::NV("Block", RegionInfo.Region.front());
1174  });
1175  }
1176 
1177  return !OutlinedFunctions.empty();
1178 }
1179 
1180 Function *
1181 PartialInlinerImpl::FunctionCloner::doSingleRegionFunctionOutlining() {
1182  // Returns true if the block is to be partial inlined into the caller
1183  // (i.e. not to be extracted to the out of line function)
1184  auto ToBeInlined = [&, this](BasicBlock *BB) {
1185  return BB == ClonedOI->ReturnBlock ||
1186  (std::find(ClonedOI->Entries.begin(), ClonedOI->Entries.end(), BB) !=
1187  ClonedOI->Entries.end());
1188  };
1189 
1190  assert(ClonedOI && "Expecting OutlineInfo for single region outline");
1191  // The CodeExtractor needs a dominator tree.
1192  DominatorTree DT;
1193  DT.recalculate(*ClonedFunc);
1194 
1195  // Manually calculate a BlockFrequencyInfo and BranchProbabilityInfo.
1196  LoopInfo LI(DT);
1197  BranchProbabilityInfo BPI(*ClonedFunc, LI);
1198  ClonedFuncBFI.reset(new BlockFrequencyInfo(*ClonedFunc, BPI, LI));
1199 
1200  // Gather up the blocks that we're going to extract.
1201  std::vector<BasicBlock *> ToExtract;
1202  ToExtract.push_back(ClonedOI->NonReturnBlock);
1203  OutlinedRegionCost +=
1204  PartialInlinerImpl::computeBBInlineCost(ClonedOI->NonReturnBlock);
1205  for (BasicBlock &BB : *ClonedFunc)
1206  if (!ToBeInlined(&BB) && &BB != ClonedOI->NonReturnBlock) {
1207  ToExtract.push_back(&BB);
1208  // FIXME: the code extractor may hoist/sink more code
1209  // into the outlined function which may make the outlining
1210  // overhead (the difference of the outlined function cost
1211  // and OutliningRegionCost) look larger.
1212  OutlinedRegionCost += computeBBInlineCost(&BB);
1213  }
1214 
1215  // Extract the body of the if.
1216  CodeExtractorAnalysisCache CEAC(*ClonedFunc);
1217  Function *OutlinedFunc =
1218  CodeExtractor(ToExtract, &DT, /*AggregateArgs*/ false,
1219  ClonedFuncBFI.get(), &BPI, LookupAC(*ClonedFunc),
1220  /* AllowVarargs */ true)
1221  .extractCodeRegion(CEAC);
1222 
1223  if (OutlinedFunc) {
1224  BasicBlock *OutliningCallBB =
1225  PartialInlinerImpl::getOneCallSiteTo(OutlinedFunc)
1226  .getInstruction()
1227  ->getParent();
1228  assert(OutliningCallBB->getParent() == ClonedFunc);
1229  OutlinedFunctions.push_back(std::make_pair(OutlinedFunc, OutliningCallBB));
1230  } else
1231  ORE.emit([&]() {
1232  return OptimizationRemarkMissed(DEBUG_TYPE, "ExtractFailed",
1233  &ToExtract.front()->front())
1234  << "Failed to extract region at block "
1235  << ore::NV("Block", ToExtract.front());
1236  });
1237 
1238  return OutlinedFunc;
1239 }
1240 
1241 PartialInlinerImpl::FunctionCloner::~FunctionCloner() {
1242  // Ditch the duplicate, since we're done with it, and rewrite all remaining
1243  // users (function pointers, etc.) back to the original function.
1244  ClonedFunc->replaceAllUsesWith(OrigFunc);
1245  ClonedFunc->eraseFromParent();
1246  if (!IsFunctionInlined) {
1247  // Remove each function that was speculatively created if there is no
1248  // reference.
1249  for (auto FuncBBPair : OutlinedFunctions) {
1250  Function *Func = FuncBBPair.first;
1251  Func->eraseFromParent();
1252  }
1253  }
1254 }
1255 
1256 std::pair<bool, Function *> PartialInlinerImpl::unswitchFunction(Function *F) {
1257 
1258  if (F->hasAddressTaken())
1259  return {false, nullptr};
1260 
1261  // Let inliner handle it
1262  if (F->hasFnAttribute(Attribute::AlwaysInline))
1263  return {false, nullptr};
1264 
1265  if (F->hasFnAttribute(Attribute::NoInline))
1266  return {false, nullptr};
1267 
1268  if (PSI->isFunctionEntryCold(F))
1269  return {false, nullptr};
1270 
1271  if (F->users().empty())
1272  return {false, nullptr};
1273 
1275 
1276  // Only try to outline cold regions if we have a profile summary, which
1277  // implies we have profiling information.
1278  if (PSI->hasProfileSummary() && F->hasProfileData() &&
1280  std::unique_ptr<FunctionOutliningMultiRegionInfo> OMRI =
1281  computeOutliningColdRegionsInfo(F, ORE);
1282  if (OMRI) {
1283  FunctionCloner Cloner(F, OMRI.get(), ORE, LookupAssumptionCache);
1284 
1285 #ifndef NDEBUG
1286  if (TracePartialInlining) {
1287  dbgs() << "HotCountThreshold = " << PSI->getHotCountThreshold() << "\n";
1288  dbgs() << "ColdCountThreshold = " << PSI->getColdCountThreshold()
1289  << "\n";
1290  }
1291 #endif
1292  bool DidOutline = Cloner.doMultiRegionFunctionOutlining();
1293 
1294  if (DidOutline) {
1295 #ifndef NDEBUG
1296  if (TracePartialInlining) {
1297  dbgs() << ">>>>>> Outlined (Cloned) Function >>>>>>\n";
1298  Cloner.ClonedFunc->print(dbgs());
1299  dbgs() << "<<<<<< Outlined (Cloned) Function <<<<<<\n";
1300  }
1301 #endif
1302 
1303  if (tryPartialInline(Cloner))
1304  return {true, nullptr};
1305  }
1306  }
1307  }
1308 
1309  // Fall-thru to regular partial inlining if we:
1310  // i) can't find any cold regions to outline, or
1311  // ii) can't inline the outlined function anywhere.
1312  std::unique_ptr<FunctionOutliningInfo> OI = computeOutliningInfo(F);
1313  if (!OI)
1314  return {false, nullptr};
1315 
1316  FunctionCloner Cloner(F, OI.get(), ORE, LookupAssumptionCache);
1317  Cloner.NormalizeReturnBlock();
1318 
1319  Function *OutlinedFunction = Cloner.doSingleRegionFunctionOutlining();
1320 
1321  if (!OutlinedFunction)
1322  return {false, nullptr};
1323 
1324  bool AnyInline = tryPartialInline(Cloner);
1325 
1326  if (AnyInline)
1327  return {true, OutlinedFunction};
1328 
1329  return {false, nullptr};
1330 }
1331 
1332 bool PartialInlinerImpl::tryPartialInline(FunctionCloner &Cloner) {
1333  if (Cloner.OutlinedFunctions.empty())
1334  return false;
1335 
1336  int SizeCost = 0;
1337  BlockFrequency WeightedRcost;
1338  int NonWeightedRcost;
1339  std::tie(SizeCost, NonWeightedRcost) = computeOutliningCosts(Cloner);
1340 
1341  // Only calculate RelativeToEntryFreq when we are doing single region
1342  // outlining.
1343  BranchProbability RelativeToEntryFreq;
1344  if (Cloner.ClonedOI) {
1345  RelativeToEntryFreq = getOutliningCallBBRelativeFreq(Cloner);
1346  } else
1347  // RelativeToEntryFreq doesn't make sense when we have more than one
1348  // outlined call because each call will have a different relative frequency
1349  // to the entry block. We can consider using the average, but the
1350  // usefulness of that information is questionable. For now, assume we never
1351  // execute the calls to outlined functions.
1352  RelativeToEntryFreq = BranchProbability(0, 1);
1353 
1354  WeightedRcost = BlockFrequency(NonWeightedRcost) * RelativeToEntryFreq;
1355 
1356  // The call sequence(s) to the outlined function(s) are larger than the sum of
1357  // the original outlined region size(s), it does not increase the chances of
1358  // inlining the function with outlining (The inliner uses the size increase to
1359  // model the cost of inlining a callee).
1360  if (!SkipCostAnalysis && Cloner.OutlinedRegionCost < SizeCost) {
1361  OptimizationRemarkEmitter OrigFuncORE(Cloner.OrigFunc);
1362  DebugLoc DLoc;
1363  BasicBlock *Block;
1364  std::tie(DLoc, Block) = getOneDebugLoc(Cloner.ClonedFunc);
1365  OrigFuncORE.emit([&]() {
1366  return OptimizationRemarkAnalysis(DEBUG_TYPE, "OutlineRegionTooSmall",
1367  DLoc, Block)
1368  << ore::NV("Function", Cloner.OrigFunc)
1369  << " not partially inlined into callers (Original Size = "
1370  << ore::NV("OutlinedRegionOriginalSize", Cloner.OutlinedRegionCost)
1371  << ", Size of call sequence to outlined function = "
1372  << ore::NV("NewSize", SizeCost) << ")";
1373  });
1374  return false;
1375  }
1376 
1377  assert(Cloner.OrigFunc->users().empty() &&
1378  "F's users should all be replaced!");
1379 
1380  std::vector<User *> Users(Cloner.ClonedFunc->user_begin(),
1381  Cloner.ClonedFunc->user_end());
1382 
1383  DenseMap<User *, uint64_t> CallSiteToProfCountMap;
1384  auto CalleeEntryCount = Cloner.OrigFunc->getEntryCount();
1385  if (CalleeEntryCount)
1386  computeCallsiteToProfCountMap(Cloner.ClonedFunc, CallSiteToProfCountMap);
1387 
1388  uint64_t CalleeEntryCountV =
1389  (CalleeEntryCount ? CalleeEntryCount.getCount() : 0);
1390 
1391  bool AnyInline = false;
1392  for (User *User : Users) {
1393  CallSite CS = getCallSite(User);
1394 
1395  if (IsLimitReached())
1396  continue;
1397 
1398  OptimizationRemarkEmitter CallerORE(CS.getCaller());
1399  if (!shouldPartialInline(CS, Cloner, WeightedRcost, CallerORE))
1400  continue;
1401 
1402  // Construct remark before doing the inlining, as after successful inlining
1403  // the callsite is removed.
1404  OptimizationRemark OR(DEBUG_TYPE, "PartiallyInlined", CS.getInstruction());
1405  OR << ore::NV("Callee", Cloner.OrigFunc) << " partially inlined into "
1406  << ore::NV("Caller", CS.getCaller());
1407 
1408  InlineFunctionInfo IFI(nullptr, GetAssumptionCache, PSI);
1409  // We can only forward varargs when we outlined a single region, else we
1410  // bail on vararg functions.
1411  if (!InlineFunction(CS, IFI, nullptr, true,
1412  (Cloner.ClonedOI ? Cloner.OutlinedFunctions.back().first
1413  : nullptr)))
1414  continue;
1415 
1416  CallerORE.emit(OR);
1417 
1418  // Now update the entry count:
1419  if (CalleeEntryCountV && CallSiteToProfCountMap.count(User)) {
1420  uint64_t CallSiteCount = CallSiteToProfCountMap[User];
1421  CalleeEntryCountV -= std::min(CalleeEntryCountV, CallSiteCount);
1422  }
1423 
1424  AnyInline = true;
1425  NumPartialInlining++;
1426  // Update the stats
1427  if (Cloner.ClonedOI)
1428  NumPartialInlined++;
1429  else
1430  NumColdOutlinePartialInlined++;
1431 
1432  }
1433 
1434  if (AnyInline) {
1435  Cloner.IsFunctionInlined = true;
1436  if (CalleeEntryCount)
1437  Cloner.OrigFunc->setEntryCount(
1438  CalleeEntryCount.setCount(CalleeEntryCountV));
1439  OptimizationRemarkEmitter OrigFuncORE(Cloner.OrigFunc);
1440  OrigFuncORE.emit([&]() {
1441  return OptimizationRemark(DEBUG_TYPE, "PartiallyInlined", Cloner.OrigFunc)
1442  << "Partially inlined into at least one caller";
1443  });
1444 
1445  }
1446 
1447  return AnyInline;
1448 }
1449 
1450 bool PartialInlinerImpl::run(Module &M) {
1452  return false;
1453 
1454  std::vector<Function *> Worklist;
1455  Worklist.reserve(M.size());
1456  for (Function &F : M)
1457  if (!F.use_empty() && !F.isDeclaration())
1458  Worklist.push_back(&F);
1459 
1460  bool Changed = false;
1461  while (!Worklist.empty()) {
1462  Function *CurrFunc = Worklist.back();
1463  Worklist.pop_back();
1464 
1465  if (CurrFunc->use_empty())
1466  continue;
1467 
1468  bool Recursive = false;
1469  for (User *U : CurrFunc->users())
1470  if (Instruction *I = dyn_cast<Instruction>(U))
1471  if (I->getParent()->getParent() == CurrFunc) {
1472  Recursive = true;
1473  break;
1474  }
1475  if (Recursive)
1476  continue;
1477 
1478  std::pair<bool, Function * > Result = unswitchFunction(CurrFunc);
1479  if (Result.second)
1480  Worklist.push_back(Result.second);
1481  Changed |= Result.first;
1482  }
1483 
1484  return Changed;
1485 }
1486 
1488 
1489 INITIALIZE_PASS_BEGIN(PartialInlinerLegacyPass, "partial-inliner",
1490  "Partial Inliner", false, false)
1494 INITIALIZE_PASS_END(PartialInlinerLegacyPass, "partial-inliner",
1495  "Partial Inliner", false, false)
1496 
1498  return new PartialInlinerLegacyPass();
1499 }
1500 
1502  ModuleAnalysisManager &AM) {
1503  auto &FAM = AM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager();
1504 
1505  std::function<AssumptionCache &(Function &)> GetAssumptionCache =
1506  [&FAM](Function &F) -> AssumptionCache & {
1507  return FAM.getResult<AssumptionAnalysis>(F);
1508  };
1509 
1510  auto LookupAssumptionCache = [&FAM](Function &F) -> AssumptionCache * {
1511  return FAM.getCachedResult<AssumptionAnalysis>(F);
1512  };
1513 
1514  std::function<BlockFrequencyInfo &(Function &)> GetBFI =
1515  [&FAM](Function &F) -> BlockFrequencyInfo & {
1516  return FAM.getResult<BlockFrequencyAnalysis>(F);
1517  };
1518 
1519  std::function<TargetTransformInfo &(Function &)> GetTTI =
1520  [&FAM](Function &F) -> TargetTransformInfo & {
1521  return FAM.getResult<TargetIRAnalysis>(F);
1522  };
1523 
1525 
1526  if (PartialInlinerImpl(&GetAssumptionCache, LookupAssumptionCache, &GetTTI,
1527  {GetBFI}, PSI)
1528  .run(M))
1529  return PreservedAnalyses::none();
1530  return PreservedAnalyses::all();
1531 }
Optional< uint64_t > getBlockProfileCount(const BasicBlock *BB, bool AllowSynthetic=false) const
Returns the estimated profile count of BB.
A parsed version of the target data layout string in and methods for querying it. ...
Definition: DataLayout.h:112
const_iterator end(StringRef path)
Get end iterator over path.
Definition: Path.cpp:233
Utility class for extracting code into a new function.
Definition: CodeExtractor.h:85
Diagnostic information for missed-optimization remarks.
bool hasNPredecessors(unsigned N) const
Return true if this block has exactly N predecessors.
Definition: BasicBlock.cpp:267
InlineCost getInlineCost(CallBase &Call, const InlineParams &Params, TargetTransformInfo &CalleeTTI, std::function< AssumptionCache &(Function &)> &GetAssumptionCache, Optional< function_ref< BlockFrequencyInfo &(Function &)>> GetBFI, ProfileSummaryInfo *PSI, OptimizationRemarkEmitter *ORE=nullptr)
Get an InlineCost object representing the cost of inlining this callsite.
void addIncoming(Value *V, BasicBlock *BB)
Add an incoming value to the end of the PHI list.
static PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
const_iterator begin(StringRef path, Style style=Style::native)
Get begin iterator over path.
Definition: Path.cpp:224
DiagnosticInfoOptimizationBase::Argument NV
bool isNever() const
Definition: InlineCost.h:104
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)
Get the result of an analysis pass for a given IR unit.
Definition: PassManager.h:776
partial Partial Inliner
This class represents lattice values for constants.
Definition: AllocatorList.h:23
size_type size() const
Determine the number of elements in the SetVector.
Definition: SetVector.h:77
void initializePartialInlinerLegacyPassPass(PassRegistry &)
A Module instance is used to store all the information related to an LLVM module. ...
Definition: Module.h:65
void findInputsOutputs(ValueSet &Inputs, ValueSet &Outputs, const ValueSet &Allocas) const
Compute the set of input values and output values for the code.
static cl::opt< int > MaxNumPartialInlining("max-partial-inlining", cl::init(-1), cl::Hidden, cl::ZeroOrMore, cl::desc("Max number of partial inlining. The default is unlimited"))
Implements a dense probed hash-table based set.
Definition: DenseSet.h:249
void recalculate(ParentType &Func)
recalculate - compute a dominator tree for the given function
Analysis providing profile information.
This class represents a function call, abstracting a target machine's calling convention.
Function * CloneFunction(Function *F, ValueToValueMapTy &VMap, ClonedCodeInfo *CodeInfo=nullptr)
Return a copy of the specified function and add it to that function's module.
An immutable pass that tracks lazily created AssumptionCache objects.
An efficient, type-erasing, non-owning reference to a callable.
Definition: STLExtras.h:104
A cache of @llvm.assume calls within a function.
Analysis pass providing the TargetTransformInfo.
uint64_t getFrequency() const
Returns the frequency as a fixpoint number scaled by the entry frequency.
void setCallingConv(CallingConv::ID CC)
Set the calling convention of the call.
Definition: CallSite.h:324
const BasicBlock & back() const
Definition: Function.h:689
bool hasFnAttribute(Attribute::AttrKind Kind) const
Return true if the function has the attribute.
Definition: Function.h:323
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly...
Definition: STLExtras.h:1165
STATISTIC(NumFunctions, "Total number of functions")
A debug info location.
Definition: DebugLoc.h:33
F(f)
FunTy * getCalledFunction() const
Return the function being called if this is a direct call, otherwise return null (if it's an indirect...
Definition: CallSite.h:111
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction if the block is well formed or null if the block is not well forme...
Definition: BasicBlock.cpp:144
iv Induction Variable Users
Definition: IVUsers.cpp:51
This class captures the data input to the InlineFunction call, and records the auxiliary results prod...
Definition: Cloning.h:173
Represents the cost of inlining a function.
Definition: InlineCost.h:63
static cl::opt< bool > ForceLiveExit("pi-force-live-exit-outline", cl::init(false), cl::Hidden, cl::desc("Force outline regions with live exits"))
iterator begin()
Instruction iterator methods.
Definition: BasicBlock.h:273
AnalysisUsage & addRequired()
ModulePass * createPartialInliningPass()
createPartialInliningPass - This pass inlines parts of functions.
#define INITIALIZE_PASS_DEPENDENCY(depName)
Definition: PassSupport.h:50
Value * removeIncomingValue(unsigned Idx, bool DeletePHIIfEmpty=true)
Remove an incoming value.
const DataLayout & getDataLayout() const
Get the data layout for the module's target platform.
Definition: Module.cpp:369
An analysis pass based on legacy pass manager to deliver ProfileSummaryInfo.
Diagnostic information for optimization analysis remarks.
static cl::opt< int > OutlineRegionFreqPercent("outline-region-freq-percent", cl::init(75), cl::Hidden, cl::ZeroOrMore, cl::desc("Relative frequency of outline region to " "the entry block"))
This file contains the simple types necessary to represent the attributes associated with functions a...
partial inliner
Interval::succ_iterator succ_begin(Interval *I)
succ_begin/succ_end - define methods so that Intervals may be used just like BasicBlocks can with the...
Definition: Interval.h:102
InstrTy * getInstruction() const
Definition: CallSite.h:96
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:246
static cl::opt< bool > SkipCostAnalysis("skip-partial-inlining-cost-analysis", cl::init(false), cl::ZeroOrMore, cl::ReallyHidden, cl::desc("Skip Cost Analysis"))
const T & getValue() const LLVM_LVALUE_FUNCTION
Definition: Optional.h:255
const BasicBlock * getSingleSuccessor() const
Return the successor of this block if it has a single successor.
Definition: BasicBlock.cpp:275
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory)...
Definition: APInt.h:32
void replaceAllUsesWith(Value *V)
Change all uses of this to point to a new Value.
Definition: Value.cpp:429
static cl::opt< unsigned > MinBlockCounterExecution("min-block-execution", cl::init(100), cl::Hidden, cl::desc("Minimum block executions to consider " "its BranchProbabilityInfo valid"))
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree...
Definition: Dominators.h:144
static PreservedAnalyses none()
Convenience factory function for the empty preserved set.
Definition: PassManager.h:156
Interval::succ_iterator succ_end(Interval *I)
Definition: Interval.h:105
void replaceUsesOfWith(Value *From, Value *To)
Replace uses of one Value with another.
Definition: User.cpp:20
PreservedAnalyses run(Module &M, ModuleAnalysisManager &)
static cl::opt< unsigned > ExtraOutliningPenalty("partial-inlining-extra-penalty", cl::init(0), cl::Hidden, cl::desc("A debug option to add additional penalty to the computed one."))
BlockFrequencyInfo pass uses BlockFrequencyInfoImpl implementation to estimate IR basic block frequen...
void setCallingConv(CallingConv::ID CC)
Definition: Function.h:216
static cl::opt< bool > MarkOutlinedColdCC("pi-mark-coldcc", cl::init(false), cl::Hidden, cl::desc("Mark outline function calls with ColdCC"))
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:432
bool isAlways() const
Definition: InlineCost.h:103
Control flow instructions. These all have token chains.
Definition: ISDOpcodes.h:658
static cl::opt< bool > DisablePartialInlining("disable-partial-inlining", cl::init(false), cl::Hidden, cl::desc("Disable partial inlining"))
Wrapper pass for TargetTransformInfo.
A set of analyses that are preserved following a run of a transformation pass.
Definition: PassManager.h:153
const BasicBlock * getSinglePredecessor() const
Return the predecessor of this block if it has a single predecessor block.
Definition: BasicBlock.cpp:240
LLVM Basic Block Representation.
Definition: BasicBlock.h:57
Conditional or Unconditional Branch instruction.
An analysis pass based on the new PM to deliver ProfileSummaryInfo.
static cl::opt< float > ColdBranchRatio("cold-branch-ratio", cl::init(0.1), cl::Hidden, cl::desc("Minimum BranchProbability to consider a region cold."))
iterator_range< filter_iterator< BasicBlock::const_iterator, std::function< bool(const Instruction &)> > > instructionsWithoutDebug() const
Return a const iterator range over the instructions in the block, skipping any debug instructions...
Definition: BasicBlock.cpp:94
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
Value * getIncomingValueForBlock(const BasicBlock *BB) const
const Instruction & front() const
Definition: BasicBlock.h:285
std::pair< iterator, bool > insert(const ValueT &V)
Definition: DenseSet.h:187
static ManagedStatic< OptionRegistry > OR
Definition: Options.cpp:30
Diagnostic information for applied optimization remarks.
Represent the analysis usage information of a pass.
InlineResult isInlineViable(Function &Callee)
Minimal filter to detect invalid constructs for inlining.
A cache for the CodeExtractor analysis.
Definition: CodeExtractor.h:46
bool hasProfileData(bool IncludeSynthetic=false) const
Return true if the function is annotated with profile data.
Definition: Function.h:308
LLVMContext & getContext() const
getContext - Return a reference to the LLVMContext associated with this function. ...
Definition: Function.cpp:205
InlineResult InlineFunction(CallBase *CB, InlineFunctionInfo &IFI, AAResults *CalleeAAR=nullptr, bool InsertLifetime=true)
This function inlines the called function into the basic block of the caller.
Used in the streaming interface as the general argument type.
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
Definition: PassManager.h:159
virtual bool isMissedOptRemarkEnabled(StringRef PassName) const
Return true if missed optimization remarks are enabled, override to provide different implementation...
auto find(R &&Range, const T &Val) -> decltype(adl_begin(Range))
Provide wrappers to std::find which take ranges instead of having to pass begin/end explicitly...
Definition: STLExtras.h:1186
INITIALIZE_PASS_END(RegBankSelect, DEBUG_TYPE, "Assign register bank of generic virtual registers", false, false) RegBankSelect
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
A function analysis which provides an AssumptionCache.
Analysis pass which computes BlockFrequencyInfo.
Iterator for intrusive lists based on ilist_node.
void emit(DiagnosticInfoOptimizationBase &OptDiag)
Output the remark via the diagnostic handler and to the optimization record file. ...
Align max(MaybeAlign Lhs, Align Rhs)
Definition: Alignment.h:389
This pass provides access to the codegen interfaces that are needed for IR-level transformations.
iterator end()
Definition: BasicBlock.h:275
InlineParams getInlineParams()
Generate the parameters to tune the inline cost analysis based only on the commandline options...
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small...
Definition: SmallVector.h:837
Module.h This file contains the declarations for the Module class.
AssumptionCache * lookupAssumptionCache(Function &F)
Return the cached assumptions for a function if it has already been scanned.
static BranchProbability getBranchProbability(uint64_t Numerator, uint64_t Denominator)
static PHINode * Create(Type *Ty, unsigned NumReservedValues, const Twine &NameStr="", Instruction *InsertBefore=nullptr)
Constructors - NumReservedValues is a hint for the number of incoming edges that this phi node will h...
pred_range predecessors(BasicBlock *BB)
Definition: CFG.h:124
unsigned getNumIncomingValues() const
Return the number of incoming edges.
#define DEBUG_TYPE
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:132
int getCallsiteCost(CallBase &Call, const DataLayout &DL)
Return the cost associated with a callsite, including parameter passing and the call/return instructi...
static cl::opt< unsigned > MaxNumInlineBlocks("max-num-inline-blocks", cl::init(5), cl::Hidden, cl::desc("Max number of blocks to be partially inlined"))
amdgpu Simplify well known AMD library false FunctionCallee Callee
BBTy * getParent() const
Get the basic block containing the call site.
Definition: CallSite.h:101
iterator_range< user_iterator > users()
Definition: Value.h:420
int getCost() const
Get the inline cost estimate.
Definition: InlineCost.h:109
static void DFS(BasicBlock *Root, SetVector< BasicBlock *> &Set)
int getCostDelta() const
Get the cost delta from the threshold for inlining.
Definition: InlineCost.h:130
static cl::opt< bool > DisableMultiRegionPartialInline("disable-mr-partial-inlining", cl::init(false), cl::Hidden, cl::desc("Disable multi-region partial inlining"))
Function * extractCodeRegion(const CodeExtractorAnalysisCache &CEAC)
Perform the extraction, returning the new function.
INITIALIZE_PASS_BEGIN(PartialInlinerLegacyPass, "partial-inliner", "Partial Inliner", false, false) INITIALIZE_PASS_END(PartialInlinerLegacyPass
unsigned succ_size(const Instruction *I)
Definition: CFG.h:256
This file provides various utilities for inspecting and working with the control flow graph in LLVM I...
Analysis providing branch probability information.
const DebugLoc & getDebugLoc() const
Return the debug location for this node as a DebugLoc.
Definition: Instruction.h:331
static cl::opt< bool > TracePartialInlining("trace-partial-inlining", cl::init(false), cl::Hidden, cl::desc("Trace partial inlining."))
size_t size() const
Definition: Module.h:606
const Function * getParent() const
Return the enclosing method, or null if none.
Definition: BasicBlock.h:106
#define I(x, y, z)
Definition: MD5.cpp:58
static bool hasProfileData(Function *F, FunctionOutliningInfo *OI)
ModulePass class - This class is used to implement unstructured interprocedural optimizations and ana...
Definition: Pass.h:224
LLVM_NODISCARD std::enable_if<!is_simple_type< Y >::value, typename cast_retty< X, const Y >::ret_type >::type dyn_cast(const Y &Val)
Definition: Casting.h:332
void eraseFromParent()
eraseFromParent - This method unlinks 'this' from the containing module and deletes it...
Definition: Function.cpp:226
bool isUnconditional() const
bool isDeclaration() const
Return true if the primary definition of this global value is outside of the current translation unit...
Definition: Globals.cpp:231
const DiagnosticHandler * getDiagHandlerPtr() const
getDiagHandlerPtr - Returns const raw pointer of DiagnosticHandler set by setDiagnosticHandler.
Multiway switch.
bool hasAddressTaken(const User **=nullptr) const
hasAddressTaken - returns true if there are any uses of this function other than direct calls or invo...
Definition: Function.cpp:1420
static cl::opt< float > MinRegionSizeRatio("min-region-size-ratio", cl::init(0.1), cl::Hidden, cl::desc("Minimum ratio comparing relative sizes of each " "outline candidate and original function"))
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
user_iterator user_begin()
Definition: Value.h:396
const BasicBlock & front() const
Definition: Function.h:687
Module * getParent()
Get the module that this global value is contained inside of...
Definition: GlobalValue.h:575
LLVM Value Representation.
Definition: Value.h:74
succ_range successors(Instruction *I)
Definition: CFG.h:259
Invoke instruction.
AssumptionCache & getAssumptionCache(Function &F)
Get the cached assumptions for a function.
print Print MemDeps of function
A container for analyses that lazily runs them and caches their results.
static MachineBasicBlock * OtherSucc(MachineBasicBlock *MBB, MachineBasicBlock *Succ)
This pass exposes codegen information to IR-level passes.
bool extractProfMetadata(uint64_t &TrueVal, uint64_t &FalseVal) const
Retrieve the raw weight values of a conditional branch or select.
Definition: Metadata.cpp:1311
The optimization diagnostic interface.
bool use_empty() const
Definition: Value.h:343
const BasicBlock * getParent() const
Definition: Instruction.h:66
An analysis over an "outer" IR unit that provides access to an analysis manager over an "inner" IR un...
Definition: PassManager.h:1044
user_iterator user_end()
Definition: Value.h:404
bool is_contained(R &&Range, const E &Element)
Wrapper function around std::find to detect if an element exists in a container.
Definition: STLExtras.h:1224
FunTy * getCaller() const
Return the caller function for this call site.
Definition: CallSite.h:275