LLVM  7.0.0svn
PartialInlining.cpp
1 //===- PartialInlining.cpp - Inline parts of functions --------------------===//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This pass performs partial inlining, typically by inlining an if statement
11 // that surrounds the body of the function.
12 //
13 //===----------------------------------------------------------------------===//
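// Illustrative sketch (not part of the original source): for a function of
// the form
//
//   int foo(bool Cond) {
//     if (!Cond)
//       return 0;          // cheap, common early-return path
//     // ...large, rarely executed body...
//     return 1;
//   }
//
// the pass clones foo, extracts the large body into a new (outlined)
// function, and then inlines the remaining "if (!Cond) return 0; else call
// the outlined body" shell into foo's callers, so the common early-return
// path executes without a call.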
14 
15 #include "llvm/Transforms/IPO/PartialInlining.h"
16 #include "llvm/ADT/DenseMap.h"
17 #include "llvm/ADT/DenseSet.h"
18 #include "llvm/ADT/None.h"
19 #include "llvm/ADT/Optional.h"
20 #include "llvm/ADT/STLExtras.h"
21 #include "llvm/ADT/SmallVector.h"
22 #include "llvm/ADT/Statistic.h"
26 #include "llvm/Analysis/LoopInfo.h"
31 #include "llvm/IR/Attributes.h"
32 #include "llvm/IR/BasicBlock.h"
33 #include "llvm/IR/CFG.h"
34 #include "llvm/IR/CallSite.h"
35 #include "llvm/IR/DebugLoc.h"
36 #include "llvm/IR/DiagnosticInfo.h"
37 #include "llvm/IR/Dominators.h"
38 #include "llvm/IR/Function.h"
39 #include "llvm/IR/InstrTypes.h"
40 #include "llvm/IR/Instruction.h"
41 #include "llvm/IR/Instructions.h"
42 #include "llvm/IR/IntrinsicInst.h"
43 #include "llvm/IR/Intrinsics.h"
44 #include "llvm/IR/Module.h"
45 #include "llvm/IR/User.h"
46 #include "llvm/Pass.h"
49 #include "llvm/Support/Casting.h"
52 #include "llvm/Transforms/IPO.h"
56 #include <algorithm>
57 #include <cassert>
58 #include <cstdint>
59 #include <functional>
60 #include <iterator>
61 #include <memory>
62 #include <tuple>
63 #include <vector>
64 
65 using namespace llvm;
66 
67 #define DEBUG_TYPE "partial-inlining"
68 
69 STATISTIC(NumPartialInlined,
70  "Number of callsites into which functions were partially inlined.");
71 STATISTIC(NumColdOutlinePartialInlined, "Number of times functions with "
72  "cold outlined regions were partially "
73  "inlined into their caller(s).");
74 STATISTIC(NumColdRegionsFound,
75  "Number of cold single entry/exit regions found.");
76 STATISTIC(NumColdRegionsOutlined,
77  "Number of cold single entry/exit regions outlined.");
78 
79 // Command line option to disable partial-inlining. The default is false:
80 static cl::opt<bool>
81  DisablePartialInlining("disable-partial-inlining", cl::init(false),
82  cl::Hidden, cl::desc("Disable partial inlining"));
83 // Command line option to disable multi-region partial-inlining. The default is
84 // false:
85 static cl::opt<bool> DisableMultiRegionPartialInline(
86  "disable-mr-partial-inlining", cl::init(false), cl::Hidden,
87  cl::desc("Disable multi-region partial inlining"));
88 
89 // Command line option to force outlining in regions with live exit variables.
90 // The default is false:
91 static cl::opt<bool>
92  ForceLiveExit("pi-force-live-exit-outline", cl::init(false), cl::Hidden,
93  cl::desc("Force outline regions with live exits"));
94 
95 // Command line option to enable marking outline functions with Cold Calling
96 // Convention. The default is false:
97 static cl::opt<bool>
98  MarkOutlinedColdCC("pi-mark-coldcc", cl::init(false), cl::Hidden,
99  cl::desc("Mark outline function calls with ColdCC"));
100 
101 #ifndef NDEBUG
102 // Command line option to debug partial-inlining. The default is none:
103 static cl::opt<bool> TracePartialInlining("trace-partial-inlining",
104  cl::init(false), cl::Hidden,
105  cl::desc("Trace partial inlining."));
106 #endif
107 
108 // This is an option used by testing:
109 static cl::opt<bool> SkipCostAnalysis("skip-partial-inlining-cost-analysis",
110  cl::init(false), cl::ZeroOrMore,
111  cl::ReallyHidden,
112  cl::desc("Skip Cost Analysis"));
113 // Used to determine if a cold region is worth outlining, based on
114 // its inlining cost relative to the original function. The default is 10%,
115 // i.e. the cold region must reduce the inlining cost of the original function
116 // by at least 10%.
117 static cl::opt<float> MinRegionSizeRatio(
118  "min-region-size-ratio", cl::init(0.1), cl::Hidden,
119  cl::desc("Minimum ratio comparing relative sizes of each "
120  "outline candidate and original function"));
121 // Used to tune the minimum number of execution counts needed in the predecessor
122 // block to the cold edge, i.e. a confidence threshold.
123 static cl::opt<unsigned>
124  MinBlockCounterExecution("min-block-execution", cl::init(100), cl::Hidden,
125  cl::desc("Minimum block executions to consider "
126  "its BranchProbabilityInfo valid"));
127 // Used to determine when an edge is considered cold. The default is 10%, i.e.
128 // if the branch probability is 10% or less, the edge is deemed cold.
129 static cl::opt<float> ColdBranchRatio(
130  "cold-branch-ratio", cl::init(0.1), cl::Hidden,
131  cl::desc("Minimum BranchProbability to consider a region cold."));
132 
133 static cl::opt<unsigned> MaxNumInlineBlocks(
134  "max-num-inline-blocks", cl::init(5), cl::Hidden,
135  cl::desc("Max number of blocks to be partially inlined"));
136 
137 // Command line option to set the maximum number of partial inlining allowed
138 // for the module. The default value of -1 means no limit.
139 static cl::opt<int> MaxNumPartialInlining(
140  "max-partial-inlining", cl::init(-1), cl::Hidden, cl::ZeroOrMore,
141  cl::desc("Max number of partial inlining. The default is unlimited"));
142 
143 // Used only when PGO or user annotated branch data is absent. It is
144 // the least value that is used to weigh the outline region. If BFI
145 // produces a larger value, the BFI value will be used.
146 static cl::opt<int>
147  OutlineRegionFreqPercent("outline-region-freq-percent", cl::init(75),
148  cl::Hidden, cl::ZeroOrMore,
149  cl::desc("Relative frequency of outline region to "
150  "the entry block"));
151 
152 static cl::opt<unsigned> ExtraOutliningPenalty(
153  "partial-inlining-extra-penalty", cl::init(0), cl::Hidden,
154  cl::desc("A debug option to add additional penalty to the computed one."));
155 
156 namespace {
157 
158 struct FunctionOutliningInfo {
159  FunctionOutliningInfo() = default;
160 
161  // Returns the number of blocks to be inlined including all blocks
162  // in Entries and one return block.
163  unsigned GetNumInlinedBlocks() const { return Entries.size() + 1; }
164 
165  // A set of blocks including the function entry that guard
166  // the region to be outlined.
167  SmallVector<BasicBlock *, 4> Entries;
168 
169  // The return block that is not included in the outlined region.
170  BasicBlock *ReturnBlock = nullptr;
171 
172  // The dominating block of the region to be outlined.
173  BasicBlock *NonReturnBlock = nullptr;
174 
175  // The set of blocks in Entries that are predecessors to ReturnBlock
176  SmallVector<BasicBlock *, 4> ReturnBlockPreds;
177 };
178 
179 struct FunctionOutliningMultiRegionInfo {
180  FunctionOutliningMultiRegionInfo()
181  : ORI() {}
182 
183  // Container for outline regions
184  struct OutlineRegionInfo {
185  OutlineRegionInfo(SmallVector<BasicBlock *, 8> Region,
186  BasicBlock *EntryBlock, BasicBlock *ExitBlock,
187  BasicBlock *ReturnBlock)
188  : Region(Region), EntryBlock(EntryBlock), ExitBlock(ExitBlock),
189  ReturnBlock(ReturnBlock) {}
191  BasicBlock *EntryBlock;
192  BasicBlock *ExitBlock;
193  BasicBlock *ReturnBlock;
194  };
195 
196  SmallVector<OutlineRegionInfo, 4> ORI;
197 };
198 
199 struct PartialInlinerImpl {
200 
201  PartialInlinerImpl(
202  std::function<AssumptionCache &(Function &)> *GetAC,
203  std::function<TargetTransformInfo &(Function &)> *GTTI,
204  Optional<function_ref<BlockFrequencyInfo &(Function &)>> GBFI,
205  ProfileSummaryInfo *ProfSI,
206  std::function<OptimizationRemarkEmitter &(Function &)> *GORE)
207  : GetAssumptionCache(GetAC), GetTTI(GTTI), GetBFI(GBFI), PSI(ProfSI),
208  GetORE(GORE) {}
209 
210  bool run(Module &M);
211  // Main part of the transformation that calls helper functions to find
212  // outlining candidates, clone & outline the function, and attempt to
213  // partially inline the resulting function. Returns true if
214  // inlining was successful, false otherwise. Also returns the outline
215  // function (only if we partially inlined early returns) as there is a
216  // possibility to further "peel" early return statements that were left in the
217  // outline function due to code size.
218  std::pair<bool, Function *> unswitchFunction(Function *F);
219 
220  // This class speculatively clones the function to be partially inlined.
221  // At the end of partial inlining, the remaining callsites to the cloned
222  // function that are not partially inlined will be fixed up to reference
223  // the original function, and the cloned function will be erased.
224  struct FunctionCloner {
225  // Two constructors, one for single region outlining, the other for
226  // multi-region outlining.
227  FunctionCloner(Function *F, FunctionOutliningInfo *OI,
228  OptimizationRemarkEmitter &ORE);
229  FunctionCloner(Function *F, FunctionOutliningMultiRegionInfo *OMRI,
230  OptimizationRemarkEmitter &ORE);
231  ~FunctionCloner();
232 
233  // Prepare for function outlining: making sure there is only
234  // one incoming edge from the extracted/outlined region to
235  // the return block.
236  void NormalizeReturnBlock();
237 
238  // Do function outlining for cold regions.
239  bool doMultiRegionFunctionOutlining();
240  // Do function outlining for region after early return block(s).
241  // NOTE: For vararg functions that do the vararg handling in the outlined
242  // function, we temporarily generate IR that does not properly
243  // forward varargs to the outlined function. Calling InlineFunction
244  // will update calls to the outlined functions to properly forward
245  // the varargs.
246  Function *doSingleRegionFunctionOutlining();
247 
248  Function *OrigFunc = nullptr;
249  Function *ClonedFunc = nullptr;
250 
251  typedef std::pair<Function *, BasicBlock *> FuncBodyCallerPair;
252  // Keep track of Outlined Functions and the basic block they're called from.
253  SmallVector<FuncBodyCallerPair, 4> OutlinedFunctions;
254 
255  // ClonedFunc is inlined in one of its callers after function
256  // outlining.
257  bool IsFunctionInlined = false;
258  // The cost of the region to be outlined.
259  int OutlinedRegionCost = 0;
260  // ClonedOI is specific to outlining non-early return blocks.
261  std::unique_ptr<FunctionOutliningInfo> ClonedOI = nullptr;
262  // ClonedOMRI is specific to outlining cold regions.
263  std::unique_ptr<FunctionOutliningMultiRegionInfo> ClonedOMRI = nullptr;
264  std::unique_ptr<BlockFrequencyInfo> ClonedFuncBFI = nullptr;
265  OptimizationRemarkEmitter &ORE;
266  };
267 
268 private:
269  int NumPartialInlining = 0;
270  std::function<AssumptionCache &(Function &)> *GetAssumptionCache;
271  std::function<TargetTransformInfo &(Function &)> *GetTTI;
272  Optional<function_ref<BlockFrequencyInfo &(Function &)>> GetBFI;
273  ProfileSummaryInfo *PSI;
274  std::function<OptimizationRemarkEmitter &(Function &)> *GetORE;
275 
276  // Return the frequency of the OutliningCallBB relative to F's entry point.
277  // The result is no larger than 1 and is represented using BP.
278  // (Note that the outlined region's 'head' block can only have incoming
279  // edges from the guarding entry blocks).
280  BranchProbability getOutliningCallBBRelativeFreq(FunctionCloner &Cloner);
281 
282  // Return true if the callee of CS should be partially inlined with
283  // profit.
284  bool shouldPartialInline(CallSite CS, FunctionCloner &Cloner,
285  BlockFrequency WeightedOutliningRcost);
286 
287  // Try to inline DuplicateFunction (cloned from F with a call to
288  // the OutlinedFunction) into its callers. Return true
289  // if there is any successful inlining.
290  bool tryPartialInline(FunctionCloner &Cloner);
291 
292  // Compute the mapping from each use site of DuplicateFunction to the
293  // enclosing BB's profile count.
294  void computeCallsiteToProfCountMap(Function *DuplicateFunction,
295  DenseMap<User *, uint64_t> &SiteCountMap);
296 
297  bool IsLimitReached() {
298  return (MaxNumPartialInlining != -1 &&
299  NumPartialInlining >= MaxNumPartialInlining);
300  }
301 
302  static CallSite getCallSite(User *U) {
303  CallSite CS;
304  if (CallInst *CI = dyn_cast<CallInst>(U))
305  CS = CallSite(CI);
306  else if (InvokeInst *II = dyn_cast<InvokeInst>(U))
307  CS = CallSite(II);
308  else
309  llvm_unreachable("All uses must be calls");
310  return CS;
311  }
312 
313  static CallSite getOneCallSiteTo(Function *F) {
314  User *User = *F->user_begin();
315  return getCallSite(User);
316  }
317 
318  std::tuple<DebugLoc, BasicBlock *> getOneDebugLoc(Function *F) {
319  CallSite CS = getOneCallSiteTo(F);
320  DebugLoc DLoc = CS.getInstruction()->getDebugLoc();
321  BasicBlock *Block = CS.getParent();
322  return std::make_tuple(DLoc, Block);
323  }
324 
325  // Returns the costs associated with function outlining:
326  // - The first value is the non-weighted runtime cost for making the call
327  // to the outlined function, including the additional setup cost in the
328  // outlined function itself;
329  // - The second value is the estimated size of the new call sequence in
330  // basic block Cloner.OutliningCallBB;
331  std::tuple<int, int> computeOutliningCosts(FunctionCloner &Cloner);
332 
333  // Compute the 'InlineCost' of block BB. InlineCost is a proxy used to
334  // approximate both the size and runtime cost (Note that in the current
335  // inline cost analysis, there is no clear distinction there either).
336  static int computeBBInlineCost(BasicBlock *BB);
337 
338  std::unique_ptr<FunctionOutliningInfo> computeOutliningInfo(Function *F);
339  std::unique_ptr<FunctionOutliningMultiRegionInfo>
340  computeOutliningColdRegionsInfo(Function *F);
341 };
342 
343 struct PartialInlinerLegacyPass : public ModulePass {
344  static char ID; // Pass identification, replacement for typeid
345 
346  PartialInlinerLegacyPass() : ModulePass(ID) {
347  initializePartialInlinerLegacyPassPass(*PassRegistry::getPassRegistry());
348  }
349 
350  void getAnalysisUsage(AnalysisUsage &AU) const override {
351  AU.addRequired<AssumptionCacheTracker>();
352  AU.addRequired<ProfileSummaryInfoWrapperPass>();
353  AU.addRequired<TargetTransformInfoWrapperPass>();
354  }
355 
356  bool runOnModule(Module &M) override {
357  if (skipModule(M))
358  return false;
359 
360  AssumptionCacheTracker *ACT = &getAnalysis<AssumptionCacheTracker>();
361  TargetTransformInfoWrapperPass *TTIWP =
362  &getAnalysis<TargetTransformInfoWrapperPass>();
363  ProfileSummaryInfo *PSI =
364  getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI();
365  std::unique_ptr<OptimizationRemarkEmitter> UPORE;
366 
367  std::function<AssumptionCache &(Function &)> GetAssumptionCache =
368  [&ACT](Function &F) -> AssumptionCache & {
369  return ACT->getAssumptionCache(F);
370  };
371 
372  std::function<TargetTransformInfo &(Function &)> GetTTI =
373  [&TTIWP](Function &F) -> TargetTransformInfo & {
374  return TTIWP->getTTI(F);
375  };
376 
377  std::function<OptimizationRemarkEmitter &(Function &)> GetORE =
378  [&UPORE](Function &F) -> OptimizationRemarkEmitter & {
379  UPORE.reset(new OptimizationRemarkEmitter(&F));
380  return *UPORE.get();
381  };
382 
383  return PartialInlinerImpl(&GetAssumptionCache, &GetTTI, NoneType::None, PSI,
384  &GetORE)
385  .run(M);
386  }
387 };
388 
389 } // end anonymous namespace
390 
391 std::unique_ptr<FunctionOutliningMultiRegionInfo>
392 PartialInlinerImpl::computeOutliningColdRegionsInfo(Function *F) {
393  BasicBlock *EntryBlock = &F->front();
394 
395  DominatorTree DT(*F);
396  LoopInfo LI(DT);
397  BranchProbabilityInfo BPI(*F, LI);
398  std::unique_ptr<BlockFrequencyInfo> ScopedBFI;
399  BlockFrequencyInfo *BFI;
400  if (!GetBFI) {
401  ScopedBFI.reset(new BlockFrequencyInfo(*F, BPI, LI));
402  BFI = ScopedBFI.get();
403  } else
404  BFI = &(*GetBFI)(*F);
405 
406  auto &ORE = (*GetORE)(*F);
407 
408  // Return if we don't have profiling information.
409  if (!PSI->hasInstrumentationProfile())
410  return std::unique_ptr<FunctionOutliningMultiRegionInfo>();
411 
412  std::unique_ptr<FunctionOutliningMultiRegionInfo> OutliningInfo =
413  llvm::make_unique<FunctionOutliningMultiRegionInfo>();
414 
415  auto IsSingleEntry = [](SmallVectorImpl<BasicBlock *> &BlockList) {
416  BasicBlock *Dom = BlockList.front();
417  return BlockList.size() > 1 &&
418  std::distance(pred_begin(Dom), pred_end(Dom)) == 1;
419  };
420 
421  auto IsSingleExit =
422  [&ORE](SmallVectorImpl<BasicBlock *> &BlockList) -> BasicBlock * {
423  BasicBlock *ExitBlock = nullptr;
424  for (auto *Block : BlockList) {
425  for (auto SI = succ_begin(Block); SI != succ_end(Block); ++SI) {
426  if (!is_contained(BlockList, *SI)) {
427  if (ExitBlock) {
428  ORE.emit([&]() {
429  return OptimizationRemarkMissed(DEBUG_TYPE, "MultiExitRegion",
430  &SI->front())
431  << "Region dominated by "
432  << ore::NV("Block", BlockList.front()->getName())
433  << " has more than one region exit edge.";
434  });
435  return nullptr;
436  } else
437  ExitBlock = Block;
438  }
439  }
440  }
441  return ExitBlock;
442  };
443 
444  auto BBProfileCount = [BFI](BasicBlock *BB) {
445  return BFI->getBlockProfileCount(BB)
446  ? BFI->getBlockProfileCount(BB).getValue()
447  : 0;
448  };
449 
450  // Use the same computeBBInlineCost function to compute the cost savings of
451  // outlining the candidate region.
452  int OverallFunctionCost = 0;
453  for (auto &BB : *F)
454  OverallFunctionCost += computeBBInlineCost(&BB);
455 
456 #ifndef NDEBUG
457  if (TracePartialInlining)
458  dbgs() << "OverallFunctionCost = " << OverallFunctionCost << "\n";
459 #endif
460  int MinOutlineRegionCost =
461  static_cast<int>(OverallFunctionCost * MinRegionSizeRatio);
462  BranchProbability MinBranchProbability(
463  static_cast<int>(ColdBranchRatio * MinBlockCounterExecution),
464  MinBlockCounterExecution);
465  bool ColdCandidateFound = false;
466  BasicBlock *CurrEntry = EntryBlock;
467  std::vector<BasicBlock *> DFS;
468  DenseMap<BasicBlock *, bool> VisitedMap;
469  DFS.push_back(CurrEntry);
470  VisitedMap[CurrEntry] = true;
471  // Use Depth First Search on the basic blocks to find CFG edges that are
472  // considered cold.
473  // A candidate cold region's inline cost must also be compared to the
474  // overall inline cost of the original function. The region is outlined only
475  // if it reduces the inline cost of the function by 'MinOutlineRegionCost' or
476  // more.
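  // Illustrative summary (not part of the original source): with the default
  // options, a candidate region is only outlined when (a) its predecessor
  // block is not cold and has at least 100 profile samples, (b) the edge into
  // the region has branch probability <= 10%, (c) the region is single
  // entry/single exit, and (d) the region's inline cost is at least 10% of
  // the whole function's inline cost (MinOutlineRegionCost).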
477  while (!DFS.empty()) {
478  auto *thisBB = DFS.back();
479  DFS.pop_back();
480  // Only consider regions with predecessor blocks that are considered
481  // not-cold (default: part of the top 99.99% of all block counters)
482  // AND greater than our minimum block execution count (default: 100).
483  if (PSI->isColdBB(thisBB, BFI) ||
484  BBProfileCount(thisBB) < MinBlockCounterExecution)
485  continue;
486  for (auto SI = succ_begin(thisBB); SI != succ_end(thisBB); ++SI) {
487  if (VisitedMap[*SI])
488  continue;
489  VisitedMap[*SI] = true;
490  DFS.push_back(*SI);
491  // If branch isn't cold, we skip to the next one.
492  BranchProbability SuccProb = BPI.getEdgeProbability(thisBB, *SI);
493  if (SuccProb > MinBranchProbability)
494  continue;
495 #ifndef NDEBUG
496  if (TracePartialInlining) {
497  dbgs() << "Found cold edge: " << thisBB->getName() << "->"
498  << (*SI)->getName() << "\nBranch Probability = " << SuccProb
499  << "\n";
500  }
501 #endif
502  SmallVector<BasicBlock *, 8> DominateVector;
503  DT.getDescendants(*SI, DominateVector);
504  // We can only outline single entry regions (for now).
505  if (!IsSingleEntry(DominateVector))
506  continue;
507  BasicBlock *ExitBlock = nullptr;
508  // We can only outline single exit regions (for now).
509  if (!(ExitBlock = IsSingleExit(DominateVector)))
510  continue;
511  int OutlineRegionCost = 0;
512  for (auto *BB : DominateVector)
513  OutlineRegionCost += computeBBInlineCost(BB);
514 
515 #ifndef NDEBUG
516  if (TracePartialInlining)
517  dbgs() << "OutlineRegionCost = " << OutlineRegionCost << "\n";
518 #endif
519 
520  if (OutlineRegionCost < MinOutlineRegionCost) {
521  ORE.emit([&]() {
522  return OptimizationRemarkAnalysis(DEBUG_TYPE, "TooCostly",
523  &SI->front())
524  << ore::NV("Callee", F) << " inline cost-savings smaller than "
525  << ore::NV("Cost", MinOutlineRegionCost);
526  });
527  continue;
528  }
529  // For now, ignore blocks that belong to a single-entry single-exit (SISE)
530  // region that is a candidate for outlining. In the future, we may want to look
531  // at inner regions because the outer region may have live-exit
532  // variables.
533  for (auto *BB : DominateVector)
534  VisitedMap[BB] = true;
535  // ReturnBlock here means the block after the outline call
536  BasicBlock *ReturnBlock = ExitBlock->getSingleSuccessor();
537  // assert(ReturnBlock && "ReturnBlock is NULL somehow!");
538  FunctionOutliningMultiRegionInfo::OutlineRegionInfo RegInfo(
539  DominateVector, DominateVector.front(), ExitBlock, ReturnBlock);
540  RegInfo.Region = DominateVector;
541  OutliningInfo->ORI.push_back(RegInfo);
542 #ifndef NDEBUG
543  if (TracePartialInlining) {
544  dbgs() << "Found Cold Candidate starting at block: "
545  << DominateVector.front()->getName() << "\n";
546  }
547 #endif
548  ColdCandidateFound = true;
549  NumColdRegionsFound++;
550  }
551  }
552  if (ColdCandidateFound)
553  return OutliningInfo;
554  else
555  return std::unique_ptr<FunctionOutliningMultiRegionInfo>();
556 }
557 
558 std::unique_ptr<FunctionOutliningInfo>
559 PartialInlinerImpl::computeOutliningInfo(Function *F) {
560  BasicBlock *EntryBlock = &F->front();
561  BranchInst *BR = dyn_cast<BranchInst>(EntryBlock->getTerminator());
562  if (!BR || BR->isUnconditional())
563  return std::unique_ptr<FunctionOutliningInfo>();
564 
565  // Returns true if Succ is BB's successor
566  auto IsSuccessor = [](BasicBlock *Succ, BasicBlock *BB) {
567  return is_contained(successors(BB), Succ);
568  };
569 
570  auto SuccSize = [](BasicBlock *BB) {
571  return std::distance(succ_begin(BB), succ_end(BB));
572  };
573 
574  auto IsReturnBlock = [](BasicBlock *BB) {
575  TerminatorInst *TI = BB->getTerminator();
576  return isa<ReturnInst>(TI);
577  };
578 
579  auto GetReturnBlock = [&](BasicBlock *Succ1, BasicBlock *Succ2) {
580  if (IsReturnBlock(Succ1))
581  return std::make_tuple(Succ1, Succ2);
582  if (IsReturnBlock(Succ2))
583  return std::make_tuple(Succ2, Succ1);
584 
585  return std::make_tuple<BasicBlock *, BasicBlock *>(nullptr, nullptr);
586  };
587 
588  // Detect a triangular shape:
589  auto GetCommonSucc = [&](BasicBlock *Succ1, BasicBlock *Succ2) {
590  if (IsSuccessor(Succ1, Succ2))
591  return std::make_tuple(Succ1, Succ2);
592  if (IsSuccessor(Succ2, Succ1))
593  return std::make_tuple(Succ2, Succ1);
594 
595  return std::make_tuple<BasicBlock *, BasicBlock *>(nullptr, nullptr);
596  };
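 // Illustrative sketch (not part of the original source) of the triangular
 // shape GetCommonSucc matches, where CurrEntry ends in a conditional branch:
 //
 //        CurrEntry
 //        /      \
 //       v        v
 //   OtherSucc    |
 //        \       |
 //         v      v
 //         CommSucc
 //
 // CurrEntry is added to the inlined 'Entries' set and the walk continues
 // from OtherSucc.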
597 
598  std::unique_ptr<FunctionOutliningInfo> OutliningInfo =
599  llvm::make_unique<FunctionOutliningInfo>();
600 
601  BasicBlock *CurrEntry = EntryBlock;
602  bool CandidateFound = false;
603  do {
604  // The number of blocks to be inlined has already reached
605  // the limit. When MaxNumInlineBlocks is set to 0 or 1, this
606  // disables partial inlining for the function.
607  if (OutliningInfo->GetNumInlinedBlocks() >= MaxNumInlineBlocks)
608  break;
609 
610  if (SuccSize(CurrEntry) != 2)
611  break;
612 
613  BasicBlock *Succ1 = *succ_begin(CurrEntry);
614  BasicBlock *Succ2 = *(succ_begin(CurrEntry) + 1);
615 
616  BasicBlock *ReturnBlock, *NonReturnBlock;
617  std::tie(ReturnBlock, NonReturnBlock) = GetReturnBlock(Succ1, Succ2);
618 
619  if (ReturnBlock) {
620  OutliningInfo->Entries.push_back(CurrEntry);
621  OutliningInfo->ReturnBlock = ReturnBlock;
622  OutliningInfo->NonReturnBlock = NonReturnBlock;
623  CandidateFound = true;
624  break;
625  }
626 
627  BasicBlock *CommSucc;
628  BasicBlock *OtherSucc;
629  std::tie(CommSucc, OtherSucc) = GetCommonSucc(Succ1, Succ2);
630 
631  if (!CommSucc)
632  break;
633 
634  OutliningInfo->Entries.push_back(CurrEntry);
635  CurrEntry = OtherSucc;
636  } while (true);
637 
638  if (!CandidateFound)
639  return std::unique_ptr<FunctionOutliningInfo>();
640 
641  // Do a sanity check of the entries: there should not
642  // be any successors (not in the entry set) other than
643  // {ReturnBlock, NonReturnBlock}
644  assert(OutliningInfo->Entries[0] == &F->front() &&
645  "Function Entry must be the first in Entries vector");
646  DenseSet<BasicBlock *> Entries;
647  for (BasicBlock *E : OutliningInfo->Entries)
648  Entries.insert(E);
649 
650  // Returns true if BB has a predecessor that is not
651  // in the Entries set.
652  auto HasNonEntryPred = [Entries](BasicBlock *BB) {
653  for (auto Pred : predecessors(BB)) {
654  if (!Entries.count(Pred))
655  return true;
656  }
657  return false;
658  };
659  auto CheckAndNormalizeCandidate =
660  [Entries, HasNonEntryPred](FunctionOutliningInfo *OutliningInfo) {
661  for (BasicBlock *E : OutliningInfo->Entries) {
662  for (auto Succ : successors(E)) {
663  if (Entries.count(Succ))
664  continue;
665  if (Succ == OutliningInfo->ReturnBlock)
666  OutliningInfo->ReturnBlockPreds.push_back(E);
667  else if (Succ != OutliningInfo->NonReturnBlock)
668  return false;
669  }
670  // There should not be any outside incoming edges either:
671  if (HasNonEntryPred(E))
672  return false;
673  }
674  return true;
675  };
676 
677  if (!CheckAndNormalizeCandidate(OutliningInfo.get()))
678  return std::unique_ptr<FunctionOutliningInfo>();
679 
680  // Now further grow the candidate's inlining region by
681  // peeling off dominating blocks from the outlining region:
682  while (OutliningInfo->GetNumInlinedBlocks() < MaxNumInlineBlocks) {
683  BasicBlock *Cand = OutliningInfo->NonReturnBlock;
684  if (SuccSize(Cand) != 2)
685  break;
686 
687  if (HasNonEntryPred(Cand))
688  break;
689 
690  BasicBlock *Succ1 = *succ_begin(Cand);
691  BasicBlock *Succ2 = *(succ_begin(Cand) + 1);
692 
693  BasicBlock *ReturnBlock, *NonReturnBlock;
694  std::tie(ReturnBlock, NonReturnBlock) = GetReturnBlock(Succ1, Succ2);
695  if (!ReturnBlock || ReturnBlock != OutliningInfo->ReturnBlock)
696  break;
697 
698  if (NonReturnBlock->getSinglePredecessor() != Cand)
699  break;
700 
701  // Now grow and update OutliningInfo:
702  OutliningInfo->Entries.push_back(Cand);
703  OutliningInfo->NonReturnBlock = NonReturnBlock;
704  OutliningInfo->ReturnBlockPreds.push_back(Cand);
705  Entries.insert(Cand);
706  }
707 
708  return OutliningInfo;
709 }
710 
711 // Check if there is PGO data or user-annotated branch data:
712 static bool hasProfileData(Function *F, FunctionOutliningInfo *OI) {
713  if (F->hasProfileData())
714  return true;
715  // Now check if any of the entry blocks has MD_prof data:
716  for (auto *E : OI->Entries) {
717  BranchInst *BR = dyn_cast<BranchInst>(E->getTerminator());
718  if (!BR || BR->isUnconditional())
719  continue;
720  uint64_t T, F;
721  if (BR->extractProfMetadata(T, F))
722  return true;
723  }
724  return false;
725 }
726 
727 BranchProbability
728 PartialInlinerImpl::getOutliningCallBBRelativeFreq(FunctionCloner &Cloner) {
729  BasicBlock *OutliningCallBB = Cloner.OutlinedFunctions.back().second;
730  auto EntryFreq =
731  Cloner.ClonedFuncBFI->getBlockFreq(&Cloner.ClonedFunc->getEntryBlock());
732  auto OutliningCallFreq =
733  Cloner.ClonedFuncBFI->getBlockFreq(OutliningCallBB);
734  // FIXME Hackery needed because ClonedFuncBFI is based on the function BEFORE
735  // we outlined any regions, so we may encounter situations where the
736  // OutliningCallFreq is *slightly* bigger than the EntryFreq.
737  if (OutliningCallFreq.getFrequency() > EntryFreq.getFrequency()) {
738  OutliningCallFreq = EntryFreq;
739  }
740  auto OutlineRegionRelFreq = BranchProbability::getBranchProbability(
741  OutliningCallFreq.getFrequency(), EntryFreq.getFrequency());
742 
743  if (hasProfileData(Cloner.OrigFunc, Cloner.ClonedOI.get()))
744  return OutlineRegionRelFreq;
745 
746  // When profile data is not available, we need to be conservative in
747  // estimating the overall savings. Static branch prediction can usually
748  // guess the branch direction right (taken/non-taken), but the guessed
749  // branch probability is usually not biased enough. In case when the
750  // outlined region is predicted to be likely, its probability needs
751  // to be made higher (more biased) to not under-estimate the cost of
752  // function outlining. On the other hand, if the outlined region
753  // is predicted to be less likely, the predicted probability is usually
754  // higher than the actual. For instance, the actual probability of the
755  // less likely target is only 5%, but the guessed probability can be
756  // 40%. In the latter case, there is no need for further adjustment.
757  // FIXME: add an option for this.
758  if (OutlineRegionRelFreq < BranchProbability(45, 100))
759  return OutlineRegionRelFreq;
760 
761  OutlineRegionRelFreq = std::max(
762  OutlineRegionRelFreq, BranchProbability(OutlineRegionFreqPercent, 100));
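 // Illustrative example (not part of the original source): with the default
 // OutlineRegionFreqPercent of 75, a statically estimated relative frequency
 // of 60% is raised here to max(60%, 75%) = 75%, while an estimate of 30%
 // was already returned above unchanged, since it is below the 45% cutoff.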
763 
764  return OutlineRegionRelFreq;
765 }
766 
767 bool PartialInlinerImpl::shouldPartialInline(
768  CallSite CS, FunctionCloner &Cloner,
769  BlockFrequency WeightedOutliningRcost) {
770  using namespace ore;
771 
772  if (SkipCostAnalysis)
773  return true;
774 
775  Instruction *Call = CS.getInstruction();
776  Function *Callee = CS.getCalledFunction();
777  assert(Callee == Cloner.ClonedFunc);
778 
779  Function *Caller = CS.getCaller();
780  auto &CalleeTTI = (*GetTTI)(*Callee);
781  auto &ORE = (*GetORE)(*Caller);
782  InlineCost IC = getInlineCost(CS, getInlineParams(), CalleeTTI,
783  *GetAssumptionCache, GetBFI, PSI, &ORE);
784 
785  if (IC.isAlways()) {
786  ORE.emit([&]() {
787  return OptimizationRemarkAnalysis(DEBUG_TYPE, "AlwaysInline", Call)
788  << NV("Callee", Cloner.OrigFunc)
789  << " should always be fully inlined, not partially";
790  });
791  return false;
792  }
793 
794  if (IC.isNever()) {
795  ORE.emit([&]() {
796  return OptimizationRemarkMissed(DEBUG_TYPE, "NeverInline", Call)
797  << NV("Callee", Cloner.OrigFunc) << " not partially inlined into "
798  << NV("Caller", Caller)
799  << " because it should never be inlined (cost=never)";
800  });
801  return false;
802  }
803 
804  if (!IC) {
805  ORE.emit([&]() {
806  return OptimizationRemarkAnalysis(DEBUG_TYPE, "TooCostly", Call)
807  << NV("Callee", Cloner.OrigFunc) << " not partially inlined into "
808  << NV("Caller", Caller) << " because too costly to inline (cost="
809  << NV("Cost", IC.getCost()) << ", threshold="
810  << NV("Threshold", IC.getCostDelta() + IC.getCost()) << ")";
811  });
812  return false;
813  }
814  const DataLayout &DL = Caller->getParent()->getDataLayout();
815 
816  // The savings of eliminating the call:
817  int NonWeightedSavings = getCallsiteCost(CS, DL);
818  BlockFrequency NormWeightedSavings(NonWeightedSavings);
819 
820  // If the weighted savings are smaller than the weighted cost, return false.
821  if (NormWeightedSavings < WeightedOutliningRcost) {
822  ORE.emit([&]() {
823  return OptimizationRemarkAnalysis(DEBUG_TYPE, "OutliningCallcostTooHigh",
824  Call)
825  << NV("Callee", Cloner.OrigFunc) << " not partially inlined into "
826  << NV("Caller", Caller) << " runtime overhead (overhead="
827  << NV("Overhead", (unsigned)WeightedOutliningRcost.getFrequency())
828  << ", savings="
829  << NV("Savings", (unsigned)NormWeightedSavings.getFrequency())
830  << ")"
831  << " of making the outlined call is too high";
832  });
833 
834  return false;
835  }
836 
837  ORE.emit([&]() {
838  return OptimizationRemarkAnalysis(DEBUG_TYPE, "CanBePartiallyInlined", Call)
839  << NV("Callee", Cloner.OrigFunc) << " can be partially inlined into "
840  << NV("Caller", Caller) << " with cost=" << NV("Cost", IC.getCost())
841  << " (threshold="
842  << NV("Threshold", IC.getCostDelta() + IC.getCost()) << ")";
843  });
844  return true;
845 }
846 
847 // TODO: Ideally we should share Inliner's InlineCost Analysis code.
848 // For now use a simplified version. The returned 'InlineCost' will be used
849 // to estimate the size cost as well as the runtime cost of the BB.
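// Illustrative example (not part of the original source): a block consisting
// of an all-zero-index GEP (free), an integer add (InstrCost), a call to a
// hypothetical foo(a, b) (costed via getCallsiteCost), and an unconditional
// branch (InstrCost) gets a total cost of 2 * InlineConstants::InstrCost
// plus the callsite cost of the call.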
850 int PartialInlinerImpl::computeBBInlineCost(BasicBlock *BB) {
851  int InlineCost = 0;
852  const DataLayout &DL = BB->getParent()->getParent()->getDataLayout();
853  for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I) {
854  if (isa<DbgInfoIntrinsic>(I))
855  continue;
856 
857  switch (I->getOpcode()) {
858  case Instruction::BitCast:
859  case Instruction::PtrToInt:
860  case Instruction::IntToPtr:
861  case Instruction::Alloca:
862  continue;
863  case Instruction::GetElementPtr:
864  if (cast<GetElementPtrInst>(I)->hasAllZeroIndices())
865  continue;
866  break;
867  default:
868  break;
869  }
870 
871  IntrinsicInst *IntrInst = dyn_cast<IntrinsicInst>(I);
872  if (IntrInst) {
873  if (IntrInst->getIntrinsicID() == Intrinsic::lifetime_start ||
874  IntrInst->getIntrinsicID() == Intrinsic::lifetime_end)
875  continue;
876  }
877 
878  if (CallInst *CI = dyn_cast<CallInst>(I)) {
879  InlineCost += getCallsiteCost(CallSite(CI), DL);
880  continue;
881  }
882 
883  if (InvokeInst *II = dyn_cast<InvokeInst>(I)) {
884  InlineCost += getCallsiteCost(CallSite(II), DL);
885  continue;
886  }
887 
888  if (SwitchInst *SI = dyn_cast<SwitchInst>(I)) {
889  InlineCost += (SI->getNumCases() + 1) * InlineConstants::InstrCost;
890  continue;
891  }
892  InlineCost += InlineConstants::InstrCost;
893  }
894  return InlineCost;
895 }
896 
897 std::tuple<int, int>
898 PartialInlinerImpl::computeOutliningCosts(FunctionCloner &Cloner) {
899  int OutliningFuncCallCost = 0, OutlinedFunctionCost = 0;
900  for (auto FuncBBPair : Cloner.OutlinedFunctions) {
901  Function *OutlinedFunc = FuncBBPair.first;
902  BasicBlock* OutliningCallBB = FuncBBPair.second;
903  // Now compute the cost of the call sequence to the outlined function
904  // 'OutlinedFunction' in BB 'OutliningCallBB':
905  OutliningFuncCallCost += computeBBInlineCost(OutliningCallBB);
906 
907  // Now compute the cost of the extracted/outlined function itself:
908  for (BasicBlock &BB : *OutlinedFunc)
909  OutlinedFunctionCost += computeBBInlineCost(&BB);
910  }
911  assert(OutlinedFunctionCost >= Cloner.OutlinedRegionCost &&
912  "Outlined function cost should be no less than the outlined region");
913 
914  // The code extractor introduces a new root block and exit stub blocks with
915  // additional unconditional branches. Those branches will be eliminated
916  // later with bb layout. The cost should be adjusted accordingly:
917  OutlinedFunctionCost -=
918  2 * InlineConstants::InstrCost * Cloner.OutlinedFunctions.size();
919 
920  int OutliningRuntimeOverhead =
921  OutliningFuncCallCost +
922  (OutlinedFunctionCost - Cloner.OutlinedRegionCost) +
923  ExtraOutliningPenalty;
924 
925  return std::make_tuple(OutliningFuncCallCost, OutliningRuntimeOverhead);
926 }
927 
928 // Create the callsite to profile count map which is
929 // used to update the original function's entry count,
930 // after the function is partially inlined into the callsite.
931 void PartialInlinerImpl::computeCallsiteToProfCountMap(
932  Function *DuplicateFunction,
933  DenseMap<User *, uint64_t> &CallSiteToProfCountMap) {
934  std::vector<User *> Users(DuplicateFunction->user_begin(),
935  DuplicateFunction->user_end());
936  Function *CurrentCaller = nullptr;
937  std::unique_ptr<BlockFrequencyInfo> TempBFI;
938  BlockFrequencyInfo *CurrentCallerBFI = nullptr;
939 
940  auto ComputeCurrBFI = [&,this](Function *Caller) {
941  // For the old pass manager:
942  if (!GetBFI) {
943  DominatorTree DT(*Caller);
944  LoopInfo LI(DT);
945  BranchProbabilityInfo BPI(*Caller, LI);
946  TempBFI.reset(new BlockFrequencyInfo(*Caller, BPI, LI));
947  CurrentCallerBFI = TempBFI.get();
948  } else {
949  // New pass manager:
950  CurrentCallerBFI = &(*GetBFI)(*Caller);
951  }
952  };
953 
954  for (User *User : Users) {
955  CallSite CS = getCallSite(User);
956  Function *Caller = CS.getCaller();
957  if (CurrentCaller != Caller) {
958  CurrentCaller = Caller;
959  ComputeCurrBFI(Caller);
960  } else {
961  assert(CurrentCallerBFI && "CallerBFI is not set");
962  }
963  BasicBlock *CallBB = CS.getInstruction()->getParent();
964  auto Count = CurrentCallerBFI->getBlockProfileCount(CallBB);
965  if (Count)
966  CallSiteToProfCountMap[User] = *Count;
967  else
968  CallSiteToProfCountMap[User] = 0;
969  }
970 }
971 
972 PartialInlinerImpl::FunctionCloner::FunctionCloner(
973  Function *F, FunctionOutliningInfo *OI, OptimizationRemarkEmitter &ORE)
974  : OrigFunc(F), ORE(ORE) {
975  ClonedOI = llvm::make_unique<FunctionOutliningInfo>();
976 
977  // Clone the function, so that we can hack away on it.
978  ValueToValueMapTy VMap;
979  ClonedFunc = CloneFunction(F, VMap);
980 
981  ClonedOI->ReturnBlock = cast<BasicBlock>(VMap[OI->ReturnBlock]);
982  ClonedOI->NonReturnBlock = cast<BasicBlock>(VMap[OI->NonReturnBlock]);
983  for (BasicBlock *BB : OI->Entries) {
984  ClonedOI->Entries.push_back(cast<BasicBlock>(VMap[BB]));
985  }
986  for (BasicBlock *E : OI->ReturnBlockPreds) {
987  BasicBlock *NewE = cast<BasicBlock>(VMap[E]);
988  ClonedOI->ReturnBlockPreds.push_back(NewE);
989  }
990  // Go ahead and update all uses to the duplicate, so that we can just
991  // use the inliner functionality when we're done hacking.
992  F->replaceAllUsesWith(ClonedFunc);
993 }
994 
995 PartialInlinerImpl::FunctionCloner::FunctionCloner(
996  Function *F, FunctionOutliningMultiRegionInfo *OI,
997  OptimizationRemarkEmitter &ORE)
998  : OrigFunc(F), ORE(ORE) {
999  ClonedOMRI = llvm::make_unique<FunctionOutliningMultiRegionInfo>();
1000 
1001  // Clone the function, so that we can hack away on it.
1002  ValueToValueMapTy VMap;
1003  ClonedFunc = CloneFunction(F, VMap);
1004 
1005  // Go through all Outline Candidate Regions and update all BasicBlock
1006  // information.
1007  for (FunctionOutliningMultiRegionInfo::OutlineRegionInfo RegionInfo :
1008  OI->ORI) {
1009  SmallVector<BasicBlock *, 8> Region;
1010  for (BasicBlock *BB : RegionInfo.Region) {
1011  Region.push_back(cast<BasicBlock>(VMap[BB]));
1012  }
1013  BasicBlock *NewEntryBlock = cast<BasicBlock>(VMap[RegionInfo.EntryBlock]);
1014  BasicBlock *NewExitBlock = cast<BasicBlock>(VMap[RegionInfo.ExitBlock]);
1015  BasicBlock *NewReturnBlock = nullptr;
1016  if (RegionInfo.ReturnBlock)
1017  NewReturnBlock = cast<BasicBlock>(VMap[RegionInfo.ReturnBlock]);
1018  FunctionOutliningMultiRegionInfo::OutlineRegionInfo MappedRegionInfo(
1019  Region, NewEntryBlock, NewExitBlock, NewReturnBlock);
1020  ClonedOMRI->ORI.push_back(MappedRegionInfo);
1021  }
1022  // Go ahead and update all uses to the duplicate, so that we can just
1023  // use the inliner functionality when we're done hacking.
1024  F->replaceAllUsesWith(ClonedFunc);
1025 }
1026 
1027 void PartialInlinerImpl::FunctionCloner::NormalizeReturnBlock() {
1028  auto getFirstPHI = [](BasicBlock *BB) {
1029  BasicBlock::iterator I = BB->begin();
1030  PHINode *FirstPhi = nullptr;
1031  while (I != BB->end()) {
1032  PHINode *Phi = dyn_cast<PHINode>(I);
1033  if (!Phi)
1034  break;
1035  if (!FirstPhi) {
1036  FirstPhi = Phi;
1037  break;
1038  }
1039  }
1040  return FirstPhi;
1041  };
1042 
1043  // Shouldn't need to normalize PHIs if we're not outlining non-early return
1044  // blocks.
1045  if (!ClonedOI)
1046  return;
1047 
1048  // Special hackery is needed with PHI nodes that have inputs from more than
1049  // one extracted block. For simplicity, just split the PHIs into a two-level
1050  // sequence of PHIs, some of which will go in the extracted region, and some
1051  // of which will go outside.
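 // Illustrative sketch (not part of the original source): given a return
 // block starting with
 //   %r = phi [ %a, %entry1 ], [ %b, %entry2 ], [ %c, %outlined.bb ]
 // the block is split after its PHIs; the old PHI keeps only the incoming
 // values from the to-be-outlined region
 //   %r.old = phi [ %c, %outlined.bb ]
 // and a new PHI in the (inlined) return block re-merges the values
 //   %r.new = phi [ %r.old, %PreReturn ], [ %a, %entry1 ], [ %b, %entry2 ]
 // (block and value names here are hypothetical).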
1052  BasicBlock *PreReturn = ClonedOI->ReturnBlock;
1053  // only split block when necessary:
1054  PHINode *FirstPhi = getFirstPHI(PreReturn);
1055  unsigned NumPredsFromEntries = ClonedOI->ReturnBlockPreds.size();
1056 
1057  if (!FirstPhi || FirstPhi->getNumIncomingValues() <= NumPredsFromEntries + 1)
1058  return;
1059 
1060  auto IsTrivialPhi = [](PHINode *PN) -> Value * {
1061  Value *CommonValue = PN->getIncomingValue(0);
1062  if (all_of(PN->incoming_values(),
1063  [&](Value *V) { return V == CommonValue; }))
1064  return CommonValue;
1065  return nullptr;
1066  };
1067 
1068  ClonedOI->ReturnBlock = ClonedOI->ReturnBlock->splitBasicBlock(
1069  ClonedOI->ReturnBlock->getFirstNonPHI()->getIterator());
1070  BasicBlock::iterator I = PreReturn->begin();
1071  Instruction *Ins = &ClonedOI->ReturnBlock->front();
1072  SmallVector<Instruction *, 4> DeadPhis;
1073  while (I != PreReturn->end()) {
1074  PHINode *OldPhi = dyn_cast<PHINode>(I);
1075  if (!OldPhi)
1076  break;
1077 
1078  PHINode *RetPhi =
1079  PHINode::Create(OldPhi->getType(), NumPredsFromEntries + 1, "", Ins);
1080  OldPhi->replaceAllUsesWith(RetPhi);
1081  Ins = ClonedOI->ReturnBlock->getFirstNonPHI();
1082 
1083  RetPhi->addIncoming(&*I, PreReturn);
1084  for (BasicBlock *E : ClonedOI->ReturnBlockPreds) {
1085  RetPhi->addIncoming(OldPhi->getIncomingValueForBlock(E), E);
1086  OldPhi->removeIncomingValue(E);
1087  }
1088 
1089  // After incoming value splitting, the old phi may become trivial.
1090  // Keeping the trivial phi can introduce a definition inside the outlined
1091  // region which is live-out, causing unnecessary overhead (load, store,
1092  // arg passing, etc).
1093  if (auto *OldPhiVal = IsTrivialPhi(OldPhi)) {
1094  OldPhi->replaceAllUsesWith(OldPhiVal);
1095  DeadPhis.push_back(OldPhi);
1096  }
1097  ++I;
1098  }
1099  for (auto *DP : DeadPhis)
1100  DP->eraseFromParent();
1101 
1102  for (auto E : ClonedOI->ReturnBlockPreds) {
1103  E->getTerminator()->replaceUsesOfWith(PreReturn, ClonedOI->ReturnBlock);
1104  }
1105 }
1106 
1107 bool PartialInlinerImpl::FunctionCloner::doMultiRegionFunctionOutlining() {
1108 
1109  auto ComputeRegionCost = [](SmallVectorImpl<BasicBlock *> &Region) {
1110  int Cost = 0;
1111  for (BasicBlock* BB : Region)
1112  Cost += computeBBInlineCost(BB);
1113  return Cost;
1114  };
1115 
1116  assert(ClonedOMRI && "Expecting OutlineInfo for multi region outline");
1117 
1118  if (ClonedOMRI->ORI.empty())
1119  return false;
1120 
1121  // The CodeExtractor needs a dominator tree.
1122  DominatorTree DT;
1123  DT.recalculate(*ClonedFunc);
1124 
1125  // Manually calculate a BlockFrequencyInfo and BranchProbabilityInfo.
1126  LoopInfo LI(DT);
1127  BranchProbabilityInfo BPI(*ClonedFunc, LI);
1128  ClonedFuncBFI.reset(new BlockFrequencyInfo(*ClonedFunc, BPI, LI));
1129 
1130  SetVector<Value *> Inputs, Outputs, Sinks;
1131  for (FunctionOutliningMultiRegionInfo::OutlineRegionInfo RegionInfo :
1132  ClonedOMRI->ORI) {
1133  int CurrentOutlinedRegionCost = ComputeRegionCost(RegionInfo.Region);
1134 
1135  CodeExtractor CE(RegionInfo.Region, &DT, /*AggregateArgs*/ false,
1136  ClonedFuncBFI.get(), &BPI, /* AllowVarargs */ false);
1137 
1138  CE.findInputsOutputs(Inputs, Outputs, Sinks);
1139 
1140 #ifndef NDEBUG
1141  if (TracePartialInlining) {
1142  dbgs() << "inputs: " << Inputs.size() << "\n";
1143  dbgs() << "outputs: " << Outputs.size() << "\n";
1144  for (Value *value : Inputs)
1145  dbgs() << "value used in func: " << *value << "\n";
1146  for (Value *output : Outputs)
1147  dbgs() << "instr used in func: " << *output << "\n";
1148  }
1149 #endif
1150  // Do not extract regions that have live exit variables.
1151  if (Outputs.size() > 0 && !ForceLiveExit)
1152  continue;
1153 
1154  Function *OutlinedFunc = CE.extractCodeRegion();
1155 
1156  if (OutlinedFunc) {
1157  CallSite OCS = PartialInlinerImpl::getOneCallSiteTo(OutlinedFunc);
1158  BasicBlock *OutliningCallBB = OCS.getInstruction()->getParent();
1159  assert(OutliningCallBB->getParent() == ClonedFunc);
1160  OutlinedFunctions.push_back(std::make_pair(OutlinedFunc,OutliningCallBB));
1161  NumColdRegionsOutlined++;
1162  OutlinedRegionCost += CurrentOutlinedRegionCost;
1163 
1164  if (MarkOutlinedColdCC) {
1165  OutlinedFunc->setCallingConv(CallingConv::Cold);
1166  OCS.setCallingConv(CallingConv::Cold);
1167  }
1168  } else
1169  ORE.emit([&]() {
1170  return OptimizationRemarkMissed(DEBUG_TYPE, "ExtractFailed",
1171  &RegionInfo.Region.front()->front())
1172  << "Failed to extract region at block "
1173  << ore::NV("Block", RegionInfo.Region.front());
1174  });
1175  }
1176 
1177  return !OutlinedFunctions.empty();
1178 }
1179 
1180 Function *
1181 PartialInlinerImpl::FunctionCloner::doSingleRegionFunctionOutlining() {
1182  // Returns true if the block is to be partially inlined into the caller
1183  // (i.e. not to be extracted into the out-of-line function).
1184  auto ToBeInlined = [&, this](BasicBlock *BB) {
1185  return BB == ClonedOI->ReturnBlock ||
1186  (std::find(ClonedOI->Entries.begin(), ClonedOI->Entries.end(), BB) !=
1187  ClonedOI->Entries.end());
1188  };
1189 
1190  assert(ClonedOI && "Expecting OutlineInfo for single region outline");
1191  // The CodeExtractor needs a dominator tree.
1192  DominatorTree DT;
1193  DT.recalculate(*ClonedFunc);
1194 
1195  // Manually calculate a BlockFrequencyInfo and BranchProbabilityInfo.
1196  LoopInfo LI(DT);
1197  BranchProbabilityInfo BPI(*ClonedFunc, LI);
1198  ClonedFuncBFI.reset(new BlockFrequencyInfo(*ClonedFunc, BPI, LI));
1199 
1200  // Gather up the blocks that we're going to extract.
1201  std::vector<BasicBlock *> ToExtract;
1202  ToExtract.push_back(ClonedOI->NonReturnBlock);
1203  OutlinedRegionCost +=
1204  PartialInlinerImpl::computeBBInlineCost(ClonedOI->NonReturnBlock);
1205  for (BasicBlock &BB : *ClonedFunc)
1206  if (!ToBeInlined(&BB) && &BB != ClonedOI->NonReturnBlock) {
1207  ToExtract.push_back(&BB);
1208  // FIXME: the code extractor may hoist/sink more code
1209  // into the outlined function which may make the outlining
1210  // overhead (the difference of the outlined function cost
1211  // and OutliningRegionCost) look larger.
1212  OutlinedRegionCost += computeBBInlineCost(&BB);
1213  }
1214 
1215  // Extract the body of the if.
1216  Function *OutlinedFunc =
1217  CodeExtractor(ToExtract, &DT, /*AggregateArgs*/ false,
1218  ClonedFuncBFI.get(), &BPI,
1219  /* AllowVarargs */ true)
1220  .extractCodeRegion();
1221 
1222  if (OutlinedFunc) {
1223  BasicBlock *OutliningCallBB =
1224  PartialInlinerImpl::getOneCallSiteTo(OutlinedFunc)
1225  .getInstruction()
1226  ->getParent();
1227  assert(OutliningCallBB->getParent() == ClonedFunc);
1228  OutlinedFunctions.push_back(std::make_pair(OutlinedFunc, OutliningCallBB));
1229  } else
1230  ORE.emit([&]() {
1231  return OptimizationRemarkMissed(DEBUG_TYPE, "ExtractFailed",
1232  &ToExtract.front()->front())
1233  << "Failed to extract region at block "
1234  << ore::NV("Block", ToExtract.front());
1235  });
1236 
1237  return OutlinedFunc;
1238 }
1239 
1240 PartialInlinerImpl::FunctionCloner::~FunctionCloner() {
1241  // Ditch the duplicate, since we're done with it, and rewrite all remaining
1242  // users (function pointers, etc.) back to the original function.
1243  ClonedFunc->replaceAllUsesWith(OrigFunc);
1244  ClonedFunc->eraseFromParent();
1245  if (!IsFunctionInlined) {
1246  // Remove each function that was speculatively created if there is no
1247  // reference.
1248  for (auto FuncBBPair : OutlinedFunctions) {
1249  Function *Func = FuncBBPair.first;
1250  Func->eraseFromParent();
1251  }
1252  }
1253 }
1254 
1255 std::pair<bool, Function *> PartialInlinerImpl::unswitchFunction(Function *F) {
1256 
1257  if (F->hasAddressTaken())
1258  return {false, nullptr};
1259 
1260  // Let inliner handle it
1261  if (F->hasFnAttribute(Attribute::AlwaysInline))
1262  return {false, nullptr};
1263 
1264  if (F->hasFnAttribute(Attribute::NoInline))
1265  return {false, nullptr};
1266 
1267  if (PSI->isFunctionEntryCold(F))
1268  return {false, nullptr};
1269 
1270  if (F->user_begin() == F->user_end())
1271  return {false, nullptr};
1272 
1273  auto &ORE = (*GetORE)(*F);
1274 
1275  // Only try to outline cold regions if we have a profile summary, which
1276  // implies we have profiling information.
1277  if (PSI->hasProfileSummary() && F->hasProfileData() &&
1278  !DisableMultiRegionPartialInline) {
1279  std::unique_ptr<FunctionOutliningMultiRegionInfo> OMRI =
1280  computeOutliningColdRegionsInfo(F);
1281  if (OMRI) {
1282  FunctionCloner Cloner(F, OMRI.get(), ORE);
1283 
1284 #ifndef NDEBUG
1285  if (TracePartialInlining) {
1286  dbgs() << "HotCountThreshold = " << PSI->getHotCountThreshold() << "\n";
1287  dbgs() << "ColdCountThreshold = " << PSI->getColdCountThreshold()
1288  << "\n";
1289  }
1290 #endif
1291  bool DidOutline = Cloner.doMultiRegionFunctionOutlining();
1292 
1293  if (DidOutline) {
1294 #ifndef NDEBUG
1295  if (TracePartialInlining) {
1296  dbgs() << ">>>>>> Outlined (Cloned) Function >>>>>>\n";
1297  Cloner.ClonedFunc->print(dbgs());
1298  dbgs() << "<<<<<< Outlined (Cloned) Function <<<<<<\n";
1299  }
1300 #endif
1301 
1302  if (tryPartialInline(Cloner))
1303  return {true, nullptr};
1304  }
1305  }
1306  }
1307 
1308  // Fall-thru to regular partial inlining if we:
1309  // i) can't find any cold regions to outline, or
1310  // ii) can't inline the outlined function anywhere.
1311  std::unique_ptr<FunctionOutliningInfo> OI = computeOutliningInfo(F);
1312  if (!OI)
1313  return {false, nullptr};
1314 
1315  FunctionCloner Cloner(F, OI.get(), ORE);
1316  Cloner.NormalizeReturnBlock();
1317 
1318  Function *OutlinedFunction = Cloner.doSingleRegionFunctionOutlining();
1319 
1320  if (!OutlinedFunction)
1321  return {false, nullptr};
1322 
1323  bool AnyInline = tryPartialInline(Cloner);
1324 
1325  if (AnyInline)
1326  return {true, OutlinedFunction};
1327 
1328  return {false, nullptr};
1329 }
1330 
1331 bool PartialInlinerImpl::tryPartialInline(FunctionCloner &Cloner) {
1332  if (Cloner.OutlinedFunctions.empty())
1333  return false;
1334 
1335  int SizeCost = 0;
1336  BlockFrequency WeightedRcost;
1337  int NonWeightedRcost;
1338  std::tie(SizeCost, NonWeightedRcost) = computeOutliningCosts(Cloner);
1339 
1340  // Only calculate RelativeToEntryFreq when we are doing single region
1341  // outlining.
1342  BranchProbability RelativeToEntryFreq;
1343  if (Cloner.ClonedOI) {
1344  RelativeToEntryFreq = getOutliningCallBBRelativeFreq(Cloner);
1345  } else
1346  // RelativeToEntryFreq doesn't make sense when we have more than one
1347  // outlined call because each call will have a different relative frequency
1348  // to the entry block. We can consider using the average, but the
1349  // usefulness of that information is questionable. For now, assume we never
1350  // execute the calls to outlined functions.
1351  RelativeToEntryFreq = BranchProbability(0, 1);
1352 
1353  WeightedRcost = BlockFrequency(NonWeightedRcost) * RelativeToEntryFreq;
1354 
1355  // If the call sequence(s) to the outlined function(s) are larger than the sum
1356  // of the original outlined region size(s), partial inlining does not increase
1357  // the chances of inlining the function with outlining (the inliner uses the
1358  // size increase to model the cost of inlining a callee).
1359  if (!SkipCostAnalysis && Cloner.OutlinedRegionCost < SizeCost) {
1360  auto &ORE = (*GetORE)(*Cloner.OrigFunc);
1361  DebugLoc DLoc;
1362  BasicBlock *Block;
1363  std::tie(DLoc, Block) = getOneDebugLoc(Cloner.ClonedFunc);
1364  ORE.emit([&]() {
1365  return OptimizationRemarkAnalysis(DEBUG_TYPE, "OutlineRegionTooSmall",
1366  DLoc, Block)
1367  << ore::NV("Function", Cloner.OrigFunc)
1368  << " not partially inlined into callers (Original Size = "
1369  << ore::NV("OutlinedRegionOriginalSize", Cloner.OutlinedRegionCost)
1370  << ", Size of call sequence to outlined function = "
1371  << ore::NV("NewSize", SizeCost) << ")";
1372  });
1373  return false;
1374  }
1375 
1376  assert(Cloner.OrigFunc->user_begin() == Cloner.OrigFunc->user_end() &&
1377  "F's users should all be replaced!");
1378 
1379  std::vector<User *> Users(Cloner.ClonedFunc->user_begin(),
1380  Cloner.ClonedFunc->user_end());
1381 
1382  DenseMap<User *, uint64_t> CallSiteToProfCountMap;
1383  auto CalleeEntryCount = Cloner.OrigFunc->getEntryCount();
1384  if (CalleeEntryCount)
1385  computeCallsiteToProfCountMap(Cloner.ClonedFunc, CallSiteToProfCountMap);
1386 
1387  uint64_t CalleeEntryCountV = (CalleeEntryCount ? *CalleeEntryCount : 0);
1388 
1389  bool AnyInline = false;
1390  for (User *User : Users) {
1391  CallSite CS = getCallSite(User);
1392 
1393  if (IsLimitReached())
1394  continue;
1395 
1396 
1397  if (!shouldPartialInline(CS, Cloner, WeightedRcost))
1398  continue;
1399 
1400  auto &ORE = (*GetORE)(*CS.getCaller());
1401  // Construct remark before doing the inlining, as after successful inlining
1402  // the callsite is removed.
1403  OptimizationRemark OR(DEBUG_TYPE, "PartiallyInlined", CS.getInstruction());
1404  OR << ore::NV("Callee", Cloner.OrigFunc) << " partially inlined into "
1405  << ore::NV("Caller", CS.getCaller());
1406 
1407  InlineFunctionInfo IFI(nullptr, GetAssumptionCache, PSI);
1408  // We can only forward varargs when we outlined a single region, else we
1409  // bail on vararg functions.
1410  if (!InlineFunction(CS, IFI, nullptr, true,
1411  (Cloner.ClonedOI ? Cloner.OutlinedFunctions.back().first
1412  : nullptr)))
1413  continue;
1414 
1415  ORE.emit(OR);
1416 
1417  // Now update the entry count:
1418  if (CalleeEntryCountV && CallSiteToProfCountMap.count(User)) {
1419  uint64_t CallSiteCount = CallSiteToProfCountMap[User];
1420  CalleeEntryCountV -= std::min(CalleeEntryCountV, CallSiteCount);
1421  }
1422 
1423  AnyInline = true;
1424  NumPartialInlining++;
1425  // Update the stats
1426  if (Cloner.ClonedOI)
1427  NumPartialInlined++;
1428  else
1429  NumColdOutlinePartialInlined++;
1430 
1431  }
1432 
1433  if (AnyInline) {
1434  Cloner.IsFunctionInlined = true;
1435  if (CalleeEntryCount)
1436  Cloner.OrigFunc->setEntryCount(CalleeEntryCountV);
1437  auto &ORE = (*GetORE)(*Cloner.OrigFunc);
1438  ORE.emit([&]() {
1439  return OptimizationRemark(DEBUG_TYPE, "PartiallyInlined", Cloner.OrigFunc)
1440  << "Partially inlined into at least one caller";
1441  });
1442 
1443  }
1444 
1445  return AnyInline;
1446 }
1447 
1448 bool PartialInlinerImpl::run(Module &M) {
1449  if (DisablePartialInlining)
1450  return false;
1451 
1452  std::vector<Function *> Worklist;
1453  Worklist.reserve(M.size());
1454  for (Function &F : M)
1455  if (!F.use_empty() && !F.isDeclaration())
1456  Worklist.push_back(&F);
1457 
1458  bool Changed = false;
1459  while (!Worklist.empty()) {
1460  Function *CurrFunc = Worklist.back();
1461  Worklist.pop_back();
1462 
1463  if (CurrFunc->use_empty())
1464  continue;
1465 
1466  bool Recursive = false;
1467  for (User *U : CurrFunc->users())
1468  if (Instruction *I = dyn_cast<Instruction>(U))
1469  if (I->getParent()->getParent() == CurrFunc) {
1470  Recursive = true;
1471  break;
1472  }
1473  if (Recursive)
1474  continue;
1475 
1476  std::pair<bool, Function * > Result = unswitchFunction(CurrFunc);
1477  if (Result.second)
1478  Worklist.push_back(Result.second);
1479  if (Result.first) {
1480  Changed = true;
1481  }
1482  }
1483 
1484  return Changed;
1485 }
1486 
1487 char PartialInlinerLegacyPass::ID = 0;
1488 
1489 INITIALIZE_PASS_BEGIN(PartialInlinerLegacyPass, "partial-inliner",
1490  "Partial Inliner", false, false)
1491 INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
1492 INITIALIZE_PASS_DEPENDENCY(ProfileSummaryInfoWrapperPass)
1493 INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)
1494 INITIALIZE_PASS_END(PartialInlinerLegacyPass, "partial-inliner",
1495  "Partial Inliner", false, false)
1496 
1497 ModulePass *llvm::createPartialInliningPass() {
1498  return new PartialInlinerLegacyPass();
1499 }
1500 
1501 PreservedAnalyses PartialInlinerPass::run(Module &M,
1502  ModuleAnalysisManager &AM) {
1503  auto &FAM = AM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager();
1504 
1505  std::function<AssumptionCache &(Function &)> GetAssumptionCache =
1506  [&FAM](Function &F) -> AssumptionCache & {
1507  return FAM.getResult<AssumptionAnalysis>(F);
1508  };
1509 
1510  std::function<BlockFrequencyInfo &(Function &)> GetBFI =
1511  [&FAM](Function &F) -> BlockFrequencyInfo & {
1512  return FAM.getResult<BlockFrequencyAnalysis>(F);
1513  };
1514 
1515  std::function<TargetTransformInfo &(Function &)> GetTTI =
1516  [&FAM](Function &F) -> TargetTransformInfo & {
1517  return FAM.getResult<TargetIRAnalysis>(F);
1518  };
1519 
1520  std::function<OptimizationRemarkEmitter &(Function &)> GetORE =
1521  [&FAM](Function &F) -> OptimizationRemarkEmitter & {
1522  return FAM.getResult<OptimizationRemarkEmitterAnalysis>(F);
1523  };
1524 
1525  ProfileSummaryInfo *PSI = &AM.getResult<ProfileSummaryAnalysis>(M);
1526 
1527  if (PartialInlinerImpl(&GetAssumptionCache, &GetTTI, {GetBFI}, PSI, &GetORE)
1528  .run(M))
1529  return PreservedAnalyses::none();
1530  return PreservedAnalyses::all();
1531 }
amdgpu Simplify well known AMD library false Value * Callee
static PreservedAnalyses none()
Convenience factory function for the empty preserved set.
Definition: PassManager.h:156
Interval::succ_iterator succ_end(Interval *I)
Definition: Interval.h:106
void replaceUsesOfWith(Value *From, Value *To)
Replace uses of one Value with another.
Definition: User.cpp:21
PreservedAnalyses run(Module &M, ModuleAnalysisManager &)
static cl::opt< unsigned > ExtraOutliningPenalty("partial-inlining-extra-penalty", cl::init(0), cl::Hidden, cl::desc("A debug option to add additional penalty to the computed one."))
BlockFrequencyInfo pass uses BlockFrequencyInfoImpl implementation to estimate IR basic block frequen...
void setCallingConv(CallingConv::ID CC)
Definition: Function.h:198
static cl::opt< bool > MarkOutlinedColdCC("pi-mark-coldcc", cl::init(false), cl::Hidden, cl::desc("Mark outline function calls with ColdCC"))
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:406
bool isAlways() const
Definition: InlineCost.h:98
Control flow instructions. These all have token chains.
Definition: ISDOpcodes.h:597
static cl::opt< bool > DisablePartialInlining("disable-partial-inlining", cl::init(false), cl::Hidden, cl::desc("Disable partial inlining"))
Subclasses of this class are all able to terminate a basic block.
Definition: InstrTypes.h:54
Wrapper pass for TargetTransformInfo.
A set of analyses that are preserved following a run of a transformation pass.
Definition: PassManager.h:153
const BasicBlock * getSinglePredecessor() const
Return the predecessor of this block if it has a single predecessor block.
Definition: BasicBlock.cpp:217
LLVM Basic Block Representation.
Definition: BasicBlock.h:59
Conditional or Unconditional Branch instruction.
An analysis pass based on the new PM to deliver ProfileSummaryInfo.
static cl::opt< float > ColdBranchRatio("cold-branch-ratio", cl::init(0.1), cl::Hidden, cl::desc("Minimum BranchProbability to consider a region cold."))
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
Value * getIncomingValueForBlock(const BasicBlock *BB) const
const Instruction & front() const
Definition: BasicBlock.h:264
std::pair< iterator, bool > insert(const ValueT &V)
Definition: DenseSet.h:187
static ManagedStatic< OptionRegistry > OR
Definition: Options.cpp:31
Diagnostic information for applied optimization remarks.
Interval::pred_iterator pred_begin(Interval *I)
pred_begin/pred_end - define methods so that Intervals may be used just like BasicBlocks can with the...
Definition: Interval.h:113
Represent the analysis usage information of a pass.
void setCallingConv(CallingConv::ID CC)
Set the calling convention of the call.
Definition: CallSite.h:316
Interval::pred_iterator pred_end(Interval *I)
Definition: Interval.h:116
Used in the streaming interface as the general argument type.
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
Definition: PassManager.h:159
auto find(R &&Range, const T &Val) -> decltype(adl_begin(Range))
Provide wrappers to std::find which take ranges instead of having to pass begin/end explicitly...
Definition: STLExtras.h:835
INITIALIZE_PASS_END(RegBankSelect, DEBUG_TYPE, "Assign register bank of generic virtual registers", false, false) RegBankSelect
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
InlineCost getInlineCost(CallSite CS, const InlineParams &Params, TargetTransformInfo &CalleeTTI, std::function< AssumptionCache &(Function &)> &GetAssumptionCache, Optional< function_ref< BlockFrequencyInfo &(Function &)>> GetBFI, ProfileSummaryInfo *PSI, OptimizationRemarkEmitter *ORE=nullptr)
Get an InlineCost object representing the cost of inlining this callsite.
Intrinsic::ID getIntrinsicID() const
Return the intrinsic ID of this intrinsic.
Definition: IntrinsicInst.h:51
A function analysis which provides an AssumptionCache.
Analysis pass which computes BlockFrequencyInfo.
Iterator for intrusive lists based on ilist_node.
void emit(DiagnosticInfoOptimizationBase &OptDiag)
Output the remark via the diagnostic handler and to the optimization record file. ...
This pass provides access to the codegen interfaces that are needed for IR-level transformations.
iterator end()
Definition: BasicBlock.h:254
InlineParams getInlineParams()
Generate the parameters to tune the inline cost analysis based only on the commandline options...
This is a &#39;vector&#39; (really, a variable-sized array), optimized for the case when the array is small...
Definition: SmallVector.h:862
Module.h This file contains the declarations for the Module class.
static BranchProbability getBranchProbability(uint64_t Numerator, uint64_t Denominator)
static PHINode * Create(Type *Ty, unsigned NumReservedValues, const Twine &NameStr="", Instruction *InsertBefore=nullptr)
Constructors - NumReservedValues is a hint for the number of incoming edges that this phi node will h...
pred_range predecessors(BasicBlock *BB)
Definition: CFG.h:110
unsigned getNumIncomingValues() const
Return the number of incoming edges.
#define DEBUG_TYPE
BBTy * getParent() const
Get the basic block containing the call site.
Definition: CallSite.h:97
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:132
static cl::opt< unsigned > MaxNumInlineBlocks("max-num-inline-blocks", cl::init(5), cl::Hidden, cl::desc("Max number of blocks to be partially inlined"))
iterator_range< user_iterator > users()
Definition: Value.h:405
int getCost() const
Get the inline cost estimate.
Definition: InlineCost.h:104
static void DFS(BasicBlock *Root, SetVector< BasicBlock *> &Set)
int getCostDelta() const
Get the cost delta from the threshold for inlining.
Definition: InlineCost.h:118
static cl::opt< bool > DisableMultiRegionPartialInline("disable-mr-partial-inlining", cl::init(false), cl::Hidden, cl::desc("Disable multi-region partial inlining"))
INITIALIZE_PASS_BEGIN(PartialInlinerLegacyPass, "partial-inliner", "Partial Inliner", false, false) INITIALIZE_PASS_END(PartialInlinerLegacyPass
FunTy * getCaller() const
Return the caller function for this call site.
Definition: CallSite.h:267
Analysis providing branch probability information.
const DebugLoc & getDebugLoc() const
Return the debug location for this node as a DebugLoc.
Definition: Instruction.h:285
int getCallsiteCost(CallSite CS, const DataLayout &DL)
Return the cost associated with a callsite, including parameter passing and the call/return instructi...
static cl::opt< bool > TracePartialInlining("trace-partial-inlining", cl::init(false), cl::Hidden, cl::desc("Trace partial inlining."))
size_t size() const
Definition: Module.h:580
const Function * getParent() const
Return the enclosing method, or null if none.
Definition: BasicBlock.h:108
#define I(x, y, z)
Definition: MD5.cpp:58
static bool hasProfileData(Function *F, FunctionOutliningInfo *OI)
ModulePass class - This class is used to implement unstructured interprocedural optimizations and ana...
Definition: Pass.h:225
LLVM_NODISCARD std::enable_if<!is_simple_type< Y >::value, typename cast_retty< X, const Y >::ret_type >::type dyn_cast(const Y &Val)
Definition: Casting.h:323
void eraseFromParent()
eraseFromParent - This method unlinks &#39;this&#39; from the containing module and deletes it...
Definition: Function.cpp:201
bool isUnconditional() const
bool isDeclaration() const
Return true if the primary definition of this global value is outside of the current translation unit...
Definition: Globals.cpp:201
FunTy * getCalledFunction() const
Return the function being called if this is a direct call, otherwise return null (if it&#39;s an indirect...
Definition: CallSite.h:107
Multiway switch.
bool hasAddressTaken(const User **=nullptr) const
hasAddressTaken - returns true if there are any uses of this function other than direct calls or invo...
Definition: Function.cpp:1212
static cl::opt< float > MinRegionSizeRatio("min-region-size-ratio", cl::init(0.1), cl::Hidden, cl::desc("Minimum ratio comparing relative sizes of each " "outline candidate and original function"))
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
user_iterator user_begin()
Definition: Value.h:381
const BasicBlock & front() const
Definition: Function.h:609
Module * getParent()
Get the module that this global value is contained inside of...
Definition: GlobalValue.h:559
LLVM Value Representation.
Definition: Value.h:73
succ_range successors(BasicBlock *BB)
Definition: CFG.h:143
Invoke instruction.
AssumptionCache & getAssumptionCache(Function &F)
Get the cached assumptions for a function.
print Print MemDeps of function
A container for analyses that lazily runs them and caches their results.
static MachineBasicBlock * OtherSucc(MachineBasicBlock *MBB, MachineBasicBlock *Succ)
This pass exposes codegen information to IR-level passes.
const TerminatorInst * getTerminator() const LLVM_READONLY
Returns the terminator instruction if the block is well formed or null if the block is not well forme...
Definition: BasicBlock.cpp:120
bool extractProfMetadata(uint64_t &TrueVal, uint64_t &FalseVal) const
Retrieve the raw weight values of a conditional branch or select.
Definition: Metadata.cpp:1303
The optimization diagnostic interface.
bool hasProfileData() const
Return true if the function is annotated with profile data.
Definition: Function.h:257
bool use_empty() const
Definition: Value.h:328
Optional< uint64_t > getBlockProfileCount(const BasicBlock *BB) const
Returns the estimated profile count of BB.
A wrapper class for inspecting calls to intrinsic functions.
Definition: IntrinsicInst.h:44
const BasicBlock * getParent() const
Definition: Instruction.h:67
An analysis over an "outer" IR unit that provides access to an analysis manager over an "inner" IR un...
Definition: PassManager.h:946
user_iterator user_end()
Definition: Value.h:389
bool is_contained(R &&Range, const E &Element)
Wrapper function around std::find to detect if an element exists in a container.
Definition: STLExtras.h:873