// PartialInlining.cpp — source listing captured from the LLVM 6.0.0svn
// doxygen documentation page ("Go to the documentation of this file").
1 //===- PartialInlining.cpp - Inline parts of functions --------------------===//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This pass performs partial inlining, typically by inlining an if statement
11 // that surrounds the body of the function.
12 //
13 //===----------------------------------------------------------------------===//
14 
16 #include "llvm/ADT/DenseMap.h"
17 #include "llvm/ADT/DenseSet.h"
18 #include "llvm/ADT/None.h"
19 #include "llvm/ADT/Optional.h"
20 #include "llvm/ADT/STLExtras.h"
21 #include "llvm/ADT/SmallVector.h"
22 #include "llvm/ADT/Statistic.h"
26 #include "llvm/Analysis/LoopInfo.h"
30 #include "llvm/IR/Attributes.h"
31 #include "llvm/IR/BasicBlock.h"
32 #include "llvm/IR/CFG.h"
33 #include "llvm/IR/CallSite.h"
34 #include "llvm/IR/DebugLoc.h"
35 #include "llvm/IR/DiagnosticInfo.h"
36 #include "llvm/IR/Dominators.h"
37 #include "llvm/IR/Function.h"
38 #include "llvm/IR/InstrTypes.h"
39 #include "llvm/IR/Instruction.h"
40 #include "llvm/IR/Instructions.h"
41 #include "llvm/IR/IntrinsicInst.h"
42 #include "llvm/IR/Intrinsics.h"
43 #include "llvm/IR/Module.h"
44 #include "llvm/IR/User.h"
45 #include "llvm/Pass.h"
48 #include "llvm/Support/Casting.h"
51 #include "llvm/Transforms/IPO.h"
55 #include <algorithm>
56 #include <cassert>
57 #include <cstdint>
58 #include <functional>
59 #include <iterator>
60 #include <memory>
61 #include <tuple>
62 #include <vector>
63 
64 using namespace llvm;
65 
66 #define DEBUG_TYPE "partial-inlining"
67 
// Module-wide counter of call sites that were successfully partially
// inlined (incremented in tryPartialInline; shown by -stats).
STATISTIC(NumPartialInlined,
          "Number of callsites functions partially inlined into.");
70 
71 // Command line option to disable partial-inlining. The default is false:
72 static cl::opt<bool>
73  DisablePartialInlining("disable-partial-inlining", cl::init(false),
74  cl::Hidden, cl::desc("Disable partial ininling"));
75 
// This is an option used by testing:
static cl::opt<bool> SkipCostAnalysis("skip-partial-inlining-cost-analysis",
                                      cl::init(false), cl::ZeroOrMore,
                                      // NOTE(review): one attribute line
                                      // (doxygen line 79, likely
                                      // cl::ReallyHidden) is missing from this
                                      // extraction — TODO restore upstream.
                                      cl::desc("Skip Cost Analysis"));

// Cap on how many guarding blocks may stay in the caller.
// NOTE(review): the declaration line for this option (doxygen line 82,
// presumably "static cl::opt<unsigned> MaxNumInlineBlocks(") is missing
// from this extraction — TODO restore from upstream.
    "max-num-inline-blocks", cl::init(5), cl::Hidden,
    cl::desc("Max number of blocks to be partially inlined"));

// Command line option to set the maximum number of partial inlining allowed
// for the module. The default value of -1 means no limit.
// NOTE(review): the declaration line (doxygen line 88, presumably
// "static cl::opt<int> MaxNumPartialInlining(") is missing — TODO restore.
    "max-partial-inlining", cl::init(-1), cl::Hidden, cl::ZeroOrMore,
    cl::desc("Max number of partial inlining. The default is unlimited"));

// Used only when PGO or user annotated branch data is absent. It is
// the least value that is used to weigh the outline region. If BFI
// produces larger value, the BFI value will be used.
static cl::opt<int>
    OutlineRegionFreqPercent("outline-region-freq-percent", cl::init(75),
                             // NOTE(review): one attribute line (doxygen
                             // line 97, likely cl::Hidden/cl::ZeroOrMore) is
                             // missing here — TODO restore from upstream.
                             cl::desc("Relative frequency of outline region to "
                                      "the entry block"));

// Debug knob: adds a flat extra penalty on top of the computed outlining
// runtime overhead.
// NOTE(review): the declaration line (doxygen line 101, presumably
// "static cl::opt<int> ExtraOutliningPenalty(") is missing — TODO restore.
    "partial-inlining-extra-penalty", cl::init(0), cl::Hidden,
    cl::desc("A debug option to add additional penalty to the computed one."));
105 namespace {
106 
// Bookkeeping for one partial-inlining candidate: which blocks remain in
// the caller (the guarding "entry" chain plus the return block) and which
// block heads the region to be outlined.
struct FunctionOutliningInfo {
  FunctionOutliningInfo() = default;

  // Returns the number of blocks to be inlined including all blocks
  // in Entries and one return block.
  unsigned GetNumInlinedBlocks() const { return Entries.size() + 1; }

  // A set of blocks including the function entry that guard
  // the region to be outlined.
  // NOTE(review): the member declaration itself (doxygen line 116,
  // presumably "SmallVector<BasicBlock *, 4> Entries;") is missing from
  // this extraction — TODO restore from upstream; GetNumInlinedBlocks()
  // above and ReturnBlockPreds' comment below both refer to it.

  // The return block that is not included in the outlined region.
  BasicBlock *ReturnBlock = nullptr;

  // The dominating block of the region to be outlined.
  BasicBlock *NonReturnBlock = nullptr;

  // The set of blocks in Entries that are predecessors to ReturnBlock
  SmallVector<BasicBlock *, 4> ReturnBlockPreds;
};
127 
// The pass implementation proper, shared between the legacy and new pass
// manager wrappers. Holds per-module state (the inlining counter) and the
// analysis-callback pointers supplied by the driver.
struct PartialInlinerImpl {
  // NOTE(review): three parameter lines (doxygen lines 130-132 — the
  // GetAssumptionCache / GetTTI / GetBFI callback pointers, named GetAC,
  // GTTI and GBFI in the init list below) are missing from this
  // extraction — TODO restore from upstream.
  PartialInlinerImpl(
      ProfileSummaryInfo *ProfSI)
      : GetAssumptionCache(GetAC), GetTTI(GTTI), GetBFI(GBFI), PSI(ProfSI) {}

  // Run partial inlining over every function in M; returns true if the
  // module changed.
  bool run(Module &M);
  // Attempt to partially inline F into its callers; returns the newly
  // created outlined function on success, null otherwise.
  Function *unswitchFunction(Function *F);

  // This class speculatively clones the function to be partial inlined.
  // At the end of partial inlining, the remaining callsites to the cloned
  // function that are not partially inlined will be fixed up to reference
  // the original function, and the cloned function will be erased.
  struct FunctionCloner {
    FunctionCloner(Function *F, FunctionOutliningInfo *OI);
    ~FunctionCloner();

    // Prepare for function outlining: making sure there is only
    // one incoming edge from the extracted/outlined region to
    // the return block.
    void NormalizeReturnBlock();

    // Do function outlining.
    // NOTE: For vararg functions that do the vararg handling in the outlined
    // function, we temporarily generate IR that does not properly
    // forward varargs to the outlined function. Calling InlineFunction
    // will update calls to the outlined functions to properly forward
    // the varargs.
    Function *doFunctionOutlining();

    Function *OrigFunc = nullptr;
    Function *ClonedFunc = nullptr;
    Function *OutlinedFunc = nullptr;
    BasicBlock *OutliningCallBB = nullptr;
    // ClonedFunc is inlined in one of its callers after function
    // outlining.
    bool IsFunctionInlined = false;
    // The cost of the region to be outlined.
    int OutlinedRegionCost = 0;
    std::unique_ptr<FunctionOutliningInfo> ClonedOI = nullptr;
    std::unique_ptr<BlockFrequencyInfo> ClonedFuncBFI = nullptr;
  };

private:
  // Running count, checked against MaxNumPartialInlining in IsLimitReached().
  int NumPartialInlining = 0;
  std::function<AssumptionCache &(Function &)> *GetAssumptionCache;
  std::function<TargetTransformInfo &(Function &)> *GetTTI;
  // NOTE(review): the GetBFI member declaration (doxygen line 177) is
  // missing from this extraction — TODO restore from upstream; it is
  // initialized in the constructor and tested in
  // computeCallsiteToProfCountMap.
  ProfileSummaryInfo *PSI;

  // Return the frequency of the OutlininingBB relative to F's entry point.
  // The result is no larger than 1 and is represented using BP.
  // (Note that the outlined region's 'head' block can only have incoming
  // edges from the guarding entry blocks).
  BranchProbability getOutliningCallBBRelativeFreq(FunctionCloner &Cloner);

  // Return true if the callee of CS should be partially inlined with
  // profit.
  bool shouldPartialInline(CallSite CS, FunctionCloner &Cloner,
                           BlockFrequency WeightedOutliningRcost,
                           // NOTE(review): the final parameter line (doxygen
                           // line 190, presumably
                           // "OptimizationRemarkEmitter &ORE);") is missing
                           // from this extraction — TODO restore.

  // Try to inline DuplicateFunction (cloned from F with call to
  // the OutlinedFunction into its callers. Return true
  // if there is any successful inlining.
  bool tryPartialInline(FunctionCloner &Cloner);

  // Compute the mapping from use site of DuplicationFunction to the enclosing
  // BB's profile count.
  void computeCallsiteToProfCountMap(Function *DuplicateFunction,
                                     DenseMap<User *, uint64_t> &SiteCountMap);

  // True once the module-wide -max-partial-inlining budget is exhausted.
  bool IsLimitReached() {
    return (MaxNumPartialInlining != -1 &&
            NumPartialInlining >= MaxNumPartialInlining);
  }

  // Wrap U (which must be a CallInst or InvokeInst) in a CallSite.
  static CallSite getCallSite(User *U) {
    CallSite CS;
    if (CallInst *CI = dyn_cast<CallInst>(U))
      CS = CallSite(CI);
    else if (InvokeInst *II = dyn_cast<InvokeInst>(U))
      CS = CallSite(II);
    else
      llvm_unreachable("All uses must be calls");
    return CS;
  }

  // Return an arbitrary (the first) call site of F. F must have users.
  // NB: the local 'User' shadows the llvm::User type name.
  static CallSite getOneCallSiteTo(Function *F) {
    User *User = *F->user_begin();
    return getCallSite(User);
  }

  // Debug location and enclosing block of F's first call site, for remarks.
  std::tuple<DebugLoc, BasicBlock *> getOneDebugLoc(Function *F) {
    CallSite CS = getOneCallSiteTo(F);
    DebugLoc DLoc = CS.getInstruction()->getDebugLoc();
    BasicBlock *Block = CS.getParent();
    return std::make_tuple(DLoc, Block);
  }

  // Returns the costs associated with function outlining:
  // - The first value is the non-weighted runtime cost for making the call
  //   to the outlined function, including the addtional setup cost in the
  //   outlined function itself;
  // - The second value is the estimated size of the new call sequence in
  //   basic block Cloner.OutliningCallBB;
  std::tuple<int, int> computeOutliningCosts(FunctionCloner &Cloner);

  // Compute the 'InlineCost' of block BB. InlineCost is a proxy used to
  // approximate both the size and runtime cost (Note that in the current
  // inline cost analysis, there is no clear distinction there either).
  static int computeBBInlineCost(BasicBlock *BB);

  std::unique_ptr<FunctionOutliningInfo> computeOutliningInfo(Function *F);
};
245 
// Legacy pass manager wrapper: gathers the analyses PartialInlinerImpl
// needs and forwards to it.
struct PartialInlinerLegacyPass : public ModulePass {
  static char ID; // Pass identification, replacement for typeid

  PartialInlinerLegacyPass() : ModulePass(ID) {
    // NOTE(review): the pass-registration call (doxygen line 250, presumably
    // "initializePartialInlinerLegacyPassPass(*PassRegistry::getPassRegistry());")
    // is missing from this extraction — TODO restore from upstream.
  }

  void getAnalysisUsage(AnalysisUsage &AU) const override {
    // NOTE(review): the AU.addRequired<...>() lines (doxygen lines 254-256,
    // presumably AssumptionCacheTracker, ProfileSummaryInfoWrapperPass and
    // TargetTransformInfoWrapperPass) are missing — TODO restore.
  }

  bool runOnModule(Module &M) override {
    if (skipModule(M))
      return false;

    AssumptionCacheTracker *ACT = &getAnalysis<AssumptionCacheTracker>();
    // NOTE(review): the declaration line for TTIWP (doxygen line 264,
    // "TargetTransformInfoWrapperPass *TTIWP =") is missing from this
    // extraction — TODO restore from upstream.
        &getAnalysis<TargetTransformInfoWrapperPass>();
    ProfileSummaryInfo *PSI =
        getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI();

    // Adapt the legacy analyses into the std::function callbacks the
    // implementation expects.
    std::function<AssumptionCache &(Function &)> GetAssumptionCache =
        [&ACT](Function &F) -> AssumptionCache & {
      return ACT->getAssumptionCache(F);
    };

    std::function<TargetTransformInfo &(Function &)> GetTTI =
        [&TTIWP](Function &F) -> TargetTransformInfo & {
      return TTIWP->getTTI(F);
    };

    // GetBFI is None here: the legacy path builds BFI by hand when needed
    // (see computeCallsiteToProfCountMap).
    return PartialInlinerImpl(&GetAssumptionCache, &GetTTI, None, PSI).run(M);
  }
};
282 
283 } // end anonymous namespace
284 
// Analyze F's CFG looking for a chain of two-way branches off the entry
// that guards a single cold region; returns the outlining candidate, or
// null when no such shape exists.
std::unique_ptr<FunctionOutliningInfo>
PartialInlinerImpl::computeOutliningInfo(Function *F) {
  BasicBlock *EntryBlock = &F->front();
  BranchInst *BR = dyn_cast<BranchInst>(EntryBlock->getTerminator());
  // Only a conditional branch at the entry can guard an outline region.
  if (!BR || BR->isUnconditional())
    return std::unique_ptr<FunctionOutliningInfo>();

  // Returns true if Succ is BB's successor
  auto IsSuccessor = [](BasicBlock *Succ, BasicBlock *BB) {
    return is_contained(successors(BB), Succ);
  };

  auto SuccSize = [](BasicBlock *BB) {
    return std::distance(succ_begin(BB), succ_end(BB));
  };

  auto IsReturnBlock = [](BasicBlock *BB) {
    TerminatorInst *TI = BB->getTerminator();
    return isa<ReturnInst>(TI);
  };

  // Orders the successor pair so the returning block (if any) comes first.
  auto GetReturnBlock = [&](BasicBlock *Succ1, BasicBlock *Succ2) {
    if (IsReturnBlock(Succ1))
      return std::make_tuple(Succ1, Succ2);
    if (IsReturnBlock(Succ2))
      return std::make_tuple(Succ2, Succ1);

    return std::make_tuple<BasicBlock *, BasicBlock *>(nullptr, nullptr);
  };

  // Detect a triangular shape:
  auto GetCommonSucc = [&](BasicBlock *Succ1, BasicBlock *Succ2) {
    if (IsSuccessor(Succ1, Succ2))
      return std::make_tuple(Succ1, Succ2);
    if (IsSuccessor(Succ2, Succ1))
      return std::make_tuple(Succ2, Succ1);

    return std::make_tuple<BasicBlock *, BasicBlock *>(nullptr, nullptr);
  };

  std::unique_ptr<FunctionOutliningInfo> OutliningInfo =
      llvm::make_unique<FunctionOutliningInfo>();

  // Walk the chain of guarding blocks starting at the entry.
  BasicBlock *CurrEntry = EntryBlock;
  bool CandidateFound = false;
  do {
    // The number of blocks to be inlined has already reached
    // the limit. When MaxNumInlineBlocks is set to 0 or 1, this
    // disables partial inlining for the function.
    if (OutliningInfo->GetNumInlinedBlocks() >= MaxNumInlineBlocks)
      break;

    if (SuccSize(CurrEntry) != 2)
      break;

    BasicBlock *Succ1 = *succ_begin(CurrEntry);
    BasicBlock *Succ2 = *(succ_begin(CurrEntry) + 1);

    BasicBlock *ReturnBlock, *NonReturnBlock;
    std::tie(ReturnBlock, NonReturnBlock) = GetReturnBlock(Succ1, Succ2);

    // One successor returns directly: the candidate shape is complete.
    if (ReturnBlock) {
      OutliningInfo->Entries.push_back(CurrEntry);
      OutliningInfo->ReturnBlock = ReturnBlock;
      OutliningInfo->NonReturnBlock = NonReturnBlock;
      CandidateFound = true;
      break;
    }

    BasicBlock *CommSucc;
    // NOTE(review): the declaration of OtherSucc (doxygen line 355,
    // presumably "BasicBlock *OtherSucc;") is missing from this extraction
    // — TODO restore from upstream.
    std::tie(CommSucc, OtherSucc) = GetCommonSucc(Succ1, Succ2);

    if (!CommSucc)
      break;

    // Triangle: CurrEntry becomes a guard; keep walking from the block
    // that is not the common successor.
    OutliningInfo->Entries.push_back(CurrEntry);
    CurrEntry = OtherSucc;
  } while (true);

  if (!CandidateFound)
    return std::unique_ptr<FunctionOutliningInfo>();

  // Do sanity check of the entries: there should not
  // be any successors (not in the entry set) other than
  // {ReturnBlock, NonReturnBlock}
  assert(OutliningInfo->Entries[0] == &F->front() &&
         "Function Entry must be the first in Entries vector");
  DenseSet<BasicBlock *> Entries;
  for (BasicBlock *E : OutliningInfo->Entries)
    Entries.insert(E);

  // Returns true if BB has a predecessor which is not
  // in the Entries set.
  auto HasNonEntryPred = [Entries](BasicBlock *BB) {
    for (auto Pred : predecessors(BB)) {
      if (!Entries.count(Pred))
        return true;
    }
    return false;
  };
  // Also records which entry blocks branch directly to the return block.
  auto CheckAndNormalizeCandidate =
      [Entries, HasNonEntryPred](FunctionOutliningInfo *OutliningInfo) {
        for (BasicBlock *E : OutliningInfo->Entries) {
          for (auto Succ : successors(E)) {
            if (Entries.count(Succ))
              continue;
            if (Succ == OutliningInfo->ReturnBlock)
              OutliningInfo->ReturnBlockPreds.push_back(E);
            else if (Succ != OutliningInfo->NonReturnBlock)
              return false;
          }
          // There should not be any outside incoming edges either:
          if (HasNonEntryPred(E))
            return false;
        }
        return true;
      };

  if (!CheckAndNormalizeCandidate(OutliningInfo.get()))
    return std::unique_ptr<FunctionOutliningInfo>();

  // Now further growing the candidate's inlining region by
  // peeling off dominating blocks from the outlining region:
  while (OutliningInfo->GetNumInlinedBlocks() < MaxNumInlineBlocks) {
    BasicBlock *Cand = OutliningInfo->NonReturnBlock;
    if (SuccSize(Cand) != 2)
      break;

    if (HasNonEntryPred(Cand))
      break;

    BasicBlock *Succ1 = *succ_begin(Cand);
    BasicBlock *Succ2 = *(succ_begin(Cand) + 1);

    BasicBlock *ReturnBlock, *NonReturnBlock;
    std::tie(ReturnBlock, NonReturnBlock) = GetReturnBlock(Succ1, Succ2);
    // Peeling is only legal if Cand branches to the same return block...
    if (!ReturnBlock || ReturnBlock != OutliningInfo->ReturnBlock)
      break;

    // ...and is the sole predecessor of the new region head.
    if (NonReturnBlock->getSinglePredecessor() != Cand)
      break;

    // Now grow and update OutliningInfo:
    OutliningInfo->Entries.push_back(Cand);
    OutliningInfo->NonReturnBlock = NonReturnBlock;
    OutliningInfo->ReturnBlockPreds.push_back(Cand);
    Entries.insert(Cand);
  }

  return OutliningInfo;
}
437 
// Check if there is PGO data or user annotated branch data:
static bool hasProfileData(Function *F, FunctionOutliningInfo *OI) {
  // A PGO entry count is the strongest signal.
  if (F->getEntryCount())
    return true;
  // Now check if any of the entry block has MD_prof data:
  for (auto *E : OI->Entries) {
    // NOTE(review): the declaration of BR (doxygen line 444, presumably
    // "BranchInst *BR = dyn_cast<BranchInst>(E->getTerminator());") is
    // missing from this extraction — TODO restore from upstream.
    if (!BR || BR->isUnconditional())
      continue;
    uint64_t T, F; // NB: this 'F' shadows the Function parameter above.
    if (BR->extractProfMetadata(T, F))
      return true;
  }
  return false;
}
453 
// NOTE(review): the return-type line of this definition (doxygen line 454,
// "BranchProbability") is missing from this extraction — TODO restore.
PartialInlinerImpl::getOutliningCallBBRelativeFreq(FunctionCloner &Cloner) {
  // Compare the frequency of the block making the outlined call against
  // the clone's entry block.
  auto EntryFreq =
      Cloner.ClonedFuncBFI->getBlockFreq(&Cloner.ClonedFunc->getEntryBlock());
  auto OutliningCallFreq =
      Cloner.ClonedFuncBFI->getBlockFreq(Cloner.OutliningCallBB);

  auto OutlineRegionRelFreq =
      BranchProbability::getBranchProbability(OutliningCallFreq.getFrequency(),
                                              EntryFreq.getFrequency());

  // With real profile data the BFI-derived ratio is trusted as-is.
  if (hasProfileData(Cloner.OrigFunc, Cloner.ClonedOI.get()))
    return OutlineRegionRelFreq;

  // When profile data is not available, we need to be conservative in
  // estimating the overall savings. Static branch prediction can usually
  // guess the branch direction right (taken/non-taken), but the guessed
  // branch probability is usually not biased enough. In case when the
  // outlined region is predicted to be likely, its probability needs
  // to be made higher (more biased) to not under-estimate the cost of
  // function outlining. On the other hand, if the outlined region
  // is predicted to be less likely, the predicted probability is usually
  // higher than the actual. For instance, the actual probability of the
  // less likely target is only 5%, but the guessed probability can be
  // 40%. In the latter case, there is no need for further adjustment.
  // FIXME: add an option for this.
  if (OutlineRegionRelFreq < BranchProbability(45, 100))
    return OutlineRegionRelFreq;

  OutlineRegionRelFreq = std::max(
      OutlineRegionRelFreq, BranchProbability(OutlineRegionFreqPercent, 100));

  return OutlineRegionRelFreq;
}
488 
// Decide, for one concrete callsite CS of the cloned function, whether
// partial inlining is profitable there; emits optimization remarks
// explaining the decision either way.
bool PartialInlinerImpl::shouldPartialInline(
    CallSite CS, FunctionCloner &Cloner, BlockFrequency WeightedOutliningRcost,
    // NOTE(review): the final parameter line (doxygen line 491, presumably
    // "OptimizationRemarkEmitter &ORE) {") is missing from this extraction
    // — TODO restore from upstream.
  using namespace ore;

  // Testing hook: accept unconditionally.
  if (SkipCostAnalysis)
    return true;

  Instruction *Call = CS.getInstruction();
  // NOTE(review): the declaration of Callee (doxygen line 498, presumably
  // "Function *Callee = CS.getCalledFunction();") is missing — TODO restore.
  assert(Callee == Cloner.ClonedFunc);

  Function *Caller = CS.getCaller();
  auto &CalleeTTI = (*GetTTI)(*Callee);
  InlineCost IC = getInlineCost(CS, getInlineParams(), CalleeTTI,
                                *GetAssumptionCache, GetBFI, PSI, &ORE);

  // 'always': the regular inliner will fully inline this; don't compete.
  if (IC.isAlways()) {
    ORE.emit([&]() {
      return OptimizationRemarkAnalysis(DEBUG_TYPE, "AlwaysInline", Call)
             << NV("Callee", Cloner.OrigFunc)
             << " should always be fully inlined, not partially";
    });
    return false;
  }

  if (IC.isNever()) {
    ORE.emit([&]() {
      return OptimizationRemarkMissed(DEBUG_TYPE, "NeverInline", Call)
             << NV("Callee", Cloner.OrigFunc) << " not partially inlined into "
             << NV("Caller", Caller)
             << " because it should never be inlined (cost=never)";
    });
    return false;
  }

  if (!IC) {
    ORE.emit([&]() {
      return OptimizationRemarkAnalysis(DEBUG_TYPE, "TooCostly", Call)
             << NV("Callee", Cloner.OrigFunc) << " not partially inlined into "
             << NV("Caller", Caller) << " because too costly to inline (cost="
             << NV("Cost", IC.getCost()) << ", threshold="
             << NV("Threshold", IC.getCostDelta() + IC.getCost()) << ")";
    });
    return false;
  }
  const DataLayout &DL = Caller->getParent()->getDataLayout();

  // The savings of eliminating the call:
  int NonWeightedSavings = getCallsiteCost(CS, DL);
  BlockFrequency NormWeightedSavings(NonWeightedSavings);

  // Weighted saving is smaller than weighted cost, return false
  if (NormWeightedSavings < WeightedOutliningRcost) {
    ORE.emit([&]() {
      return OptimizationRemarkAnalysis(DEBUG_TYPE, "OutliningCallcostTooHigh",
                                        Call)
             << NV("Callee", Cloner.OrigFunc) << " not partially inlined into "
             << NV("Caller", Caller) << " runtime overhead (overhead="
             << NV("Overhead", (unsigned)WeightedOutliningRcost.getFrequency())
             << ", savings="
             << NV("Savings", (unsigned)NormWeightedSavings.getFrequency())
             << ")"
             << " of making the outlined call is too high";
    });

    return false;
  }

  ORE.emit([&]() {
    return OptimizationRemarkAnalysis(DEBUG_TYPE, "CanBePartiallyInlined", Call)
           << NV("Callee", Cloner.OrigFunc) << " can be partially inlined into "
           << NV("Caller", Caller) << " with cost=" << NV("Cost", IC.getCost())
           << " (threshold="
           << NV("Threshold", IC.getCostDelta() + IC.getCost()) << ")";
  });
  return true;
}
567 
568 // TODO: Ideally we should share Inliner's InlineCost Analysis code.
569 // For now use a simplified version. The returned 'InlineCost' will be used
570 // to esimate the size cost as well as runtime cost of the BB.
571 int PartialInlinerImpl::computeBBInlineCost(BasicBlock *BB) {
572  int InlineCost = 0;
573  const DataLayout &DL = BB->getParent()->getParent()->getDataLayout();
574  for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I) {
575  if (isa<DbgInfoIntrinsic>(I))
576  continue;
577 
578  switch (I->getOpcode()) {
579  case Instruction::BitCast:
580  case Instruction::PtrToInt:
581  case Instruction::IntToPtr:
582  case Instruction::Alloca:
583  continue;
584  case Instruction::GetElementPtr:
585  if (cast<GetElementPtrInst>(I)->hasAllZeroIndices())
586  continue;
587  default:
588  break;
589  }
590 
591  IntrinsicInst *IntrInst = dyn_cast<IntrinsicInst>(I);
592  if (IntrInst) {
593  if (IntrInst->getIntrinsicID() == Intrinsic::lifetime_start ||
594  IntrInst->getIntrinsicID() == Intrinsic::lifetime_end)
595  continue;
596  }
597 
598  if (CallInst *CI = dyn_cast<CallInst>(I)) {
599  InlineCost += getCallsiteCost(CallSite(CI), DL);
600  continue;
601  }
602 
603  if (InvokeInst *II = dyn_cast<InvokeInst>(I)) {
604  InlineCost += getCallsiteCost(CallSite(II), DL);
605  continue;
606  }
607 
608  if (SwitchInst *SI = dyn_cast<SwitchInst>(I)) {
609  InlineCost += (SI->getNumCases() + 1) * InlineConstants::InstrCost;
610  continue;
611  }
612  InlineCost += InlineConstants::InstrCost;
613  }
614  return InlineCost;
615 }
616 
// Returns {size cost of the new call sequence, weighted-free runtime
// overhead of outlining}; see the declaration comment in PartialInlinerImpl.
std::tuple<int, int>
PartialInlinerImpl::computeOutliningCosts(FunctionCloner &Cloner) {
  // Now compute the cost of the call sequence to the outlined function
  // 'OutlinedFunction' in BB 'OutliningCallBB':
  int OutliningFuncCallCost = computeBBInlineCost(Cloner.OutliningCallBB);

  // Now compute the cost of the extracted/outlined function itself:
  int OutlinedFunctionCost = 0;
  for (BasicBlock &BB : *Cloner.OutlinedFunc) {
    OutlinedFunctionCost += computeBBInlineCost(&BB);
  }

  assert(OutlinedFunctionCost >= Cloner.OutlinedRegionCost &&
         "Outlined function cost should be no less than the outlined region");
  // The code extractor introduces a new root and exit stub blocks with
  // additional unconditional branches. Those branches will be eliminated
  // later with bb layout. The cost should be adjusted accordingly:
  OutlinedFunctionCost -= 2 * InlineConstants::InstrCost;

  // Runtime overhead = call sequence + code the extractor added beyond the
  // original region.
  int OutliningRuntimeOverhead =
      OutliningFuncCallCost +
      (OutlinedFunctionCost - Cloner.OutlinedRegionCost) +
      // NOTE(review): the final summand (doxygen line 639, presumably
      // "ExtraOutliningPenalty;") is missing from this extraction — TODO
      // restore from upstream.

  return std::make_tuple(OutliningFuncCallCost, OutliningRuntimeOverhead);
}
643 
644 // Create the callsite to profile count map which is
645 // used to update the original function's entry count,
646 // after the function is partially inlined into the callsite.
647 void PartialInlinerImpl::computeCallsiteToProfCountMap(
648  Function *DuplicateFunction,
649  DenseMap<User *, uint64_t> &CallSiteToProfCountMap) {
650  std::vector<User *> Users(DuplicateFunction->user_begin(),
651  DuplicateFunction->user_end());
652  Function *CurrentCaller = nullptr;
653  std::unique_ptr<BlockFrequencyInfo> TempBFI;
654  BlockFrequencyInfo *CurrentCallerBFI = nullptr;
655 
656  auto ComputeCurrBFI = [&,this](Function *Caller) {
657  // For the old pass manager:
658  if (!GetBFI) {
659  DominatorTree DT(*Caller);
660  LoopInfo LI(DT);
661  BranchProbabilityInfo BPI(*Caller, LI);
662  TempBFI.reset(new BlockFrequencyInfo(*Caller, BPI, LI));
663  CurrentCallerBFI = TempBFI.get();
664  } else {
665  // New pass manager:
666  CurrentCallerBFI = &(*GetBFI)(*Caller);
667  }
668  };
669 
670  for (User *User : Users) {
671  CallSite CS = getCallSite(User);
672  Function *Caller = CS.getCaller();
673  if (CurrentCaller != Caller) {
674  CurrentCaller = Caller;
675  ComputeCurrBFI(Caller);
676  } else {
677  assert(CurrentCallerBFI && "CallerBFI is not set");
678  }
679  BasicBlock *CallBB = CS.getInstruction()->getParent();
680  auto Count = CurrentCallerBFI->getBlockProfileCount(CallBB);
681  if (Count)
682  CallSiteToProfCountMap[User] = *Count;
683  else
684  CallSiteToProfCountMap[User] = 0;
685  }
686 }
687 
688 PartialInlinerImpl::FunctionCloner::FunctionCloner(Function *F,
689  FunctionOutliningInfo *OI)
690  : OrigFunc(F) {
691  ClonedOI = llvm::make_unique<FunctionOutliningInfo>();
692 
693  // Clone the function, so that we can hack away on it.
694  ValueToValueMapTy VMap;
695  ClonedFunc = CloneFunction(F, VMap);
696 
697  ClonedOI->ReturnBlock = cast<BasicBlock>(VMap[OI->ReturnBlock]);
698  ClonedOI->NonReturnBlock = cast<BasicBlock>(VMap[OI->NonReturnBlock]);
699  for (BasicBlock *BB : OI->Entries) {
700  ClonedOI->Entries.push_back(cast<BasicBlock>(VMap[BB]));
701  }
702  for (BasicBlock *E : OI->ReturnBlockPreds) {
703  BasicBlock *NewE = cast<BasicBlock>(VMap[E]);
704  ClonedOI->ReturnBlockPreds.push_back(NewE);
705  }
706  // Go ahead and update all uses to the duplicate, so that we can just
707  // use the inliner functionality when we're done hacking.
708  F->replaceAllUsesWith(ClonedFunc);
709 }
710 
// Ensure the cloned return block has exactly one incoming edge from the
// to-be-outlined region by splitting its PHIs into a two-level sequence;
// this keeps the CodeExtractor's interface to the region simple.
void PartialInlinerImpl::FunctionCloner::NormalizeReturnBlock() {
  // Returns BB's first PHI node, or null if BB does not start with a PHI.
  auto getFirstPHI = [](BasicBlock *BB) {
    BasicBlock::iterator I = BB->begin();
    PHINode *FirstPhi = nullptr;
    // NOTE(review): both branches break on the first iteration and I is
    // never advanced, so this loop is effectively a "front-is-PHI" check.
    while (I != BB->end()) {
      PHINode *Phi = dyn_cast<PHINode>(I);
      if (!Phi)
        break;
      if (!FirstPhi) {
        FirstPhi = Phi;
        break;
      }
    }
    return FirstPhi;
  };

  // Special hackery is needed with PHI nodes that have inputs from more than
  // one extracted block. For simplicity, just split the PHIs into a two-level
  // sequence of PHIs, some of which will go in the extracted region, and some
  // of which will go outside.
  BasicBlock *PreReturn = ClonedOI->ReturnBlock;
  // only split block when necessary:
  PHINode *FirstPhi = getFirstPHI(PreReturn);
  unsigned NumPredsFromEntries = ClonedOI->ReturnBlockPreds.size();

  if (!FirstPhi || FirstPhi->getNumIncomingValues() <= NumPredsFromEntries + 1)
    return;

  // A PHI whose incoming values are all the same can be replaced by that
  // value outright.
  auto IsTrivialPhi = [](PHINode *PN) -> Value * {
    Value *CommonValue = PN->getIncomingValue(0);
    if (all_of(PN->incoming_values(),
               [&](Value *V) { return V == CommonValue; }))
      return CommonValue;
    return nullptr;
  };

  // Split so the new ReturnBlock starts at the first non-PHI instruction;
  // PreReturn keeps the original PHIs.
  ClonedOI->ReturnBlock = ClonedOI->ReturnBlock->splitBasicBlock(
      ClonedOI->ReturnBlock->getFirstNonPHI()->getIterator());
  BasicBlock::iterator I = PreReturn->begin();
  Instruction *Ins = &ClonedOI->ReturnBlock->front();
  // NOTE(review): the declaration of DeadPhis (doxygen line 751, presumably
  // "SmallVector<PHINode *, 2> DeadPhis;") is missing from this extraction
  // — TODO restore from upstream.
  while (I != PreReturn->end()) {
    PHINode *OldPhi = dyn_cast<PHINode>(I);
    if (!OldPhi)
      break;

    // New PHI in the split-off return block merges the region's value with
    // the values arriving directly from the guarding entry blocks.
    PHINode *RetPhi =
        PHINode::Create(OldPhi->getType(), NumPredsFromEntries + 1, "", Ins);
    OldPhi->replaceAllUsesWith(RetPhi);
    Ins = ClonedOI->ReturnBlock->getFirstNonPHI();

    RetPhi->addIncoming(&*I, PreReturn);
    for (BasicBlock *E : ClonedOI->ReturnBlockPreds) {
      RetPhi->addIncoming(OldPhi->getIncomingValueForBlock(E), E);
      OldPhi->removeIncomingValue(E);
    }

    // After incoming values splitting, the old phi may become trivial.
    // Keeping the trivial phi can introduce definition inside the outline
    // region which is live-out, causing necessary overhead (load, store
    // arg passing etc).
    if (auto *OldPhiVal = IsTrivialPhi(OldPhi)) {
      OldPhi->replaceAllUsesWith(OldPhiVal);
      DeadPhis.push_back(OldPhi);
    }
    ++I;
  }
  for (auto *DP : DeadPhis)
    DP->eraseFromParent();

  // Finally, reroute the entry-block edges to the split-off return block.
  for (auto E : ClonedOI->ReturnBlockPreds) {
    E->getTerminator()->replaceUsesOfWith(PreReturn, ClonedOI->ReturnBlock);
  }
}
785 
786 Function *PartialInlinerImpl::FunctionCloner::doFunctionOutlining() {
787  // Returns true if the block is to be partial inlined into the caller
788  // (i.e. not to be extracted to the out of line function)
789  auto ToBeInlined = [&, this](BasicBlock *BB) {
790  return BB == ClonedOI->ReturnBlock ||
791  (std::find(ClonedOI->Entries.begin(), ClonedOI->Entries.end(), BB) !=
792  ClonedOI->Entries.end());
793  };
794 
795  // Gather up the blocks that we're going to extract.
796  std::vector<BasicBlock *> ToExtract;
797  ToExtract.push_back(ClonedOI->NonReturnBlock);
798  OutlinedRegionCost +=
799  PartialInlinerImpl::computeBBInlineCost(ClonedOI->NonReturnBlock);
800  for (BasicBlock &BB : *ClonedFunc)
801  if (!ToBeInlined(&BB) && &BB != ClonedOI->NonReturnBlock) {
802  ToExtract.push_back(&BB);
803  // FIXME: the code extractor may hoist/sink more code
804  // into the outlined function which may make the outlining
805  // overhead (the difference of the outlined function cost
806  // and OutliningRegionCost) look larger.
807  OutlinedRegionCost += computeBBInlineCost(&BB);
808  }
809 
810  // The CodeExtractor needs a dominator tree.
811  DominatorTree DT;
812  DT.recalculate(*ClonedFunc);
813 
814  // Manually calculate a BlockFrequencyInfo and BranchProbabilityInfo.
815  LoopInfo LI(DT);
816  BranchProbabilityInfo BPI(*ClonedFunc, LI);
817  ClonedFuncBFI.reset(new BlockFrequencyInfo(*ClonedFunc, BPI, LI));
818 
819  // Extract the body of the if.
820  OutlinedFunc = CodeExtractor(ToExtract, &DT, /*AggregateArgs*/ false,
821  ClonedFuncBFI.get(), &BPI,
822  /* AllowVarargs */ true)
823  .extractCodeRegion();
824 
825  if (OutlinedFunc) {
826  OutliningCallBB = PartialInlinerImpl::getOneCallSiteTo(OutlinedFunc)
827  .getInstruction()
828  ->getParent();
829  assert(OutliningCallBB->getParent() == ClonedFunc);
830  }
831 
832  return OutlinedFunc;
833 }
834 
835 PartialInlinerImpl::FunctionCloner::~FunctionCloner() {
836  // Ditch the duplicate, since we're done with it, and rewrite all remaining
837  // users (function pointers, etc.) back to the original function.
838  ClonedFunc->replaceAllUsesWith(OrigFunc);
839  ClonedFunc->eraseFromParent();
840  if (!IsFunctionInlined) {
841  // Remove the function that is speculatively created if there is no
842  // reference.
843  if (OutlinedFunc)
844  OutlinedFunc->eraseFromParent();
845  }
846 }
847 
848 Function *PartialInlinerImpl::unswitchFunction(Function *F) {
849  if (F->hasAddressTaken())
850  return nullptr;
851 
852  // Let inliner handle it
853  if (F->hasFnAttribute(Attribute::AlwaysInline))
854  return nullptr;
855 
856  if (F->hasFnAttribute(Attribute::NoInline))
857  return nullptr;
858 
859  if (PSI->isFunctionEntryCold(F))
860  return nullptr;
861 
862  if (F->user_begin() == F->user_end())
863  return nullptr;
864 
865  std::unique_ptr<FunctionOutliningInfo> OI = computeOutliningInfo(F);
866 
867  if (!OI)
868  return nullptr;
869 
870  FunctionCloner Cloner(F, OI.get());
871  Cloner.NormalizeReturnBlock();
872  Function *OutlinedFunction = Cloner.doFunctionOutlining();
873 
874  bool AnyInline = tryPartialInline(Cloner);
875 
876  if (AnyInline)
877  return OutlinedFunction;
878 
879  return nullptr;
880 }
881 
// Attempt to inline the cloned function (whose cold region has been
// outlined) into each of its callers; returns true if at least one caller
// was successfully partially inlined.
bool PartialInlinerImpl::tryPartialInline(FunctionCloner &Cloner) {
  int NonWeightedRcost;
  int SizeCost;

  // Outlining may have failed (e.g. CodeExtractor bailed out).
  if (Cloner.OutlinedFunc == nullptr)
    return false;

  std::tie(SizeCost, NonWeightedRcost) = computeOutliningCosts(Cloner);

  // Scale the runtime cost by how often the outlined call executes relative
  // to the function entry.
  auto RelativeToEntryFreq = getOutliningCallBBRelativeFreq(Cloner);
  auto WeightedRcost = BlockFrequency(NonWeightedRcost) * RelativeToEntryFreq;

  // The call sequence to the outlined function is larger than the original
  // outlined region size, it does not increase the chances of inlining
  // the function with outlining (The inliner uses the size increase to
  // model the cost of inlining a callee).
  if (!SkipCostAnalysis && Cloner.OutlinedRegionCost < SizeCost) {
    OptimizationRemarkEmitter ORE(Cloner.OrigFunc);
    DebugLoc DLoc;
    BasicBlock *Block;
    std::tie(DLoc, Block) = getOneDebugLoc(Cloner.ClonedFunc);
    ORE.emit([&]() {
      return OptimizationRemarkAnalysis(DEBUG_TYPE, "OutlineRegionTooSmall",
                                        DLoc, Block)
             << ore::NV("Function", Cloner.OrigFunc)
             << " not partially inlined into callers (Original Size = "
             << ore::NV("OutlinedRegionOriginalSize", Cloner.OutlinedRegionCost)
             << ", Size of call sequence to outlined function = "
             << ore::NV("NewSize", SizeCost) << ")";
    });
    return false;
  }

  // FunctionCloner's constructor redirected every user to the clone.
  assert(Cloner.OrigFunc->user_begin() == Cloner.OrigFunc->user_end() &&
         "F's users should all be replaced!");

  std::vector<User *> Users(Cloner.ClonedFunc->user_begin(),
                            Cloner.ClonedFunc->user_end());

  DenseMap<User *, uint64_t> CallSiteToProfCountMap;
  if (Cloner.OrigFunc->getEntryCount())
    computeCallsiteToProfCountMap(Cloner.ClonedFunc, CallSiteToProfCountMap);

  auto CalleeEntryCount = Cloner.OrigFunc->getEntryCount();
  uint64_t CalleeEntryCountV = (CalleeEntryCount ? *CalleeEntryCount : 0);

  bool AnyInline = false;
  // NB: the loop variable 'User' shadows the llvm::User type name.
  for (User *User : Users) {
    CallSite CS = getCallSite(User);

    if (IsLimitReached())
      continue;

    // NOTE(review): a declaration is missing here (doxygen line 935,
    // presumably "OptimizationRemarkEmitter ORE(CS.getCaller());") —
    // TODO restore from upstream; ORE is used below.

    if (!shouldPartialInline(CS, Cloner, WeightedRcost, ORE))
      continue;

    // Construct remark before doing the inlining, as after successful inlining
    // the callsite is removed.
    OptimizationRemark OR(DEBUG_TYPE, "PartiallyInlined", CS.getInstruction());
    OR << ore::NV("Callee", Cloner.OrigFunc) << " partially inlined into "
       << ore::NV("Caller", CS.getCaller());

    InlineFunctionInfo IFI(nullptr, GetAssumptionCache, PSI);
    if (!InlineFunction(CS, IFI, nullptr, true, Cloner.OutlinedFunc))
      continue;

    ORE.emit(OR);

    // Now update the entry count:
    if (CalleeEntryCountV && CallSiteToProfCountMap.count(User)) {
      uint64_t CallSiteCount = CallSiteToProfCountMap[User];
      CalleeEntryCountV -= std::min(CalleeEntryCountV, CallSiteCount);
    }

    AnyInline = true;
    NumPartialInlining++;
    // Update the stats
    NumPartialInlined++;
  }

  if (AnyInline) {
    Cloner.IsFunctionInlined = true;
    // Whatever profile count was not consumed by the inlined callsites
    // stays with the original function.
    if (CalleeEntryCount)
      Cloner.OrigFunc->setEntryCount(CalleeEntryCountV);
  }

  return AnyInline;
}
972 
973 bool PartialInlinerImpl::run(Module &M) {
// Driver: attempt partial inlining on every defined, used function in the
// module. Returns true if any function was changed.
// NOTE(review): the extraction appears to have dropped original line 974
// here — presumably the guard `if (DisablePartialInlining)` protecting the
// `return false;` below; as shown, the pass would be a no-op. Verify
// against upstream.
975  return false;
976 
// Collect candidates up front; unswitchFunction may add new (outlined)
// functions to the worklist as it goes.
977  std::vector<Function *> Worklist;
978  Worklist.reserve(M.size());
979  for (Function &F : M)
980  if (!F.use_empty() && !F.isDeclaration())
981  Worklist.push_back(&F);
982 
983  bool Changed = false;
984  while (!Worklist.empty()) {
985  Function *CurrFunc = Worklist.back();
986  Worklist.pop_back();
987 
// Uses may have disappeared since the function was enqueued.
988  if (CurrFunc->use_empty())
989  continue;
990 
// Skip directly self-recursive functions: partially inlining a function
// into itself is not handled.
991  bool Recursive = false;
992  for (User *U : CurrFunc->users())
993  if (Instruction *I = dyn_cast<Instruction>(U))
994  if (I->getParent()->getParent() == CurrFunc) {
995  Recursive = true;
996  break;
997  }
998  if (Recursive)
999  continue;
1000 
// On success, the newly outlined function is itself re-enqueued so it can
// be considered for further partial inlining.
1001  if (Function *NewFunc = unswitchFunction(CurrFunc)) {
1002  Worklist.push_back(NewFunc);
1003  Changed = true;
1004  }
1005  }
1006 
1007  return Changed;
1008 }
1009 
1011 
// Legacy pass-manager registration for the partial inliner.
// NOTE(review): the extraction appears to have dropped original lines
// 1014-1016 here — presumably INITIALIZE_PASS_DEPENDENCY entries between
// the BEGIN and END macros. Verify against upstream.
1012 INITIALIZE_PASS_BEGIN(PartialInlinerLegacyPass, "partial-inliner",
1013  "Partial Inliner", false, false)
1017 INITIALIZE_PASS_END(PartialInlinerLegacyPass, "partial-inliner",
1018  "Partial Inliner", false, false)
1019 
// NOTE(review): the extraction appears to have dropped original line 1020
// here — presumably the signature `ModulePass *llvm::createPartialInliningPass() {`
// for the factory whose body follows. Verify against upstream.
1021  return new PartialInlinerLegacyPass();
1022 }
1023 
// New pass-manager entry point.
// NOTE(review): the extraction appears to have dropped original line 1024
// here — presumably the signature start
// `PreservedAnalyses PartialInlinerPass::run(Module &M,`. Verify upstream.
1025  ModuleAnalysisManager &AM) {
// Reach through the module proxy to run per-function analyses on demand.
1026  auto &FAM = AM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager();
1027 
// Thin callable wrappers handed to PartialInlinerImpl so it can lazily
// fetch per-function analysis results.
1028  std::function<AssumptionCache &(Function &)> GetAssumptionCache =
1029  [&FAM](Function &F) -> AssumptionCache & {
1030  return FAM.getResult<AssumptionAnalysis>(F);
1031  };
1032 
1033  std::function<BlockFrequencyInfo &(Function &)> GetBFI =
1034  [&FAM](Function &F) -> BlockFrequencyInfo & {
1035  return FAM.getResult<BlockFrequencyAnalysis>(F);
1036  };
1037 
1038  std::function<TargetTransformInfo &(Function &)> GetTTI =
1039  [&FAM](Function &F) -> TargetTransformInfo & {
1040  return FAM.getResult<TargetIRAnalysis>(F);
1041  };
1042 
// NOTE(review): the extraction appears to have dropped original line 1043
// here — presumably obtaining the `ProfileSummaryInfo *PSI` used below
// (e.g. via AM.getResult<ProfileSummaryAnalysis>(M)). Verify upstream.
1044 
// If anything changed, conservatively report that no analyses are preserved.
1045  if (PartialInlinerImpl(&GetAssumptionCache, &GetTTI, {GetBFI}, PSI).run(M))
1046  return PreservedAnalyses::none();
1047  return PreservedAnalyses::all();
1048 }
A parsed version of the target data layout string in and methods for querying it. ...
Definition: DataLayout.h:109
Utility class for extracting code into a new function.
Definition: CodeExtractor.h:50
Diagnostic information for missed-optimization remarks.
void addIncoming(Value *V, BasicBlock *BB)
Add an incoming value to the end of the PHI list.
static PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
GCNRegPressure max(const GCNRegPressure &P1, const GCNRegPressure &P2)
DiagnosticInfoOptimizationBase::Argument NV
bool isNever() const
Definition: InlineCost.h:99
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)
Get the result of an analysis pass for a given IR unit.
Definition: PassManager.h:687
partial Partial Inliner
Compute iterated dominance frontiers using a linear time algorithm.
Definition: AllocatorList.h:24
void initializePartialInlinerLegacyPassPass(PassRegistry &)
A Module instance is used to store all the information related to an LLVM module. ...
Definition: Module.h:63
static cl::opt< int > MaxNumPartialInlining("max-partial-inlining", cl::init(-1), cl::Hidden, cl::ZeroOrMore, cl::desc("Max number of partial inlining. The default is unlimited"))
Implements a dense probed hash-table based set.
Definition: DenseSet.h:221
void recalculate(ParentType &Func)
recalculate - compute a dominator tree for the given function
Analysis providing profile information.
This class represents a function call, abstracting a target machine's calling convention.
Function * CloneFunction(Function *F, ValueToValueMapTy &VMap, ClonedCodeInfo *CodeInfo=nullptr)
CloneFunction - Return a copy of the specified function and add it to that function's module...
An immutable pass that tracks lazily created AssumptionCache objects.
An efficient, type-erasing, non-owning reference to a callable.
Definition: STLExtras.h:89
A cache of .assume calls within a function.
Analysis pass providing the TargetTransformInfo.
uint64_t getFrequency() const
Returns the frequency as a fixpoint number scaled by the entry frequency.
const BasicBlock & back() const
Definition: Function.h:597
bool hasFnAttribute(Attribute::AttrKind Kind) const
Return true if the function has the attribute.
Definition: Function.h:262
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly...
Definition: STLExtras.h:767
STATISTIC(NumFunctions, "Total number of functions")
A debug info location.
Definition: DebugLoc.h:34
F(f)
iv Induction Variable Users
Definition: IVUsers.cpp:51
InlineFunctionInfo - This class captures the data input to the InlineFunction call, and records the auxiliary results produced by it.
Definition: Cloning.h:176
Represents the cost of inlining a function.
Definition: InlineCost.h:65
iterator begin()
Instruction iterator methods.
Definition: BasicBlock.h:252
bool InlineFunction(CallInst *C, InlineFunctionInfo &IFI, AAResults *CalleeAAR=nullptr, bool InsertLifetime=true)
InlineFunction - This function inlines the called function into the basic block of the caller...
AnalysisUsage & addRequired()
ModulePass * createPartialInliningPass()
createPartialInliningPass - This pass inlines parts of functions.
#define INITIALIZE_PASS_DEPENDENCY(depName)
Definition: PassSupport.h:51
Value * removeIncomingValue(unsigned Idx, bool DeletePHIIfEmpty=true)
Remove an incoming value.
const DataLayout & getDataLayout() const
Get the data layout for the module&#39;s target platform.
Definition: Module.cpp:361
An analysis pass based on legacy pass manager to deliver ProfileSummaryInfo.
Diagnostic information for optimization analysis remarks.
static cl::opt< int > OutlineRegionFreqPercent("outline-region-freq-percent", cl::init(75), cl::Hidden, cl::ZeroOrMore, cl::desc("Relative frequency of outline region to " "the entry block"))
This file contains the simple types necessary to represent the attributes associated with functions a...
InstrTy * getInstruction() const
Definition: CallSite.h:92
partial inliner
Interval::succ_iterator succ_begin(Interval *I)
succ_begin/succ_end - define methods so that Intervals may be used just like BasicBlocks can with the...
Definition: Interval.h:103
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:245
static cl::opt< bool > SkipCostAnalysis("skip-partial-inlining-cost-analysis", cl::init(false), cl::ZeroOrMore, cl::ReallyHidden, cl::desc("Skip Cost Analysis"))
#define T
void replaceAllUsesWith(Value *V)
Change all uses of this to point to a new Value.
Definition: Value.cpp:430
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree...
Definition: Dominators.h:140
amdgpu Simplify well known AMD library false Value * Callee
static PreservedAnalyses none()
Convenience factory function for the empty preserved set.
Definition: PassManager.h:156
Interval::succ_iterator succ_end(Interval *I)
Definition: Interval.h:106
void replaceUsesOfWith(Value *From, Value *To)
Replace uses of one Value with another.
Definition: User.cpp:22
PreservedAnalyses run(Module &M, ModuleAnalysisManager &)
static cl::opt< unsigned > ExtraOutliningPenalty("partial-inlining-extra-penalty", cl::init(0), cl::Hidden, cl::desc("A debug option to add additional penalty to the computed one."))
BlockFrequencyInfo pass uses BlockFrequencyInfoImpl implementation to estimate IR basic block frequen...
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:406
bool isAlways() const
Definition: InlineCost.h:98
Control flow instructions. These all have token chains.
Definition: ISDOpcodes.h:596
Subclasses of this class are all able to terminate a basic block.
Definition: InstrTypes.h:54
Wrapper pass for TargetTransformInfo.
A set of analyses that are preserved following a run of a transformation pass.
Definition: PassManager.h:153
const BasicBlock * getSinglePredecessor() const
Return the predecessor of this block if it has a single predecessor block.
Definition: BasicBlock.cpp:217
LLVM Basic Block Representation.
Definition: BasicBlock.h:59
Conditional or Unconditional Branch instruction.
An analysis pass based on the new PM to deliver ProfileSummaryInfo.
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
Value * getIncomingValueForBlock(const BasicBlock *BB) const
std::pair< iterator, bool > insert(const ValueT &V)
Definition: DenseSet.h:187
static ManagedStatic< OptionRegistry > OR
Definition: Options.cpp:31
Diagnostic information for applied optimization remarks.
Represent the analysis usage information of a pass.
Optional< uint64_t > getEntryCount() const
Get the entry count for this function.
Definition: Function.cpp:1330
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
Definition: PassManager.h:159
INITIALIZE_PASS_END(RegBankSelect, DEBUG_TYPE, "Assign register bank of generic virtual registers", false, false) RegBankSelect
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
InlineCost getInlineCost(CallSite CS, const InlineParams &Params, TargetTransformInfo &CalleeTTI, std::function< AssumptionCache &(Function &)> &GetAssumptionCache, Optional< function_ref< BlockFrequencyInfo &(Function &)>> GetBFI, ProfileSummaryInfo *PSI, OptimizationRemarkEmitter *ORE=nullptr)
Get an InlineCost object representing the cost of inlining this callsite.
Intrinsic::ID getIntrinsicID() const
Return the intrinsic ID of this intrinsic.
Definition: IntrinsicInst.h:51
A function analysis which provides an AssumptionCache.
Analysis pass which computes BlockFrequencyInfo.
Iterator for intrusive lists based on ilist_node.
void emit(DiagnosticInfoOptimizationBase &OptDiag)
Output the remark via the diagnostic handler and to the optimization record file. ...
auto find(R &&Range, const T &Val) -> decltype(std::begin(Range))
Provide wrappers to std::find which take ranges instead of having to pass begin/end explicitly...
Definition: STLExtras.h:788
This pass provides access to the codegen interfaces that are needed for IR-level transformations.
iterator end()
Definition: BasicBlock.h:254
InlineParams getInlineParams()
Generate the parameters to tune the inline cost analysis based only on the commandline options...
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small...
Definition: SmallVector.h:864
Module.h This file contains the declarations for the Module class.
static BranchProbability getBranchProbability(uint64_t Numerator, uint64_t Denominator)
static PHINode * Create(Type *Ty, unsigned NumReservedValues, const Twine &NameStr="", Instruction *InsertBefore=nullptr)
Constructors - NumReservedValues is a hint for the number of incoming edges that this phi node will h...
pred_range predecessors(BasicBlock *BB)
Definition: CFG.h:110
unsigned getNumIncomingValues() const
Return the number of incoming edges.
#define DEBUG_TYPE
BBTy * getParent() const
Get the basic block containing the call site.
Definition: CallSite.h:97
static cl::opt< unsigned > MaxNumInlineBlocks("max-num-inline-blocks", cl::init(5), cl::Hidden, cl::desc("Max number of blocks to be partially inlined"))
iterator_range< user_iterator > users()
Definition: Value.h:401
int getCost() const
Get the inline cost estimate.
Definition: InlineCost.h:104
int getCostDelta() const
Get the cost delta from the threshold for inlining.
Definition: InlineCost.h:118
INITIALIZE_PASS_BEGIN(PartialInlinerLegacyPass, "partial-inliner", "Partial Inliner", false, false) INITIALIZE_PASS_END(PartialInlinerLegacyPass
FunTy * getCaller() const
Return the caller function for this call site.
Definition: CallSite.h:267
Analysis providing branch probability information.
const DebugLoc & getDebugLoc() const
Return the debug location for this node as a DebugLoc.
Definition: Instruction.h:284
int getCallsiteCost(CallSite CS, const DataLayout &DL)
Return the cost associated with a callsite, including parameter passing and the call/return instructi...
size_t size() const
Definition: Module.h:580
const Function * getParent() const
Return the enclosing method, or null if none.
Definition: BasicBlock.h:108
#define I(x, y, z)
Definition: MD5.cpp:58
static bool hasProfileData(Function *F, FunctionOutliningInfo *OI)
ModulePass class - This class is used to implement unstructured interprocedural optimizations and ana...
Definition: Pass.h:225
LLVM_NODISCARD std::enable_if<!is_simple_type< Y >::value, typename cast_retty< X, const Y >::ret_type >::type dyn_cast(const Y &Val)
Definition: Casting.h:323
bool isUnconditional() const
bool isDeclaration() const
Return true if the primary definition of this global value is outside of the current translation unit...
Definition: Globals.cpp:201
FunTy * getCalledFunction() const
Return the function being called if this is a direct call, otherwise return null (if it's an indirect...
Definition: CallSite.h:107
Multiway switch.
bool hasAddressTaken(const User **=nullptr) const
hasAddressTaken - returns true if there are any uses of this function other than direct calls or invo...
Definition: Function.cpp:1213
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
user_iterator user_begin()
Definition: Value.h:377
const BasicBlock & front() const
Definition: Function.h:595
Module * getParent()
Get the module that this global value is contained inside of...
Definition: GlobalValue.h:556
LLVM Value Representation.
Definition: Value.h:73
succ_range successors(BasicBlock *BB)
Definition: CFG.h:143
Invoke instruction.
AssumptionCache & getAssumptionCache(Function &F)
Get the cached assumptions for a function.
print Print MemDeps of function
A container for analyses that lazily runs them and caches their results.
static MachineBasicBlock * OtherSucc(MachineBasicBlock *MBB, MachineBasicBlock *Succ)
This pass exposes codegen information to IR-level passes.
const TerminatorInst * getTerminator() const LLVM_READONLY
Returns the terminator instruction if the block is well formed or null if the block is not well forme...
Definition: BasicBlock.cpp:120
bool extractProfMetadata(uint64_t &TrueVal, uint64_t &FalseVal) const
Retrieve the raw weight values of a conditional branch or select.
Definition: Metadata.cpp:1303
static cl::opt< bool > DisablePartialInlining("disable-partial-inlining", cl::init(false), cl::Hidden, cl::desc("Disable partial ininling"))
The optimization diagnostic interface.
bool use_empty() const
Definition: Value.h:328
Optional< uint64_t > getBlockProfileCount(const BasicBlock *BB) const
Returns the estimated profile count of BB.
A wrapper class for inspecting calls to intrinsic functions.
Definition: IntrinsicInst.h:44
const BasicBlock * getParent() const
Definition: Instruction.h:66
An analysis over an "outer" IR unit that provides access to an analysis manager over an "inner" IR un...
Definition: PassManager.h:946
user_iterator user_end()
Definition: Value.h:385
bool is_contained(R &&Range, const E &Element)
Wrapper function around std::find to detect if an element exists in a container.
Definition: STLExtras.h:821