LLVM  6.0.0svn
PartialInlining.cpp
Go to the documentation of this file.
1 //===- PartialInlining.cpp - Inline parts of functions --------------------===//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This pass performs partial inlining, typically by inlining an if statement
11 // that surrounds the body of the function.
12 //
13 //===----------------------------------------------------------------------===//
14 
16 #include "llvm/ADT/Statistic.h"
21 #include "llvm/Analysis/LoopInfo.h"
26 #include "llvm/IR/CFG.h"
27 #include "llvm/IR/DiagnosticInfo.h"
28 #include "llvm/IR/Dominators.h"
29 #include "llvm/IR/Instructions.h"
30 #include "llvm/IR/Module.h"
31 #include "llvm/Pass.h"
32 #include "llvm/Transforms/IPO.h"
35 using namespace llvm;
36 
37 #define DEBUG_TYPE "partial-inlining"
38 
39 STATISTIC(NumPartialInlined,
40  "Number of callsites functions partially inlined into.");
41 
42 // Command line option to disable partial-inlining. The default is false:
43 static cl::opt<bool>
44  DisablePartialInlining("disable-partial-inlining", cl::init(false),
45  cl::Hidden, cl::desc("Disable partial ininling"));
46 // This is an option used by testing:
47 static cl::opt<bool> SkipCostAnalysis("skip-partial-inlining-cost-analysis",
48  cl::init(false), cl::ZeroOrMore,
50  cl::desc("Skip Cost Analysis"));
51 
53  "max-num-inline-blocks", cl::init(5), cl::Hidden,
54  cl::desc("Max Number of Blocks To be Partially Inlined"));
55 
56 // Command line option to set the maximum number of partial inlining allowed
57 // for the module. The default value of -1 means no limit.
59  "max-partial-inlining", cl::init(-1), cl::Hidden, cl::ZeroOrMore,
60  cl::desc("Max number of partial inlining. The default is unlimited"));
61 
62 // Used only when PGO or user annotated branch data is absent. It is
63 // the least value that is used to weigh the outline region. If BFI
64 // produces larger value, the BFI value will be used.
65 static cl::opt<int>
66  OutlineRegionFreqPercent("outline-region-freq-percent", cl::init(75),
68  cl::desc("Relative frequency of outline region to "
69  "the entry block"));
70 
72  "partial-inlining-extra-penalty", cl::init(0), cl::Hidden,
73  cl::desc("A debug option to add additional penalty to the computed one."));
74 
75 namespace {
76 
77 struct FunctionOutliningInfo {
78  FunctionOutliningInfo()
79  : Entries(), ReturnBlock(nullptr), NonReturnBlock(nullptr),
80  ReturnBlockPreds() {}
81  // Returns the number of blocks to be inlined including all blocks
82  // in Entries and one return block.
83  unsigned GetNumInlinedBlocks() const { return Entries.size() + 1; }
84 
85  // A set of blocks including the function entry that guard
86  // the region to be outlined.
88  // The return block that is not included in the outlined region.
89  BasicBlock *ReturnBlock;
90  // The dominating block of the region to be outlined.
91  BasicBlock *NonReturnBlock;
92  // The set of blocks in Entries that that are predecessors to ReturnBlock
93  SmallVector<BasicBlock *, 4> ReturnBlockPreds;
94 };
95 
96 struct PartialInlinerImpl {
97  PartialInlinerImpl(
101  ProfileSummaryInfo *ProfSI)
102  : GetAssumptionCache(GetAC), GetTTI(GTTI), GetBFI(GBFI), PSI(ProfSI) {}
103  bool run(Module &M);
104  Function *unswitchFunction(Function *F);
105 
106  // This class speculatively clones the the function to be partial inlined.
107  // At the end of partial inlining, the remaining callsites to the cloned
108  // function that are not partially inlined will be fixed up to reference
109  // the original function, and the cloned function will be erased.
110  struct FunctionCloner {
111  FunctionCloner(Function *F, FunctionOutliningInfo *OI);
112  ~FunctionCloner();
113 
114  // Prepare for function outlining: making sure there is only
115  // one incoming edge from the extracted/outlined region to
116  // the return block.
117  void NormalizeReturnBlock();
118 
119  // Do function outlining:
120  Function *doFunctionOutlining();
121 
122  Function *OrigFunc = nullptr;
123  Function *ClonedFunc = nullptr;
124  Function *OutlinedFunc = nullptr;
125  BasicBlock *OutliningCallBB = nullptr;
126  // ClonedFunc is inlined in one of its callers after function
127  // outlining.
128  bool IsFunctionInlined = false;
129  // The cost of the region to be outlined.
130  int OutlinedRegionCost = 0;
131  std::unique_ptr<FunctionOutliningInfo> ClonedOI = nullptr;
132  std::unique_ptr<BlockFrequencyInfo> ClonedFuncBFI = nullptr;
133  };
134 
135 private:
136  int NumPartialInlining = 0;
137  std::function<AssumptionCache &(Function &)> *GetAssumptionCache;
138  std::function<TargetTransformInfo &(Function &)> *GetTTI;
140  ProfileSummaryInfo *PSI;
141 
142  // Return the frequency of the OutlininingBB relative to F's entry point.
143  // The result is no larger than 1 and is represented using BP.
144  // (Note that the outlined region's 'head' block can only have incoming
145  // edges from the guarding entry blocks).
146  BranchProbability getOutliningCallBBRelativeFreq(FunctionCloner &Cloner);
147 
148  // Return true if the callee of CS should be partially inlined with
149  // profit.
150  bool shouldPartialInline(CallSite CS, FunctionCloner &Cloner,
151  BlockFrequency WeightedOutliningRcost,
153 
154  // Try to inline DuplicateFunction (cloned from F with call to
155  // the OutlinedFunction into its callers. Return true
156  // if there is any successful inlining.
157  bool tryPartialInline(FunctionCloner &Cloner);
158 
159  // Compute the mapping from use site of DuplicationFunction to the enclosing
160  // BB's profile count.
161  void computeCallsiteToProfCountMap(Function *DuplicateFunction,
162  DenseMap<User *, uint64_t> &SiteCountMap);
163 
164  bool IsLimitReached() {
165  return (MaxNumPartialInlining != -1 &&
166  NumPartialInlining >= MaxNumPartialInlining);
167  }
168 
169  static CallSite getCallSite(User *U) {
170  CallSite CS;
171  if (CallInst *CI = dyn_cast<CallInst>(U))
172  CS = CallSite(CI);
173  else if (InvokeInst *II = dyn_cast<InvokeInst>(U))
174  CS = CallSite(II);
175  else
176  llvm_unreachable("All uses must be calls");
177  return CS;
178  }
179 
180  static CallSite getOneCallSiteTo(Function *F) {
181  User *User = *F->user_begin();
182  return getCallSite(User);
183  }
184 
185  std::tuple<DebugLoc, BasicBlock *> getOneDebugLoc(Function *F) {
186  CallSite CS = getOneCallSiteTo(F);
187  DebugLoc DLoc = CS.getInstruction()->getDebugLoc();
188  BasicBlock *Block = CS.getParent();
189  return std::make_tuple(DLoc, Block);
190  }
191 
192  // Returns the costs associated with function outlining:
193  // - The first value is the non-weighted runtime cost for making the call
194  // to the outlined function, including the addtional setup cost in the
195  // outlined function itself;
196  // - The second value is the estimated size of the new call sequence in
197  // basic block Cloner.OutliningCallBB;
198  std::tuple<int, int> computeOutliningCosts(FunctionCloner &Cloner);
199  // Compute the 'InlineCost' of block BB. InlineCost is a proxy used to
200  // approximate both the size and runtime cost (Note that in the current
201  // inline cost analysis, there is no clear distinction there either).
202  static int computeBBInlineCost(BasicBlock *BB);
203 
204  std::unique_ptr<FunctionOutliningInfo> computeOutliningInfo(Function *F);
205 
206 };
207 
208 struct PartialInlinerLegacyPass : public ModulePass {
209  static char ID; // Pass identification, replacement for typeid
210  PartialInlinerLegacyPass() : ModulePass(ID) {
212  }
213 
214  void getAnalysisUsage(AnalysisUsage &AU) const override {
218  }
219  bool runOnModule(Module &M) override {
220  if (skipModule(M))
221  return false;
222 
223  AssumptionCacheTracker *ACT = &getAnalysis<AssumptionCacheTracker>();
225  &getAnalysis<TargetTransformInfoWrapperPass>();
226  ProfileSummaryInfo *PSI =
227  getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI();
228 
229  std::function<AssumptionCache &(Function &)> GetAssumptionCache =
230  [&ACT](Function &F) -> AssumptionCache & {
231  return ACT->getAssumptionCache(F);
232  };
233 
234  std::function<TargetTransformInfo &(Function &)> GetTTI =
235  [&TTIWP](Function &F) -> TargetTransformInfo & {
236  return TTIWP->getTTI(F);
237  };
238 
239  return PartialInlinerImpl(&GetAssumptionCache, &GetTTI, None, PSI).run(M);
240  }
241 };
242 }
243 
244 std::unique_ptr<FunctionOutliningInfo>
245 PartialInlinerImpl::computeOutliningInfo(Function *F) {
246  BasicBlock *EntryBlock = &F->front();
247  BranchInst *BR = dyn_cast<BranchInst>(EntryBlock->getTerminator());
248  if (!BR || BR->isUnconditional())
249  return std::unique_ptr<FunctionOutliningInfo>();
250 
251  // Returns true if Succ is BB's successor
252  auto IsSuccessor = [](BasicBlock *Succ, BasicBlock *BB) {
253  return is_contained(successors(BB), Succ);
254  };
255 
256  auto SuccSize = [](BasicBlock *BB) {
257  return std::distance(succ_begin(BB), succ_end(BB));
258  };
259 
260  auto IsReturnBlock = [](BasicBlock *BB) {
261  TerminatorInst *TI = BB->getTerminator();
262  return isa<ReturnInst>(TI);
263  };
264 
265  auto GetReturnBlock = [&](BasicBlock *Succ1, BasicBlock *Succ2) {
266  if (IsReturnBlock(Succ1))
267  return std::make_tuple(Succ1, Succ2);
268  if (IsReturnBlock(Succ2))
269  return std::make_tuple(Succ2, Succ1);
270 
271  return std::make_tuple<BasicBlock *, BasicBlock *>(nullptr, nullptr);
272  };
273 
274  // Detect a triangular shape:
275  auto GetCommonSucc = [&](BasicBlock *Succ1, BasicBlock *Succ2) {
276  if (IsSuccessor(Succ1, Succ2))
277  return std::make_tuple(Succ1, Succ2);
278  if (IsSuccessor(Succ2, Succ1))
279  return std::make_tuple(Succ2, Succ1);
280 
281  return std::make_tuple<BasicBlock *, BasicBlock *>(nullptr, nullptr);
282  };
283 
284  std::unique_ptr<FunctionOutliningInfo> OutliningInfo =
285  llvm::make_unique<FunctionOutliningInfo>();
286 
287  BasicBlock *CurrEntry = EntryBlock;
288  bool CandidateFound = false;
289  do {
290  // The number of blocks to be inlined has already reached
291  // the limit. When MaxNumInlineBlocks is set to 0 or 1, this
292  // disables partial inlining for the function.
293  if (OutliningInfo->GetNumInlinedBlocks() >= MaxNumInlineBlocks)
294  break;
295 
296  if (SuccSize(CurrEntry) != 2)
297  break;
298 
299  BasicBlock *Succ1 = *succ_begin(CurrEntry);
300  BasicBlock *Succ2 = *(succ_begin(CurrEntry) + 1);
301 
302  BasicBlock *ReturnBlock, *NonReturnBlock;
303  std::tie(ReturnBlock, NonReturnBlock) = GetReturnBlock(Succ1, Succ2);
304 
305  if (ReturnBlock) {
306  OutliningInfo->Entries.push_back(CurrEntry);
307  OutliningInfo->ReturnBlock = ReturnBlock;
308  OutliningInfo->NonReturnBlock = NonReturnBlock;
309  CandidateFound = true;
310  break;
311  }
312 
313  BasicBlock *CommSucc;
315  std::tie(CommSucc, OtherSucc) = GetCommonSucc(Succ1, Succ2);
316 
317  if (!CommSucc)
318  break;
319 
320  OutliningInfo->Entries.push_back(CurrEntry);
321  CurrEntry = OtherSucc;
322 
323  } while (true);
324 
325  if (!CandidateFound)
326  return std::unique_ptr<FunctionOutliningInfo>();
327 
328  // Do sanity check of the entries: threre should not
329  // be any successors (not in the entry set) other than
330  // {ReturnBlock, NonReturnBlock}
331  assert(OutliningInfo->Entries[0] == &F->front() &&
332  "Function Entry must be the first in Entries vector");
333  DenseSet<BasicBlock *> Entries;
334  for (BasicBlock *E : OutliningInfo->Entries)
335  Entries.insert(E);
336 
337  // Returns true of BB has Predecessor which is not
338  // in Entries set.
339  auto HasNonEntryPred = [Entries](BasicBlock *BB) {
340  for (auto Pred : predecessors(BB)) {
341  if (!Entries.count(Pred))
342  return true;
343  }
344  return false;
345  };
346  auto CheckAndNormalizeCandidate =
347  [Entries, HasNonEntryPred](FunctionOutliningInfo *OutliningInfo) {
348  for (BasicBlock *E : OutliningInfo->Entries) {
349  for (auto Succ : successors(E)) {
350  if (Entries.count(Succ))
351  continue;
352  if (Succ == OutliningInfo->ReturnBlock)
353  OutliningInfo->ReturnBlockPreds.push_back(E);
354  else if (Succ != OutliningInfo->NonReturnBlock)
355  return false;
356  }
357  // There should not be any outside incoming edges either:
358  if (HasNonEntryPred(E))
359  return false;
360  }
361  return true;
362  };
363 
364  if (!CheckAndNormalizeCandidate(OutliningInfo.get()))
365  return std::unique_ptr<FunctionOutliningInfo>();
366 
367  // Now further growing the candidate's inlining region by
368  // peeling off dominating blocks from the outlining region:
369  while (OutliningInfo->GetNumInlinedBlocks() < MaxNumInlineBlocks) {
370  BasicBlock *Cand = OutliningInfo->NonReturnBlock;
371  if (SuccSize(Cand) != 2)
372  break;
373 
374  if (HasNonEntryPred(Cand))
375  break;
376 
377  BasicBlock *Succ1 = *succ_begin(Cand);
378  BasicBlock *Succ2 = *(succ_begin(Cand) + 1);
379 
380  BasicBlock *ReturnBlock, *NonReturnBlock;
381  std::tie(ReturnBlock, NonReturnBlock) = GetReturnBlock(Succ1, Succ2);
382  if (!ReturnBlock || ReturnBlock != OutliningInfo->ReturnBlock)
383  break;
384 
385  if (NonReturnBlock->getSinglePredecessor() != Cand)
386  break;
387 
388  // Now grow and update OutlininigInfo:
389  OutliningInfo->Entries.push_back(Cand);
390  OutliningInfo->NonReturnBlock = NonReturnBlock;
391  OutliningInfo->ReturnBlockPreds.push_back(Cand);
392  Entries.insert(Cand);
393  }
394 
395  return OutliningInfo;
396 }
397 
398 // Check if there is PGO data or user annoated branch data:
399 static bool hasProfileData(Function *F, FunctionOutliningInfo *OI) {
400  if (F->getEntryCount())
401  return true;
402  // Now check if any of the entry block has MD_prof data:
403  for (auto *E : OI->Entries) {
405  if (!BR || BR->isUnconditional())
406  continue;
407  uint64_t T, F;
408  if (BR->extractProfMetadata(T, F))
409  return true;
410  }
411  return false;
412 }
413 
415 PartialInlinerImpl::getOutliningCallBBRelativeFreq(FunctionCloner &Cloner) {
416 
417  auto EntryFreq =
418  Cloner.ClonedFuncBFI->getBlockFreq(&Cloner.ClonedFunc->getEntryBlock());
419  auto OutliningCallFreq =
420  Cloner.ClonedFuncBFI->getBlockFreq(Cloner.OutliningCallBB);
421 
422  auto OutlineRegionRelFreq =
423  BranchProbability::getBranchProbability(OutliningCallFreq.getFrequency(),
424  EntryFreq.getFrequency());
425 
426  if (hasProfileData(Cloner.OrigFunc, Cloner.ClonedOI.get()))
427  return OutlineRegionRelFreq;
428 
429  // When profile data is not available, we need to be conservative in
430  // estimating the overall savings. Static branch prediction can usually
431  // guess the branch direction right (taken/non-taken), but the guessed
432  // branch probability is usually not biased enough. In case when the
433  // outlined region is predicted to be likely, its probability needs
434  // to be made higher (more biased) to not under-estimate the cost of
435  // function outlining. On the other hand, if the outlined region
436  // is predicted to be less likely, the predicted probablity is usually
437  // higher than the actual. For instance, the actual probability of the
438  // less likely target is only 5%, but the guessed probablity can be
439  // 40%. In the latter case, there is no need for further adjustement.
440  // FIXME: add an option for this.
441  if (OutlineRegionRelFreq < BranchProbability(45, 100))
442  return OutlineRegionRelFreq;
443 
444  OutlineRegionRelFreq = std::max(
445  OutlineRegionRelFreq, BranchProbability(OutlineRegionFreqPercent, 100));
446 
447  return OutlineRegionRelFreq;
448 }
449 
450 bool PartialInlinerImpl::shouldPartialInline(
451  CallSite CS, FunctionCloner &Cloner, BlockFrequency WeightedOutliningRcost,
453 
454  using namespace ore;
455  if (SkipCostAnalysis)
456  return true;
457 
458  Instruction *Call = CS.getInstruction();
459  Function *Callee = CS.getCalledFunction();
460  assert(Callee == Cloner.ClonedFunc);
461 
462  Function *Caller = CS.getCaller();
463  auto &CalleeTTI = (*GetTTI)(*Callee);
464  InlineCost IC = getInlineCost(CS, getInlineParams(), CalleeTTI,
465  *GetAssumptionCache, GetBFI, PSI);
466 
467  if (IC.isAlways()) {
468  ORE.emit(OptimizationRemarkAnalysis(DEBUG_TYPE, "AlwaysInline", Call)
469  << NV("Callee", Cloner.OrigFunc)
470  << " should always be fully inlined, not partially");
471  return false;
472  }
473 
474  if (IC.isNever()) {
475  ORE.emit(OptimizationRemarkMissed(DEBUG_TYPE, "NeverInline", Call)
476  << NV("Callee", Cloner.OrigFunc) << " not partially inlined into "
477  << NV("Caller", Caller)
478  << " because it should never be inlined (cost=never)");
479  return false;
480  }
481 
482  if (!IC) {
483  ORE.emit(OptimizationRemarkAnalysis(DEBUG_TYPE, "TooCostly", Call)
484  << NV("Callee", Cloner.OrigFunc) << " not partially inlined into "
485  << NV("Caller", Caller) << " because too costly to inline (cost="
486  << NV("Cost", IC.getCost()) << ", threshold="
487  << NV("Threshold", IC.getCostDelta() + IC.getCost()) << ")");
488  return false;
489  }
490  const DataLayout &DL = Caller->getParent()->getDataLayout();
491 
492  // The savings of eliminating the call:
493  int NonWeightedSavings = getCallsiteCost(CS, DL);
494  BlockFrequency NormWeightedSavings(NonWeightedSavings);
495 
496  // Weighted saving is smaller than weighted cost, return false
497  if (NormWeightedSavings < WeightedOutliningRcost) {
498  ORE.emit(
499  OptimizationRemarkAnalysis(DEBUG_TYPE, "OutliningCallcostTooHigh", Call)
500  << NV("Callee", Cloner.OrigFunc) << " not partially inlined into "
501  << NV("Caller", Caller) << " runtime overhead (overhead="
502  << NV("Overhead", (unsigned)WeightedOutliningRcost.getFrequency())
503  << ", savings="
504  << NV("Savings", (unsigned)NormWeightedSavings.getFrequency()) << ")"
505  << " of making the outlined call is too high");
506 
507  return false;
508  }
509 
510  ORE.emit(OptimizationRemarkAnalysis(DEBUG_TYPE, "CanBePartiallyInlined", Call)
511  << NV("Callee", Cloner.OrigFunc) << " can be partially inlined into "
512  << NV("Caller", Caller) << " with cost=" << NV("Cost", IC.getCost())
513  << " (threshold="
514  << NV("Threshold", IC.getCostDelta() + IC.getCost()) << ")");
515  return true;
516 }
517 
518 // TODO: Ideally we should share Inliner's InlineCost Analysis code.
519 // For now use a simplified version. The returned 'InlineCost' will be used
520 // to esimate the size cost as well as runtime cost of the BB.
521 int PartialInlinerImpl::computeBBInlineCost(BasicBlock *BB) {
522  int InlineCost = 0;
523  const DataLayout &DL = BB->getParent()->getParent()->getDataLayout();
524  for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I) {
525  if (isa<DbgInfoIntrinsic>(I))
526  continue;
527 
528  switch (I->getOpcode()) {
529  case Instruction::BitCast:
530  case Instruction::PtrToInt:
531  case Instruction::IntToPtr:
532  case Instruction::Alloca:
533  continue;
534  case Instruction::GetElementPtr:
535  if (cast<GetElementPtrInst>(I)->hasAllZeroIndices())
536  continue;
537  default:
538  break;
539  }
540 
541  IntrinsicInst *IntrInst = dyn_cast<IntrinsicInst>(I);
542  if (IntrInst) {
543  if (IntrInst->getIntrinsicID() == Intrinsic::lifetime_start ||
544  IntrInst->getIntrinsicID() == Intrinsic::lifetime_end)
545  continue;
546  }
547 
548  if (CallInst *CI = dyn_cast<CallInst>(I)) {
549  InlineCost += getCallsiteCost(CallSite(CI), DL);
550  continue;
551  }
552 
553  if (InvokeInst *II = dyn_cast<InvokeInst>(I)) {
554  InlineCost += getCallsiteCost(CallSite(II), DL);
555  continue;
556  }
557 
558  if (SwitchInst *SI = dyn_cast<SwitchInst>(I)) {
559  InlineCost += (SI->getNumCases() + 1) * InlineConstants::InstrCost;
560  continue;
561  }
562  InlineCost += InlineConstants::InstrCost;
563  }
564  return InlineCost;
565 }
566 
567 std::tuple<int, int>
568 PartialInlinerImpl::computeOutliningCosts(FunctionCloner &Cloner) {
569 
570  // Now compute the cost of the call sequence to the outlined function
571  // 'OutlinedFunction' in BB 'OutliningCallBB':
572  int OutliningFuncCallCost = computeBBInlineCost(Cloner.OutliningCallBB);
573 
574  // Now compute the cost of the extracted/outlined function itself:
575  int OutlinedFunctionCost = 0;
576  for (BasicBlock &BB : *Cloner.OutlinedFunc) {
577  OutlinedFunctionCost += computeBBInlineCost(&BB);
578  }
579 
580  assert(OutlinedFunctionCost >= Cloner.OutlinedRegionCost &&
581  "Outlined function cost should be no less than the outlined region");
582  // The code extractor introduces a new root and exit stub blocks with
583  // additional unconditional branches. Those branches will be eliminated
584  // later with bb layout. The cost should be adjusted accordingly:
585  OutlinedFunctionCost -= 2 * InlineConstants::InstrCost;
586 
587  int OutliningRuntimeOverhead =
588  OutliningFuncCallCost +
589  (OutlinedFunctionCost - Cloner.OutlinedRegionCost) +
591 
592  return std::make_tuple(OutliningFuncCallCost, OutliningRuntimeOverhead);
593 }
594 
595 // Create the callsite to profile count map which is
596 // used to update the original function's entry count,
597 // after the function is partially inlined into the callsite.
598 void PartialInlinerImpl::computeCallsiteToProfCountMap(
599  Function *DuplicateFunction,
600  DenseMap<User *, uint64_t> &CallSiteToProfCountMap) {
601  std::vector<User *> Users(DuplicateFunction->user_begin(),
602  DuplicateFunction->user_end());
603  Function *CurrentCaller = nullptr;
604  std::unique_ptr<BlockFrequencyInfo> TempBFI;
605  BlockFrequencyInfo *CurrentCallerBFI = nullptr;
606 
607  auto ComputeCurrBFI = [&,this](Function *Caller) {
608  // For the old pass manager:
609  if (!GetBFI) {
610  DominatorTree DT(*Caller);
611  LoopInfo LI(DT);
612  BranchProbabilityInfo BPI(*Caller, LI);
613  TempBFI.reset(new BlockFrequencyInfo(*Caller, BPI, LI));
614  CurrentCallerBFI = TempBFI.get();
615  } else {
616  // New pass manager:
617  CurrentCallerBFI = &(*GetBFI)(*Caller);
618  }
619  };
620 
621  for (User *User : Users) {
622  CallSite CS = getCallSite(User);
623  Function *Caller = CS.getCaller();
624  if (CurrentCaller != Caller) {
625  CurrentCaller = Caller;
626  ComputeCurrBFI(Caller);
627  } else {
628  assert(CurrentCallerBFI && "CallerBFI is not set");
629  }
630  BasicBlock *CallBB = CS.getInstruction()->getParent();
631  auto Count = CurrentCallerBFI->getBlockProfileCount(CallBB);
632  if (Count)
633  CallSiteToProfCountMap[User] = *Count;
634  else
635  CallSiteToProfCountMap[User] = 0;
636  }
637 }
638 
639 PartialInlinerImpl::FunctionCloner::FunctionCloner(Function *F,
640  FunctionOutliningInfo *OI)
641  : OrigFunc(F) {
642  ClonedOI = llvm::make_unique<FunctionOutliningInfo>();
643 
644  // Clone the function, so that we can hack away on it.
645  ValueToValueMapTy VMap;
646  ClonedFunc = CloneFunction(F, VMap);
647 
648  ClonedOI->ReturnBlock = cast<BasicBlock>(VMap[OI->ReturnBlock]);
649  ClonedOI->NonReturnBlock = cast<BasicBlock>(VMap[OI->NonReturnBlock]);
650  for (BasicBlock *BB : OI->Entries) {
651  ClonedOI->Entries.push_back(cast<BasicBlock>(VMap[BB]));
652  }
653  for (BasicBlock *E : OI->ReturnBlockPreds) {
654  BasicBlock *NewE = cast<BasicBlock>(VMap[E]);
655  ClonedOI->ReturnBlockPreds.push_back(NewE);
656  }
657  // Go ahead and update all uses to the duplicate, so that we can just
658  // use the inliner functionality when we're done hacking.
659  F->replaceAllUsesWith(ClonedFunc);
660 }
661 
662 void PartialInlinerImpl::FunctionCloner::NormalizeReturnBlock() {
663 
664  auto getFirstPHI = [](BasicBlock *BB) {
665  BasicBlock::iterator I = BB->begin();
666  PHINode *FirstPhi = nullptr;
667  while (I != BB->end()) {
668  PHINode *Phi = dyn_cast<PHINode>(I);
669  if (!Phi)
670  break;
671  if (!FirstPhi) {
672  FirstPhi = Phi;
673  break;
674  }
675  }
676  return FirstPhi;
677  };
678 
679  // Special hackery is needed with PHI nodes that have inputs from more than
680  // one extracted block. For simplicity, just split the PHIs into a two-level
681  // sequence of PHIs, some of which will go in the extracted region, and some
682  // of which will go outside.
683  BasicBlock *PreReturn = ClonedOI->ReturnBlock;
684  // only split block when necessary:
685  PHINode *FirstPhi = getFirstPHI(PreReturn);
686  unsigned NumPredsFromEntries = ClonedOI->ReturnBlockPreds.size();
687 
688  if (!FirstPhi || FirstPhi->getNumIncomingValues() <= NumPredsFromEntries + 1)
689  return;
690 
691  auto IsTrivialPhi = [](PHINode *PN) -> Value * {
692  Value *CommonValue = PN->getIncomingValue(0);
693  if (all_of(PN->incoming_values(),
694  [&](Value *V) { return V == CommonValue; }))
695  return CommonValue;
696  return nullptr;
697  };
698 
699  ClonedOI->ReturnBlock = ClonedOI->ReturnBlock->splitBasicBlock(
700  ClonedOI->ReturnBlock->getFirstNonPHI()->getIterator());
701  BasicBlock::iterator I = PreReturn->begin();
702  Instruction *Ins = &ClonedOI->ReturnBlock->front();
704  while (I != PreReturn->end()) {
705  PHINode *OldPhi = dyn_cast<PHINode>(I);
706  if (!OldPhi)
707  break;
708 
709  PHINode *RetPhi =
710  PHINode::Create(OldPhi->getType(), NumPredsFromEntries + 1, "", Ins);
711  OldPhi->replaceAllUsesWith(RetPhi);
712  Ins = ClonedOI->ReturnBlock->getFirstNonPHI();
713 
714  RetPhi->addIncoming(&*I, PreReturn);
715  for (BasicBlock *E : ClonedOI->ReturnBlockPreds) {
716  RetPhi->addIncoming(OldPhi->getIncomingValueForBlock(E), E);
717  OldPhi->removeIncomingValue(E);
718  }
719 
720  // After incoming values splitting, the old phi may become trivial.
721  // Keeping the trivial phi can introduce definition inside the outline
722  // region which is live-out, causing necessary overhead (load, store
723  // arg passing etc).
724  if (auto *OldPhiVal = IsTrivialPhi(OldPhi)) {
725  OldPhi->replaceAllUsesWith(OldPhiVal);
726  DeadPhis.push_back(OldPhi);
727  }
728  ++I;
729  }
730  for (auto *DP : DeadPhis)
731  DP->eraseFromParent();
732 
733  for (auto E : ClonedOI->ReturnBlockPreds) {
734  E->getTerminator()->replaceUsesOfWith(PreReturn, ClonedOI->ReturnBlock);
735  }
736 }
737 
738 Function *PartialInlinerImpl::FunctionCloner::doFunctionOutlining() {
739  // Returns true if the block is to be partial inlined into the caller
740  // (i.e. not to be extracted to the out of line function)
741  auto ToBeInlined = [&, this](BasicBlock *BB) {
742  return BB == ClonedOI->ReturnBlock ||
743  (std::find(ClonedOI->Entries.begin(), ClonedOI->Entries.end(), BB) !=
744  ClonedOI->Entries.end());
745  };
746 
747  // Gather up the blocks that we're going to extract.
748  std::vector<BasicBlock *> ToExtract;
749  ToExtract.push_back(ClonedOI->NonReturnBlock);
750  OutlinedRegionCost +=
751  PartialInlinerImpl::computeBBInlineCost(ClonedOI->NonReturnBlock);
752  for (BasicBlock &BB : *ClonedFunc)
753  if (!ToBeInlined(&BB) && &BB != ClonedOI->NonReturnBlock) {
754  ToExtract.push_back(&BB);
755  // FIXME: the code extractor may hoist/sink more code
756  // into the outlined function which may make the outlining
757  // overhead (the difference of the outlined function cost
758  // and OutliningRegionCost) look larger.
759  OutlinedRegionCost += computeBBInlineCost(&BB);
760  }
761 
762  // The CodeExtractor needs a dominator tree.
763  DominatorTree DT;
764  DT.recalculate(*ClonedFunc);
765 
766  // Manually calculate a BlockFrequencyInfo and BranchProbabilityInfo.
767  LoopInfo LI(DT);
768  BranchProbabilityInfo BPI(*ClonedFunc, LI);
769  ClonedFuncBFI.reset(new BlockFrequencyInfo(*ClonedFunc, BPI, LI));
770 
771  // Extract the body of the if.
772  OutlinedFunc = CodeExtractor(ToExtract, &DT, /*AggregateArgs*/ false,
773  ClonedFuncBFI.get(), &BPI)
774  .extractCodeRegion();
775 
776  if (OutlinedFunc) {
777  OutliningCallBB = PartialInlinerImpl::getOneCallSiteTo(OutlinedFunc)
778  .getInstruction()
779  ->getParent();
780  assert(OutliningCallBB->getParent() == ClonedFunc);
781  }
782 
783  return OutlinedFunc;
784 }
785 
786 PartialInlinerImpl::FunctionCloner::~FunctionCloner() {
787  // Ditch the duplicate, since we're done with it, and rewrite all remaining
788  // users (function pointers, etc.) back to the original function.
789  ClonedFunc->replaceAllUsesWith(OrigFunc);
790  ClonedFunc->eraseFromParent();
791  if (!IsFunctionInlined) {
792  // Remove the function that is speculatively created if there is no
793  // reference.
794  if (OutlinedFunc)
795  OutlinedFunc->eraseFromParent();
796  }
797 }
798 
799 Function *PartialInlinerImpl::unswitchFunction(Function *F) {
800 
801  if (F->hasAddressTaken())
802  return nullptr;
803 
804  // Let inliner handle it
805  if (F->hasFnAttribute(Attribute::AlwaysInline))
806  return nullptr;
807 
808  if (F->hasFnAttribute(Attribute::NoInline))
809  return nullptr;
810 
811  if (PSI->isFunctionEntryCold(F))
812  return nullptr;
813 
814  if (F->user_begin() == F->user_end())
815  return nullptr;
816 
817  std::unique_ptr<FunctionOutliningInfo> OI = computeOutliningInfo(F);
818 
819  if (!OI)
820  return nullptr;
821 
822  FunctionCloner Cloner(F, OI.get());
823  Cloner.NormalizeReturnBlock();
824  Function *OutlinedFunction = Cloner.doFunctionOutlining();
825 
826  bool AnyInline = tryPartialInline(Cloner);
827 
828  if (AnyInline)
829  return OutlinedFunction;
830 
831  return nullptr;
832 }
833 
834 bool PartialInlinerImpl::tryPartialInline(FunctionCloner &Cloner) {
835  int NonWeightedRcost;
836  int SizeCost;
837 
838  if (Cloner.OutlinedFunc == nullptr)
839  return false;
840 
841  std::tie(SizeCost, NonWeightedRcost) = computeOutliningCosts(Cloner);
842 
843  auto RelativeToEntryFreq = getOutliningCallBBRelativeFreq(Cloner);
844  auto WeightedRcost = BlockFrequency(NonWeightedRcost) * RelativeToEntryFreq;
845 
846  // The call sequence to the outlined function is larger than the original
847  // outlined region size, it does not increase the chances of inlining
848  // the function with outlining (The inliner usies the size increase to
849  // model the cost of inlining a callee).
850  if (!SkipCostAnalysis && Cloner.OutlinedRegionCost < SizeCost) {
851  OptimizationRemarkEmitter ORE(Cloner.OrigFunc);
852  DebugLoc DLoc;
853  BasicBlock *Block;
854  std::tie(DLoc, Block) = getOneDebugLoc(Cloner.ClonedFunc);
855  ORE.emit(OptimizationRemarkAnalysis(DEBUG_TYPE, "OutlineRegionTooSmall",
856  DLoc, Block)
857  << ore::NV("Function", Cloner.OrigFunc)
858  << " not partially inlined into callers (Original Size = "
859  << ore::NV("OutlinedRegionOriginalSize", Cloner.OutlinedRegionCost)
860  << ", Size of call sequence to outlined function = "
861  << ore::NV("NewSize", SizeCost) << ")");
862  return false;
863  }
864 
865  assert(Cloner.OrigFunc->user_begin() == Cloner.OrigFunc->user_end() &&
866  "F's users should all be replaced!");
867 
868  std::vector<User *> Users(Cloner.ClonedFunc->user_begin(),
869  Cloner.ClonedFunc->user_end());
870 
871  DenseMap<User *, uint64_t> CallSiteToProfCountMap;
872  if (Cloner.OrigFunc->getEntryCount())
873  computeCallsiteToProfCountMap(Cloner.ClonedFunc, CallSiteToProfCountMap);
874 
875  auto CalleeEntryCount = Cloner.OrigFunc->getEntryCount();
876  uint64_t CalleeEntryCountV = (CalleeEntryCount ? *CalleeEntryCount : 0);
877 
878  bool AnyInline = false;
879  for (User *User : Users) {
880  CallSite CS = getCallSite(User);
881 
882  if (IsLimitReached())
883  continue;
884 
886 
887  if (!shouldPartialInline(CS, Cloner, WeightedRcost, ORE))
888  continue;
889 
890  ORE.emit(
891  OptimizationRemark(DEBUG_TYPE, "PartiallyInlined", CS.getInstruction())
892  << ore::NV("Callee", Cloner.OrigFunc) << " partially inlined into "
893  << ore::NV("Caller", CS.getCaller()));
894 
895  InlineFunctionInfo IFI(nullptr, GetAssumptionCache, PSI);
896  InlineFunction(CS, IFI);
897 
898  // Now update the entry count:
899  if (CalleeEntryCountV && CallSiteToProfCountMap.count(User)) {
900  uint64_t CallSiteCount = CallSiteToProfCountMap[User];
901  CalleeEntryCountV -= std::min(CalleeEntryCountV, CallSiteCount);
902  }
903 
904  AnyInline = true;
905  NumPartialInlining++;
906  // Update the stats
907  NumPartialInlined++;
908  }
909 
910  if (AnyInline) {
911  Cloner.IsFunctionInlined = true;
912  if (CalleeEntryCount)
913  Cloner.OrigFunc->setEntryCount(CalleeEntryCountV);
914  }
915 
916  return AnyInline;
917 }
918 
919 bool PartialInlinerImpl::run(Module &M) {
921  return false;
922 
923  std::vector<Function *> Worklist;
924  Worklist.reserve(M.size());
925  for (Function &F : M)
926  if (!F.use_empty() && !F.isDeclaration())
927  Worklist.push_back(&F);
928 
929  bool Changed = false;
930  while (!Worklist.empty()) {
931  Function *CurrFunc = Worklist.back();
932  Worklist.pop_back();
933 
934  if (CurrFunc->use_empty())
935  continue;
936 
937  bool Recursive = false;
938  for (User *U : CurrFunc->users())
939  if (Instruction *I = dyn_cast<Instruction>(U))
940  if (I->getParent()->getParent() == CurrFunc) {
941  Recursive = true;
942  break;
943  }
944  if (Recursive)
945  continue;
946 
947  if (Function *NewFunc = unswitchFunction(CurrFunc)) {
948  Worklist.push_back(NewFunc);
949  Changed = true;
950  }
951  }
952 
953  return Changed;
954 }
955 
957 INITIALIZE_PASS_BEGIN(PartialInlinerLegacyPass, "partial-inliner",
958  "Partial Inliner", false, false)
962 INITIALIZE_PASS_END(PartialInlinerLegacyPass, "partial-inliner",
963  "Partial Inliner", false, false)
964 
966  return new PartialInlinerLegacyPass();
967 }
968 
970  ModuleAnalysisManager &AM) {
971  auto &FAM = AM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager();
972 
973  std::function<AssumptionCache &(Function &)> GetAssumptionCache =
974  [&FAM](Function &F) -> AssumptionCache & {
975  return FAM.getResult<AssumptionAnalysis>(F);
976  };
977 
978  std::function<BlockFrequencyInfo &(Function &)> GetBFI =
979  [&FAM](Function &F) -> BlockFrequencyInfo & {
980  return FAM.getResult<BlockFrequencyAnalysis>(F);
981  };
982 
983  std::function<TargetTransformInfo &(Function &)> GetTTI =
984  [&FAM](Function &F) -> TargetTransformInfo & {
985  return FAM.getResult<TargetIRAnalysis>(F);
986  };
987 
989 
990  if (PartialInlinerImpl(&GetAssumptionCache, &GetTTI, {GetBFI}, PSI).run(M))
991  return PreservedAnalyses::none();
992  return PreservedAnalyses::all();
993 }
A parsed version of the target data layout string in and methods for querying it. ...
Definition: DataLayout.h:109
Utility class for extracting code into a new function.
Definition: CodeExtractor.h:48
Diagnostic information for missed-optimization remarks.
void addIncoming(Value *V, BasicBlock *BB)
Add an incoming value to the end of the PHI list.
static PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
GCNRegPressure max(const GCNRegPressure &P1, const GCNRegPressure &P2)
DiagnosticInfoOptimizationBase::Argument NV
bool isNever() const
Definition: InlineCost.h:98
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)
Get the result of an analysis pass for a given IR unit.
Definition: PassManager.h:687
partial Partial Inliner
Compute iterated dominance frontiers using a linear time algorithm.
Definition: AllocatorList.h:24
void initializePartialInlinerLegacyPassPass(PassRegistry &)
A Module instance is used to store all the information related to an LLVM module. ...
Definition: Module.h:63
static cl::opt< int > MaxNumPartialInlining("max-partial-inlining", cl::init(-1), cl::Hidden, cl::ZeroOrMore, cl::desc("Max number of partial inlining. The default is unlimited"))
Implements a dense probed hash-table based set.
Definition: DenseSet.h:221
Analysis providing profile information.
This class represents a function call, abstracting a target machine&#39;s calling convention.
Function * CloneFunction(Function *F, ValueToValueMapTy &VMap, ClonedCodeInfo *CodeInfo=nullptr)
CloneFunction - Return a copy of the specified function and add it to that function&#39;s module...
An immutable pass that tracks lazily created AssumptionCache objects.
An efficient, type-erasing, non-owning reference to a callable.
Definition: STLExtras.h:89
A cache of .assume calls within a function.
Analysis pass providing the TargetTransformInfo.
uint64_t getFrequency() const
Returns the frequency as a fixpoint number scaled by the entry frequency.
const BasicBlock & back() const
Definition: Function.h:589
bool hasFnAttribute(Attribute::AttrKind Kind) const
Return true if the function has the attribute.
Definition: Function.h:254
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly...
Definition: STLExtras.h:818
STATISTIC(NumFunctions, "Total number of functions")
A debug info location.
Definition: DebugLoc.h:34
iv Induction Variable Users
Definition: IVUsers.cpp:51
InlineFunctionInfo - This class captures the data input to the InlineFunction call, and records the auxiliary results produced by it.
Definition: Cloning.h:176
Represents the cost of inlining a function.
Definition: InlineCost.h:64
iterator begin()
Instruction iterator methods.
Definition: BasicBlock.h:252
bool InlineFunction(CallInst *C, InlineFunctionInfo &IFI, AAResults *CalleeAAR=nullptr, bool InsertLifetime=true)
InlineFunction - This function inlines the called function into the basic block of the caller...
AnalysisUsage & addRequired()
ModulePass * createPartialInliningPass()
createPartialInliningPass - This pass inlines parts of functions.
#define INITIALIZE_PASS_DEPENDENCY(depName)
Definition: PassSupport.h:51
Value * removeIncomingValue(unsigned Idx, bool DeletePHIIfEmpty=true)
Remove an incoming value.
const DataLayout & getDataLayout() const
Get the data layout for the module&#39;s target platform.
Definition: Module.cpp:361
An analysis pass based on legacy pass manager to deliver ProfileSummaryInfo.
Diagnostic information for optimization analysis remarks.
static cl::opt< int > OutlineRegionFreqPercent("outline-region-freq-percent", cl::init(75), cl::Hidden, cl::ZeroOrMore, cl::desc("Relative frequency of outline region to " "the entry block"))
InstrTy * getInstruction() const
Definition: CallSite.h:89
partial inliner
Interval::succ_iterator succ_begin(Interval *I)
succ_begin/succ_end - define methods so that Intervals may be used just like BasicBlocks can with the...
Definition: Interval.h:106
static cl::opt< unsigned > MaxNumInlineBlocks("max-num-inline-blocks", cl::init(5), cl::Hidden, cl::desc("Max Number of Blocks To be Partially Inlined"))
#define F(x, y, z)
Definition: MD5.cpp:55
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:245
static cl::opt< bool > SkipCostAnalysis("skip-partial-inlining-cost-analysis", cl::init(false), cl::ZeroOrMore, cl::ReallyHidden, cl::desc("Skip Cost Analysis"))
#define T
void replaceAllUsesWith(Value *V)
Change all uses of this to point to a new Value.
Definition: Value.cpp:428
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree...
Definition: Dominators.h:134
static PreservedAnalyses none()
Convenience factory function for the empty preserved set.
Definition: PassManager.h:156
Interval::succ_iterator succ_end(Interval *I)
Definition: Interval.h:109
void replaceUsesOfWith(Value *From, Value *To)
Replace uses of one Value with another.
Definition: User.cpp:22
PreservedAnalyses run(Module &M, ModuleAnalysisManager &)
static cl::opt< unsigned > ExtraOutliningPenalty("partial-inlining-extra-penalty", cl::init(0), cl::Hidden, cl::desc("A debug option to add additional penalty to the computed one."))
BlockFrequencyInfo pass uses BlockFrequencyInfoImpl implementation to estimate IR basic block frequen...
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:404
bool isAlways() const
Definition: InlineCost.h:97
Control flow instructions. These all have token chains.
Definition: ISDOpcodes.h:595
Subclasses of this class are all able to terminate a basic block.
Definition: InstrTypes.h:54
Wrapper pass for TargetTransformInfo.
A set of analyses that are preserved following a run of a transformation pass.
Definition: PassManager.h:153
const BasicBlock * getSinglePredecessor() const
Return the predecessor of this block if it has a single predecessor block.
Definition: BasicBlock.cpp:217
LLVM Basic Block Representation.
Definition: BasicBlock.h:59
Conditional or Unconditional Branch instruction.
An analysis pass based on the new PM to deliver ProfileSummaryInfo.
Value * getIncomingValueForBlock(const BasicBlock *BB) const
std::pair< iterator, bool > insert(const ValueT &V)
Definition: DenseSet.h:187
Diagnostic information for applied optimization remarks.
Represent the analysis usage information of a pass.
Optional< uint64_t > getEntryCount() const
Get the entry count for this function.
Definition: Function.cpp:1325
Used in the streaming interface as the general argument type.
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
Definition: PassManager.h:159
INITIALIZE_PASS_END(RegBankSelect, DEBUG_TYPE, "Assign register bank of generic virtual registers", false, false) RegBankSelect
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
Intrinsic::ID getIntrinsicID() const
Return the intrinsic ID of this intrinsic.
Definition: IntrinsicInst.h:51
A function analysis which provides an AssumptionCache.
Analysis pass which computes BlockFrequencyInfo.
InlineCost getInlineCost(CallSite CS, const InlineParams &Params, TargetTransformInfo &CalleeTTI, std::function< AssumptionCache &(Function &)> &GetAssumptionCache, Optional< function_ref< BlockFrequencyInfo &(Function &)>> GetBFI, ProfileSummaryInfo *PSI)
Get an InlineCost object representing the cost of inlining this callsite.
Iterator for intrusive lists based on ilist_node.
#define E
Definition: LargeTest.cpp:27
void emit(DiagnosticInfoOptimizationBase &OptDiag)
Output the remark via the diagnostic handler and to the optimization record file. ...
auto find(R &&Range, const T &Val) -> decltype(std::begin(Range))
Provide wrappers to std::find which take ranges instead of having to pass begin/end explicitly...
Definition: STLExtras.h:839
This pass provides access to the codegen interfaces that are needed for IR-level transformations.
iterator end()
Definition: BasicBlock.h:254
InlineParams getInlineParams()
Generate the parameters to tune the inline cost analysis based only on the commandline options...
This is a &#39;vector&#39; (really, a variable-sized array), optimized for the case when the array is small...
Definition: SmallVector.h:864
Module.h This file contains the declarations for the Module class.
static BranchProbability getBranchProbability(uint64_t Numerator, uint64_t Denominator)
static PHINode * Create(Type *Ty, unsigned NumReservedValues, const Twine &NameStr="", Instruction *InsertBefore=nullptr)
Constructors - NumReservedValues is a hint for the number of incoming edges that this phi node will h...
pred_range predecessors(BasicBlock *BB)
Definition: CFG.h:110
unsigned getNumIncomingValues() const
Return the number of incoming edges.
#define DEBUG_TYPE
BBTy * getParent() const
Get the basic block containing the call site.
Definition: CallSite.h:94
iterator_range< user_iterator > users()
Definition: Value.h:395
int getCost() const
Get the inline cost estimate.
Definition: InlineCost.h:103
int getCostDelta() const
Get the cost delta from the threshold for inlining.
Definition: InlineCost.h:111
INITIALIZE_PASS_BEGIN(PartialInlinerLegacyPass, "partial-inliner", "Partial Inliner", false, false) INITIALIZE_PASS_END(PartialInlinerLegacyPass
void recalculate(FT &F)
recalculate - compute a dominator tree for the given function
FunTy * getCaller() const
Return the caller function for this call site.
Definition: CallSite.h:264
Analysis providing branch probability information.
const DebugLoc & getDebugLoc() const
Return the debug location for this node as a DebugLoc.
Definition: Instruction.h:280
int getCallsiteCost(CallSite CS, const DataLayout &DL)
Return the cost associated with a callsite, including parameter passing and the call/return instructi...
size_t size() const
Definition: Module.h:580
const Function * getParent() const
Return the enclosing method, or null if none.
Definition: BasicBlock.h:108
#define I(x, y, z)
Definition: MD5.cpp:58
static bool hasProfileData(Function *F, FunctionOutliningInfo *OI)
ModulePass class - This class is used to implement unstructured interprocedural optimizations and ana...
Definition: Pass.h:235
LLVM_NODISCARD std::enable_if<!is_simple_type< Y >::value, typename cast_retty< X, const Y >::ret_type >::type dyn_cast(const Y &Val)
Definition: Casting.h:323
bool isUnconditional() const
bool isDeclaration() const
Return true if the primary definition of this global value is outside of the current translation unit...
Definition: Globals.cpp:200
FunTy * getCalledFunction() const
Return the function being called if this is a direct call, otherwise return null (if it&#39;s an indirect...
Definition: CallSite.h:104
void Recursive(const uint8_t *Data, size_t Size, int Depth)
Multiway switch.
bool hasAddressTaken(const User **=nullptr) const
hasAddressTaken - returns true if there are any uses of this function other than direct calls or invo...
Definition: Function.cpp:1208
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
user_iterator user_begin()
Definition: Value.h:371
const BasicBlock & front() const
Definition: Function.h:587
Module * getParent()
Get the module that this global value is contained inside of...
Definition: GlobalValue.h:545
LLVM Value Representation.
Definition: Value.h:73
succ_range successors(BasicBlock *BB)
Definition: CFG.h:143
Invoke instruction.
AssumptionCache & getAssumptionCache(Function &F)
Get the cached assumptions for a function.
print Print MemDeps of function
A container for analyses that lazily runs them and caches their results.
static MachineBasicBlock * OtherSucc(MachineBasicBlock *MBB, MachineBasicBlock *Succ)
This pass exposes codegen information to IR-level passes.
const TerminatorInst * getTerminator() const LLVM_READONLY
Returns the terminator instruction if the block is well formed or null if the block is not well forme...
Definition: BasicBlock.cpp:120
bool extractProfMetadata(uint64_t &TrueVal, uint64_t &FalseVal) const
Retrieve the raw weight values of a conditional branch or select.
Definition: Metadata.cpp:1303
static cl::opt< bool > DisablePartialInlining("disable-partial-inlining", cl::init(false), cl::Hidden, cl::desc("Disable partial ininling"))
The optimization diagnostic interface.
bool use_empty() const
Definition: Value.h:322
Optional< uint64_t > getBlockProfileCount(const BasicBlock *BB) const
Returns the estimated profile count of BB.
A wrapper class for inspecting calls to intrinsic functions.
Definition: IntrinsicInst.h:44
const BasicBlock * getParent() const
Definition: Instruction.h:66
An analysis over an "outer" IR unit that provides access to an analysis manager over an "inner" IR un...
Definition: PassManager.h:946
user_iterator user_end()
Definition: Value.h:379
bool is_contained(R &&Range, const E &Element)
Wrapper function around std::find to detect if an element exists in a container.
Definition: STLExtras.h:872