//===- Inliner.cpp - Code common to all inliners --------------------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file implements the mechanics required to implement inlining without
// missing any calls and updating the call graph. The decisions of which calls
// are profitable to inline are implemented elsewhere.
//
//===----------------------------------------------------------------------===//

#include "llvm/Transforms/IPO/Inliner.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/None.h"
#include "llvm/ADT/Optional.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Analysis/BasicAliasAnalysis.h"
#include "llvm/Analysis/BlockFrequencyInfo.h"
#include "llvm/Analysis/CGSCCPassManager.h"
#include "llvm/Analysis/CallGraph.h"
#include "llvm/Analysis/InlineCost.h"
#include "llvm/Analysis/LazyCallGraph.h"
#include "llvm/Analysis/OptimizationRemarkEmitter.h"
#include "llvm/Analysis/ProfileSummaryInfo.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/IR/Attributes.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/CallSite.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DebugLoc.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/DiagnosticInfo.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/InstIterator.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Metadata.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/PassManager.h"
#include "llvm/IR/User.h"
#include "llvm/IR/Value.h"
#include "llvm/Pass.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Utils/Cloning.h"
#include "llvm/Transforms/Utils/ImportedFunctionsInliningStatistics.h"
#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Transforms/Utils/ModuleUtils.h"
#include <algorithm>
#include <cassert>
#include <functional>
#include <tuple>
#include <utility>
#include <vector>

using namespace llvm;

#define DEBUG_TYPE "inline"

STATISTIC(NumInlined, "Number of functions inlined");
STATISTIC(NumCallsDeleted, "Number of call sites deleted, not inlined");
STATISTIC(NumDeleted, "Number of functions deleted because all callers found");
STATISTIC(NumMergedAllocas, "Number of allocas merged together");

// This weirdly named statistic tracks the number of times that, when
// attempting to inline a function A into B, we analyze the callers of B to
// see whether inlining B into them would be more profitable, which would
// block the A-into-B inline step.
STATISTIC(NumCallerCallersAnalyzed, "Number of caller-callers analyzed");

/// Flag to disable manual alloca merging.
///
/// Merging of allocas was originally done as a stack-size saving technique
/// prior to LLVM's code generator having support for stack coloring based on
/// lifetime markers. It is now in the process of being removed. To experiment
/// with disabling it and relying fully on lifetime marker based stack
/// coloring, you can pass this flag to LLVM.
static cl::opt<bool>
    DisableInlinedAllocaMerging("disable-inlined-alloca-merging",
                                cl::init(false), cl::Hidden);
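// Illustrative use (hypothetical command line, not part of this file):
//   opt -O2 -disable-inlined-alloca-merging input.ll -S -o output.ll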

namespace {

enum class InlinerFunctionImportStatsOpts {
  No = 0,
  Basic = 1,
  Verbose = 2,
};

} // end anonymous namespace

static cl::opt<InlinerFunctionImportStatsOpts> InlinerFunctionImportStats(
    "inliner-function-import-stats",
    cl::init(InlinerFunctionImportStatsOpts::No),
    cl::values(clEnumValN(InlinerFunctionImportStatsOpts::Basic, "basic",
                          "basic statistics"),
               clEnumValN(InlinerFunctionImportStatsOpts::Verbose, "verbose",
                          "printing of statistics for each inlined function")),
    cl::Hidden, cl::desc("Enable inliner stats for imported functions"));
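// Illustrative use with the legacy pass manager (hypothetical command line,
// not part of this file):
//   opt -inline -inliner-function-import-stats=verbose thinlto-module.ll -S
// The verbose form prints statistics for each inlined function when the pass
// finalizes (see LegacyInlinerBase::doFinalization below).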
114 
116 
117 LegacyInlinerBase::LegacyInlinerBase(char &ID, bool InsertLifetime)
118  : CallGraphSCCPass(ID), InsertLifetime(InsertLifetime) {}
119 
120 /// For this class, we declare that we require and preserve the call graph.
121 /// If the derived class implements this method, it should
122 /// always explicitly call the implementation here.
129 }
130 
132 
133 /// Look at all of the allocas that we inlined through this call site. If we
134 /// have already inlined other allocas through other calls into this function,
135 /// then we know that they have disjoint lifetimes and that we can merge them.
136 ///
137 /// There are many heuristics possible for merging these allocas, and the
138 /// different options have different tradeoffs. One thing that we *really*
139 /// don't want to hurt is SRoA: once inlining happens, often allocas are no
140 /// longer address taken and so they can be promoted.
141 ///
142 /// Our "solution" for that is to only merge allocas whose outermost type is an
143 /// array type. These are usually not promoted because someone is using a
144 /// variable index into them. These are also often the most important ones to
145 /// merge.
146 ///
147 /// A better solution would be to have real memory lifetime markers in the IR
148 /// and not have the inliner do any merging of allocas at all. This would
149 /// allow the backend to do proper stack slot coloring of all allocas that
150 /// *actually make it to the backend*, which is really what we want.
151 ///
152 /// Because we don't have this information, we do this simple and useful hack.
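///
/// For example (illustrative IR, not taken from a real test case): after two
/// separate calls have been inlined into a caller we may be left with
///   %buf1 = alloca [64 x i8]   ; from the first inlined callee
///   %buf2 = alloca [64 x i8]   ; from the second inlined callee
/// Because the two inlined bodies had disjoint lifetimes, %buf2 can simply be
/// replaced with %buf1, saving 64 bytes of stack.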
static void mergeInlinedArrayAllocas(
    Function *Caller, InlineFunctionInfo &IFI,
    InlinedArrayAllocasTy &InlinedArrayAllocas, int InlineHistory) {
  SmallPtrSet<AllocaInst *, 16> UsedAllocas;

  // When processing our SCC, check to see if CS was inlined from some other
  // call site. For example, if we're processing "A" in this code:
  //   A() { B() }
  //   B() { x = alloca ... C() }
  //   C() { y = alloca ... }
  // Assume that C was not inlined into B initially, and so we're processing A
  // and decide to inline B into A. Doing this makes an alloca available for
  // reuse and makes a callsite (C) available for inlining. When we process
  // the C call site we don't want to do any alloca merging between X and Y
  // because their scopes are not disjoint. We could make this smarter by
  // keeping track of the inline history for each alloca in the
  // InlinedArrayAllocas but this isn't likely to be a significant win.
  if (InlineHistory != -1) // Only do merging for top-level call sites in SCC.
    return;

  // Loop over all the allocas we have so far and see if they can be merged
  // with a previously inlined alloca. If not, remember that we had it.
  for (unsigned AllocaNo = 0, e = IFI.StaticAllocas.size(); AllocaNo != e;
       ++AllocaNo) {
    AllocaInst *AI = IFI.StaticAllocas[AllocaNo];

    // Don't bother trying to merge array allocations (they will usually be
    // canonicalized to be an allocation *of* an array), or allocations whose
    // type is not itself an array (because we're afraid of pessimizing SRoA).
    ArrayType *ATy = dyn_cast<ArrayType>(AI->getAllocatedType());
    if (!ATy || AI->isArrayAllocation())
      continue;

    // Get the list of all available allocas for this array type.
    std::vector<AllocaInst *> &AllocasForType = InlinedArrayAllocas[ATy];

    // Loop over the allocas in AllocasForType to see if we can reuse one. Note
    // that we have to be careful not to reuse the same "available" alloca for
    // multiple different allocas that we just inlined; we use the 'UsedAllocas'
    // set to keep track of which "available" allocas are being used by this
    // function. Also, AllocasForType can be empty of course!
    bool MergedAwayAlloca = false;
    for (AllocaInst *AvailableAlloca : AllocasForType) {
      unsigned Align1 = AI->getAlignment(),
               Align2 = AvailableAlloca->getAlignment();

      // The available alloca has to be in the right function, not in some
      // other function in this SCC.
      if (AvailableAlloca->getParent() != AI->getParent())
        continue;

      // If the inlined function already uses this alloca then we can't reuse
      // it.
      if (!UsedAllocas.insert(AvailableAlloca).second)
        continue;

      // Otherwise, we *can* reuse it: RAUW AI into AvailableAlloca and declare
      // success!
      DEBUG(dbgs() << "    ***MERGED ALLOCA: " << *AI
                   << "\n\t\tINTO: " << *AvailableAlloca << '\n');

      // Move affected dbg.declare calls immediately after the new alloca to
      // avoid the situation when a dbg.declare precedes its alloca.
      if (auto *L = LocalAsMetadata::getIfExists(AI))
        if (auto *MDV = MetadataAsValue::getIfExists(AI->getContext(), L))
          for (User *U : MDV->users())
            if (DbgDeclareInst *DDI = dyn_cast<DbgDeclareInst>(U))
              DDI->moveBefore(AvailableAlloca->getNextNode());

      AI->replaceAllUsesWith(AvailableAlloca);

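      // An alignment of 0 means "use the ABI alignment of the allocated
      // type", so materialize that default before comparing; the surviving
      // alloca must end up with the stricter of the two requirements.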
      if (Align1 != Align2) {
        if (!Align1 || !Align2) {
          const DataLayout &DL = Caller->getParent()->getDataLayout();
          unsigned TypeAlign = DL.getABITypeAlignment(AI->getAllocatedType());

          Align1 = Align1 ? Align1 : TypeAlign;
          Align2 = Align2 ? Align2 : TypeAlign;
        }

        if (Align1 > Align2)
          AvailableAlloca->setAlignment(AI->getAlignment());
      }

      AI->eraseFromParent();
      MergedAwayAlloca = true;
      ++NumMergedAllocas;
      IFI.StaticAllocas[AllocaNo] = nullptr;
      break;
    }

    // If we already nuked the alloca, we're done with it.
    if (MergedAwayAlloca)
      continue;

    // If we were unable to merge away the alloca either because there are no
    // allocas of the right type available or because we reused them all
    // already, remember that this alloca came from an inlined function and
    // mark it used so we don't reuse it for other allocas from this inline
    // operation.
    AllocasForType.push_back(AI);
    UsedAllocas.insert(AI);
  }
}

/// If it is possible to inline the specified call site,
/// do so and update the CallGraph for this operation.
///
/// This function also does some basic book-keeping to update the IR. The
/// InlinedArrayAllocas map keeps track of any allocas that are already
/// available from other functions inlined into the caller. If we are able to
/// inline this call site we attempt to reuse already available allocas or add
/// any new allocas to the set if not possible.
static bool InlineCallIfPossible(
    CallSite CS, InlineFunctionInfo &IFI,
    InlinedArrayAllocasTy &InlinedArrayAllocas, int InlineHistory,
    bool InsertLifetime, function_ref<AAResults &(Function &)> &AARGetter,
    ImportedFunctionsInliningStatistics &ImportedFunctionsStats) {
  Function *Callee = CS.getCalledFunction();
  Function *Caller = CS.getCaller();

  AAResults &AAR = AARGetter(*Callee);

  // Try to inline the function. Get the list of static allocas that were
  // inlined.
  if (!InlineFunction(CS, IFI, &AAR, InsertLifetime))
    return false;

  if (InlinerFunctionImportStats != InlinerFunctionImportStatsOpts::No)
    ImportedFunctionsStats.recordInline(*Caller, *Callee);

  AttributeFuncs::mergeAttributesForInlining(*Caller, *Callee);

  if (!DisableInlinedAllocaMerging)
    mergeInlinedArrayAllocas(Caller, IFI, InlinedArrayAllocas, InlineHistory);

  return true;
}

/// Return true if inlining of CS can block the caller from being
/// inlined into its own callers, when that outer inlining is estimated
/// to be the more beneficial choice. \p IC is the estimated inline cost
/// associated with callsite \p CS. \p TotalSecondaryCost will be set to the
/// estimated cost of inlining the caller if \p CS is suppressed for inlining.
static bool
shouldBeDeferred(Function *Caller, CallSite CS, InlineCost IC,
                 int &TotalSecondaryCost,
                 function_ref<InlineCost(CallSite CS)> GetInlineCost) {
  // For now we only handle local or inline functions.
  if (!Caller->hasLocalLinkage() && !Caller->hasLinkOnceODRLinkage())
    return false;
  // Try to detect the case where the current inlining candidate caller (call
  // it B) is a static or linkonce-ODR function and is an inlining candidate
  // elsewhere, and the current candidate callee (call it C) is large enough
  // that inlining it into B would make B too big to inline later. In these
  // circumstances it may be best not to inline C into B, but to inline B into
  // its callers.
  //
  // This only applies to static and linkonce-ODR functions because those are
  // expected to be available for inlining in the translation units where they
  // are used. Thus we will always have the opportunity to make local inlining
  // decisions. Importantly the linkonce-ODR linkage covers inline functions
  // and templates in C++.
  //
  // FIXME: All of this logic should be sunk into getInlineCost. It relies on
  // the internal implementation of the inline cost metrics rather than
  // treating them as truly abstract units etc.
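  // Illustrative numbers (not from the source): if inlining C into B costs
  // IC.getCost() == 100, and B has two callers whose remaining cost deltas
  // would be exhausted by B growing (getCostDelta() <= 99), with their costs
  // summing to TotalSecondaryCost == 80 < 100, then we return true and defer,
  // keeping B small enough to be inlined into its own callers.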
  TotalSecondaryCost = 0;
  // The candidate cost to be imposed upon the current function.
  int CandidateCost = IC.getCost() - 1;
  // This bool tracks what happens if we do NOT inline C into B.
  bool callerWillBeRemoved = Caller->hasLocalLinkage();
  // This bool tracks what happens if we DO inline C into B.
  bool inliningPreventsSomeOuterInline = false;
  for (User *U : Caller->users()) {
    CallSite CS2(U);

    // If this isn't a call to Caller (it could be some other sort
    // of reference) skip it. Such references will prevent the caller
    // from being removed.
    if (!CS2 || CS2.getCalledFunction() != Caller) {
      callerWillBeRemoved = false;
      continue;
    }

    InlineCost IC2 = GetInlineCost(CS2);
    ++NumCallerCallersAnalyzed;
    if (!IC2) {
      callerWillBeRemoved = false;
      continue;
    }
    if (IC2.isAlways())
      continue;

    // See if inlining of the original callsite would erase the cost delta of
    // this callsite. We subtract off the penalty for the call instruction,
    // which we would be deleting.
    if (IC2.getCostDelta() <= CandidateCost) {
      inliningPreventsSomeOuterInline = true;
      TotalSecondaryCost += IC2.getCost();
    }
  }
  // If all outer calls to Caller would get inlined, the cost for the last
  // one is set very low by getInlineCost, in anticipation that Caller will
  // be removed entirely. We did not account for this above unless there
  // is only one caller of Caller.
  if (callerWillBeRemoved && !Caller->hasOneUse())
    TotalSecondaryCost -= InlineConstants::LastCallToStaticBonus;

  if (inliningPreventsSomeOuterInline && TotalSecondaryCost < IC.getCost())
    return true;

  return false;
}

/// Return the cost only if the inliner should attempt to inline at the given
/// CallSite. If we return the cost, we will emit an optimization remark later
/// using that cost, so we won't do so from this function.
static Optional<InlineCost>
shouldInline(CallSite CS, function_ref<InlineCost(CallSite CS)> GetInlineCost,
             OptimizationRemarkEmitter &ORE) {
  using namespace ore;

  InlineCost IC = GetInlineCost(CS);
  Instruction *Call = CS.getInstruction();
  Function *Callee = CS.getCalledFunction();
  Function *Caller = CS.getCaller();

  if (IC.isAlways()) {
    DEBUG(dbgs() << "    Inlining: cost=always"
                 << ", Call: " << *CS.getInstruction() << "\n");
    return IC;
  }

  if (IC.isNever()) {
    DEBUG(dbgs() << "    NOT Inlining: cost=never"
                 << ", Call: " << *CS.getInstruction() << "\n");
    ORE.emit([&]() {
      return OptimizationRemarkMissed(DEBUG_TYPE, "NeverInline", Call)
             << NV("Callee", Callee) << " not inlined into "
             << NV("Caller", Caller)
             << " because it should never be inlined (cost=never)";
    });
    return None;
  }

  if (!IC) {
    DEBUG(dbgs() << "    NOT Inlining: cost=" << IC.getCost()
                 << ", thres=" << IC.getThreshold()
                 << ", Call: " << *CS.getInstruction() << "\n");
    ORE.emit([&]() {
      return OptimizationRemarkMissed(DEBUG_TYPE, "TooCostly", Call)
             << NV("Callee", Callee) << " not inlined into "
             << NV("Caller", Caller) << " because too costly to inline (cost="
             << NV("Cost", IC.getCost())
             << ", threshold=" << NV("Threshold", IC.getThreshold()) << ")";
    });
    return None;
  }

  int TotalSecondaryCost = 0;
  if (shouldBeDeferred(Caller, CS, IC, TotalSecondaryCost, GetInlineCost)) {
    DEBUG(dbgs() << "    NOT Inlining: " << *CS.getInstruction()
                 << " Cost = " << IC.getCost()
                 << ", outer Cost = " << TotalSecondaryCost << '\n');
    ORE.emit([&]() {
      return OptimizationRemarkMissed(DEBUG_TYPE, "IncreaseCostInOtherContexts",
                                      Call)
             << "Not inlining. Cost of inlining " << NV("Callee", Callee)
             << " increases the cost of inlining " << NV("Caller", Caller)
             << " in other contexts";
    });

    // Deferred: returning None tells our caller not to inline this site now;
    // the call may be reconsidered after Caller itself has been inlined.
    return None;
  }

  DEBUG(dbgs() << "    Inlining: cost=" << IC.getCost()
               << ", thres=" << IC.getThreshold()
               << ", Call: " << *CS.getInstruction() << '\n');
  return IC;
}

/// Return true if the specified inline history ID
/// indicates an inline history that includes the specified function.
static bool InlineHistoryIncludes(
    Function *F, int InlineHistoryID,
    const SmallVectorImpl<std::pair<Function *, int>> &InlineHistory) {
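  // InlineHistory forms a forest: each entry is {inlined function, index of
  // the entry for the call site it was inlined through}, with -1 terminating
  // a chain. Walking the parent links visits every function on the inlining
  // path that produced this call site.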
  while (InlineHistoryID != -1) {
    assert(unsigned(InlineHistoryID) < InlineHistory.size() &&
           "Invalid inline history ID");
    if (InlineHistory[InlineHistoryID].first == F)
      return true;
    InlineHistoryID = InlineHistory[InlineHistoryID].second;
  }
  return false;
}

bool LegacyInlinerBase::doInitialization(CallGraph &CG) {
  if (InlinerFunctionImportStats != InlinerFunctionImportStatsOpts::No)
    ImportedFunctionsStats.setModuleInfo(CG.getModule());
  return false; // No changes to CallGraph.
}

bool LegacyInlinerBase::runOnSCC(CallGraphSCC &SCC) {
  if (skipSCC(SCC))
    return false;
  return inlineCalls(SCC);
}

static bool
inlineCallsImpl(CallGraphSCC &SCC, CallGraph &CG,
                std::function<AssumptionCache &(Function &)> GetAssumptionCache,
                ProfileSummaryInfo *PSI, TargetLibraryInfo &TLI,
                bool InsertLifetime,
                function_ref<InlineCost(CallSite CS)> GetInlineCost,
                function_ref<AAResults &(Function &)> AARGetter,
                ImportedFunctionsInliningStatistics &ImportedFunctionsStats) {
  SmallPtrSet<Function *, 8> SCCFunctions;
  DEBUG(dbgs() << "Inliner visiting SCC:");
  for (CallGraphNode *Node : SCC) {
    Function *F = Node->getFunction();
    if (F)
      SCCFunctions.insert(F);
    DEBUG(dbgs() << " " << (F ? F->getName() : "INDIRECTNODE"));
  }

  // Scan through and identify all call sites ahead of time so that we only
  // inline call sites in the original functions, not call sites that result
  // from inlining other functions.
  SmallVector<std::pair<CallSite, int>, 16> CallSites;

  // When inlining a callee produces new call sites, we want to keep track of
  // the fact that they were inlined from the callee. This allows us to avoid
  // infinite inlining in some obscure cases. To represent this, we use an
  // index into the InlineHistory vector.
  SmallVector<std::pair<Function *, int>, 8> InlineHistory;
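  // Each CallSites entry pairs a call site with its inline history ID; -1
  // marks a call that existed before any inlining done by this pass.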

  for (CallGraphNode *Node : SCC) {
    Function *F = Node->getFunction();
    if (!F || F->isDeclaration())
      continue;

    OptimizationRemarkEmitter ORE(F);
    for (BasicBlock &BB : *F)
      for (Instruction &I : BB) {
        CallSite CS(cast<Value>(&I));
        // If this isn't a call, or it is a call to an intrinsic, it can
        // never be inlined.
        if (!CS || isa<IntrinsicInst>(I))
          continue;

        // If this is a direct call to an external function, we can never
        // inline it. If it is an indirect call, inlining may resolve it to
        // be a direct call, so we keep it.
        if (Function *Callee = CS.getCalledFunction())
          if (Callee->isDeclaration()) {
            using namespace ore;

            ORE.emit([&]() {
              return OptimizationRemarkMissed(DEBUG_TYPE, "NoDefinition", &I)
                     << NV("Callee", Callee) << " will not be inlined into "
                     << NV("Caller", CS.getCaller())
                     << " because its definition is unavailable"
                     << setIsVerbose();
            });
            continue;
          }

        CallSites.push_back(std::make_pair(CS, -1));
      }
  }

  DEBUG(dbgs() << ": " << CallSites.size() << " call sites.\n");

  // If there are no calls in this SCC, exit early.
  if (CallSites.empty())
    return false;

  // Now that we have all of the call sites, move the ones to functions in the
  // current SCC to the end of the list.
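  // This is a backwards-shrinking partition: each call into the SCC gets
  // swapped behind the FirstCallInSCC barrier, and the i-- below makes the
  // loop revisit whatever element was swapped into slot i.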
  unsigned FirstCallInSCC = CallSites.size();
  for (unsigned i = 0; i < FirstCallInSCC; ++i)
    if (Function *F = CallSites[i].first.getCalledFunction())
      if (SCCFunctions.count(F))
        std::swap(CallSites[i--], CallSites[--FirstCallInSCC]);

  InlinedArrayAllocasTy InlinedArrayAllocas;
  InlineFunctionInfo InlineInfo(&CG, &GetAssumptionCache, PSI);

  // Now that we have all of the call sites, loop over them and inline them if
  // it looks profitable to do so.
  bool Changed = false;
  bool LocalChange;
  do {
    LocalChange = false;
    // Iterate over the outer loop because inlining functions can cause
    // indirect calls to become direct calls.
    // CallSites may be modified inside, so a range-based for loop cannot be
    // used.
    for (unsigned CSi = 0; CSi != CallSites.size(); ++CSi) {
      CallSite CS = CallSites[CSi].first;

      Function *Caller = CS.getCaller();
      Function *Callee = CS.getCalledFunction();

      // We can only inline direct calls to non-declarations.
      if (!Callee || Callee->isDeclaration())
        continue;

      Instruction *Instr = CS.getInstruction();

      bool IsTriviallyDead = isInstructionTriviallyDead(Instr, &TLI);

      int InlineHistoryID;
      if (!IsTriviallyDead) {
        // If this call site was obtained by inlining another function, verify
        // that the inline path for the function did not include the callee
        // itself. If so, we'd be recursively inlining the same function,
        // which would provide the same callsites, which would cause us to
        // infinitely inline.
        InlineHistoryID = CallSites[CSi].second;
        if (InlineHistoryID != -1 &&
            InlineHistoryIncludes(Callee, InlineHistoryID, InlineHistory))
          continue;
      }

      // FIXME for new PM: because of the old PM we currently generate ORE and
      // in turn BFI on demand. With the new PM, the ORE dependency should
      // just become a regular analysis dependency.
      OptimizationRemarkEmitter ORE(Caller);

      Optional<InlineCost> OIC = shouldInline(CS, GetInlineCost, ORE);
      // If the policy determines that we should not inline this call site,
      // move on to the next one.
      if (!OIC)
        continue;

      // If this call site is dead and it is to a readonly function, we should
      // just delete the call instead of trying to inline it, regardless of
      // size. This happens because IPSCCP propagates the result out of the
      // call and then we're left with the dead call.
      if (IsTriviallyDead) {
        DEBUG(dbgs() << "    -> Deleting dead call: " << *Instr << "\n");
        // Update the call graph by deleting the edge from Callee to Caller.
        CG[Caller]->removeCallEdgeFor(CS);
        Instr->eraseFromParent();
        ++NumCallsDeleted;
      } else {
        // Get DebugLoc to report. CS will be invalid after Inliner.
        DebugLoc DLoc = CS->getDebugLoc();
        BasicBlock *Block = CS.getParent();

        // Attempt to inline the function.
        using namespace ore;

        if (!InlineCallIfPossible(CS, InlineInfo, InlinedArrayAllocas,
                                  InlineHistoryID, InsertLifetime, AARGetter,
                                  ImportedFunctionsStats)) {
          ORE.emit([&]() {
            return OptimizationRemarkMissed(DEBUG_TYPE, "NotInlined", DLoc,
                                            Block)
                   << NV("Callee", Callee) << " will not be inlined into "
                   << NV("Caller", Caller);
          });
          continue;
        }
        ++NumInlined;

        ORE.emit([&]() {
          bool AlwaysInline = OIC->isAlways();
          StringRef RemarkName = AlwaysInline ? "AlwaysInline" : "Inlined";
          OptimizationRemark R(DEBUG_TYPE, RemarkName, DLoc, Block);
          R << NV("Callee", Callee) << " inlined into ";
          R << NV("Caller", Caller);
          if (AlwaysInline)
            R << " with cost=always";
          else {
            R << " with cost=" << NV("Cost", OIC->getCost());
            R << " (threshold=" << NV("Threshold", OIC->getThreshold());
            R << ")";
          }
          return R;
        });

        // If inlining this function gave us any new call sites, throw them
        // onto our worklist to process. They are useful inline candidates.
        if (!InlineInfo.InlinedCalls.empty()) {
          // Create a new inline history entry for this, so that we remember
          // that these new callsites came about due to inlining Callee.
          int NewHistoryID = InlineHistory.size();
          InlineHistory.push_back(std::make_pair(Callee, InlineHistoryID));

          for (Value *Ptr : InlineInfo.InlinedCalls)
            CallSites.push_back(std::make_pair(CallSite(Ptr), NewHistoryID));
        }
      }

      // If we inlined or deleted the last possible call site to the function,
      // delete the function body now.
      if (Callee && Callee->use_empty() && Callee->hasLocalLinkage() &&
          // TODO: Can remove if in SCC now.
          !SCCFunctions.count(Callee) &&
          // The function may be apparently dead, but if there are indirect
          // callgraph references to the node, we cannot delete it yet; this
          // could invalidate the CGSCC iterator.
          CG[Callee]->getNumReferences() == 0) {
        DEBUG(dbgs() << "    -> Deleting dead function: " << Callee->getName()
                     << "\n");
        CallGraphNode *CalleeNode = CG[Callee];

        // Remove any call graph edges from the callee to its callees.
        CalleeNode->removeAllCalledFunctions();

        // Remove the node for the callee from the call graph and delete it.
        delete CG.removeFunctionFromModule(CalleeNode);
        ++NumDeleted;
      }

      // Remove this call site from the list. If possible, use
      // swap/pop_back for efficiency, but do not use it if doing so would
      // move a call site to a function in this SCC before the
      // 'FirstCallInSCC' barrier.
      if (SCC.isSingular()) {
        CallSites[CSi] = CallSites.back();
        CallSites.pop_back();
      } else {
        CallSites.erase(CallSites.begin() + CSi);
      }
      --CSi;

      Changed = true;
      LocalChange = true;
    }
  } while (LocalChange);

  return Changed;
}

bool LegacyInlinerBase::inlineCalls(CallGraphSCC &SCC) {
  CallGraph &CG = getAnalysis<CallGraphWrapperPass>().getCallGraph();
  ACT = &getAnalysis<AssumptionCacheTracker>();
  PSI = getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI();
  auto &TLI = getAnalysis<TargetLibraryInfoWrapperPass>().getTLI();
  auto GetAssumptionCache = [&](Function &F) -> AssumptionCache & {
    return ACT->getAssumptionCache(F);
  };
  return inlineCallsImpl(SCC, CG, GetAssumptionCache, PSI, TLI, InsertLifetime,
                         [this](CallSite CS) { return getInlineCost(CS); },
                         LegacyAARGetter(*this), ImportedFunctionsStats);
}

/// Remove now-dead linkonce functions at the end of
/// processing to avoid breaking the SCC traversal.
bool LegacyInlinerBase::doFinalization(CallGraph &CG) {
  if (InlinerFunctionImportStats != InlinerFunctionImportStatsOpts::No)
    ImportedFunctionsStats.dump(InlinerFunctionImportStats ==
                                InlinerFunctionImportStatsOpts::Verbose);
  return removeDeadFunctions(CG);
}

/// Remove dead functions that are not included in DNR (Do Not Remove) list.
bool LegacyInlinerBase::removeDeadFunctions(CallGraph &CG,
                                            bool AlwaysInlineOnly) {
  SmallVector<CallGraphNode *, 16> FunctionsToRemove;
  SmallVector<Function *, 16> DeadFunctionsInComdats;

  auto RemoveCGN = [&](CallGraphNode *CGN) {
    // Remove any call graph edges from the function to its callees.
    CGN->removeAllCalledFunctions();

    // Remove any edges from the external node to the function's call graph
    // node. These edges might have been made irrelevant due to
    // optimization of the program.
    CG.getExternalCallingNode()->removeAnyCallEdgeTo(CGN);

    // Queue the node for removal from the call graph; the actual deletion
    // happens after the scan below.
    FunctionsToRemove.push_back(CGN);
  };

  // Scan for all of the functions, looking for ones that should now be removed
  // from the program. Insert the dead ones in the FunctionsToRemove set.
  for (const auto &I : CG) {
    CallGraphNode *CGN = I.second.get();
    Function *F = CGN->getFunction();
    if (!F || F->isDeclaration())
      continue;

    // Handle the case when this function is called and we only want to care
    // about always-inline functions. This is a bit of a hack to share code
    // between here and the InlineAlways pass.
    if (AlwaysInlineOnly && !F->hasFnAttribute(Attribute::AlwaysInline))
      continue;

    // If the only remaining users of the function are dead constants, remove
    // them.
    F->removeDeadConstantUsers();

    if (!F->isDefTriviallyDead())
      continue;

    // It is unsafe to drop a function with discardable linkage from a COMDAT
    // without also dropping the other members of the COMDAT.
    // The inliner doesn't visit non-function entities which are in COMDAT
    // groups so it is unsafe to do so *unless* the linkage is local.
    if (!F->hasLocalLinkage()) {
      if (F->hasComdat()) {
        DeadFunctionsInComdats.push_back(F);
        continue;
      }
    }

    RemoveCGN(CGN);
  }
  if (!DeadFunctionsInComdats.empty()) {
    // Filter out the functions whose comdats remain alive.
    filterDeadComdatFunctions(CG.getModule(), DeadFunctionsInComdats);
    // Remove the rest.
    for (Function *F : DeadFunctionsInComdats)
      RemoveCGN(CG[F]);
  }

  if (FunctionsToRemove.empty())
    return false;

  // Now that we know which functions to delete, do so. We didn't want to do
  // this inline, because that would invalidate our CallGraph::iterator
  // objects. :(
  //
  // Note that it doesn't matter that we are iterating over a non-stable order
  // here to do this, it doesn't matter which order the functions are deleted
  // in.
  array_pod_sort(FunctionsToRemove.begin(), FunctionsToRemove.end());
  FunctionsToRemove.erase(
      std::unique(FunctionsToRemove.begin(), FunctionsToRemove.end()),
      FunctionsToRemove.end());
  for (CallGraphNode *CGN : FunctionsToRemove) {
    delete CG.removeFunctionFromModule(CGN);
    ++NumDeleted;
  }
  return true;
}

PreservedAnalyses InlinerPass::run(LazyCallGraph::SCC &InitialC,
                                   CGSCCAnalysisManager &AM, LazyCallGraph &CG,
                                   CGSCCUpdateResult &UR) {
  const ModuleAnalysisManager &MAM =
      AM.getResult<ModuleAnalysisManagerCGSCCProxy>(InitialC, CG).getManager();
  bool Changed = false;

  assert(InitialC.size() > 0 && "Cannot handle an empty SCC!");
  Module &M = *InitialC.begin()->getFunction().getParent();
  ProfileSummaryInfo *PSI = MAM.getCachedResult<ProfileSummaryAnalysis>(M);

  // We use a single common worklist for calls across the entire SCC. We
  // process these in-order and append new calls introduced during inlining to
  // the end.
  //
  // Note that this particular order of processing is actually critical to
  // avoid very bad behaviors. Consider *highly connected* call graphs where
  // each function contains a small amount of code and a couple of calls to
  // other functions. Because the LLVM inliner is fundamentally a bottom-up
  // inliner, it can handle gracefully the fact that these all appear to be
  // reasonable inlining candidates as it will flatten things until they become
  // too big to inline, and then move on and flatten another batch.
  //
  // However, when processing call edges *within* an SCC we cannot rely on this
  // bottom-up behavior. As a consequence, with heavily connected *SCCs* of
  // functions we can end up incrementally inlining N calls into each of
  // N functions because each incremental inlining decision looks good and we
  // don't have a topological ordering to prevent explosions.
  //
  // To compensate for this, we don't process transitive edges made immediate
  // by inlining until we've done one pass of inlining across the entire SCC.
  // Large, highly connected SCCs still lead to some amount of code bloat in
  // this model, but it is uniformly spread across all the functions in the SCC
  // and eventually they all become too large to inline, rather than
  // incrementally making a single function grow in a super linear fashion.
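  // Note that calls exposed by inlining are appended to this worklist with a
  // fresh inline history ID rather than being processed immediately; that
  // append-only discipline is what implements the "one pass across the whole
  // SCC" strategy described above.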
  SmallVector<std::pair<CallSite, int>, 16> Calls;

  // Populate the initial list of calls in this SCC.
  for (auto &N : InitialC) {
    // We want to generally process call sites top-down in order for
    // simplifications stemming from replacing the call with the returned value
    // after inlining to be visible to subsequent inlining decisions.
    // FIXME: Using instructions sequence is a really bad way to do this.
    // Instead we should do an actual RPO walk of the function body.
    for (Instruction &I : instructions(N.getFunction()))
      if (auto CS = CallSite(&I))
        if (Function *Callee = CS.getCalledFunction())
          if (!Callee->isDeclaration())
            Calls.push_back({CS, -1});
  }
  if (Calls.empty())
    return PreservedAnalyses::all();

  // Capture updatable variables for the current SCC and RefSCC.
  auto *C = &InitialC;
  auto *RC = &C->getOuterRefSCC();

  // When inlining a callee produces new call sites, we want to keep track of
  // the fact that they were inlined from the callee. This allows us to avoid
  // infinite inlining in some obscure cases. To represent this, we use an
  // index into the InlineHistory vector.
  SmallVector<std::pair<Function *, int>, 16> InlineHistory;

  // Track a set vector of inlined callees so that we can augment the caller
  // with all of their edges in the call graph before pruning out the ones that
  // got simplified away.
  SmallSetVector<Function *, 4> InlinedCallees;

  // Track the dead functions to delete once finished with inlining calls. We
  // defer deleting these to make it easier to handle the call graph updates.
  SmallVector<Function *, 4> DeadFunctions;

  // Loop forward over all of the calls. Note that we cannot cache the size as
  // inlining can introduce new calls that need to be processed.
  for (int i = 0; i < (int)Calls.size(); ++i) {
    // We expect the calls to typically be batched with sequences of calls that
    // have the same caller, so we first set up some shared infrastructure for
    // this caller. We also do any pruning we can at this layer on the caller
    // alone.
    Function &F = *Calls[i].first.getCaller();
    LazyCallGraph::Node &N = *CG.lookup(F);
    if (CG.lookupSCC(N) != C)
      continue;
    if (F.hasFnAttribute(Attribute::OptimizeNone))
      continue;

    DEBUG(dbgs() << "Inlining calls in: " << F.getName() << "\n");

    // Get a FunctionAnalysisManager via a proxy for this particular node. We
    // do this each time we visit a node as the SCC may have changed and as
    // we're going to mutate this particular function we want to make sure the
    // proxy is in place to forward any invalidation events. We can use the
    // manager we get here for looking up results for functions other than this
    // node however because those functions aren't going to be mutated by this
    // pass.
    FunctionAnalysisManager &FAM =
        AM.getResult<FunctionAnalysisManagerCGSCCProxy>(*C, CG)
            .getManager();

    // Get the remarks emission analysis for the caller.
    auto &ORE = FAM.getResult<OptimizationRemarkEmitterAnalysis>(F);

    std::function<AssumptionCache &(Function &)> GetAssumptionCache =
        [&](Function &F) -> AssumptionCache & {
      return FAM.getResult<AssumptionAnalysis>(F);
    };
    auto GetBFI = [&](Function &F) -> BlockFrequencyInfo & {
      return FAM.getResult<BlockFrequencyAnalysis>(F);
    };

    auto GetInlineCost = [&](CallSite CS) {
      Function &Callee = *CS.getCalledFunction();
      auto &CalleeTTI = FAM.getResult<TargetIRAnalysis>(Callee);
      return getInlineCost(CS, Params, CalleeTTI, GetAssumptionCache, {GetBFI},
                           PSI, &ORE);
    };

    // Now process as many calls as we have within this caller in the
    // sequence. We bail out as soon as the caller has to change so we can
    // update the call graph and prepare the context of that new caller.
    bool DidInline = false;
    for (; i < (int)Calls.size() && Calls[i].first.getCaller() == &F; ++i) {
      int InlineHistoryID;
      CallSite CS;
      std::tie(CS, InlineHistoryID) = Calls[i];
      Function &Callee = *CS.getCalledFunction();

      if (InlineHistoryID != -1 &&
          InlineHistoryIncludes(&Callee, InlineHistoryID, InlineHistory))
        continue;

      // Check if this inlining may repeat breaking an SCC apart that has
      // already been split once before. In that case, inlining here may
      // trigger infinite inlining, much like is prevented within the inliner
      // itself by the InlineHistory above, but spread across CGSCC iterations
      // and thus hidden from the full inline history.
      if (CG.lookupSCC(*CG.lookup(Callee)) == C &&
          UR.InlinedInternalEdges.count({&N, C})) {
        DEBUG(dbgs() << "Skipping inlining internal SCC edge from a node "
                        "previously split out of this SCC by inlining: "
                     << F.getName() << " -> " << Callee.getName() << "\n");
        continue;
      }

      Optional<InlineCost> OIC = shouldInline(CS, GetInlineCost, ORE);
      // Check whether we want to inline this callsite.
      if (!OIC)
        continue;

      // Set up the data structure used to plumb customization into the
      // `InlineFunction` routine.
      InlineFunctionInfo IFI(
          /*cg=*/nullptr, &GetAssumptionCache, PSI,
          &FAM.getResult<BlockFrequencyAnalysis>(*(CS.getCaller())),
          &FAM.getResult<BlockFrequencyAnalysis>(Callee));

      // Get DebugLoc to report. CS will be invalid after Inliner.
      DebugLoc DLoc = CS->getDebugLoc();
      BasicBlock *Block = CS.getParent();

      using namespace ore;

      if (!InlineFunction(CS, IFI)) {
        ORE.emit([&]() {
          return OptimizationRemarkMissed(DEBUG_TYPE, "NotInlined", DLoc, Block)
                 << NV("Callee", &Callee) << " will not be inlined into "
                 << NV("Caller", &F);
        });
        continue;
      }
      DidInline = true;
      InlinedCallees.insert(&Callee);

      ORE.emit([&]() {
        bool AlwaysInline = OIC->isAlways();
        StringRef RemarkName = AlwaysInline ? "AlwaysInline" : "Inlined";
        OptimizationRemark R(DEBUG_TYPE, RemarkName, DLoc, Block);
        R << NV("Callee", &Callee) << " inlined into ";
        R << NV("Caller", &F);
        if (AlwaysInline)
          R << " with cost=always";
        else {
          R << " with cost=" << NV("Cost", OIC->getCost());
          R << " (threshold=" << NV("Threshold", OIC->getThreshold());
          R << ")";
        }
        return R;
      });

      // Add any new callsites to defined functions to the worklist.
      if (!IFI.InlinedCallSites.empty()) {
        int NewHistoryID = InlineHistory.size();
        InlineHistory.push_back({&Callee, InlineHistoryID});
        for (CallSite &CS : reverse(IFI.InlinedCallSites))
          if (Function *NewCallee = CS.getCalledFunction())
            if (!NewCallee->isDeclaration())
              Calls.push_back({CS, NewHistoryID});
      }

      // Merge the attributes based on the inlining.
      AttributeFuncs::mergeAttributesForInlining(F, Callee);

      // For local functions, check whether this makes the callee trivially
      // dead. In that case, we can drop the body of the function eagerly
      // which may reduce the number of callers of other functions to one,
      // changing inline cost thresholds.
      if (Callee.hasLocalLinkage()) {
        // To check this we also need to nuke any dead constant uses (perhaps
        // made dead by this operation on other functions).
        Callee.removeDeadConstantUsers();
        if (Callee.use_empty() && !CG.isLibFunction(Callee)) {
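          // Prune any not-yet-visited worklist entries whose caller is the
          // now-dead callee; entries at index <= i have already been handled.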
          Calls.erase(
              std::remove_if(Calls.begin() + i + 1, Calls.end(),
                             [&Callee](const std::pair<CallSite, int> &Call) {
                               return Call.first.getCaller() == &Callee;
                             }),
              Calls.end());
          // Clear the body and queue the function itself for deletion when we
          // finish inlining and call graph updates.
          // Note that after this point, it is an error to do anything other
          // than use the callee's address or delete it.
          Callee.dropAllReferences();
          assert(find(DeadFunctions, &Callee) == DeadFunctions.end() &&
                 "Cannot cause a function to become dead twice!");
          DeadFunctions.push_back(&Callee);
        }
      }
    }

    // Back the call index up by one to put us in a good position to go around
    // the outer loop.
    --i;

    if (!DidInline)
      continue;
    Changed = true;

    // Add all the inlined callees' edges as ref edges to the caller. These are
    // by definition trivial edges as we always have *some* transitive ref edge
    // chain. While in some cases these edges are direct calls inside the
    // callee, they have to be modeled in the inliner as reference edges as
    // there may be a reference edge anywhere along the chain from the current
    // caller to the callee that causes the whole thing to appear like
    // a (transitive) reference edge that will require promotion to a call edge
    // below.
    for (Function *InlinedCallee : InlinedCallees) {
      LazyCallGraph::Node &CalleeN = *CG.lookup(*InlinedCallee);
      for (LazyCallGraph::Edge &E : *CalleeN)
        RC->insertTrivialRefEdge(N, E.getNode());
    }

    // At this point, since we have made changes we have at least removed
    // a call instruction. However, in the process we do some incremental
    // simplification of the surrounding code. This simplification can
    // essentially do all of the same things as a function pass and we can
    // re-use the exact same logic for updating the call graph to reflect the
    // change.
    LazyCallGraph::SCC *OldC = C;
    C = &updateCGAndAnalysisManagerForFunctionPass(CG, *C, N, AM, UR);
    DEBUG(dbgs() << "Updated inlining SCC: " << *C << "\n");
    RC = &C->getOuterRefSCC();

    // If this causes an SCC to split apart into multiple smaller SCCs, there
    // is a subtle risk we need to prepare for. Other transformations may
    // expose an "infinite inlining" opportunity later, and because of the SCC
    // mutation, we will revisit this function and potentially re-inline. If we
    // do, and that re-inlining also has the potential to mutate the SCC
    // structure, the infinite inlining problem can manifest through infinite
    // SCC splits and merges. To avoid this, we capture the originating caller
    // node and the SCC containing the call edge. This is a slight
    // over-approximation of the possible inlining decisions that must be
    // avoided, but is relatively efficient to store.
    // FIXME: This seems like a very heavyweight way of retaining the inline
    // history, we should look for a more efficient way of tracking it.
    if (C != OldC && llvm::any_of(InlinedCallees, [&](Function *Callee) {
          return CG.lookupSCC(*CG.lookup(*Callee)) == OldC;
        })) {
      DEBUG(dbgs() << "Inlined an internal call edge and split an SCC, "
                      "retaining this to avoid infinite inlining.\n");
      UR.InlinedInternalEdges.insert({&N, OldC});
    }
    InlinedCallees.clear();
  }

  // Now that we've finished inlining all of the calls across this SCC, delete
  // all of the trivially dead functions, updating the call graph and the CGSCC
  // pass manager in the process.
  //
  // Note that this walks a pointer set which has non-deterministic order but
  // that is OK as all we do is delete things and add pointers to unordered
  // sets.
  for (Function *DeadF : DeadFunctions) {
    // Get the necessary information out of the call graph and nuke the
    // function there. Also, clear out any cached analyses.
    auto &DeadC = *CG.lookupSCC(*CG.lookup(*DeadF));
    FunctionAnalysisManager &FAM =
        AM.getResult<FunctionAnalysisManagerCGSCCProxy>(DeadC, CG)
            .getManager();
    FAM.clear(*DeadF, DeadF->getName());
    AM.clear(DeadC, DeadC.getName());
    auto &DeadRC = DeadC.getOuterRefSCC();
    CG.removeDeadFunction(*DeadF);

    // Mark the relevant parts of the call graph as invalid so we don't visit
    // them.
    UR.InvalidatedSCCs.insert(&DeadC);
    UR.InvalidatedRefSCCs.insert(&DeadRC);

    // And delete the actual function from the module.
    M.getFunctionList().erase(DeadF);
  }

  if (!Changed)
    return PreservedAnalyses::all();

  // Even if we change the IR, we update the core CGSCC data structures and so
  // can preserve the proxy to the function analysis manager.
  PreservedAnalyses PA;
  PA.preserve<FunctionAnalysisManagerCGSCCProxy>();
  return PA;
}