LLVM  10.0.0svn
Inliner.cpp
Go to the documentation of this file.
1 //===- Inliner.cpp - Code common to all inliners --------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file implements the mechanics required to implement inlining without
10 // missing any calls and updating the call graph. The decisions of which calls
11 // are profitable to inline are implemented elsewhere.
12 //
13 //===----------------------------------------------------------------------===//
14 
16 #include "llvm/ADT/DenseMap.h"
17 #include "llvm/ADT/None.h"
18 #include "llvm/ADT/Optional.h"
19 #include "llvm/ADT/STLExtras.h"
20 #include "llvm/ADT/SetVector.h"
21 #include "llvm/ADT/SmallPtrSet.h"
22 #include "llvm/ADT/SmallVector.h"
23 #include "llvm/ADT/Statistic.h"
24 #include "llvm/ADT/StringRef.h"
38 #include "llvm/IR/Attributes.h"
39 #include "llvm/IR/BasicBlock.h"
40 #include "llvm/IR/CallSite.h"
41 #include "llvm/IR/DataLayout.h"
42 #include "llvm/IR/DebugLoc.h"
43 #include "llvm/IR/DerivedTypes.h"
44 #include "llvm/IR/DiagnosticInfo.h"
45 #include "llvm/IR/Function.h"
46 #include "llvm/IR/InstIterator.h"
47 #include "llvm/IR/Instruction.h"
48 #include "llvm/IR/Instructions.h"
49 #include "llvm/IR/IntrinsicInst.h"
50 #include "llvm/IR/Metadata.h"
51 #include "llvm/IR/Module.h"
52 #include "llvm/IR/PassManager.h"
53 #include "llvm/IR/User.h"
54 #include "llvm/IR/Value.h"
55 #include "llvm/Pass.h"
56 #include "llvm/Support/Casting.h"
58 #include "llvm/Support/Debug.h"
63 #include <algorithm>
64 #include <cassert>
65 #include <functional>
66 #include <sstream>
67 #include <tuple>
68 #include <utility>
69 #include <vector>
70 
71 using namespace llvm;
72 
73 #define DEBUG_TYPE "inline"
74 
75 STATISTIC(NumInlined, "Number of functions inlined");
76 STATISTIC(NumCallsDeleted, "Number of call sites deleted, not inlined");
77 STATISTIC(NumDeleted, "Number of functions deleted because all callers found");
78 STATISTIC(NumMergedAllocas, "Number of allocas merged together");
79 
80 // This weirdly named statistic tracks the number of times that, when attempting
81 // to inline a function A into B, we analyze the callers of B in order to see
82 // if those would be more profitable and blocked inline steps.
83 STATISTIC(NumCallerCallersAnalyzed, "Number of caller-callers analyzed");
84 
85 /// Flag to disable manual alloca merging.
86 ///
87 /// Merging of allocas was originally done as a stack-size saving technique
88 /// prior to LLVM's code generator having support for stack coloring based on
89 /// lifetime markers. It is now in the process of being removed. To experiment
90 /// with disabling it and relying fully on lifetime marker based stack
91 /// coloring, you can pass this flag to LLVM.
92 static cl::opt<bool>
93  DisableInlinedAllocaMerging("disable-inlined-alloca-merging",
94  cl::init(false), cl::Hidden);
95 
96 namespace {
97 
99  No = 0,
100  Basic = 1,
101  Verbose = 2,
102 };
103 
104 } // end anonymous namespace
105 
107  "inliner-function-import-stats",
110  "basic statistics"),
111  clEnumValN(InlinerFunctionImportStatsOpts::Verbose, "verbose",
112  "printing of statistics for each inlined function")),
113  cl::Hidden, cl::desc("Enable inliner stats for imported functions"));
114 
115 /// Flag to add inline messages as callsite attributes 'inline-remark'.
116 static cl::opt<bool>
117  InlineRemarkAttribute("inline-remark-attribute", cl::init(false),
118  cl::Hidden,
119  cl::desc("Enable adding inline-remark attribute to"
120  " callsites processed by inliner but decided"
121  " to be not inlined"));
122 
124 
125 LegacyInlinerBase::LegacyInlinerBase(char &ID, bool InsertLifetime)
126  : CallGraphSCCPass(ID), InsertLifetime(InsertLifetime) {}
127 
128 /// For this class, we declare that we require and preserve the call graph.
129 /// If the derived class implements this method, it should
130 /// always explicitly call the implementation here.
137 }
138 
140 
141 /// Look at all of the allocas that we inlined through this call site. If we
142 /// have already inlined other allocas through other calls into this function,
143 /// then we know that they have disjoint lifetimes and that we can merge them.
144 ///
145 /// There are many heuristics possible for merging these allocas, and the
146 /// different options have different tradeoffs. One thing that we *really*
147 /// don't want to hurt is SRoA: once inlining happens, often allocas are no
148 /// longer address taken and so they can be promoted.
149 ///
150 /// Our "solution" for that is to only merge allocas whose outermost type is an
151 /// array type. These are usually not promoted because someone is using a
152 /// variable index into them. These are also often the most important ones to
153 /// merge.
154 ///
155 /// A better solution would be to have real memory lifetime markers in the IR
156 /// and not have the inliner do any merging of allocas at all. This would
157 /// allow the backend to do proper stack slot coloring of all allocas that
158 /// *actually make it to the backend*, which is really what we want.
159 ///
160 /// Because we don't have this information, we do this simple and useful hack.
162  Function *Caller, InlineFunctionInfo &IFI,
163  InlinedArrayAllocasTy &InlinedArrayAllocas, int InlineHistory) {
164  SmallPtrSet<AllocaInst *, 16> UsedAllocas;
165 
166  // When processing our SCC, check to see if CS was inlined from some other
167  // call site. For example, if we're processing "A" in this code:
168  // A() { B() }
169  // B() { x = alloca ... C() }
170  // C() { y = alloca ... }
171  // Assume that C was not inlined into B initially, and so we're processing A
172  // and decide to inline B into A. Doing this makes an alloca available for
173  // reuse and makes a callsite (C) available for inlining. When we process
174  // the C call site we don't want to do any alloca merging between X and Y
175  // because their scopes are not disjoint. We could make this smarter by
176  // keeping track of the inline history for each alloca in the
177  // InlinedArrayAllocas but this isn't likely to be a significant win.
178  if (InlineHistory != -1) // Only do merging for top-level call sites in SCC.
179  return;
180 
181  // Loop over all the allocas we have so far and see if they can be merged with
182  // a previously inlined alloca. If not, remember that we had it.
183  for (unsigned AllocaNo = 0, e = IFI.StaticAllocas.size(); AllocaNo != e;
184  ++AllocaNo) {
185  AllocaInst *AI = IFI.StaticAllocas[AllocaNo];
186 
187  // Don't bother trying to merge array allocations (they will usually be
188  // canonicalized to be an allocation *of* an array), or allocations whose
189  // type is not itself an array (because we're afraid of pessimizing SRoA).
191  if (!ATy || AI->isArrayAllocation())
192  continue;
193 
194  // Get the list of all available allocas for this array type.
195  std::vector<AllocaInst *> &AllocasForType = InlinedArrayAllocas[ATy];
196 
197  // Loop over the allocas in AllocasForType to see if we can reuse one. Note
198  // that we have to be careful not to reuse the same "available" alloca for
199  // multiple different allocas that we just inlined, we use the 'UsedAllocas'
200  // set to keep track of which "available" allocas are being used by this
201  // function. Also, AllocasForType can be empty of course!
202  bool MergedAwayAlloca = false;
203  for (AllocaInst *AvailableAlloca : AllocasForType) {
204  unsigned Align1 = AI->getAlignment(),
205  Align2 = AvailableAlloca->getAlignment();
206 
207  // The available alloca has to be in the right function, not in some other
208  // function in this SCC.
209  if (AvailableAlloca->getParent() != AI->getParent())
210  continue;
211 
212  // If the inlined function already uses this alloca then we can't reuse
213  // it.
214  if (!UsedAllocas.insert(AvailableAlloca).second)
215  continue;
216 
217  // Otherwise, we *can* reuse it, RAUW AI into AvailableAlloca and declare
218  // success!
219  LLVM_DEBUG(dbgs() << " ***MERGED ALLOCA: " << *AI
220  << "\n\t\tINTO: " << *AvailableAlloca << '\n');
221 
222  // Move affected dbg.declare calls immediately after the new alloca to
223  // avoid the situation when a dbg.declare precedes its alloca.
224  if (auto *L = LocalAsMetadata::getIfExists(AI))
225  if (auto *MDV = MetadataAsValue::getIfExists(AI->getContext(), L))
226  for (User *U : MDV->users())
227  if (DbgDeclareInst *DDI = dyn_cast<DbgDeclareInst>(U))
228  DDI->moveBefore(AvailableAlloca->getNextNode());
229 
230  AI->replaceAllUsesWith(AvailableAlloca);
231 
232  if (Align1 != Align2) {
233  if (!Align1 || !Align2) {
234  const DataLayout &DL = Caller->getParent()->getDataLayout();
235  unsigned TypeAlign = DL.getABITypeAlignment(AI->getAllocatedType());
236 
237  Align1 = Align1 ? Align1 : TypeAlign;
238  Align2 = Align2 ? Align2 : TypeAlign;
239  }
240 
241  if (Align1 > Align2)
242  AvailableAlloca->setAlignment(MaybeAlign(AI->getAlignment()));
243  }
244 
245  AI->eraseFromParent();
246  MergedAwayAlloca = true;
247  ++NumMergedAllocas;
248  IFI.StaticAllocas[AllocaNo] = nullptr;
249  break;
250  }
251 
252  // If we already nuked the alloca, we're done with it.
253  if (MergedAwayAlloca)
254  continue;
255 
256  // If we were unable to merge away the alloca either because there are no
257  // allocas of the right type available or because we reused them all
258  // already, remember that this alloca came from an inlined function and mark
259  // it used so we don't reuse it for other allocas from this inline
260  // operation.
261  AllocasForType.push_back(AI);
262  UsedAllocas.insert(AI);
263  }
264 }
265 
266 /// If it is possible to inline the specified call site,
267 /// do so and update the CallGraph for this operation.
268 ///
269 /// This function also does some basic book-keeping to update the IR. The
270 /// InlinedArrayAllocas map keeps track of any allocas that are already
271 /// available from other functions inlined into the caller. If we are able to
272 /// inline this call site we attempt to reuse already available allocas or add
273 /// any new allocas to the set if not possible.
275  CallSite CS, InlineFunctionInfo &IFI,
276  InlinedArrayAllocasTy &InlinedArrayAllocas, int InlineHistory,
277  bool InsertLifetime, function_ref<AAResults &(Function &)> &AARGetter,
280  Function *Caller = CS.getCaller();
281 
282  AAResults &AAR = AARGetter(*Callee);
283 
284  // Try to inline the function. Get the list of static allocas that were
285  // inlined.
286  InlineResult IR = InlineFunction(CS, IFI, &AAR, InsertLifetime);
287  if (!IR)
288  return IR;
289 
290  if (InlinerFunctionImportStats != InlinerFunctionImportStatsOpts::No)
291  ImportedFunctionsStats.recordInline(*Caller, *Callee);
292 
294 
296  mergeInlinedArrayAllocas(Caller, IFI, InlinedArrayAllocas, InlineHistory);
297 
298  return IR; // success
299 }
300 
301 /// Return true if inlining of CS can block the caller from being
302 /// inlined which is proved to be more beneficial. \p IC is the
303 /// estimated inline cost associated with callsite \p CS.
304 /// \p TotalSecondaryCost will be set to the estimated cost of inlining the
305 /// caller if \p CS is suppressed for inlining.
306 static bool
308  int &TotalSecondaryCost,
309  function_ref<InlineCost(CallSite CS)> GetInlineCost) {
310  // For now we only handle local or inline functions.
311  if (!Caller->hasLocalLinkage() && !Caller->hasLinkOnceODRLinkage())
312  return false;
313  // If the cost of inlining CS is non-positive, it is not going to prevent the
314  // caller from being inlined into its callers and hence we don't need to
315  // defer.
316  if (IC.getCost() <= 0)
317  return false;
318  // Try to detect the case where the current inlining candidate caller (call
319  // it B) is a static or linkonce-ODR function and is an inlining candidate
320  // elsewhere, and the current candidate callee (call it C) is large enough
321  // that inlining it into B would make B too big to inline later. In these
322  // circumstances it may be best not to inline C into B, but to inline B into
323  // its callers.
324  //
325  // This only applies to static and linkonce-ODR functions because those are
326  // expected to be available for inlining in the translation units where they
327  // are used. Thus we will always have the opportunity to make local inlining
328  // decisions. Importantly the linkonce-ODR linkage covers inline functions
329  // and templates in C++.
330  //
331  // FIXME: All of this logic should be sunk into getInlineCost. It relies on
332  // the internal implementation of the inline cost metrics rather than
333  // treating them as truly abstract units etc.
334  TotalSecondaryCost = 0;
335  // The candidate cost to be imposed upon the current function.
336  int CandidateCost = IC.getCost() - 1;
337  // If the caller has local linkage and can be inlined to all its callers, we
338  // can apply a huge negative bonus to TotalSecondaryCost.
339  bool ApplyLastCallBonus = Caller->hasLocalLinkage() && !Caller->hasOneUse();
340  // This bool tracks what happens if we DO inline C into B.
341  bool inliningPreventsSomeOuterInline = false;
342  for (User *U : Caller->users()) {
343  // If the caller will not be removed (either because it does not have a
344  // local linkage or because the LastCallToStaticBonus has been already
345  // applied), then we can exit the loop early.
346  if (!ApplyLastCallBonus && TotalSecondaryCost >= IC.getCost())
347  return false;
348  CallSite CS2(U);
349 
350  // If this isn't a call to Caller (it could be some other sort
351  // of reference) skip it. Such references will prevent the caller
352  // from being removed.
353  if (!CS2 || CS2.getCalledFunction() != Caller) {
354  ApplyLastCallBonus = false;
355  continue;
356  }
357 
358  InlineCost IC2 = GetInlineCost(CS2);
359  ++NumCallerCallersAnalyzed;
360  if (!IC2) {
361  ApplyLastCallBonus = false;
362  continue;
363  }
364  if (IC2.isAlways())
365  continue;
366 
367  // See if inlining of the original callsite would erase the cost delta of
368  // this callsite. We subtract off the penalty for the call instruction,
369  // which we would be deleting.
370  if (IC2.getCostDelta() <= CandidateCost) {
371  inliningPreventsSomeOuterInline = true;
372  TotalSecondaryCost += IC2.getCost();
373  }
374  }
375  // If all outer calls to Caller would get inlined, the cost for the last
376  // one is set very low by getInlineCost, in anticipation that Caller will
377  // be removed entirely. We did not account for this above unless there
378  // is only one caller of Caller.
379  if (ApplyLastCallBonus)
380  TotalSecondaryCost -= InlineConstants::LastCallToStaticBonus;
381 
382  if (inliningPreventsSomeOuterInline && TotalSecondaryCost < IC.getCost())
383  return true;
384 
385  return false;
386 }
387 
388 static std::basic_ostream<char> &operator<<(std::basic_ostream<char> &R,
389  const ore::NV &Arg) {
390  return R << Arg.Val;
391 }
392 
393 template <class RemarkT>
394 RemarkT &operator<<(RemarkT &&R, const InlineCost &IC) {
395  using namespace ore;
396  if (IC.isAlways()) {
397  R << "(cost=always)";
398  } else if (IC.isNever()) {
399  R << "(cost=never)";
400  } else {
401  R << "(cost=" << ore::NV("Cost", IC.getCost())
402  << ", threshold=" << ore::NV("Threshold", IC.getThreshold()) << ")";
403  }
404  if (const char *Reason = IC.getReason())
405  R << ": " << ore::NV("Reason", Reason);
406  return R;
407 }
408 
409 static std::string inlineCostStr(const InlineCost &IC) {
410  std::stringstream Remark;
411  Remark << IC;
412  return Remark.str();
413 }
414 
415 /// Return the cost only if the inliner should attempt to inline at the given
416 /// CallSite. If we return the cost, we will emit an optimisation remark later
417 /// using that cost, so we won't do so from this function.
421  using namespace ore;
422 
423  InlineCost IC = GetInlineCost(CS);
424  Instruction *Call = CS.getInstruction();
426  Function *Caller = CS.getCaller();
427 
428  if (IC.isAlways()) {
429  LLVM_DEBUG(dbgs() << " Inlining " << inlineCostStr(IC)
430  << ", Call: " << *CS.getInstruction() << "\n");
431  return IC;
432  }
433 
434  if (IC.isNever()) {
435  LLVM_DEBUG(dbgs() << " NOT Inlining " << inlineCostStr(IC)
436  << ", Call: " << *CS.getInstruction() << "\n");
437  ORE.emit([&]() {
438  return OptimizationRemarkMissed(DEBUG_TYPE, "NeverInline", Call)
439  << NV("Callee", Callee) << " not inlined into "
440  << NV("Caller", Caller) << " because it should never be inlined "
441  << IC;
442  });
443  return IC;
444  }
445 
446  if (!IC) {
447  LLVM_DEBUG(dbgs() << " NOT Inlining " << inlineCostStr(IC)
448  << ", Call: " << *CS.getInstruction() << "\n");
449  ORE.emit([&]() {
450  return OptimizationRemarkMissed(DEBUG_TYPE, "TooCostly", Call)
451  << NV("Callee", Callee) << " not inlined into "
452  << NV("Caller", Caller) << " because too costly to inline " << IC;
453  });
454  return IC;
455  }
456 
457  int TotalSecondaryCost = 0;
458  if (shouldBeDeferred(Caller, CS, IC, TotalSecondaryCost, GetInlineCost)) {
459  LLVM_DEBUG(dbgs() << " NOT Inlining: " << *CS.getInstruction()
460  << " Cost = " << IC.getCost()
461  << ", outer Cost = " << TotalSecondaryCost << '\n');
462  ORE.emit([&]() {
463  return OptimizationRemarkMissed(DEBUG_TYPE, "IncreaseCostInOtherContexts",
464  Call)
465  << "Not inlining. Cost of inlining " << NV("Callee", Callee)
466  << " increases the cost of inlining " << NV("Caller", Caller)
467  << " in other contexts";
468  });
469 
470  // IC does not bool() to false, so get an InlineCost that will.
471  // This will not be inspected to make an error message.
472  return None;
473  }
474 
475  LLVM_DEBUG(dbgs() << " Inlining " << inlineCostStr(IC)
476  << ", Call: " << *CS.getInstruction() << '\n');
477  return IC;
478 }
479 
480 /// Return true if the specified inline history ID
481 /// indicates an inline history that includes the specified function.
483  Function *F, int InlineHistoryID,
484  const SmallVectorImpl<std::pair<Function *, int>> &InlineHistory) {
485  while (InlineHistoryID != -1) {
486  assert(unsigned(InlineHistoryID) < InlineHistory.size() &&
487  "Invalid inline history ID");
488  if (InlineHistory[InlineHistoryID].first == F)
489  return true;
490  InlineHistoryID = InlineHistory[InlineHistoryID].second;
491  }
492  return false;
493 }
494 
496  if (InlinerFunctionImportStats != InlinerFunctionImportStatsOpts::No)
498  return false; // No changes to CallGraph.
499 }
500 
502  if (skipSCC(SCC))
503  return false;
504  return inlineCalls(SCC);
505 }
506 
508  const BasicBlock *Block, const Function &Callee,
509  const Function &Caller, const InlineCost &IC) {
510  ORE.emit([&]() {
511  bool AlwaysInline = IC.isAlways();
512  StringRef RemarkName = AlwaysInline ? "AlwaysInline" : "Inlined";
513  return OptimizationRemark(DEBUG_TYPE, RemarkName, DLoc, Block)
514  << ore::NV("Callee", &Callee) << " inlined into "
515  << ore::NV("Caller", &Caller) << " with " << IC;
516  });
517 }
518 
519 static void setInlineRemark(CallSite &CS, StringRef message) {
520  if (!InlineRemarkAttribute)
521  return;
522 
523  Attribute attr = Attribute::get(CS->getContext(), "inline-remark", message);
525 }
526 
527 static bool
529  std::function<AssumptionCache &(Function &)> GetAssumptionCache,
532  bool InsertLifetime,
533  function_ref<InlineCost(CallSite CS)> GetInlineCost,
534  function_ref<AAResults &(Function &)> AARGetter,
536  SmallPtrSet<Function *, 8> SCCFunctions;
537  LLVM_DEBUG(dbgs() << "Inliner visiting SCC:");
538  for (CallGraphNode *Node : SCC) {
539  Function *F = Node->getFunction();
540  if (F)
541  SCCFunctions.insert(F);
542  LLVM_DEBUG(dbgs() << " " << (F ? F->getName() : "INDIRECTNODE"));
543  }
544 
545  // Scan through and identify all call sites ahead of time so that we only
546  // inline call sites in the original functions, not call sites that result
547  // from inlining other functions.
549 
550  // When inlining a callee produces new call sites, we want to keep track of
551  // the fact that they were inlined from the callee. This allows us to avoid
552  // infinite inlining in some obscure cases. To represent this, we use an
553  // index into the InlineHistory vector.
554  SmallVector<std::pair<Function *, int>, 8> InlineHistory;
555 
556  for (CallGraphNode *Node : SCC) {
557  Function *F = Node->getFunction();
558  if (!F || F->isDeclaration())
559  continue;
560 
562  for (BasicBlock &BB : *F)
563  for (Instruction &I : BB) {
564  CallSite CS(cast<Value>(&I));
565  // If this isn't a call, or it is a call to an intrinsic, it can
566  // never be inlined.
567  if (!CS || isa<IntrinsicInst>(I))
568  continue;
569 
570  // If this is a direct call to an external function, we can never inline
571  // it. If it is an indirect call, inlining may resolve it to be a
572  // direct call, so we keep it.
573  if (Function *Callee = CS.getCalledFunction())
574  if (Callee->isDeclaration()) {
575  using namespace ore;
576 
577  setInlineRemark(CS, "unavailable definition");
578  ORE.emit([&]() {
579  return OptimizationRemarkMissed(DEBUG_TYPE, "NoDefinition", &I)
580  << NV("Callee", Callee) << " will not be inlined into "
581  << NV("Caller", CS.getCaller())
582  << " because its definition is unavailable"
583  << setIsVerbose();
584  });
585  continue;
586  }
587 
588  CallSites.push_back(std::make_pair(CS, -1));
589  }
590  }
591 
592  LLVM_DEBUG(dbgs() << ": " << CallSites.size() << " call sites.\n");
593 
594  // If there are no calls in this function, exit early.
595  if (CallSites.empty())
596  return false;
597 
598  // Now that we have all of the call sites, move the ones to functions in the
599  // current SCC to the end of the list.
600  unsigned FirstCallInSCC = CallSites.size();
601  for (unsigned i = 0; i < FirstCallInSCC; ++i)
602  if (Function *F = CallSites[i].first.getCalledFunction())
603  if (SCCFunctions.count(F))
604  std::swap(CallSites[i--], CallSites[--FirstCallInSCC]);
605 
606  InlinedArrayAllocasTy InlinedArrayAllocas;
607  InlineFunctionInfo InlineInfo(&CG, &GetAssumptionCache, PSI);
608 
609  // Now that we have all of the call sites, loop over them and inline them if
610  // it looks profitable to do so.
611  bool Changed = false;
612  bool LocalChange;
613  do {
614  LocalChange = false;
615  // Iterate over the outer loop because inlining functions can cause indirect
616  // calls to become direct calls.
617  // CallSites may be modified inside so ranged for loop can not be used.
618  for (unsigned CSi = 0; CSi != CallSites.size(); ++CSi) {
619  CallSite CS = CallSites[CSi].first;
620 
621  Function *Caller = CS.getCaller();
623 
624  // We can only inline direct calls to non-declarations.
625  if (!Callee || Callee->isDeclaration())
626  continue;
627 
628  Instruction *Instr = CS.getInstruction();
629 
630  bool IsTriviallyDead =
631  isInstructionTriviallyDead(Instr, &GetTLI(*Caller));
632 
633  int InlineHistoryID;
634  if (!IsTriviallyDead) {
635  // If this call site was obtained by inlining another function, verify
636  // that the include path for the function did not include the callee
637  // itself. If so, we'd be recursively inlining the same function,
638  // which would provide the same callsites, which would cause us to
639  // infinitely inline.
640  InlineHistoryID = CallSites[CSi].second;
641  if (InlineHistoryID != -1 &&
642  InlineHistoryIncludes(Callee, InlineHistoryID, InlineHistory)) {
643  setInlineRemark(CS, "recursive");
644  continue;
645  }
646  }
647 
648  // FIXME for new PM: because of the old PM we currently generate ORE and
649  // in turn BFI on demand. With the new PM, the ORE dependency should
650  // just become a regular analysis dependency.
651  OptimizationRemarkEmitter ORE(Caller);
652 
653  Optional<InlineCost> OIC = shouldInline(CS, GetInlineCost, ORE);
654  // If the policy determines that we should inline this function,
655  // delete the call instead.
656  if (!OIC.hasValue()) {
657  setInlineRemark(CS, "deferred");
658  continue;
659  }
660 
661  if (!OIC.getValue()) {
662  // shouldInline() call returned a negative inline cost that explains
663  // why this callsite should not be inlined.
664  setInlineRemark(CS, inlineCostStr(*OIC));
665  continue;
666  }
667 
668  // If this call site is dead and it is to a readonly function, we should
669  // just delete the call instead of trying to inline it, regardless of
670  // size. This happens because IPSCCP propagates the result out of the
671  // call and then we're left with the dead call.
672  if (IsTriviallyDead) {
673  LLVM_DEBUG(dbgs() << " -> Deleting dead call: " << *Instr << "\n");
674  // Update the call graph by deleting the edge from Callee to Caller.
675  setInlineRemark(CS, "trivially dead");
676  CG[Caller]->removeCallEdgeFor(*cast<CallBase>(CS.getInstruction()));
677  Instr->eraseFromParent();
678  ++NumCallsDeleted;
679  } else {
680  // Get DebugLoc to report. CS will be invalid after Inliner.
681  DebugLoc DLoc = CS->getDebugLoc();
682  BasicBlock *Block = CS.getParent();
683 
684  // Attempt to inline the function.
685  using namespace ore;
686 
688  CS, InlineInfo, InlinedArrayAllocas, InlineHistoryID,
689  InsertLifetime, AARGetter, ImportedFunctionsStats);
690  if (!IR) {
691  setInlineRemark(CS, std::string(IR) + "; " + inlineCostStr(*OIC));
692  ORE.emit([&]() {
693  return OptimizationRemarkMissed(DEBUG_TYPE, "NotInlined", DLoc,
694  Block)
695  << NV("Callee", Callee) << " will not be inlined into "
696  << NV("Caller", Caller) << ": " << NV("Reason", IR.message);
697  });
698  continue;
699  }
700  ++NumInlined;
701 
702  emit_inlined_into(ORE, DLoc, Block, *Callee, *Caller, *OIC);
703 
704  // If inlining this function gave us any new call sites, throw them
705  // onto our worklist to process. They are useful inline candidates.
706  if (!InlineInfo.InlinedCalls.empty()) {
707  // Create a new inline history entry for this, so that we remember
708  // that these new callsites came about due to inlining Callee.
709  int NewHistoryID = InlineHistory.size();
710  InlineHistory.push_back(std::make_pair(Callee, InlineHistoryID));
711 
712  for (Value *Ptr : InlineInfo.InlinedCalls)
713  CallSites.push_back(std::make_pair(CallSite(Ptr), NewHistoryID));
714  }
715  }
716 
717  // If we inlined or deleted the last possible call site to the function,
718  // delete the function body now.
719  if (Callee && Callee->use_empty() && Callee->hasLocalLinkage() &&
720  // TODO: Can remove if in SCC now.
721  !SCCFunctions.count(Callee) &&
722  // The function may be apparently dead, but if there are indirect
723  // callgraph references to the node, we cannot delete it yet, this
724  // could invalidate the CGSCC iterator.
725  CG[Callee]->getNumReferences() == 0) {
726  LLVM_DEBUG(dbgs() << " -> Deleting dead function: "
727  << Callee->getName() << "\n");
728  CallGraphNode *CalleeNode = CG[Callee];
729 
730  // Remove any call graph edges from the callee to its callees.
731  CalleeNode->removeAllCalledFunctions();
732 
733  // Removing the node for callee from the call graph and delete it.
734  delete CG.removeFunctionFromModule(CalleeNode);
735  ++NumDeleted;
736  }
737 
738  // Remove this call site from the list. If possible, use
739  // swap/pop_back for efficiency, but do not use it if doing so would
740  // move a call site to a function in this SCC before the
741  // 'FirstCallInSCC' barrier.
742  if (SCC.isSingular()) {
743  CallSites[CSi] = CallSites.back();
744  CallSites.pop_back();
745  } else {
746  CallSites.erase(CallSites.begin() + CSi);
747  }
748  --CSi;
749 
750  Changed = true;
751  LocalChange = true;
752  }
753  } while (LocalChange);
754 
755  return Changed;
756 }
757 
759  CallGraph &CG = getAnalysis<CallGraphWrapperPass>().getCallGraph();
760  ACT = &getAnalysis<AssumptionCacheTracker>();
761  PSI = &getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI();
762  auto GetTLI = [&](Function &F) -> TargetLibraryInfo & {
763  return getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F);
764  };
765  auto GetAssumptionCache = [&](Function &F) -> AssumptionCache & {
766  return ACT->getAssumptionCache(F);
767  };
768  return inlineCallsImpl(
769  SCC, CG, GetAssumptionCache, PSI, GetTLI, InsertLifetime,
770  [this](CallSite CS) { return getInlineCost(CS); }, LegacyAARGetter(*this),
772 }
773 
774 /// Remove now-dead linkonce functions at the end of
775 /// processing to avoid breaking the SCC traversal.
777  if (InlinerFunctionImportStats != InlinerFunctionImportStatsOpts::No)
778  ImportedFunctionsStats.dump(InlinerFunctionImportStats ==
779  InlinerFunctionImportStatsOpts::Verbose);
780  return removeDeadFunctions(CG);
781 }
782 
783 /// Remove dead functions that are not included in DNR (Do Not Remove) list.
785  bool AlwaysInlineOnly) {
786  SmallVector<CallGraphNode *, 16> FunctionsToRemove;
787  SmallVector<Function *, 16> DeadFunctionsInComdats;
788 
789  auto RemoveCGN = [&](CallGraphNode *CGN) {
790  // Remove any call graph edges from the function to its callees.
791  CGN->removeAllCalledFunctions();
792 
793  // Remove any edges from the external node to the function's call graph
794  // node. These edges might have been made irrelegant due to
795  // optimization of the program.
797 
798  // Removing the node for callee from the call graph and delete it.
799  FunctionsToRemove.push_back(CGN);
800  };
801 
802  // Scan for all of the functions, looking for ones that should now be removed
803  // from the program. Insert the dead ones in the FunctionsToRemove set.
804  for (const auto &I : CG) {
805  CallGraphNode *CGN = I.second.get();
806  Function *F = CGN->getFunction();
807  if (!F || F->isDeclaration())
808  continue;
809 
810  // Handle the case when this function is called and we only want to care
811  // about always-inline functions. This is a bit of a hack to share code
812  // between here and the InlineAlways pass.
813  if (AlwaysInlineOnly && !F->hasFnAttribute(Attribute::AlwaysInline))
814  continue;
815 
816  // If the only remaining users of the function are dead constants, remove
817  // them.
819 
820  if (!F->isDefTriviallyDead())
821  continue;
822 
823  // It is unsafe to drop a function with discardable linkage from a COMDAT
824  // without also dropping the other members of the COMDAT.
825  // The inliner doesn't visit non-function entities which are in COMDAT
826  // groups so it is unsafe to do so *unless* the linkage is local.
827  if (!F->hasLocalLinkage()) {
828  if (F->hasComdat()) {
829  DeadFunctionsInComdats.push_back(F);
830  continue;
831  }
832  }
833 
834  RemoveCGN(CGN);
835  }
836  if (!DeadFunctionsInComdats.empty()) {
837  // Filter out the functions whose comdats remain alive.
838  filterDeadComdatFunctions(CG.getModule(), DeadFunctionsInComdats);
839  // Remove the rest.
840  for (Function *F : DeadFunctionsInComdats)
841  RemoveCGN(CG[F]);
842  }
843 
844  if (FunctionsToRemove.empty())
845  return false;
846 
847  // Now that we know which functions to delete, do so. We didn't want to do
848  // this inline, because that would invalidate our CallGraph::iterator
849  // objects. :(
850  //
851  // Note that it doesn't matter that we are iterating over a non-stable order
852  // here to do this, it doesn't matter which order the functions are deleted
853  // in.
854  array_pod_sort(FunctionsToRemove.begin(), FunctionsToRemove.end());
855  FunctionsToRemove.erase(
856  std::unique(FunctionsToRemove.begin(), FunctionsToRemove.end()),
857  FunctionsToRemove.end());
858  for (CallGraphNode *CGN : FunctionsToRemove) {
859  delete CG.removeFunctionFromModule(CGN);
860  ++NumDeleted;
861  }
862  return true;
863 }
864 
867  assert(InlinerFunctionImportStats != InlinerFunctionImportStatsOpts::No);
868  ImportedFunctionsStats->dump(InlinerFunctionImportStats ==
869  InlinerFunctionImportStatsOpts::Verbose);
870  }
871 }
872 
875  CGSCCUpdateResult &UR) {
876  const ModuleAnalysisManager &MAM =
877  AM.getResult<ModuleAnalysisManagerCGSCCProxy>(InitialC, CG).getManager();
878  bool Changed = false;
879 
880  assert(InitialC.size() > 0 && "Cannot handle an empty SCC!");
881  Module &M = *InitialC.begin()->getFunction().getParent();
883 
884  if (!ImportedFunctionsStats &&
885  InlinerFunctionImportStats != InlinerFunctionImportStatsOpts::No) {
887  std::make_unique<ImportedFunctionsInliningStatistics>();
889  }
890 
891  // We use a single common worklist for calls across the entire SCC. We
892  // process these in-order and append new calls introduced during inlining to
893  // the end.
894  //
895  // Note that this particular order of processing is actually critical to
896  // avoid very bad behaviors. Consider *highly connected* call graphs where
897  // each function contains a small amonut of code and a couple of calls to
898  // other functions. Because the LLVM inliner is fundamentally a bottom-up
899  // inliner, it can handle gracefully the fact that these all appear to be
900  // reasonable inlining candidates as it will flatten things until they become
901  // too big to inline, and then move on and flatten another batch.
902  //
903  // However, when processing call edges *within* an SCC we cannot rely on this
904  // bottom-up behavior. As a consequence, with heavily connected *SCCs* of
905  // functions we can end up incrementally inlining N calls into each of
906  // N functions because each incremental inlining decision looks good and we
907  // don't have a topological ordering to prevent explosions.
908  //
909  // To compensate for this, we don't process transitive edges made immediate
910  // by inlining until we've done one pass of inlining across the entire SCC.
911  // Large, highly connected SCCs still lead to some amount of code bloat in
912  // this model, but it is uniformly spread across all the functions in the SCC
913  // and eventually they all become too large to inline, rather than
914  // incrementally maknig a single function grow in a super linear fashion.
916 
919  .getManager();
920 
921  // Populate the initial list of calls in this SCC.
922  for (auto &N : InitialC) {
923  auto &ORE =
924  FAM.getResult<OptimizationRemarkEmitterAnalysis>(N.getFunction());
925  // We want to generally process call sites top-down in order for
926  // simplifications stemming from replacing the call with the returned value
927  // after inlining to be visible to subsequent inlining decisions.
928  // FIXME: Using instructions sequence is a really bad way to do this.
929  // Instead we should do an actual RPO walk of the function body.
930  for (Instruction &I : instructions(N.getFunction()))
931  if (auto CS = CallSite(&I))
932  if (Function *Callee = CS.getCalledFunction()) {
933  if (!Callee->isDeclaration())
934  Calls.push_back({CS, -1});
935  else if (!isa<IntrinsicInst>(I)) {
936  using namespace ore;
937  setInlineRemark(CS, "unavailable definition");
938  ORE.emit([&]() {
939  return OptimizationRemarkMissed(DEBUG_TYPE, "NoDefinition", &I)
940  << NV("Callee", Callee) << " will not be inlined into "
941  << NV("Caller", CS.getCaller())
942  << " because its definition is unavailable"
943  << setIsVerbose();
944  });
945  }
946  }
947  }
948  if (Calls.empty())
949  return PreservedAnalyses::all();
950 
951  // Capture updatable variables for the current SCC and RefSCC.
952  auto *C = &InitialC;
953  auto *RC = &C->getOuterRefSCC();
954 
955  // When inlining a callee produces new call sites, we want to keep track of
956  // the fact that they were inlined from the callee. This allows us to avoid
957  // infinite inlining in some obscure cases. To represent this, we use an
958  // index into the InlineHistory vector.
959  SmallVector<std::pair<Function *, int>, 16> InlineHistory;
960 
961  // Track a set vector of inlined callees so that we can augment the caller
962  // with all of their edges in the call graph before pruning out the ones that
963  // got simplified away.
964  SmallSetVector<Function *, 4> InlinedCallees;
965 
966  // Track the dead functions to delete once finished with inlining calls. We
967  // defer deleting these to make it easier to handle the call graph updates.
968  SmallVector<Function *, 4> DeadFunctions;
969 
970  // Loop forward over all of the calls. Note that we cannot cache the size as
971  // inlining can introduce new calls that need to be processed.
972  for (int i = 0; i < (int)Calls.size(); ++i) {
973  // We expect the calls to typically be batched with sequences of calls that
974  // have the same caller, so we first set up some shared infrastructure for
975  // this caller. We also do any pruning we can at this layer on the caller
976  // alone.
977  Function &F = *Calls[i].first.getCaller();
978  LazyCallGraph::Node &N = *CG.lookup(F);
979  if (CG.lookupSCC(N) != C)
980  continue;
981  if (F.hasOptNone()) {
982  setInlineRemark(Calls[i].first, "optnone attribute");
983  continue;
984  }
985 
986  LLVM_DEBUG(dbgs() << "Inlining calls in: " << F.getName() << "\n");
987 
988  // Get a FunctionAnalysisManager via a proxy for this particular node. We
989  // do this each time we visit a node as the SCC may have changed and as
990  // we're going to mutate this particular function we want to make sure the
991  // proxy is in place to forward any invalidation events. We can use the
992  // manager we get here for looking up results for functions other than this
993  // node however because those functions aren't going to be mutated by this
994  // pass.
997  .getManager();
998 
999  // Get the remarks emission analysis for the caller.
1000  auto &ORE = FAM.getResult<OptimizationRemarkEmitterAnalysis>(F);
1001 
1002  std::function<AssumptionCache &(Function &)> GetAssumptionCache =
1003  [&](Function &F) -> AssumptionCache & {
1004  return FAM.getResult<AssumptionAnalysis>(F);
1005  };
1006  auto GetBFI = [&](Function &F) -> BlockFrequencyInfo & {
1007  return FAM.getResult<BlockFrequencyAnalysis>(F);
1008  };
1009 
1010  auto GetInlineCost = [&](CallSite CS) {
1012  auto &CalleeTTI = FAM.getResult<TargetIRAnalysis>(Callee);
1013  bool RemarksEnabled =
1015  DEBUG_TYPE);
1016  return getInlineCost(cast<CallBase>(*CS.getInstruction()), Params,
1017  CalleeTTI, GetAssumptionCache, {GetBFI}, PSI,
1018  RemarksEnabled ? &ORE : nullptr);
1019  };
1020 
1021  // Now process as many calls as we have within this caller in the sequnece.
1022  // We bail out as soon as the caller has to change so we can update the
1023  // call graph and prepare the context of that new caller.
1024  bool DidInline = false;
1025  for (; i < (int)Calls.size() && Calls[i].first.getCaller() == &F; ++i) {
1026  int InlineHistoryID;
1027  CallSite CS;
1028  std::tie(CS, InlineHistoryID) = Calls[i];
1030 
1031  if (InlineHistoryID != -1 &&
1032  InlineHistoryIncludes(&Callee, InlineHistoryID, InlineHistory)) {
1033  setInlineRemark(CS, "recursive");
1034  continue;
1035  }
1036 
1037  // Check if this inlining may repeat breaking an SCC apart that has
1038  // already been split once before. In that case, inlining here may
1039  // trigger infinite inlining, much like is prevented within the inliner
1040  // itself by the InlineHistory above, but spread across CGSCC iterations
1041  // and thus hidden from the full inline history.
1042  if (CG.lookupSCC(*CG.lookup(Callee)) == C &&
1043  UR.InlinedInternalEdges.count({&N, C})) {
1044  LLVM_DEBUG(dbgs() << "Skipping inlining internal SCC edge from a node "
1045  "previously split out of this SCC by inlining: "
1046  << F.getName() << " -> " << Callee.getName() << "\n");
1047  setInlineRemark(CS, "recursive SCC split");
1048  continue;
1049  }
1050 
1051  Optional<InlineCost> OIC = shouldInline(CS, GetInlineCost, ORE);
1052  // Check whether we want to inline this callsite.
1053  if (!OIC.hasValue()) {
1054  setInlineRemark(CS, "deferred");
1055  continue;
1056  }
1057 
1058  if (!OIC.getValue()) {
1059  // shouldInline() call returned a negative inline cost that explains
1060  // why this callsite should not be inlined.
1061  setInlineRemark(CS, inlineCostStr(*OIC));
1062  continue;
1063  }
1064 
1065  // Setup the data structure used to plumb customization into the
1066  // `InlineFunction` routine.
1067  InlineFunctionInfo IFI(
1068  /*cg=*/nullptr, &GetAssumptionCache, PSI,
1069  &FAM.getResult<BlockFrequencyAnalysis>(*(CS.getCaller())),
1070  &FAM.getResult<BlockFrequencyAnalysis>(Callee));
1071 
1072  // Get DebugLoc to report. CS will be invalid after Inliner.
1073  DebugLoc DLoc = CS->getDebugLoc();
1074  BasicBlock *Block = CS.getParent();
1075 
1076  using namespace ore;
1077 
1078  InlineResult IR = InlineFunction(CS, IFI);
1079  if (!IR) {
1080  setInlineRemark(CS, std::string(IR) + "; " + inlineCostStr(*OIC));
1081  ORE.emit([&]() {
1082  return OptimizationRemarkMissed(DEBUG_TYPE, "NotInlined", DLoc, Block)
1083  << NV("Callee", &Callee) << " will not be inlined into "
1084  << NV("Caller", &F) << ": " << NV("Reason", IR.message);
1085  });
1086  continue;
1087  }
1088  DidInline = true;
1089  InlinedCallees.insert(&Callee);
1090 
1091  ++NumInlined;
1092 
1093  emit_inlined_into(ORE, DLoc, Block, Callee, F, *OIC);
1094 
1095  // Add any new callsites to defined functions to the worklist.
1096  if (!IFI.InlinedCallSites.empty()) {
1097  int NewHistoryID = InlineHistory.size();
1098  InlineHistory.push_back({&Callee, InlineHistoryID});
1099  for (CallSite &CS : reverse(IFI.InlinedCallSites))
1100  if (Function *NewCallee = CS.getCalledFunction())
1101  if (!NewCallee->isDeclaration())
1102  Calls.push_back({CS, NewHistoryID});
1103  }
1104 
1105  if (InlinerFunctionImportStats != InlinerFunctionImportStatsOpts::No)
1107 
1108  // Merge the attributes based on the inlining.
1110 
1111  // For local functions, check whether this makes the callee trivially
1112  // dead. In that case, we can drop the body of the function eagerly
1113  // which may reduce the number of callers of other functions to one,
1114  // changing inline cost thresholds.
1115  if (Callee.hasLocalLinkage()) {
1116  // To check this we also need to nuke any dead constant uses (perhaps
1117  // made dead by this operation on other functions).
1118  Callee.removeDeadConstantUsers();
1119  if (Callee.use_empty() && !CG.isLibFunction(Callee)) {
1120  Calls.erase(
1121  std::remove_if(Calls.begin() + i + 1, Calls.end(),
1122  [&Callee](const std::pair<CallSite, int> &Call) {
1123  return Call.first.getCaller() == &Callee;
1124  }),
1125  Calls.end());
1126  // Clear the body and queue the function itself for deletion when we
1127  // finish inlining and call graph updates.
1128  // Note that after this point, it is an error to do anything other
1129  // than use the callee's address or delete it.
1130  Callee.dropAllReferences();
1131  assert(find(DeadFunctions, &Callee) == DeadFunctions.end() &&
1132  "Cannot put cause a function to become dead twice!");
1133  DeadFunctions.push_back(&Callee);
1134  }
1135  }
1136  }
1137 
1138  // Back the call index up by one to put us in a good position to go around
1139  // the outer loop.
1140  --i;
1141 
1142  if (!DidInline)
1143  continue;
1144  Changed = true;
1145 
1146  // Add all the inlined callees' edges as ref edges to the caller. These are
1147  // by definition trivial edges as we always have *some* transitive ref edge
1148  // chain. While in some cases these edges are direct calls inside the
1149  // callee, they have to be modeled in the inliner as reference edges as
1150  // there may be a reference edge anywhere along the chain from the current
1151  // caller to the callee that causes the whole thing to appear like
1152  // a (transitive) reference edge that will require promotion to a call edge
1153  // below.
1154  for (Function *InlinedCallee : InlinedCallees) {
1155  LazyCallGraph::Node &CalleeN = *CG.lookup(*InlinedCallee);
1156  for (LazyCallGraph::Edge &E : *CalleeN)
1157  RC->insertTrivialRefEdge(N, E.getNode());
1158  }
1159 
1160  // At this point, since we have made changes we have at least removed
1161  // a call instruction. However, in the process we do some incremental
1162  // simplification of the surrounding code. This simplification can
1163  // essentially do all of the same things as a function pass and we can
1164  // re-use the exact same logic for updating the call graph to reflect the
1165  // change.
1166  LazyCallGraph::SCC *OldC = C;
1167  C = &updateCGAndAnalysisManagerForFunctionPass(CG, *C, N, AM, UR);
1168  LLVM_DEBUG(dbgs() << "Updated inlining SCC: " << *C << "\n");
1169  RC = &C->getOuterRefSCC();
1170 
1171  // If this causes an SCC to split apart into multiple smaller SCCs, there
1172  // is a subtle risk we need to prepare for. Other transformations may
1173  // expose an "infinite inlining" opportunity later, and because of the SCC
1174  // mutation, we will revisit this function and potentially re-inline. If we
1175  // do, and that re-inlining also has the potentially to mutate the SCC
1176  // structure, the infinite inlining problem can manifest through infinite
1177  // SCC splits and merges. To avoid this, we capture the originating caller
1178  // node and the SCC containing the call edge. This is a slight over
1179  // approximation of the possible inlining decisions that must be avoided,
1180  // but is relatively efficient to store. We use C != OldC to know when
1181  // a new SCC is generated and the original SCC may be generated via merge
1182  // in later iterations.
1183  //
1184  // It is also possible that even if no new SCC is generated
1185  // (i.e., C == OldC), the original SCC could be split and then merged
1186  // into the same one as itself. and the original SCC will be added into
1187  // UR.CWorklist again, we want to catch such cases too.
1188  //
1189  // FIXME: This seems like a very heavyweight way of retaining the inline
1190  // history, we should look for a more efficient way of tracking it.
1191  if ((C != OldC || UR.CWorklist.count(OldC)) &&
1192  llvm::any_of(InlinedCallees, [&](Function *Callee) {
1193  return CG.lookupSCC(*CG.lookup(*Callee)) == OldC;
1194  })) {
1195  LLVM_DEBUG(dbgs() << "Inlined an internal call edge and split an SCC, "
1196  "retaining this to avoid infinite inlining.\n");
1197  UR.InlinedInternalEdges.insert({&N, OldC});
1198  }
1199  InlinedCallees.clear();
1200  }
1201 
1202  // Now that we've finished inlining all of the calls across this SCC, delete
1203  // all of the trivially dead functions, updating the call graph and the CGSCC
1204  // pass manager in the process.
1205  //
1206  // Note that this walks a pointer set which has non-deterministic order but
1207  // that is OK as all we do is delete things and add pointers to unordered
1208  // sets.
1209  for (Function *DeadF : DeadFunctions) {
1210  // Get the necessary information out of the call graph and nuke the
1211  // function there. Also, cclear out any cached analyses.
1212  auto &DeadC = *CG.lookupSCC(*CG.lookup(*DeadF));
1215  .getManager();
1216  FAM.clear(*DeadF, DeadF->getName());
1217  AM.clear(DeadC, DeadC.getName());
1218  auto &DeadRC = DeadC.getOuterRefSCC();
1219  CG.removeDeadFunction(*DeadF);
1220 
1221  // Mark the relevant parts of the call graph as invalid so we don't visit
1222  // them.
1223  UR.InvalidatedSCCs.insert(&DeadC);
1224  UR.InvalidatedRefSCCs.insert(&DeadRC);
1225 
1226  // And delete the actual function from the module.
1227  M.getFunctionList().erase(DeadF);
1228  ++NumDeleted;
1229  }
1230 
1231  if (!Changed)
1232  return PreservedAnalyses::all();
1233 
1234  // Even if we change the IR, we update the core CGSCC data structures and so
1235  // can preserve the proxy to the function analysis manager.
1236  PreservedAnalyses PA;
1238  return PA;
1239 }
InlinerFunctionImportStatsOpts
Definition: Inliner.cpp:98
uint64_t CallInst * C
SymbolTableList< Instruction >::iterator eraseFromParent()
This method unlinks &#39;this&#39; from the containing basic block and deletes it.
Definition: Instruction.cpp:67
A parsed version of the target data layout string in and methods for querying it. ...
Definition: DataLayout.h:112
bool doFinalization(CallGraph &CG) override
Remove now-dead linkonce functions at the end of processing to avoid breaking the SCC traversal...
Definition: Inliner.cpp:776
void setModuleInfo(const Module &M)
Set information like AllFunctions, ImportedFunctions, ModuleName.
bool hasLocalLinkage() const
Definition: GlobalValue.h:445
Diagnostic information for missed-optimization remarks.
DiagnosticInfoOptimizationBase::Argument NV
bool isNever() const
Definition: InlineCost.h:104
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)
Get the result of an analysis pass for a given IR unit.
Definition: PassManager.h:776
This class represents lattice values for constants.
Definition: AllocatorList.h:23
bool hasOptNone() const
Do not optimize this function (-O0).
Definition: Function.h:616
static bool InlineHistoryIncludes(Function *F, int InlineHistoryID, const SmallVectorImpl< std::pair< Function *, int >> &InlineHistory)
Return true if the specified inline history ID indicates an inline history that includes the specifie...
Definition: Inliner.cpp:482
A Module instance is used to store all the information related to an LLVM module. ...
Definition: Module.h:65
LazyCallGraph::SCC & updateCGAndAnalysisManagerForFunctionPass(LazyCallGraph &G, LazyCallGraph::SCC &C, LazyCallGraph::Node &N, CGSCCAnalysisManager &AM, CGSCCUpdateResult &UR)
Helper to update the call graph after running a function pass.
void recordInline(const Function &Caller, const Function &Callee)
Record inline of.
void clear(IRUnitT &IR, llvm::StringRef Name)
Clear any cached analysis results for a single unit of IR.
Definition: PassManager.h:746
static InlineResult InlineCallIfPossible(CallSite CS, InlineFunctionInfo &IFI, InlinedArrayAllocasTy &InlinedArrayAllocas, int InlineHistory, bool InsertLifetime, function_ref< AAResults &(Function &)> &AARGetter, ImportedFunctionsInliningStatistics &ImportedFunctionsStats)
If it is possible to inline the specified call site, do so and update the CallGraph for this operatio...
Definition: Inliner.cpp:274
virtual InlineCost getInlineCost(CallSite CS)=0
This method must be implemented by the subclass to determine the cost of inlining the specified call ...
SCC * lookupSCC(Node &N) const
Lookup a function&#39;s SCC in the graph.
Analysis providing profile information.
Implements a lazy call graph analysis and related passes for the new pass manager.
This file contains the declarations for metadata subclasses.
An immutable pass that tracks lazily created AssumptionCache objects.
An efficient, type-erasing, non-owning reference to a callable.
Definition: STLExtras.h:104
A cache of @llvm.assume calls within a function.
Analysis pass providing the TargetTransformInfo.
LLVMContext & getContext() const
All values hold a context through their type.
Definition: Value.cpp:743
bool hasFnAttribute(Attribute::AttrKind Kind) const
Return true if the function has the attribute.
Definition: Function.h:323
STATISTIC(NumFunctions, "Total number of functions")
A debug info location.
Definition: DebugLoc.h:33
F(f)
FunTy * getCalledFunction() const
Return the function being called if this is a direct call, otherwise return null (if it&#39;s an indirect...
Definition: CallSite.h:111
Calculate and dump ThinLTO specific inliner stats.
static std::string inlineCostStr(const InlineCost &IC)
Definition: Inliner.cpp:409
This class captures the data input to the InlineFunction call, and records the auxiliary results prod...
Definition: Cloning.h:173
static void mergeInlinedArrayAllocas(Function *Caller, InlineFunctionInfo &IFI, InlinedArrayAllocasTy &InlinedArrayAllocas, int InlineHistory)
Look at all of the allocas that we inlined through this call site.
Definition: Inliner.cpp:161
A proxy from a FunctionAnalysisManager to an SCC.
A node in the call graph for a module.
Definition: CallGraph.h:164
bool skipSCC(CallGraphSCC &SCC) const
Optional passes call this function to check whether the pass should be skipped.
void getAnalysisUsage(AnalysisUsage &Info) const override
getAnalysisUsage - For this class, we declare that we require and preserve the call graph...
Support structure for SCC passes to communicate updates the call graph back to the CGSCC pass manager...
Represents the cost of inlining a function.
Definition: InlineCost.h:63
Module & getModule() const
Returns the module the call graph corresponds to.
Definition: CallGraph.h:113
static bool inlineCallsImpl(CallGraphSCC &SCC, CallGraph &CG, std::function< AssumptionCache &(Function &)> GetAssumptionCache, ProfileSummaryInfo *PSI, std::function< TargetLibraryInfo &(Function &)> GetTLI, bool InsertLifetime, function_ref< InlineCost(CallSite CS)> GetInlineCost, function_ref< AAResults &(Function &)> AARGetter, ImportedFunctionsInliningStatistics &ImportedFunctionsStats)
Definition: Inliner.cpp:528
AnalysisUsage & addRequired()
bool runOnSCC(CallGraphSCC &SCC) override
Main run interface method, this implements the interface required by the Pass class.
Definition: Inliner.cpp:501
const DataLayout & getDataLayout() const
Get the data layout for the module&#39;s target platform.
Definition: Module.cpp:369
unsigned getAlignment() const
Return the alignment of the memory that is being allocated by the instruction.
Definition: Instructions.h:112
An analysis pass based on legacy pass manager to deliver ProfileSummaryInfo.
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
Definition: APFloat.h:41
This class is a functor to be used in legacy module or SCC passes for computing AA results for a func...
bool isLibFunction(Function &F) const
Test whether a function is a known and defined library function tracked by the call graph...
This file contains the simple types necessary to represent the attributes associated with functions a...
Function * removeFunctionFromModule(CallGraphNode *CGN)
Unlink the function from this module, returning it.
Definition: CallGraph.cpp:120
PreservedAnalyses run(LazyCallGraph::SCC &C, CGSCCAnalysisManager &AM, LazyCallGraph &CG, CGSCCUpdateResult &UR)
Definition: Inliner.cpp:873
static Optional< InlineCost > shouldInline(CallSite CS, function_ref< InlineCost(CallSite CS)> GetInlineCost, OptimizationRemarkEmitter &ORE)
Return the cost only if the inliner should attempt to inline at the given CallSite.
Definition: Inliner.cpp:419
auto reverse(ContainerTy &&C, typename std::enable_if< has_rbegin< ContainerTy >::value >::type *=nullptr) -> decltype(make_range(C.rbegin(), C.rend()))
Definition: STLExtras.h:261
InstrTy * getInstruction() const
Definition: CallSite.h:96
This file provides interfaces used to build and manipulate a call graph, which is a very useful tool ...
bool insert(const value_type &X)
Insert a new element into the SetVector.
Definition: SetVector.h:141
const T & getValue() const LLVM_LVALUE_FUNCTION
Definition: Optional.h:255
Class to represent array types.
Definition: DerivedTypes.h:408
A lazily constructed view of the call graph of a module.
const int LastCallToStaticBonus
Definition: InlineCost.h:46
DiagnosticInfoOptimizationBase::setIsVerbose setIsVerbose
void replaceAllUsesWith(Value *V)
Change all uses of this to point to a new Value.
Definition: Value.cpp:429
InlineResult is basically true or false.
Definition: InlineCost.h:135
#define DEBUG_TYPE
Definition: Inliner.cpp:73
void removeDeadConstantUsers() const
If there are any dead constant users dangling off of this constant, remove them.
Definition: Constants.cpp:571
BlockFrequencyInfo pass uses BlockFrequencyInfoImpl implementation to estimate IR basic block frequen...
bool hasLinkOnceODRLinkage() const
Definition: GlobalValue.h:436
SmallVector< CallSite, 8 > InlinedCallSites
All of the new call sites inlined into the caller.
Definition: Cloning.h:204
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:432
bool isAlways() const
Definition: InlineCost.h:103
void array_pod_sort(IteratorTy Start, IteratorTy End)
array_pod_sort - This sorts an array with the specified start and end extent.
Definition: STLExtras.h:1062
void getAnalysisUsage(AnalysisUsage &Info) const override
For this class, we declare that we require and preserve the call graph.
Definition: Inliner.cpp:131
void removeAnyCallEdgeTo(CallGraphNode *Callee)
Removes all call edges from this node to the specified callee function.
Definition: CallGraph.cpp:201
A set of analyses that are preserved following a run of a transformation pass.
Definition: PassManager.h:153
static MetadataAsValue * getIfExists(LLVMContext &Context, Metadata *MD)
Definition: Metadata.cpp:113
LLVM Basic Block Representation.
Definition: BasicBlock.h:57
bool inlineCalls(CallGraphSCC &SCC)
This function performs the main work of the pass.
Definition: Inliner.cpp:758
An analysis pass based on the new PM to deliver ProfileSummaryInfo.
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
ValuesClass values(OptsTy... Options)
Helper to build a ValuesClass by forwarding a variable number of arguments as an initializer list to ...
Definition: CommandLine.h:652
SmallPtrSetImpl< LazyCallGraph::RefSCC * > & InvalidatedRefSCCs
The set of invalidated RefSCCs which should be skipped if they are found in RCWorklist.
SmallPtrSetImpl< LazyCallGraph::SCC * > & InvalidatedSCCs
The set of invalidated SCCs which should be skipped if they are found in CWorklist.
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
Definition: SmallPtrSet.h:370
Diagnostic information for applied optimization remarks.
Represent the analysis usage information of a pass.
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly...
Definition: STLExtras.h:1172
constexpr double e
Definition: MathExtras.h:57
amdgpu Simplify well known AMD library false FunctionCallee Value * Arg
A node in the call graph.
static void emit_inlined_into(OptimizationRemarkEmitter &ORE, DebugLoc &DLoc, const BasicBlock *Block, const Function &Callee, const Function &Caller, const InlineCost &IC)
Definition: Inliner.cpp:507
size_type count(ConstPtrType Ptr) const
count - Return 1 if the specified pointer is in the set, 0 otherwise.
Definition: SmallPtrSet.h:381
A class used to represent edges in the call graph.
auto remove_if(R &&Range, UnaryPredicate P) -> decltype(adl_begin(Range))
Provide wrappers to std::remove_if which take ranges instead of having to pass begin/end explicitly...
Definition: STLExtras.h:1205
LLVMContext & getContext() const
getContext - Return a reference to the LLVMContext associated with this function. ...
Definition: Function.cpp:205
InlineResult InlineFunction(CallBase *CB, InlineFunctionInfo &IFI, AAResults *CalleeAAR=nullptr, bool InsertLifetime=true)
This function inlines the called function into the basic block of the caller.
Used in the streaming interface as the general argument type.
iterator erase(const_iterator CI)
Definition: SmallVector.h:434
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
Definition: PassManager.h:159
virtual bool isMissedOptRemarkEnabled(StringRef PassName) const
Return true if missed optimization remarks are enabled, override to provide different implementation...
bool doInitialization(CallGraph &CG) override
doInitialization - This method is called before the SCC&#39;s of the program has been processed...
Definition: Inliner.cpp:495
size_t size() const
Definition: SmallVector.h:52
auto find(R &&Range, const T &Val) -> decltype(adl_begin(Range))
Provide wrappers to std::find which take ranges instead of having to pass begin/end explicitly...
Definition: STLExtras.h:1186
int getThreshold() const
Get the threshold against which the cost was computed.
Definition: InlineCost.h:115
const char * getReason() const
Get the reason of Always or Never.
Definition: InlineCost.h:121
void getAAResultsAnalysisUsage(AnalysisUsage &AU)
A helper for the legacy pass manager to populate AU to add uses to make sure the analyses required by...
SmallPriorityWorklist< LazyCallGraph::SCC *, 1 > & CWorklist
Worklist of the SCCs queued for processing.
Type * getAllocatedType() const
Return the type that is being allocated by the instruction.
Definition: Instructions.h:105
unsigned first
void addAttribute(unsigned i, Attribute::AttrKind Kind)
Definition: CallSite.h:345
Function * getFunction() const
Returns the function that this call graph node represents.
Definition: CallGraph.h:187
A function analysis which provides an AssumptionCache.
A SetVector that performs no allocations if smaller than a certain size.
Definition: SetVector.h:297
Analysis pass which computes BlockFrequencyInfo.
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements...
Definition: SmallPtrSet.h:417
void removeDeadFunction(Function &F)
Remove a dead function from the call graph (typically to delete it).
ImportedFunctionsInliningStatistics ImportedFunctionsStats
Definition: Inliner.h:77
void emit(DiagnosticInfoOptimizationBase &OptDiag)
Output the remark via the diagnostic handler and to the optimization record file. ...
An analysis over an "inner" IR unit that provides access to an analysis manager over a "outer" IR uni...
Definition: PassManager.h:1160
This struct is a compact representation of a valid (power of two) or undefined (0) alignment...
Definition: Alignment.h:117
static cl::opt< bool > DisableInlinedAllocaMerging("disable-inlined-alloca-merging", cl::init(false), cl::Hidden)
Flag to disable manual alloca merging.
This is a &#39;vector&#39; (really, a variable-sized array), optimized for the case when the array is small...
Definition: SmallVector.h:837
Module.h This file contains the declarations for the Module class.
Provides information about what library functions are available for the current target.
static LocalAsMetadata * getIfExists(Value *Local)
Definition: Metadata.h:439
unsigned getABITypeAlignment(Type *Ty) const
Returns the minimum ABI-required alignment for the specified type.
Definition: DataLayout.cpp:755
void dropAllReferences()
dropAllReferences() - This method causes all the subinstructions to "let go" of all references that t...
Definition: Function.cpp:359
AssumptionCacheTracker * ACT
Definition: Inliner.h:75
bool removeDeadFunctions(CallGraph &CG, bool AlwaysInlineOnly=false)
Remove dead functions.
Definition: Inliner.cpp:784
Node * lookup(const Function &F) const
Lookup a function in the graph which has already been scanned and added.
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:132
void mergeAttributesForInlining(Function &Caller, const Function &Callee)
Merge caller&#39;s and callee&#39;s attributes.
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition: BitVector.h:940
void dump(bool Verbose)
Dump stats computed with InlinerStatistics class.
amdgpu Simplify well known AMD library false FunctionCallee Callee
BBTy * getParent() const
Get the basic block containing the call site.
Definition: CallSite.h:101
iterator_range< user_iterator > users()
Definition: Value.h:420
int getCost() const
Get the inline cost estimate.
Definition: InlineCost.h:109
int getCostDelta() const
Get the cost delta from the threshold for inlining.
Definition: InlineCost.h:130
bool hasComdat() const
Definition: GlobalObject.h:105
static void setInlineRemark(CallSite &CS, StringRef message)
Definition: Inliner.cpp:519
#define clEnumValN(ENUMVAL, FLAGNAME, DESC)
Definition: CommandLine.h:627
void filterDeadComdatFunctions(Module &M, SmallVectorImpl< Function *> &DeadComdatFunctions)
Filter out potentially dead comdat functions where other entries keep the entire comdat group alive...
SmallVector< AllocaInst *, 4 > StaticAllocas
InlineFunction fills this in with all static allocas that get copied into the caller.
Definition: Cloning.h:193
bool hasValue() const
Definition: Optional.h:259
The basic data container for the call graph of a Module of IR.
Definition: CallGraph.h:73
LegacyInlinerBase(char &ID)
Definition: Inliner.cpp:123
const char * message
Definition: InlineCost.h:136
LLVM_NODISCARD bool empty() const
Definition: SmallVector.h:55
SmallVector< WeakTrackingVH, 8 > InlinedCalls
InlineFunction fills this in with callsites that were inlined from the callee.
Definition: Cloning.h:197
StringRef getName() const
Return a constant reference to the value&#39;s name.
Definition: Value.cpp:214
#define I(x, y, z)
Definition: MD5.cpp:58
#define N
PassT::Result * getCachedResult(IRUnitT &IR) const
Get the cached result of an analysis pass for a given IR unit.
Definition: PassManager.h:795
LLVM_NODISCARD std::enable_if<!is_simple_type< Y >::value, typename cast_retty< X, const Y >::ret_type >::type dyn_cast(const Y &Val)
Definition: Casting.h:332
void preserve()
Mark an analysis as preserved.
Definition: PassManager.h:174
static Attribute get(LLVMContext &Context, AttrKind Kind, uint64_t Val=0)
Return a uniquified Attribute object.
Definition: Attributes.cpp:80
ProfileSummaryInfo * PSI
Definition: Inliner.h:76
raw_ostream & operator<<(raw_ostream &OS, const APInt &I)
Definition: APInt.h:2047
CallGraphNode * getExternalCallingNode() const
Returns the CallGraphNode which is used to represent undetermined calls into the callgraph.
Definition: CallGraph.h:136
This header provides classes for managing passes over SCCs of the call graph.
bool isDeclaration() const
Return true if the primary definition of this global value is outside of the current translation unit...
Definition: Globals.cpp:231
const DiagnosticHandler * getDiagHandlerPtr() const
getDiagHandlerPtr - Returns const raw pointer of DiagnosticHandler set by setDiagnosticHandler.
bool isArrayAllocation() const
Return true if there is an allocation size parameter to the allocation instruction that is not 1...
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
Module * getParent()
Get the module that this global value is contained inside of...
Definition: GlobalValue.h:575
bool isInstructionTriviallyDead(Instruction *I, const TargetLibraryInfo *TLI=nullptr)
Return true if the result produced by the instruction is not used, and the instruction has no side ef...
Definition: Local.cpp:359
LLVM Value Representation.
Definition: Value.h:74
An SCC of the call graph.
CallGraphSCC - This is a single SCC that a CallGraphSCCPass is run on.
AssumptionCache & getAssumptionCache(Function &F)
Get the cached assumptions for a function.
print Print MemDeps of function
bool hasOneUse() const
Return true if there is exactly one user of this value.
Definition: Value.h:433
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:48
This is the interface for LLVM&#39;s primary stateless and local alias analysis.
inst_range instructions(Function *F)
Definition: InstIterator.h:133
A container for analyses that lazily runs them and caches their results.
This pass exposes codegen information to IR-level passes.
static bool shouldBeDeferred(Function *Caller, CallSite CS, InlineCost IC, int &TotalSecondaryCost, function_ref< InlineCost(CallSite CS)> GetInlineCost)
Return true if inlining of CS can block the caller from being inlined which is proved to be more bene...
Definition: Inliner.cpp:307
static cl::opt< InlinerFunctionImportStatsOpts > InlinerFunctionImportStats("inliner-function-import-stats", cl::init(InlinerFunctionImportStatsOpts::No), cl::values(clEnumValN(InlinerFunctionImportStatsOpts::Basic, "basic", "basic statistics"), clEnumValN(InlinerFunctionImportStatsOpts::Verbose, "verbose", "printing of statistics for each inlined function")), cl::Hidden, cl::desc("Enable inliner stats for imported functions"))
This header defines various interfaces for pass management in LLVM.
#define LLVM_DEBUG(X)
Definition: Debug.h:122
SmallDenseSet< std::pair< LazyCallGraph::Node *, LazyCallGraph::SCC * >, 4 > & InlinedInternalEdges
A hacky area where the inliner can retain history about inlining decisions that mutated the call grap...
This represents the llvm.dbg.declare instruction.
The optimization diagnostic interface.
bool use_empty() const
Definition: Value.h:343
Statically lint checks LLVM IR
Definition: Lint.cpp:192
bool isDefTriviallyDead() const
isDefTriviallyDead - Return true if it is trivially safe to remove this function definition from the ...
Definition: Function.cpp:1440
const BasicBlock * getParent() const
Definition: Instruction.h:66
an instruction to allocate memory on the stack
Definition: Instructions.h:59
static cl::opt< bool > InlineRemarkAttribute("inline-remark-attribute", cl::init(false), cl::Hidden, cl::desc("Enable adding inline-remark attribute to" " callsites processed by inliner but decided" " to be not inlined"))
Flag to add inline messages as callsite attributes &#39;inline-remark&#39;.
FunTy * getCaller() const
Return the caller function for this call site.
Definition: CallSite.h:275