LLVM  10.0.0svn
Inliner.cpp
Go to the documentation of this file.
1 //===- Inliner.cpp - Code common to all inliners --------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file implements the mechanics required to implement inlining without
10 // missing any calls and updating the call graph. The decisions of which calls
11 // are profitable to inline are implemented elsewhere.
12 //
13 //===----------------------------------------------------------------------===//
14 
16 #include "llvm/ADT/DenseMap.h"
17 #include "llvm/ADT/None.h"
18 #include "llvm/ADT/Optional.h"
19 #include "llvm/ADT/STLExtras.h"
20 #include "llvm/ADT/SetVector.h"
21 #include "llvm/ADT/SmallPtrSet.h"
22 #include "llvm/ADT/SmallVector.h"
23 #include "llvm/ADT/Statistic.h"
24 #include "llvm/ADT/StringRef.h"
38 #include "llvm/IR/Attributes.h"
39 #include "llvm/IR/BasicBlock.h"
40 #include "llvm/IR/CallSite.h"
41 #include "llvm/IR/DataLayout.h"
42 #include "llvm/IR/DebugLoc.h"
43 #include "llvm/IR/DerivedTypes.h"
44 #include "llvm/IR/DiagnosticInfo.h"
45 #include "llvm/IR/Function.h"
46 #include "llvm/IR/InstIterator.h"
47 #include "llvm/IR/Instruction.h"
48 #include "llvm/IR/Instructions.h"
49 #include "llvm/IR/IntrinsicInst.h"
50 #include "llvm/IR/Metadata.h"
51 #include "llvm/IR/Module.h"
52 #include "llvm/IR/PassManager.h"
53 #include "llvm/IR/User.h"
54 #include "llvm/IR/Value.h"
55 #include "llvm/Pass.h"
56 #include "llvm/Support/Casting.h"
58 #include "llvm/Support/Debug.h"
63 #include <algorithm>
64 #include <cassert>
65 #include <functional>
66 #include <sstream>
67 #include <tuple>
68 #include <utility>
69 #include <vector>
70 
71 using namespace llvm;
72 
73 #define DEBUG_TYPE "inline"
74 
75 STATISTIC(NumInlined, "Number of functions inlined");
76 STATISTIC(NumCallsDeleted, "Number of call sites deleted, not inlined");
77 STATISTIC(NumDeleted, "Number of functions deleted because all callers found");
78 STATISTIC(NumMergedAllocas, "Number of allocas merged together");
79 
80 // This weirdly named statistic tracks the number of times that, when attempting
81 // to inline a function A into B, we analyze the callers of B in order to see
82 // if those would be more profitable and blocked inline steps.
83 STATISTIC(NumCallerCallersAnalyzed, "Number of caller-callers analyzed");
84 
85 /// Flag to disable manual alloca merging.
86 ///
87 /// Merging of allocas was originally done as a stack-size saving technique
88 /// prior to LLVM's code generator having support for stack coloring based on
89 /// lifetime markers. It is now in the process of being removed. To experiment
90 /// with disabling it and relying fully on lifetime marker based stack
91 /// coloring, you can pass this flag to LLVM.
92 static cl::opt<bool>
93  DisableInlinedAllocaMerging("disable-inlined-alloca-merging",
94  cl::init(false), cl::Hidden);
95 
96 namespace {
97 
99  No = 0,
100  Basic = 1,
101  Verbose = 2,
102 };
103 
104 } // end anonymous namespace
105 
107  "inliner-function-import-stats",
110  "basic statistics"),
111  clEnumValN(InlinerFunctionImportStatsOpts::Verbose, "verbose",
112  "printing of statistics for each inlined function")),
113  cl::Hidden, cl::desc("Enable inliner stats for imported functions"));
114 
115 /// Flag to add inline messages as callsite attributes 'inline-remark'.
116 static cl::opt<bool>
117  InlineRemarkAttribute("inline-remark-attribute", cl::init(false),
118  cl::Hidden,
119  cl::desc("Enable adding inline-remark attribute to"
120  " callsites processed by inliner but decided"
121  " to be not inlined"));
122 
124 
125 LegacyInlinerBase::LegacyInlinerBase(char &ID, bool InsertLifetime)
126  : CallGraphSCCPass(ID), InsertLifetime(InsertLifetime) {}
127 
128 /// For this class, we declare that we require and preserve the call graph.
129 /// If the derived class implements this method, it should
130 /// always explicitly call the implementation here.
137 }
138 
140 
141 /// Look at all of the allocas that we inlined through this call site. If we
142 /// have already inlined other allocas through other calls into this function,
143 /// then we know that they have disjoint lifetimes and that we can merge them.
144 ///
145 /// There are many heuristics possible for merging these allocas, and the
146 /// different options have different tradeoffs. One thing that we *really*
147 /// don't want to hurt is SRoA: once inlining happens, often allocas are no
148 /// longer address taken and so they can be promoted.
149 ///
150 /// Our "solution" for that is to only merge allocas whose outermost type is an
151 /// array type. These are usually not promoted because someone is using a
152 /// variable index into them. These are also often the most important ones to
153 /// merge.
154 ///
155 /// A better solution would be to have real memory lifetime markers in the IR
156 /// and not have the inliner do any merging of allocas at all. This would
157 /// allow the backend to do proper stack slot coloring of all allocas that
158 /// *actually make it to the backend*, which is really what we want.
159 ///
160 /// Because we don't have this information, we do this simple and useful hack.
162  Function *Caller, InlineFunctionInfo &IFI,
163  InlinedArrayAllocasTy &InlinedArrayAllocas, int InlineHistory) {
164  SmallPtrSet<AllocaInst *, 16> UsedAllocas;
165 
166  // When processing our SCC, check to see if CS was inlined from some other
167  // call site. For example, if we're processing "A" in this code:
168  // A() { B() }
169  // B() { x = alloca ... C() }
170  // C() { y = alloca ... }
171  // Assume that C was not inlined into B initially, and so we're processing A
172  // and decide to inline B into A. Doing this makes an alloca available for
173  // reuse and makes a callsite (C) available for inlining. When we process
174  // the C call site we don't want to do any alloca merging between X and Y
175  // because their scopes are not disjoint. We could make this smarter by
176  // keeping track of the inline history for each alloca in the
177  // InlinedArrayAllocas but this isn't likely to be a significant win.
178  if (InlineHistory != -1) // Only do merging for top-level call sites in SCC.
179  return;
180 
181  // Loop over all the allocas we have so far and see if they can be merged with
182  // a previously inlined alloca. If not, remember that we had it.
183  for (unsigned AllocaNo = 0, e = IFI.StaticAllocas.size(); AllocaNo != e;
184  ++AllocaNo) {
185  AllocaInst *AI = IFI.StaticAllocas[AllocaNo];
186 
187  // Don't bother trying to merge array allocations (they will usually be
188  // canonicalized to be an allocation *of* an array), or allocations whose
189  // type is not itself an array (because we're afraid of pessimizing SRoA).
191  if (!ATy || AI->isArrayAllocation())
192  continue;
193 
194  // Get the list of all available allocas for this array type.
195  std::vector<AllocaInst *> &AllocasForType = InlinedArrayAllocas[ATy];
196 
197  // Loop over the allocas in AllocasForType to see if we can reuse one. Note
198  // that we have to be careful not to reuse the same "available" alloca for
199  // multiple different allocas that we just inlined, we use the 'UsedAllocas'
200  // set to keep track of which "available" allocas are being used by this
201  // function. Also, AllocasForType can be empty of course!
202  bool MergedAwayAlloca = false;
203  for (AllocaInst *AvailableAlloca : AllocasForType) {
204  unsigned Align1 = AI->getAlignment(),
205  Align2 = AvailableAlloca->getAlignment();
206 
207  // The available alloca has to be in the right function, not in some other
208  // function in this SCC.
209  if (AvailableAlloca->getParent() != AI->getParent())
210  continue;
211 
212  // If the inlined function already uses this alloca then we can't reuse
213  // it.
214  if (!UsedAllocas.insert(AvailableAlloca).second)
215  continue;
216 
217  // Otherwise, we *can* reuse it, RAUW AI into AvailableAlloca and declare
218  // success!
219  LLVM_DEBUG(dbgs() << " ***MERGED ALLOCA: " << *AI
220  << "\n\t\tINTO: " << *AvailableAlloca << '\n');
221 
222  // Move affected dbg.declare calls immediately after the new alloca to
223  // avoid the situation when a dbg.declare precedes its alloca.
224  if (auto *L = LocalAsMetadata::getIfExists(AI))
225  if (auto *MDV = MetadataAsValue::getIfExists(AI->getContext(), L))
226  for (User *U : MDV->users())
227  if (DbgDeclareInst *DDI = dyn_cast<DbgDeclareInst>(U))
228  DDI->moveBefore(AvailableAlloca->getNextNode());
229 
230  AI->replaceAllUsesWith(AvailableAlloca);
231 
232  if (Align1 != Align2) {
233  if (!Align1 || !Align2) {
234  const DataLayout &DL = Caller->getParent()->getDataLayout();
235  unsigned TypeAlign = DL.getABITypeAlignment(AI->getAllocatedType());
236 
237  Align1 = Align1 ? Align1 : TypeAlign;
238  Align2 = Align2 ? Align2 : TypeAlign;
239  }
240 
241  if (Align1 > Align2)
242  AvailableAlloca->setAlignment(AI->getAlignment());
243  }
244 
245  AI->eraseFromParent();
246  MergedAwayAlloca = true;
247  ++NumMergedAllocas;
248  IFI.StaticAllocas[AllocaNo] = nullptr;
249  break;
250  }
251 
252  // If we already nuked the alloca, we're done with it.
253  if (MergedAwayAlloca)
254  continue;
255 
256  // If we were unable to merge away the alloca either because there are no
257  // allocas of the right type available or because we reused them all
258  // already, remember that this alloca came from an inlined function and mark
259  // it used so we don't reuse it for other allocas from this inline
260  // operation.
261  AllocasForType.push_back(AI);
262  UsedAllocas.insert(AI);
263  }
264 }
265 
266 /// If it is possible to inline the specified call site,
267 /// do so and update the CallGraph for this operation.
268 ///
269 /// This function also does some basic book-keeping to update the IR. The
270 /// InlinedArrayAllocas map keeps track of any allocas that are already
271 /// available from other functions inlined into the caller. If we are able to
272 /// inline this call site we attempt to reuse already available allocas or add
273 /// any new allocas to the set if not possible.
275  CallSite CS, InlineFunctionInfo &IFI,
276  InlinedArrayAllocasTy &InlinedArrayAllocas, int InlineHistory,
277  bool InsertLifetime, function_ref<AAResults &(Function &)> &AARGetter,
280  Function *Caller = CS.getCaller();
281 
282  AAResults &AAR = AARGetter(*Callee);
283 
284  // Try to inline the function. Get the list of static allocas that were
285  // inlined.
286  InlineResult IR = InlineFunction(CS, IFI, &AAR, InsertLifetime);
287  if (!IR)
288  return IR;
289 
290  if (InlinerFunctionImportStats != InlinerFunctionImportStatsOpts::No)
291  ImportedFunctionsStats.recordInline(*Caller, *Callee);
292 
294 
296  mergeInlinedArrayAllocas(Caller, IFI, InlinedArrayAllocas, InlineHistory);
297 
298  return IR; // success
299 }
300 
301 /// Return true if inlining of CS can block the caller from being
302 /// inlined which is proved to be more beneficial. \p IC is the
303 /// estimated inline cost associated with callsite \p CS.
304 /// \p TotalSecondaryCost will be set to the estimated cost of inlining the
305 /// caller if \p CS is suppressed for inlining.
306 static bool
308  int &TotalSecondaryCost,
309  function_ref<InlineCost(CallSite CS)> GetInlineCost) {
310  // For now we only handle local or inline functions.
311  if (!Caller->hasLocalLinkage() && !Caller->hasLinkOnceODRLinkage())
312  return false;
313  // If the cost of inlining CS is non-positive, it is not going to prevent the
314  // caller from being inlined into its callers and hence we don't need to
315  // defer.
316  if (IC.getCost() <= 0)
317  return false;
318  // Try to detect the case where the current inlining candidate caller (call
319  // it B) is a static or linkonce-ODR function and is an inlining candidate
320  // elsewhere, and the current candidate callee (call it C) is large enough
321  // that inlining it into B would make B too big to inline later. In these
322  // circumstances it may be best not to inline C into B, but to inline B into
323  // its callers.
324  //
325  // This only applies to static and linkonce-ODR functions because those are
326  // expected to be available for inlining in the translation units where they
327  // are used. Thus we will always have the opportunity to make local inlining
328  // decisions. Importantly the linkonce-ODR linkage covers inline functions
329  // and templates in C++.
330  //
331  // FIXME: All of this logic should be sunk into getInlineCost. It relies on
332  // the internal implementation of the inline cost metrics rather than
333  // treating them as truly abstract units etc.
334  TotalSecondaryCost = 0;
335  // The candidate cost to be imposed upon the current function.
336  int CandidateCost = IC.getCost() - 1;
337  // If the caller has local linkage and can be inlined to all its callers, we
338  // can apply a huge negative bonus to TotalSecondaryCost.
339  bool ApplyLastCallBonus = Caller->hasLocalLinkage() && !Caller->hasOneUse();
340  // This bool tracks what happens if we DO inline C into B.
341  bool inliningPreventsSomeOuterInline = false;
342  for (User *U : Caller->users()) {
343  // If the caller will not be removed (either because it does not have a
344  // local linkage or because the LastCallToStaticBonus has been already
345  // applied), then we can exit the loop early.
346  if (!ApplyLastCallBonus && TotalSecondaryCost >= IC.getCost())
347  return false;
348  CallSite CS2(U);
349 
350  // If this isn't a call to Caller (it could be some other sort
351  // of reference) skip it. Such references will prevent the caller
352  // from being removed.
353  if (!CS2 || CS2.getCalledFunction() != Caller) {
354  ApplyLastCallBonus = false;
355  continue;
356  }
357 
358  InlineCost IC2 = GetInlineCost(CS2);
359  ++NumCallerCallersAnalyzed;
360  if (!IC2) {
361  ApplyLastCallBonus = false;
362  continue;
363  }
364  if (IC2.isAlways())
365  continue;
366 
367  // See if inlining of the original callsite would erase the cost delta of
368  // this callsite. We subtract off the penalty for the call instruction,
369  // which we would be deleting.
370  if (IC2.getCostDelta() <= CandidateCost) {
371  inliningPreventsSomeOuterInline = true;
372  TotalSecondaryCost += IC2.getCost();
373  }
374  }
375  // If all outer calls to Caller would get inlined, the cost for the last
376  // one is set very low by getInlineCost, in anticipation that Caller will
377  // be removed entirely. We did not account for this above unless there
378  // is only one caller of Caller.
379  if (ApplyLastCallBonus)
380  TotalSecondaryCost -= InlineConstants::LastCallToStaticBonus;
381 
382  if (inliningPreventsSomeOuterInline && TotalSecondaryCost < IC.getCost())
383  return true;
384 
385  return false;
386 }
387 
388 static std::basic_ostream<char> &operator<<(std::basic_ostream<char> &R,
389  const ore::NV &Arg) {
390  return R << Arg.Val;
391 }
392 
393 template <class RemarkT>
394 RemarkT &operator<<(RemarkT &&R, const InlineCost &IC) {
395  using namespace ore;
396  if (IC.isAlways()) {
397  R << "(cost=always)";
398  } else if (IC.isNever()) {
399  R << "(cost=never)";
400  } else {
401  R << "(cost=" << ore::NV("Cost", IC.getCost())
402  << ", threshold=" << ore::NV("Threshold", IC.getThreshold()) << ")";
403  }
404  if (const char *Reason = IC.getReason())
405  R << ": " << ore::NV("Reason", Reason);
406  return R;
407 }
408 
409 static std::string inlineCostStr(const InlineCost &IC) {
410  std::stringstream Remark;
411  Remark << IC;
412  return Remark.str();
413 }
414 
415 /// Return the cost only if the inliner should attempt to inline at the given
416 /// CallSite. If we return the cost, we will emit an optimisation remark later
417 /// using that cost, so we won't do so from this function.
421  using namespace ore;
422 
423  InlineCost IC = GetInlineCost(CS);
424  Instruction *Call = CS.getInstruction();
426  Function *Caller = CS.getCaller();
427 
428  if (IC.isAlways()) {
429  LLVM_DEBUG(dbgs() << " Inlining " << inlineCostStr(IC)
430  << ", Call: " << *CS.getInstruction() << "\n");
431  return IC;
432  }
433 
434  if (IC.isNever()) {
435  LLVM_DEBUG(dbgs() << " NOT Inlining " << inlineCostStr(IC)
436  << ", Call: " << *CS.getInstruction() << "\n");
437  ORE.emit([&]() {
438  return OptimizationRemarkMissed(DEBUG_TYPE, "NeverInline", Call)
439  << NV("Callee", Callee) << " not inlined into "
440  << NV("Caller", Caller) << " because it should never be inlined "
441  << IC;
442  });
443  return IC;
444  }
445 
446  if (!IC) {
447  LLVM_DEBUG(dbgs() << " NOT Inlining " << inlineCostStr(IC)
448  << ", Call: " << *CS.getInstruction() << "\n");
449  ORE.emit([&]() {
450  return OptimizationRemarkMissed(DEBUG_TYPE, "TooCostly", Call)
451  << NV("Callee", Callee) << " not inlined into "
452  << NV("Caller", Caller) << " because too costly to inline " << IC;
453  });
454  return IC;
455  }
456 
457  int TotalSecondaryCost = 0;
458  if (shouldBeDeferred(Caller, CS, IC, TotalSecondaryCost, GetInlineCost)) {
459  LLVM_DEBUG(dbgs() << " NOT Inlining: " << *CS.getInstruction()
460  << " Cost = " << IC.getCost()
461  << ", outer Cost = " << TotalSecondaryCost << '\n');
462  ORE.emit([&]() {
463  return OptimizationRemarkMissed(DEBUG_TYPE, "IncreaseCostInOtherContexts",
464  Call)
465  << "Not inlining. Cost of inlining " << NV("Callee", Callee)
466  << " increases the cost of inlining " << NV("Caller", Caller)
467  << " in other contexts";
468  });
469 
470  // IC does not bool() to false, so get an InlineCost that will.
471  // This will not be inspected to make an error message.
472  return None;
473  }
474 
475  LLVM_DEBUG(dbgs() << " Inlining " << inlineCostStr(IC)
476  << ", Call: " << *CS.getInstruction() << '\n');
477  return IC;
478 }
479 
480 /// Return true if the specified inline history ID
481 /// indicates an inline history that includes the specified function.
483  Function *F, int InlineHistoryID,
484  const SmallVectorImpl<std::pair<Function *, int>> &InlineHistory) {
485  while (InlineHistoryID != -1) {
486  assert(unsigned(InlineHistoryID) < InlineHistory.size() &&
487  "Invalid inline history ID");
488  if (InlineHistory[InlineHistoryID].first == F)
489  return true;
490  InlineHistoryID = InlineHistory[InlineHistoryID].second;
491  }
492  return false;
493 }
494 
496  if (InlinerFunctionImportStats != InlinerFunctionImportStatsOpts::No)
498  return false; // No changes to CallGraph.
499 }
500 
502  if (skipSCC(SCC))
503  return false;
504  return inlineCalls(SCC);
505 }
506 
508  const BasicBlock *Block, const Function &Callee,
509  const Function &Caller, const InlineCost &IC) {
510  ORE.emit([&]() {
511  bool AlwaysInline = IC.isAlways();
512  StringRef RemarkName = AlwaysInline ? "AlwaysInline" : "Inlined";
513  return OptimizationRemark(DEBUG_TYPE, RemarkName, DLoc, Block)
514  << ore::NV("Callee", &Callee) << " inlined into "
515  << ore::NV("Caller", &Caller) << " with " << IC;
516  });
517 }
518 
519 static void setInlineRemark(CallSite &CS, StringRef message) {
520  if (!InlineRemarkAttribute)
521  return;
522 
523  Attribute attr = Attribute::get(CS->getContext(), "inline-remark", message);
525 }
526 
527 static bool
529  std::function<AssumptionCache &(Function &)> GetAssumptionCache,
531  bool InsertLifetime,
532  function_ref<InlineCost(CallSite CS)> GetInlineCost,
533  function_ref<AAResults &(Function &)> AARGetter,
535  SmallPtrSet<Function *, 8> SCCFunctions;
536  LLVM_DEBUG(dbgs() << "Inliner visiting SCC:");
537  for (CallGraphNode *Node : SCC) {
538  Function *F = Node->getFunction();
539  if (F)
540  SCCFunctions.insert(F);
541  LLVM_DEBUG(dbgs() << " " << (F ? F->getName() : "INDIRECTNODE"));
542  }
543 
544  // Scan through and identify all call sites ahead of time so that we only
545  // inline call sites in the original functions, not call sites that result
546  // from inlining other functions.
548 
549  // When inlining a callee produces new call sites, we want to keep track of
550  // the fact that they were inlined from the callee. This allows us to avoid
551  // infinite inlining in some obscure cases. To represent this, we use an
552  // index into the InlineHistory vector.
553  SmallVector<std::pair<Function *, int>, 8> InlineHistory;
554 
555  for (CallGraphNode *Node : SCC) {
556  Function *F = Node->getFunction();
557  if (!F || F->isDeclaration())
558  continue;
559 
561  for (BasicBlock &BB : *F)
562  for (Instruction &I : BB) {
563  CallSite CS(cast<Value>(&I));
564  // If this isn't a call, or it is a call to an intrinsic, it can
565  // never be inlined.
566  if (!CS || isa<IntrinsicInst>(I))
567  continue;
568 
569  // If this is a direct call to an external function, we can never inline
570  // it. If it is an indirect call, inlining may resolve it to be a
571  // direct call, so we keep it.
572  if (Function *Callee = CS.getCalledFunction())
573  if (Callee->isDeclaration()) {
574  using namespace ore;
575 
576  setInlineRemark(CS, "unavailable definition");
577  ORE.emit([&]() {
578  return OptimizationRemarkMissed(DEBUG_TYPE, "NoDefinition", &I)
579  << NV("Callee", Callee) << " will not be inlined into "
580  << NV("Caller", CS.getCaller())
581  << " because its definition is unavailable"
582  << setIsVerbose();
583  });
584  continue;
585  }
586 
587  CallSites.push_back(std::make_pair(CS, -1));
588  }
589  }
590 
591  LLVM_DEBUG(dbgs() << ": " << CallSites.size() << " call sites.\n");
592 
593  // If there are no calls in this function, exit early.
594  if (CallSites.empty())
595  return false;
596 
597  // Now that we have all of the call sites, move the ones to functions in the
598  // current SCC to the end of the list.
599  unsigned FirstCallInSCC = CallSites.size();
600  for (unsigned i = 0; i < FirstCallInSCC; ++i)
601  if (Function *F = CallSites[i].first.getCalledFunction())
602  if (SCCFunctions.count(F))
603  std::swap(CallSites[i--], CallSites[--FirstCallInSCC]);
604 
605  InlinedArrayAllocasTy InlinedArrayAllocas;
606  InlineFunctionInfo InlineInfo(&CG, &GetAssumptionCache, PSI);
607 
608  // Now that we have all of the call sites, loop over them and inline them if
609  // it looks profitable to do so.
610  bool Changed = false;
611  bool LocalChange;
612  do {
613  LocalChange = false;
614  // Iterate over the outer loop because inlining functions can cause indirect
615  // calls to become direct calls.
616  // CallSites may be modified inside so ranged for loop can not be used.
617  for (unsigned CSi = 0; CSi != CallSites.size(); ++CSi) {
618  CallSite CS = CallSites[CSi].first;
619 
620  Function *Caller = CS.getCaller();
622 
623  // We can only inline direct calls to non-declarations.
624  if (!Callee || Callee->isDeclaration())
625  continue;
626 
627  Instruction *Instr = CS.getInstruction();
628 
629  bool IsTriviallyDead = isInstructionTriviallyDead(Instr, &TLI);
630 
631  int InlineHistoryID;
632  if (!IsTriviallyDead) {
633  // If this call site was obtained by inlining another function, verify
634  // that the include path for the function did not include the callee
635  // itself. If so, we'd be recursively inlining the same function,
636  // which would provide the same callsites, which would cause us to
637  // infinitely inline.
638  InlineHistoryID = CallSites[CSi].second;
639  if (InlineHistoryID != -1 &&
640  InlineHistoryIncludes(Callee, InlineHistoryID, InlineHistory)) {
641  setInlineRemark(CS, "recursive");
642  continue;
643  }
644  }
645 
646  // FIXME for new PM: because of the old PM we currently generate ORE and
647  // in turn BFI on demand. With the new PM, the ORE dependency should
648  // just become a regular analysis dependency.
649  OptimizationRemarkEmitter ORE(Caller);
650 
651  Optional<InlineCost> OIC = shouldInline(CS, GetInlineCost, ORE);
652  // If the policy determines that we should inline this function,
653  // delete the call instead.
654  if (!OIC.hasValue()) {
655  setInlineRemark(CS, "deferred");
656  continue;
657  }
658 
659  if (!OIC.getValue()) {
660  // shouldInline() call returned a negative inline cost that explains
661  // why this callsite should not be inlined.
662  setInlineRemark(CS, inlineCostStr(*OIC));
663  continue;
664  }
665 
666  // If this call site is dead and it is to a readonly function, we should
667  // just delete the call instead of trying to inline it, regardless of
668  // size. This happens because IPSCCP propagates the result out of the
669  // call and then we're left with the dead call.
670  if (IsTriviallyDead) {
671  LLVM_DEBUG(dbgs() << " -> Deleting dead call: " << *Instr << "\n");
672  // Update the call graph by deleting the edge from Callee to Caller.
673  setInlineRemark(CS, "trivially dead");
674  CG[Caller]->removeCallEdgeFor(*cast<CallBase>(CS.getInstruction()));
675  Instr->eraseFromParent();
676  ++NumCallsDeleted;
677  } else {
678  // Get DebugLoc to report. CS will be invalid after Inliner.
679  DebugLoc DLoc = CS->getDebugLoc();
680  BasicBlock *Block = CS.getParent();
681 
682  // Attempt to inline the function.
683  using namespace ore;
684 
686  CS, InlineInfo, InlinedArrayAllocas, InlineHistoryID,
687  InsertLifetime, AARGetter, ImportedFunctionsStats);
688  if (!IR) {
689  setInlineRemark(CS, std::string(IR) + "; " + inlineCostStr(*OIC));
690  ORE.emit([&]() {
691  return OptimizationRemarkMissed(DEBUG_TYPE, "NotInlined", DLoc,
692  Block)
693  << NV("Callee", Callee) << " will not be inlined into "
694  << NV("Caller", Caller) << ": " << NV("Reason", IR.message);
695  });
696  continue;
697  }
698  ++NumInlined;
699 
700  emit_inlined_into(ORE, DLoc, Block, *Callee, *Caller, *OIC);
701 
702  // If inlining this function gave us any new call sites, throw them
703  // onto our worklist to process. They are useful inline candidates.
704  if (!InlineInfo.InlinedCalls.empty()) {
705  // Create a new inline history entry for this, so that we remember
706  // that these new callsites came about due to inlining Callee.
707  int NewHistoryID = InlineHistory.size();
708  InlineHistory.push_back(std::make_pair(Callee, InlineHistoryID));
709 
710  for (Value *Ptr : InlineInfo.InlinedCalls)
711  CallSites.push_back(std::make_pair(CallSite(Ptr), NewHistoryID));
712  }
713  }
714 
715  // If we inlined or deleted the last possible call site to the function,
716  // delete the function body now.
717  if (Callee && Callee->use_empty() && Callee->hasLocalLinkage() &&
718  // TODO: Can remove if in SCC now.
719  !SCCFunctions.count(Callee) &&
720  // The function may be apparently dead, but if there are indirect
721  // callgraph references to the node, we cannot delete it yet, this
722  // could invalidate the CGSCC iterator.
723  CG[Callee]->getNumReferences() == 0) {
724  LLVM_DEBUG(dbgs() << " -> Deleting dead function: "
725  << Callee->getName() << "\n");
726  CallGraphNode *CalleeNode = CG[Callee];
727 
728  // Remove any call graph edges from the callee to its callees.
729  CalleeNode->removeAllCalledFunctions();
730 
731  // Removing the node for callee from the call graph and delete it.
732  delete CG.removeFunctionFromModule(CalleeNode);
733  ++NumDeleted;
734  }
735 
736  // Remove this call site from the list. If possible, use
737  // swap/pop_back for efficiency, but do not use it if doing so would
738  // move a call site to a function in this SCC before the
739  // 'FirstCallInSCC' barrier.
740  if (SCC.isSingular()) {
741  CallSites[CSi] = CallSites.back();
742  CallSites.pop_back();
743  } else {
744  CallSites.erase(CallSites.begin() + CSi);
745  }
746  --CSi;
747 
748  Changed = true;
749  LocalChange = true;
750  }
751  } while (LocalChange);
752 
753  return Changed;
754 }
755 
757  CallGraph &CG = getAnalysis<CallGraphWrapperPass>().getCallGraph();
758  ACT = &getAnalysis<AssumptionCacheTracker>();
759  PSI = &getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI();
760  auto &TLI = getAnalysis<TargetLibraryInfoWrapperPass>().getTLI();
761  auto GetAssumptionCache = [&](Function &F) -> AssumptionCache & {
762  return ACT->getAssumptionCache(F);
763  };
764  return inlineCallsImpl(SCC, CG, GetAssumptionCache, PSI, TLI, InsertLifetime,
765  [this](CallSite CS) { return getInlineCost(CS); },
767 }
768 
769 /// Remove now-dead linkonce functions at the end of
770 /// processing to avoid breaking the SCC traversal.
772  if (InlinerFunctionImportStats != InlinerFunctionImportStatsOpts::No)
773  ImportedFunctionsStats.dump(InlinerFunctionImportStats ==
774  InlinerFunctionImportStatsOpts::Verbose);
775  return removeDeadFunctions(CG);
776 }
777 
778 /// Remove dead functions that are not included in DNR (Do Not Remove) list.
780  bool AlwaysInlineOnly) {
781  SmallVector<CallGraphNode *, 16> FunctionsToRemove;
782  SmallVector<Function *, 16> DeadFunctionsInComdats;
783 
784  auto RemoveCGN = [&](CallGraphNode *CGN) {
785  // Remove any call graph edges from the function to its callees.
786  CGN->removeAllCalledFunctions();
787 
788  // Remove any edges from the external node to the function's call graph
789  // node. These edges might have been made irrelegant due to
790  // optimization of the program.
792 
793  // Removing the node for callee from the call graph and delete it.
794  FunctionsToRemove.push_back(CGN);
795  };
796 
797  // Scan for all of the functions, looking for ones that should now be removed
798  // from the program. Insert the dead ones in the FunctionsToRemove set.
799  for (const auto &I : CG) {
800  CallGraphNode *CGN = I.second.get();
801  Function *F = CGN->getFunction();
802  if (!F || F->isDeclaration())
803  continue;
804 
805  // Handle the case when this function is called and we only want to care
806  // about always-inline functions. This is a bit of a hack to share code
807  // between here and the InlineAlways pass.
808  if (AlwaysInlineOnly && !F->hasFnAttribute(Attribute::AlwaysInline))
809  continue;
810 
811  // If the only remaining users of the function are dead constants, remove
812  // them.
814 
815  if (!F->isDefTriviallyDead())
816  continue;
817 
818  // It is unsafe to drop a function with discardable linkage from a COMDAT
819  // without also dropping the other members of the COMDAT.
820  // The inliner doesn't visit non-function entities which are in COMDAT
821  // groups so it is unsafe to do so *unless* the linkage is local.
822  if (!F->hasLocalLinkage()) {
823  if (F->hasComdat()) {
824  DeadFunctionsInComdats.push_back(F);
825  continue;
826  }
827  }
828 
829  RemoveCGN(CGN);
830  }
831  if (!DeadFunctionsInComdats.empty()) {
832  // Filter out the functions whose comdats remain alive.
833  filterDeadComdatFunctions(CG.getModule(), DeadFunctionsInComdats);
834  // Remove the rest.
835  for (Function *F : DeadFunctionsInComdats)
836  RemoveCGN(CG[F]);
837  }
838 
839  if (FunctionsToRemove.empty())
840  return false;
841 
842  // Now that we know which functions to delete, do so. We didn't want to do
843  // this inline, because that would invalidate our CallGraph::iterator
844  // objects. :(
845  //
846  // Note that it doesn't matter that we are iterating over a non-stable order
847  // here to do this, it doesn't matter which order the functions are deleted
848  // in.
849  array_pod_sort(FunctionsToRemove.begin(), FunctionsToRemove.end());
850  FunctionsToRemove.erase(
851  std::unique(FunctionsToRemove.begin(), FunctionsToRemove.end()),
852  FunctionsToRemove.end());
853  for (CallGraphNode *CGN : FunctionsToRemove) {
854  delete CG.removeFunctionFromModule(CGN);
855  ++NumDeleted;
856  }
857  return true;
858 }
859 
862  assert(InlinerFunctionImportStats != InlinerFunctionImportStatsOpts::No);
863  ImportedFunctionsStats->dump(InlinerFunctionImportStats ==
864  InlinerFunctionImportStatsOpts::Verbose);
865  }
866 }
867 
870  CGSCCUpdateResult &UR) {
871  const ModuleAnalysisManager &MAM =
872  AM.getResult<ModuleAnalysisManagerCGSCCProxy>(InitialC, CG).getManager();
873  bool Changed = false;
874 
875  assert(InitialC.size() > 0 && "Cannot handle an empty SCC!");
876  Module &M = *InitialC.begin()->getFunction().getParent();
878 
879  if (!ImportedFunctionsStats &&
880  InlinerFunctionImportStats != InlinerFunctionImportStatsOpts::No) {
882  std::make_unique<ImportedFunctionsInliningStatistics>();
884  }
885 
886  // We use a single common worklist for calls across the entire SCC. We
887  // process these in-order and append new calls introduced during inlining to
888  // the end.
889  //
890  // Note that this particular order of processing is actually critical to
891  // avoid very bad behaviors. Consider *highly connected* call graphs where
892  // each function contains a small amonut of code and a couple of calls to
893  // other functions. Because the LLVM inliner is fundamentally a bottom-up
894  // inliner, it can handle gracefully the fact that these all appear to be
895  // reasonable inlining candidates as it will flatten things until they become
896  // too big to inline, and then move on and flatten another batch.
897  //
898  // However, when processing call edges *within* an SCC we cannot rely on this
899  // bottom-up behavior. As a consequence, with heavily connected *SCCs* of
900  // functions we can end up incrementally inlining N calls into each of
901  // N functions because each incremental inlining decision looks good and we
902  // don't have a topological ordering to prevent explosions.
903  //
904  // To compensate for this, we don't process transitive edges made immediate
905  // by inlining until we've done one pass of inlining across the entire SCC.
906  // Large, highly connected SCCs still lead to some amount of code bloat in
907  // this model, but it is uniformly spread across all the functions in the SCC
908  // and eventually they all become too large to inline, rather than
909  // incrementally maknig a single function grow in a super linear fashion.
911 
914  .getManager();
915 
916  // Populate the initial list of calls in this SCC.
917  for (auto &N : InitialC) {
918  auto &ORE =
919  FAM.getResult<OptimizationRemarkEmitterAnalysis>(N.getFunction());
920  // We want to generally process call sites top-down in order for
921  // simplifications stemming from replacing the call with the returned value
922  // after inlining to be visible to subsequent inlining decisions.
923  // FIXME: Using instructions sequence is a really bad way to do this.
924  // Instead we should do an actual RPO walk of the function body.
925  for (Instruction &I : instructions(N.getFunction()))
926  if (auto CS = CallSite(&I))
927  if (Function *Callee = CS.getCalledFunction()) {
928  if (!Callee->isDeclaration())
929  Calls.push_back({CS, -1});
930  else if (!isa<IntrinsicInst>(I)) {
931  using namespace ore;
932  setInlineRemark(CS, "unavailable definition");
933  ORE.emit([&]() {
934  return OptimizationRemarkMissed(DEBUG_TYPE, "NoDefinition", &I)
935  << NV("Callee", Callee) << " will not be inlined into "
936  << NV("Caller", CS.getCaller())
937  << " because its definition is unavailable"
938  << setIsVerbose();
939  });
940  }
941  }
942  }
943  if (Calls.empty())
944  return PreservedAnalyses::all();
945 
946  // Capture updatable variables for the current SCC and RefSCC.
947  auto *C = &InitialC;
948  auto *RC = &C->getOuterRefSCC();
949 
950  // When inlining a callee produces new call sites, we want to keep track of
951  // the fact that they were inlined from the callee. This allows us to avoid
952  // infinite inlining in some obscure cases. To represent this, we use an
953  // index into the InlineHistory vector.
954  SmallVector<std::pair<Function *, int>, 16> InlineHistory;
955 
956  // Track a set vector of inlined callees so that we can augment the caller
957  // with all of their edges in the call graph before pruning out the ones that
958  // got simplified away.
959  SmallSetVector<Function *, 4> InlinedCallees;
960 
961  // Track the dead functions to delete once finished with inlining calls. We
962  // defer deleting these to make it easier to handle the call graph updates.
963  SmallVector<Function *, 4> DeadFunctions;
964 
965  // Loop forward over all of the calls. Note that we cannot cache the size as
966  // inlining can introduce new calls that need to be processed.
967  for (int i = 0; i < (int)Calls.size(); ++i) {
968  // We expect the calls to typically be batched with sequences of calls that
969  // have the same caller, so we first set up some shared infrastructure for
970  // this caller. We also do any pruning we can at this layer on the caller
971  // alone.
972  Function &F = *Calls[i].first.getCaller();
973  LazyCallGraph::Node &N = *CG.lookup(F);
974  if (CG.lookupSCC(N) != C)
975  continue;
976  if (F.hasOptNone()) {
977  setInlineRemark(Calls[i].first, "optnone attribute");
978  continue;
979  }
980 
981  LLVM_DEBUG(dbgs() << "Inlining calls in: " << F.getName() << "\n");
982 
983  // Get a FunctionAnalysisManager via a proxy for this particular node. We
984  // do this each time we visit a node as the SCC may have changed and as
985  // we're going to mutate this particular function we want to make sure the
986  // proxy is in place to forward any invalidation events. We can use the
987  // manager we get here for looking up results for functions other than this
988  // node however because those functions aren't going to be mutated by this
989  // pass.
992  .getManager();
993 
994  // Get the remarks emission analysis for the caller.
995  auto &ORE = FAM.getResult<OptimizationRemarkEmitterAnalysis>(F);
996 
997  std::function<AssumptionCache &(Function &)> GetAssumptionCache =
998  [&](Function &F) -> AssumptionCache & {
999  return FAM.getResult<AssumptionAnalysis>(F);
1000  };
1001  auto GetBFI = [&](Function &F) -> BlockFrequencyInfo & {
1002  return FAM.getResult<BlockFrequencyAnalysis>(F);
1003  };
1004 
1005  auto GetInlineCost = [&](CallSite CS) {
1007  auto &CalleeTTI = FAM.getResult<TargetIRAnalysis>(Callee);
1008  bool RemarksEnabled =
1010  DEBUG_TYPE);
1011  return getInlineCost(cast<CallBase>(*CS.getInstruction()), Params,
1012  CalleeTTI, GetAssumptionCache, {GetBFI}, PSI,
1013  RemarksEnabled ? &ORE : nullptr);
1014  };
1015 
1016  // Now process as many calls as we have within this caller in the sequnece.
1017  // We bail out as soon as the caller has to change so we can update the
1018  // call graph and prepare the context of that new caller.
1019  bool DidInline = false;
1020  for (; i < (int)Calls.size() && Calls[i].first.getCaller() == &F; ++i) {
1021  int InlineHistoryID;
1022  CallSite CS;
1023  std::tie(CS, InlineHistoryID) = Calls[i];
1025 
1026  if (InlineHistoryID != -1 &&
1027  InlineHistoryIncludes(&Callee, InlineHistoryID, InlineHistory)) {
1028  setInlineRemark(CS, "recursive");
1029  continue;
1030  }
1031 
1032  // Check if this inlining may repeat breaking an SCC apart that has
1033  // already been split once before. In that case, inlining here may
1034  // trigger infinite inlining, much like is prevented within the inliner
1035  // itself by the InlineHistory above, but spread across CGSCC iterations
1036  // and thus hidden from the full inline history.
1037  if (CG.lookupSCC(*CG.lookup(Callee)) == C &&
1038  UR.InlinedInternalEdges.count({&N, C})) {
1039  LLVM_DEBUG(dbgs() << "Skipping inlining internal SCC edge from a node "
1040  "previously split out of this SCC by inlining: "
1041  << F.getName() << " -> " << Callee.getName() << "\n");
1042  setInlineRemark(CS, "recursive SCC split");
1043  continue;
1044  }
1045 
1046  Optional<InlineCost> OIC = shouldInline(CS, GetInlineCost, ORE);
1047  // Check whether we want to inline this callsite.
1048  if (!OIC.hasValue()) {
1049  setInlineRemark(CS, "deferred");
1050  continue;
1051  }
1052 
1053  if (!OIC.getValue()) {
1054  // shouldInline() call returned a negative inline cost that explains
1055  // why this callsite should not be inlined.
1056  setInlineRemark(CS, inlineCostStr(*OIC));
1057  continue;
1058  }
1059 
1060  // Setup the data structure used to plumb customization into the
1061  // `InlineFunction` routine.
1062  InlineFunctionInfo IFI(
1063  /*cg=*/nullptr, &GetAssumptionCache, PSI,
1064  &FAM.getResult<BlockFrequencyAnalysis>(*(CS.getCaller())),
1065  &FAM.getResult<BlockFrequencyAnalysis>(Callee));
1066 
1067  // Get DebugLoc to report. CS will be invalid after Inliner.
1068  DebugLoc DLoc = CS->getDebugLoc();
1069  BasicBlock *Block = CS.getParent();
1070 
1071  using namespace ore;
1072 
1073  InlineResult IR = InlineFunction(CS, IFI);
1074  if (!IR) {
1075  setInlineRemark(CS, std::string(IR) + "; " + inlineCostStr(*OIC));
1076  ORE.emit([&]() {
1077  return OptimizationRemarkMissed(DEBUG_TYPE, "NotInlined", DLoc, Block)
1078  << NV("Callee", &Callee) << " will not be inlined into "
1079  << NV("Caller", &F) << ": " << NV("Reason", IR.message);
1080  });
1081  continue;
1082  }
1083  DidInline = true;
1084  InlinedCallees.insert(&Callee);
1085 
1086  ++NumInlined;
1087 
1088  emit_inlined_into(ORE, DLoc, Block, Callee, F, *OIC);
1089 
1090  // Add any new callsites to defined functions to the worklist.
1091  if (!IFI.InlinedCallSites.empty()) {
1092  int NewHistoryID = InlineHistory.size();
1093  InlineHistory.push_back({&Callee, InlineHistoryID});
1094  for (CallSite &CS : reverse(IFI.InlinedCallSites))
1095  if (Function *NewCallee = CS.getCalledFunction())
1096  if (!NewCallee->isDeclaration())
1097  Calls.push_back({CS, NewHistoryID});
1098  }
1099 
1100  if (InlinerFunctionImportStats != InlinerFunctionImportStatsOpts::No)
1102 
1103  // Merge the attributes based on the inlining.
1105 
1106  // For local functions, check whether this makes the callee trivially
1107  // dead. In that case, we can drop the body of the function eagerly
1108  // which may reduce the number of callers of other functions to one,
1109  // changing inline cost thresholds.
1110  if (Callee.hasLocalLinkage()) {
1111  // To check this we also need to nuke any dead constant uses (perhaps
1112  // made dead by this operation on other functions).
1113  Callee.removeDeadConstantUsers();
1114  if (Callee.use_empty() && !CG.isLibFunction(Callee)) {
1115  Calls.erase(
1116  std::remove_if(Calls.begin() + i + 1, Calls.end(),
1117  [&Callee](const std::pair<CallSite, int> &Call) {
1118  return Call.first.getCaller() == &Callee;
1119  }),
1120  Calls.end());
1121  // Clear the body and queue the function itself for deletion when we
1122  // finish inlining and call graph updates.
1123  // Note that after this point, it is an error to do anything other
1124  // than use the callee's address or delete it.
1125  Callee.dropAllReferences();
1126  assert(find(DeadFunctions, &Callee) == DeadFunctions.end() &&
1127  "Cannot put cause a function to become dead twice!");
1128  DeadFunctions.push_back(&Callee);
1129  }
1130  }
1131  }
1132 
1133  // Back the call index up by one to put us in a good position to go around
1134  // the outer loop.
1135  --i;
1136 
1137  if (!DidInline)
1138  continue;
1139  Changed = true;
1140 
1141  // Add all the inlined callees' edges as ref edges to the caller. These are
1142  // by definition trivial edges as we always have *some* transitive ref edge
1143  // chain. While in some cases these edges are direct calls inside the
1144  // callee, they have to be modeled in the inliner as reference edges as
1145  // there may be a reference edge anywhere along the chain from the current
1146  // caller to the callee that causes the whole thing to appear like
1147  // a (transitive) reference edge that will require promotion to a call edge
1148  // below.
1149  for (Function *InlinedCallee : InlinedCallees) {
1150  LazyCallGraph::Node &CalleeN = *CG.lookup(*InlinedCallee);
1151  for (LazyCallGraph::Edge &E : *CalleeN)
1152  RC->insertTrivialRefEdge(N, E.getNode());
1153  }
1154 
1155  // At this point, since we have made changes we have at least removed
1156  // a call instruction. However, in the process we do some incremental
1157  // simplification of the surrounding code. This simplification can
1158  // essentially do all of the same things as a function pass and we can
1159  // re-use the exact same logic for updating the call graph to reflect the
1160  // change.
1161  LazyCallGraph::SCC *OldC = C;
1162  C = &updateCGAndAnalysisManagerForFunctionPass(CG, *C, N, AM, UR);
1163  LLVM_DEBUG(dbgs() << "Updated inlining SCC: " << *C << "\n");
1164  RC = &C->getOuterRefSCC();
1165 
1166  // If this causes an SCC to split apart into multiple smaller SCCs, there
1167  // is a subtle risk we need to prepare for. Other transformations may
1168  // expose an "infinite inlining" opportunity later, and because of the SCC
1169  // mutation, we will revisit this function and potentially re-inline. If we
1170  // do, and that re-inlining also has the potentially to mutate the SCC
1171  // structure, the infinite inlining problem can manifest through infinite
1172  // SCC splits and merges. To avoid this, we capture the originating caller
1173  // node and the SCC containing the call edge. This is a slight over
1174  // approximation of the possible inlining decisions that must be avoided,
1175  // but is relatively efficient to store. We use C != OldC to know when
1176  // a new SCC is generated and the original SCC may be generated via merge
1177  // in later iterations.
1178  //
1179  // It is also possible that even if no new SCC is generated
1180  // (i.e., C == OldC), the original SCC could be split and then merged
1181  // into the same one as itself. and the original SCC will be added into
1182  // UR.CWorklist again, we want to catch such cases too.
1183  //
1184  // FIXME: This seems like a very heavyweight way of retaining the inline
1185  // history, we should look for a more efficient way of tracking it.
1186  if ((C != OldC || UR.CWorklist.count(OldC)) &&
1187  llvm::any_of(InlinedCallees, [&](Function *Callee) {
1188  return CG.lookupSCC(*CG.lookup(*Callee)) == OldC;
1189  })) {
1190  LLVM_DEBUG(dbgs() << "Inlined an internal call edge and split an SCC, "
1191  "retaining this to avoid infinite inlining.\n");
1192  UR.InlinedInternalEdges.insert({&N, OldC});
1193  }
1194  InlinedCallees.clear();
1195  }
1196 
1197  // Now that we've finished inlining all of the calls across this SCC, delete
1198  // all of the trivially dead functions, updating the call graph and the CGSCC
1199  // pass manager in the process.
1200  //
1201  // Note that this walks a pointer set which has non-deterministic order but
1202  // that is OK as all we do is delete things and add pointers to unordered
1203  // sets.
1204  for (Function *DeadF : DeadFunctions) {
1205  // Get the necessary information out of the call graph and nuke the
1206  // function there. Also, cclear out any cached analyses.
1207  auto &DeadC = *CG.lookupSCC(*CG.lookup(*DeadF));
1210  .getManager();
1211  FAM.clear(*DeadF, DeadF->getName());
1212  AM.clear(DeadC, DeadC.getName());
1213  auto &DeadRC = DeadC.getOuterRefSCC();
1214  CG.removeDeadFunction(*DeadF);
1215 
1216  // Mark the relevant parts of the call graph as invalid so we don't visit
1217  // them.
1218  UR.InvalidatedSCCs.insert(&DeadC);
1219  UR.InvalidatedRefSCCs.insert(&DeadRC);
1220 
1221  // And delete the actual function from the module.
1222  M.getFunctionList().erase(DeadF);
1223  ++NumDeleted;
1224  }
1225 
1226  if (!Changed)
1227  return PreservedAnalyses::all();
1228 
1229  // Even if we change the IR, we update the core CGSCC data structures and so
1230  // can preserve the proxy to the function analysis manager.
1231  PreservedAnalyses PA;
1233  return PA;
1234 }
InlinerFunctionImportStatsOpts
Definition: Inliner.cpp:98
uint64_t CallInst * C
SymbolTableList< Instruction >::iterator eraseFromParent()
This method unlinks &#39;this&#39; from the containing basic block and deletes it.
Definition: Instruction.cpp:67
A parsed version of the target data layout string in and methods for querying it. ...
Definition: DataLayout.h:111
bool doFinalization(CallGraph &CG) override
Remove now-dead linkonce functions at the end of processing to avoid breaking the SCC traversal...
Definition: Inliner.cpp:771
void setModuleInfo(const Module &M)
Set information like AllFunctions, ImportedFunctions, ModuleName.
bool hasLocalLinkage() const
Definition: GlobalValue.h:445
Diagnostic information for missed-optimization remarks.
DiagnosticInfoOptimizationBase::Argument NV
bool isNever() const
Definition: InlineCost.h:104
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)
Get the result of an analysis pass for a given IR unit.
Definition: PassManager.h:776
This class represents lattice values for constants.
Definition: AllocatorList.h:23
bool hasOptNone() const
Do not optimize this function (-O0).
Definition: Function.h:616
static bool InlineHistoryIncludes(Function *F, int InlineHistoryID, const SmallVectorImpl< std::pair< Function *, int >> &InlineHistory)
Return true if the specified inline history ID indicates an inline history that includes the specifie...
Definition: Inliner.cpp:482
A Module instance is used to store all the information related to an LLVM module. ...
Definition: Module.h:65
LazyCallGraph::SCC & updateCGAndAnalysisManagerForFunctionPass(LazyCallGraph &G, LazyCallGraph::SCC &C, LazyCallGraph::Node &N, CGSCCAnalysisManager &AM, CGSCCUpdateResult &UR)
Helper to update the call graph after running a function pass.
void recordInline(const Function &Caller, const Function &Callee)
Record inline of.
void clear(IRUnitT &IR, llvm::StringRef Name)
Clear any cached analysis results for a single unit of IR.
Definition: PassManager.h:746
static InlineResult InlineCallIfPossible(CallSite CS, InlineFunctionInfo &IFI, InlinedArrayAllocasTy &InlinedArrayAllocas, int InlineHistory, bool InsertLifetime, function_ref< AAResults &(Function &)> &AARGetter, ImportedFunctionsInliningStatistics &ImportedFunctionsStats)
If it is possible to inline the specified call site, do so and update the CallGraph for this operatio...
Definition: Inliner.cpp:274
virtual InlineCost getInlineCost(CallSite CS)=0
This method must be implemented by the subclass to determine the cost of inlining the specified call ...
SCC * lookupSCC(Node &N) const
Lookup a function&#39;s SCC in the graph.
Analysis providing profile information.
Implements a lazy call graph analysis and related passes for the new pass manager.
This file contains the declarations for metadata subclasses.
An immutable pass that tracks lazily created AssumptionCache objects.
An efficient, type-erasing, non-owning reference to a callable.
Definition: STLExtras.h:116
A cache of @llvm.assume calls within a function.
Analysis pass providing the TargetTransformInfo.
LLVMContext & getContext() const
All values hold a context through their type.
Definition: Value.cpp:745
bool hasFnAttribute(Attribute::AttrKind Kind) const
Return true if the function has the attribute.
Definition: Function.h:323
STATISTIC(NumFunctions, "Total number of functions")
A debug info location.
Definition: DebugLoc.h:33
F(f)
FunTy * getCalledFunction() const
Return the function being called if this is a direct call, otherwise return null (if it&#39;s an indirect...
Definition: CallSite.h:111
Calculate and dump ThinLTO specific inliner stats.
static std::string inlineCostStr(const InlineCost &IC)
Definition: Inliner.cpp:409
This class captures the data input to the InlineFunction call, and records the auxiliary results prod...
Definition: Cloning.h:173
static void mergeInlinedArrayAllocas(Function *Caller, InlineFunctionInfo &IFI, InlinedArrayAllocasTy &InlinedArrayAllocas, int InlineHistory)
Look at all of the allocas that we inlined through this call site.
Definition: Inliner.cpp:161
A proxy from a FunctionAnalysisManager to an SCC.
A node in the call graph for a module.
Definition: CallGraph.h:164
bool skipSCC(CallGraphSCC &SCC) const
Optional passes call this function to check whether the pass should be skipped.
void getAnalysisUsage(AnalysisUsage &Info) const override
getAnalysisUsage - For this class, we declare that we require and preserve the call graph...
Support structure for SCC passes to communicate updates the call graph back to the CGSCC pass manager...
Represents the cost of inlining a function.
Definition: InlineCost.h:63
Module & getModule() const
Returns the module the call graph corresponds to.
Definition: CallGraph.h:113
AnalysisUsage & addRequired()
bool runOnSCC(CallGraphSCC &SCC) override
Main run interface method, this implements the interface required by the Pass class.
Definition: Inliner.cpp:501
const DataLayout & getDataLayout() const
Get the data layout for the module&#39;s target platform.
Definition: Module.cpp:369
unsigned getAlignment() const
Return the alignment of the memory that is being allocated by the instruction.
Definition: Instructions.h:112
An analysis pass based on legacy pass manager to deliver ProfileSummaryInfo.
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
Definition: APFloat.h:41
This class is a functor to be used in legacy module or SCC passes for computing AA results for a func...
bool isLibFunction(Function &F) const
Test whether a function is a known and defined library function tracked by the call graph...
This file contains the simple types necessary to represent the attributes associated with functions a...
Function * removeFunctionFromModule(CallGraphNode *CGN)
Unlink the function from this module, returning it.
Definition: CallGraph.cpp:120
PreservedAnalyses run(LazyCallGraph::SCC &C, CGSCCAnalysisManager &AM, LazyCallGraph &CG, CGSCCUpdateResult &UR)
Definition: Inliner.cpp:868
static Optional< InlineCost > shouldInline(CallSite CS, function_ref< InlineCost(CallSite CS)> GetInlineCost, OptimizationRemarkEmitter &ORE)
Return the cost only if the inliner should attempt to inline at the given CallSite.
Definition: Inliner.cpp:419
auto reverse(ContainerTy &&C, typename std::enable_if< has_rbegin< ContainerTy >::value >::type *=nullptr) -> decltype(make_range(C.rbegin(), C.rend()))
Definition: STLExtras.h:273
InstrTy * getInstruction() const
Definition: CallSite.h:96
This file provides interfaces used to build and manipulate a call graph, which is a very useful tool ...
bool insert(const value_type &X)
Insert a new element into the SetVector.
Definition: SetVector.h:141
const T & getValue() const LLVM_LVALUE_FUNCTION
Definition: Optional.h:255
Class to represent array types.
Definition: DerivedTypes.h:403
A lazily constructed view of the call graph of a module.
const int LastCallToStaticBonus
Definition: InlineCost.h:46
DiagnosticInfoOptimizationBase::setIsVerbose setIsVerbose
void replaceAllUsesWith(Value *V)
Change all uses of this to point to a new Value.
Definition: Value.cpp:429
InlineResult is basically true or false.
Definition: InlineCost.h:135
#define DEBUG_TYPE
Definition: Inliner.cpp:73
void removeDeadConstantUsers() const
If there are any dead constant users dangling off of this constant, remove them.
Definition: Constants.cpp:558
BlockFrequencyInfo pass uses BlockFrequencyInfoImpl implementation to estimate IR basic block frequen...
bool hasLinkOnceODRLinkage() const
Definition: GlobalValue.h:436
SmallVector< CallSite, 8 > InlinedCallSites
All of the new call sites inlined into the caller.
Definition: Cloning.h:204
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:432
bool isAlways() const
Definition: InlineCost.h:103
void array_pod_sort(IteratorTy Start, IteratorTy End)
array_pod_sort - This sorts an array with the specified start and end extent.
Definition: STLExtras.h:1074
void getAnalysisUsage(AnalysisUsage &Info) const override
For this class, we declare that we require and preserve the call graph.
Definition: Inliner.cpp:131
void removeAnyCallEdgeTo(CallGraphNode *Callee)
Removes all call edges from this node to the specified callee function.
Definition: CallGraph.cpp:201
A set of analyses that are preserved following a run of a transformation pass.
Definition: PassManager.h:153
static MetadataAsValue * getIfExists(LLVMContext &Context, Metadata *MD)
Definition: Metadata.cpp:113
LLVM Basic Block Representation.
Definition: BasicBlock.h:57
bool inlineCalls(CallGraphSCC &SCC)
This function performs the main work of the pass.
Definition: Inliner.cpp:756
An analysis pass based on the new PM to deliver ProfileSummaryInfo.
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
ValuesClass values(OptsTy... Options)
Helper to build a ValuesClass by forwarding a variable number of arguments as an initializer list to ...
Definition: CommandLine.h:652
SmallPtrSetImpl< LazyCallGraph::RefSCC * > & InvalidatedRefSCCs
The set of invalidated RefSCCs which should be skipped if they are found in RCWorklist.
SmallPtrSetImpl< LazyCallGraph::SCC * > & InvalidatedSCCs
The set of invalidated SCCs which should be skipped if they are found in CWorklist.
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
Definition: SmallPtrSet.h:370
Diagnostic information for applied optimization remarks.
Represent the analysis usage information of a pass.
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly...
Definition: STLExtras.h:1184
amdgpu Simplify well known AMD library false FunctionCallee Value * Arg
A node in the call graph.
static void emit_inlined_into(OptimizationRemarkEmitter &ORE, DebugLoc &DLoc, const BasicBlock *Block, const Function &Callee, const Function &Caller, const InlineCost &IC)
Definition: Inliner.cpp:507
size_type count(ConstPtrType Ptr) const
count - Return 1 if the specified pointer is in the set, 0 otherwise.
Definition: SmallPtrSet.h:381
A class used to represent edges in the call graph.
auto remove_if(R &&Range, UnaryPredicate P) -> decltype(adl_begin(Range))
Provide wrappers to std::remove_if which take ranges instead of having to pass begin/end explicitly...
Definition: STLExtras.h:1217
LLVMContext & getContext() const
getContext - Return a reference to the LLVMContext associated with this function. ...
Definition: Function.cpp:205
InlineResult InlineFunction(CallBase *CB, InlineFunctionInfo &IFI, AAResults *CalleeAAR=nullptr, bool InsertLifetime=true)
This function inlines the called function into the basic block of the caller.
Used in the streaming interface as the general argument type.
iterator erase(const_iterator CI)
Definition: SmallVector.h:434
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
Definition: PassManager.h:159
virtual bool isMissedOptRemarkEnabled(StringRef PassName) const
Return true if missed optimization remarks are enabled, override to provide different implementation...
bool doInitialization(CallGraph &CG) override
doInitialization - This method is called before the SCC&#39;s of the program has been processed...
Definition: Inliner.cpp:495
size_t size() const
Definition: SmallVector.h:52
auto find(R &&Range, const T &Val) -> decltype(adl_begin(Range))
Provide wrappers to std::find which take ranges instead of having to pass begin/end explicitly...
Definition: STLExtras.h:1198
int getThreshold() const
Get the threshold against which the cost was computed.
Definition: InlineCost.h:115
const char * getReason() const
Get the reason of Always or Never.
Definition: InlineCost.h:121
void getAAResultsAnalysisUsage(AnalysisUsage &AU)
A helper for the legacy pass manager to populate AU to add uses to make sure the analyses required by...
SmallPriorityWorklist< LazyCallGraph::SCC *, 1 > & CWorklist
Worklist of the SCCs queued for processing.
Type * getAllocatedType() const
Return the type that is being allocated by the instruction.
Definition: Instructions.h:105
unsigned first
void addAttribute(unsigned i, Attribute::AttrKind Kind)
Definition: CallSite.h:345
Function * getFunction() const
Returns the function that this call graph node represents.
Definition: CallGraph.h:187
A function analysis which provides an AssumptionCache.
static bool inlineCallsImpl(CallGraphSCC &SCC, CallGraph &CG, std::function< AssumptionCache &(Function &)> GetAssumptionCache, ProfileSummaryInfo *PSI, TargetLibraryInfo &TLI, bool InsertLifetime, function_ref< InlineCost(CallSite CS)> GetInlineCost, function_ref< AAResults &(Function &)> AARGetter, ImportedFunctionsInliningStatistics &ImportedFunctionsStats)
Definition: Inliner.cpp:528
A SetVector that performs no allocations if smaller than a certain size.
Definition: SetVector.h:297
Analysis pass which computes BlockFrequencyInfo.
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements...
Definition: SmallPtrSet.h:417
void removeDeadFunction(Function &F)
Remove a dead function from the call graph (typically to delete it).
ImportedFunctionsInliningStatistics ImportedFunctionsStats
Definition: Inliner.h:77
void emit(DiagnosticInfoOptimizationBase &OptDiag)
Output the remark via the diagnostic handler and to the optimization record file. ...
An analysis over an "inner" IR unit that provides access to an analysis manager over a "outer" IR uni...
Definition: PassManager.h:1160
static cl::opt< bool > DisableInlinedAllocaMerging("disable-inlined-alloca-merging", cl::init(false), cl::Hidden)
Flag to disable manual alloca merging.
This is a &#39;vector&#39; (really, a variable-sized array), optimized for the case when the array is small...
Definition: SmallVector.h:837
Module.h This file contains the declarations for the Module class.
Provides information about what library functions are available for the current target.
static LocalAsMetadata * getIfExists(Value *Local)
Definition: Metadata.h:439
unsigned getABITypeAlignment(Type *Ty) const
Returns the minimum ABI-required alignment for the specified type.
Definition: DataLayout.cpp:752
void dropAllReferences()
dropAllReferences() - This method causes all the subinstructions to "let go" of all references that t...
Definition: Function.cpp:358
AssumptionCacheTracker * ACT
Definition: Inliner.h:75
bool removeDeadFunctions(CallGraph &CG, bool AlwaysInlineOnly=false)
Remove dead functions.
Definition: Inliner.cpp:779
Node * lookup(const Function &F) const
Lookup a function in the graph which has already been scanned and added.
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:132
void mergeAttributesForInlining(Function &Caller, const Function &Callee)
Merge caller&#39;s and callee&#39;s attributes.
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition: BitVector.h:940
void dump(bool Verbose)
Dump stats computed with InlinerStatistics class.
amdgpu Simplify well known AMD library false FunctionCallee Callee
BBTy * getParent() const
Get the basic block containing the call site.
Definition: CallSite.h:101
iterator_range< user_iterator > users()
Definition: Value.h:419
int getCost() const
Get the inline cost estimate.
Definition: InlineCost.h:109
int getCostDelta() const
Get the cost delta from the threshold for inlining.
Definition: InlineCost.h:130
bool hasComdat() const
Definition: GlobalObject.h:104
static void setInlineRemark(CallSite &CS, StringRef message)
Definition: Inliner.cpp:519
#define clEnumValN(ENUMVAL, FLAGNAME, DESC)
Definition: CommandLine.h:627
void filterDeadComdatFunctions(Module &M, SmallVectorImpl< Function *> &DeadComdatFunctions)
Filter out potentially dead comdat functions where other entries keep the entire comdat group alive...
SmallVector< AllocaInst *, 4 > StaticAllocas
InlineFunction fills this in with all static allocas that get copied into the caller.
Definition: Cloning.h:193
bool hasValue() const
Definition: Optional.h:259
The basic data container for the call graph of a Module of IR.
Definition: CallGraph.h:73
LegacyInlinerBase(char &ID)
Definition: Inliner.cpp:123
const char * message
Definition: InlineCost.h:136
LLVM_NODISCARD bool empty() const
Definition: SmallVector.h:55
SmallVector< WeakTrackingVH, 8 > InlinedCalls
InlineFunction fills this in with callsites that were inlined from the callee.
Definition: Cloning.h:197
StringRef getName() const
Return a constant reference to the value&#39;s name.
Definition: Value.cpp:214
#define I(x, y, z)
Definition: MD5.cpp:58
#define N
PassT::Result * getCachedResult(IRUnitT &IR) const
Get the cached result of an analysis pass for a given IR unit.
Definition: PassManager.h:795
LLVM_NODISCARD std::enable_if<!is_simple_type< Y >::value, typename cast_retty< X, const Y >::ret_type >::type dyn_cast(const Y &Val)
Definition: Casting.h:332
void preserve()
Mark an analysis as preserved.
Definition: PassManager.h:174
static Attribute get(LLVMContext &Context, AttrKind Kind, uint64_t Val=0)
Return a uniquified Attribute object.
Definition: Attributes.cpp:80
ProfileSummaryInfo * PSI
Definition: Inliner.h:76
raw_ostream & operator<<(raw_ostream &OS, const APInt &I)
Definition: APInt.h:2045
CallGraphNode * getExternalCallingNode() const
Returns the CallGraphNode which is used to represent undetermined calls into the callgraph.
Definition: CallGraph.h:136
This header provides classes for managing passes over SCCs of the call graph.
bool isDeclaration() const
Return true if the primary definition of this global value is outside of the current translation unit...
Definition: Globals.cpp:231
const DiagnosticHandler * getDiagHandlerPtr() const
getDiagHandlerPtr - Returns const raw pointer of DiagnosticHandler set by setDiagnosticHandler.
bool isArrayAllocation() const
Return true if there is an allocation size parameter to the allocation instruction that is not 1...
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
Module * getParent()
Get the module that this global value is contained inside of...
Definition: GlobalValue.h:575
bool isInstructionTriviallyDead(Instruction *I, const TargetLibraryInfo *TLI=nullptr)
Return true if the result produced by the instruction is not used, and the instruction has no side ef...
Definition: Local.cpp:359
LLVM Value Representation.
Definition: Value.h:73
An SCC of the call graph.
CallGraphSCC - This is a single SCC that a CallGraphSCCPass is run on.
AssumptionCache & getAssumptionCache(Function &F)
Get the cached assumptions for a function.
print Print MemDeps of function
bool hasOneUse() const
Return true if there is exactly one user of this value.
Definition: Value.h:432
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:48
This is the interface for LLVM&#39;s primary stateless and local alias analysis.
inst_range instructions(Function *F)
Definition: InstIterator.h:133
A container for analyses that lazily runs them and caches their results.
This pass exposes codegen information to IR-level passes.
static bool shouldBeDeferred(Function *Caller, CallSite CS, InlineCost IC, int &TotalSecondaryCost, function_ref< InlineCost(CallSite CS)> GetInlineCost)
Return true if inlining of CS can block the caller from being inlined which is proved to be more bene...
Definition: Inliner.cpp:307
static cl::opt< InlinerFunctionImportStatsOpts > InlinerFunctionImportStats("inliner-function-import-stats", cl::init(InlinerFunctionImportStatsOpts::No), cl::values(clEnumValN(InlinerFunctionImportStatsOpts::Basic, "basic", "basic statistics"), clEnumValN(InlinerFunctionImportStatsOpts::Verbose, "verbose", "printing of statistics for each inlined function")), cl::Hidden, cl::desc("Enable inliner stats for imported functions"))
This header defines various interfaces for pass management in LLVM.
#define LLVM_DEBUG(X)
Definition: Debug.h:122
SmallDenseSet< std::pair< LazyCallGraph::Node *, LazyCallGraph::SCC * >, 4 > & InlinedInternalEdges
A hacky area where the inliner can retain history about inlining decisions that mutated the call grap...
This represents the llvm.dbg.declare instruction.
The optimization diagnostic interface.
bool use_empty() const
Definition: Value.h:342
Statically lint checks LLVM IR
Definition: Lint.cpp:192
bool isDefTriviallyDead() const
isDefTriviallyDead - Return true if it is trivially safe to remove this function definition from the ...
Definition: Function.cpp:1359
const BasicBlock * getParent() const
Definition: Instruction.h:66
an instruction to allocate memory on the stack
Definition: Instructions.h:59
static cl::opt< bool > InlineRemarkAttribute("inline-remark-attribute", cl::init(false), cl::Hidden, cl::desc("Enable adding inline-remark attribute to" " callsites processed by inliner but decided" " to be not inlined"))
Flag to add inline messages as callsite attributes &#39;inline-remark&#39;.
FunTy * getCaller() const
Return the caller function for this call site.
Definition: CallSite.h:275