LLVM  14.0.0git
Inliner.cpp
Go to the documentation of this file.
1 //===- Inliner.cpp - Code common to all inliners --------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file implements the mechanics required to implement inlining without
10 // missing any calls and updating the call graph. The decisions of which calls
11 // are profitable to inline are implemented elsewhere.
12 //
13 //===----------------------------------------------------------------------===//
14 
16 #include "llvm/ADT/DenseMap.h"
17 #include "llvm/ADT/None.h"
18 #include "llvm/ADT/Optional.h"
19 #include "llvm/ADT/STLExtras.h"
20 #include "llvm/ADT/ScopeExit.h"
21 #include "llvm/ADT/SetVector.h"
22 #include "llvm/ADT/SmallPtrSet.h"
23 #include "llvm/ADT/SmallVector.h"
24 #include "llvm/ADT/Statistic.h"
25 #include "llvm/ADT/StringRef.h"
41 #include "llvm/IR/Attributes.h"
42 #include "llvm/IR/BasicBlock.h"
43 #include "llvm/IR/DataLayout.h"
44 #include "llvm/IR/DebugLoc.h"
45 #include "llvm/IR/DerivedTypes.h"
46 #include "llvm/IR/DiagnosticInfo.h"
47 #include "llvm/IR/Function.h"
48 #include "llvm/IR/InstIterator.h"
49 #include "llvm/IR/Instruction.h"
50 #include "llvm/IR/Instructions.h"
51 #include "llvm/IR/IntrinsicInst.h"
52 #include "llvm/IR/Metadata.h"
53 #include "llvm/IR/Module.h"
54 #include "llvm/IR/PassManager.h"
55 #include "llvm/IR/User.h"
56 #include "llvm/IR/Value.h"
57 #include "llvm/Pass.h"
58 #include "llvm/Support/Casting.h"
60 #include "llvm/Support/Debug.h"
66 #include <algorithm>
67 #include <cassert>
68 #include <functional>
69 #include <sstream>
70 #include <tuple>
71 #include <utility>
72 #include <vector>
73 
74 using namespace llvm;
75 
76 #define DEBUG_TYPE "inline"
77 
78 STATISTIC(NumInlined, "Number of functions inlined");
79 STATISTIC(NumCallsDeleted, "Number of call sites deleted, not inlined");
80 STATISTIC(NumDeleted, "Number of functions deleted because all callers found");
81 STATISTIC(NumMergedAllocas, "Number of allocas merged together");
82 
83 /// Flag to disable manual alloca merging.
84 ///
85 /// Merging of allocas was originally done as a stack-size saving technique
86 /// prior to LLVM's code generator having support for stack coloring based on
87 /// lifetime markers. It is now in the process of being removed. To experiment
88 /// with disabling it and relying fully on lifetime marker based stack
89 /// coloring, you can pass this flag to LLVM.
90 static cl::opt<bool>
91  DisableInlinedAllocaMerging("disable-inlined-alloca-merging",
92  cl::init(false), cl::Hidden);
93 
95 
97  "cgscc-inline-replay", cl::init(""), cl::value_desc("filename"),
98  cl::desc(
99  "Optimization remarks file containing inline remarks to be replayed "
100  "by inlining from cgscc inline remarks."),
101  cl::Hidden);
102 
104  "inline-enable-priority-order", cl::Hidden, cl::init(false),
105  cl::desc("Enable the priority inline order for the inliner"));
106 
108 
109 LegacyInlinerBase::LegacyInlinerBase(char &ID, bool InsertLifetime)
110  : CallGraphSCCPass(ID), InsertLifetime(InsertLifetime) {}
111 
112 /// For this class, we declare that we require and preserve the call graph.
113 /// If the derived class implements this method, it should
114 /// always explicitly call the implementation here.
121 }
122 
124 
125 /// Look at all of the allocas that we inlined through this call site. If we
126 /// have already inlined other allocas through other calls into this function,
127 /// then we know that they have disjoint lifetimes and that we can merge them.
128 ///
129 /// There are many heuristics possible for merging these allocas, and the
130 /// different options have different tradeoffs. One thing that we *really*
131 /// don't want to hurt is SRoA: once inlining happens, often allocas are no
132 /// longer address taken and so they can be promoted.
133 ///
134 /// Our "solution" for that is to only merge allocas whose outermost type is an
135 /// array type. These are usually not promoted because someone is using a
136 /// variable index into them. These are also often the most important ones to
137 /// merge.
138 ///
139 /// A better solution would be to have real memory lifetime markers in the IR
140 /// and not have the inliner do any merging of allocas at all. This would
141 /// allow the backend to do proper stack slot coloring of all allocas that
142 /// *actually make it to the backend*, which is really what we want.
143 ///
144 /// Because we don't have this information, we do this simple and useful hack.
146  InlinedArrayAllocasTy &InlinedArrayAllocas,
147  int InlineHistory) {
148  SmallPtrSet<AllocaInst *, 16> UsedAllocas;
149 
150  // When processing our SCC, check to see if the call site was inlined from
151  // some other call site. For example, if we're processing "A" in this code:
152  // A() { B() }
153  // B() { x = alloca ... C() }
154  // C() { y = alloca ... }
155  // Assume that C was not inlined into B initially, and so we're processing A
156  // and decide to inline B into A. Doing this makes an alloca available for
157  // reuse and makes a callsite (C) available for inlining. When we process
158  // the C call site we don't want to do any alloca merging between X and Y
159  // because their scopes are not disjoint. We could make this smarter by
160  // keeping track of the inline history for each alloca in the
161  // InlinedArrayAllocas but this isn't likely to be a significant win.
162  if (InlineHistory != -1) // Only do merging for top-level call sites in SCC.
163  return;
164 
165  // Loop over all the allocas we have so far and see if they can be merged with
166  // a previously inlined alloca. If not, remember that we had it.
167  for (unsigned AllocaNo = 0, E = IFI.StaticAllocas.size(); AllocaNo != E;
168  ++AllocaNo) {
169  AllocaInst *AI = IFI.StaticAllocas[AllocaNo];
170 
171  // Don't bother trying to merge array allocations (they will usually be
172  // canonicalized to be an allocation *of* an array), or allocations whose
173  // type is not itself an array (because we're afraid of pessimizing SRoA).
174  ArrayType *ATy = dyn_cast<ArrayType>(AI->getAllocatedType());
175  if (!ATy || AI->isArrayAllocation())
176  continue;
177 
178  // Get the list of all available allocas for this array type.
179  std::vector<AllocaInst *> &AllocasForType = InlinedArrayAllocas[ATy];
180 
181  // Loop over the allocas in AllocasForType to see if we can reuse one. Note
182  // that we have to be careful not to reuse the same "available" alloca for
183  // multiple different allocas that we just inlined, we use the 'UsedAllocas'
184  // set to keep track of which "available" allocas are being used by this
185  // function. Also, AllocasForType can be empty of course!
186  bool MergedAwayAlloca = false;
187  for (AllocaInst *AvailableAlloca : AllocasForType) {
188  Align Align1 = AI->getAlign();
189  Align Align2 = AvailableAlloca->getAlign();
190 
191  // The available alloca has to be in the right function, not in some other
192  // function in this SCC.
193  if (AvailableAlloca->getParent() != AI->getParent())
194  continue;
195 
196  // If the inlined function already uses this alloca then we can't reuse
197  // it.
198  if (!UsedAllocas.insert(AvailableAlloca).second)
199  continue;
200 
201  // Otherwise, we *can* reuse it, RAUW AI into AvailableAlloca and declare
202  // success!
203  LLVM_DEBUG(dbgs() << " ***MERGED ALLOCA: " << *AI
204  << "\n\t\tINTO: " << *AvailableAlloca << '\n');
205 
206  // Move affected dbg.declare calls immediately after the new alloca to
207  // avoid the situation when a dbg.declare precedes its alloca.
208  if (auto *L = LocalAsMetadata::getIfExists(AI))
209  if (auto *MDV = MetadataAsValue::getIfExists(AI->getContext(), L))
210  for (User *U : MDV->users())
211  if (DbgDeclareInst *DDI = dyn_cast<DbgDeclareInst>(U))
212  DDI->moveBefore(AvailableAlloca->getNextNode());
213 
214  AI->replaceAllUsesWith(AvailableAlloca);
215 
216  if (Align1 > Align2)
217  AvailableAlloca->setAlignment(AI->getAlign());
218 
219  AI->eraseFromParent();
220  MergedAwayAlloca = true;
221  ++NumMergedAllocas;
222  IFI.StaticAllocas[AllocaNo] = nullptr;
223  break;
224  }
225 
226  // If we already nuked the alloca, we're done with it.
227  if (MergedAwayAlloca)
228  continue;
229 
230  // If we were unable to merge away the alloca either because there are no
231  // allocas of the right type available or because we reused them all
232  // already, remember that this alloca came from an inlined function and mark
233  // it used so we don't reuse it for other allocas from this inline
234  // operation.
235  AllocasForType.push_back(AI);
236  UsedAllocas.insert(AI);
237  }
238 }
239 
240 /// If it is possible to inline the specified call site,
241 /// do so and update the CallGraph for this operation.
242 ///
243 /// This function also does some basic book-keeping to update the IR. The
244 /// InlinedArrayAllocas map keeps track of any allocas that are already
245 /// available from other functions inlined into the caller. If we are able to
246 /// inline this call site we attempt to reuse already available allocas or add
247 /// any new allocas to the set if not possible.
249  CallBase &CB, InlineFunctionInfo &IFI,
250  InlinedArrayAllocasTy &InlinedArrayAllocas, int InlineHistory,
251  bool InsertLifetime, function_ref<AAResults &(Function &)> &AARGetter,
252  ImportedFunctionsInliningStatistics &ImportedFunctionsStats) {
254  Function *Caller = CB.getCaller();
255 
256  AAResults &AAR = AARGetter(*Callee);
257 
258  // Try to inline the function. Get the list of static allocas that were
259  // inlined.
260  InlineResult IR = InlineFunction(CB, IFI, &AAR, InsertLifetime);
261  if (!IR.isSuccess())
262  return IR;
263 
265  ImportedFunctionsStats.recordInline(*Caller, *Callee);
266 
268 
270  mergeInlinedArrayAllocas(Caller, IFI, InlinedArrayAllocas, InlineHistory);
271 
272  return IR; // success
273 }
274 
275 /// Return true if the specified inline history ID
276 /// indicates an inline history that includes the specified function.
278  Function *F, int InlineHistoryID,
279  const SmallVectorImpl<std::pair<Function *, int>> &InlineHistory) {
280  while (InlineHistoryID != -1) {
281  assert(unsigned(InlineHistoryID) < InlineHistory.size() &&
282  "Invalid inline history ID");
283  if (InlineHistory[InlineHistoryID].first == F)
284  return true;
285  InlineHistoryID = InlineHistory[InlineHistoryID].second;
286  }
287  return false;
288 }
289 
293  return false; // No changes to CallGraph.
294 }
295 
297  if (skipSCC(SCC))
298  return false;
299  return inlineCalls(SCC);
300 }
301 
302 static bool
304  std::function<AssumptionCache &(Function &)> GetAssumptionCache,
305  ProfileSummaryInfo *PSI,
306  std::function<const TargetLibraryInfo &(Function &)> GetTLI,
307  bool InsertLifetime,
308  function_ref<InlineCost(CallBase &CB)> GetInlineCost,
309  function_ref<AAResults &(Function &)> AARGetter,
310  ImportedFunctionsInliningStatistics &ImportedFunctionsStats) {
311  SmallPtrSet<Function *, 8> SCCFunctions;
312  LLVM_DEBUG(dbgs() << "Inliner visiting SCC:");
313  for (CallGraphNode *Node : SCC) {
314  Function *F = Node->getFunction();
315  if (F)
316  SCCFunctions.insert(F);
317  LLVM_DEBUG(dbgs() << " " << (F ? F->getName() : "INDIRECTNODE"));
318  }
319 
320  // Scan through and identify all call sites ahead of time so that we only
321  // inline call sites in the original functions, not call sites that result
322  // from inlining other functions.
324 
325  // When inlining a callee produces new call sites, we want to keep track of
326  // the fact that they were inlined from the callee. This allows us to avoid
327  // infinite inlining in some obscure cases. To represent this, we use an
328  // index into the InlineHistory vector.
329  SmallVector<std::pair<Function *, int>, 8> InlineHistory;
330 
331  for (CallGraphNode *Node : SCC) {
332  Function *F = Node->getFunction();
333  if (!F || F->isDeclaration())
334  continue;
335 
337  for (BasicBlock &BB : *F)
338  for (Instruction &I : BB) {
339  auto *CB = dyn_cast<CallBase>(&I);
340  // If this isn't a call, or it is a call to an intrinsic, it can
341  // never be inlined.
342  if (!CB || isa<IntrinsicInst>(I))
343  continue;
344 
345  // If this is a direct call to an external function, we can never inline
346  // it. If it is an indirect call, inlining may resolve it to be a
347  // direct call, so we keep it.
348  if (Function *Callee = CB->getCalledFunction())
349  if (Callee->isDeclaration()) {
350  using namespace ore;
351 
352  setInlineRemark(*CB, "unavailable definition");
353  ORE.emit([&]() {
354  return OptimizationRemarkMissed(DEBUG_TYPE, "NoDefinition", &I)
355  << NV("Callee", Callee) << " will not be inlined into "
356  << NV("Caller", CB->getCaller())
357  << " because its definition is unavailable"
358  << setIsVerbose();
359  });
360  continue;
361  }
362 
363  CallSites.push_back(std::make_pair(CB, -1));
364  }
365  }
366 
367  LLVM_DEBUG(dbgs() << ": " << CallSites.size() << " call sites.\n");
368 
369  // If there are no calls in this function, exit early.
370  if (CallSites.empty())
371  return false;
372 
373  // Now that we have all of the call sites, move the ones to functions in the
374  // current SCC to the end of the list.
375  unsigned FirstCallInSCC = CallSites.size();
376  for (unsigned I = 0; I < FirstCallInSCC; ++I)
377  if (Function *F = CallSites[I].first->getCalledFunction())
378  if (SCCFunctions.count(F))
379  std::swap(CallSites[I--], CallSites[--FirstCallInSCC]);
380 
381  InlinedArrayAllocasTy InlinedArrayAllocas;
382  InlineFunctionInfo InlineInfo(&CG, GetAssumptionCache, PSI);
383 
384  // Now that we have all of the call sites, loop over them and inline them if
385  // it looks profitable to do so.
386  bool Changed = false;
387  bool LocalChange;
388  do {
389  LocalChange = false;
390  // Iterate over the outer loop because inlining functions can cause indirect
391  // calls to become direct calls.
392  // CallSites may be modified inside so ranged for loop can not be used.
393  for (unsigned CSi = 0; CSi != CallSites.size(); ++CSi) {
394  auto &P = CallSites[CSi];
395  CallBase &CB = *P.first;
396  const int InlineHistoryID = P.second;
397 
398  Function *Caller = CB.getCaller();
400 
401  // We can only inline direct calls to non-declarations.
402  if (!Callee || Callee->isDeclaration())
403  continue;
404 
405  bool IsTriviallyDead = isInstructionTriviallyDead(&CB, &GetTLI(*Caller));
406 
407  if (!IsTriviallyDead) {
408  // If this call site was obtained by inlining another function, verify
409  // that the include path for the function did not include the callee
410  // itself. If so, we'd be recursively inlining the same function,
411  // which would provide the same callsites, which would cause us to
412  // infinitely inline.
413  if (InlineHistoryID != -1 &&
414  inlineHistoryIncludes(Callee, InlineHistoryID, InlineHistory)) {
415  setInlineRemark(CB, "recursive");
416  continue;
417  }
418  }
419 
420  // FIXME for new PM: because of the old PM we currently generate ORE and
421  // in turn BFI on demand. With the new PM, the ORE dependency should
422  // just become a regular analysis dependency.
423  OptimizationRemarkEmitter ORE(Caller);
424 
425  auto OIC = shouldInline(CB, GetInlineCost, ORE);
426  // If the policy determines that we should inline this function,
427  // delete the call instead.
428  if (!OIC)
429  continue;
430 
431  // If this call site is dead and it is to a readonly function, we should
432  // just delete the call instead of trying to inline it, regardless of
433  // size. This happens because IPSCCP propagates the result out of the
434  // call and then we're left with the dead call.
435  if (IsTriviallyDead) {
436  LLVM_DEBUG(dbgs() << " -> Deleting dead call: " << CB << "\n");
437  // Update the call graph by deleting the edge from Callee to Caller.
438  setInlineRemark(CB, "trivially dead");
439  CG[Caller]->removeCallEdgeFor(CB);
440  CB.eraseFromParent();
441  ++NumCallsDeleted;
442  } else {
443  // Get DebugLoc to report. CB will be invalid after Inliner.
444  DebugLoc DLoc = CB.getDebugLoc();
445  BasicBlock *Block = CB.getParent();
446 
447  // Attempt to inline the function.
448  using namespace ore;
449 
451  CB, InlineInfo, InlinedArrayAllocas, InlineHistoryID,
452  InsertLifetime, AARGetter, ImportedFunctionsStats);
453  if (!IR.isSuccess()) {
454  setInlineRemark(CB, std::string(IR.getFailureReason()) + "; " +
455  inlineCostStr(*OIC));
456  ORE.emit([&]() {
457  return OptimizationRemarkMissed(DEBUG_TYPE, "NotInlined", DLoc,
458  Block)
459  << NV("Callee", Callee) << " will not be inlined into "
460  << NV("Caller", Caller) << ": "
461  << NV("Reason", IR.getFailureReason());
462  });
463  continue;
464  }
465  ++NumInlined;
466 
467  emitInlinedInto(ORE, DLoc, Block, *Callee, *Caller, *OIC);
468 
469  // If inlining this function gave us any new call sites, throw them
470  // onto our worklist to process. They are useful inline candidates.
471  if (!InlineInfo.InlinedCalls.empty()) {
472  // Create a new inline history entry for this, so that we remember
473  // that these new callsites came about due to inlining Callee.
474  int NewHistoryID = InlineHistory.size();
475  InlineHistory.push_back(std::make_pair(Callee, InlineHistoryID));
476 
477 #ifndef NDEBUG
478  // Make sure no dupplicates in the inline candidates. This could
479  // happen when a callsite is simpilfied to reusing the return value
480  // of another callsite during function cloning, thus the other
481  // callsite will be reconsidered here.
482  DenseSet<CallBase *> DbgCallSites;
483  for (auto &II : CallSites)
484  DbgCallSites.insert(II.first);
485 #endif
486 
487  for (Value *Ptr : InlineInfo.InlinedCalls) {
488 #ifndef NDEBUG
489  assert(DbgCallSites.count(dyn_cast<CallBase>(Ptr)) == 0);
490 #endif
491  CallSites.push_back(
492  std::make_pair(dyn_cast<CallBase>(Ptr), NewHistoryID));
493  }
494  }
495  }
496 
497  // If we inlined or deleted the last possible call site to the function,
498  // delete the function body now.
499  if (Callee && Callee->use_empty() && Callee->hasLocalLinkage() &&
500  // TODO: Can remove if in SCC now.
501  !SCCFunctions.count(Callee) &&
502  // The function may be apparently dead, but if there are indirect
503  // callgraph references to the node, we cannot delete it yet, this
504  // could invalidate the CGSCC iterator.
505  CG[Callee]->getNumReferences() == 0) {
506  LLVM_DEBUG(dbgs() << " -> Deleting dead function: "
507  << Callee->getName() << "\n");
508  CallGraphNode *CalleeNode = CG[Callee];
509 
510  // Remove any call graph edges from the callee to its callees.
511  CalleeNode->removeAllCalledFunctions();
512 
513  // Removing the node for callee from the call graph and delete it.
514  delete CG.removeFunctionFromModule(CalleeNode);
515  ++NumDeleted;
516  }
517 
518  // Remove this call site from the list. If possible, use
519  // swap/pop_back for efficiency, but do not use it if doing so would
520  // move a call site to a function in this SCC before the
521  // 'FirstCallInSCC' barrier.
522  if (SCC.isSingular()) {
523  CallSites[CSi] = CallSites.back();
524  CallSites.pop_back();
525  } else {
526  CallSites.erase(CallSites.begin() + CSi);
527  }
528  --CSi;
529 
530  Changed = true;
531  LocalChange = true;
532  }
533  } while (LocalChange);
534 
535  return Changed;
536 }
537 
539  CallGraph &CG = getAnalysis<CallGraphWrapperPass>().getCallGraph();
540  ACT = &getAnalysis<AssumptionCacheTracker>();
541  PSI = &getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI();
542  GetTLI = [&](Function &F) -> const TargetLibraryInfo & {
543  return getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F);
544  };
545  auto GetAssumptionCache = [&](Function &F) -> AssumptionCache & {
546  return ACT->getAssumptionCache(F);
547  };
548  return inlineCallsImpl(
549  SCC, CG, GetAssumptionCache, PSI, GetTLI, InsertLifetime,
550  [&](CallBase &CB) { return getInlineCost(CB); }, LegacyAARGetter(*this),
552 }
553 
554 /// Remove now-dead linkonce functions at the end of
555 /// processing to avoid breaking the SCC traversal.
560  return removeDeadFunctions(CG);
561 }
562 
563 /// Remove dead functions that are not included in DNR (Do Not Remove) list.
565  bool AlwaysInlineOnly) {
566  SmallVector<CallGraphNode *, 16> FunctionsToRemove;
567  SmallVector<Function *, 16> DeadFunctionsInComdats;
568 
569  auto RemoveCGN = [&](CallGraphNode *CGN) {
570  // Remove any call graph edges from the function to its callees.
571  CGN->removeAllCalledFunctions();
572 
573  // Remove any edges from the external node to the function's call graph
574  // node. These edges might have been made irrelegant due to
575  // optimization of the program.
577 
578  // Removing the node for callee from the call graph and delete it.
579  FunctionsToRemove.push_back(CGN);
580  };
581 
582  // Scan for all of the functions, looking for ones that should now be removed
583  // from the program. Insert the dead ones in the FunctionsToRemove set.
584  for (const auto &I : CG) {
585  CallGraphNode *CGN = I.second.get();
586  Function *F = CGN->getFunction();
587  if (!F || F->isDeclaration())
588  continue;
589 
590  // Handle the case when this function is called and we only want to care
591  // about always-inline functions. This is a bit of a hack to share code
592  // between here and the InlineAlways pass.
593  if (AlwaysInlineOnly && !F->hasFnAttribute(Attribute::AlwaysInline))
594  continue;
595 
596  // If the only remaining users of the function are dead constants, remove
597  // them.
598  F->removeDeadConstantUsers();
599 
600  if (!F->isDefTriviallyDead())
601  continue;
602 
603  // It is unsafe to drop a function with discardable linkage from a COMDAT
604  // without also dropping the other members of the COMDAT.
605  // The inliner doesn't visit non-function entities which are in COMDAT
606  // groups so it is unsafe to do so *unless* the linkage is local.
607  if (!F->hasLocalLinkage()) {
608  if (F->hasComdat()) {
609  DeadFunctionsInComdats.push_back(F);
610  continue;
611  }
612  }
613 
614  RemoveCGN(CGN);
615  }
616  if (!DeadFunctionsInComdats.empty()) {
617  // Filter out the functions whose comdats remain alive.
618  filterDeadComdatFunctions(CG.getModule(), DeadFunctionsInComdats);
619  // Remove the rest.
620  for (Function *F : DeadFunctionsInComdats)
621  RemoveCGN(CG[F]);
622  }
623 
624  if (FunctionsToRemove.empty())
625  return false;
626 
627  // Now that we know which functions to delete, do so. We didn't want to do
628  // this inline, because that would invalidate our CallGraph::iterator
629  // objects. :(
630  //
631  // Note that it doesn't matter that we are iterating over a non-stable order
632  // here to do this, it doesn't matter which order the functions are deleted
633  // in.
634  array_pod_sort(FunctionsToRemove.begin(), FunctionsToRemove.end());
635  FunctionsToRemove.erase(
636  std::unique(FunctionsToRemove.begin(), FunctionsToRemove.end()),
637  FunctionsToRemove.end());
638  for (CallGraphNode *CGN : FunctionsToRemove) {
639  delete CG.removeFunctionFromModule(CGN);
640  ++NumDeleted;
641  }
642  return true;
643 }
644 
646 InlinerPass::getAdvisor(const ModuleAnalysisManagerCGSCCProxy::Result &MAM,
648  if (OwnedAdvisor)
649  return *OwnedAdvisor;
650 
652  if (!IAA) {
653  // It should still be possible to run the inliner as a stand-alone SCC pass,
654  // for test scenarios. In that case, we default to the
655  // DefaultInlineAdvisor, which doesn't need to keep state between SCC pass
656  // runs. It also uses just the default InlineParams.
657  // In this case, we need to use the provided FAM, which is valid for the
658  // duration of the inliner pass, and thus the lifetime of the owned advisor.
659  // The one we would get from the MAM can be invalidated as a result of the
660  // inliner's activity.
661  OwnedAdvisor =
662  std::make_unique<DefaultInlineAdvisor>(M, FAM, getInlineParams());
663 
664  if (!CGSCCInlineReplayFile.empty())
665  OwnedAdvisor = std::make_unique<ReplayInlineAdvisor>(
666  M, FAM, M.getContext(), std::move(OwnedAdvisor),
668  /*EmitRemarks=*/true);
669 
670  return *OwnedAdvisor;
671  }
672  assert(IAA->getAdvisor() &&
673  "Expected a present InlineAdvisorAnalysis also have an "
674  "InlineAdvisor initialized");
675  return *IAA->getAdvisor();
676 }
677 
680  CGSCCUpdateResult &UR) {
681  const auto &MAMProxy =
682  AM.getResult<ModuleAnalysisManagerCGSCCProxy>(InitialC, CG);
683  bool Changed = false;
684 
685  assert(InitialC.size() > 0 && "Cannot handle an empty SCC!");
686  Module &M = *InitialC.begin()->getFunction().getParent();
687  ProfileSummaryInfo *PSI = MAMProxy.getCachedResult<ProfileSummaryAnalysis>(M);
688 
691  .getManager();
692 
693  InlineAdvisor &Advisor = getAdvisor(MAMProxy, FAM, M);
694  Advisor.onPassEntry();
695 
696  auto AdvisorOnExit = make_scope_exit([&] { Advisor.onPassExit(); });
697 
698  // We use a single common worklist for calls across the entire SCC. We
699  // process these in-order and append new calls introduced during inlining to
700  // the end. The PriorityInlineOrder is optional here, in which the smaller
701  // callee would have a higher priority to inline.
702  //
703  // Note that this particular order of processing is actually critical to
704  // avoid very bad behaviors. Consider *highly connected* call graphs where
705  // each function contains a small amount of code and a couple of calls to
706  // other functions. Because the LLVM inliner is fundamentally a bottom-up
707  // inliner, it can handle gracefully the fact that these all appear to be
708  // reasonable inlining candidates as it will flatten things until they become
709  // too big to inline, and then move on and flatten another batch.
710  //
711  // However, when processing call edges *within* an SCC we cannot rely on this
712  // bottom-up behavior. As a consequence, with heavily connected *SCCs* of
713  // functions we can end up incrementally inlining N calls into each of
714  // N functions because each incremental inlining decision looks good and we
715  // don't have a topological ordering to prevent explosions.
716  //
717  // To compensate for this, we don't process transitive edges made immediate
718  // by inlining until we've done one pass of inlining across the entire SCC.
719  // Large, highly connected SCCs still lead to some amount of code bloat in
720  // this model, but it is uniformly spread across all the functions in the SCC
721  // and eventually they all become too large to inline, rather than
722  // incrementally making a single function grow in a super linear fashion.
723  std::unique_ptr<InlineOrder<std::pair<CallBase *, int>>> Calls;
725  Calls = std::make_unique<PriorityInlineOrder<InlineSizePriority>>();
726  else
727  Calls = std::make_unique<DefaultInlineOrder<std::pair<CallBase *, int>>>();
728  assert(Calls != nullptr && "Expected an initialized InlineOrder");
729 
730  // Populate the initial list of calls in this SCC.
731  for (auto &N : InitialC) {
732  auto &ORE =
734  // We want to generally process call sites top-down in order for
735  // simplifications stemming from replacing the call with the returned value
736  // after inlining to be visible to subsequent inlining decisions.
737  // FIXME: Using instructions sequence is a really bad way to do this.
738  // Instead we should do an actual RPO walk of the function body.
739  for (Instruction &I : instructions(N.getFunction()))
740  if (auto *CB = dyn_cast<CallBase>(&I))
741  if (Function *Callee = CB->getCalledFunction()) {
742  if (!Callee->isDeclaration())
743  Calls->push({CB, -1});
744  else if (!isa<IntrinsicInst>(I)) {
745  using namespace ore;
746  setInlineRemark(*CB, "unavailable definition");
747  ORE.emit([&]() {
748  return OptimizationRemarkMissed(DEBUG_TYPE, "NoDefinition", &I)
749  << NV("Callee", Callee) << " will not be inlined into "
750  << NV("Caller", CB->getCaller())
751  << " because its definition is unavailable"
752  << setIsVerbose();
753  });
754  }
755  }
756  }
757  if (Calls->empty())
758  return PreservedAnalyses::all();
759 
760  // Capture updatable variable for the current SCC.
761  auto *C = &InitialC;
762 
763  // When inlining a callee produces new call sites, we want to keep track of
764  // the fact that they were inlined from the callee. This allows us to avoid
765  // infinite inlining in some obscure cases. To represent this, we use an
766  // index into the InlineHistory vector.
767  SmallVector<std::pair<Function *, int>, 16> InlineHistory;
768 
769  // Track a set vector of inlined callees so that we can augment the caller
770  // with all of their edges in the call graph before pruning out the ones that
771  // got simplified away.
772  SmallSetVector<Function *, 4> InlinedCallees;
773 
774  // Track the dead functions to delete once finished with inlining calls. We
775  // defer deleting these to make it easier to handle the call graph updates.
776  SmallVector<Function *, 4> DeadFunctions;
777 
778  // Loop forward over all of the calls.
779  while (!Calls->empty()) {
780  // We expect the calls to typically be batched with sequences of calls that
781  // have the same caller, so we first set up some shared infrastructure for
782  // this caller. We also do any pruning we can at this layer on the caller
783  // alone.
784  Function &F = *Calls->front().first->getCaller();
785  LazyCallGraph::Node &N = *CG.lookup(F);
786  if (CG.lookupSCC(N) != C) {
787  Calls->pop();
788  continue;
789  }
790 
791  LLVM_DEBUG(dbgs() << "Inlining calls in: " << F.getName() << "\n"
792  << " Function size: " << F.getInstructionCount()
793  << "\n");
794 
795  auto GetAssumptionCache = [&](Function &F) -> AssumptionCache & {
797  };
798 
799  // Now process as many calls as we have within this caller in the sequence.
800  // We bail out as soon as the caller has to change so we can update the
801  // call graph and prepare the context of that new caller.
802  bool DidInline = false;
803  while (!Calls->empty() && Calls->front().first->getCaller() == &F) {
804  auto P = Calls->pop();
805  CallBase *CB = P.first;
806  const int InlineHistoryID = P.second;
808 
809  if (InlineHistoryID != -1 &&
810  inlineHistoryIncludes(&Callee, InlineHistoryID, InlineHistory)) {
811  setInlineRemark(*CB, "recursive");
812  continue;
813  }
814 
815  // Check if this inlining may repeat breaking an SCC apart that has
816  // already been split once before. In that case, inlining here may
817  // trigger infinite inlining, much like is prevented within the inliner
818  // itself by the InlineHistory above, but spread across CGSCC iterations
819  // and thus hidden from the full inline history.
820  if (CG.lookupSCC(*CG.lookup(Callee)) == C &&
821  UR.InlinedInternalEdges.count({&N, C})) {
822  LLVM_DEBUG(dbgs() << "Skipping inlining internal SCC edge from a node "
823  "previously split out of this SCC by inlining: "
824  << F.getName() << " -> " << Callee.getName() << "\n");
825  setInlineRemark(*CB, "recursive SCC split");
826  continue;
827  }
828 
829  auto Advice = Advisor.getAdvice(*CB, OnlyMandatory);
830  // Check whether we want to inline this callsite.
831  if (!Advice->isInliningRecommended()) {
832  Advice->recordUnattemptedInlining();
833  continue;
834  }
835 
836  // Setup the data structure used to plumb customization into the
837  // `InlineFunction` routine.
838  InlineFunctionInfo IFI(
839  /*cg=*/nullptr, GetAssumptionCache, PSI,
842 
843  InlineResult IR =
844  InlineFunction(*CB, IFI, &FAM.getResult<AAManager>(*CB->getCaller()));
845  if (!IR.isSuccess()) {
846  Advice->recordUnsuccessfulInlining(IR);
847  continue;
848  }
849 
850  DidInline = true;
851  InlinedCallees.insert(&Callee);
852  ++NumInlined;
853 
854  LLVM_DEBUG(dbgs() << " Size after inlining: "
855  << F.getInstructionCount() << "\n");
856 
857  // Add any new callsites to defined functions to the worklist.
858  if (!IFI.InlinedCallSites.empty()) {
859  int NewHistoryID = InlineHistory.size();
860  InlineHistory.push_back({&Callee, InlineHistoryID});
861 
862  for (CallBase *ICB : reverse(IFI.InlinedCallSites)) {
863  Function *NewCallee = ICB->getCalledFunction();
864  assert(!(NewCallee && NewCallee->isIntrinsic()) &&
865  "Intrinsic calls should not be tracked.");
866  if (!NewCallee) {
867  // Try to promote an indirect (virtual) call without waiting for
868  // the post-inline cleanup and the next DevirtSCCRepeatedPass
869  // iteration because the next iteration may not happen and we may
870  // miss inlining it.
871  if (tryPromoteCall(*ICB))
872  NewCallee = ICB->getCalledFunction();
873  }
874  if (NewCallee)
875  if (!NewCallee->isDeclaration())
876  Calls->push({ICB, NewHistoryID});
877  }
878  }
879 
880  // Merge the attributes based on the inlining.
882 
883  // For local functions, check whether this makes the callee trivially
884  // dead. In that case, we can drop the body of the function eagerly
885  // which may reduce the number of callers of other functions to one,
886  // changing inline cost thresholds.
887  bool CalleeWasDeleted = false;
888  if (Callee.hasLocalLinkage()) {
889  // To check this we also need to nuke any dead constant uses (perhaps
890  // made dead by this operation on other functions).
891  Callee.removeDeadConstantUsers();
892  if (Callee.use_empty() && !CG.isLibFunction(Callee)) {
893  Calls->erase_if([&](const std::pair<CallBase *, int> &Call) {
894  return Call.first->getCaller() == &Callee;
895  });
896  // Clear the body and queue the function itself for deletion when we
897  // finish inlining and call graph updates.
898  // Note that after this point, it is an error to do anything other
899  // than use the callee's address or delete it.
900  Callee.dropAllReferences();
901  assert(!is_contained(DeadFunctions, &Callee) &&
902  "Cannot put cause a function to become dead twice!");
903  DeadFunctions.push_back(&Callee);
904  CalleeWasDeleted = true;
905  }
906  }
907  if (CalleeWasDeleted)
908  Advice->recordInliningWithCalleeDeleted();
909  else
910  Advice->recordInlining();
911  }
912 
913  if (!DidInline)
914  continue;
915  Changed = true;
916 
917  // At this point, since we have made changes we have at least removed
918  // a call instruction. However, in the process we do some incremental
919  // simplification of the surrounding code. This simplification can
920  // essentially do all of the same things as a function pass and we can
921  // re-use the exact same logic for updating the call graph to reflect the
922  // change.
923 
924  // Inside the update, we also update the FunctionAnalysisManager in the
925  // proxy for this particular SCC. We do this as the SCC may have changed and
926  // as we're going to mutate this particular function we want to make sure
927  // the proxy is in place to forward any invalidation events.
928  LazyCallGraph::SCC *OldC = C;
929  C = &updateCGAndAnalysisManagerForCGSCCPass(CG, *C, N, AM, UR, FAM);
930  LLVM_DEBUG(dbgs() << "Updated inlining SCC: " << *C << "\n");
931 
932  // If this causes an SCC to split apart into multiple smaller SCCs, there
933  // is a subtle risk we need to prepare for. Other transformations may
934  // expose an "infinite inlining" opportunity later, and because of the SCC
935  // mutation, we will revisit this function and potentially re-inline. If we
936  // do, and that re-inlining also has the potentially to mutate the SCC
937  // structure, the infinite inlining problem can manifest through infinite
938  // SCC splits and merges. To avoid this, we capture the originating caller
939  // node and the SCC containing the call edge. This is a slight over
940  // approximation of the possible inlining decisions that must be avoided,
941  // but is relatively efficient to store. We use C != OldC to know when
942  // a new SCC is generated and the original SCC may be generated via merge
943  // in later iterations.
944  //
945  // It is also possible that even if no new SCC is generated
946  // (i.e., C == OldC), the original SCC could be split and then merged
947  // into the same one as itself. and the original SCC will be added into
948  // UR.CWorklist again, we want to catch such cases too.
949  //
950  // FIXME: This seems like a very heavyweight way of retaining the inline
951  // history, we should look for a more efficient way of tracking it.
952  if ((C != OldC || UR.CWorklist.count(OldC)) &&
953  llvm::any_of(InlinedCallees, [&](Function *Callee) {
954  return CG.lookupSCC(*CG.lookup(*Callee)) == OldC;
955  })) {
956  LLVM_DEBUG(dbgs() << "Inlined an internal call edge and split an SCC, "
957  "retaining this to avoid infinite inlining.\n");
958  UR.InlinedInternalEdges.insert({&N, OldC});
959  }
960  InlinedCallees.clear();
961  }
962 
963  // Now that we've finished inlining all of the calls across this SCC, delete
964  // all of the trivially dead functions, updating the call graph and the CGSCC
965  // pass manager in the process.
966  //
967  // Note that this walks a pointer set which has non-deterministic order but
968  // that is OK as all we do is delete things and add pointers to unordered
969  // sets.
970  for (Function *DeadF : DeadFunctions) {
971  // Get the necessary information out of the call graph and nuke the
972  // function there. Also, clear out any cached analyses.
973  auto &DeadC = *CG.lookupSCC(*CG.lookup(*DeadF));
974  FAM.clear(*DeadF, DeadF->getName());
975  AM.clear(DeadC, DeadC.getName());
976  auto &DeadRC = DeadC.getOuterRefSCC();
977  CG.removeDeadFunction(*DeadF);
978 
979  // Mark the relevant parts of the call graph as invalid so we don't visit
980  // them.
981  UR.InvalidatedSCCs.insert(&DeadC);
982  UR.InvalidatedRefSCCs.insert(&DeadRC);
983 
984  // If the updated SCC was the one containing the deleted function, clear it.
985  if (&DeadC == UR.UpdatedC)
986  UR.UpdatedC = nullptr;
987 
988  // And delete the actual function from the module.
989  // The Advisor may use Function pointers to efficiently index various
990  // internal maps, e.g. for memoization. Function cleanup passes like
991  // argument promotion create new functions. It is possible for a new
992  // function to be allocated at the address of a deleted function. We could
993  // index using names, but that's inefficient. Alternatively, we let the
994  // Advisor free the functions when it sees fit.
995  DeadF->getBasicBlockList().clear();
996  M.getFunctionList().remove(DeadF);
997 
998  ++NumDeleted;
999  }
1000 
1001  if (!Changed)
1002  return PreservedAnalyses::all();
1003 
1004  // Even if we change the IR, we update the core CGSCC data structures and so
1005  // can preserve the proxy to the function analysis manager.
1006  PreservedAnalyses PA;
1008  return PA;
1009 }
1010 
1012  bool MandatoryFirst,
1014  unsigned MaxDevirtIterations)
1015  : Params(Params), Mode(Mode), MaxDevirtIterations(MaxDevirtIterations),
1016  PM(), MPM() {
1017  // Run the inliner first. The theory is that we are walking bottom-up and so
1018  // the callees have already been fully optimized, and we want to inline them
1019  // into the callers so that our optimizations can reflect that.
1020  // For PreLinkThinLTO pass, we disable hot-caller heuristic for sample PGO
1021  // because it makes profile annotation in the backend inaccurate.
1022  if (MandatoryFirst)
1023  PM.addPass(InlinerPass(/*OnlyMandatory*/ true));
1024  PM.addPass(InlinerPass());
1025 }
1026 
1029  auto &IAA = MAM.getResult<InlineAdvisorAnalysis>(M);
1030  if (!IAA.tryCreate(Params, Mode, CGSCCInlineReplayFile)) {
1031  M.getContext().emitError(
1032  "Could not setup Inlining Advisor for the requested "
1033  "mode and/or options");
1034  return PreservedAnalyses::all();
1035  }
1036 
1037  // We wrap the CGSCC pipeline in a devirtualization repeater. This will try
1038  // to detect when we devirtualize indirect calls and iterate the SCC passes
1039  // in that case to try and catch knock-on inlining or function attrs
1040  // opportunities. Then we add it to the module pipeline by walking the SCCs
1041  // in postorder (or bottom-up).
1042  // If MaxDevirtIterations is 0, we just don't use the devirtualization
1043  // wrapper.
1044  if (MaxDevirtIterations == 0)
1046  else
1048  createDevirtSCCRepeatedPass(std::move(PM), MaxDevirtIterations)));
1049  MPM.run(M, MAM);
1050 
1051  IAA.clear();
1052 
1053  // The ModulePassManager has already taken care of invalidating analyses.
1054  return PreservedAnalyses::all();
1055 }
CGSCCInlineReplayFile
static cl::opt< std::string > CGSCCInlineReplayFile("cgscc-inline-replay", cl::init(""), cl::value_desc("filename"), cl::desc("Optimization remarks file containing inline remarks to be replayed " "by inlining from cgscc inline remarks."), cl::Hidden)
llvm::CallGraphNode::removeAnyCallEdgeTo
void removeAnyCallEdgeTo(CallGraphNode *Callee)
Removes all call edges from this node to the specified callee function.
Definition: CallGraph.cpp:234
llvm::PreservedAnalyses
A set of analyses that are preserved following a run of a transformation pass.
Definition: PassManager.h:155
llvm::OuterAnalysisManagerProxy
An analysis over an "inner" IR unit that provides access to an analysis manager over an "outer" IR unit.
Definition: PassManager.h:1061
llvm::array_pod_sort
void array_pod_sort(IteratorTy Start, IteratorTy End)
array_pod_sort - This sorts an array with the specified start and end extent.
Definition: STLExtras.h:1446
AssumptionCache.h
llvm::AAManager
A manager for alias analyses.
Definition: AliasAnalysis.h:1233
llvm::OptimizationRemarkMissed
Diagnostic information for missed-optimization remarks.
Definition: DiagnosticInfo.h:730
llvm::Function::isIntrinsic
bool isIntrinsic() const
isIntrinsic - Returns true if the function's name starts with "llvm.".
Definition: Function.h:211
llvm
---------------------— PointerInfo ------------------------------------—
Definition: AllocatorList.h:23
llvm::SmallVectorImpl::erase
iterator erase(const_iterator CI)
Definition: SmallVector.h:705
M
We currently emits eax Perhaps this is what we really should generate is Is imull three or four cycles eax eax The current instruction priority is based on pattern complexity The former is more complex because it folds a load so the latter will not be emitted Perhaps we should use AddedComplexity to give LEA32r a higher priority We should always try to match LEA first since the LEA matching code does some estimate to determine whether the match is profitable if we care more about code then imull is better It s two bytes shorter than movl leal On a Pentium M
Definition: README.txt:252
llvm::CallGraph::getExternalCallingNode
CallGraphNode * getExternalCallingNode() const
Returns the CallGraphNode which is used to represent undetermined calls into the callgraph.
Definition: CallGraph.h:128
Optional.h
llvm::ModuleInlinerWrapperPass::ModuleInlinerWrapperPass
ModuleInlinerWrapperPass(InlineParams Params=getInlineParams(), bool MandatoryFirst=true, InliningAdvisorMode Mode=InliningAdvisorMode::Default, unsigned MaxDevirtIterations=0)
Definition: Inliner.cpp:1011
llvm::LegacyInlinerBase::getInlineCost
virtual InlineCost getInlineCost(CallBase &CB)=0
This method must be implemented by the subclass to determine the cost of inlining the specified call ...
Metadata.h
llvm::AllocaInst::getAlign
Align getAlign() const
Return the alignment of the memory that is being allocated by the instruction.
Definition: Instructions.h:120
llvm::filterDeadComdatFunctions
void filterDeadComdatFunctions(Module &M, SmallVectorImpl< Function * > &DeadComdatFunctions)
Filter out potentially dead comdat functions where other entries keep the entire comdat group alive.
Definition: ModuleUtils.cpp:180
IntrinsicInst.h
llvm::AnalysisManager::getResult
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)
Get the result of an analysis pass for a given IR unit.
Definition: PassManager.h:779
Inliner.h
InstIterator.h
llvm::Function
Definition: Function.h:61
StringRef.h
P
This currently compiles esp xmm0 movsd esp eax eax esp ret We should use not the dag combiner This is because dagcombine2 needs to be able to see through the X86ISD::Wrapper which DAGCombine can t really do The code for turning x load into a single vector load is target independent and should be moved to the dag combiner The code for turning x load into a vector load can only handle a direct load from a global or a direct load from the stack It should be generalized to handle any load from P
Definition: README-SSE.txt:411
Pass.h
DisableInlinedAllocaMerging
static cl::opt< bool > DisableInlinedAllocaMerging("disable-inlined-alloca-merging", cl::init(false), cl::Hidden)
Flag to disable manual alloca merging.
llvm::InlinerFunctionImportStatsOpts::Verbose
@ Verbose
llvm::SmallVector
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1168
Statistic.h
llvm::LegacyInlinerBase::inlineCalls
bool inlineCalls(CallGraphSCC &SCC)
This function performs the main work of the pass.
Definition: Inliner.cpp:538
mergeInlinedArrayAllocas
static void mergeInlinedArrayAllocas(Function *Caller, InlineFunctionInfo &IFI, InlinedArrayAllocasTy &InlinedArrayAllocas, int InlineHistory)
Look at all of the allocas that we inlined through this call site.
Definition: Inliner.cpp:145
llvm::LegacyInlinerBase::GetTLI
std::function< const TargetLibraryInfo &(Function &)> GetTLI
Definition: Inliner.h:80
llvm::ImportedFunctionsInliningStatistics
Calculate and dump ThinLTO specific inliner stats.
Definition: ImportedFunctionsInliningStatistics.h:44
llvm::ImportedFunctionsInliningStatistics::setModuleInfo
void setModuleInfo(const Module &M)
Set information like AllFunctions, ImportedFunctions, ModuleName.
Definition: ImportedFunctionsInliningStatistics.cpp:73
Local.h
OptimizationRemarkEmitter.h
llvm::CallGraph
The basic data container for the call graph of a Module of IR.
Definition: CallGraph.h:73
FAM
FunctionAnalysisManager FAM
Definition: PassBuilderBindings.cpp:59
GlobalsModRef.h
llvm::cl::Hidden
@ Hidden
Definition: CommandLine.h:143
DenseMap.h
Module.h
llvm::reverse
auto reverse(ContainerTy &&C, std::enable_if_t< has_rbegin< ContainerTy >::value > *=nullptr)
Definition: STLExtras.h:329
llvm::InlineParams
Thresholds to tune inline cost analysis.
Definition: InlineCost.h:185
llvm::SmallPtrSet
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
Definition: SmallPtrSet.h:449
llvm::ore::NV
DiagnosticInfoOptimizationBase::Argument NV
Definition: OptimizationRemarkEmitter.h:136
STLExtras.h
llvm::ArrayType
Class to represent array types.
Definition: DerivedTypes.h:357
llvm::detail::DenseSetImpl< ValueT, DenseMap< ValueT, detail::DenseSetEmpty, DenseMapInfo< ValueT >, detail::DenseSetPair< ValueT > >, DenseMapInfo< ValueT > >::insert
std::pair< iterator, bool > insert(const ValueT &V)
Definition: DenseSet.h:206
BasicAliasAnalysis.h
llvm::detail::DenseSetImpl< ValueT, DenseMap< ValueT, detail::DenseSetEmpty, DenseMapInfo< ValueT >, detail::DenseSetPair< ValueT > >, DenseMapInfo< ValueT > >::count
size_type count(const_arg_type_t< ValueT > V) const
Return 1 if the specified key is in the set, 0 otherwise.
Definition: DenseSet.h:97
InlineEnablePriorityOrder
static cl::opt< bool > InlineEnablePriorityOrder("inline-enable-priority-order", cl::Hidden, cl::init(false), cl::desc("Enable the priority inline order for the inliner"))
LLVM_DEBUG
#define LLVM_DEBUG(X)
Definition: Debug.h:101
llvm::getAAResultsAnalysisUsage
void getAAResultsAnalysisUsage(AnalysisUsage &AU)
A helper for the legacy pass manager to populate AU to add uses to make sure the analyses required by...
Definition: AliasAnalysis.cpp:989
F
#define F(x, y, z)
Definition: MD5.cpp:56
llvm::LegacyInlinerBase::LegacyInlinerBase
LegacyInlinerBase(char &ID)
Definition: Inliner.cpp:107
llvm::BasicBlock
LLVM Basic Block Representation.
Definition: BasicBlock.h:58
llvm::dbgs
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:163
Instruction.h
CommandLine.h
ImportedFunctionsInliningStatistics.h
llvm::InlinerFunctionImportStatsOpts::No
@ No
llvm::CallGraphSCC
CallGraphSCC - This is a single SCC that a CallGraphSCCPass is run on.
Definition: CallGraphSCCPass.h:87
llvm::LazyCallGraph::SCC
An SCC of the call graph.
Definition: LazyCallGraph.h:422
llvm::GlobalValue::isDeclaration
bool isDeclaration() const
Return true if the primary definition of this global value is outside of the current translation unit.
Definition: Globals.cpp:228
inlineCallsImpl
static bool inlineCallsImpl(CallGraphSCC &SCC, CallGraph &CG, std::function< AssumptionCache &(Function &)> GetAssumptionCache, ProfileSummaryInfo *PSI, std::function< const TargetLibraryInfo &(Function &)> GetTLI, bool InsertLifetime, function_ref< InlineCost(CallBase &CB)> GetInlineCost, function_ref< AAResults &(Function &)> AARGetter, ImportedFunctionsInliningStatistics &ImportedFunctionsStats)
Definition: Inliner.cpp:303
llvm::AAResults
Definition: AliasAnalysis.h:456
llvm::AllocaInst::getAllocatedType
Type * getAllocatedType() const
Return the type that is being allocated by the instruction.
Definition: Instructions.h:113
E
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
llvm::User
Definition: User.h:44
C
(vector float) vec_cmpeq(*A, *B) C
Definition: README_ALTIVEC.txt:86
llvm::CallBase::getCalledFunction
Function * getCalledFunction() const
Returns the function called, or null if this is an indirect function invocation.
Definition: InstrTypes.h:1393
MAM
ModuleAnalysisManager MAM
Definition: PassBuilderBindings.cpp:61
llvm::AnalysisManager::clear
void clear(IRUnitT &IR, llvm::StringRef Name)
Clear any cached analysis results for a single unit of IR.
Definition: PassManagerImpl.h:36
llvm::AnalysisUsage
Represent the analysis usage information of a pass.
Definition: PassAnalysisSupport.h:47
llvm::InlineCost
Represents the cost of inlining a function.
Definition: InlineCost.h:82
TargetLibraryInfo.h
InlineAdvisor.h
llvm::CallGraphSCCPass::skipSCC
bool skipSCC(CallGraphSCC &SCC) const
Optional passes call this function to check whether the pass should be skipped.
Definition: CallGraphSCCPass.cpp:752
llvm::make_scope_exit
LLVM_NODISCARD detail::scope_exit< typename std::decay< Callable >::type > make_scope_exit(Callable &&F)
Definition: ScopeExit.h:58
llvm::Instruction
Definition: Instruction.h:45
llvm::OuterAnalysisManagerProxy::Result
Result proxy object for OuterAnalysisManagerProxy.
Definition: PassManager.h:1066
InlineInfo
@ InlineInfo
Definition: FunctionInfo.cpp:24
InlineOrder.h
llvm::STATISTIC
STATISTIC(NumFunctions, "Total number of functions")
llvm::createDevirtSCCRepeatedPass
DevirtSCCRepeatedPass createDevirtSCCRepeatedPass(CGSCCPassT &&Pass, int MaxIterations)
A function to deduce a function pass type and wrap it in the templated adaptor.
Definition: CGSCCPassManager.h:566
DebugLoc.h
IR
Statically lint checks LLVM IR
Definition: Lint.cpp:744
SmallPtrSet.h
llvm::CallGraphNode
A node in the call graph for a module.
Definition: CallGraph.h:167
llvm::getInlineParams
InlineParams getInlineParams()
Generate the parameters to tune the inline cost analysis based only on the commandline options.
Definition: InlineCost.cpp:3037
LazyCallGraph.h
llvm::Align
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition: Alignment.h:39
llvm::MCID::Call
@ Call
Definition: MCInstrDesc.h:153
llvm::BlockFrequencyAnalysis
Analysis pass which computes BlockFrequencyInfo.
Definition: BlockFrequencyInfo.h:112
llvm::CallBase::getCaller
Function * getCaller()
Helper to get the caller (the parent function).
Definition: Instructions.cpp:282
llvm::LegacyInlinerBase::ACT
AssumptionCacheTracker * ACT
Definition: Inliner.h:78
llvm::LegacyInlinerBase::removeDeadFunctions
bool removeDeadFunctions(CallGraph &CG, bool AlwaysInlineOnly=false)
Remove dead functions.
Definition: Inliner.cpp:564
llvm::ProfileSummaryInfo
Analysis providing profile information.
Definition: ProfileSummaryInfo.h:39
llvm::LocalAsMetadata::getIfExists
static LocalAsMetadata * getIfExists(Value *Local)
Definition: Metadata.h:449
llvm::function_ref
An efficient, type-erasing, non-owning reference to a callable.
Definition: STLExtras.h:168
llvm::emitInlinedInto
void emitInlinedInto(OptimizationRemarkEmitter &ORE, DebugLoc DLoc, const BasicBlock *Block, const Function &Callee, const Function &Caller, const InlineCost &IC, bool ForProfileContext=false, const char *PassName=nullptr)
Emit ORE message.
Definition: InlineAdvisor.cpp:438
llvm::DenseSet
Implements a dense probed hash-table based set.
Definition: DenseSet.h:268
inlineHistoryIncludes
static bool inlineHistoryIncludes(Function *F, int InlineHistoryID, const SmallVectorImpl< std::pair< Function *, int >> &InlineHistory)
Return true if the specified inline history ID indicates an inline history that includes the specified function.
Definition: Inliner.cpp:277
BasicBlock.h
llvm::cl::opt< bool >
llvm::InliningAdvisorMode
InliningAdvisorMode
There are 3 scenarios we can use the InlineAdvisor:
Definition: InlineAdvisor.h:39
llvm::instructions
inst_range instructions(Function *F)
Definition: InstIterator.h:133
llvm::Instruction::eraseFromParent
SymbolTableList< Instruction >::iterator eraseFromParent()
This method unlinks 'this' from the containing basic block and deletes it.
Definition: Instruction.cpp:78
llvm::CallGraphSCCPass::getAnalysisUsage
void getAnalysisUsage(AnalysisUsage &Info) const override
getAnalysisUsage - For this class, we declare that we require and preserve the call graph.
Definition: CallGraphSCCPass.cpp:659
llvm::LegacyInlinerBase::runOnSCC
bool runOnSCC(CallGraphSCC &SCC) override
Main run interface method, this implements the interface required by the Pass class.
Definition: Inliner.cpp:296
llvm::TargetLibraryInfoWrapperPass
Definition: TargetLibraryInfo.h:463
ProfileSummaryInfo.h
llvm::CallGraphNode::removeAllCalledFunctions
void removeAllCalledFunctions()
Removes all edges from this CallGraphNode to any functions it calls.
Definition: CallGraph.h:228
llvm::DbgDeclareInst
This represents the llvm.dbg.declare instruction.
Definition: IntrinsicInst.h:308
llvm::AssumptionAnalysis
A function analysis which provides an AssumptionCache.
Definition: AssumptionCache.h:169
llvm::PreservedAnalyses::preserve
void preserve()
Mark an analysis as preserved.
Definition: PassManager.h:176
move
compiles ldr LCPI1_0 ldr ldr mov lsr tst moveq r1 ldr LCPI1_1 and r0 bx lr It would be better to do something like to fold the shift into the conditional move
Definition: README.txt:546
llvm::DenseMap
Definition: DenseMap.h:714
CGSCCPassManager.h
I
#define I(x, y, z)
Definition: MD5.cpp:59
Cloning.h
llvm::cl::init
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:443
llvm::ProfileSummaryInfoWrapperPass
An analysis pass based on legacy pass manager to deliver ProfileSummaryInfo.
Definition: ProfileSummaryInfo.h:185
llvm::is_contained
bool is_contained(R &&Range, const E &Element)
Wrapper function around std::find to detect if an element exists in a container.
Definition: STLExtras.h:1612
InlinerFunctionImportStats
cl::opt< InlinerFunctionImportStatsOpts > InlinerFunctionImportStats
assert
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
DEBUG_TYPE
#define DEBUG_TYPE
Definition: Inliner.cpp:76
std::swap
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition: BitVector.h:840
llvm::setInlineRemark
void setInlineRemark(CallBase &CB, StringRef Message)
Set the inline-remark attribute.
Definition: InlineAdvisor.cpp:311
llvm::CallGraph::removeFunctionFromModule
Function * removeFunctionFromModule(CallGraphNode *CGN)
Unlink the function from this module, returning it.
Definition: CallGraph.cpp:162
llvm::OptimizationRemarkEmitter::emit
void emit(DiagnosticInfoOptimizationBase &OptDiag)
Output the remark via the diagnostic handler and to the optimization record file.
Definition: OptimizationRemarkEmitter.cpp:77
llvm::LegacyInlinerBase::PSI
ProfileSummaryInfo * PSI
Definition: Inliner.h:79
InlineCost.h
inlineCallIfPossible
static InlineResult inlineCallIfPossible(CallBase &CB, InlineFunctionInfo &IFI, InlinedArrayAllocasTy &InlinedArrayAllocas, int InlineHistory, bool InsertLifetime, function_ref< AAResults &(Function &)> &AARGetter, ImportedFunctionsInliningStatistics &ImportedFunctionsStats)
If it is possible to inline the specified call site, do so and update the CallGraph for this operatio...
Definition: Inliner.cpp:248
Mode
SI Whole Quad Mode
Definition: SIWholeQuadMode.cpp:262
function
print Print MemDeps of function
Definition: MemDepPrinter.cpp:83
llvm::Module
A Module instance is used to store all the information related to an LLVM module.
Definition: Module.h:67
llvm::LazyCallGraph::Node
A node in the call graph.
Definition: LazyCallGraph.h:318
llvm::SmallPtrSetImpl::count
size_type count(ConstPtrType Ptr) const
count - Return 1 if the specified pointer is in the set, 0 otherwise.
Definition: SmallPtrSet.h:382
llvm::SetVector< T, SmallVector< T, N >, SmallDenseSet< T, N > >::insert
bool insert(const value_type &X)
Insert a new element into the SetVector.
Definition: SetVector.h:141
llvm::AMDGPU::CPol::SCC
@ SCC
Definition: SIDefines.h:292
llvm::ProfileSummaryAnalysis
An analysis pass based on the new PM to deliver ProfileSummaryInfo.
Definition: ProfileSummaryInfo.h:203
llvm::AssumptionCacheTracker
An immutable pass that tracks lazily created AssumptionCache objects.
Definition: AssumptionCache.h:200
llvm::isInstructionTriviallyDead
bool isInstructionTriviallyDead(Instruction *I, const TargetLibraryInfo *TLI=nullptr)
Return true if the result produced by the instruction is not used, and the instruction has no side effects.
Definition: Local.cpp:398
None.h
llvm::OptimizationRemarkEmitter
The optimization diagnostic interface.
Definition: OptimizationRemarkEmitter.h:33
llvm::any_of
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1554
DataLayout.h
llvm::InlineAdvisor::onPassEntry
virtual void onPassEntry()
This must be called when the Inliner pass is entered, to allow the InlineAdvisor update internal stat...
Definition: InlineAdvisor.h:153
llvm::AssumptionCache
A cache of @llvm.assume calls within a function.
Definition: AssumptionCache.h:41
llvm::InlineAdvisor
Interface for deciding whether to inline a call site or not.
Definition: InlineAdvisor.h:136
llvm::Value::replaceAllUsesWith
void replaceAllUsesWith(Value *V)
Change all uses of this to point to a new Value.
Definition: Value.cpp:520
llvm::Value::getContext
LLVMContext & getContext() const
All values hold a context through their type.
Definition: Value.cpp:978
llvm::AllocaInst::isArrayAllocation
bool isArrayAllocation() const
Return true if there is an allocation size parameter to the allocation instruction that is not 1.
Definition: Instructions.cpp:1376
CallPromotionUtils.h
BlockFrequencyInfo.h
llvm::InlineFunctionInfo::InlinedCallSites
SmallVector< CallBase *, 8 > InlinedCallSites
All of the new call sites inlined into the caller.
Definition: Cloning.h:233
Callee
amdgpu Simplify well known AMD library false FunctionCallee Callee
Definition: AMDGPULibCalls.cpp:206
llvm::SetVector< T, SmallVector< T, N >, SmallDenseSet< T, N > >::clear
void clear()
Completely clear the SetVector.
Definition: SetVector.h:220
Attributes.h
llvm::CallGraphNode::getFunction
Function * getFunction() const
Returns the function that this call graph node represents.
Definition: CallGraph.h:198
llvm::ImportedFunctionsInliningStatistics::recordInline
void recordInline(const Function &Caller, const Function &Callee)
Record inline of.
Definition: ImportedFunctionsInliningStatistics.cpp:46
llvm::CGSCCUpdateResult
Support structure for SCC passes to communicate updates the call graph back to the CGSCC pass manager...
Definition: CGSCCPassManager.h:238
llvm::AssumptionCacheTracker::getAssumptionCache
AssumptionCache & getAssumptionCache(Function &F)
Get the cached assumptions for a function.
Definition: AssumptionCache.cpp:272
llvm::PreservedAnalyses::all
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
Definition: PassManager.h:161
llvm::AnalysisManager::getCachedResult
PassT::Result * getCachedResult(IRUnitT &IR) const
Get the cached result of an analysis pass for a given IR unit.
Definition: PassManager.h:798
Casting.h
DiagnosticInfo.h
llvm::InlineAdvisor::onPassExit
virtual void onPassExit()
This must be called when the Inliner pass is exited, as function passes may be run subsequently.
Definition: InlineAdvisor.h:158
Function.h
llvm::CallGraphSCCPass
Definition: CallGraphSCCPass.h:34
PassManager.h
llvm::TargetLibraryInfo
Provides information about what library functions are available for the current target.
Definition: TargetLibraryInfo.h:219
llvm::InlineFunctionInfo
This class captures the data input to the InlineFunction call, and records the auxiliary results produced by it.
Definition: Cloning.h:201
llvm::unique
auto unique(Range &&R, Predicate P)
Definition: STLExtras.h:1700
llvm::cl::value_desc
Definition: CommandLine.h:424
llvm::inlineCostStr
std::string inlineCostStr(const InlineCost &IC)
Utility for extracting the inline cost message to a string.
Definition: InlineAdvisor.cpp:304
llvm::PassManager::run
PreservedAnalyses run(IRUnitT &IR, AnalysisManagerT &AM, ExtraArgTs... ExtraArgs)
Run all of the passes in this manager over the given unit of IR.
Definition: PassManager.h:501
llvm::CallGraph::getModule
Module & getModule() const
Returns the module the call graph corresponds to.
Definition: CallGraph.h:102
llvm::updateCGAndAnalysisManagerForCGSCCPass
LazyCallGraph::SCC & updateCGAndAnalysisManagerForCGSCCPass(LazyCallGraph &G, LazyCallGraph::SCC &C, LazyCallGraph::Node &N, CGSCCAnalysisManager &AM, CGSCCUpdateResult &UR, FunctionAnalysisManager &FAM)
Helper to update the call graph after running a CGSCC pass.
Definition: CGSCCPassManager.cpp:1227
CallGraph.h
llvm::ModuleInlinerWrapperPass::run
PreservedAnalyses run(Module &, ModuleAnalysisManager &)
Definition: Inliner.cpp:1027
llvm::InlineFunctionInfo::StaticAllocas
SmallVector< AllocaInst *, 4 > StaticAllocas
InlineFunction fills this in with all static allocas that get copied into the caller.
Definition: Cloning.h:222
llvm::MetadataAsValue::getIfExists
static MetadataAsValue * getIfExists(LLVMContext &Context, Metadata *MD)
Definition: Metadata.cpp:114
Instructions.h
llvm::InlineAdvisorAnalysis
The InlineAdvisorAnalysis is a module pass because the InlineAdvisor needs to capture state right before inlining commences over the module.
Definition: InlineAdvisor.h:216
SmallVector.h
llvm::Instruction::getDebugLoc
const DebugLoc & getDebugLoc() const
Return the debug location for this node as a DebugLoc.
Definition: Instruction.h:370
User.h
ModuleUtils.h
N
#define N
llvm::Instruction::getParent
const BasicBlock * getParent() const
Definition: Instruction.h:94
llvm::InlinerPass
The inliner pass for the new pass manager.
Definition: Inliner.h:98
llvm::InlinerPass::run
PreservedAnalyses run(LazyCallGraph::SCC &C, CGSCCAnalysisManager &AM, LazyCallGraph &CG, CGSCCUpdateResult &UR)
Definition: Inliner.cpp:678
TargetTransformInfo.h
ScopeExit.h
llvm::LegacyInlinerBase::doFinalization
bool doFinalization(CallGraph &CG) override
Remove now-dead linkonce functions at the end of processing to avoid breaking the SCC traversal.
Definition: Inliner.cpp:556
llvm::SmallVectorImpl
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
Definition: APFloat.h:43
llvm::LegacyInlinerBase::getAnalysisUsage
void getAnalysisUsage(AnalysisUsage &Info) const override
For this class, we declare that we require and preserve the call graph.
Definition: Inliner.cpp:115
llvm::InlineFunction
InlineResult InlineFunction(CallBase &CB, InlineFunctionInfo &IFI, AAResults *CalleeAAR=nullptr, bool InsertLifetime=true, Function *ForwardVarArgsTo=nullptr)
This function inlines the called function into the basic block of the caller.
Definition: InlineFunction.cpp:1760
llvm::CallBase
Base class for all callable instructions (InvokeInst and CallInst) Holds everything related to callin...
Definition: InstrTypes.h:1161
DerivedTypes.h
llvm::SmallSetVector
A SetVector that performs no allocations if smaller than a certain size.
Definition: SetVector.h:307
llvm::AnalysisManager
A container for analyses that lazily runs them and caches their results.
Definition: InstructionSimplify.h:44
BB
Common register allocation spilling lr str ldr sxth r3 ldr mla r4 can lr mov lr str ldr sxth r3 mla r4 and then merge mul and lr str ldr sxth r3 mla r4 It also increase the likelihood the store may become dead bb27 Successors according to LLVM BB
Definition: README.txt:39
llvm::AnalysisUsage::addRequired
AnalysisUsage & addRequired()
Definition: PassAnalysisSupport.h:75
llvm::DebugLoc
A debug info location.
Definition: DebugLoc.h:33
llvm::AttributeFuncs::mergeAttributesForInlining
void mergeAttributesForInlining(Function &Caller, const Function &Callee)
Merge caller's and callee's attributes.
Definition: Attributes.cpp:2110
llvm::AllocaInst
an instruction to allocate memory on the stack
Definition: Instructions.h:62
llvm::cl::desc
Definition: CommandLine.h:414
llvm::LazyCallGraph
A lazily constructed view of the call graph of a module.
Definition: LazyCallGraph.h:113
raw_ostream.h
llvm::PassManager::addPass
std::enable_if_t<!std::is_same< PassT, PassManager >::value > addPass(PassT &&Pass)
Definition: PassManager.h:552
llvm::MaxDevirtIterations
cl::opt< unsigned > MaxDevirtIterations("max-devirt-iterations", cl::ReallyHidden, cl::init(4))
Definition: PassBuilderPipelines.cpp:179
llvm::FunctionAnalysisManagerCGSCCProxy
A proxy from a FunctionAnalysisManager to an SCC.
Definition: CGSCCPassManager.h:405
Value.h
llvm::LegacyAARGetter
This class is a functor to be used in legacy module or SCC passes for computing AA results for a func...
Definition: BasicAliasAnalysis.h:271
MPM
ModulePassManager MPM
Definition: PassBuilderBindings.cpp:70
llvm::createModuleToPostOrderCGSCCPassAdaptor
ModuleToPostOrderCGSCCPassAdaptor createModuleToPostOrderCGSCCPassAdaptor(CGSCCPassT &&Pass)
A function to deduce a function pass type and wrap it in the templated adaptor.
Definition: CGSCCPassManager.h:389
llvm::OptimizationRemarkEmitterAnalysis
Definition: OptimizationRemarkEmitter.h:164
llvm::Value
LLVM Value Representation.
Definition: Value.h:75
llvm::InlineResult
InlineResult is basically true or false.
Definition: InlineCost.h:159
Debug.h
llvm::Value::users
iterator_range< user_iterator > users()
Definition: Value.h:422
llvm::tryPromoteCall
bool tryPromoteCall(CallBase &CB)
Try to promote (devirtualize) a virtual call on an Alloca.
Definition: CallPromotionUtils.cpp:549
SetVector.h
llvm::ImportedFunctionsInliningStatistics::dump
void dump(bool Verbose)
Dump stats computed with InlinerStatistics class.
Definition: ImportedFunctionsInliningStatistics.cpp:97
llvm::shouldInline
Optional< InlineCost > shouldInline(CallBase &CB, function_ref< InlineCost(CallBase &CB)> GetInlineCost, OptimizationRemarkEmitter &ORE, bool EnableDeferral=true)
Return the cost only if the inliner should attempt to inline at the given CallSite.
Definition: InlineAdvisor.cpp:324
llvm::LegacyInlinerBase::doInitialization
bool doInitialization(CallGraph &CG) override
doInitialization - This method is called before the SCC's of the program has been processed,...
Definition: Inliner.cpp:290
llvm::ore::setIsVerbose
DiagnosticInfoOptimizationBase::setIsVerbose setIsVerbose
Definition: OptimizationRemarkEmitter.h:137
llvm::SmallPtrSetImpl::insert
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
Definition: SmallPtrSet.h:364
llvm::LegacyInlinerBase::ImportedFunctionsStats
ImportedFunctionsInliningStatistics ImportedFunctionsStats
Definition: Inliner.h:81
llvm::Intrinsic::ID
unsigned ID
Definition: TargetTransformInfo.h:37