//===- InlineCost.cpp - Cost analysis for inliner -------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file implements inline cost analysis.
//
//===----------------------------------------------------------------------===//

14 #include "llvm/ADT/STLExtras.h"
15 #include "llvm/ADT/SetVector.h"
16 #include "llvm/ADT/SmallPtrSet.h"
17 #include "llvm/ADT/SmallVector.h"
18 #include "llvm/ADT/Statistic.h"
24 #include "llvm/Analysis/LoopInfo.h"
30 #include "llvm/Config/llvm-config.h"
32 #include "llvm/IR/CallingConv.h"
33 #include "llvm/IR/DataLayout.h"
34 #include "llvm/IR/Dominators.h"
36 #include "llvm/IR/GlobalAlias.h"
37 #include "llvm/IR/InstVisitor.h"
38 #include "llvm/IR/IntrinsicInst.h"
39 #include "llvm/IR/Operator.h"
40 #include "llvm/IR/PatternMatch.h"
42 #include "llvm/Support/Debug.h"
45 #include <climits>
46 #include <limits>
47 
using namespace llvm;

#define DEBUG_TYPE "inline-cost"

STATISTIC(NumCallsAnalyzed, "Number of call sites analyzed");

static cl::opt<int>
    DefaultThreshold("inlinedefault-threshold", cl::Hidden, cl::init(225),
                     cl::desc("Default amount of inlining to perform"));

// We introduce this option since there is a minor compile-time win by avoiding
// addition of TTI attributes (target-features in particular) to inline
// candidates when they are guaranteed to be the same as top level methods in
// some use cases. If we avoid adding the attribute, we need an option to avoid
// checking these attributes.
static cl::opt<bool> IgnoreTTIInlineCompatible(
    "ignore-tti-inline-compatible", cl::Hidden, cl::init(false),
    cl::desc("Ignore TTI attributes compatibility check between callee/caller "
             "during inline cost calculation"));

static cl::opt<bool> PrintInstructionComments(
    "print-instruction-comments", cl::Hidden, cl::init(false),
    cl::desc("Prints comments for instruction based on inline cost analysis"));

73  "inline-threshold", cl::Hidden, cl::init(225),
74  cl::desc("Control the amount of inlining to perform (default = 225)"));
75 
77  "inlinehint-threshold", cl::Hidden, cl::init(325),
78  cl::desc("Threshold for inlining functions with inline hint"));
79 
80 static cl::opt<int>
81  ColdCallSiteThreshold("inline-cold-callsite-threshold", cl::Hidden,
82  cl::init(45),
83  cl::desc("Threshold for inlining cold callsites"));
84 
86  "inline-enable-cost-benefit-analysis", cl::Hidden, cl::init(false),
87  cl::desc("Enable the cost-benefit analysis for the inliner"));
88 
90  "inline-savings-multiplier", cl::Hidden, cl::init(8),
91  cl::desc("Multiplier to multiply cycle savings by during inlining"));
92 
93 static cl::opt<int>
94  InlineSizeAllowance("inline-size-allowance", cl::Hidden, cl::init(100),
95  cl::desc("The maximum size of a callee that get's "
96  "inlined without sufficient cycle savings"));
97 
98 // We introduce this threshold to help performance of instrumentation based
99 // PGO before we actually hook up inliner with analysis passes such as BPI and
100 // BFI.
102  "inlinecold-threshold", cl::Hidden, cl::init(45),
103  cl::desc("Threshold for inlining functions with cold attribute"));
104 
105 static cl::opt<int>
106  HotCallSiteThreshold("hot-callsite-threshold", cl::Hidden, cl::init(3000),
107  cl::desc("Threshold for hot callsites "));
108 
110  "locally-hot-callsite-threshold", cl::Hidden, cl::init(525),
111  cl::desc("Threshold for locally hot callsites "));
112 
114  "cold-callsite-rel-freq", cl::Hidden, cl::init(2),
115  cl::desc("Maximum block frequency, expressed as a percentage of caller's "
116  "entry frequency, for a callsite to be cold in the absence of "
117  "profile information."));
118 
120  "hot-callsite-rel-freq", cl::Hidden, cl::init(60),
121  cl::desc("Minimum block frequency, expressed as a multiple of caller's "
122  "entry frequency, for a callsite to be hot in the absence of "
123  "profile information."));
124 
125 static cl::opt<int>
126  InstrCost("inline-instr-cost", cl::Hidden, cl::init(5),
127  cl::desc("Cost of a single instruction when inlining"));
128 
129 static cl::opt<int>
130  MemAccessCost("inline-memaccess-cost", cl::Hidden, cl::init(0),
131  cl::desc("Cost of load/store instruction when inlining"));
132 
134  "inline-call-penalty", cl::Hidden, cl::init(25),
135  cl::desc("Call penalty that is applied per callsite when inlining"));
136 
137 static cl::opt<size_t>
138  StackSizeThreshold("inline-max-stacksize", cl::Hidden,
140  cl::desc("Do not inline functions with a stack size "
141  "that exceeds the specified limit"));
142 
143 static cl::opt<size_t>
144  RecurStackSizeThreshold("recursive-inline-max-stacksize", cl::Hidden,
146  cl::desc("Do not inline recursive functions with a stack "
147  "size that exceeds the specified limit"));
148 
150  "inline-cost-full", cl::Hidden,
151  cl::desc("Compute the full inline cost of a call site even when the cost "
152  "exceeds the threshold."));
153 
155  "inline-caller-superset-nobuiltin", cl::Hidden, cl::init(true),
156  cl::desc("Allow inlining when caller has a superset of callee's nobuiltin "
157  "attributes."));
158 
160  "disable-gep-const-evaluation", cl::Hidden, cl::init(false),
161  cl::desc("Disables evaluation of GetElementPtr with constant operands"));
162 
namespace llvm {
Optional<int> getStringFnAttrAsInt(const Attribute &Attr) {
  if (Attr.isValid()) {
    int AttrValue = 0;
    if (!Attr.getValueAsString().getAsInteger(10, AttrValue))
      return AttrValue;
  }
  return None;
}

Optional<int> getStringFnAttrAsInt(CallBase &CB, StringRef AttrKind) {
  return getStringFnAttrAsInt(CB.getFnAttr(AttrKind));
}

Optional<int> getStringFnAttrAsInt(Function *F, StringRef AttrKind) {
  return getStringFnAttrAsInt(F->getFnAttribute(AttrKind));
}

namespace InlineConstants {
int getInstrCost() { return InstrCost; }

} // namespace InlineConstants

} // namespace llvm

namespace {
class InlineCostCallAnalyzer;

// This struct is used to store information about inline cost of a
// particular instruction
struct InstructionCostDetail {
  int CostBefore = 0;
  int CostAfter = 0;
  int ThresholdBefore = 0;
  int ThresholdAfter = 0;

  int getThresholdDelta() const { return ThresholdAfter - ThresholdBefore; }

  int getCostDelta() const { return CostAfter - CostBefore; }

  bool hasThresholdChanged() const { return ThresholdAfter != ThresholdBefore; }
};

class InlineCostAnnotationWriter : public AssemblyAnnotationWriter {
private:
  InlineCostCallAnalyzer *const ICCA;

public:
  InlineCostAnnotationWriter(InlineCostCallAnalyzer *ICCA) : ICCA(ICCA) {}
  void emitInstructionAnnot(const Instruction *I,
                            formatted_raw_ostream &OS) override;
};

/// Carry out call site analysis, in order to evaluate inlinability.
/// NOTE: the type is currently used as an implementation detail of functions
/// such as llvm::getInlineCost. Note the function_ref constructor parameters -
/// the expectation is that they come from the outer scope, from the wrapper
/// functions. If we want to support constructing CallAnalyzer objects where
/// lambdas are provided inline at construction, or where the object needs to
/// otherwise survive past the scope of the provided functions, we need to
/// revisit the argument types.
class CallAnalyzer : public InstVisitor<CallAnalyzer, bool> {
  typedef InstVisitor<CallAnalyzer, bool> Base;
  friend class InstVisitor<CallAnalyzer, bool>;

protected:
  virtual ~CallAnalyzer() = default;
  /// The TargetTransformInfo available for this compilation.
  const TargetTransformInfo &TTI;

  /// Getter for the cache of @llvm.assume intrinsics.
  function_ref<AssumptionCache &(Function &)> GetAssumptionCache;

  /// Getter for BlockFrequencyInfo.
  function_ref<BlockFrequencyInfo &(Function &)> GetBFI;

  /// Profile summary information.
  ProfileSummaryInfo *PSI;

  /// The called function.
  Function &F;

  // Cache the DataLayout since we use it a lot.
  const DataLayout &DL;

  /// The OptimizationRemarkEmitter available for this compilation.
  OptimizationRemarkEmitter *ORE;

  /// The candidate callsite being analyzed. Please do not use this to do
  /// analysis in the caller function; we want the inline cost query to be
  /// easily cacheable. Instead, use the cover function paramHasAttr.
  CallBase &CandidateCall;

  /// Extension points for handling callsite features.
  // Called before a basic block was analyzed.
  virtual void onBlockStart(const BasicBlock *BB) {}

  /// Called after a basic block was analyzed.
  virtual void onBlockAnalyzed(const BasicBlock *BB) {}

  /// Called before an instruction is analyzed.
  virtual void onInstructionAnalysisStart(const Instruction *I) {}

  /// Called after an instruction was analyzed.
  virtual void onInstructionAnalysisFinish(const Instruction *I) {}

  /// Called at the end of the analysis of the callsite. Return the outcome of
  /// the analysis, i.e. 'InlineResult(true)' if the inlining may happen, or
  /// the reason it can't.
  virtual InlineResult finalizeAnalysis() { return InlineResult::success(); }
  /// Called when we're about to start processing a basic block, and every time
  /// we are done processing an instruction. Return true if there is no point in
  /// continuing the analysis (e.g. we've determined already the call site is
  /// too expensive to inline).
  virtual bool shouldStop() { return false; }

  /// Called before the analysis of the callee body starts (with callsite
  /// contexts propagated). It checks callsite-specific information. Return a
  /// reason analysis can't continue if that's the case, or 'true' if it may
  /// continue.
  virtual InlineResult onAnalysisStart() { return InlineResult::success(); }
  /// Called if the analysis engine decides SROA cannot be done for the given
  /// alloca.
  virtual void onDisableSROA(AllocaInst *Arg) {}

  /// Called when the analysis engine determines load elimination won't happen.
  virtual void onDisableLoadElimination() {}

  /// Called when we visit a CallBase, before the analysis starts. Return false
  /// to stop further processing of the instruction.
  virtual bool onCallBaseVisitStart(CallBase &Call) { return true; }

  /// Called to account for a call.
  virtual void onCallPenalty() {}

  /// Called to account for a load or store.
  virtual void onMemAccess() {}

  /// Called to account for the expectation the inlining would result in a load
  /// elimination.
  virtual void onLoadEliminationOpportunity() {}

  /// Called to account for the cost of argument setup for the Call in the
  /// callee's body (not the callsite currently under analysis).
  virtual void onCallArgumentSetup(const CallBase &Call) {}

  /// Called to account for a load relative intrinsic.
  virtual void onLoadRelativeIntrinsic() {}

  /// Called to account for a lowered call.
  virtual void onLoweredCall(Function *F, CallBase &Call, bool IsIndirectCall) {
  }

  /// Account for a jump table of given size. Return false to stop further
  /// processing of the switch instruction.
  virtual bool onJumpTable(unsigned JumpTableSize) { return true; }

  /// Account for a case cluster of given size. Return false to stop further
  /// processing of the instruction.
  virtual bool onCaseCluster(unsigned NumCaseCluster) { return true; }

  /// Called at the end of processing a switch instruction, with the given
  /// number of case clusters.
  virtual void onFinalizeSwitch(unsigned JumpTableSize,
                                unsigned NumCaseCluster) {}

  /// Called to account for any other instruction not specifically accounted
  /// for.
  virtual void onMissedSimplification() {}

  /// Start accounting potential benefits due to SROA for the given alloca.
  virtual void onInitializeSROAArg(AllocaInst *Arg) {}

  /// Account SROA savings for the AllocaInst value.
  virtual void onAggregateSROAUse(AllocaInst *V) {}

  bool handleSROA(Value *V, bool DoNotDisable) {
    // Check for SROA candidates in comparisons.
    if (auto *SROAArg = getSROAArgForValueOrNull(V)) {
      if (DoNotDisable) {
        onAggregateSROAUse(SROAArg);
        return true;
      }
      disableSROAForArg(SROAArg);
    }
    return false;
  }

  bool IsCallerRecursive = false;
  bool IsRecursiveCall = false;
  bool ExposesReturnsTwice = false;
  bool HasDynamicAlloca = false;
  bool ContainsNoDuplicateCall = false;
  bool HasReturn = false;
  bool HasIndirectBr = false;
  bool HasUninlineableIntrinsic = false;
  bool InitsVargArgs = false;

  /// Number of bytes allocated statically by the callee.
  uint64_t AllocatedSize = 0;
  unsigned NumInstructions = 0;
  unsigned NumVectorInstructions = 0;

  /// While we walk the potentially-inlined instructions, we build up and
  /// maintain a mapping of simplified values specific to this callsite. The
  /// idea is to propagate any special information we have about arguments to
  /// this call through the inlinable section of the function, and account for
  /// likely simplifications post-inlining. The most important aspect we track
  /// is CFG altering simplifications -- when we prove a basic block dead, that
  /// can cause dramatic shifts in the cost of inlining a function.
  DenseMap<Value *, Constant *> SimplifiedValues;

  /// Keep track of the values which map back (through function arguments) to
  /// allocas on the caller stack which could be simplified through SROA.
  DenseMap<Value *, AllocaInst *> SROAArgValues;

  /// Keep track of Allocas for which we believe we may get SROA optimization.
  DenseSet<AllocaInst *> EnabledSROAAllocas;

  /// Keep track of values which map to a pointer base and constant offset.
  DenseMap<Value *, std::pair<Value *, APInt>> ConstantOffsetPtrs;

  /// Keep track of dead blocks due to the constant arguments.
  SmallPtrSet<BasicBlock *, 16> DeadBlocks;

  /// The mapping of the blocks to their known unique successors due to the
  /// constant arguments.
  DenseMap<BasicBlock *, BasicBlock *> KnownSuccessors;

  /// Model the elimination of repeated loads that is expected to happen
  /// whenever we simplify away the stores that would otherwise cause them to
  /// be loads.
  bool EnableLoadElimination = true;

  /// Whether we allow inlining for recursive call.
  bool AllowRecursiveCall = false;

  SmallPtrSet<Value *, 16> LoadAddrSet;

  AllocaInst *getSROAArgForValueOrNull(Value *V) const {
    auto It = SROAArgValues.find(V);
    if (It == SROAArgValues.end() || EnabledSROAAllocas.count(It->second) == 0)
      return nullptr;
    return It->second;
  }

  // Custom simplification helper routines.
  bool isAllocaDerivedArg(Value *V);
  void disableSROAForArg(AllocaInst *SROAArg);
  void disableSROA(Value *V);
  void findDeadBlocks(BasicBlock *CurrBB, BasicBlock *NextBB);
  void disableLoadElimination();
  bool isGEPFree(GetElementPtrInst &GEP);
  bool canFoldInboundsGEP(GetElementPtrInst &I);
  bool accumulateGEPOffset(GEPOperator &GEP, APInt &Offset);
  bool simplifyCallSite(Function *F, CallBase &Call);
  bool simplifyInstruction(Instruction &I);
  bool simplifyIntrinsicCallIsConstant(CallBase &CB);
  ConstantInt *stripAndComputeInBoundsConstantOffsets(Value *&V);

  /// Return true if the given argument to the function being considered for
  /// inlining has the given attribute set either at the call site or the
  /// function declaration. Primarily used to inspect call site specific
  /// attributes since these can be more precise than the ones on the callee
  /// itself.
  bool paramHasAttr(Argument *A, Attribute::AttrKind Attr);

  /// Return true if the given value is known non null within the callee if
  /// inlined through this particular callsite.
  bool isKnownNonNullInCallee(Value *V);

  /// Return true if size growth is allowed when inlining the callee at \p Call.
  bool allowSizeGrowth(CallBase &Call);

  // Custom analysis routines.
  InlineResult analyzeBlock(BasicBlock *BB,
                            SmallPtrSetImpl<const Value *> &EphValues);

  // Disable several entry points to the visitor so we don't accidentally use
  // them by declaring but not defining them here.
  void visit(Module *);
  void visit(Module &);
  void visit(Function *);
  void visit(Function &);
  void visit(BasicBlock *);
  void visit(BasicBlock &);

  // Provide base case for our instruction visit.
  bool visitInstruction(Instruction &I);

  // Our visit overrides.
  bool visitAlloca(AllocaInst &I);
  bool visitPHI(PHINode &I);
  bool visitGetElementPtr(GetElementPtrInst &I);
  bool visitBitCast(BitCastInst &I);
  bool visitPtrToInt(PtrToIntInst &I);
  bool visitIntToPtr(IntToPtrInst &I);
  bool visitCastInst(CastInst &I);
  bool visitCmpInst(CmpInst &I);
  bool visitSub(BinaryOperator &I);
  bool visitBinaryOperator(BinaryOperator &I);
  bool visitFNeg(UnaryOperator &I);
  bool visitLoad(LoadInst &I);
  bool visitStore(StoreInst &I);
  bool visitExtractValue(ExtractValueInst &I);
  bool visitInsertValue(InsertValueInst &I);
  bool visitCallBase(CallBase &Call);
  bool visitReturnInst(ReturnInst &RI);
  bool visitBranchInst(BranchInst &BI);
  bool visitSelectInst(SelectInst &SI);
  bool visitSwitchInst(SwitchInst &SI);
  bool visitIndirectBrInst(IndirectBrInst &IBI);
  bool visitResumeInst(ResumeInst &RI);
  bool visitCleanupReturnInst(CleanupReturnInst &RI);
  bool visitCatchReturnInst(CatchReturnInst &RI);
  bool visitUnreachableInst(UnreachableInst &I);

public:
  CallAnalyzer(Function &Callee, CallBase &Call, const TargetTransformInfo &TTI,
               function_ref<AssumptionCache &(Function &)> GetAssumptionCache,
               function_ref<BlockFrequencyInfo &(Function &)> GetBFI = nullptr,
               ProfileSummaryInfo *PSI = nullptr,
               OptimizationRemarkEmitter *ORE = nullptr)
      : TTI(TTI), GetAssumptionCache(GetAssumptionCache), GetBFI(GetBFI),
        PSI(PSI), F(Callee), DL(F.getParent()->getDataLayout()), ORE(ORE),
        CandidateCall(Call) {}

  InlineResult analyze();

  Optional<Constant *> getSimplifiedValue(Instruction *I) {
    if (SimplifiedValues.find(I) != SimplifiedValues.end())
      return SimplifiedValues[I];
    return None;
  }

  // Keep a bunch of stats about the cost savings found so we can print them
  // out when debugging.
  unsigned NumConstantArgs = 0;
  unsigned NumConstantOffsetPtrArgs = 0;
  unsigned NumAllocaArgs = 0;
  unsigned NumConstantPtrCmps = 0;
  unsigned NumConstantPtrDiffs = 0;
  unsigned NumInstructionsSimplified = 0;

  void dump();
};

// When forming a binary search tree, we should find the number of nodes,
// which is the same as the number of comparisons when lowered. For a given
// number of clusters, n, we can define a recursive function, f(n), to find
// the number of nodes in the tree. The recursion is:
// f(n) = 1 + f(n/2) + f(n - n/2), when n > 3,
// and f(n) = n, when n <= 3.
// This will lead to a binary tree where the leaf should be either f(2) or f(3)
// when n > 3. So, the number of comparisons from leaves should be n, while
// the number of comparisons from non-leaf nodes should be:
// 2^(log2(n) - 1) - 1
// = 2^log2(n) * 2^-1 - 1
// = n / 2 - 1.
// Considering comparisons from leaf and non-leaf nodes, we can estimate the
// number of comparisons in a simple closed form:
// n + n / 2 - 1 = n * 3 / 2 - 1
int64_t getExpectedNumberOfCompare(int NumCaseCluster) {
  return 3 * static_cast<int64_t>(NumCaseCluster) / 2 - 1;
}
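// As a quick sanity check of the closed form: for n = 8 clusters the
// recursion gives f(8) = 1 + f(4) + f(4), with f(4) = 1 + f(2) + f(2) = 5,
// so f(8) = 11, matching the formula 8 * 3 / 2 - 1 = 11.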

/// FIXME: if it is necessary to derive from InlineCostCallAnalyzer, note
/// the FIXME in onLoweredCall, when instantiating an InlineCostCallAnalyzer
class InlineCostCallAnalyzer final : public CallAnalyzer {
  const bool ComputeFullInlineCost;
  int LoadEliminationCost = 0;
  /// Bonus to be applied when percentage of vector instructions in callee is
  /// high (see more details in updateThreshold).
  int VectorBonus = 0;
  /// Bonus to be applied when the callee has only one reachable basic block.
  int SingleBBBonus = 0;

  /// Tunable parameters that control the analysis.
  const InlineParams &Params;

  // This DenseMap stores the delta change in cost and threshold after
  // accounting for the given instruction. The map is filled only with the
  // flag PrintInstructionComments on.
  DenseMap<const Instruction *, InstructionCostDetail> InstructionCostDetailMap;

  /// Upper bound for the inlining cost. Bonuses are being applied to account
  /// for speculative "expected profit" of the inlining decision.
  int Threshold = 0;

  /// The amount of StaticBonus applied.
  int StaticBonusApplied = 0;

  /// Attempt to evaluate indirect calls to boost their inline cost.
  const bool BoostIndirectCalls;

  /// Ignore the threshold when finalizing analysis.
  const bool IgnoreThreshold;

  // True if the cost-benefit-analysis-based inliner is enabled.
  const bool CostBenefitAnalysisEnabled;

  /// Inlining cost measured in abstract units, accounts for all the
  /// instructions expected to be executed for a given function invocation.
  /// Instructions that are statically proven to be dead based on call-site
  /// arguments are not counted here.
  int Cost = 0;

  // The cumulative cost at the beginning of the basic block being analyzed. At
  // the end of analyzing each basic block, "Cost - CostAtBBStart" represents
  // the size of that basic block.
  int CostAtBBStart = 0;

  // The static size of live but cold basic blocks. This is "static" in the
  // sense that it's not weighted by profile counts at all.
  int ColdSize = 0;

  // Whether inlining is decided by cost-threshold analysis.
  bool DecidedByCostThreshold = false;

  // Whether inlining is decided by cost-benefit analysis.
  bool DecidedByCostBenefit = false;

  // The cost-benefit pair computed by cost-benefit analysis.
  Optional<CostBenefitPair> CostBenefit;

  bool SingleBB = true;

  unsigned SROACostSavings = 0;
  unsigned SROACostSavingsLost = 0;

  /// The mapping of caller Alloca values to their accumulated cost savings. If
  /// we have to disable SROA for one of the allocas, this tells us how much
  /// cost must be added.
  DenseMap<AllocaInst *, int> SROAArgCosts;

  /// Return true if \p Call is a cold callsite.
  bool isColdCallSite(CallBase &Call, BlockFrequencyInfo *CallerBFI);

  /// Update Threshold based on callsite properties such as callee
  /// attributes and callee hotness for PGO builds. The Callee is explicitly
  /// passed to support analyzing indirect calls whose target is inferred by
  /// analysis.
  void updateThreshold(CallBase &Call, Function &Callee);
  /// Return a higher threshold if \p Call is a hot callsite.
  Optional<int> getHotCallSiteThreshold(CallBase &Call,
                                        BlockFrequencyInfo *CallerBFI);

  /// Handle a capped 'int' increment for Cost.
  void addCost(int64_t Inc) {
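    // Clamp both the increment and the accumulated total into 'int' range so
    // that a very large callee saturates at INT_MAX instead of overflowing.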
    Inc = std::max<int64_t>(std::min<int64_t>(INT_MAX, Inc), INT_MIN);
    Cost = std::max<int64_t>(std::min<int64_t>(INT_MAX, Inc + Cost), INT_MIN);
  }

  void onDisableSROA(AllocaInst *Arg) override {
    auto CostIt = SROAArgCosts.find(Arg);
    if (CostIt == SROAArgCosts.end())
      return;
    addCost(CostIt->second);
    SROACostSavings -= CostIt->second;
    SROACostSavingsLost += CostIt->second;
    SROAArgCosts.erase(CostIt);
  }

  void onDisableLoadElimination() override {
    addCost(LoadEliminationCost);
    LoadEliminationCost = 0;
  }

  bool onCallBaseVisitStart(CallBase &Call) override {
    if (Optional<int> AttrCallThresholdBonus =
            getStringFnAttrAsInt(Call, "call-threshold-bonus"))
      Threshold += *AttrCallThresholdBonus;

    if (Optional<int> AttrCallCost =
            getStringFnAttrAsInt(Call, "call-inline-cost")) {
      addCost(*AttrCallCost);
      // Prevent further processing of the call since we want to override its
      // inline cost, not just add to it.
      return false;
    }
    return true;
  }

  void onCallPenalty() override { addCost(CallPenalty); }

  void onMemAccess() override { addCost(MemAccessCost); }

  void onCallArgumentSetup(const CallBase &Call) override {
    // Pay the price of the argument setup. We account for the average 1
    // instruction per call argument setup here.
    addCost(Call.arg_size() * InstrCost);
  }
  void onLoadRelativeIntrinsic() override {
    // This is normally lowered to 4 LLVM instructions.
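    // The call instruction itself is charged one InstrCost elsewhere by the
    // visitor, so only the three additional instructions are added here.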
    addCost(3 * InstrCost);
  }
  void onLoweredCall(Function *F, CallBase &Call,
                     bool IsIndirectCall) override {
    // We account for the average 1 instruction per call argument setup here.
    addCost(Call.arg_size() * InstrCost);

    // If we have a constant that we are calling as a function, we can peer
    // through it and see the function target. This happens not infrequently
    // during devirtualization and so we want to give it a hefty bonus for
    // inlining, but cap that bonus in the event that inlining wouldn't pan out.
    // Pretend to inline the function, with a custom threshold.
    if (IsIndirectCall && BoostIndirectCalls) {
      auto IndirectCallParams = Params;
      IndirectCallParams.DefaultThreshold =
          InlineConstants::IndirectCallThreshold;
      /// FIXME: if InlineCostCallAnalyzer is derived from, this may need
      /// to instantiate the derived class.
      InlineCostCallAnalyzer CA(*F, Call, IndirectCallParams, TTI,
                                GetAssumptionCache, GetBFI, PSI, ORE, false);
      if (CA.analyze().isSuccess()) {
        // We were able to inline the indirect call! Subtract the cost from the
        // threshold to get the bonus we want to apply, but don't go below zero.
        Cost -= std::max(0, CA.getThreshold() - CA.getCost());
      }
    } else
      // Otherwise simply add the cost for merely making the call.
      addCost(CallPenalty);
  }

  void onFinalizeSwitch(unsigned JumpTableSize,
                        unsigned NumCaseCluster) override {
    // If suitable for a jump table, consider the cost for the table size and
    // branch to destination.
    // Maximum valid cost increased in this function.
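    // For example, with the default InstrCost of 5, a 16-entry jump table
    // costs (16 + 4) * 5 = 100 cost units.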
    if (JumpTableSize) {
      int64_t JTCost =
          static_cast<int64_t>(JumpTableSize) * InstrCost + 4 * InstrCost;

      addCost(JTCost);
      return;
    }

    if (NumCaseCluster <= 3) {
      // Suppose a comparison includes one compare and one conditional branch.
      addCost(NumCaseCluster * 2 * InstrCost);
      return;
    }

    int64_t ExpectedNumberOfCompare =
        getExpectedNumberOfCompare(NumCaseCluster);
    int64_t SwitchCost = ExpectedNumberOfCompare * 2 * InstrCost;

    addCost(SwitchCost);
  }
  void onMissedSimplification() override { addCost(InstrCost); }

  void onInitializeSROAArg(AllocaInst *Arg) override {
    assert(Arg != nullptr &&
           "Should not initialize SROA costs for null value.");
    SROAArgCosts[Arg] = 0;
  }

  void onAggregateSROAUse(AllocaInst *SROAArg) override {
    auto CostIt = SROAArgCosts.find(SROAArg);
    assert(CostIt != SROAArgCosts.end() &&
           "expected this argument to have a cost");
    CostIt->second += InstrCost;
    SROACostSavings += InstrCost;
  }

  void onBlockStart(const BasicBlock *BB) override { CostAtBBStart = Cost; }

  void onBlockAnalyzed(const BasicBlock *BB) override {
    if (CostBenefitAnalysisEnabled) {
      // Keep track of the static size of live but cold basic blocks. For now,
      // we define a cold basic block to be one that's never executed.
      assert(GetBFI && "GetBFI must be available");
      BlockFrequencyInfo *BFI = &(GetBFI(F));
      assert(BFI && "BFI must be available");
      auto ProfileCount = BFI->getBlockProfileCount(BB);
      assert(ProfileCount.has_value());
      if (ProfileCount.value() == 0)
        ColdSize += Cost - CostAtBBStart;
    }

    auto *TI = BB->getTerminator();
    // If we had any successors at this point, then post-inlining is likely to
    // have them as well. Note that we assume any basic blocks which existed
    // due to branches or switches which folded above will also fold after
    // inlining.
    if (SingleBB && TI->getNumSuccessors() > 1) {
      // Take off the bonus we applied to the threshold.
      Threshold -= SingleBBBonus;
      SingleBB = false;
    }
  }

  void onInstructionAnalysisStart(const Instruction *I) override {
    // This function is called to store the initial cost of inlining before
    // the given instruction was assessed.
    if (!PrintInstructionComments)
      return;
    InstructionCostDetailMap[I].CostBefore = Cost;
    InstructionCostDetailMap[I].ThresholdBefore = Threshold;
  }

  void onInstructionAnalysisFinish(const Instruction *I) override {
    // This function is called to find new values of cost and threshold after
    // the instruction has been assessed.
    if (!PrintInstructionComments)
      return;
    InstructionCostDetailMap[I].CostAfter = Cost;
    InstructionCostDetailMap[I].ThresholdAfter = Threshold;
  }

  bool isCostBenefitAnalysisEnabled() {
    if (!PSI || !PSI->hasProfileSummary())
      return false;

    if (!GetBFI)
      return false;

    if (InlineEnableCostBenefitAnalysis.getNumOccurrences()) {
      // Honor the explicit request from the user.
      if (!InlineEnableCostBenefitAnalysis)
        return false;
    } else {
      // Otherwise, require instrumentation profile.
      if (!PSI->hasInstrumentationProfile())
        return false;
    }

    auto *Caller = CandidateCall.getParent()->getParent();
    if (!Caller->getEntryCount())
      return false;

    BlockFrequencyInfo *CallerBFI = &(GetBFI(*Caller));
    if (!CallerBFI)
      return false;

    // For now, limit to hot call site.
    if (!PSI->isHotCallSite(CandidateCall, CallerBFI))
      return false;

    // Make sure we have a nonzero entry count.
    auto EntryCount = F.getEntryCount();
    if (!EntryCount || !EntryCount->getCount())
      return false;

    BlockFrequencyInfo *CalleeBFI = &(GetBFI(F));
    if (!CalleeBFI)
      return false;

    return true;
  }

  // Determine whether we should inline the given call site, taking into
  // account both the size cost and the cycle savings. Return None if we don't
  // have sufficient profiling information to determine.
  Optional<bool> costBenefitAnalysis() {
    if (!CostBenefitAnalysisEnabled)
      return None;

    // buildInlinerPipeline in the pass builder sets HotCallSiteThreshold to 0
    // for the prelink phase of the AutoFDO + ThinLTO build. Honor the logic by
    // falling back to the cost-based metric.
    // TODO: Improve this hacky condition.
    if (Threshold == 0)
      return None;

    assert(GetBFI);
    BlockFrequencyInfo *CalleeBFI = &(GetBFI(F));
    assert(CalleeBFI);

    // The cycle savings expressed as the sum of InstrCost
    // multiplied by the estimated dynamic count of each instruction we can
    // avoid. Savings come from the call site cost, such as argument setup and
    // the call instruction, as well as the instructions that are folded.
    //
    // We use 128-bit APInt here to avoid potential overflow. This variable
    // should stay well below 10^^24 (or 2^^80) in practice. This "worst" case
    // assumes that we can avoid or fold a billion instructions, each with a
    // profile count of 10^^15 -- roughly the number of cycles for a 24-hour
    // period on a 4GHz machine.
    APInt CycleSavings(128, 0);

    for (auto &BB : F) {
      APInt CurrentSavings(128, 0);
      for (auto &I : BB) {
        if (BranchInst *BI = dyn_cast<BranchInst>(&I)) {
          // Count a conditional branch as savings if it becomes unconditional.
          if (BI->isConditional() &&
              isa_and_nonnull<ConstantInt>(
                  SimplifiedValues.lookup(BI->getCondition()))) {
            CurrentSavings += InstrCost;
          }
        } else if (Value *V = dyn_cast<Value>(&I)) {
          // Count an instruction as savings if we can fold it.
          if (SimplifiedValues.count(V)) {
            CurrentSavings += InstrCost;
          }
        }
      }

      auto ProfileCount = CalleeBFI->getBlockProfileCount(&BB);
      assert(ProfileCount.has_value());
      CurrentSavings *= ProfileCount.value();
      CycleSavings += CurrentSavings;
    }

    // Compute the cycle savings per call.
    auto EntryProfileCount = F.getEntryCount();
    assert(EntryProfileCount && EntryProfileCount->getCount());
    auto EntryCount = EntryProfileCount->getCount();
    CycleSavings += EntryCount / 2;
    CycleSavings = CycleSavings.udiv(EntryCount);

    // Compute the total savings for the call site.
    auto *CallerBB = CandidateCall.getParent();
    BlockFrequencyInfo *CallerBFI = &(GetBFI(*(CallerBB->getParent())));
    CycleSavings += getCallsiteCost(this->CandidateCall, DL);
    CycleSavings *= *CallerBFI->getBlockProfileCount(CallerBB);

    // Remove the cost of the cold basic blocks.
    int Size = Cost - ColdSize;

    // Allow tiny callees to be inlined regardless of whether they meet the
    // savings threshold.
    Size = Size > InlineSizeAllowance ? Size - InlineSizeAllowance : 1;

    CostBenefit.emplace(APInt(128, Size), CycleSavings);

    // Return true if the savings justify the cost of inlining. Specifically,
    // we evaluate the following inequality:
    //
    //  CycleSavings      PSI->getOrCompHotCountThreshold()
    // -------------- >= -----------------------------------
    //       Size              InlineSavingsMultiplier
    //
    // Note that the left hand side is specific to a call site. The right hand
    // side is a constant for the entire executable.
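    //
    // For example, with the default InlineSavingsMultiplier of 8 and a hot
    // count threshold of, say, 8000, the inequality cross-multiplies to
    // CycleSavings >= 1000 * Size: a call site must save at least 1000 cycles
    // per call for each unit of size to be inlined under this analysis.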
    APInt LHS = CycleSavings;
    LHS *= InlineSavingsMultiplier;
    APInt RHS(128, PSI->getOrCompHotCountThreshold());
    RHS *= Size;
    return LHS.uge(RHS);
  }

  InlineResult finalizeAnalysis() override {
    // Loops generally act a lot like calls in that they act like barriers to
    // movement, require a certain amount of setup, etc. So when optimising for
    // size, we penalise any call sites that perform loops. We do this after all
    // other costs here, so will likely only be dealing with relatively small
    // functions (and hence DT and LI will hopefully be cheap).
    auto *Caller = CandidateCall.getFunction();
    if (Caller->hasMinSize()) {
      DominatorTree DT(F);
      LoopInfo LI(DT);
      int NumLoops = 0;
      for (Loop *L : LI) {
        // Ignore loops that will not be executed
        if (DeadBlocks.count(L->getHeader()))
          continue;
        NumLoops++;
      }
      addCost(NumLoops * InlineConstants::LoopPenalty);
    }

    // We applied the maximum possible vector bonus at the beginning. Now,
    // subtract the excess bonus, if any, from the Threshold before
    // comparing against Cost.
    if (NumVectorInstructions <= NumInstructions / 10)
      Threshold -= VectorBonus;
    else if (NumVectorInstructions <= NumInstructions / 2)
      Threshold -= VectorBonus / 2;

    if (Optional<int> AttrCost =
            getStringFnAttrAsInt(CandidateCall, "function-inline-cost"))
      Cost = *AttrCost;

    if (Optional<int> AttrCostMult = getStringFnAttrAsInt(
            CandidateCall,
            InlineConstants::FunctionInlineCostMultiplierAttributeName))
      Cost *= *AttrCostMult;

    if (Optional<int> AttrThreshold =
            getStringFnAttrAsInt(CandidateCall, "function-inline-threshold"))
      Threshold = *AttrThreshold;

    if (auto Result = costBenefitAnalysis()) {
      DecidedByCostBenefit = true;
      if (*Result)
        return InlineResult::success();
      else
        return InlineResult::failure("Cost over threshold.");
    }

    if (IgnoreThreshold)
      return InlineResult::success();

    DecidedByCostThreshold = true;
    return Cost < std::max(1, Threshold)
               ? InlineResult::success()
               : InlineResult::failure("Cost over threshold.");
  }

  bool shouldStop() override {
    if (IgnoreThreshold || ComputeFullInlineCost)
      return false;
    // Bail out the moment we cross the threshold. This means we'll under-count
    // the cost, but only when undercounting doesn't matter.
    if (Cost < Threshold)
      return false;
    DecidedByCostThreshold = true;
    return true;
  }

  void onLoadEliminationOpportunity() override {
    LoadEliminationCost += InstrCost;
  }

  InlineResult onAnalysisStart() override {
    // Perform some tweaks to the cost and threshold based on the direct
    // callsite information.

    // We want to more aggressively inline vector-dense kernels, so up the
    // threshold, and we'll lower it if the % of vector instructions gets too
    // low. Note that these bonuses are somewhat arbitrary and evolved over
    // time by accident as much as because they are principled bonuses.
    //
    // FIXME: It would be nice to remove all such bonuses. At least it would be
    // nice to base the bonus values on something more scientific.
    assert(NumInstructions == 0);
    assert(NumVectorInstructions == 0);

    // Update the threshold based on callsite properties
    updateThreshold(CandidateCall, F);

    // While Threshold depends on commandline options that can take negative
    // values, we want to enforce the invariant that the computed threshold and
    // bonuses are non-negative.
    assert(Threshold >= 0);
    assert(SingleBBBonus >= 0);
    assert(VectorBonus >= 0);

    // Speculatively apply all possible bonuses to Threshold. If cost exceeds
    // this Threshold any time, and cost cannot decrease, we can stop processing
    // the rest of the function body.
    Threshold += (SingleBBBonus + VectorBonus);

    // Give out bonuses for the callsite, as the instructions setting them up
    // will be gone after inlining.
    addCost(-getCallsiteCost(this->CandidateCall, DL));

    // If this function uses the coldcc calling convention, prefer not to inline
    // it.
    if (F.getCallingConv() == CallingConv::Cold)
      Cost += InlineConstants::ColdccPenalty;

    LLVM_DEBUG(dbgs() << "      Initial cost: " << Cost << "\n");

    // Check if we're done. This can happen due to bonuses and penalties.
    if (Cost >= Threshold && !ComputeFullInlineCost)
      return InlineResult::failure("high cost");

    return InlineResult::success();
  }

public:
  InlineCostCallAnalyzer(
      Function &Callee, CallBase &Call, const InlineParams &Params,
      const TargetTransformInfo &TTI,
      function_ref<AssumptionCache &(Function &)> GetAssumptionCache,
      function_ref<BlockFrequencyInfo &(Function &)> GetBFI = nullptr,
      ProfileSummaryInfo *PSI = nullptr,
      OptimizationRemarkEmitter *ORE = nullptr, bool BoostIndirect = true,
      bool IgnoreThreshold = false)
      : CallAnalyzer(Callee, Call, TTI, GetAssumptionCache, GetBFI, PSI, ORE),
        ComputeFullInlineCost(OptComputeFullInlineCost ||
                              Params.ComputeFullInlineCost || ORE ||
                              isCostBenefitAnalysisEnabled()),
        Params(Params), Threshold(Params.DefaultThreshold),
        BoostIndirectCalls(BoostIndirect), IgnoreThreshold(IgnoreThreshold),
        CostBenefitAnalysisEnabled(isCostBenefitAnalysisEnabled()),
        Writer(this) {
    AllowRecursiveCall = *Params.AllowRecursiveCall;
  }

  /// Annotation Writer for instruction details
  InlineCostAnnotationWriter Writer;

  void dump();

  // Prints the same analysis as dump(), but its definition is not dependent
  // on the build.
  void print(raw_ostream &OS);

  Optional<InstructionCostDetail> getCostDetails(const Instruction *I) {
    if (InstructionCostDetailMap.find(I) != InstructionCostDetailMap.end())
      return InstructionCostDetailMap[I];
    return None;
  }

  virtual ~InlineCostCallAnalyzer() = default;
  int getThreshold() const { return Threshold; }
  int getCost() const { return Cost; }
  int getStaticBonusApplied() const { return StaticBonusApplied; }
  Optional<CostBenefitPair> getCostBenefitPair() { return CostBenefit; }
  bool wasDecidedByCostBenefit() const { return DecidedByCostBenefit; }
  bool wasDecidedByCostThreshold() const { return DecidedByCostThreshold; }
};

// Return true if CB is the sole call to local function Callee.
static bool isSoleCallToLocalFunction(const CallBase &CB,
                                      const Function &Callee) {
  return Callee.hasLocalLinkage() && Callee.hasOneLiveUse() &&
         &Callee == CB.getCalledFunction();
}

class InlineCostFeaturesAnalyzer final : public CallAnalyzer {
private:
  InlineCostFeatures Cost = {};

  // FIXME: These constants are taken from the heuristic-based cost visitor.
  // These should be removed entirely in a later revision to avoid reliance on
  // heuristics in the ML inliner.
  static constexpr int JTCostMultiplier = 4;
  static constexpr int CaseClusterCostMultiplier = 2;
  static constexpr int SwitchCostMultiplier = 2;

  // FIXME: These are taken from the heuristic-based cost visitor: we should
  // eventually abstract these to the CallAnalyzer to avoid duplication.
  unsigned SROACostSavingOpportunities = 0;
  int VectorBonus = 0;
  int SingleBBBonus = 0;
  int Threshold = 5;

  DenseMap<AllocaInst *, unsigned> SROACosts;

  void increment(InlineCostFeatureIndex Feature, int64_t Delta = 1) {
    Cost[static_cast<size_t>(Feature)] += Delta;
  }

  void set(InlineCostFeatureIndex Feature, int64_t Value) {
    Cost[static_cast<size_t>(Feature)] = Value;
  }

  void onDisableSROA(AllocaInst *Arg) override {
    auto CostIt = SROACosts.find(Arg);
    if (CostIt == SROACosts.end())
      return;

    increment(InlineCostFeatureIndex::SROALosses, CostIt->second);
    SROACostSavingOpportunities -= CostIt->second;
    SROACosts.erase(CostIt);
  }

  void onDisableLoadElimination() override {
    set(InlineCostFeatureIndex::LoadElimination, 1);
  }

  void onCallPenalty() override {
    increment(InlineCostFeatureIndex::CallPenalty, CallPenalty);
  }

  void onCallArgumentSetup(const CallBase &Call) override {
    increment(InlineCostFeatureIndex::CallArgumentSetup,
              Call.arg_size() * InstrCost);
  }

  void onLoadRelativeIntrinsic() override {
    increment(InlineCostFeatureIndex::LoadRelativeIntrinsic, 3 * InstrCost);
  }

  void onLoweredCall(Function *F, CallBase &Call,
                     bool IsIndirectCall) override {
    increment(InlineCostFeatureIndex::LoweredCallArgSetup,
              Call.arg_size() * InstrCost);

    if (IsIndirectCall) {
      InlineParams IndirectCallParams = {/* DefaultThreshold*/ 0,
                                         /*HintThreshold*/ {},
                                         /*ColdThreshold*/ {},
                                         /*OptSizeThreshold*/ {},
                                         /*OptMinSizeThreshold*/ {},
                                         /*HotCallSiteThreshold*/ {},
                                         /*LocallyHotCallSiteThreshold*/ {},
                                         /*ColdCallSiteThreshold*/ {},
                                         /*ComputeFullInlineCost*/ true,
                                         /*EnableDeferral*/ true};
      IndirectCallParams.DefaultThreshold =
          InlineConstants::IndirectCallThreshold;

      InlineCostCallAnalyzer CA(*F, Call, IndirectCallParams, TTI,
                                GetAssumptionCache, GetBFI, PSI, ORE, false,
                                true);
      if (CA.analyze().isSuccess()) {
        increment(InlineCostFeatureIndex::NestedInlineCostEstimate,
                  CA.getCost());
        increment(InlineCostFeatureIndex::NestedInlines, 1);
      }
    } else {
      onCallPenalty();
    }
  }

  void onFinalizeSwitch(unsigned JumpTableSize,
                        unsigned NumCaseCluster) override {

    if (JumpTableSize) {
      int64_t JTCost = static_cast<int64_t>(JumpTableSize) * InstrCost +
                       JTCostMultiplier * InstrCost;
      increment(InlineCostFeatureIndex::JumpTablePenalty, JTCost);
      return;
    }

    if (NumCaseCluster <= 3) {
      increment(InlineCostFeatureIndex::CaseClusterPenalty,
                NumCaseCluster * CaseClusterCostMultiplier * InstrCost);
      return;
    }

    int64_t ExpectedNumberOfCompare =
        getExpectedNumberOfCompare(NumCaseCluster);

    int64_t SwitchCost =
        ExpectedNumberOfCompare * SwitchCostMultiplier * InstrCost;
    increment(InlineCostFeatureIndex::SwitchPenalty, SwitchCost);
  }

  void onMissedSimplification() override {
    increment(InlineCostFeatureIndex::UnsimplifiedCommonInstructions,
              InstrCost);
  }

  void onInitializeSROAArg(AllocaInst *Arg) override { SROACosts[Arg] = 0; }
  void onAggregateSROAUse(AllocaInst *Arg) override {
    SROACosts.find(Arg)->second += InstrCost;
    SROACostSavingOpportunities += InstrCost;
  }

  void onBlockAnalyzed(const BasicBlock *BB) override {
    if (BB->getTerminator()->getNumSuccessors() > 1)
      set(InlineCostFeatureIndex::IsMultipleBlocks, 1);
    Threshold -= SingleBBBonus;
  }

  InlineResult finalizeAnalysis() override {
    auto *Caller = CandidateCall.getFunction();
    if (Caller->hasMinSize()) {
      DominatorTree DT(F);
      LoopInfo LI(DT);
      for (Loop *L : LI) {
        // Ignore loops that will not be executed
        if (DeadBlocks.count(L->getHeader()))
          continue;
        increment(InlineCostFeatureIndex::NumLoops,
                  InlineConstants::LoopPenalty);
      }
    }
    set(InlineCostFeatureIndex::DeadBlocks, DeadBlocks.size());
    set(InlineCostFeatureIndex::SimplifiedInstructions,
        NumInstructionsSimplified);
    set(InlineCostFeatureIndex::ConstantArgs, NumConstantArgs);
    set(InlineCostFeatureIndex::ConstantOffsetPtrArgs,
        NumConstantOffsetPtrArgs);
    set(InlineCostFeatureIndex::SROASavings, SROACostSavingOpportunities);

    if (NumVectorInstructions <= NumInstructions / 10)
      Threshold -= VectorBonus;
    else if (NumVectorInstructions <= NumInstructions / 2)
      Threshold -= VectorBonus / 2;

    set(InlineCostFeatureIndex::Threshold, Threshold);

    return InlineResult::success();
  }

  bool shouldStop() override { return false; }

  void onLoadEliminationOpportunity() override {
    increment(InlineCostFeatureIndex::LoadElimination, 1);
  }

  InlineResult onAnalysisStart() override {
    increment(InlineCostFeatureIndex::CallSiteCost,
              -1 * getCallsiteCost(this->CandidateCall, DL));

    set(InlineCostFeatureIndex::ColdCcPenalty,
        (F.getCallingConv() == CallingConv::Cold));

    set(InlineCostFeatureIndex::LastCallToStaticBonus,
        isSoleCallToLocalFunction(CandidateCall, F));

    // FIXME: we shouldn't repeat this logic in both the Features and Cost
    // analyzer - instead, we should abstract it to a common method in the
    // CallAnalyzer
    int SingleBBBonusPercent = 50;
    int VectorBonusPercent = TTI.getInlinerVectorBonusPercent();
    Threshold += TTI.adjustInliningThreshold(&CandidateCall);
    Threshold *= TTI.getInliningThresholdMultiplier();
    SingleBBBonus = Threshold * SingleBBBonusPercent / 100;
    VectorBonus = Threshold * VectorBonusPercent / 100;
    Threshold += (SingleBBBonus + VectorBonus);

    return InlineResult::success();
  }

public:
  InlineCostFeaturesAnalyzer(
      const TargetTransformInfo &TTI,
      function_ref<AssumptionCache &(Function &)> &GetAssumptionCache,
      function_ref<BlockFrequencyInfo &(Function &)> GetBFI,
      ProfileSummaryInfo *PSI, OptimizationRemarkEmitter *ORE, Function &Callee,
      CallBase &Call)
      : CallAnalyzer(Callee, Call, TTI, GetAssumptionCache, GetBFI, PSI) {}

  const InlineCostFeatures &features() const { return Cost; }
};

} // namespace

/// Test whether the given value is an Alloca-derived function argument.
bool CallAnalyzer::isAllocaDerivedArg(Value *V) {
  return SROAArgValues.count(V);
}

void CallAnalyzer::disableSROAForArg(AllocaInst *SROAArg) {
  onDisableSROA(SROAArg);
  EnabledSROAAllocas.erase(SROAArg);
  disableLoadElimination();
}

void InlineCostAnnotationWriter::emitInstructionAnnot(
    const Instruction *I, formatted_raw_ostream &OS) {
  // The cost of inlining of the given instruction is printed always.
  // The threshold delta is printed only when it is non-zero. It happens
  // when we decided to give a bonus at a particular instruction.
  Optional<InstructionCostDetail> Record = ICCA->getCostDetails(I);
  if (!Record)
    OS << "; No analysis for the instruction";
  else {
    OS << "; cost before = " << Record->CostBefore
       << ", cost after = " << Record->CostAfter
       << ", threshold before = " << Record->ThresholdBefore
       << ", threshold after = " << Record->ThresholdAfter << ", ";
    OS << "cost delta = " << Record->getCostDelta();
    if (Record->hasThresholdChanged())
      OS << ", threshold delta = " << Record->getThresholdDelta();
  }
  auto C = ICCA->getSimplifiedValue(const_cast<Instruction *>(I));
  if (C) {
    OS << ", simplified to ";
    (*C)->print(OS, true);
  }
  OS << "\n";
}

/// If 'V' maps to a SROA candidate, disable SROA for it.
void CallAnalyzer::disableSROA(Value *V) {
  if (auto *SROAArg = getSROAArgForValueOrNull(V)) {
    disableSROAForArg(SROAArg);
  }
}

void CallAnalyzer::disableLoadElimination() {
  if (EnableLoadElimination) {
    onDisableLoadElimination();
    EnableLoadElimination = false;
  }
}

/// Accumulate a constant GEP offset into an APInt if possible.
///
/// Returns false if unable to compute the offset for any reason. Respects any
/// simplified values known during the analysis of this callsite.
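///
/// For example, for 'getelementptr {i32, i64}, ptr %p, i32 0, i32 1' the
/// struct layout contributes the byte offset of field 1 (8 on typical 64-bit
/// targets, due to the i64's alignment), which is accumulated into \p Offset.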
bool CallAnalyzer::accumulateGEPOffset(GEPOperator &GEP, APInt &Offset) {
  unsigned IntPtrWidth = DL.getIndexTypeSizeInBits(GEP.getType());
  assert(IntPtrWidth == Offset.getBitWidth());

  for (gep_type_iterator GTI = gep_type_begin(GEP), GTE = gep_type_end(GEP);
       GTI != GTE; ++GTI) {
    ConstantInt *OpC = dyn_cast<ConstantInt>(GTI.getOperand());
    if (!OpC)
      if (Constant *SimpleOp = SimplifiedValues.lookup(GTI.getOperand()))
        OpC = dyn_cast<ConstantInt>(SimpleOp);
    if (!OpC)
      return false;
    if (OpC->isZero())
      continue;

    // Handle a struct index, which adds its field offset to the pointer.
    if (StructType *STy = GTI.getStructTypeOrNull()) {
      unsigned ElementIdx = OpC->getZExtValue();
      const StructLayout *SL = DL.getStructLayout(STy);
      Offset += APInt(IntPtrWidth, SL->getElementOffset(ElementIdx));
      continue;
    }

    APInt TypeSize(IntPtrWidth, DL.getTypeAllocSize(GTI.getIndexedType()));
    Offset += OpC->getValue().sextOrTrunc(IntPtrWidth) * TypeSize;
  }
  return true;
}

/// Use TTI to check whether a GEP is free.
///
/// Respects any simplified values known during the analysis of this callsite.
bool CallAnalyzer::isGEPFree(GetElementPtrInst &GEP) {
  SmallVector<Value *, 4> Operands;
  Operands.push_back(GEP.getOperand(0));
  for (const Use &Op : GEP.indices())
    if (Constant *SimpleOp = SimplifiedValues.lookup(Op))
      Operands.push_back(SimpleOp);
    else
      Operands.push_back(Op);
  return TTI.getInstructionCost(&GEP, Operands,
                                TargetTransformInfo::TCK_SizeAndLatency) ==
         TargetTransformInfo::TCC_Free;
}

bool CallAnalyzer::visitAlloca(AllocaInst &I) {
  disableSROA(I.getOperand(0));

  // Check whether inlining will turn a dynamic alloca into a static
  // alloca and handle that case.
  if (I.isArrayAllocation()) {
    Constant *Size = SimplifiedValues.lookup(I.getArraySize());
    if (auto *AllocSize = dyn_cast_or_null<ConstantInt>(Size)) {
      // Sometimes a dynamic alloca could be converted into a static alloca
      // after this constant prop, and become a huge static alloca on an
      // unconditional CFG path. Avoid inlining if this is going to happen above
      // a threshold.
      // FIXME: If the threshold is removed or lowered too much, we could end up
      // being too pessimistic and prevent inlining non-problematic code. This
      // could result in unintended perf regressions. A better overall strategy
      // is needed to track stack usage during inlining.
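      // For example, 'alloca i32, i32 %n' where %n simplifies to the constant
      // 1024 becomes a 4 KiB static allocation, which is accumulated into
      // AllocatedSize below.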
      Type *Ty = I.getAllocatedType();
      AllocatedSize = SaturatingMultiplyAdd(
          AllocSize->getLimitedValue(),
          DL.getTypeAllocSize(Ty).getKnownMinSize(), AllocatedSize);
      if (AllocatedSize > InlineConstants::MaxSimplifiedDynamicAllocaToInline)
        HasDynamicAlloca = true;
      return false;
    }
  }

  // Accumulate the allocated size.
  if (I.isStaticAlloca()) {
    Type *Ty = I.getAllocatedType();
    AllocatedSize =
        SaturatingAdd(DL.getTypeAllocSize(Ty).getKnownMinSize(), AllocatedSize);
  }

  // FIXME: This is overly conservative. Dynamic allocas are inefficient for
  // a variety of reasons, and so we would like to not inline them into
  // functions which don't currently have a dynamic alloca. This simply
  // disables inlining altogether in the presence of a dynamic alloca.
  if (!I.isStaticAlloca())
    HasDynamicAlloca = true;

  return false;
}

bool CallAnalyzer::visitPHI(PHINode &I) {
  // FIXME: We need to propagate SROA *disabling* through phi nodes, even
  // though we don't want to propagate its bonuses. The idea is to disable
  // SROA if it *might* be used in an inappropriate manner.

  // Phi nodes are always zero-cost.
  // FIXME: Pointer sizes may differ between different address spaces, so do we
  // need to use correct address space in the call to getPointerSizeInBits here?
  // Or could we skip the getPointerSizeInBits call completely? As far as I can
  // see the ZeroOffset is used as a dummy value, so we can probably use any
  // bit width for the ZeroOffset?
  APInt ZeroOffset = APInt::getZero(DL.getPointerSizeInBits(0));
  bool CheckSROA = I.getType()->isPointerTy();

  // Track the constant or pointer with constant offset we've seen so far.
  Constant *FirstC = nullptr;
  std::pair<Value *, APInt> FirstBaseAndOffset = {nullptr, ZeroOffset};
  Value *FirstV = nullptr;

  for (unsigned i = 0, e = I.getNumIncomingValues(); i != e; ++i) {
    BasicBlock *Pred = I.getIncomingBlock(i);
    // If the incoming block is dead, skip the incoming block.
    if (DeadBlocks.count(Pred))
      continue;
    // If the parent block of phi is not the known successor of the incoming
    // block, skip the incoming block.
    BasicBlock *KnownSuccessor = KnownSuccessors[Pred];
    if (KnownSuccessor && KnownSuccessor != I.getParent())
      continue;

    Value *V = I.getIncomingValue(i);
    // If the incoming value is this phi itself, skip the incoming value.
    if (&I == V)
      continue;

    Constant *C = dyn_cast<Constant>(V);
    if (!C)
      C = SimplifiedValues.lookup(V);

    std::pair<Value *, APInt> BaseAndOffset = {nullptr, ZeroOffset};
    if (!C && CheckSROA)
      BaseAndOffset = ConstantOffsetPtrs.lookup(V);

    if (!C && !BaseAndOffset.first)
      // The incoming value is neither a constant nor a pointer with constant
      // offset, exit early.
      return true;

    if (FirstC) {
      if (FirstC == C)
        // If we've seen a constant incoming value before and it is the same
        // constant we see this time, continue checking the next incoming value.
        continue;
      // Otherwise early exit because we either see a different constant or saw
      // a constant before but we have a pointer with constant offset this time.
      return true;
    }

    if (FirstV) {
      // The same logic as above, but check pointer with constant offset here.
      if (FirstBaseAndOffset == BaseAndOffset)
        continue;
      return true;
    }

    if (C) {
      // This is the 1st time we've seen a constant, record it.
      FirstC = C;
      continue;
    }

    // The remaining case is that this is the 1st time we've seen a pointer
    // with constant offset, record it.
    FirstV = V;
    FirstBaseAndOffset = BaseAndOffset;
  }

  // Check if we can map phi to a constant.
  if (FirstC) {
    SimplifiedValues[&I] = FirstC;
    return true;
  }

  // Check if we can map phi to a pointer with constant offset.
  if (FirstBaseAndOffset.first) {
    ConstantOffsetPtrs[&I] = FirstBaseAndOffset;

    if (auto *SROAArg = getSROAArgForValueOrNull(FirstV))
      SROAArgValues[&I] = SROAArg;
  }

  return true;
}

1516 /// Check we can fold GEPs of constant-offset call site argument pointers.
1517 /// This requires target data and inbounds GEPs.
1518 ///
1519 /// \return true if the specified GEP can be folded.
1520 bool CallAnalyzer::canFoldInboundsGEP(GetElementPtrInst &I) {
1521  // Check if we have a base + offset for the pointer.
1522  std::pair<Value *, APInt> BaseAndOffset =
1523  ConstantOffsetPtrs.lookup(I.getPointerOperand());
1524  if (!BaseAndOffset.first)
1525  return false;
1526 
1527  // Check if the offset of this GEP is constant, and if so accumulate it
1528  // into Offset.
1529  if (!accumulateGEPOffset(cast<GEPOperator>(I), BaseAndOffset.second))
1530  return false;
1531 
1532  // Add the result as a new mapping to Base + Offset.
1533  ConstantOffsetPtrs[&I] = BaseAndOffset;
1534 
1535  return true;
1536 }
1537 
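// Editorial sketch (not from the source): assuming %arg is already tracked
// in ConstantOffsetPtrs as (base %a, offset 4), an inbounds GEP with a
// constant index simply extends the offset:
//
//   %p = getelementptr inbounds i32, ptr %arg, i64 2
//   ; ConstantOffsetPtrs[%p] = (%a, 4 + 2*4) = (%a, 12)
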
1538 bool CallAnalyzer::visitGetElementPtr(GetElementPtrInst &I) {
1539  auto *SROAArg = getSROAArgForValueOrNull(I.getPointerOperand());
1540 
1541  // Lambda to check whether a GEP's indices are all constant.
1542  auto IsGEPOffsetConstant = [&](GetElementPtrInst &GEP) {
1543  for (const Use &Op : GEP.indices())
1544  if (!isa<Constant>(Op) && !SimplifiedValues.lookup(Op))
1545  return false;
1546  return true;
1547  };
1548 
1549  if (!DisableGEPConstOperand)
1550  if (simplifyInstruction(I))
1551  return true;
1552 
1553  if ((I.isInBounds() && canFoldInboundsGEP(I)) || IsGEPOffsetConstant(I)) {
1554  if (SROAArg)
1555  SROAArgValues[&I] = SROAArg;
1556 
1557  // Constant GEPs are modeled as free.
1558  return true;
1559  }
1560 
1561  // Variable GEPs will require math and will disable SROA.
1562  if (SROAArg)
1563  disableSROAForArg(SROAArg);
1564  return isGEPFree(I);
1565 }
1566 
1567 /// Simplify \p I if its operands are constants and update SimplifiedValues.
1568 bool CallAnalyzer::simplifyInstruction(Instruction &I) {
1569  SmallVector<Constant *> COps;
1570  for (Value *Op : I.operands()) {
1571  Constant *COp = dyn_cast<Constant>(Op);
1572  if (!COp)
1573  COp = SimplifiedValues.lookup(Op);
1574  if (!COp)
1575  return false;
1576  COps.push_back(COp);
1577  }
1578  auto *C = ConstantFoldInstOperands(&I, COps, DL);
1579  if (!C)
1580  return false;
1581  SimplifiedValues[&I] = C;
1582  return true;
1583 }
1584 
1585 /// Try to simplify a call to llvm.is.constant.
1586 ///
1587 /// Duplicate the argument checking from CallAnalyzer::simplifyCallSite since
1588 /// we expect calls of this specific intrinsic to be infrequent.
1589 ///
1590 /// FIXME: Given that we know CB's parent (F) caller
1591 /// (CandidateCall->getParent()->getParent()), we might be able to determine
1592 /// whether inlining F into F's caller would change how the call to
1593 /// llvm.is.constant would evaluate.
1594 bool CallAnalyzer::simplifyIntrinsicCallIsConstant(CallBase &CB) {
1595  Value *Arg = CB.getArgOperand(0);
1596  auto *C = dyn_cast<Constant>(Arg);
1597 
1598  if (!C)
1599  C = dyn_cast_or_null<Constant>(SimplifiedValues.lookup(Arg));
1600 
1601  Type *RT = CB.getFunctionType()->getReturnType();
1602  SimplifiedValues[&CB] = ConstantInt::get(RT, C ? 1 : 0);
1603  return true;
1604 }
1605 
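// Editorial sketch (not from the source): how the fold above behaves.
// @llvm.is.constant folds to true when its operand is (or has simplified
// to) a constant, and to false otherwise:
//
//   %r = call i1 @llvm.is.constant.i32(i32 42)   ; -> i1 true
//   %s = call i1 @llvm.is.constant.i32(i32 %x)   ; -> i1 false (unknown %x)
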
1606 bool CallAnalyzer::visitBitCast(BitCastInst &I) {
1607  // Propagate constants through bitcasts.
1608  if (simplifyInstruction(I))
1609  return true;
1610 
1611  // Track base/offsets through casts
1612  std::pair<Value *, APInt> BaseAndOffset =
1613  ConstantOffsetPtrs.lookup(I.getOperand(0));
1614  // Casts don't change the offset, just wrap it up.
1615  if (BaseAndOffset.first)
1616  ConstantOffsetPtrs[&I] = BaseAndOffset;
1617 
1618  // Also look for SROA candidates here.
1619  if (auto *SROAArg = getSROAArgForValueOrNull(I.getOperand(0)))
1620  SROAArgValues[&I] = SROAArg;
1621 
1622  // Bitcasts are always zero cost.
1623  return true;
1624 }
1625 
1626 bool CallAnalyzer::visitPtrToInt(PtrToIntInst &I) {
1627  // Propagate constants through ptrtoint.
1628  if (simplifyInstruction(I))
1629  return true;
1630 
1631  // Track base/offset pairs when converted to a plain integer provided the
1632  // integer is large enough to represent the pointer.
1633  unsigned IntegerSize = I.getType()->getScalarSizeInBits();
1634  unsigned AS = I.getOperand(0)->getType()->getPointerAddressSpace();
1635  if (IntegerSize == DL.getPointerSizeInBits(AS)) {
1636  std::pair<Value *, APInt> BaseAndOffset =
1637  ConstantOffsetPtrs.lookup(I.getOperand(0));
1638  if (BaseAndOffset.first)
1639  ConstantOffsetPtrs[&I] = BaseAndOffset;
1640  }
1641 
1642  // This is really weird. Technically, ptrtoint will disable SROA. However,
1643  // unless that ptrtoint is *used* somewhere in the live basic blocks after
1644  // inlining, it will be nuked, and SROA should proceed. All of the uses which
1645  // would block SROA would also block SROA if applied directly to a pointer,
1646  // and so we can just add the integer in here. The only places where SROA is
1647  // preserved either cannot fire on an integer, or won't in-and-of themselves
1648  // disable SROA (ext) w/o some later use that we would see and disable.
1649  if (auto *SROAArg = getSROAArgForValueOrNull(I.getOperand(0)))
1650  SROAArgValues[&I] = SROAArg;
1651 
1652  return TargetTransformInfo::TCC_Free ==
1653  TTI.getInstructionCost(&I, TargetTransformInfo::TCK_SizeAndLatency);
1654 }
1655 
1656 bool CallAnalyzer::visitIntToPtr(IntToPtrInst &I) {
1657  // Propagate constants through inttoptr.
1658  if (simplifyInstruction(I))
1659  return true;
1660 
1661  // Track base/offset pairs when round-tripped through a pointer without
1662  // modifications provided the integer is not too large.
1663  Value *Op = I.getOperand(0);
1664  unsigned IntegerSize = Op->getType()->getScalarSizeInBits();
1665  if (IntegerSize <= DL.getPointerTypeSizeInBits(I.getType())) {
1666  std::pair<Value *, APInt> BaseAndOffset = ConstantOffsetPtrs.lookup(Op);
1667  if (BaseAndOffset.first)
1668  ConstantOffsetPtrs[&I] = BaseAndOffset;
1669  }
1670 
1671  // "Propagate" SROA here in the same manner as we do for ptrtoint above.
1672  if (auto *SROAArg = getSROAArgForValueOrNull(Op))
1673  SROAArgValues[&I] = SROAArg;
1674 
1675  return TargetTransformInfo::TCC_Free ==
1676  TTI.getInstructionCost(&I, TargetTransformInfo::TCK_SizeAndLatency);
1677 }
1678 
1679 bool CallAnalyzer::visitCastInst(CastInst &I) {
1680  // Propagate constants through casts.
1681  if (simplifyInstruction(I))
1682  return true;
1683 
1684  // Disable SROA in the face of arbitrary casts we don't explicitly list
1685  // elsewhere.
1686  disableSROA(I.getOperand(0));
1687 
1688  // If this is a floating-point cast, and the target says this operation
1689  // is expensive, this may eventually become a library call. Treat the cost
1690  // as such.
1691  switch (I.getOpcode()) {
1692  case Instruction::FPTrunc:
1693  case Instruction::FPExt:
1694  case Instruction::UIToFP:
1695  case Instruction::SIToFP:
1696  case Instruction::FPToUI:
1697  case Instruction::FPToSI:
1698  if (TTI.getFPOpCost(I.getType()) == TargetTransformInfo::TCC_Expensive)
1699  onCallPenalty();
1700  break;
1701  default:
1702  break;
1703  }
1704 
1705  return TargetTransformInfo::TCC_Free ==
1706  TTI.getInstructionCost(&I, TargetTransformInfo::TCK_SizeAndLatency);
1707 }
1708 
1709 bool CallAnalyzer::paramHasAttr(Argument *A, Attribute::AttrKind Attr) {
1710  return CandidateCall.paramHasAttr(A->getArgNo(), Attr);
1711 }
1712 
1713 bool CallAnalyzer::isKnownNonNullInCallee(Value *V) {
1714  // Does the *call site* have the NonNull attribute set on an argument? We
1715  // use the attribute on the call site to memoize any analysis done in the
1716  // caller. This will also trip if the callee function has a non-null
1717  // parameter attribute, but that's a less interesting case because hopefully
1718  // the callee would already have been simplified based on that.
1719  if (Argument *A = dyn_cast<Argument>(V))
1720  if (paramHasAttr(A, Attribute::NonNull))
1721  return true;
1722 
1723  // Is this an alloca in the caller? This is distinct from the attribute case
1724  // above because attributes aren't updated within the inliner itself and we
1725  // always want to catch the alloca derived case.
1726  if (isAllocaDerivedArg(V))
1727  // We can actually predict the result of comparisons between an
1728  // alloca-derived value and null. Note that this fires regardless of
1729  // SROA firing.
1730  return true;
1731 
1732  return false;
1733 }
1734 
1735 bool CallAnalyzer::allowSizeGrowth(CallBase &Call) {
1736  // If the normal destination of the invoke or the parent block of the call
1737  // site is unreachable-terminated, there is little point in inlining this
1738  // unless there is literally zero cost.
1739  // FIXME: Note that it is possible that an unreachable-terminated block has a
1740  // hot entry. For example, in the scenario below, inlining hot_call_X() may
1741  // be beneficial:
1742  // main() {
1743  // hot_call_1();
1744  // ...
1745  // hot_call_N()
1746  // exit(0);
1747  // }
1748  // For now, we are not handling this corner case here as it is rare in real
1749  // code. In the future, we should elaborate this based on BPI and BFI in more
1750  // general threshold adjusting heuristics in updateThreshold().
1751  if (InvokeInst *II = dyn_cast<InvokeInst>(&Call)) {
1752  if (isa<UnreachableInst>(II->getNormalDest()->getTerminator()))
1753  return false;
1754  } else if (isa<UnreachableInst>(Call.getParent()->getTerminator()))
1755  return false;
1756 
1757  return true;
1758 }
1759 
1760 bool InlineCostCallAnalyzer::isColdCallSite(CallBase &Call,
1761  BlockFrequencyInfo *CallerBFI) {
1762  // If global profile summary is available, then callsite's coldness is
1763  // determined based on that.
1764  if (PSI && PSI->hasProfileSummary())
1765  return PSI->isColdCallSite(Call, CallerBFI);
1766 
1767  // Otherwise we need BFI to be available.
1768  if (!CallerBFI)
1769  return false;
1770 
1771  // Determine if the callsite is cold relative to caller's entry. We could
1772  // potentially cache the computation of scaled entry frequency, but the added
1773  // complexity is not worth it unless this scaling shows up high in the
1774  // profiles.
1775  const BranchProbability ColdProb(ColdCallSiteRelFreq, 100);
1776  auto CallSiteBB = Call.getParent();
1777  auto CallSiteFreq = CallerBFI->getBlockFreq(CallSiteBB);
1778  auto CallerEntryFreq =
1779  CallerBFI->getBlockFreq(&(Call.getCaller()->getEntryBlock()));
1780  return CallSiteFreq < CallerEntryFreq * ColdProb;
1781 }
1782 
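// Editorial note: as a worked example of the check above, assuming the
// default ColdCallSiteRelFreq of 2 (percent), a callsite whose block runs
// at less than 2% of the caller's entry frequency is treated as cold when
// no profile summary is available.
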
1783 Optional<int>
1784 InlineCostCallAnalyzer::getHotCallSiteThreshold(CallBase &Call,
1785  BlockFrequencyInfo *CallerBFI) {
1786 
1787  // If global profile summary is available, then callsite's hotness is
1788  // determined based on that.
1789  if (PSI && PSI->hasProfileSummary() && PSI->isHotCallSite(Call, CallerBFI))
1790  return Params.HotCallSiteThreshold;
1791 
1792  // Otherwise we need BFI to be available and to have a locally hot callsite
1793  // threshold.
1794  if (!CallerBFI || !Params.LocallyHotCallSiteThreshold)
1795  return None;
1796 
1797  // Determine if the callsite is hot relative to caller's entry. We could
1798  // potentially cache the computation of scaled entry frequency, but the added
1799  // complexity is not worth it unless this scaling shows up high in the
1800  // profiles.
1801  auto CallSiteBB = Call.getParent();
1802  auto CallSiteFreq = CallerBFI->getBlockFreq(CallSiteBB).getFrequency();
1803  auto CallerEntryFreq = CallerBFI->getEntryFreq();
1804  if (CallSiteFreq >= CallerEntryFreq * HotCallSiteRelFreq)
1805  return Params.LocallyHotCallSiteThreshold;
1806 
1807  // Otherwise treat it normally.
1808  return None;
1809 }
1810 
1811 void InlineCostCallAnalyzer::updateThreshold(CallBase &Call, Function &Callee) {
1812  // If no size growth is allowed for this inlining, set Threshold to 0.
1813  if (!allowSizeGrowth(Call)) {
1814  Threshold = 0;
1815  return;
1816  }
1817 
1818  Function *Caller = Call.getCaller();
1819 
1820  // return min(A, B) if B is valid.
1821  auto MinIfValid = [](int A, Optional<int> B) {
1822  return B ? std::min(A, B.value()) : A;
1823  };
1824 
1825  // return max(A, B) if B is valid.
1826  auto MaxIfValid = [](int A, Optional<int> B) {
1827  return B ? std::max(A, B.value()) : A;
1828  };
1829 
1830  // Various bonus percentages. These are multiplied by Threshold to get the
1831  // bonus values.
1832  // SingleBBBonus: This bonus is applied if the callee has a single reachable
1833  // basic block at the given callsite context. This is speculatively applied
1834  // and withdrawn if more than one basic block is seen.
1835  //
1836  // LastCallToStaticBonus: This large bonus is applied to ensure the inlining
1837  // of the last call to a static function as inlining such functions is
1838  // guaranteed to reduce code size.
1839  //
1840  // These bonus percentages may be set to 0 based on properties of the caller
1841  // and the callsite.
1842  int SingleBBBonusPercent = 50;
1843  int VectorBonusPercent = TTI.getInlinerVectorBonusPercent();
1844  int LastCallToStaticBonus = InlineConstants::LastCallToStaticBonus;
1845 
1846  // Lambda to set all the above bonus and bonus percentages to 0.
1847  auto DisallowAllBonuses = [&]() {
1848  SingleBBBonusPercent = 0;
1849  VectorBonusPercent = 0;
1850  LastCallToStaticBonus = 0;
1851  };
1852 
1853  // Use the OptMinSizeThreshold or OptSizeThreshold knob if they are available
1854  // and reduce the threshold if the caller has the necessary attribute.
1855  if (Caller->hasMinSize()) {
1856  Threshold = MinIfValid(Threshold, Params.OptMinSizeThreshold);
1857  // For minsize, we want to disable the single BB bonus and the vector
1858  // bonuses, but not the last-call-to-static bonus. Inlining the last call to
1859  // a static function will, at the minimum, eliminate the parameter setup and
1860  // call/return instructions.
1861  SingleBBBonusPercent = 0;
1862  VectorBonusPercent = 0;
1863  } else if (Caller->hasOptSize())
1864  Threshold = MinIfValid(Threshold, Params.OptSizeThreshold);
1865 
1866  // Adjust the threshold based on inlinehint attribute and profile based
1867  // hotness information if the caller does not have MinSize attribute.
1868  if (!Caller->hasMinSize()) {
1869  if (Callee.hasFnAttribute(Attribute::InlineHint))
1870  Threshold = MaxIfValid(Threshold, Params.HintThreshold);
1871 
1872  // FIXME: After switching to the new passmanager, simplify the logic below
1873  // by checking only the callsite hotness/coldness as we will reliably
1874  // have local profile information.
1875  //
1876  // Callsite hotness and coldness can be determined if sample profile is
1877  // used (which adds hotness metadata to calls) or if caller's
1878  // BlockFrequencyInfo is available.
1879  BlockFrequencyInfo *CallerBFI = GetBFI ? &(GetBFI(*Caller)) : nullptr;
1880  auto HotCallSiteThreshold = getHotCallSiteThreshold(Call, CallerBFI);
1881  if (!Caller->hasOptSize() && HotCallSiteThreshold) {
1882  LLVM_DEBUG(dbgs() << "Hot callsite.\n");
1883  // FIXME: This should update the threshold only if it exceeds the
1884  // current threshold, but AutoFDO + ThinLTO currently relies on this
1885  // behavior to prevent inlining of hot callsites during ThinLTO
1886  // compile phase.
1887  Threshold = *HotCallSiteThreshold;
1888  } else if (isColdCallSite(Call, CallerBFI)) {
1889  LLVM_DEBUG(dbgs() << "Cold callsite.\n");
1890  // Do not apply bonuses for a cold callsite including the
1891  // LastCallToStatic bonus. While this bonus might result in code size
1892  // reduction, it can cause the size of a non-cold caller to increase
1893  // preventing it from being inlined.
1894  DisallowAllBonuses();
1895  Threshold = MinIfValid(Threshold, Params.ColdCallSiteThreshold);
1896  } else if (PSI) {
1897  // Use callee's global profile information only if we have no way of
1898  // determining this via callsite information.
1899  if (PSI->isFunctionEntryHot(&Callee)) {
1900  LLVM_DEBUG(dbgs() << "Hot callee.\n");
1901  // If callsite hotness cannot be determined, we may still know
1902  // that the callee is hot and treat it as a weaker hint for threshold
1903  // increase.
1904  Threshold = MaxIfValid(Threshold, Params.HintThreshold);
1905  } else if (PSI->isFunctionEntryCold(&Callee)) {
1906  LLVM_DEBUG(dbgs() << "Cold callee.\n");
1907  // Do not apply bonuses for a cold callee including the
1908  // LastCallToStatic bonus. While this bonus might result in code size
1909  // reduction, it can cause the size of a non-cold caller to increase
1910  // preventing it from being inlined.
1911  DisallowAllBonuses();
1912  Threshold = MinIfValid(Threshold, Params.ColdThreshold);
1913  }
1914  }
1915  }
1916 
1917  Threshold += TTI.adjustInliningThreshold(&Call);
1918 
1919  // Finally, take the target-specific inlining threshold multiplier into
1920  // account.
1921  Threshold *= TTI.getInliningThresholdMultiplier();
1922 
1923  SingleBBBonus = Threshold * SingleBBBonusPercent / 100;
1924  VectorBonus = Threshold * VectorBonusPercent / 100;
1925 
1926  // If there is only one call of the function, and it has internal linkage,
1927  // the cost of inlining it drops dramatically. It may seem odd to update
1928  // Cost in updateThreshold, but the bonus depends on the logic in this method.
1929  if (isSoleCallToLocalFunction(Call, F)) {
1930  Cost -= LastCallToStaticBonus;
1931  StaticBonusApplied = LastCallToStaticBonus;
1932  }
1933 }
1934 
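// Editorial sketch of the bonus arithmetic above, assuming stock knobs
// (Threshold = 225, SingleBBBonusPercent = 50, a vector bonus percent of
// 150 from TTI, and a threshold multiplier of 1):
//
//   SingleBBBonus = 225 * 50 / 100  = 112
//   VectorBonus   = 225 * 150 / 100 = 337
//
// Both are granted speculatively and withdrawn later if the callee turns
// out to have multiple blocks or too few vector instructions.
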
1935 bool CallAnalyzer::visitCmpInst(CmpInst &I) {
1936  Value *LHS = I.getOperand(0), *RHS = I.getOperand(1);
1937  // First try to handle simplified comparisons.
1938  if (simplifyInstruction(I))
1939  return true;
1940 
1941  if (I.getOpcode() == Instruction::FCmp)
1942  return false;
1943 
1944  // Otherwise look for a comparison between constant offset pointers with
1945  // a common base.
1946  Value *LHSBase, *RHSBase;
1947  APInt LHSOffset, RHSOffset;
1948  std::tie(LHSBase, LHSOffset) = ConstantOffsetPtrs.lookup(LHS);
1949  if (LHSBase) {
1950  std::tie(RHSBase, RHSOffset) = ConstantOffsetPtrs.lookup(RHS);
1951  if (RHSBase && LHSBase == RHSBase) {
1952  // We have common bases, fold the icmp to a constant based on the
1953  // offsets.
1954  Constant *CLHS = ConstantInt::get(LHS->getContext(), LHSOffset);
1955  Constant *CRHS = ConstantInt::get(RHS->getContext(), RHSOffset);
1956  if (Constant *C = ConstantExpr::getICmp(I.getPredicate(), CLHS, CRHS)) {
1957  SimplifiedValues[&I] = C;
1958  ++NumConstantPtrCmps;
1959  return true;
1960  }
1961  }
1962  }
1963 
1964  // If the comparison is an equality comparison with null, we can simplify it
1965  // if we know the value (argument) can't be null.
1966  if (I.isEquality() && isa<ConstantPointerNull>(I.getOperand(1)) &&
1967  isKnownNonNullInCallee(I.getOperand(0))) {
1968  bool IsNotEqual = I.getPredicate() == CmpInst::ICMP_NE;
1969  SimplifiedValues[&I] = IsNotEqual ? ConstantInt::getTrue(I.getType())
1970  : ConstantInt::getFalse(I.getType());
1971  return true;
1972  }
1973  return handleSROA(I.getOperand(0), isa<ConstantPointerNull>(I.getOperand(1)));
1974 }
1975 
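// Editorial sketch (not from the source): the common-base fold above, with
// ConstantOffsetPtrs mapping %p -> (%a, 8) and %q -> (%a, 4):
//
//   %c = icmp ugt ptr %p, %q   ; folds to (8 ugt 4) = i1 true
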
1976 bool CallAnalyzer::visitSub(BinaryOperator &I) {
1977  // Try to handle a special case: we can fold computing the difference of two
1978  // constant-related pointers.
1979  Value *LHS = I.getOperand(0), *RHS = I.getOperand(1);
1980  Value *LHSBase, *RHSBase;
1981  APInt LHSOffset, RHSOffset;
1982  std::tie(LHSBase, LHSOffset) = ConstantOffsetPtrs.lookup(LHS);
1983  if (LHSBase) {
1984  std::tie(RHSBase, RHSOffset) = ConstantOffsetPtrs.lookup(RHS);
1985  if (RHSBase && LHSBase == RHSBase) {
1986  // We have common bases, fold the subtract to a constant based on the
1987  // offsets.
1988  Constant *CLHS = ConstantInt::get(LHS->getContext(), LHSOffset);
1989  Constant *CRHS = ConstantInt::get(RHS->getContext(), RHSOffset);
1990  if (Constant *C = ConstantExpr::getSub(CLHS, CRHS)) {
1991  SimplifiedValues[&I] = C;
1992  ++NumConstantPtrDiffs;
1993  return true;
1994  }
1995  }
1996  }
1997 
1998  // Otherwise, fall back to the generic logic for simplifying and handling
1999  // instructions.
2000  return Base::visitSub(I);
2001 }
2002 
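// Editorial sketch (not from the source): the analogous fold for pointer
// differences. With %pi and %qi tracked as (%a, 8) and (%a, 4) after
// ptrtoint:
//
//   %d = sub i64 %pi, %qi   ; folds to 8 - 4 = i64 4
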
2003 bool CallAnalyzer::visitBinaryOperator(BinaryOperator &I) {
2004  Value *LHS = I.getOperand(0), *RHS = I.getOperand(1);
2005  Constant *CLHS = dyn_cast<Constant>(LHS);
2006  if (!CLHS)
2007  CLHS = SimplifiedValues.lookup(LHS);
2008  Constant *CRHS = dyn_cast<Constant>(RHS);
2009  if (!CRHS)
2010  CRHS = SimplifiedValues.lookup(RHS);
2011 
2012  Value *SimpleV = nullptr;
2013  if (auto FI = dyn_cast<FPMathOperator>(&I))
2014  SimpleV = simplifyBinOp(I.getOpcode(), CLHS ? CLHS : LHS, CRHS ? CRHS : RHS,
2015  FI->getFastMathFlags(), DL);
2016  else
2017  SimpleV =
2018  simplifyBinOp(I.getOpcode(), CLHS ? CLHS : LHS, CRHS ? CRHS : RHS, DL);
2019 
2020  if (Constant *C = dyn_cast_or_null<Constant>(SimpleV))
2021  SimplifiedValues[&I] = C;
2022 
2023  if (SimpleV)
2024  return true;
2025 
2026  // Disable any SROA on arguments to arbitrary, unsimplified binary operators.
2027  disableSROA(LHS);
2028  disableSROA(RHS);
2029 
2030  // If the instruction is floating point, and the target says this operation
2031  // is expensive, this may eventually become a library call. Treat the cost
2032  // as such. Unless it's fneg which can be implemented with an xor.
2033  using namespace llvm::PatternMatch;
2034  if (I.getType()->isFloatingPointTy() &&
2035  TTI.getFPOpCost(I.getType()) == TargetTransformInfo::TCC_Expensive &&
2036  !match(&I, m_FNeg(m_Value())))
2037  onCallPenalty();
2038 
2039  return false;
2040 }
2041 
2042 bool CallAnalyzer::visitFNeg(UnaryOperator &I) {
2043  Value *Op = I.getOperand(0);
2044  Constant *COp = dyn_cast<Constant>(Op);
2045  if (!COp)
2046  COp = SimplifiedValues.lookup(Op);
2047 
2048  Value *SimpleV = simplifyFNegInst(
2049  COp ? COp : Op, cast<FPMathOperator>(I).getFastMathFlags(), DL);
2050 
2051  if (Constant *C = dyn_cast_or_null<Constant>(SimpleV))
2052  SimplifiedValues[&I] = C;
2053 
2054  if (SimpleV)
2055  return true;
2056 
2057  // Disable any SROA on arguments to arbitrary, unsimplified fneg.
2058  disableSROA(Op);
2059 
2060  return false;
2061 }
2062 
2063 bool CallAnalyzer::visitLoad(LoadInst &I) {
2064  if (handleSROA(I.getPointerOperand(), I.isSimple()))
2065  return true;
2066 
2067  // If the data is already loaded from this address and hasn't been clobbered
2068  // by any stores or calls, this load is likely to be redundant and can be
2069  // eliminated.
2070  if (EnableLoadElimination &&
2071  !LoadAddrSet.insert(I.getPointerOperand()).second && I.isUnordered()) {
2072  onLoadEliminationOpportunity();
2073  return true;
2074  }
2075 
2076  onMemAccess();
2077  return false;
2078 }
2079 
2080 bool CallAnalyzer::visitStore(StoreInst &I) {
2081  if (handleSROA(I.getPointerOperand(), I.isSimple()))
2082  return true;
2083 
2084  // The store can potentially clobber loads and prevent repeated loads from
2085  // being eliminated.
2086  // FIXME:
2087  // 1. We can probably keep an initial set of eliminatable loads subtracted
2088  // from the cost even when we finally see a store. We just need to disable
2089  // *further* accumulation of elimination savings.
2090  // 2. We should probably at some point thread MemorySSA for the callee into
2091  // this and then use that to actually compute *really* precise savings.
2092  disableLoadElimination();
2093 
2094  onMemAccess();
2095  return false;
2096 }
2097 
2098 bool CallAnalyzer::visitExtractValue(ExtractValueInst &I) {
2099  // Constant folding for extract value is trivial.
2100  if (simplifyInstruction(I))
2101  return true;
2102 
2103  // SROA can't look through these, but they may be free.
2104  return Base::visitExtractValue(I);
2105 }
2106 
2107 bool CallAnalyzer::visitInsertValue(InsertValueInst &I) {
2108  // Constant folding for insert value is trivial.
2109  if (simplifyInstruction(I))
2110  return true;
2111 
2112  // SROA can't look through these, but they may be free.
2113  return Base::visitInsertValue(I);
2114 }
2115 
2116 /// Try to simplify a call site.
2117 ///
2118 /// Takes a concrete function and callsite and tries to actually simplify it by
2119 /// analyzing the arguments and call itself with instsimplify. Returns true if
2120 /// it has simplified the callsite to some other entity (a constant), making it
2121 /// free.
2122 bool CallAnalyzer::simplifyCallSite(Function *F, CallBase &Call) {
2123  // FIXME: Using the instsimplify logic directly for this is inefficient
2124  // because we have to continually rebuild the argument list even when no
2125  // simplifications can be performed. Until that is fixed with remapping
2126  // inside of instsimplify, directly constant fold calls here.
2127  if (!canConstantFoldCallTo(&Call, F))
2128  return false;
2129 
2130  // Try to re-map the arguments to constants.
2131  SmallVector<Constant *, 4> ConstantArgs;
2132  ConstantArgs.reserve(Call.arg_size());
2133  for (Value *I : Call.args()) {
2134  Constant *C = dyn_cast<Constant>(I);
2135  if (!C)
2136  C = dyn_cast_or_null<Constant>(SimplifiedValues.lookup(I));
2137  if (!C)
2138  return false; // This argument doesn't map to a constant.
2139 
2140  ConstantArgs.push_back(C);
2141  }
2142  if (Constant *C = ConstantFoldCall(&Call, F, ConstantArgs)) {
2143  SimplifiedValues[&Call] = C;
2144  return true;
2145  }
2146 
2147  return false;
2148 }
2149 
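// Editorial sketch (not from the source): a call this routine can fold once
// its argument is known. If %x has simplified to double -3.0:
//
//   %r = call double @llvm.fabs.f64(double %x)   ; -> double 3.0
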
2150 bool CallAnalyzer::visitCallBase(CallBase &Call) {
2151  if (!onCallBaseVisitStart(Call))
2152  return true;
2153 
2154  if (Call.hasFnAttr(Attribute::ReturnsTwice) &&
2155  !F.hasFnAttribute(Attribute::ReturnsTwice)) {
2156  // This aborts the entire analysis.
2157  ExposesReturnsTwice = true;
2158  return false;
2159  }
2160  if (isa<CallInst>(Call) && cast<CallInst>(Call).cannotDuplicate())
2161  ContainsNoDuplicateCall = true;
2162 
2163  Function *F = Call.getCalledFunction();
2164  bool IsIndirectCall = !F;
2165  if (IsIndirectCall) {
2166  // Check if this happens to be an indirect function call to a known function
2167  // in this inline context. If not, we've done all we can.
2168  Value *Callee = Call.getCalledOperand();
2169  F = dyn_cast_or_null<Function>(SimplifiedValues.lookup(Callee));
2170  if (!F || F->getFunctionType() != Call.getFunctionType()) {
2171  onCallArgumentSetup(Call);
2172 
2173  if (!Call.onlyReadsMemory())
2174  disableLoadElimination();
2175  return Base::visitCallBase(Call);
2176  }
2177  }
2178 
2179  assert(F && "Expected a call to a known function");
2180 
2181  // When we have a concrete function, first try to simplify it directly.
2182  if (simplifyCallSite(F, Call))
2183  return true;
2184 
2185  // Next check if it is an intrinsic we know about.
2186  // FIXME: Lift this into part of the InstVisitor.
2187  if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(&Call)) {
2188  switch (II->getIntrinsicID()) {
2189  default:
2190  if (!Call.onlyReadsMemory() && !isAssumeLikeIntrinsic(II))
2191  disableLoadElimination();
2192  return Base::visitCallBase(Call);
2193 
2194  case Intrinsic::load_relative:
2195  onLoadRelativeIntrinsic();
2196  return false;
2197 
2198  case Intrinsic::memset:
2199  case Intrinsic::memcpy:
2200  case Intrinsic::memmove:
2201  disableLoadElimination();
2202  // SROA can usually chew through these intrinsics, but they aren't free.
2203  return false;
2204  case Intrinsic::icall_branch_funnel:
2205  case Intrinsic::localescape:
2206  HasUninlineableIntrinsic = true;
2207  return false;
2208  case Intrinsic::vastart:
2209  InitsVargArgs = true;
2210  return false;
2211  case Intrinsic::launder_invariant_group:
2212  case Intrinsic::strip_invariant_group:
2213  if (auto *SROAArg = getSROAArgForValueOrNull(II->getOperand(0)))
2214  SROAArgValues[II] = SROAArg;
2215  return true;
2216  case Intrinsic::is_constant:
2217  return simplifyIntrinsicCallIsConstant(Call);
2218  }
2219  }
2220 
2221  if (F == Call.getFunction()) {
2222  // This flag will fully abort the analysis, so don't bother with anything
2223  // else.
2224  IsRecursiveCall = true;
2225  if (!AllowRecursiveCall)
2226  return false;
2227  }
2228 
2229  if (TTI.isLoweredToCall(F)) {
2230  onLoweredCall(F, Call, IsIndirectCall);
2231  }
2232 
2233  if (!(Call.onlyReadsMemory() || (IsIndirectCall && F->onlyReadsMemory())))
2234  disableLoadElimination();
2235  return Base::visitCallBase(Call);
2236 }
2237 
2238 bool CallAnalyzer::visitReturnInst(ReturnInst &RI) {
2239  // At least one return instruction will be free after inlining.
2240  bool Free = !HasReturn;
2241  HasReturn = true;
2242  return Free;
2243 }
2244 
2245 bool CallAnalyzer::visitBranchInst(BranchInst &BI) {
2246  // We model unconditional branches as essentially free -- they really
2247  // shouldn't exist at all, but handling them makes the behavior of the
2248  // inliner more regular and predictable. Interestingly, conditional branches
2249  // which will fold away are also free.
2250  return BI.isUnconditional() || isa<ConstantInt>(BI.getCondition()) ||
2251  isa_and_nonnull<ConstantInt>(
2252  SimplifiedValues.lookup(BI.getCondition()));
2253 }
2254 
2255 bool CallAnalyzer::visitSelectInst(SelectInst &SI) {
2256  bool CheckSROA = SI.getType()->isPointerTy();
2257  Value *TrueVal = SI.getTrueValue();
2258  Value *FalseVal = SI.getFalseValue();
2259 
2260  Constant *TrueC = dyn_cast<Constant>(TrueVal);
2261  if (!TrueC)
2262  TrueC = SimplifiedValues.lookup(TrueVal);
2263  Constant *FalseC = dyn_cast<Constant>(FalseVal);
2264  if (!FalseC)
2265  FalseC = SimplifiedValues.lookup(FalseVal);
2266  Constant *CondC =
2267  dyn_cast_or_null<Constant>(SimplifiedValues.lookup(SI.getCondition()));
2268 
2269  if (!CondC) {
2270  // Select C, X, X => X
2271  if (TrueC == FalseC && TrueC) {
2272  SimplifiedValues[&SI] = TrueC;
2273  return true;
2274  }
2275 
2276  if (!CheckSROA)
2277  return Base::visitSelectInst(SI);
2278 
2279  std::pair<Value *, APInt> TrueBaseAndOffset =
2280  ConstantOffsetPtrs.lookup(TrueVal);
2281  std::pair<Value *, APInt> FalseBaseAndOffset =
2282  ConstantOffsetPtrs.lookup(FalseVal);
2283  if (TrueBaseAndOffset == FalseBaseAndOffset && TrueBaseAndOffset.first) {
2284  ConstantOffsetPtrs[&SI] = TrueBaseAndOffset;
2285 
2286  if (auto *SROAArg = getSROAArgForValueOrNull(TrueVal))
2287  SROAArgValues[&SI] = SROAArg;
2288  return true;
2289  }
2290 
2291  return Base::visitSelectInst(SI);
2292  }
2293 
2294  // Select condition is a constant.
2295  Value *SelectedV = CondC->isAllOnesValue() ? TrueVal
2296  : (CondC->isNullValue()) ? FalseVal
2297  : nullptr;
2298  if (!SelectedV) {
2299  // Condition is a vector constant that is not all 1s or all 0s. If all
2300  // operands are constants, ConstantExpr::getSelect() can handle the cases
2301  // such as select vectors.
2302  if (TrueC && FalseC) {
2303  if (auto *C = ConstantExpr::getSelect(CondC, TrueC, FalseC)) {
2304  SimplifiedValues[&SI] = C;
2305  return true;
2306  }
2307  }
2308  return Base::visitSelectInst(SI);
2309  }
2310 
2311  // Condition is either all 1s or all 0s. SI can be simplified.
2312  if (Constant *SelectedC = dyn_cast<Constant>(SelectedV)) {
2313  SimplifiedValues[&SI] = SelectedC;
2314  return true;
2315  }
2316 
2317  if (!CheckSROA)
2318  return true;
2319 
2320  std::pair<Value *, APInt> BaseAndOffset =
2321  ConstantOffsetPtrs.lookup(SelectedV);
2322  if (BaseAndOffset.first) {
2323  ConstantOffsetPtrs[&SI] = BaseAndOffset;
2324 
2325  if (auto *SROAArg = getSROAArgForValueOrNull(SelectedV))
2326  SROAArgValues[&SI] = SROAArg;
2327  }
2328 
2329  return true;
2330 }
2331 
2332 bool CallAnalyzer::visitSwitchInst(SwitchInst &SI) {
2333  // We model unconditional switches as free, see the comments on handling
2334  // branches.
2335  if (isa<ConstantInt>(SI.getCondition()))
2336  return true;
2337  if (Value *V = SimplifiedValues.lookup(SI.getCondition()))
2338  if (isa<ConstantInt>(V))
2339  return true;
2340 
2341  // Assume the most general case where the switch is lowered into
2342  // either a jump table, bit test, or a balanced binary tree consisting of
2343  // case clusters without merging adjacent clusters with the same
2344  // destination. We do not consider the switches that are lowered with a mix
2345  // of jump table/bit test/binary search tree. The cost of the switch is
2346  // proportional to the size of the tree or the size of jump table range.
2347  //
2348  // NB: We convert large switches which are just used to initialize large phi
2349  // nodes to lookup tables instead in simplifycfg, so this shouldn't prevent
2350  // inlining those. It will prevent inlining in cases where the optimization
2351  // does not (yet) fire.
2352 
2353  unsigned JumpTableSize = 0;
2354  BlockFrequencyInfo *BFI = GetBFI ? &(GetBFI(F)) : nullptr;
2355  unsigned NumCaseCluster =
2356  TTI.getEstimatedNumberOfCaseClusters(SI, JumpTableSize, PSI, BFI);
2357 
2358  onFinalizeSwitch(JumpTableSize, NumCaseCluster);
2359  return false;
2360 }
2361 
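// Editorial note: as an example of the model above, a switch over a dense
// range such as 0..63 is typically estimated as one jump-table cluster
// (NumCaseCluster = 1, JumpTableSize = 64), while a few widely scattered
// cases come back as separate clusters costed as a balanced binary tree.
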
2362 bool CallAnalyzer::visitIndirectBrInst(IndirectBrInst &IBI) {
2363  // We never want to inline functions that contain an indirectbr. This is
2364  // incorrect because all the blockaddress's (in static global initializers
2365  // for example) would be referring to the original function, and this
2366  // indirect jump would jump from the inlined copy of the function into the
2367  // original function which is extremely undefined behavior.
2368  // FIXME: This logic isn't really right; we can safely inline functions with
2369  // indirectbr's as long as no other function or global references the
2370  // blockaddress of a block within the current function.
2371  HasIndirectBr = true;
2372  return false;
2373 }
2374 
2375 bool CallAnalyzer::visitResumeInst(ResumeInst &RI) {
2376  // FIXME: It's not clear that a single instruction is an accurate model for
2377  // the inline cost of a resume instruction.
2378  return false;
2379 }
2380 
2381 bool CallAnalyzer::visitCleanupReturnInst(CleanupReturnInst &CRI) {
2382  // FIXME: It's not clear that a single instruction is an accurate model for
2383  // the inline cost of a cleanupret instruction.
2384  return false;
2385 }
2386 
2387 bool CallAnalyzer::visitCatchReturnInst(CatchReturnInst &CRI) {
2388  // FIXME: It's not clear that a single instruction is an accurate model for
2389  // the inline cost of a catchret instruction.
2390  return false;
2391 }
2392 
2393 bool CallAnalyzer::visitUnreachableInst(UnreachableInst &I) {
2394  // FIXME: It might be reasonable to discount the cost of instructions leading
2395  // to unreachable as they have the lowest possible impact on both runtime and
2396  // code size.
2397  return true; // No actual code is needed for unreachable.
2398 }
2399 
2400 bool CallAnalyzer::visitInstruction(Instruction &I) {
2401  // Some instructions are free. All of the free intrinsics can also be
2402  // handled by SROA, etc.
2403  if (TTI.getInstructionCost(&I, TargetTransformInfo::TCK_SizeAndLatency) ==
2404  TargetTransformInfo::TCC_Free)
2405  return true;
2406 
2407  // We found something we don't understand or can't handle. Mark any SROA-able
2408  // values in the operand list as no longer viable.
2409  for (const Use &Op : I.operands())
2410  disableSROA(Op);
2411 
2412  return false;
2413 }
2414 
2415 /// Analyze a basic block for its contribution to the inline cost.
2416 ///
2417 /// This method walks the analyzer over every instruction in the given basic
2418 /// block and accounts for their cost during inlining at this callsite. It
2419 /// aborts early if the threshold has been exceeded or an impossible to inline
2420 /// construct has been detected. It returns false if inlining is no longer
2421 /// viable, and true if inlining remains viable.
2422 InlineResult
2423 CallAnalyzer::analyzeBlock(BasicBlock *BB,
2424  SmallPtrSetImpl<const Value *> &EphValues) {
2425  for (Instruction &I : *BB) {
2426  // FIXME: Currently, the number of instructions in a function, regardless of
2427  // our ability to simplify them during inlining to constants or dead code,
2428  // is actually used by the vector bonus heuristic. As long as that's true,
2429  // we have to special case debug intrinsics here to prevent differences in
2430  // inlining due to debug symbols. Eventually, the number of unsimplified
2431  // instructions shouldn't factor into the cost computation, but until then,
2432  // hack around it here.
2433  // Similarly, skip pseudo-probes.
2434  if (I.isDebugOrPseudoInst())
2435  continue;
2436 
2437  // Skip ephemeral values.
2438  if (EphValues.count(&I))
2439  continue;
2440 
2441  ++NumInstructions;
2442  if (isa<ExtractElementInst>(I) || I.getType()->isVectorTy())
2443  ++NumVectorInstructions;
2444 
2445  // If the instruction simplified to a constant, there is no cost to this
2446  // instruction. Visit the instructions using our InstVisitor to account for
2447  // all of the per-instruction logic. The visit tree returns true if we
2448  // consumed the instruction in any way, and false if the instruction's base
2449  // cost should count against inlining.
2450  onInstructionAnalysisStart(&I);
2451 
2452  if (Base::visit(&I))
2453  ++NumInstructionsSimplified;
2454  else
2455  onMissedSimplification();
2456 
2457  onInstructionAnalysisFinish(&I);
2458  using namespace ore;
2459  // If visiting this instruction detected an uninlinable pattern, abort.
2460  InlineResult IR = InlineResult::success();
2461  if (IsRecursiveCall && !AllowRecursiveCall)
2462  IR = InlineResult::failure("recursive");
2463  else if (ExposesReturnsTwice)
2464  IR = InlineResult::failure("exposes returns twice");
2465  else if (HasDynamicAlloca)
2466  IR = InlineResult::failure("dynamic alloca");
2467  else if (HasIndirectBr)
2468  IR = InlineResult::failure("indirect branch");
2469  else if (HasUninlineableIntrinsic)
2470  IR = InlineResult::failure("uninlinable intrinsic");
2471  else if (InitsVargArgs)
2472  IR = InlineResult::failure("varargs");
2473  if (!IR.isSuccess()) {
2474  if (ORE)
2475  ORE->emit([&]() {
2476  return OptimizationRemarkMissed(DEBUG_TYPE, "NeverInline",
2477  &CandidateCall)
2478  << NV("Callee", &F) << " has uninlinable pattern ("
2479  << NV("InlineResult", IR.getFailureReason())
2480  << ") and cost is not fully computed";
2481  });
2482  return IR;
2483  }
2484 
2485  // If the caller is a recursive function then we don't want to inline
2486  // functions which allocate a lot of stack space because it would increase
2487  // the caller stack usage dramatically.
2488  if (IsCallerRecursive && AllocatedSize > RecurStackSizeThreshold) {
2489  auto IR =
2490  InlineResult::failure("recursive and allocates too much stack space");
2491  if (ORE)
2492  ORE->emit([&]() {
2493  return OptimizationRemarkMissed(DEBUG_TYPE, "NeverInline",
2494  &CandidateCall)
2495  << NV("Callee", &F) << " is "
2496  << NV("InlineResult", IR.getFailureReason())
2497  << ". Cost is not fully computed";
2498  });
2499  return IR;
2500  }
2501 
2502  if (shouldStop())
2503  return InlineResult::failure(
2504  "Call site analysis is not favorable to inlining.");
2505  }
2506 
2507  return InlineResult::success();
2508 }
2509 
2510 /// Compute the base pointer and cumulative constant offsets for V.
2511 ///
2512 /// This strips all constant offsets off of V, leaving it the base pointer, and
2513 /// accumulates the total constant offset applied in the returned constant. It
2514 /// returns null if V is not a pointer, and returns the constant '0' if there
2515 /// are no constant offsets applied.
2516 ConstantInt *CallAnalyzer::stripAndComputeInBoundsConstantOffsets(Value *&V) {
2517  if (!V->getType()->isPointerTy())
2518  return nullptr;
2519 
2520  unsigned AS = V->getType()->getPointerAddressSpace();
2521  unsigned IntPtrWidth = DL.getIndexSizeInBits(AS);
2522  APInt Offset = APInt::getZero(IntPtrWidth);
2523 
2524  // Even though we don't look through PHI nodes, we could be called on an
2525  // instruction in an unreachable block, which may be on a cycle.
2526  SmallPtrSet<Value *, 4> Visited;
2527  Visited.insert(V);
2528  do {
2529  if (GEPOperator *GEP = dyn_cast<GEPOperator>(V)) {
2530  if (!GEP->isInBounds() || !accumulateGEPOffset(*GEP, Offset))
2531  return nullptr;
2532  V = GEP->getPointerOperand();
2533  } else if (Operator::getOpcode(V) == Instruction::BitCast) {
2534  V = cast<Operator>(V)->getOperand(0);
2535  } else if (GlobalAlias *GA = dyn_cast<GlobalAlias>(V)) {
2536  if (GA->isInterposable())
2537  break;
2538  V = GA->getAliasee();
2539  } else {
2540  break;
2541  }
2542  assert(V->getType()->isPointerTy() && "Unexpected operand type!");
2543  } while (Visited.insert(V).second);
2544 
2545  Type *IdxPtrTy = DL.getIndexType(V->getType());
2546  return cast<ConstantInt>(ConstantInt::get(IdxPtrTy, Offset));
2547 }
2548 
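// Editorial sketch (not from the source): given a call argument such as
//
//   %a = alloca [4 x i32]
//   %p = getelementptr inbounds [4 x i32], ptr %a, i64 0, i64 2
//   call void @f(ptr %p)
//
// this routine rewrites V from %p to %a and returns the accumulated offset
// 8, which analyze() records in ConstantOffsetPtrs for the parameter.
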
2549 /// Find dead blocks due to deleted CFG edges during inlining.
2550 ///
2551 /// If we know the successor of the current block, \p CurrBB, has to be \p
2552 /// NextBB, the other successors of \p CurrBB are dead if these successors have
2553 /// no live incoming CFG edges. If one block is found to be dead, we can
2554 /// continue growing the dead block list by checking the successors of the dead
2555 /// blocks to see if all their incoming edges are dead or not.
2556 void CallAnalyzer::findDeadBlocks(BasicBlock *CurrBB, BasicBlock *NextBB) {
2557  auto IsEdgeDead = [&](BasicBlock *Pred, BasicBlock *Succ) {
2558  // A CFG edge is dead if the predecessor is dead or the predecessor has a
2559  // known successor which is not the one under exam.
2560  return (DeadBlocks.count(Pred) ||
2561  (KnownSuccessors[Pred] && KnownSuccessors[Pred] != Succ));
2562  };
2563 
2564  auto IsNewlyDead = [&](BasicBlock *BB) {
2565  // If all the edges to a block are dead, the block is also dead.
2566  return (!DeadBlocks.count(BB) &&
2567  llvm::all_of(predecessors(BB),
2568  [&](BasicBlock *P) { return IsEdgeDead(P, BB); }));
2569  };
2570 
2571  for (BasicBlock *Succ : successors(CurrBB)) {
2572  if (Succ == NextBB || !IsNewlyDead(Succ))
2573  continue;
2574  SmallVector<BasicBlock *, 4> NewDead;
2575  NewDead.push_back(Succ);
2576  while (!NewDead.empty()) {
2577  BasicBlock *Dead = NewDead.pop_back_val();
2578  if (DeadBlocks.insert(Dead).second)
2579  // Continue growing the dead block lists.
2580  for (BasicBlock *S : successors(Dead))
2581  if (IsNewlyDead(S))
2582  NewDead.push_back(S);
2583  }
2584  }
2585 }
2586 
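// Editorial sketch (not from the source): if the branch in %entry folds so
// that KnownSuccessors[%entry] = %then, the edge %entry -> %else is dead.
// If %else has no other live predecessors it joins DeadBlocks, and its own
// successors are then re-examined in the same way.
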
2587 /// Analyze a call site for potential inlining.
2588 ///
2589 /// Returns true if inlining this call is viable, and false if it is not
2590 /// viable. It computes the cost and adjusts the threshold based on numerous
2591 /// factors and heuristics. If this method returns false but the computed cost
2592 /// is below the computed threshold, then inlining was forcibly disabled by
2593 /// some artifact of the routine.
2594 InlineResult CallAnalyzer::analyze() {
2595  ++NumCallsAnalyzed;
2596 
2597  auto Result = onAnalysisStart();
2598  if (!Result.isSuccess())
2599  return Result;
2600 
2601  if (F.empty())
2602  return InlineResult::success();
2603 
2604  Function *Caller = CandidateCall.getFunction();
2605  // Check if the caller function is recursive itself.
2606  for (User *U : Caller->users()) {
2607  CallBase *Call = dyn_cast<CallBase>(U);
2608  if (Call && Call->getFunction() == Caller) {
2609  IsCallerRecursive = true;
2610  break;
2611  }
2612  }
2613 
2614  // Populate our simplified values by mapping from function arguments to call
2615  // arguments with known important simplifications.
2616  auto CAI = CandidateCall.arg_begin();
2617  for (Argument &FAI : F.args()) {
2618  assert(CAI != CandidateCall.arg_end());
2619  if (Constant *C = dyn_cast<Constant>(CAI))
2620  SimplifiedValues[&FAI] = C;
2621 
2622  Value *PtrArg = *CAI;
2623  if (ConstantInt *C = stripAndComputeInBoundsConstantOffsets(PtrArg)) {
2624  ConstantOffsetPtrs[&FAI] = std::make_pair(PtrArg, C->getValue());
2625 
2626  // We can SROA any pointer arguments derived from alloca instructions.
2627  if (auto *SROAArg = dyn_cast<AllocaInst>(PtrArg)) {
2628  SROAArgValues[&FAI] = SROAArg;
2629  onInitializeSROAArg(SROAArg);
2630  EnabledSROAAllocas.insert(SROAArg);
2631  }
2632  }
2633  ++CAI;
2634  }
2635  NumConstantArgs = SimplifiedValues.size();
2636  NumConstantOffsetPtrArgs = ConstantOffsetPtrs.size();
2637  NumAllocaArgs = SROAArgValues.size();
2638 
2639  // FIXME: If a caller has multiple calls to a callee, we end up recomputing
2640  // the ephemeral values multiple times (and they're completely determined by
2641  // the callee, so this is purely duplicate work).
2642  SmallPtrSet<const Value *, 32> EphValues;
2643  CodeMetrics::collectEphemeralValues(&F, &GetAssumptionCache(F), EphValues);
2644 
2645  // The worklist of live basic blocks in the callee *after* inlining. We avoid
2646  // adding basic blocks of the callee which can be proven to be dead for this
2647  // particular call site in order to get more accurate cost estimates. This
2648  // requires a somewhat heavyweight iteration pattern: we need to walk the
2649  // basic blocks in a breadth-first order as we insert live successors. To
2650  // accomplish this, we use a small-size optimized SetVector, favoring small
2651  // iteration counts because we exit once we cross our threshold.
2652  typedef SetVector<BasicBlock *, SmallVector<BasicBlock *, 16>,
2653  SmallPtrSet<BasicBlock *, 16>>
2654  BBSetVector;
2655  BBSetVector BBWorklist;
2656  BBWorklist.insert(&F.getEntryBlock());
2657 
2658  // Note that we *must not* cache the size, this loop grows the worklist.
2659  for (unsigned Idx = 0; Idx != BBWorklist.size(); ++Idx) {
2660  if (shouldStop())
2661  break;
2662 
2663  BasicBlock *BB = BBWorklist[Idx];
2664  if (BB->empty())
2665  continue;
2666 
2667  onBlockStart(BB);
2668 
2669  // Disallow inlining a blockaddress with uses other than strictly callbr.
2670  // A blockaddress only has defined behavior for an indirect branch in the
2671  // same function, and we do not currently support inlining indirect
2672  // branches. But, the inliner may not see an indirect branch that ends up
2673  // being dead code at a particular call site. If the blockaddress escapes
2674  // the function, e.g., via a global variable, inlining may lead to an
2675  // invalid cross-function reference.
2676  // FIXME: pr/39560: continue relaxing this overt restriction.
2677  if (BB->hasAddressTaken())
2678  for (User *U : BlockAddress::get(&*BB)->users())
2679  if (!isa<CallBrInst>(*U))
2680  return InlineResult::failure("blockaddress used outside of callbr");
2681 
2682  // Analyze the cost of this block. If we blow through the threshold, this
2683  // returns false, and we can bail out.
2684  InlineResult IR = analyzeBlock(BB, EphValues);
2685  if (!IR.isSuccess())
2686  return IR;
2687 
2688  Instruction *TI = BB->getTerminator();
2689 
2690  // Add in the live successors by first checking whether we have terminator
2691  // that may be simplified based on the values simplified by this call.
2692  if (BranchInst *BI = dyn_cast<BranchInst>(TI)) {
2693  if (BI->isConditional()) {
2694  Value *Cond = BI->getCondition();
2695  if (ConstantInt *SimpleCond =
2696  dyn_cast_or_null<ConstantInt>(SimplifiedValues.lookup(Cond))) {
2697  BasicBlock *NextBB = BI->getSuccessor(SimpleCond->isZero() ? 1 : 0);
2698  BBWorklist.insert(NextBB);
2699  KnownSuccessors[BB] = NextBB;
2700  findDeadBlocks(BB, NextBB);
2701  continue;
2702  }
2703  }
2704  } else if (SwitchInst *SI = dyn_cast<SwitchInst>(TI)) {
2705  Value *Cond = SI->getCondition();
2706  if (ConstantInt *SimpleCond =
2707  dyn_cast_or_null<ConstantInt>(SimplifiedValues.lookup(Cond))) {
2708  BasicBlock *NextBB = SI->findCaseValue(SimpleCond)->getCaseSuccessor();
2709  BBWorklist.insert(NextBB);
2710  KnownSuccessors[BB] = NextBB;
2711  findDeadBlocks(BB, NextBB);
2712  continue;
2713  }
2714  }
2715 
2716  // If we're unable to select a particular successor, just count all of
2717  // them.
2718  for (unsigned TIdx = 0, TSize = TI->getNumSuccessors(); TIdx != TSize;
2719  ++TIdx)
2720  BBWorklist.insert(TI->getSuccessor(TIdx));
2721 
2722  onBlockAnalyzed(BB);
2723  }
2724 
2725  // If this is a noduplicate call, we can still inline as long as
2726  // inlining this would cause the removal of the caller (so the instruction
2727  // is not actually duplicated, just moved).
2728  if (!isSoleCallToLocalFunction(CandidateCall, F) && ContainsNoDuplicateCall)
2729  return InlineResult::failure("noduplicate");
2730 
2731  // If the callee's stack size exceeds the user-specified threshold,
2732  // do not let it be inlined.
2733  // The command line option overrides a limit set in the function attributes.
2734  size_t FinalStackSizeThreshold = StackSizeThreshold;
2735  if (!StackSizeThreshold.getNumOccurrences())
2736  if (Optional<int> AttrMaxStackSize = getStringFnAttrAsInt(
2737  Caller, InlineConstants::MaxInlineStackSizeAttributeName))
2738  FinalStackSizeThreshold = *AttrMaxStackSize;
2739  if (AllocatedSize > FinalStackSizeThreshold)
2740  return InlineResult::failure("stacksize");
2741 
2742  return finalizeAnalysis();
2743 }
2744 
2745 void InlineCostCallAnalyzer::print(raw_ostream &OS) {
2746 #define DEBUG_PRINT_STAT(x) OS << " " #x ": " << x << "\n"
2747  if (PrintInstructionComments)
2748  F.print(OS, &Writer);
2749  DEBUG_PRINT_STAT(NumConstantArgs);
2750  DEBUG_PRINT_STAT(NumConstantOffsetPtrArgs);
2751  DEBUG_PRINT_STAT(NumAllocaArgs);
2752  DEBUG_PRINT_STAT(NumConstantPtrCmps);
2753  DEBUG_PRINT_STAT(NumConstantPtrDiffs);
2754  DEBUG_PRINT_STAT(NumInstructionsSimplified);
2755  DEBUG_PRINT_STAT(NumInstructions);
2756  DEBUG_PRINT_STAT(SROACostSavings);
2757  DEBUG_PRINT_STAT(SROACostSavingsLost);
2758  DEBUG_PRINT_STAT(LoadEliminationCost);
2759  DEBUG_PRINT_STAT(ContainsNoDuplicateCall);
2760  DEBUG_PRINT_STAT(Cost);
2761  DEBUG_PRINT_STAT(Threshold);
2762 #undef DEBUG_PRINT_STAT
2763 }
2764 
2765 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2766 /// Dump stats about this call's analysis.
2767 LLVM_DUMP_METHOD void InlineCostCallAnalyzer::dump() { print(dbgs()); }
2768 #endif
2769 
2770 /// Test that there are no attribute conflicts between Caller and Callee
2771 /// that prevent inlining.
2772 static bool functionsHaveCompatibleAttributes(
2773  Function *Caller, Function *Callee, TargetTransformInfo &TTI,
2774  function_ref<const TargetLibraryInfo &(Function &)> &GetTLI) {
2775  // Note that CalleeTLI must be a copy not a reference. The legacy pass manager
2776  // caches the most recently created TLI in the TargetLibraryInfoWrapperPass
2777  // object, and always returns the same object (which is overwritten on each
2778  // GetTLI call). Therefore we copy the first result.
2779  auto CalleeTLI = GetTLI(*Callee);
2780  return (IgnoreTTIInlineCompatible ||
2781  TTI.areInlineCompatible(Caller, Callee)) &&
2782  GetTLI(*Caller).areInlineCompatible(CalleeTLI,
2783  InlineCallerSupersetNoBuiltin) &&
2784  AttributeFuncs::areInlineCompatible(*Caller, *Callee);
2785 }
2786 
2787 int llvm::getCallsiteCost(const CallBase &Call, const DataLayout &DL) {
2788  int64_t Cost = 0;
2789  for (unsigned I = 0, E = Call.arg_size(); I != E; ++I) {
2790  if (Call.isByValArgument(I)) {
2791  // We approximate the number of loads and stores needed by dividing the
2792  // size of the byval type by the target's pointer size.
2793  PointerType *PTy = cast<PointerType>(Call.getArgOperand(I)->getType());
2794  unsigned TypeSize = DL.getTypeSizeInBits(Call.getParamByValType(I));
2795  unsigned AS = PTy->getAddressSpace();
2796  unsigned PointerSize = DL.getPointerSizeInBits(AS);
2797  // Ceiling division.
2798  unsigned NumStores = (TypeSize + PointerSize - 1) / PointerSize;
2799 
2800  // If it generates more than 8 stores it is likely to be expanded as an
2801  // inline memcpy so we take that as an upper bound. Otherwise we assume
2802  // one load and one store per word copied.
2803  // FIXME: The maxStoresPerMemcpy setting from the target should be used
2804  // here instead of a magic number of 8, but it's not available via
2805  // DataLayout.
2806  NumStores = std::min(NumStores, 8U);
2807 
2808  Cost += 2 * NumStores * InstrCost;
2809  } else {
2810  // For non-byval arguments subtract off one instruction per call
2811  // argument.
2812  Cost += InstrCost;
2813  }
2814  }
2815  // The call instruction also disappears after inlining.
2816  Cost += InstrCost;
2817  Cost += CallPenalty;
2818  return std::min<int64_t>(Cost, INT_MAX);
2819 }
2820 
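// Editorial sketch of the arithmetic above, assuming the stock values
// InstrCost = 5 and CallPenalty = 25: one 64-byte byval argument on a
// 64-bit target copies 512 / 64 = 8 words, so
//
//   Cost = 2 * 8 * 5 (copies) + 5 (call) + 25 (penalty) = 110
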
2821 InlineCost llvm::getInlineCost(
2822  CallBase &Call, const InlineParams &Params, TargetTransformInfo &CalleeTTI,
2823  function_ref<AssumptionCache &(Function &)> GetAssumptionCache,
2824  function_ref<const TargetLibraryInfo &(Function &)> GetTLI,
2825  function_ref<BlockFrequencyInfo &(Function &)> GetBFI,
2826  ProfileSummaryInfo *PSI, OptimizationRemarkEmitter *ORE) {
2827  return getInlineCost(Call, Call.getCalledFunction(), Params, CalleeTTI,
2828  GetAssumptionCache, GetTLI, GetBFI, PSI, ORE);
2829 }
2830 
2831 Optional<int> llvm::getInliningCostEstimate(
2832  CallBase &Call, TargetTransformInfo &CalleeTTI,
2833  function_ref<AssumptionCache &(Function &)> GetAssumptionCache,
2834  function_ref<BlockFrequencyInfo &(Function &)> GetBFI,
2835  ProfileSummaryInfo *PSI, OptimizationRemarkEmitter *ORE) {
2836  const InlineParams Params = {/* DefaultThreshold*/ 0,
2837  /*HintThreshold*/ {},
2838  /*ColdThreshold*/ {},
2839  /*OptSizeThreshold*/ {},
2840  /*OptMinSizeThreshold*/ {},
2841  /*HotCallSiteThreshold*/ {},
2842  /*LocallyHotCallSiteThreshold*/ {},
2843  /*ColdCallSiteThreshold*/ {},
2844  /*ComputeFullInlineCost*/ true,
2845  /*EnableDeferral*/ true};
2846 
2847  InlineCostCallAnalyzer CA(*Call.getCalledFunction(), Call, Params, CalleeTTI,
2848  GetAssumptionCache, GetBFI, PSI, ORE, true,
2849  /*IgnoreThreshold*/ true);
2850  auto R = CA.analyze();
2851  if (!R.isSuccess())
2852  return None;
2853  return CA.getCost();
2854 }
2855 
2856 Optional<InlineCostFeatures> llvm::getInliningCostFeatures(
2857  CallBase &Call, TargetTransformInfo &CalleeTTI,
2858  function_ref<AssumptionCache &(Function &)> GetAssumptionCache,
2859  function_ref<BlockFrequencyInfo &(Function &)> GetBFI,
2860  ProfileSummaryInfo *PSI, OptimizationRemarkEmitter *ORE) {
2861  InlineCostFeaturesAnalyzer CFA(CalleeTTI, GetAssumptionCache, GetBFI, PSI,
2862  ORE, *Call.getCalledFunction(), Call);
2863  auto R = CFA.analyze();
2864  if (!R.isSuccess())
2865  return None;
2866  return CFA.features();
2867 }
2868 
2869 Optional<InlineResult> llvm::getAttributeBasedInliningDecision(
2870  CallBase &Call, Function *Callee, TargetTransformInfo &CalleeTTI,
2871  function_ref<const TargetLibraryInfo &(Function &)> GetTLI) {
2872 
2873  // Cannot inline indirect calls.
2874  if (!Callee)
2875  return InlineResult::failure("indirect call");
2876 
2877  // When a callee coroutine function is inlined into a caller coroutine
2878  // function before the coro-split pass, the coro-early pass cannot handle
2879  // this quite well. So we don't inline a coroutine function that has not
2880  // yet been split.
2881  if (Callee->isPresplitCoroutine())
2882  return InlineResult::failure("unsplited coroutine call");
2883 
2884  // Never inline calls with byval arguments that do not have the alloca
2885  // address space. Since byval arguments can be replaced with a copy to an
2886  // alloca, the inlined code would need to be adjusted to handle that the
2887  // argument is in the alloca address space (so it is a little bit complicated
2888  // to solve).
2889  unsigned AllocaAS = Callee->getParent()->getDataLayout().getAllocaAddrSpace();
2890  for (unsigned I = 0, E = Call.arg_size(); I != E; ++I)
2891  if (Call.isByValArgument(I)) {
2892  PointerType *PTy = cast<PointerType>(Call.getArgOperand(I)->getType());
2893  if (PTy->getAddressSpace() != AllocaAS)
2894  return InlineResult::failure("byval arguments without alloca"
2895  " address space");
2896  }
2897 
2898  // Calls to functions with always-inline attributes should be inlined
2899  // whenever possible.
2900  if (Call.hasFnAttr(Attribute::AlwaysInline)) {
2901  if (Call.getAttributes().hasFnAttr(Attribute::NoInline))
2902  return InlineResult::failure("noinline call site attribute");
2903 
2904  auto IsViable = isInlineViable(*Callee);
2905  if (IsViable.isSuccess())
2906  return InlineResult::success();
2907  return InlineResult::failure(IsViable.getFailureReason());
2908  }
2909 
2910  // Never inline functions with conflicting attributes (unless callee has
2911  // always-inline attribute).
2912  Function *Caller = Call.getCaller();
2913  if (!functionsHaveCompatibleAttributes(Caller, Callee, CalleeTTI, GetTLI))
2914  return InlineResult::failure("conflicting attributes");
2915 
2916  // Don't inline this call if the caller has the optnone attribute.
2917  if (Caller->hasOptNone())
2918  return InlineResult::failure("optnone attribute");
2919 
2920  // Don't inline a function that treats null pointer as valid into a caller
2921  // that does not have this attribute.
2922  if (!Caller->nullPointerIsDefined() && Callee->nullPointerIsDefined())
2923  return InlineResult::failure("nullptr definitions incompatible");
2924 
2925  // Don't inline functions which can be interposed at link-time.
2926  if (Callee->isInterposable())
2927  return InlineResult::failure("interposable");
2928 
2929  // Don't inline functions marked noinline.
2930  if (Callee->hasFnAttribute(Attribute::NoInline))
2931  return InlineResult::failure("noinline function attribute");
2932 
2933  // Don't inline call sites marked noinline.
2934  if (Call.isNoInline())
2935  return InlineResult::failure("noinline call site attribute");
2936 
2937  return None;
2938 }
2939 
2940 InlineCost llvm::getInlineCost(
2941  CallBase &Call, Function *Callee, const InlineParams &Params,
2942  TargetTransformInfo &CalleeTTI,
2943  function_ref<AssumptionCache &(Function &)> GetAssumptionCache,
2944  function_ref<const TargetLibraryInfo &(Function &)> GetTLI,
2945  function_ref<BlockFrequencyInfo &(Function &)> GetBFI,
2946  ProfileSummaryInfo *PSI, OptimizationRemarkEmitter *ORE) {
2947 
2948  auto UserDecision =
2949  llvm::getAttributeBasedInliningDecision(Call, Callee, CalleeTTI, GetTLI);
2950 
2951  if (UserDecision) {
2952  if (UserDecision->isSuccess())
2953  return llvm::InlineCost::getAlways("always inline attribute");
2954  return llvm::InlineCost::getNever(UserDecision->getFailureReason());
2955  }
2956 
2957  LLVM_DEBUG(llvm::dbgs() << " Analyzing call of " << Callee->getName()
2958  << "... (caller:" << Call.getCaller()->getName()
2959  << ")\n");
2960 
2961  InlineCostCallAnalyzer CA(*Callee, Call, Params, CalleeTTI,
2962  GetAssumptionCache, GetBFI, PSI, ORE);
2963  InlineResult ShouldInline = CA.analyze();
2964 
2965  LLVM_DEBUG(CA.dump());
2966 
2967  // Always make cost benefit based decision explicit.
2968  // We use always/never here since threshold is not meaningful,
2969  // as it's not what drives cost-benefit analysis.
2970  if (CA.wasDecidedByCostBenefit()) {
2971  if (ShouldInline.isSuccess())
2972  return InlineCost::getAlways("benefit over cost",
2973  CA.getCostBenefitPair());
2974  else
2975  return InlineCost::getNever("cost over benefit", CA.getCostBenefitPair());
2976  }
2977 
2978  if (CA.wasDecidedByCostThreshold())
2979  return InlineCost::get(CA.getCost(), CA.getThreshold(),
2980  CA.getStaticBonusApplied());
2981 
2982  // No details on how the decision was made, simply return always or never.
2983  return ShouldInline.isSuccess()
2984  ? InlineCost::getAlways("empty function")
2985  : InlineCost::getNever(ShouldInline.getFailureReason());
2986 }
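A minimal usage sketch, assuming Call, Params, CalleeTTI, GetAssumptionCache, and GetTLI are in scope. InlineCost::getCost() and getThreshold() may only be queried on a variable cost, so the always/never cases short-circuit first (the class also provides an explicit operator bool with the same cost-under-threshold meaning):

  InlineCost IC = llvm::getInlineCost(Call, Params, CalleeTTI,
                                      GetAssumptionCache, GetTLI);
  bool Profitable =
      IC.isAlways() || (!IC.isNever() && IC.getCost() < IC.getThreshold());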
2987 
2988 InlineResult llvm::isInlineViable(Function &F) {
2989  bool ReturnsTwice = F.hasFnAttribute(Attribute::ReturnsTwice);
2990  for (BasicBlock &BB : F) {
2991  // Disallow inlining of functions which contain indirect branches.
2992  if (isa<IndirectBrInst>(BB.getTerminator()))
2993  return InlineResult::failure("contains indirect branches");
2994 
2995  // Disallow inlining of blockaddresses which are used by non-callbr
2996  // instructions.
2997  if (BB.hasAddressTaken())
2998  for (User *U : BlockAddress::get(&BB)->users())
2999  if (!isa<CallBrInst>(*U))
3000  return InlineResult::failure("blockaddress used outside of callbr");
3001 
3002  for (auto &II : BB) {
3003  CallBase *Call = dyn_cast<CallBase>(&II);
3004  if (!Call)
3005  continue;
3006 
3007  // Disallow recursive calls.
3008  Function *Callee = Call->getCalledFunction();
3009  if (&F == Callee)
3010  return InlineResult::failure("recursive call");
3011 
3012  // Disallow calls which expose returns-twice to a function not previously
3013  // attributed as such.
3014  if (!ReturnsTwice && isa<CallInst>(Call) &&
3015  cast<CallInst>(Call)->canReturnTwice())
3016  return InlineResult::failure("exposes returns-twice attribute");
3017 
3018  if (Callee)
3019  switch (Callee->getIntrinsicID()) {
3020  default:
3021  break;
3022  case llvm::Intrinsic::icall_branch_funnel:
3023  // Disallow inlining of @llvm.icall.branch.funnel because the current
3024  // backend can't separate call targets from call arguments.
3025  return InlineResult::failure(
3026  "disallowed inlining of @llvm.icall.branch.funnel");
3027  case llvm::Intrinsic::localescape:
3028  // Disallow inlining functions that call @llvm.localescape. Doing this
3029  // correctly would require major changes to the inliner.
3030  return InlineResult::failure(
3031  "disallowed inlining of @llvm.localescape");
3032  case llvm::Intrinsic::vastart:
3033  // Disallow inlining of functions that initialize VarArgs with
3034  // va_start.
3035  return InlineResult::failure(
3036  "contains VarArgs initialized with va_start");
3037  }
3038  }
3039  }
3040 
3041  return InlineResult::success();
3042 }
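A brief sketch of using this viability filter before a forced inline, assuming Callee is a non-null Function *; the diagnostic is illustrative only:

  InlineResult Viable = llvm::isInlineViable(*Callee);
  if (!Viable.isSuccess())
    errs() << "cannot inline " << Callee->getName() << ": "
           << Viable.getFailureReason() << "\n";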
3043 
3044 // APIs to create InlineParams based on command line flags and/or other
3045 // parameters.
3046 
3047 InlineParams llvm::getInlineParams(int Threshold) {
3048  InlineParams Params;
3049 
3050  // This field is the threshold to use for a callee by default. This is
3051  // derived from one or more of:
3052  // * optimization or size-optimization levels,
3053  // * a value passed to the createFunctionInliningPass function, or
3054  // * the -inline-threshold flag.
3055  // If the -inline-threshold flag is explicitly specified, that is used
3056  // irrespective of anything else.
3057  if (InlineThreshold.getNumOccurrences() > 0)
3058  Params.DefaultThreshold = InlineThreshold;
3059  else
3060  Params.DefaultThreshold = Threshold;
3061 
3062  // Set the HintThreshold knob from the -inlinehint-threshold.
3063  Params.HintThreshold = HintThreshold;
3064 
3065  // Set the HotCallSiteThreshold knob from the -hot-callsite-threshold.
3066  Params.HotCallSiteThreshold = HotCallSiteThreshold;
3067 
3068  // If the -locally-hot-callsite-threshold is explicitly specified, use it to
3069  // populate LocallyHotCallSiteThreshold. Later, we populate
3070  // Params.LocallyHotCallSiteThreshold from -locally-hot-callsite-threshold if
3071  // we know that optimization level is O3 (in the getInlineParams variant that
3072  // takes the opt and size levels).
3073  // FIXME: Remove this check (and make the assignment unconditional) after
3074  // addressing size regression issues at O2.
3075  if (LocallyHotCallSiteThreshold.getNumOccurrences() > 0)
3076  Params.LocallyHotCallSiteThreshold = LocallyHotCallSiteThreshold;
3077 
3078  // Set the ColdCallSiteThreshold knob from the
3079  // -inline-cold-callsite-threshold.
3080  Params.ColdCallSiteThreshold = ColdCallSiteThreshold;
3081 
3082  // Set the OptMinSizeThreshold and OptSizeThreshold params only if the
3083  // -inline-threshold commandline option is not explicitly given. If that
3084  // option is present, then its value applies even for callees with size and
3085  // minsize attributes.
3086  // If the -inline-threshold is not specified, set the ColdThreshold from the
3087  // -inlinecold-threshold even if it is not explicitly passed. If
3088  // -inline-threshold is specified, then -inlinecold-threshold needs to be
3089  // explicitly specified to set the ColdThreshold knob.
3090  if (InlineThreshold.getNumOccurrences() == 0) {
3091  Params.OptMinSizeThreshold = InlineConstants::OptMinSizeThreshold;
3092  Params.OptSizeThreshold = InlineConstants::OptSizeThreshold;
3093  Params.ColdThreshold = ColdThreshold;
3094  } else if (ColdThreshold.getNumOccurrences() > 0) {
3095  Params.ColdThreshold = ColdThreshold;
3096  }
3097  return Params;
3098 }
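To illustrate the precedence implemented above (an informal sketch; the numbers are only the flag defaults quoted in the cl::opt descriptions):

  // No flags passed on the command line:
  InlineParams P = llvm::getInlineParams(/*Threshold=*/275);
  // P.DefaultThreshold == 275; OptSizeThreshold, OptMinSizeThreshold and
  // ColdThreshold are also populated.
  //
  // With -inline-threshold=500 passed explicitly, P.DefaultThreshold == 500,
  // the size-opt knobs stay unset, and ColdThreshold is taken from
  // -inlinecold-threshold only when that flag is itself passed explicitly.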
3099 
3100 InlineParams llvm::getInlineParams() {
3101  return getInlineParams(DefaultThreshold);
3102 }
3103 
3104 // Compute the default threshold for inlining based on the opt level and the
3105 // size opt level.
3106 static int computeThresholdFromOptLevels(unsigned OptLevel,
3107  unsigned SizeOptLevel) {
3108  if (OptLevel > 2)
3109  return InlineConstants::OptAggressiveThreshold;
3110  if (SizeOptLevel == 1) // -Os
3111  return InlineConstants::OptSizeThreshold;
3112  if (SizeOptLevel == 2) // -Oz
3113  return InlineConstants::OptMinSizeThreshold;
3114  return DefaultThreshold;
3115 }
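A few worked mappings of this helper; the InlineConstants values quoted are those documented in InlineCost.h at the time of writing (250 for -O3, 50 for -Os, 5 for -Oz):

  // computeThresholdFromOptLevels(3, 0) == InlineConstants::OptAggressiveThreshold
  // computeThresholdFromOptLevels(2, 1) == InlineConstants::OptSizeThreshold    (-Os)
  // computeThresholdFromOptLevels(2, 2) == InlineConstants::OptMinSizeThreshold (-Oz)
  // computeThresholdFromOptLevels(2, 0) == DefaultThreshold, 225 unless
  //                                        -inlinedefault-threshold overrides it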
3116 
3117 InlineParams llvm::getInlineParams(unsigned OptLevel, unsigned SizeOptLevel) {
3118  auto Params =
3119  getInlineParams(computeThresholdFromOptLevels(OptLevel, SizeOptLevel));
3120  // At O3, use the value of -locally-hot-callsite-threshold option to populate
3121  // Params.LocallyHotCallSiteThreshold. Below O3, this flag has effect only
3122  // when it is specified explicitly.
3123  if (OptLevel > 2)
3124  Params.LocallyHotCallSiteThreshold = LocallyHotCallSiteThreshold;
3125  return Params;
3126 }
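For example, a client building -O3-style parameters might write the following sketch; only the OptLevel > 2 case pulls in the locally-hot threshold unconditionally:

  InlineParams O3Params =
      llvm::getInlineParams(/*OptLevel=*/3, /*SizeOptLevel=*/0);
  // O3Params.LocallyHotCallSiteThreshold is populated here even when
  // -locally-hot-callsite-threshold was not passed on the command line.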
3127 
3128 PreservedAnalyses
3129 InlineCostAnnotationPrinterPass::run(Function &F,
3130  FunctionAnalysisManager &FAM) {
3131  PrintInstructionComments = true;
3132  std::function<AssumptionCache &(Function &)> GetAssumptionCache =
3133  [&](Function &F) -> AssumptionCache & {
3134  return FAM.getResult<AssumptionAnalysis>(F);
3135  };
3136  Module *M = F.getParent();
3137  ProfileSummaryInfo PSI(*M);
3138  DataLayout DL(M);
3139  TargetTransformInfo TTI(DL);
3140  // FIXME: Redesign the usage of InlineParams to expand the scope of this pass.
3141  // In the current implementation, the type of InlineParams doesn't matter as
3142  // the pass serves only to verify the inliner's decisions.
3143  // We could add a flag which determines the InlineParams for this run. Right
3144  // now, the default InlineParams are used.
3145  const InlineParams Params = llvm::getInlineParams();
3146  for (BasicBlock &BB : F) {
3147  for (Instruction &I : BB) {
3148  if (CallInst *CI = dyn_cast<CallInst>(&I)) {
3149  Function *CalledFunction = CI->getCalledFunction();
3150  if (!CalledFunction || CalledFunction->isDeclaration())
3151  continue;
3152  OptimizationRemarkEmitter ORE(CalledFunction);
3153  InlineCostCallAnalyzer ICCA(*CalledFunction, *CI, Params, TTI,
3154  GetAssumptionCache, nullptr, &PSI, &ORE);
3155  ICCA.analyze();
3156  OS << " Analyzing call of " << CalledFunction->getName()
3157  << "... (caller:" << CI->getCaller()->getName() << ")\n";
3158  ICCA.print(OS);
3159  OS << "\n";
3160  }
3161  }
3162  }
3163  return PreservedAnalyses::all();
3164 }
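Assuming the registration name in PassRegistry.def is still print<inline-cost>, this printer pass can be exercised from the new pass manager with an invocation such as opt -passes='print<inline-cost>' -disable-output input.ll.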