InlineCost.cpp
//===- InlineCost.cpp - Cost analysis for inliner -------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file implements inline cost analysis.
//
//===----------------------------------------------------------------------===//

#include "llvm/Analysis/InlineCost.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Analysis/BlockFrequencyInfo.h"
#include "llvm/Analysis/CodeMetrics.h"
#include "llvm/Analysis/ConstantFolding.h"
#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/OptimizationRemarkEmitter.h"
#include "llvm/Analysis/ProfileSummaryInfo.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/Config/llvm-config.h"
#include "llvm/IR/AssemblyAnnotationWriter.h"
#include "llvm/IR/CallingConv.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/GetElementPtrTypeIterator.h"
#include "llvm/IR/GlobalAlias.h"
#include "llvm/IR/InstVisitor.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Operator.h"
#include "llvm/IR/PatternMatch.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/FormattedStream.h"
#include "llvm/Support/raw_ostream.h"

using namespace llvm;

#define DEBUG_TYPE "inline-cost"

STATISTIC(NumCallsAnalyzed, "Number of call sites analyzed");

static cl::opt<int>
    DefaultThreshold("inlinedefault-threshold", cl::Hidden, cl::init(225),
                     cl::ZeroOrMore,
                     cl::desc("Default amount of inlining to perform"));

// We introduce this option since there is a minor compile-time win by avoiding
// addition of TTI attributes (target-features in particular) to inline
// candidates when they are guaranteed to be the same as top level methods in
// some use cases. If we avoid adding the attribute, we need an option to avoid
// checking these attributes.
static cl::opt<bool> IgnoreTTIInlineCompatible(
    "ignore-tti-inline-compatible", cl::Hidden, cl::init(false),
    cl::desc("Ignore TTI attributes compatibility check between callee/caller "
             "during inline cost calculation"));

static cl::opt<bool> PrintInstructionComments(
    "print-instruction-comments", cl::Hidden, cl::init(false),
    cl::desc("Prints comments for instruction based on inline cost analysis"));

static cl::opt<int> InlineThreshold(
    "inline-threshold", cl::Hidden, cl::init(225), cl::ZeroOrMore,
    cl::desc("Control the amount of inlining to perform (default = 225)"));

static cl::opt<int> HintThreshold(
    "inlinehint-threshold", cl::Hidden, cl::init(325), cl::ZeroOrMore,
    cl::desc("Threshold for inlining functions with inline hint"));

static cl::opt<int>
    ColdCallSiteThreshold("inline-cold-callsite-threshold", cl::Hidden,
                          cl::init(45), cl::ZeroOrMore,
                          cl::desc("Threshold for inlining cold callsites"));

static cl::opt<bool> InlineEnableCostBenefitAnalysis(
    "inline-enable-cost-benefit-analysis", cl::Hidden, cl::init(false),
    cl::desc("Enable the cost-benefit analysis for the inliner"));

static cl::opt<int> InlineSavingsMultiplier(
    "inline-savings-multiplier", cl::Hidden, cl::init(8), cl::ZeroOrMore,
    cl::desc("Multiplier to multiply cycle savings by during inlining"));

static cl::opt<int>
    InlineSizeAllowance("inline-size-allowance", cl::Hidden, cl::init(100),
                        cl::ZeroOrMore,
                        cl::desc("The maximum size of a callee that gets "
                                 "inlined without sufficient cycle savings"));

// We introduce this threshold to help performance of instrumentation based
// PGO before we actually hook up inliner with analysis passes such as BPI and
// BFI.
static cl::opt<int> ColdThreshold(
    "inlinecold-threshold", cl::Hidden, cl::init(45), cl::ZeroOrMore,
    cl::desc("Threshold for inlining functions with cold attribute"));

static cl::opt<int>
    HotCallSiteThreshold("hot-callsite-threshold", cl::Hidden, cl::init(3000),
                         cl::ZeroOrMore,
                         cl::desc("Threshold for hot callsites"));

static cl::opt<int> LocallyHotCallSiteThreshold(
    "locally-hot-callsite-threshold", cl::Hidden, cl::init(525), cl::ZeroOrMore,
    cl::desc("Threshold for locally hot callsites"));

static cl::opt<int> ColdCallSiteRelFreq(
    "cold-callsite-rel-freq", cl::Hidden, cl::init(2), cl::ZeroOrMore,
    cl::desc("Maximum block frequency, expressed as a percentage of caller's "
             "entry frequency, for a callsite to be cold in the absence of "
             "profile information."));

static cl::opt<int> HotCallSiteRelFreq(
    "hot-callsite-rel-freq", cl::Hidden, cl::init(60), cl::ZeroOrMore,
    cl::desc("Minimum block frequency, expressed as a multiple of caller's "
             "entry frequency, for a callsite to be hot in the absence of "
             "profile information."));

static cl::opt<int> CallPenalty(
    "inline-call-penalty", cl::Hidden, cl::init(25),
    cl::desc("Call penalty that is applied per callsite when inlining"));

static cl::opt<bool> OptComputeFullInlineCost(
    "inline-cost-full", cl::Hidden, cl::init(false), cl::ZeroOrMore,
    cl::desc("Compute the full inline cost of a call site even when the cost "
             "exceeds the threshold."));

static cl::opt<bool> InlineCallerSupersetNoBuiltin(
    "inline-caller-superset-nobuiltin", cl::Hidden, cl::init(true),
    cl::ZeroOrMore,
    cl::desc("Allow inlining when caller has a superset of callee's nobuiltin "
             "attributes."));

static cl::opt<bool> DisableGEPConstOperand(
    "disable-gep-const-evaluation", cl::Hidden, cl::init(false),
    cl::desc("Disables evaluation of GetElementPtr with constant operands"));

namespace llvm {
Optional<int> getStringFnAttrAsInt(CallBase &CB, StringRef AttrKind) {
  Attribute Attr = CB.getFnAttr(AttrKind);
  int AttrValue;
  if (Attr.getValueAsString().getAsInteger(10, AttrValue))
    return None;
  return AttrValue;
}
} // namespace llvm

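// Usage note (added for illustration; not part of the upstream file):
// getStringFnAttrAsInt parses integer-valued string attributes on a call
// site. For example, a call annotated with "function-inline-cost"="15"
// yields Optional<int>(15), while a missing or non-numeric attribute yields
// None (getAsInteger returns true on parse failure).
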
namespace {
class InlineCostCallAnalyzer;

// This struct is used to store information about inline cost of a
// particular instruction
struct InstructionCostDetail {
  int CostBefore = 0;
  int CostAfter = 0;
  int ThresholdBefore = 0;
  int ThresholdAfter = 0;

  int getThresholdDelta() const { return ThresholdAfter - ThresholdBefore; }

  int getCostDelta() const { return CostAfter - CostBefore; }

  bool hasThresholdChanged() const { return ThresholdAfter != ThresholdBefore; }
};

class InlineCostAnnotationWriter : public AssemblyAnnotationWriter {
private:
  InlineCostCallAnalyzer *const ICCA;

public:
  InlineCostAnnotationWriter(InlineCostCallAnalyzer *ICCA) : ICCA(ICCA) {}
  virtual void emitInstructionAnnot(const Instruction *I,
                                    formatted_raw_ostream &OS) override;
};

/// Carry out call site analysis, in order to evaluate inlinability.
/// NOTE: the type is currently used as implementation detail of functions such
/// as llvm::getInlineCost. Note the function_ref constructor parameters - the
/// expectation is that they come from the outer scope, from the wrapper
/// functions. If we want to support constructing CallAnalyzer objects where
/// lambdas are provided inline at construction, or where the object needs to
/// otherwise survive past the scope of the provided functions, we need to
/// revisit the argument types.
class CallAnalyzer : public InstVisitor<CallAnalyzer, bool> {
  typedef InstVisitor<CallAnalyzer, bool> Base;
  friend class InstVisitor<CallAnalyzer, bool>;

protected:
  virtual ~CallAnalyzer() = default;
  /// The TargetTransformInfo available for this compilation.
  const TargetTransformInfo &TTI;

  /// Getter for the cache of @llvm.assume intrinsics.
  function_ref<AssumptionCache &(Function &)> GetAssumptionCache;

  /// Getter for BlockFrequencyInfo
  function_ref<BlockFrequencyInfo &(Function &)> GetBFI;

  /// Profile summary information.
  ProfileSummaryInfo *PSI;

  /// The called function.
  Function &F;

  // Cache the DataLayout since we use it a lot.
  const DataLayout &DL;

  /// The OptimizationRemarkEmitter available for this compilation.
  OptimizationRemarkEmitter *ORE;

  /// The candidate callsite being analyzed. Please do not use this to do
  /// analysis in the caller function; we want the inline cost query to be
  /// easily cacheable. Instead, use the cover function paramHasAttr.
  CallBase &CandidateCall;

  /// Extension points for handling callsite features.
  // Called before a basic block was analyzed.
  virtual void onBlockStart(const BasicBlock *BB) {}

  /// Called after a basic block was analyzed.
  virtual void onBlockAnalyzed(const BasicBlock *BB) {}

  /// Called before an instruction was analyzed
  virtual void onInstructionAnalysisStart(const Instruction *I) {}

  /// Called after an instruction was analyzed
  virtual void onInstructionAnalysisFinish(const Instruction *I) {}

  /// Called at the end of the analysis of the callsite. Return the outcome of
  /// the analysis, i.e. 'InlineResult(true)' if the inlining may happen, or
  /// the reason it can't.
  virtual InlineResult finalizeAnalysis() { return InlineResult::success(); }
  /// Called when we're about to start processing a basic block, and every time
  /// we are done processing an instruction. Return true if there is no point in
  /// continuing the analysis (e.g. we've determined already the call site is
  /// too expensive to inline)
  virtual bool shouldStop() { return false; }

  /// Called before the analysis of the callee body starts (with callsite
  /// contexts propagated). It checks callsite-specific information. Return a
  /// reason analysis can't continue if that's the case, or 'true' if it may
  /// continue.
  virtual InlineResult onAnalysisStart() { return InlineResult::success(); }
  /// Called if the analysis engine decides SROA cannot be done for the given
  /// alloca.
  virtual void onDisableSROA(AllocaInst *Arg) {}

  /// Called when the analysis engine determines load elimination won't happen.
  virtual void onDisableLoadElimination() {}

  /// Called when we visit a CallBase, before the analysis starts. Return false
  /// to stop further processing of the instruction.
  virtual bool onCallBaseVisitStart(CallBase &Call) { return true; }

  /// Called to account for a call.
  virtual void onCallPenalty() {}

  /// Called to account for the expectation the inlining would result in a load
  /// elimination.
  virtual void onLoadEliminationOpportunity() {}

  /// Called to account for the cost of argument setup for the Call in the
  /// callee's body (not the callsite currently under analysis).
  virtual void onCallArgumentSetup(const CallBase &Call) {}

  /// Called to account for a load relative intrinsic.
  virtual void onLoadRelativeIntrinsic() {}

  /// Called to account for a lowered call.
  virtual void onLoweredCall(Function *F, CallBase &Call, bool IsIndirectCall) {
  }

  /// Account for a jump table of given size. Return false to stop further
  /// processing the switch instruction
  virtual bool onJumpTable(unsigned JumpTableSize) { return true; }

  /// Account for a case cluster of given size. Return false to stop further
  /// processing of the instruction.
  virtual bool onCaseCluster(unsigned NumCaseCluster) { return true; }

  /// Called at the end of processing a switch instruction, with the given
  /// number of case clusters.
  virtual void onFinalizeSwitch(unsigned JumpTableSize,
                                unsigned NumCaseCluster) {}

  /// Called to account for any other instruction not specifically accounted
  /// for.
  virtual void onMissedSimplification() {}

  /// Start accounting potential benefits due to SROA for the given alloca.
  virtual void onInitializeSROAArg(AllocaInst *Arg) {}

  /// Account SROA savings for the AllocaInst value.
  virtual void onAggregateSROAUse(AllocaInst *V) {}

  bool handleSROA(Value *V, bool DoNotDisable) {
    // Check for SROA candidates in comparisons.
    if (auto *SROAArg = getSROAArgForValueOrNull(V)) {
      if (DoNotDisable) {
        onAggregateSROAUse(SROAArg);
        return true;
      }
      disableSROAForArg(SROAArg);
    }
    return false;
  }

  bool IsCallerRecursive = false;
  bool IsRecursiveCall = false;
  bool ExposesReturnsTwice = false;
  bool HasDynamicAlloca = false;
  bool ContainsNoDuplicateCall = false;
  bool HasReturn = false;
  bool HasIndirectBr = false;
  bool HasUninlineableIntrinsic = false;
  bool InitsVargArgs = false;

  /// Number of bytes allocated statically by the callee.
  uint64_t AllocatedSize = 0;
  unsigned NumInstructions = 0;
  unsigned NumVectorInstructions = 0;

  /// While we walk the potentially-inlined instructions, we build up and
  /// maintain a mapping of simplified values specific to this callsite. The
  /// idea is to propagate any special information we have about arguments to
  /// this call through the inlinable section of the function, and account for
  /// likely simplifications post-inlining. The most important aspect we track
  /// is CFG altering simplifications -- when we prove a basic block dead, that
  /// can cause dramatic shifts in the cost of inlining a function.
  DenseMap<Value *, Constant *> SimplifiedValues;

  /// Keep track of the values which map back (through function arguments) to
  /// allocas on the caller stack which could be simplified through SROA.
  DenseMap<Value *, AllocaInst *> SROAArgValues;

  /// Keep track of Allocas for which we believe we may get SROA optimization.
  DenseSet<AllocaInst *> EnabledSROAAllocas;

  /// Keep track of values which map to a pointer base and constant offset.
  DenseMap<Value *, std::pair<Value *, APInt>> ConstantOffsetPtrs;

  /// Keep track of dead blocks due to the constant arguments.
  SmallPtrSet<BasicBlock *, 16> DeadBlocks;

  /// The mapping of the blocks to their known unique successors due to the
  /// constant arguments.
  DenseMap<BasicBlock *, BasicBlock *> KnownSuccessors;

  /// Model the elimination of repeated loads that is expected to happen
  /// whenever we simplify away the stores that would otherwise cause them to be
  /// loads.
  bool EnableLoadElimination = true;

  /// Whether we allow inlining for recursive call.
  bool AllowRecursiveCall = false;

  SmallPtrSet<Value *, 16> LoadAddrSet;

  AllocaInst *getSROAArgForValueOrNull(Value *V) const {
    auto It = SROAArgValues.find(V);
    if (It == SROAArgValues.end() || EnabledSROAAllocas.count(It->second) == 0)
      return nullptr;
    return It->second;
  }

  // Custom simplification helper routines.
  bool isAllocaDerivedArg(Value *V);
  void disableSROAForArg(AllocaInst *SROAArg);
  void disableSROA(Value *V);
  void findDeadBlocks(BasicBlock *CurrBB, BasicBlock *NextBB);
  void disableLoadElimination();
  bool isGEPFree(GetElementPtrInst &GEP);
  bool canFoldInboundsGEP(GetElementPtrInst &I);
  bool accumulateGEPOffset(GEPOperator &GEP, APInt &Offset);
  bool simplifyCallSite(Function *F, CallBase &Call);
  template <typename Callable>
  bool simplifyInstruction(Instruction &I, Callable Evaluate);
  bool simplifyIntrinsicCallIsConstant(CallBase &CB);
  ConstantInt *stripAndComputeInBoundsConstantOffsets(Value *&V);

  /// Return true if the given argument to the function being considered for
  /// inlining has the given attribute set either at the call site or the
  /// function declaration. Primarily used to inspect call site specific
  /// attributes since these can be more precise than the ones on the callee
  /// itself.
  bool paramHasAttr(Argument *A, Attribute::AttrKind Attr);

  /// Return true if the given value is known non null within the callee if
  /// inlined through this particular callsite.
  bool isKnownNonNullInCallee(Value *V);

  /// Return true if size growth is allowed when inlining the callee at \p Call.
  bool allowSizeGrowth(CallBase &Call);

  // Custom analysis routines.
  InlineResult analyzeBlock(BasicBlock *BB,
                            SmallPtrSetImpl<const Value *> &EphValues);

  // Disable several entry points to the visitor so we don't accidentally use
  // them by declaring but not defining them here.
  void visit(Module *);
  void visit(Module &);
  void visit(Function *);
  void visit(Function &);
  void visit(BasicBlock *);
  void visit(BasicBlock &);

  // Provide base case for our instruction visit.
  bool visitInstruction(Instruction &I);

  // Our visit overrides.
  bool visitAlloca(AllocaInst &I);
  bool visitPHI(PHINode &I);
  bool visitGetElementPtr(GetElementPtrInst &I);
  bool visitBitCast(BitCastInst &I);
  bool visitPtrToInt(PtrToIntInst &I);
  bool visitIntToPtr(IntToPtrInst &I);
  bool visitCastInst(CastInst &I);
  bool visitCmpInst(CmpInst &I);
  bool visitSub(BinaryOperator &I);
  bool visitBinaryOperator(BinaryOperator &I);
  bool visitFNeg(UnaryOperator &I);
  bool visitLoad(LoadInst &I);
  bool visitStore(StoreInst &I);
  bool visitExtractValue(ExtractValueInst &I);
  bool visitInsertValue(InsertValueInst &I);
  bool visitCallBase(CallBase &Call);
  bool visitReturnInst(ReturnInst &RI);
  bool visitBranchInst(BranchInst &BI);
  bool visitSelectInst(SelectInst &SI);
  bool visitSwitchInst(SwitchInst &SI);
  bool visitIndirectBrInst(IndirectBrInst &IBI);
  bool visitResumeInst(ResumeInst &RI);
  bool visitCleanupReturnInst(CleanupReturnInst &RI);
  bool visitCatchReturnInst(CatchReturnInst &RI);
  bool visitUnreachableInst(UnreachableInst &I);

public:
  CallAnalyzer(Function &Callee, CallBase &Call, const TargetTransformInfo &TTI,
               function_ref<AssumptionCache &(Function &)> GetAssumptionCache,
               function_ref<BlockFrequencyInfo &(Function &)> GetBFI = nullptr,
               ProfileSummaryInfo *PSI = nullptr,
               OptimizationRemarkEmitter *ORE = nullptr)
      : TTI(TTI), GetAssumptionCache(GetAssumptionCache), GetBFI(GetBFI),
        PSI(PSI), F(Callee), DL(F.getParent()->getDataLayout()), ORE(ORE),
        CandidateCall(Call) {}

  InlineResult analyze();

  Optional<Constant *> getSimplifiedValue(Instruction *I) {
    if (SimplifiedValues.find(I) != SimplifiedValues.end())
      return SimplifiedValues[I];
    return None;
  }

  // Keep a bunch of stats about the cost savings found so we can print them
  // out when debugging.
  unsigned NumConstantArgs = 0;
  unsigned NumConstantOffsetPtrArgs = 0;
  unsigned NumAllocaArgs = 0;
  unsigned NumConstantPtrCmps = 0;
  unsigned NumConstantPtrDiffs = 0;
  unsigned NumInstructionsSimplified = 0;

  void dump();
};

// Considering forming a binary search, we should find the number of nodes
// which is the same as the number of comparisons when lowered. For a given
// number of clusters, n, we can define a recursive function, f(n), to find
// the number of nodes in the tree. The recursion is:
// f(n) = 1 + f(n/2) + f(n - n/2), when n > 3,
// and f(n) = n, when n <= 3.
// This will lead to a binary tree where the leaf should be either f(2) or f(3)
// when n > 3. So, the number of comparisons from leaves should be n, while
// the number of non-leaf nodes should be:
// 2^(log2(n) - 1) - 1
// = 2^log2(n) * 2^-1 - 1
// = n / 2 - 1.
// Considering comparisons from leaf and non-leaf nodes, we can estimate the
// number of comparisons in a simple closed form:
// n + n / 2 - 1 = n * 3 / 2 - 1
int64_t getExpectedNumberOfCompare(int NumCaseCluster) {
  return 3 * static_cast<int64_t>(NumCaseCluster) / 2 - 1;
}

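// Worked example (added for illustration; not part of the upstream file):
// for NumCaseCluster = 8, the recursion gives f(8) = 1 + f(4) + f(4) with
// f(4) = 1 + f(2) + f(2) = 5, i.e. 1 + 5 + 5 = 11 nodes, matching the
// closed form 8 * 3 / 2 - 1 = 11 expected comparisons.
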
/// FIXME: if it is necessary to derive from InlineCostCallAnalyzer, note
/// the FIXME in onLoweredCall, when instantiating an InlineCostCallAnalyzer
class InlineCostCallAnalyzer final : public CallAnalyzer {
  const int CostUpperBound = INT_MAX - InlineConstants::InstrCost - 1;
  const bool ComputeFullInlineCost;
  int LoadEliminationCost = 0;
  /// Bonus to be applied when percentage of vector instructions in callee is
  /// high (see more details in updateThreshold).
  int VectorBonus = 0;
  /// Bonus to be applied when the callee has only one reachable basic block.
  int SingleBBBonus = 0;

  /// Tunable parameters that control the analysis.
  const InlineParams &Params;

  // This DenseMap stores the delta change in cost and threshold after
  // accounting for the given instruction. The map is filled only with the
  // flag PrintInstructionComments on.
  DenseMap<const Instruction *, InstructionCostDetail> InstructionCostDetailMap;

  /// Upper bound for the inlining cost. Bonuses are being applied to account
  /// for speculative "expected profit" of the inlining decision.
  int Threshold = 0;

  /// Attempt to evaluate indirect calls to boost its inline cost.
  const bool BoostIndirectCalls;

  /// Ignore the threshold when finalizing analysis.
  const bool IgnoreThreshold;

  // True if the cost-benefit-analysis-based inliner is enabled.
  const bool CostBenefitAnalysisEnabled;

  /// Inlining cost measured in abstract units, accounts for all the
  /// instructions expected to be executed for a given function invocation.
  /// Instructions that are statically proven to be dead based on call-site
  /// arguments are not counted here.
  int Cost = 0;

  // The cumulative cost at the beginning of the basic block being analyzed. At
  // the end of analyzing each basic block, "Cost - CostAtBBStart" represents
  // the size of that basic block.
  int CostAtBBStart = 0;

  // The static size of live but cold basic blocks. This is "static" in the
  // sense that it's not weighted by profile counts at all.
  int ColdSize = 0;

  // Whether inlining is decided by cost-threshold analysis.
  bool DecidedByCostThreshold = false;

  // Whether inlining is decided by cost-benefit analysis.
  bool DecidedByCostBenefit = false;

  // The cost-benefit pair computed by cost-benefit analysis.
  Optional<CostBenefitPair> CostBenefit = None;

  bool SingleBB = true;

  unsigned SROACostSavings = 0;
  unsigned SROACostSavingsLost = 0;

  /// The mapping of caller Alloca values to their accumulated cost savings. If
  /// we have to disable SROA for one of the allocas, this tells us how much
  /// cost must be added.
  DenseMap<AllocaInst *, int> SROAArgCosts;

  /// Return true if \p Call is a cold callsite.
  bool isColdCallSite(CallBase &Call, BlockFrequencyInfo *CallerBFI);

  /// Update Threshold based on callsite properties such as callee
  /// attributes and callee hotness for PGO builds. The Callee is explicitly
  /// passed to support analyzing indirect calls whose target is inferred by
  /// analysis.
  void updateThreshold(CallBase &Call, Function &Callee);
  /// Return a higher threshold if \p Call is a hot callsite.
  Optional<int> getHotCallSiteThreshold(CallBase &Call,
                                        BlockFrequencyInfo *CallerBFI);

  /// Handle a capped 'int' increment for Cost.
  void addCost(int64_t Inc, int64_t UpperBound = INT_MAX) {
    assert(UpperBound > 0 && UpperBound <= INT_MAX && "invalid upper bound");
    Cost = std::min<int>(UpperBound, Cost + Inc);
  }

  void onDisableSROA(AllocaInst *Arg) override {
    auto CostIt = SROAArgCosts.find(Arg);
    if (CostIt == SROAArgCosts.end())
      return;
    addCost(CostIt->second);
    SROACostSavings -= CostIt->second;
    SROACostSavingsLost += CostIt->second;
    SROAArgCosts.erase(CostIt);
  }

  void onDisableLoadElimination() override {
    addCost(LoadEliminationCost);
    LoadEliminationCost = 0;
  }

  bool onCallBaseVisitStart(CallBase &Call) override {
    if (Optional<int> AttrCallThresholdBonus =
            getStringFnAttrAsInt(Call, "call-threshold-bonus"))
      Threshold += *AttrCallThresholdBonus;

    if (Optional<int> AttrCallCost =
            getStringFnAttrAsInt(Call, "call-inline-cost")) {
      addCost(*AttrCallCost);
      // Prevent further processing of the call since we want to override its
      // inline cost, not just add to it.
      return false;
    }
    return true;
  }

  void onCallPenalty() override { addCost(CallPenalty); }
  void onCallArgumentSetup(const CallBase &Call) override {
    // Pay the price of the argument setup. We account for the average 1
    // instruction per call argument setup here.
    addCost(Call.arg_size() * InlineConstants::InstrCost);
  }
  void onLoadRelativeIntrinsic() override {
    // This is normally lowered to 4 LLVM instructions.
    addCost(3 * InlineConstants::InstrCost);
  }
  void onLoweredCall(Function *F, CallBase &Call,
                     bool IsIndirectCall) override {
    // We account for the average 1 instruction per call argument setup here.
    addCost(Call.arg_size() * InlineConstants::InstrCost);

    // If we have a constant that we are calling as a function, we can peer
    // through it and see the function target. This happens not infrequently
    // during devirtualization and so we want to give it a hefty bonus for
    // inlining, but cap that bonus in the event that inlining wouldn't pan out.
    // Pretend to inline the function, with a custom threshold.
    if (IsIndirectCall && BoostIndirectCalls) {
      auto IndirectCallParams = Params;
      IndirectCallParams.DefaultThreshold =
          InlineConstants::IndirectCallThreshold;
      /// FIXME: if InlineCostCallAnalyzer is derived from, this may need
      /// to instantiate the derived class.
      InlineCostCallAnalyzer CA(*F, Call, IndirectCallParams, TTI,
                                GetAssumptionCache, GetBFI, PSI, ORE, false);
      if (CA.analyze().isSuccess()) {
        // We were able to inline the indirect call! Subtract the cost from the
        // threshold to get the bonus we want to apply, but don't go below zero.
        Cost -= std::max(0, CA.getThreshold() - CA.getCost());
      }
    } else
      // Otherwise simply add the cost for merely making the call.
      addCost(CallPenalty);
  }

  void onFinalizeSwitch(unsigned JumpTableSize,
                        unsigned NumCaseCluster) override {
    // If suitable for a jump table, consider the cost for the table size and
    // branch to destination.
    // Maximum valid cost increased in this function.
    if (JumpTableSize) {
      int64_t JTCost =
          static_cast<int64_t>(JumpTableSize) * InlineConstants::InstrCost +
          4 * InlineConstants::InstrCost;

      addCost(JTCost, static_cast<int64_t>(CostUpperBound));
      return;
    }

    if (NumCaseCluster <= 3) {
      // Suppose a comparison includes one compare and one conditional branch.
      addCost(NumCaseCluster * 2 * InlineConstants::InstrCost);
      return;
    }

    int64_t ExpectedNumberOfCompare =
        getExpectedNumberOfCompare(NumCaseCluster);
    int64_t SwitchCost =
        ExpectedNumberOfCompare * 2 * InlineConstants::InstrCost;

    addCost(SwitchCost, static_cast<int64_t>(CostUpperBound));
  }
  void onMissedSimplification() override {
    addCost(InlineConstants::InstrCost);
  }

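  // Worked example (added for illustration; not part of the upstream file):
  // with InlineConstants::InstrCost = 5, a switch lowered to a 10-entry jump
  // table adds (10 + 4) * 5 = 70 to Cost, while 8 case clusters lowered to a
  // binary search add getExpectedNumberOfCompare(8) * 2 * 5 = 11 * 2 * 5 = 110.
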
  void onInitializeSROAArg(AllocaInst *Arg) override {
    assert(Arg != nullptr &&
           "Should not initialize SROA costs for null value.");
    SROAArgCosts[Arg] = 0;
  }

  void onAggregateSROAUse(AllocaInst *SROAArg) override {
    auto CostIt = SROAArgCosts.find(SROAArg);
    assert(CostIt != SROAArgCosts.end() &&
           "expected this argument to have a cost");
    CostIt->second += InlineConstants::InstrCost;
    SROACostSavings += InlineConstants::InstrCost;
  }

  void onBlockStart(const BasicBlock *BB) override { CostAtBBStart = Cost; }

  void onBlockAnalyzed(const BasicBlock *BB) override {
    if (CostBenefitAnalysisEnabled) {
      // Keep track of the static size of live but cold basic blocks. For now,
      // we define a cold basic block to be one that's never executed.
      assert(GetBFI && "GetBFI must be available");
      BlockFrequencyInfo *BFI = &(GetBFI(F));
      assert(BFI && "BFI must be available");
      auto ProfileCount = BFI->getBlockProfileCount(BB);
      assert(ProfileCount.hasValue());
      if (ProfileCount.getValue() == 0)
        ColdSize += Cost - CostAtBBStart;
    }

    auto *TI = BB->getTerminator();
    // If we had any successors at this point, then post-inlining is likely to
    // have them as well. Note that we assume any basic blocks which existed
    // due to branches or switches which folded above will also fold after
    // inlining.
    if (SingleBB && TI->getNumSuccessors() > 1) {
      // Take off the bonus we applied to the threshold.
      Threshold -= SingleBBBonus;
      SingleBB = false;
    }
  }

  void onInstructionAnalysisStart(const Instruction *I) override {
    // This function is called to store the initial cost of inlining before
    // the given instruction was assessed.
    if (!PrintInstructionComments)
      return;
    InstructionCostDetailMap[I].CostBefore = Cost;
    InstructionCostDetailMap[I].ThresholdBefore = Threshold;
  }

  void onInstructionAnalysisFinish(const Instruction *I) override {
    // This function is called to find new values of cost and threshold after
    // the instruction has been assessed.
    if (!PrintInstructionComments)
      return;
    InstructionCostDetailMap[I].CostAfter = Cost;
    InstructionCostDetailMap[I].ThresholdAfter = Threshold;
  }

  bool isCostBenefitAnalysisEnabled() {
    if (!PSI || !PSI->hasProfileSummary())
      return false;

    if (!GetBFI)
      return false;

    if (InlineEnableCostBenefitAnalysis.getNumOccurrences()) {
      // Honor the explicit request from the user.
      if (!InlineEnableCostBenefitAnalysis)
        return false;
    } else {
      // Otherwise, require instrumentation profile.
      if (!PSI->hasInstrumentationProfile())
        return false;
    }

    auto *Caller = CandidateCall.getParent()->getParent();
    if (!Caller->getEntryCount())
      return false;

    BlockFrequencyInfo *CallerBFI = &(GetBFI(*Caller));
    if (!CallerBFI)
      return false;

    // For now, limit to hot call site.
    if (!PSI->isHotCallSite(CandidateCall, CallerBFI))
      return false;

    // Make sure we have a nonzero entry count.
    auto EntryCount = F.getEntryCount();
    if (!EntryCount || !EntryCount->getCount())
      return false;

    BlockFrequencyInfo *CalleeBFI = &(GetBFI(F));
    if (!CalleeBFI)
      return false;

    return true;
  }

  // Determine whether we should inline the given call site, taking into account
  // both the size cost and the cycle savings. Return None if we don't have
  // sufficient profiling information to determine.
  Optional<bool> costBenefitAnalysis() {
    if (!CostBenefitAnalysisEnabled)
      return None;

    // buildInlinerPipeline in the pass builder sets HotCallSiteThreshold to 0
    // for the prelink phase of the AutoFDO + ThinLTO build. Honor the logic by
    // falling back to the cost-based metric.
    // TODO: Improve this hacky condition.
    if (Threshold == 0)
      return None;

    assert(GetBFI);
    BlockFrequencyInfo *CalleeBFI = &(GetBFI(F));
    assert(CalleeBFI);

    // The cycle savings expressed as the sum of InlineConstants::InstrCost
    // multiplied by the estimated dynamic count of each instruction we can
    // avoid. Savings come from the call site cost, such as argument setup and
    // the call instruction, as well as the instructions that are folded.
    //
    // We use 128-bit APInt here to avoid potential overflow. This variable
    // should stay well below 10^^24 (or 2^^80) in practice. This "worst" case
    // assumes that we can avoid or fold a billion instructions, each with a
    // profile count of 10^^15 -- roughly the number of cycles for a 24-hour
    // period on a 4GHz machine.
    APInt CycleSavings(128, 0);

    for (auto &BB : F) {
      APInt CurrentSavings(128, 0);
      for (auto &I : BB) {
        if (BranchInst *BI = dyn_cast<BranchInst>(&I)) {
          // Count a conditional branch as savings if it becomes unconditional.
          if (BI->isConditional() &&
              isa_and_nonnull<ConstantInt>(
                  SimplifiedValues.lookup(BI->getCondition()))) {
            CurrentSavings += InlineConstants::InstrCost;
          }
        } else if (Value *V = dyn_cast<Value>(&I)) {
          // Count an instruction as savings if we can fold it.
          if (SimplifiedValues.count(V)) {
            CurrentSavings += InlineConstants::InstrCost;
          }
        }
      }

      auto ProfileCount = CalleeBFI->getBlockProfileCount(&BB);
      assert(ProfileCount.hasValue());
      CurrentSavings *= ProfileCount.getValue();
      CycleSavings += CurrentSavings;
    }

    // Compute the cycle savings per call.
    auto EntryProfileCount = F.getEntryCount();
    assert(EntryProfileCount.hasValue() && EntryProfileCount->getCount());
    auto EntryCount = EntryProfileCount->getCount();
    CycleSavings += EntryCount / 2;
    CycleSavings = CycleSavings.udiv(EntryCount);

    // Compute the total savings for the call site.
    auto *CallerBB = CandidateCall.getParent();
    BlockFrequencyInfo *CallerBFI = &(GetBFI(*(CallerBB->getParent())));
    CycleSavings += getCallsiteCost(this->CandidateCall, DL);
    CycleSavings *= CallerBFI->getBlockProfileCount(CallerBB).getValue();

    // Remove the cost of the cold basic blocks.
    int Size = Cost - ColdSize;

    // Allow tiny callees to be inlined regardless of whether they meet the
    // savings threshold.
    Size = Size > InlineSizeAllowance ? Size - InlineSizeAllowance : 1;

    CostBenefit.emplace(APInt(128, Size), CycleSavings);

    // Return true if the savings justify the cost of inlining. Specifically,
    // we evaluate the following inequality:
    //
    //  CycleSavings      PSI->getOrCompHotCountThreshold()
    // -------------- >= -----------------------------------
    //       Size              InlineSavingsMultiplier
    //
    // Note that the left hand side is specific to a call site. The right hand
    // side is a constant for the entire executable.
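    // Worked example (added for illustration; not part of the upstream file):
    // with the default InlineSavingsMultiplier of 8 and a hot-count threshold
    // of, say, 16000, a callee with adjusted Size = 100 is inlined only if
    // CycleSavings >= 100 * 16000 / 8 = 200000 cycles per call.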
    APInt LHS = CycleSavings;
    LHS *= InlineSavingsMultiplier;
    APInt RHS(128, PSI->getOrCompHotCountThreshold());
    RHS *= Size;
    return LHS.uge(RHS);
  }

  InlineResult finalizeAnalysis() override {
    // Loops generally act a lot like calls in that they act like barriers to
    // movement, require a certain amount of setup, etc. So when optimising for
    // size, we penalise any call sites that perform loops. We do this after all
    // other costs here, so will likely only be dealing with relatively small
    // functions (and hence DT and LI will hopefully be cheap).
    auto *Caller = CandidateCall.getFunction();
    if (Caller->hasMinSize()) {
      DominatorTree DT(F);
      LoopInfo LI(DT);
      int NumLoops = 0;
      for (Loop *L : LI) {
        // Ignore loops that will not be executed
        if (DeadBlocks.count(L->getHeader()))
          continue;
        NumLoops++;
      }
      addCost(NumLoops * InlineConstants::LoopPenalty);
    }

    // We applied the maximum possible vector bonus at the beginning. Now,
    // subtract the excess bonus, if any, from the Threshold before
    // comparing against Cost.
    if (NumVectorInstructions <= NumInstructions / 10)
      Threshold -= VectorBonus;
    else if (NumVectorInstructions <= NumInstructions / 2)
      Threshold -= VectorBonus / 2;

    if (Optional<int> AttrCost =
            getStringFnAttrAsInt(CandidateCall, "function-inline-cost"))
      Cost = *AttrCost;

    if (Optional<int> AttrCostMult = getStringFnAttrAsInt(
            CandidateCall,
            InlineConstants::FunctionInlineCostMultiplierAttributeName))
      Cost *= *AttrCostMult;

    if (Optional<int> AttrThreshold =
            getStringFnAttrAsInt(CandidateCall, "function-inline-threshold"))
      Threshold = *AttrThreshold;

    if (auto Result = costBenefitAnalysis()) {
      DecidedByCostBenefit = true;
      if (Result.getValue())
        return InlineResult::success();
      else
        return InlineResult::failure("Cost over threshold.");
    }

    if (IgnoreThreshold)
      return InlineResult::success();

    DecidedByCostThreshold = true;
    return Cost < std::max(1, Threshold)
               ? InlineResult::success()
               : InlineResult::failure("Cost over threshold.");
  }

  bool shouldStop() override {
    if (IgnoreThreshold || ComputeFullInlineCost)
      return false;
    // Bail out the moment we cross the threshold. This means we'll under-count
    // the cost, but only when undercounting doesn't matter.
    if (Cost < Threshold)
      return false;
    DecidedByCostThreshold = true;
    return true;
  }

  void onLoadEliminationOpportunity() override {
    LoadEliminationCost += InlineConstants::InstrCost;
  }

  InlineResult onAnalysisStart() override {
    // Perform some tweaks to the cost and threshold based on the direct
    // callsite information.

    // We want to more aggressively inline vector-dense kernels, so up the
    // threshold, and we'll lower it if the % of vector instructions gets too
    // low. Note that these bonuses are somewhat arbitrary and evolved over
    // time by accident as much as because they are principled bonuses.
    //
    // FIXME: It would be nice to remove all such bonuses. At least it would be
    // nice to base the bonus values on something more scientific.
    assert(NumInstructions == 0);
    assert(NumVectorInstructions == 0);

    // Update the threshold based on callsite properties
    updateThreshold(CandidateCall, F);

    // While Threshold depends on commandline options that can take negative
    // values, we want to enforce the invariant that the computed threshold and
    // bonuses are non-negative.
    assert(Threshold >= 0);
    assert(SingleBBBonus >= 0);
    assert(VectorBonus >= 0);

    // Speculatively apply all possible bonuses to Threshold. If cost exceeds
    // this Threshold any time, and cost cannot decrease, we can stop processing
    // the rest of the function body.
    Threshold += (SingleBBBonus + VectorBonus);

    // Give out bonuses for the callsite, as the instructions setting them up
    // will be gone after inlining.
    addCost(-getCallsiteCost(this->CandidateCall, DL));

    // If this function uses the coldcc calling convention, prefer not to inline
    // it.
    if (F.getCallingConv() == CallingConv::Cold)
      Cost += InlineConstants::ColdccPenalty;

    // Check if we're done. This can happen due to bonuses and penalties.
    if (Cost >= Threshold && !ComputeFullInlineCost)
      return InlineResult::failure("high cost");

    return InlineResult::success();
  }

public:
  InlineCostCallAnalyzer(
      Function &Callee, CallBase &Call, const InlineParams &Params,
      const TargetTransformInfo &TTI,
      function_ref<AssumptionCache &(Function &)> GetAssumptionCache,
      function_ref<BlockFrequencyInfo &(Function &)> GetBFI = nullptr,
      ProfileSummaryInfo *PSI = nullptr,
      OptimizationRemarkEmitter *ORE = nullptr, bool BoostIndirect = true,
      bool IgnoreThreshold = false)
      : CallAnalyzer(Callee, Call, TTI, GetAssumptionCache, GetBFI, PSI, ORE),
        ComputeFullInlineCost(OptComputeFullInlineCost ||
                              Params.ComputeFullInlineCost || ORE ||
                              isCostBenefitAnalysisEnabled()),
        Params(Params), Threshold(Params.DefaultThreshold),
        BoostIndirectCalls(BoostIndirect), IgnoreThreshold(IgnoreThreshold),
        CostBenefitAnalysisEnabled(isCostBenefitAnalysisEnabled()),
        Writer(this) {
    AllowRecursiveCall = Params.AllowRecursiveCall.getValue();
  }

  /// Annotation Writer for instruction details
  InlineCostAnnotationWriter Writer;

  void dump();

  // Prints the same analysis as dump(), but its definition is not dependent
  // on the build.
  void print(raw_ostream &OS);

  Optional<InstructionCostDetail> getCostDetails(const Instruction *I) {
    if (InstructionCostDetailMap.find(I) != InstructionCostDetailMap.end())
      return InstructionCostDetailMap[I];
    return None;
  }

  virtual ~InlineCostCallAnalyzer() = default;
  int getThreshold() const { return Threshold; }
  int getCost() const { return Cost; }
  Optional<CostBenefitPair> getCostBenefitPair() { return CostBenefit; }
  bool wasDecidedByCostBenefit() const { return DecidedByCostBenefit; }
  bool wasDecidedByCostThreshold() const { return DecidedByCostThreshold; }
};

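// Usage note (added for illustration; not part of the upstream file):
// wrappers such as llvm::getInlineCost construct an InlineCostCallAnalyzer
// for a callsite, run analyze(), and then consult getCost(), getThreshold(),
// and the wasDecidedBy* predicates to build the InlineCost value consumed by
// the inliner.
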
class InlineCostFeaturesAnalyzer final : public CallAnalyzer {
private:
  InlineCostFeatures Cost = {};

  // FIXME: These constants are taken from the heuristic-based cost visitor.
  // These should be removed entirely in a later revision to avoid reliance on
  // heuristics in the ML inliner.
  static constexpr int JTCostMultiplier = 4;
  static constexpr int CaseClusterCostMultiplier = 2;
  static constexpr int SwitchCostMultiplier = 2;

  // FIXME: These are taken from the heuristic-based cost visitor: we should
  // eventually abstract these to the CallAnalyzer to avoid duplication.
  unsigned SROACostSavingOpportunities = 0;
  int VectorBonus = 0;
  int SingleBBBonus = 0;
  int Threshold = 5;

  DenseMap<AllocaInst *, unsigned> SROACosts;

  void increment(InlineCostFeatureIndex Feature, int64_t Delta = 1) {
    Cost[static_cast<size_t>(Feature)] += Delta;
  }

  void set(InlineCostFeatureIndex Feature, int64_t Value) {
    Cost[static_cast<size_t>(Feature)] = Value;
  }

  void onDisableSROA(AllocaInst *Arg) override {
    auto CostIt = SROACosts.find(Arg);
    if (CostIt == SROACosts.end())
      return;

    increment(InlineCostFeatureIndex::SROALosses, CostIt->second);
    SROACostSavingOpportunities -= CostIt->second;
    SROACosts.erase(CostIt);
  }

  void onDisableLoadElimination() override {
    set(InlineCostFeatureIndex::LoadElimination, 1);
  }

  void onCallPenalty() override {
    increment(InlineCostFeatureIndex::CallPenalty, CallPenalty);
  }

  void onCallArgumentSetup(const CallBase &Call) override {
    increment(InlineCostFeatureIndex::CallArgumentSetup,
              Call.arg_size() * InlineConstants::InstrCost);
  }

  void onLoadRelativeIntrinsic() override {
    increment(InlineCostFeatureIndex::LoadRelativeIntrinsic,
              3 * InlineConstants::InstrCost);
  }

  void onLoweredCall(Function *F, CallBase &Call,
                     bool IsIndirectCall) override {
    increment(InlineCostFeatureIndex::LoweredCallArgSetup,
              Call.arg_size() * InlineConstants::InstrCost);

    if (IsIndirectCall) {
      InlineParams IndirectCallParams = {/* DefaultThreshold*/ 0,
                                         /*HintThreshold*/ {},
                                         /*ColdThreshold*/ {},
                                         /*OptSizeThreshold*/ {},
                                         /*OptMinSizeThreshold*/ {},
                                         /*HotCallSiteThreshold*/ {},
                                         /*LocallyHotCallSiteThreshold*/ {},
                                         /*ColdCallSiteThreshold*/ {},
                                         /*ComputeFullInlineCost*/ true,
                                         /*EnableDeferral*/ true};
      IndirectCallParams.DefaultThreshold =
          InlineConstants::IndirectCallThreshold;

      InlineCostCallAnalyzer CA(*F, Call, IndirectCallParams, TTI,
                                GetAssumptionCache, GetBFI, PSI, ORE, false,
                                true);
      if (CA.analyze().isSuccess()) {
        increment(InlineCostFeatureIndex::NestedInlineCostEstimate,
                  CA.getCost());
        increment(InlineCostFeatureIndex::NestedInlines, 1);
      }
    } else {
      onCallPenalty();
    }
  }

  void onFinalizeSwitch(unsigned JumpTableSize,
                        unsigned NumCaseCluster) override {

    if (JumpTableSize) {
      int64_t JTCost =
          static_cast<int64_t>(JumpTableSize) * InlineConstants::InstrCost +
          JTCostMultiplier * InlineConstants::InstrCost;
      increment(InlineCostFeatureIndex::JumpTablePenalty, JTCost);
      return;
    }

    if (NumCaseCluster <= 3) {
      increment(InlineCostFeatureIndex::CaseClusterPenalty,
                NumCaseCluster * CaseClusterCostMultiplier *
                    InlineConstants::InstrCost);
      return;
    }

    int64_t ExpectedNumberOfCompare =
        getExpectedNumberOfCompare(NumCaseCluster);

    int64_t SwitchCost = ExpectedNumberOfCompare * SwitchCostMultiplier *
                         InlineConstants::InstrCost;
    increment(InlineCostFeatureIndex::SwitchPenalty, SwitchCost);
  }

  void onMissedSimplification() override {
    increment(InlineCostFeatureIndex::UnsimplifiedCommonInstructions,
              InlineConstants::InstrCost);
  }

  void onInitializeSROAArg(AllocaInst *Arg) override { SROACosts[Arg] = 0; }
  void onAggregateSROAUse(AllocaInst *Arg) override {
    SROACosts.find(Arg)->second += InlineConstants::InstrCost;
    SROACostSavingOpportunities += InlineConstants::InstrCost;
  }

  void onBlockAnalyzed(const BasicBlock *BB) override {
    if (BB->getTerminator()->getNumSuccessors() > 1)
      set(InlineCostFeatureIndex::IsMultipleBlocks, 1);
    Threshold -= SingleBBBonus;
  }

  InlineResult finalizeAnalysis() override {
    auto *Caller = CandidateCall.getFunction();
    if (Caller->hasMinSize()) {
      DominatorTree DT(F);
      LoopInfo LI(DT);
      for (Loop *L : LI) {
        // Ignore loops that will not be executed
        if (DeadBlocks.count(L->getHeader()))
          continue;
        increment(InlineCostFeatureIndex::NumLoops,
                  InlineConstants::LoopPenalty);
      }
    }
    set(InlineCostFeatureIndex::DeadBlocks, DeadBlocks.size());
    set(InlineCostFeatureIndex::SimplifiedInstructions,
        NumInstructionsSimplified);
    set(InlineCostFeatureIndex::ConstantArgs, NumConstantArgs);
    set(InlineCostFeatureIndex::ConstantOffsetPtrArgs,
        NumConstantOffsetPtrArgs);
    set(InlineCostFeatureIndex::SROASavings, SROACostSavingOpportunities);

    if (NumVectorInstructions <= NumInstructions / 10)
      Threshold -= VectorBonus;
    else if (NumVectorInstructions <= NumInstructions / 2)
      Threshold -= VectorBonus / 2;

    set(InlineCostFeatureIndex::Threshold, Threshold);

    return InlineResult::success();
  }

  bool shouldStop() override { return false; }

  void onLoadEliminationOpportunity() override {
    increment(InlineCostFeatureIndex::LoadElimination, 1);
  }

  InlineResult onAnalysisStart() override {
    increment(InlineCostFeatureIndex::CallSiteCost,
              -1 * getCallsiteCost(this->CandidateCall, DL));

    set(InlineCostFeatureIndex::ColdCcPenalty,
        (F.getCallingConv() == CallingConv::Cold));

    set(InlineCostFeatureIndex::LastCallToStaticBonus,
        (F.hasLocalLinkage() && F.hasOneLiveUse() &&
         &F == CandidateCall.getCalledFunction()));

    // FIXME: we shouldn't repeat this logic in both the Features and Cost
    // analyzer - instead, we should abstract it to a common method in the
    // CallAnalyzer
    int SingleBBBonusPercent = 50;
    int VectorBonusPercent = TTI.getInlinerVectorBonusPercent();
    Threshold += TTI.adjustInliningThreshold(&CandidateCall);
    Threshold *= TTI.getInliningThresholdMultiplier();
    SingleBBBonus = Threshold * SingleBBBonusPercent / 100;
    VectorBonus = Threshold * VectorBonusPercent / 100;
    Threshold += (SingleBBBonus + VectorBonus);

    return InlineResult::success();
  }

public:
  InlineCostFeaturesAnalyzer(
      const TargetTransformInfo &TTI,
      function_ref<AssumptionCache &(Function &)> &GetAssumptionCache,
      function_ref<BlockFrequencyInfo &(Function &)> GetBFI,
      ProfileSummaryInfo *PSI, Function &Callee,
      CallBase &Call)
      : CallAnalyzer(Callee, Call, TTI, GetAssumptionCache, GetBFI, PSI) {}

  const InlineCostFeatures &features() const { return Cost; }
};

} // namespace

/// Test whether the given value is an Alloca-derived function argument.
bool CallAnalyzer::isAllocaDerivedArg(Value *V) {
  return SROAArgValues.count(V);
}

void CallAnalyzer::disableSROAForArg(AllocaInst *SROAArg) {
  onDisableSROA(SROAArg);
  EnabledSROAAllocas.erase(SROAArg);
  disableLoadElimination();
}

void InlineCostAnnotationWriter::emitInstructionAnnot(
    const Instruction *I, formatted_raw_ostream &OS) {
  // The cost of inlining of the given instruction is always printed.
  // The threshold delta is printed only when it is non-zero. It happens
  // when we decided to give a bonus at a particular instruction.
  Optional<InstructionCostDetail> Record = ICCA->getCostDetails(I);
  if (!Record)
    OS << "; No analysis for the instruction";
  else {
    OS << "; cost before = " << Record->CostBefore
       << ", cost after = " << Record->CostAfter
       << ", threshold before = " << Record->ThresholdBefore
       << ", threshold after = " << Record->ThresholdAfter << ", ";
    OS << "cost delta = " << Record->getCostDelta();
    if (Record->hasThresholdChanged())
      OS << ", threshold delta = " << Record->getThresholdDelta();
  }
  auto C = ICCA->getSimplifiedValue(const_cast<Instruction *>(I));
  if (C) {
    OS << ", simplified to ";
    C.getValue()->print(OS, true);
  }
  OS << "\n";
}

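// Sample output (added for illustration; not part of the upstream file):
// with -print-instruction-comments enabled, each instruction is annotated
// roughly as:
//   ; cost before = 0, cost after = 5, threshold before = 487,
//   threshold after = 487, cost delta = 5
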
/// If 'V' maps to a SROA candidate, disable SROA for it.
void CallAnalyzer::disableSROA(Value *V) {
  if (auto *SROAArg = getSROAArgForValueOrNull(V)) {
    disableSROAForArg(SROAArg);
  }
}

void CallAnalyzer::disableLoadElimination() {
  if (EnableLoadElimination) {
    onDisableLoadElimination();
    EnableLoadElimination = false;
  }
}

/// Accumulate a constant GEP offset into an APInt if possible.
///
/// Returns false if unable to compute the offset for any reason. Respects any
/// simplified values known during the analysis of this callsite.
bool CallAnalyzer::accumulateGEPOffset(GEPOperator &GEP, APInt &Offset) {
  unsigned IntPtrWidth = DL.getIndexTypeSizeInBits(GEP.getType());
  assert(IntPtrWidth == Offset.getBitWidth());

  for (gep_type_iterator GTI = gep_type_begin(GEP), GTE = gep_type_end(GEP);
       GTI != GTE; ++GTI) {
    ConstantInt *OpC = dyn_cast<ConstantInt>(GTI.getOperand());
    if (!OpC)
      if (Constant *SimpleOp = SimplifiedValues.lookup(GTI.getOperand()))
        OpC = dyn_cast<ConstantInt>(SimpleOp);
    if (!OpC)
      return false;
    if (OpC->isZero())
      continue;

    // Handle a struct index, which adds its field offset to the pointer.
    if (StructType *STy = GTI.getStructTypeOrNull()) {
      unsigned ElementIdx = OpC->getZExtValue();
      const StructLayout *SL = DL.getStructLayout(STy);
      Offset += APInt(IntPtrWidth, SL->getElementOffset(ElementIdx));
      continue;
    }

    APInt TypeSize(IntPtrWidth, DL.getTypeAllocSize(GTI.getIndexedType()));
    Offset += OpC->getValue().sextOrTrunc(IntPtrWidth) * TypeSize;
  }
  return true;
}

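// Worked example (added for illustration; not part of the upstream file):
// for %p = getelementptr { i32, i64 }, ptr %base, i64 1, i32 1 under a
// typical 64-bit DataLayout, the loop adds 16 (one struct stride) and then
// 8 (the offset of field 1), accumulating a constant Offset of 24 bytes.
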
/// Use TTI to check whether a GEP is free.
///
/// Respects any simplified values known during the analysis of this callsite.
bool CallAnalyzer::isGEPFree(GetElementPtrInst &GEP) {
  SmallVector<Value *, 4> Operands;
  Operands.push_back(GEP.getOperand(0));
  for (const Use &Op : GEP.indices())
    if (Constant *SimpleOp = SimplifiedValues.lookup(Op))
      Operands.push_back(SimpleOp);
    else
      Operands.push_back(Op);
  return TTI.getUserCost(&GEP, Operands,
                         TargetTransformInfo::TCK_SizeAndLatency) ==
         TargetTransformInfo::TCC_Free;
}

bool CallAnalyzer::visitAlloca(AllocaInst &I) {
  disableSROA(I.getOperand(0));

  // Check whether inlining will turn a dynamic alloca into a static
  // alloca and handle that case.
  if (I.isArrayAllocation()) {
    Constant *Size = SimplifiedValues.lookup(I.getArraySize());
    if (auto *AllocSize = dyn_cast_or_null<ConstantInt>(Size)) {
      // Sometimes a dynamic alloca could be converted into a static alloca
      // after this constant prop, and become a huge static alloca on an
      // unconditional CFG path. Avoid inlining if this is going to happen above
      // a threshold.
      // FIXME: If the threshold is removed or lowered too much, we could end up
      // being too pessimistic and prevent inlining non-problematic code. This
      // could result in unintended perf regressions. A better overall strategy
      // is needed to track stack usage during inlining.
      Type *Ty = I.getAllocatedType();
      AllocatedSize = SaturatingMultiplyAdd(
          AllocSize->getLimitedValue(),
          DL.getTypeAllocSize(Ty).getKnownMinSize(), AllocatedSize);
      if (AllocatedSize > InlineConstants::MaxSimplifiedDynamicAllocaToInline)
        HasDynamicAlloca = true;
      return false;
    }
  }

  // Accumulate the allocated size.
  if (I.isStaticAlloca()) {
    Type *Ty = I.getAllocatedType();
    AllocatedSize =
        SaturatingAdd(DL.getTypeAllocSize(Ty).getKnownMinSize(), AllocatedSize);
  }

  // FIXME: This is overly conservative. Dynamic allocas are inefficient for
  // a variety of reasons, and so we would like to not inline them into
  // functions which don't currently have a dynamic alloca. This simply
  // disables inlining altogether in the presence of a dynamic alloca.
  if (!I.isStaticAlloca())
    HasDynamicAlloca = true;

  return false;
}

bool CallAnalyzer::visitPHI(PHINode &I) {
  // FIXME: We need to propagate SROA *disabling* through phi nodes, even
  // though we don't want to propagate its bonuses. The idea is to disable
  // SROA if it *might* be used in an inappropriate manner.

  // Phi nodes are always zero-cost.
  // FIXME: Pointer sizes may differ between different address spaces, so do we
  // need to use correct address space in the call to getPointerSizeInBits here?
  // Or could we skip the getPointerSizeInBits call completely? As far as I can
  // see the ZeroOffset is used as a dummy value, so we can probably use any
  // bit width for the ZeroOffset?
  APInt ZeroOffset = APInt::getZero(DL.getPointerSizeInBits(0));
  bool CheckSROA = I.getType()->isPointerTy();

  // Track the constant or pointer with constant offset we've seen so far.
  Constant *FirstC = nullptr;
  std::pair<Value *, APInt> FirstBaseAndOffset = {nullptr, ZeroOffset};
  Value *FirstV = nullptr;

  for (unsigned i = 0, e = I.getNumIncomingValues(); i != e; ++i) {
    BasicBlock *Pred = I.getIncomingBlock(i);
    // If the incoming block is dead, skip the incoming block.
    if (DeadBlocks.count(Pred))
      continue;
    // If the parent block of phi is not the known successor of the incoming
    // block, skip the incoming block.
    BasicBlock *KnownSuccessor = KnownSuccessors[Pred];
    if (KnownSuccessor && KnownSuccessor != I.getParent())
      continue;

    Value *V = I.getIncomingValue(i);
    // If the incoming value is this phi itself, skip the incoming value.
    if (&I == V)
      continue;

    Constant *C = dyn_cast<Constant>(V);
    if (!C)
      C = SimplifiedValues.lookup(V);

    std::pair<Value *, APInt> BaseAndOffset = {nullptr, ZeroOffset};
    if (!C && CheckSROA)
      BaseAndOffset = ConstantOffsetPtrs.lookup(V);

    if (!C && !BaseAndOffset.first)
      // The incoming value is neither a constant nor a pointer with constant
      // offset, exit early.
      return true;

    if (FirstC) {
      if (FirstC == C)
        // If we've seen a constant incoming value before and it is the same
        // constant we see this time, continue checking the next incoming value.
        continue;
      // Otherwise early exit because we either see a different constant or saw
      // a constant before but we have a pointer with constant offset this time.
      return true;
    }

    if (FirstV) {
      // The same logic as above, but check pointer with constant offset here.
      if (FirstBaseAndOffset == BaseAndOffset)
        continue;
      return true;
    }

    if (C) {
      // This is the 1st time we've seen a constant, record it.
      FirstC = C;
      continue;
    }

    // The remaining case is that this is the 1st time we've seen a pointer with
    // constant offset, record it.
    FirstV = V;
    FirstBaseAndOffset = BaseAndOffset;
  }

  // Check if we can map phi to a constant.
  if (FirstC) {
    SimplifiedValues[&I] = FirstC;
    return true;
  }

  // Check if we can map phi to a pointer with constant offset.
  if (FirstBaseAndOffset.first) {
    ConstantOffsetPtrs[&I] = FirstBaseAndOffset;

    if (auto *SROAArg = getSROAArgForValueOrNull(FirstV))
      SROAArgValues[&I] = SROAArg;
  }

  return true;
}

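// Example (added for illustration; not part of the upstream file): for
//   %p = phi ptr [ %a, %bb1 ], [ %a, %bb2 ]
// where every live incoming value resolves to the same base-plus-constant-
// offset pair, the phi inherits that pair in ConstantOffsetPtrs and keeps
// the SROA candidacy of the underlying alloca-derived argument.
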
/// Check we can fold GEPs of constant-offset call site argument pointers.
/// This requires target data and inbounds GEPs.
///
/// \return true if the specified GEP can be folded.
bool CallAnalyzer::canFoldInboundsGEP(GetElementPtrInst &I) {
  // Check if we have a base + offset for the pointer.
  std::pair<Value *, APInt> BaseAndOffset =
      ConstantOffsetPtrs.lookup(I.getPointerOperand());
  if (!BaseAndOffset.first)
    return false;

  // Check if the offset of this GEP is constant, and if so accumulate it
  // into Offset.
  if (!accumulateGEPOffset(cast<GEPOperator>(I), BaseAndOffset.second))
    return false;

  // Add the result as a new mapping to Base + Offset.
  ConstantOffsetPtrs[&I] = BaseAndOffset;

  return true;
}

1496 bool CallAnalyzer::visitGetElementPtr(GetElementPtrInst &I) {
1497  auto *SROAArg = getSROAArgForValueOrNull(I.getPointerOperand());
1498 
1499  // Lambda to check whether a GEP's indices are all constant.
1500  auto IsGEPOffsetConstant = [&](GetElementPtrInst &GEP) {
1501  for (const Use &Op : GEP.indices())
1502  if (!isa<Constant>(Op) && !SimplifiedValues.lookup(Op))
1503  return false;
1504  return true;
1505  };
1506 
1507  if (!DisableGEPConstOperand)
1508  if (simplifyInstruction(I, [&](SmallVectorImpl<Constant *> &COps) {
1509  SmallVector<Constant *, 2> Indices;
1510  for (unsigned int Index = 1; Index < COps.size(); ++Index)
1511  Indices.push_back(COps[Index]);
1512  return ConstantExpr::getGetElementPtr(
1513  I.getSourceElementType(), COps[0], Indices, I.isInBounds());
1514  }))
1515  return true;
1516 
1517  if ((I.isInBounds() && canFoldInboundsGEP(I)) || IsGEPOffsetConstant(I)) {
1518  if (SROAArg)
1519  SROAArgValues[&I] = SROAArg;
1520 
1521  // Constant GEPs are modeled as free.
1522  return true;
1523  }
1524 
1525  // Variable GEPs will require math and will disable SROA.
1526  if (SROAArg)
1527  disableSROAForArg(SROAArg);
1528  return isGEPFree(I);
1529 }
1530 
1531 /// Simplify \p I if its operands are constants and update SimplifiedValues.
1532 /// \p Evaluate is a callable specific to instruction type that evaluates the
1533 /// instruction when all the operands are constants.
1534 template <typename Callable>
1535 bool CallAnalyzer::simplifyInstruction(Instruction &I, Callable Evaluate) {
1536  SmallVector<Constant *, 2> COps;
1537  for (Value *Op : I.operands()) {
1538  Constant *COp = dyn_cast<Constant>(Op);
1539  if (!COp)
1540  COp = SimplifiedValues.lookup(Op);
1541  if (!COp)
1542  return false;
1543  COps.push_back(COp);
1544  }
1545  auto *C = Evaluate(COps);
1546  if (!C)
1547  return false;
1548  SimplifiedValues[&I] = C;
1549  return true;
1550 }
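The same gather-then-evaluate shape, restated outside LLVM as a minimal sketch (hypothetical Node type and side table; only the control flow mirrors the template above):

#include <map>
#include <optional>
#include <vector>

// Hypothetical IR-like node whose operands are either literal constants or
// may have been simplified to constants earlier (recorded in a side table).
struct Node {
  std::vector<Node *> Ops;
  std::optional<int> Literal;
};

// Gather a constant for every operand; bail out if any operand is unknown,
// otherwise evaluate and memoize the result, as simplifyInstruction() does
// with SimplifiedValues.
template <typename Callable>
bool simplifyNode(Node &N, std::map<const Node *, int> &Simplified,
                  Callable Evaluate) {
  std::vector<int> COps;
  for (Node *Op : N.Ops) {
    std::optional<int> C = Op->Literal;
    if (!C) {
      auto It = Simplified.find(Op);
      if (It == Simplified.end())
        return false; // operand is not known to be constant
      C = It->second;
    }
    COps.push_back(*C);
  }
  std::optional<int> Result = Evaluate(COps);
  if (!Result)
    return false;
  Simplified[&N] = *Result;
  return true;
}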
1551 
1552 /// Try to simplify a call to llvm.is.constant.
1553 ///
1554 /// Duplicate the argument checking from CallAnalyzer::simplifyCallSite since
1555 /// we expect calls of this specific intrinsic to be infrequent.
1556 ///
1557 /// FIXME: Given that we know CB's parent (F) caller
1558 /// (CandidateCall->getParent()->getParent()), we might be able to determine
1559 /// whether inlining F into F's caller would change how the call to
1560 /// llvm.is.constant would evaluate.
1561 bool CallAnalyzer::simplifyIntrinsicCallIsConstant(CallBase &CB) {
1562  Value *Arg = CB.getArgOperand(0);
1563  auto *C = dyn_cast<Constant>(Arg);
1564 
1565  if (!C)
1566  C = dyn_cast_or_null<Constant>(SimplifiedValues.lookup(Arg));
1567 
1568  Type *RT = CB.getFunctionType()->getReturnType();
1569  SimplifiedValues[&CB] = ConstantInt::get(RT, C ? 1 : 0);
1570  return true;
1571 }
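For intuition about why this fold matters, a hedged source-level sketch (it assumes the usual Clang lowering of __builtin_constant_p to llvm.is.constant): once the callee is analyzed with a literal argument, the intrinsic evaluates to 1 and the generic path becomes dead.

// Hypothetical example: when `step(41)` is analyzed for inlining, the
// argument maps to a constant, llvm.is.constant folds to true, and only the
// fast path contributes cost.
static inline int step(int x) {
  if (__builtin_constant_p(x))
    return x + 1; // fast path, fully foldable
  return x * 2;   // generic path, dead after the fold
}
int is_constant_caller() { return step(41); }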
1572 
1573 bool CallAnalyzer::visitBitCast(BitCastInst &I) {
1574  // Propagate constants through bitcasts.
1575  if (simplifyInstruction(I, [&](SmallVectorImpl<Constant *> &COps) {
1576  return ConstantExpr::getBitCast(COps[0], I.getType());
1577  }))
1578  return true;
1579 
1580  // Track base/offsets through casts
1581  std::pair<Value *, APInt> BaseAndOffset =
1582  ConstantOffsetPtrs.lookup(I.getOperand(0));
1583  // Casts don't change the offset, just wrap it up.
1584  if (BaseAndOffset.first)
1585  ConstantOffsetPtrs[&I] = BaseAndOffset;
1586 
1587  // Also look for SROA candidates here.
1588  if (auto *SROAArg = getSROAArgForValueOrNull(I.getOperand(0)))
1589  SROAArgValues[&I] = SROAArg;
1590 
1591  // Bitcasts are always zero cost.
1592  return true;
1593 }
1594 
1595 bool CallAnalyzer::visitPtrToInt(PtrToIntInst &I) {
1596  // Propagate constants through ptrtoint.
1597  if (simplifyInstruction(I, [&](SmallVectorImpl<Constant *> &COps) {
1598  return ConstantExpr::getPtrToInt(COps[0], I.getType());
1599  }))
1600  return true;
1601 
1602  // Track base/offset pairs when converted to a plain integer provided the
1603  // integer is large enough to represent the pointer.
1604  unsigned IntegerSize = I.getType()->getScalarSizeInBits();
1605  unsigned AS = I.getOperand(0)->getType()->getPointerAddressSpace();
1606  if (IntegerSize == DL.getPointerSizeInBits(AS)) {
1607  std::pair<Value *, APInt> BaseAndOffset =
1608  ConstantOffsetPtrs.lookup(I.getOperand(0));
1609  if (BaseAndOffset.first)
1610  ConstantOffsetPtrs[&I] = BaseAndOffset;
1611  }
1612 
1613  // This is really weird. Technically, ptrtoint will disable SROA. However,
1614  // unless that ptrtoint is *used* somewhere in the live basic blocks after
1615  // inlining, it will be nuked, and SROA should proceed. All of the uses which
1616  // would block SROA would also block SROA if applied directly to a pointer,
1617  // and so we can just add the integer in here. The only places where SROA is
1618  // preserved either cannot fire on an integer, or won't in-and-of themselves
1619  // disable SROA (ext) w/o some later use that we would see and disable.
1620  if (auto *SROAArg = getSROAArgForValueOrNull(I.getOperand(0)))
1621  SROAArgValues[&I] = SROAArg;
1622 
1623  return TargetTransformInfo::TCC_Free ==
1624  TTI.getUserCost(&I, TargetTransformInfo::TCK_SizeAndLatency);
1625 }
1626 
1627 bool CallAnalyzer::visitIntToPtr(IntToPtrInst &I) {
1628  // Propagate constants through inttoptr.
1629  if (simplifyInstruction(I, [&](SmallVectorImpl<Constant *> &COps) {
1630  return ConstantExpr::getIntToPtr(COps[0], I.getType());
1631  }))
1632  return true;
1633 
1634  // Track base/offset pairs when round-tripped through a pointer without
1635  // modifications provided the integer is not too large.
1636  Value *Op = I.getOperand(0);
1637  unsigned IntegerSize = Op->getType()->getScalarSizeInBits();
1638  if (IntegerSize <= DL.getPointerTypeSizeInBits(I.getType())) {
1639  std::pair<Value *, APInt> BaseAndOffset = ConstantOffsetPtrs.lookup(Op);
1640  if (BaseAndOffset.first)
1641  ConstantOffsetPtrs[&I] = BaseAndOffset;
1642  }
1643 
1644  // "Propagate" SROA here in the same manner as we do for ptrtoint above.
1645  if (auto *SROAArg = getSROAArgForValueOrNull(Op))
1646  SROAArgValues[&I] = SROAArg;
1647 
1648  return TargetTransformInfo::TCC_Free ==
1649  TTI.getUserCost(&I, TargetTransformInfo::TCK_SizeAndLatency);
1650 }
1651 
1652 bool CallAnalyzer::visitCastInst(CastInst &I) {
1653  // Propagate constants through casts.
1654  if (simplifyInstruction(I, [&](SmallVectorImpl<Constant *> &COps) {
1655  return ConstantExpr::getCast(I.getOpcode(), COps[0], I.getType());
1656  }))
1657  return true;
1658 
1659  // Disable SROA in the face of arbitrary casts we don't explicitly list
1660  // elsewhere.
1661  disableSROA(I.getOperand(0));
1662 
1663  // If this is a floating-point cast, and the target says this operation
1664  // is expensive, this may eventually become a library call. Treat the cost
1665  // as such.
1666  switch (I.getOpcode()) {
1667  case Instruction::FPTrunc:
1668  case Instruction::FPExt:
1669  case Instruction::UIToFP:
1670  case Instruction::SIToFP:
1671  case Instruction::FPToUI:
1672  case Instruction::FPToSI:
1673  if (TTI.getFPOpCost(I.getType()) == TargetTransformInfo::TCC_Expensive)
1674  onCallPenalty();
1675  break;
1676  default:
1677  break;
1678  }
1679 
1680  return TargetTransformInfo::TCC_Free ==
1681  TTI.getUserCost(&I, TargetTransformInfo::TCK_SizeAndLatency);
1682 }
1683 
1684 bool CallAnalyzer::paramHasAttr(Argument *A, Attribute::AttrKind Attr) {
1685  return CandidateCall.paramHasAttr(A->getArgNo(), Attr);
1686 }
1687 
1688 bool CallAnalyzer::isKnownNonNullInCallee(Value *V) {
1689  // Does the *call site* have the NonNull attribute set on an argument? We
1690  // use the attribute on the call site to memoize any analysis done in the
1691  // caller. This will also trip if the callee function has a non-null
1692  // parameter attribute, but that's a less interesting case because hopefully
1693  // the callee would already have been simplified based on that.
1694  if (Argument *A = dyn_cast<Argument>(V))
1695  if (paramHasAttr(A, Attribute::NonNull))
1696  return true;
1697 
1698  // Is this an alloca in the caller? This is distinct from the attribute case
1699  // above because attributes aren't updated within the inliner itself and we
1700  // always want to catch the alloca derived case.
1701  if (isAllocaDerivedArg(V))
1702  // We can actually predict the result of comparisons between an
1703  // alloca-derived value and null. Note that this fires regardless of
1704  // SROA firing.
1705  return true;
1706 
1707  return false;
1708 }
1709 
1710 bool CallAnalyzer::allowSizeGrowth(CallBase &Call) {
1711  // If the normal destination of the invoke or the parent block of the call
1712  // site is unreachable-terminated, there is little point in inlining this
1713  // unless there is literally zero cost.
1714  // FIXME: Note that it is possible that an unreachable-terminated block has a
1715  // hot entry. For example, in the scenario below, inlining hot_call_X() may
1716  // be beneficial:
1717  // main() {
1718  // hot_call_1();
1719  // ...
1720  // hot_call_N()
1721  // exit(0);
1722  // }
1723  // For now, we are not handling this corner case here as it is rare in real
1724  // code. In the future, we should elaborate on this based on BPI and BFI in
1725  // more general threshold-adjusting heuristics in updateThreshold().
1726  if (InvokeInst *II = dyn_cast<InvokeInst>(&Call)) {
1727  if (isa<UnreachableInst>(II->getNormalDest()->getTerminator()))
1728  return false;
1729  } else if (isa<UnreachableInst>(Call.getParent()->getTerminator()))
1730  return false;
1731 
1732  return true;
1733 }
1734 
1735 bool InlineCostCallAnalyzer::isColdCallSite(CallBase &Call,
1736  BlockFrequencyInfo *CallerBFI) {
1737  // If global profile summary is available, then callsite's coldness is
1738  // determined based on that.
1739  if (PSI && PSI->hasProfileSummary())
1740  return PSI->isColdCallSite(Call, CallerBFI);
1741 
1742  // Otherwise we need BFI to be available.
1743  if (!CallerBFI)
1744  return false;
1745 
1746  // Determine if the callsite is cold relative to caller's entry. We could
1747  // potentially cache the computation of scaled entry frequency, but the added
1748  // complexity is not worth it unless this scaling shows up high in the
1749  // profiles.
1750  const BranchProbability ColdProb(ColdCallSiteRelFreq, 100);
1751  auto CallSiteBB = Call.getParent();
1752  auto CallSiteFreq = CallerBFI->getBlockFreq(CallSiteBB);
1753  auto CallerEntryFreq =
1754  CallerBFI->getBlockFreq(&(Call.getCaller()->getEntryBlock()));
1755  return CallSiteFreq < CallerEntryFreq * ColdProb;
1756 }
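A worked example of the relative-coldness test with invented numbers (the real code uses BranchProbability for overflow-safe scaling; the percentage knob is an assumption here):

#include <cstdint>

// Toy restatement of `CallSiteFreq < CallerEntryFreq * ColdProb` with plain
// integers; every value below is invented for illustration.
bool isColdExample() {
  std::uint64_t CallSiteFreq = 3;       // hypothetical callsite block frequency
  std::uint64_t CallerEntryFreq = 1000; // hypothetical entry frequency
  unsigned ColdRelFreqPercent = 2;      // assumed value of the relative-freq knob
  return CallSiteFreq * 100 < CallerEntryFreq * ColdRelFreqPercent; // true
}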
1757 
1758 Optional<int>
1759 InlineCostCallAnalyzer::getHotCallSiteThreshold(CallBase &Call,
1760  BlockFrequencyInfo *CallerBFI) {
1761 
1762  // If global profile summary is available, then callsite's hotness is
1763  // determined based on that.
1764  if (PSI && PSI->hasProfileSummary() && PSI->isHotCallSite(Call, CallerBFI))
1765  return Params.HotCallSiteThreshold;
1766 
1767  // Otherwise we need BFI to be available and to have a locally hot callsite
1768  // threshold.
1769  if (!CallerBFI || !Params.LocallyHotCallSiteThreshold)
1770  return None;
1771 
1772  // Determine if the callsite is hot relative to caller's entry. We could
1773  // potentially cache the computation of scaled entry frequency, but the added
1774  // complexity is not worth it unless this scaling shows up high in the
1775  // profiles.
1776  auto CallSiteBB = Call.getParent();
1777  auto CallSiteFreq = CallerBFI->getBlockFreq(CallSiteBB).getFrequency();
1778  auto CallerEntryFreq = CallerBFI->getEntryFreq();
1779  if (CallSiteFreq >= CallerEntryFreq * HotCallSiteRelFreq)
1780  return Params.LocallyHotCallSiteThreshold;
1781 
1782  // Otherwise treat it normally.
1783  return None;
1784 }
1785 
1786 void InlineCostCallAnalyzer::updateThreshold(CallBase &Call, Function &Callee) {
1787  // If no size growth is allowed for this inlining, set Threshold to 0.
1788  if (!allowSizeGrowth(Call)) {
1789  Threshold = 0;
1790  return;
1791  }
1792 
1793  Function *Caller = Call.getCaller();
1794 
1795  // return min(A, B) if B is valid.
1796  auto MinIfValid = [](int A, Optional<int> B) {
1797  return B ? std::min(A, B.getValue()) : A;
1798  };
1799 
1800  // return max(A, B) if B is valid.
1801  auto MaxIfValid = [](int A, Optional<int> B) {
1802  return B ? std::max(A, B.getValue()) : A;
1803  };
1804 
1805  // Various bonus percentages. These are multiplied by Threshold to get the
1806  // bonus values.
1807  // SingleBBBonus: This bonus is applied if the callee has a single reachable
1808  // basic block at the given callsite context. This is speculatively applied
1809  // and withdrawn if more than one basic block is seen.
1810  //
1811  // LastCallToStaticBonus: This large bonus is applied to ensure the inlining
1812  // of the last call to a static function as inlining such functions is
1813  // guaranteed to reduce code size.
1814  //
1815  // These bonus percentages may be set to 0 based on properties of the caller
1816  // and the callsite.
1817  int SingleBBBonusPercent = 50;
1818  int VectorBonusPercent = TTI.getInlinerVectorBonusPercent();
1819  int LastCallToStaticBonus = InlineConstants::LastCallToStaticBonus;
1820 
1821  // Lambda to set all the above bonus and bonus percentages to 0.
1822  auto DisallowAllBonuses = [&]() {
1823  SingleBBBonusPercent = 0;
1824  VectorBonusPercent = 0;
1825  LastCallToStaticBonus = 0;
1826  };
1827 
1828  // Use the OptMinSizeThreshold or OptSizeThreshold knob if they are available
1829  // and reduce the threshold if the caller has the necessary attribute.
1830  if (Caller->hasMinSize()) {
1831  Threshold = MinIfValid(Threshold, Params.OptMinSizeThreshold);
1832  // For minsize, we want to disable the single BB bonus and the vector
1833  // bonuses, but not the last-call-to-static bonus. Inlining the last call to
1834  // a static function will, at the minimum, eliminate the parameter setup and
1835  // call/return instructions.
1836  SingleBBBonusPercent = 0;
1837  VectorBonusPercent = 0;
1838  } else if (Caller->hasOptSize())
1839  Threshold = MinIfValid(Threshold, Params.OptSizeThreshold);
1840 
1841  // Adjust the threshold based on inlinehint attribute and profile based
1842  // hotness information if the caller does not have MinSize attribute.
1843  if (!Caller->hasMinSize()) {
1844  if (Callee.hasFnAttribute(Attribute::InlineHint))
1845  Threshold = MaxIfValid(Threshold, Params.HintThreshold);
1846 
1847  // FIXME: After switching to the new passmanager, simplify the logic below
1848  // by checking only the callsite hotness/coldness as we will reliably
1849  // have local profile information.
1850  //
1851  // Callsite hotness and coldness can be determined if sample profile is
1852  // used (which adds hotness metadata to calls) or if caller's
1853  // BlockFrequencyInfo is available.
1854  BlockFrequencyInfo *CallerBFI = GetBFI ? &(GetBFI(*Caller)) : nullptr;
1855  auto HotCallSiteThreshold = getHotCallSiteThreshold(Call, CallerBFI);
1856  if (!Caller->hasOptSize() && HotCallSiteThreshold) {
1857  LLVM_DEBUG(dbgs() << "Hot callsite.\n");
1858  // FIXME: This should update the threshold only if it exceeds the
1859  // current threshold, but AutoFDO + ThinLTO currently relies on this
1860  // behavior to prevent inlining of hot callsites during ThinLTO
1861  // compile phase.
1862  Threshold = HotCallSiteThreshold.getValue();
1863  } else if (isColdCallSite(Call, CallerBFI)) {
1864  LLVM_DEBUG(dbgs() << "Cold callsite.\n");
1865  // Do not apply bonuses for a cold callsite including the
1866  // LastCallToStatic bonus. While this bonus might result in code size
1867  // reduction, it can cause the size of a non-cold caller to increase
1868  // preventing it from being inlined.
1869  DisallowAllBonuses();
1870  Threshold = MinIfValid(Threshold, Params.ColdCallSiteThreshold);
1871  } else if (PSI) {
1872  // Use callee's global profile information only if we have no way of
1873  // determining this via callsite information.
1874  if (PSI->isFunctionEntryHot(&Callee)) {
1875  LLVM_DEBUG(dbgs() << "Hot callee.\n");
1876  // If callsite hotness cannot be determined, we may still know
1877  // that the callee is hot and treat it as a weaker hint for threshold
1878  // increase.
1879  Threshold = MaxIfValid(Threshold, Params.HintThreshold);
1880  } else if (PSI->isFunctionEntryCold(&Callee)) {
1881  LLVM_DEBUG(dbgs() << "Cold callee.\n");
1882  // Do not apply bonuses for a cold callee including the
1883  // LastCallToStatic bonus. While this bonus might result in code size
1884  // reduction, it can cause the size of a non-cold caller to increase
1885  // preventing it from being inlined.
1886  DisallowAllBonuses();
1887  Threshold = MinIfValid(Threshold, Params.ColdThreshold);
1888  }
1889  }
1890  }
1891 
1892  Threshold += TTI.adjustInliningThreshold(&Call);
1893 
1894  // Finally, take the target-specific inlining threshold multiplier into
1895  // account.
1896  Threshold *= TTI.getInliningThresholdMultiplier();
1897 
1898  SingleBBBonus = Threshold * SingleBBBonusPercent / 100;
1899  VectorBonus = Threshold * VectorBonusPercent / 100;
1900 
1901  bool OnlyOneCallAndLocalLinkage = F.hasLocalLinkage() && F.hasOneLiveUse() &&
1902  &F == Call.getCalledFunction();
1903  // If there is only one call of the function, and it has internal linkage,
1904  // the cost of inlining it drops dramatically. It may seem odd to update
1905  // Cost in updateThreshold, but the bonus depends on the logic in this method.
1906  if (OnlyOneCallAndLocalLinkage)
1907  Cost -= LastCallToStaticBonus;
1908 }
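To make the bonus arithmetic concrete, a sketch with assumed inputs: a threshold of 225, the 50% single-BB bonus from above, and a vector bonus percent of 150 (the real value comes from TTI):

// Worked example of the final bonus computation; every input is assumed.
void thresholdExample() {
  int Threshold = 225;           // e.g. the -inline-threshold default
  int SingleBBBonusPercent = 50; // speculative, withdrawn on a second block
  int VectorBonusPercent = 150;  // assumed TTI.getInlinerVectorBonusPercent()
  int SingleBBBonus = Threshold * SingleBBBonusPercent / 100; // 112
  int VectorBonus = Threshold * VectorBonusPercent / 100;     // 337
  (void)SingleBBBonus;
  (void)VectorBonus;
}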
1909 
1910 bool CallAnalyzer::visitCmpInst(CmpInst &I) {
1911  Value *LHS = I.getOperand(0), *RHS = I.getOperand(1);
1912  // First try to handle simplified comparisons.
1913  if (simplifyInstruction(I, [&](SmallVectorImpl<Constant *> &COps) {
1914  return ConstantExpr::getCompare(I.getPredicate(), COps[0], COps[1]);
1915  }))
1916  return true;
1917 
1918  if (I.getOpcode() == Instruction::FCmp)
1919  return false;
1920 
1921  // Otherwise look for a comparison between constant offset pointers with
1922  // a common base.
1923  Value *LHSBase, *RHSBase;
1924  APInt LHSOffset, RHSOffset;
1925  std::tie(LHSBase, LHSOffset) = ConstantOffsetPtrs.lookup(LHS);
1926  if (LHSBase) {
1927  std::tie(RHSBase, RHSOffset) = ConstantOffsetPtrs.lookup(RHS);
1928  if (RHSBase && LHSBase == RHSBase) {
1929  // We have common bases, fold the icmp to a constant based on the
1930  // offsets.
1931  Constant *CLHS = ConstantInt::get(LHS->getContext(), LHSOffset);
1932  Constant *CRHS = ConstantInt::get(RHS->getContext(), RHSOffset);
1933  if (Constant *C = ConstantExpr::getICmp(I.getPredicate(), CLHS, CRHS)) {
1934  SimplifiedValues[&I] = C;
1935  ++NumConstantPtrCmps;
1936  return true;
1937  }
1938  }
1939  }
1940 
1941  // If the comparison is an equality comparison with null, we can simplify it
1942  // if we know the value (argument) can't be null.
1943  if (I.isEquality() && isa<ConstantPointerNull>(I.getOperand(1)) &&
1944  isKnownNonNullInCallee(I.getOperand(0))) {
1945  bool IsNotEqual = I.getPredicate() == CmpInst::ICMP_NE;
1946  SimplifiedValues[&I] = IsNotEqual ? ConstantInt::getTrue(I.getType())
1947  : ConstantInt::getFalse(I.getType());
1948  return true;
1949  }
1950  return handleSROA(I.getOperand(0), isa<ConstantPointerNull>(I.getOperand(1)));
1951 }
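A source-level sketch of the null-compare fold (hypothetical example): at a callsite whose argument is alloca-derived in the caller, the `p != nullptr` guard folds to true and the zero branch dies.

// Hypothetical pattern: `&x` is alloca-derived, so isKnownNonNullInCallee()
// lets the analyzer fold the equality compare against null.
static inline int deref_or_zero(const int *p) {
  return p != nullptr ? *p : 0;
}
int guard_caller() {
  int x = 7;
  return deref_or_zero(&x); // compare folds to true; result is 7
}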
1952 
1953 bool CallAnalyzer::visitSub(BinaryOperator &I) {
1954  // Try to handle a special case: we can fold computing the difference of two
1955  // constant-related pointers.
1956  Value *LHS = I.getOperand(0), *RHS = I.getOperand(1);
1957  Value *LHSBase, *RHSBase;
1958  APInt LHSOffset, RHSOffset;
1959  std::tie(LHSBase, LHSOffset) = ConstantOffsetPtrs.lookup(LHS);
1960  if (LHSBase) {
1961  std::tie(RHSBase, RHSOffset) = ConstantOffsetPtrs.lookup(RHS);
1962  if (RHSBase && LHSBase == RHSBase) {
1963  // We have common bases, fold the subtract to a constant based on the
1964  // offsets.
1965  Constant *CLHS = ConstantInt::get(LHS->getContext(), LHSOffset);
1966  Constant *CRHS = ConstantInt::get(RHS->getContext(), RHSOffset);
1967  if (Constant *C = ConstantExpr::getSub(CLHS, CRHS)) {
1968  SimplifiedValues[&I] = C;
1969  ++NumConstantPtrDiffs;
1970  return true;
1971  }
1972  }
1973  }
1974 
1975  // Otherwise, fall back to the generic logic for simplifying and handling
1976  // instructions.
1977  return Base::visitSub(I);
1978 }
1979 
1980 bool CallAnalyzer::visitBinaryOperator(BinaryOperator &I) {
1981  Value *LHS = I.getOperand(0), *RHS = I.getOperand(1);
1982  Constant *CLHS = dyn_cast<Constant>(LHS);
1983  if (!CLHS)
1984  CLHS = SimplifiedValues.lookup(LHS);
1985  Constant *CRHS = dyn_cast<Constant>(RHS);
1986  if (!CRHS)
1987  CRHS = SimplifiedValues.lookup(RHS);
1988 
1989  Value *SimpleV = nullptr;
1990  if (auto FI = dyn_cast<FPMathOperator>(&I))
1991  SimpleV = SimplifyBinOp(I.getOpcode(), CLHS ? CLHS : LHS, CRHS ? CRHS : RHS,
1992  FI->getFastMathFlags(), DL);
1993  else
1994  SimpleV =
1995  SimplifyBinOp(I.getOpcode(), CLHS ? CLHS : LHS, CRHS ? CRHS : RHS, DL);
1996 
1997  if (Constant *C = dyn_cast_or_null<Constant>(SimpleV))
1998  SimplifiedValues[&I] = C;
1999 
2000  if (SimpleV)
2001  return true;
2002 
2003  // Disable any SROA on arguments to arbitrary, unsimplified binary operators.
2004  disableSROA(LHS);
2005  disableSROA(RHS);
2006 
2007  // If the instruction is floating point, and the target says this operation
2008  // is expensive, this may eventually become a library call. Treat the cost
2009  // as such. Unless it's fneg which can be implemented with an xor.
2010  using namespace llvm::PatternMatch;
2011  if (I.getType()->isFloatingPointTy() &&
2012  TTI.getFPOpCost(I.getType()) == TargetTransformInfo::TCC_Expensive &&
2013  !match(&I, m_FNeg(m_Value())))
2014  onCallPenalty();
2015 
2016  return false;
2017 }
2018 
2019 bool CallAnalyzer::visitFNeg(UnaryOperator &I) {
2020  Value *Op = I.getOperand(0);
2021  Constant *COp = dyn_cast<Constant>(Op);
2022  if (!COp)
2023  COp = SimplifiedValues.lookup(Op);
2024 
2025  Value *SimpleV = SimplifyFNegInst(
2026  COp ? COp : Op, cast<FPMathOperator>(I).getFastMathFlags(), DL);
2027 
2028  if (Constant *C = dyn_cast_or_null<Constant>(SimpleV))
2029  SimplifiedValues[&I] = C;
2030 
2031  if (SimpleV)
2032  return true;
2033 
2034  // Disable any SROA on arguments to arbitrary, unsimplified fneg.
2035  disableSROA(Op);
2036 
2037  return false;
2038 }
2039 
2040 bool CallAnalyzer::visitLoad(LoadInst &I) {
2041  if (handleSROA(I.getPointerOperand(), I.isSimple()))
2042  return true;
2043 
2044  // If the data is already loaded from this address and hasn't been clobbered
2045  // by any stores or calls, this load is likely to be redundant and can be
2046  // eliminated.
2047  if (EnableLoadElimination &&
2048  !LoadAddrSet.insert(I.getPointerOperand()).second && I.isUnordered()) {
2049  onLoadEliminationOpportunity();
2050  return true;
2051  }
2052 
2053  return false;
2054 }
2055 
2056 bool CallAnalyzer::visitStore(StoreInst &I) {
2057  if (handleSROA(I.getPointerOperand(), I.isSimple()))
2058  return true;
2059 
2060  // The store can potentially clobber loads and prevent repeated loads from
2061  // being eliminated.
2062  // FIXME:
2063  // 1. We can probably keep an initial set of eliminatable loads subtracted
2064  // from the cost even when we finally see a store. We just need to disable
2065  // *further* accumulation of elimination savings.
2066  // 2. We should probably at some point thread MemorySSA for the callee into
2067  // this and then use that to actually compute *really* precise savings.
2068  disableLoadElimination();
2069  return false;
2070 }
2071 
2072 bool CallAnalyzer::visitExtractValue(ExtractValueInst &I) {
2073  // Constant folding for extract value is trivial.
2074  if (simplifyInstruction(I, [&](SmallVectorImpl<Constant *> &COps) {
2075  return ConstantExpr::getExtractValue(COps[0], I.getIndices());
2076  }))
2077  return true;
2078 
2079  // SROA can't look through these, but they may be free.
2080  return Base::visitExtractValue(I);
2081 }
2082 
2083 bool CallAnalyzer::visitInsertValue(InsertValueInst &I) {
2084  // Constant folding for insert value is trivial.
2085  if (simplifyInstruction(I, [&](SmallVectorImpl<Constant *> &COps) {
2086  return ConstantExpr::getInsertValue(/*AggregateOperand*/ COps[0],
2087  /*InsertedValueOperand*/ COps[1],
2088  I.getIndices());
2089  }))
2090  return true;
2091 
2092  // SROA can't look through these, but they may be free.
2093  return Base::visitInsertValue(I);
2094 }
2095 
2096 /// Try to simplify a call site.
2097 ///
2098 /// Takes a concrete function and callsite and tries to actually simplify it by
2099 /// analyzing the arguments and call itself with instsimplify. Returns true if
2100 /// it has simplified the callsite to some other entity (a constant), making it
2101 /// free.
2102 bool CallAnalyzer::simplifyCallSite(Function *F, CallBase &Call) {
2103  // FIXME: Using the instsimplify logic directly for this is inefficient
2104  // because we have to continually rebuild the argument list even when no
2105  // simplifications can be performed. Until that is fixed with remapping
2106  // inside of instsimplify, directly constant fold calls here.
2107  if (!canConstantFoldCallTo(&Call, F))
2108  return false;
2109 
2110  // Try to re-map the arguments to constants.
2111  SmallVector<Constant *, 4> ConstantArgs;
2112  ConstantArgs.reserve(Call.arg_size());
2113  for (Value *I : Call.args()) {
2114  Constant *C = dyn_cast<Constant>(I);
2115  if (!C)
2116  C = dyn_cast_or_null<Constant>(SimplifiedValues.lookup(I));
2117  if (!C)
2118  return false; // This argument doesn't map to a constant.
2119 
2120  ConstantArgs.push_back(C);
2121  }
2122  if (Constant *C = ConstantFoldCall(&Call, F, ConstantArgs)) {
2123  SimplifiedValues[&Call] = C;
2124  return true;
2125  }
2126 
2127  return false;
2128 }
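An example of a callsite this makes free (hedged: it assumes the usual lowering of __builtin_popcount to llvm.ctpop, which constant folding evaluates): with a constant argument after argument mapping, the call maps to a constant and contributes no cost.

// Hypothetical foldable callsite: the argument is a literal, so
// ConstantFoldCall() evaluates the popcount intrinsic during analysis.
int popcount_caller() {
  return __builtin_popcount(0xF0); // folds to 4
}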
2129 
2130 bool CallAnalyzer::visitCallBase(CallBase &Call) {
2131  if (!onCallBaseVisitStart(Call))
2132  return true;
2133 
2134  if (Call.hasFnAttr(Attribute::ReturnsTwice) &&
2135  !F.hasFnAttribute(Attribute::ReturnsTwice)) {
2136  // This aborts the entire analysis.
2137  ExposesReturnsTwice = true;
2138  return false;
2139  }
2140  if (isa<CallInst>(Call) && cast<CallInst>(Call).cannotDuplicate())
2141  ContainsNoDuplicateCall = true;
2142 
2143  Function *F = Call.getCalledFunction();
2144  bool IsIndirectCall = !F;
2145  if (IsIndirectCall) {
2146  // Check if this happens to be an indirect function call to a known function
2147  // in this inline context. If not, we've done all we can.
2148  Value *Callee = Call.getCalledOperand();
2149  F = dyn_cast_or_null<Function>(SimplifiedValues.lookup(Callee));
2150  if (!F || F->getFunctionType() != Call.getFunctionType()) {
2151  onCallArgumentSetup(Call);
2152 
2153  if (!Call.onlyReadsMemory())
2154  disableLoadElimination();
2155  return Base::visitCallBase(Call);
2156  }
2157  }
2158 
2159  assert(F && "Expected a call to a known function");
2160 
2161  // When we have a concrete function, first try to simplify it directly.
2162  if (simplifyCallSite(F, Call))
2163  return true;
2164 
2165  // Next check if it is an intrinsic we know about.
2166  // FIXME: Lift this into part of the InstVisitor.
2167  if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(&Call)) {
2168  switch (II->getIntrinsicID()) {
2169  default:
2170  if (!Call.onlyReadsMemory() && !isAssumeLikeIntrinsic(II))
2171  disableLoadElimination();
2172  return Base::visitCallBase(Call);
2173 
2174  case Intrinsic::load_relative:
2175  onLoadRelativeIntrinsic();
2176  return false;
2177 
2178  case Intrinsic::memset:
2179  case Intrinsic::memcpy:
2180  case Intrinsic::memmove:
2181  disableLoadElimination();
2182  // SROA can usually chew through these intrinsics, but they aren't free.
2183  return false;
2184  case Intrinsic::icall_branch_funnel:
2185  case Intrinsic::localescape:
2186  HasUninlineableIntrinsic = true;
2187  return false;
2188  case Intrinsic::vastart:
2189  InitsVargArgs = true;
2190  return false;
2191  case Intrinsic::launder_invariant_group:
2192  case Intrinsic::strip_invariant_group:
2193  if (auto *SROAArg = getSROAArgForValueOrNull(II->getOperand(0)))
2194  SROAArgValues[II] = SROAArg;
2195  return true;
2196  case Intrinsic::is_constant:
2197  return simplifyIntrinsicCallIsConstant(Call);
2198  }
2199  }
2200 
2201  if (F == Call.getFunction()) {
2202  // This flag will fully abort the analysis, so don't bother with anything
2203  // else.
2204  IsRecursiveCall = true;
2205  if (!AllowRecursiveCall)
2206  return false;
2207  }
2208 
2209  if (TTI.isLoweredToCall(F)) {
2210  onLoweredCall(F, Call, IsIndirectCall);
2211  }
2212 
2213  if (!(Call.onlyReadsMemory() || (IsIndirectCall && F->onlyReadsMemory())))
2214  disableLoadElimination();
2215  return Base::visitCallBase(Call);
2216 }
2217 
2218 bool CallAnalyzer::visitReturnInst(ReturnInst &RI) {
2219  // At least one return instruction will be free after inlining.
2220  bool Free = !HasReturn;
2221  HasReturn = true;
2222  return Free;
2223 }
2224 
2225 bool CallAnalyzer::visitBranchInst(BranchInst &BI) {
2226  // We model unconditional branches as essentially free -- they really
2227  // shouldn't exist at all, but handling them makes the behavior of the
2228  // inliner more regular and predictable. Interestingly, conditional branches
2229  // which will fold away are also free.
2230  return BI.isUnconditional() || isa<ConstantInt>(BI.getCondition()) ||
2231  isa_and_nonnull<ConstantInt>(
2232  SimplifiedValues.lookup(BI.getCondition()));
2233 }
2234 
2235 bool CallAnalyzer::visitSelectInst(SelectInst &SI) {
2236  bool CheckSROA = SI.getType()->isPointerTy();
2237  Value *TrueVal = SI.getTrueValue();
2238  Value *FalseVal = SI.getFalseValue();
2239 
2240  Constant *TrueC = dyn_cast<Constant>(TrueVal);
2241  if (!TrueC)
2242  TrueC = SimplifiedValues.lookup(TrueVal);
2243  Constant *FalseC = dyn_cast<Constant>(FalseVal);
2244  if (!FalseC)
2245  FalseC = SimplifiedValues.lookup(FalseVal);
2246  Constant *CondC =
2247  dyn_cast_or_null<Constant>(SimplifiedValues.lookup(SI.getCondition()));
2248 
2249  if (!CondC) {
2250  // Select C, X, X => X
2251  if (TrueC == FalseC && TrueC) {
2252  SimplifiedValues[&SI] = TrueC;
2253  return true;
2254  }
2255 
2256  if (!CheckSROA)
2257  return Base::visitSelectInst(SI);
2258 
2259  std::pair<Value *, APInt> TrueBaseAndOffset =
2260  ConstantOffsetPtrs.lookup(TrueVal);
2261  std::pair<Value *, APInt> FalseBaseAndOffset =
2262  ConstantOffsetPtrs.lookup(FalseVal);
2263  if (TrueBaseAndOffset == FalseBaseAndOffset && TrueBaseAndOffset.first) {
2264  ConstantOffsetPtrs[&SI] = TrueBaseAndOffset;
2265 
2266  if (auto *SROAArg = getSROAArgForValueOrNull(TrueVal))
2267  SROAArgValues[&SI] = SROAArg;
2268  return true;
2269  }
2270 
2271  return Base::visitSelectInst(SI);
2272  }
2273 
2274  // Select condition is a constant.
2275  Value *SelectedV = CondC->isAllOnesValue() ? TrueVal
2276  : (CondC->isNullValue()) ? FalseVal
2277  : nullptr;
2278  if (!SelectedV) {
2279  // Condition is a vector constant that is not all 1s or all 0s. If all
2280  // operands are constants, ConstantExpr::getSelect() can handle cases
2281  // such as vector selects.
2282  if (TrueC && FalseC) {
2283  if (auto *C = ConstantExpr::getSelect(CondC, TrueC, FalseC)) {
2284  SimplifiedValues[&SI] = C;
2285  return true;
2286  }
2287  }
2288  return Base::visitSelectInst(SI);
2289  }
2290 
2291  // Condition is either all 1s or all 0s. SI can be simplified.
2292  if (Constant *SelectedC = dyn_cast<Constant>(SelectedV)) {
2293  SimplifiedValues[&SI] = SelectedC;
2294  return true;
2295  }
2296 
2297  if (!CheckSROA)
2298  return true;
2299 
2300  std::pair<Value *, APInt> BaseAndOffset =
2301  ConstantOffsetPtrs.lookup(SelectedV);
2302  if (BaseAndOffset.first) {
2303  ConstantOffsetPtrs[&SI] = BaseAndOffset;
2304 
2305  if (auto *SROAArg = getSROAArgForValueOrNull(SelectedV))
2306  SROAArgValues[&SI] = SROAArg;
2307  }
2308 
2309  return true;
2310 }
2311 
2312 bool CallAnalyzer::visitSwitchInst(SwitchInst &SI) {
2313  // We model unconditional switches as free, see the comments on handling
2314  // branches.
2315  if (isa<ConstantInt>(SI.getCondition()))
2316  return true;
2317  if (Value *V = SimplifiedValues.lookup(SI.getCondition()))
2318  if (isa<ConstantInt>(V))
2319  return true;
2320 
2321  // Assume the most general case where the switch is lowered into
2322  // either a jump table, bit test, or a balanced binary tree consisting of
2323  // case clusters without merging adjacent clusters with the same
2324  // destination. We do not consider the switches that are lowered with a mix
2325  // of jump table/bit test/binary search tree. The cost of the switch is
2326  // proportional to the size of the tree or the size of jump table range.
2327  //
2328  // NB: We convert large switches which are just used to initialize large phi
2329  // nodes to lookup tables instead in simplifycfg, so this shouldn't prevent
2330  // inlining those. It will prevent inlining in cases where the optimization
2331  // does not (yet) fire.
2332 
2333  unsigned JumpTableSize = 0;
2334  BlockFrequencyInfo *BFI = GetBFI ? &(GetBFI(F)) : nullptr;
2335  unsigned NumCaseCluster =
2336  TTI.getEstimatedNumberOfCaseClusters(SI, JumpTableSize, PSI, BFI);
2337 
2338  onFinalizeSwitch(JumpTableSize, NumCaseCluster);
2339  return false;
2340 }
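For intuition about the "balanced binary tree" part of the model, a sketch (not the exact formula used by onFinalizeSwitch, which also models jump tables): N case clusters cost roughly log2(N) compare-and-branch levels.

#include <cstdint>

// Rough depth of a balanced search tree over case clusters; illustration
// only, the real accounting lives in onFinalizeSwitch().
unsigned switchTreeDepth(std::uint64_t NumCaseCluster) {
  unsigned Depth = 0;
  while (NumCaseCluster > 1) {
    NumCaseCluster = (NumCaseCluster + 1) / 2; // halve the range per level
    ++Depth;
  }
  return Depth; // e.g. 8 clusters -> 3 compare/branch levels
}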
2341 
2342 bool CallAnalyzer::visitIndirectBrInst(IndirectBrInst &IBI) {
2343  // We never want to inline functions that contain an indirectbr. This is
2344  // incorrect because all the blockaddresses (in static global initializers
2345  // for example) would be referring to the original function, and this
2346  // indirect jump would jump from the inlined copy of the function into the
2347  // original function which is extremely undefined behavior.
2348  // FIXME: This logic isn't really right; we can safely inline functions with
2349  // indirectbr's as long as no other function or global references the
2350  // blockaddress of a block within the current function.
2351  HasIndirectBr = true;
2352  return false;
2353 }
2354 
2355 bool CallAnalyzer::visitResumeInst(ResumeInst &RI) {
2356  // FIXME: It's not clear that a single instruction is an accurate model for
2357  // the inline cost of a resume instruction.
2358  return false;
2359 }
2360 
2361 bool CallAnalyzer::visitCleanupReturnInst(CleanupReturnInst &CRI) {
2362  // FIXME: It's not clear that a single instruction is an accurate model for
2363  // the inline cost of a cleanupret instruction.
2364  return false;
2365 }
2366 
2367 bool CallAnalyzer::visitCatchReturnInst(CatchReturnInst &CRI) {
2368  // FIXME: It's not clear that a single instruction is an accurate model for
2369  // the inline cost of a catchret instruction.
2370  return false;
2371 }
2372 
2373 bool CallAnalyzer::visitUnreachableInst(UnreachableInst &I) {
2374  // FIXME: It might be reasonable to discount the cost of instructions leading
2375  // to unreachable as they have the lowest possible impact on both runtime and
2376  // code size.
2377  return true; // No actual code is needed for unreachable.
2378 }
2379 
2380 bool CallAnalyzer::visitInstruction(Instruction &I) {
2381  // Some instructions are free. All of the free intrinsics can also be
2382  // handled by SROA, etc.
2383  if (TargetTransformInfo::TCC_Free ==
2384  TTI.getUserCost(&I, TargetTransformInfo::TCK_SizeAndLatency))
2385  return true;
2386 
2387  // We found something we don't understand or can't handle. Mark any SROA-able
2388  // values in the operand list as no longer viable.
2389  for (const Use &Op : I.operands())
2390  disableSROA(Op);
2391 
2392  return false;
2393 }
2394 
2395 /// Analyze a basic block for its contribution to the inline cost.
2396 ///
2397 /// This method walks the analyzer over every instruction in the given basic
2398 /// block and accounts for their cost during inlining at this callsite. It
2399 /// aborts early if the threshold has been exceeded or an impossible to inline
2400 /// construct has been detected. It returns false if inlining is no longer
2401 /// viable, and true if inlining remains viable.
2402 InlineResult
2403 CallAnalyzer::analyzeBlock(BasicBlock *BB,
2404  SmallPtrSetImpl<const Value *> &EphValues) {
2405  for (Instruction &I : *BB) {
2406  // FIXME: Currently, the number of instructions in a function, regardless of
2407  // our ability to simplify them during inlining to constants or dead code,
2408  // is used by the vector bonus heuristic. As long as that's true,
2409  // we have to special case debug intrinsics here to prevent differences in
2410  // inlining due to debug symbols. Eventually, the number of unsimplified
2411  // instructions shouldn't factor into the cost computation, but until then,
2412  // hack around it here.
2413  // Similarly, skip pseudo-probes.
2414  if (I.isDebugOrPseudoInst())
2415  continue;
2416 
2417  // Skip ephemeral values.
2418  if (EphValues.count(&I))
2419  continue;
2420 
2421  ++NumInstructions;
2422  if (isa<ExtractElementInst>(I) || I.getType()->isVectorTy())
2423  ++NumVectorInstructions;
2424 
2425  // If the instruction simplified to a constant, there is no cost to this
2426  // instruction. Visit the instructions using our InstVisitor to account for
2427  // all of the per-instruction logic. The visit tree returns true if we
2428  // consumed the instruction in any way, and false if the instruction's base
2429  // cost should count against inlining.
2430  onInstructionAnalysisStart(&I);
2431 
2432  if (Base::visit(&I))
2433  ++NumInstructionsSimplified;
2434  else
2435  onMissedSimplification();
2436 
2437  onInstructionAnalysisFinish(&I);
2438  using namespace ore;
2439  // If visiting this instruction detected an uninlinable pattern, abort.
2440  InlineResult IR = InlineResult::success();
2441  if (IsRecursiveCall && !AllowRecursiveCall)
2442  IR = InlineResult::failure("recursive");
2443  else if (ExposesReturnsTwice)
2444  IR = InlineResult::failure("exposes returns twice");
2445  else if (HasDynamicAlloca)
2446  IR = InlineResult::failure("dynamic alloca");
2447  else if (HasIndirectBr)
2448  IR = InlineResult::failure("indirect branch");
2449  else if (HasUninlineableIntrinsic)
2450  IR = InlineResult::failure("uninlinable intrinsic");
2451  else if (InitsVargArgs)
2452  IR = InlineResult::failure("varargs");
2453  if (!IR.isSuccess()) {
2454  if (ORE)
2455  ORE->emit([&]() {
2456  return OptimizationRemarkMissed(DEBUG_TYPE, "NeverInline",
2457  &CandidateCall)
2458  << NV("Callee", &F) << " has uninlinable pattern ("
2459  << NV("InlineResult", IR.getFailureReason())
2460  << ") and cost is not fully computed";
2461  });
2462  return IR;
2463  }
2464 
2465  // If the caller is a recursive function then we don't want to inline
2466  // functions which allocate a lot of stack space because it would increase
2467  // the caller stack usage dramatically.
2468  if (IsCallerRecursive &&
2469  AllocatedSize > InlineConstants::TotalAllocaSizeRecursiveCaller) {
2470  auto IR =
2471  InlineResult::failure("recursive and allocates too much stack space");
2472  if (ORE)
2473  ORE->emit([&]() {
2474  return OptimizationRemarkMissed(DEBUG_TYPE, "NeverInline",
2475  &CandidateCall)
2476  << NV("Callee", &F) << " is "
2477  << NV("InlineResult", IR.getFailureReason())
2478  << ". Cost is not fully computed";
2479  });
2480  return IR;
2481  }
2482 
2483  if (shouldStop())
2484  return InlineResult::failure(
2485  "Call site analysis is not favorable to inlining.");
2486  }
2487 
2488  return InlineResult::success();
2489 }
2490 
2491 /// Compute the base pointer and cumulative constant offsets for V.
2492 ///
2493 /// This strips all constant offsets off of V, leaving it the base pointer, and
2494 /// accumulates the total constant offset applied in the returned constant. It
2495  /// returns null if V is not a pointer, and returns the constant '0' if there are
2496 /// no constant offsets applied.
2497 ConstantInt *CallAnalyzer::stripAndComputeInBoundsConstantOffsets(Value *&V) {
2498  if (!V->getType()->isPointerTy())
2499  return nullptr;
2500 
2501  unsigned AS = V->getType()->getPointerAddressSpace();
2502  unsigned IntPtrWidth = DL.getIndexSizeInBits(AS);
2503  APInt Offset = APInt::getZero(IntPtrWidth);
2504 
2505  // Even though we don't look through PHI nodes, we could be called on an
2506  // instruction in an unreachable block, which may be on a cycle.
2507  SmallPtrSet<Value *, 4> Visited;
2508  Visited.insert(V);
2509  do {
2510  if (GEPOperator *GEP = dyn_cast<GEPOperator>(V)) {
2511  if (!GEP->isInBounds() || !accumulateGEPOffset(*GEP, Offset))
2512  return nullptr;
2513  V = GEP->getPointerOperand();
2514  } else if (Operator::getOpcode(V) == Instruction::BitCast) {
2515  V = cast<Operator>(V)->getOperand(0);
2516  } else if (GlobalAlias *GA = dyn_cast<GlobalAlias>(V)) {
2517  if (GA->isInterposable())
2518  break;
2519  V = GA->getAliasee();
2520  } else {
2521  break;
2522  }
2523  assert(V->getType()->isPointerTy() && "Unexpected operand type!");
2524  } while (Visited.insert(V).second);
2525 
2526  Type *IdxPtrTy = DL.getIndexType(V->getType());
2527  return cast<ConstantInt>(ConstantInt::get(IdxPtrTy, Offset));
2528 }
2529 
2530 /// Find dead blocks due to deleted CFG edges during inlining.
2531 ///
2532 /// If we know the successor of the current block, \p CurrBB, has to be \p
2533 /// NextBB, the other successors of \p CurrBB are dead if these successors have
2534 /// no live incoming CFG edges. If one block is found to be dead, we can
2535 /// continue growing the dead block list by checking the successors of the dead
2536 /// blocks to see if all their incoming edges are dead or not.
2537 void CallAnalyzer::findDeadBlocks(BasicBlock *CurrBB, BasicBlock *NextBB) {
2538  auto IsEdgeDead = [&](BasicBlock *Pred, BasicBlock *Succ) {
2539  // A CFG edge is dead if the predecessor is dead or the predecessor has a
2540  // known successor which is not the one under exam.
2541  return (DeadBlocks.count(Pred) ||
2542  (KnownSuccessors[Pred] && KnownSuccessors[Pred] != Succ));
2543  };
2544 
2545  auto IsNewlyDead = [&](BasicBlock *BB) {
2546  // If all the edges to a block are dead, the block is also dead.
2547  return (!DeadBlocks.count(BB) &&
2548  all_of(predecessors(BB),
2549  [&](BasicBlock *P) { return IsEdgeDead(P, BB); }));
2550  };
2551 
2552  for (BasicBlock *Succ : successors(CurrBB)) {
2553  if (Succ == NextBB || !IsNewlyDead(Succ))
2554  continue;
2555  SmallVector<BasicBlock *, 4> NewDead;
2556  NewDead.push_back(Succ);
2557  while (!NewDead.empty()) {
2558  BasicBlock *Dead = NewDead.pop_back_val();
2559  if (DeadBlocks.insert(Dead).second)
2560  // Continue growing the dead block lists.
2561  for (BasicBlock *S : successors(Dead))
2562  if (IsNewlyDead(S))
2563  NewDead.push_back(S);
2564  }
2565  }
2566 }
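A compact restatement of the propagation on a toy CFG, assuming hypothetical adjacency maps that cover every block; it mirrors the two lambdas above but is independent of LLVM types.

#include <map>
#include <set>
#include <string>
#include <vector>

using Graph = std::map<std::string, std::vector<std::string>>;

// Mirror of findDeadBlocks() on string-named blocks: an edge is dead if its
// source is dead or the source has a known successor other than the target;
// a block is newly dead when every incoming edge is dead.
void findDeadToy(const Graph &Succs, const Graph &Preds,
                 std::map<std::string, std::string> &KnownSucc,
                 std::set<std::string> &Dead, const std::string &CurrBB,
                 const std::string &NextBB) {
  auto EdgeDead = [&](const std::string &P, const std::string &S) {
    auto It = KnownSucc.find(P);
    return Dead.count(P) || (It != KnownSucc.end() && It->second != S);
  };
  auto NewlyDead = [&](const std::string &BB) {
    if (Dead.count(BB))
      return false;
    for (const std::string &P : Preds.at(BB))
      if (!EdgeDead(P, BB))
        return false; // still reachable over a live edge
    return true;
  };
  KnownSucc[CurrBB] = NextBB;
  std::vector<std::string> Worklist;
  for (const std::string &S : Succs.at(CurrBB))
    if (S != NextBB && NewlyDead(S))
      Worklist.push_back(S);
  while (!Worklist.empty()) {
    std::string D = Worklist.back();
    Worklist.pop_back();
    if (Dead.insert(D).second) // keep growing the dead-block set
      for (const std::string &S : Succs.at(D))
        if (NewlyDead(S))
          Worklist.push_back(S);
  }
}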
2567 
2568 /// Analyze a call site for potential inlining.
2569 ///
2570 /// Returns true if inlining this call is viable, and false if it is not
2571 /// viable. It computes the cost and adjusts the threshold based on numerous
2572 /// factors and heuristics. If this method returns false but the computed cost
2573 /// is below the computed threshold, then inlining was forcibly disabled by
2574 /// some artifact of the routine.
2575 InlineResult CallAnalyzer::analyze() {
2576  ++NumCallsAnalyzed;
2577 
2578  auto Result = onAnalysisStart();
2579  if (!Result.isSuccess())
2580  return Result;
2581 
2582  if (F.empty())
2583  return InlineResult::success();
2584 
2585  Function *Caller = CandidateCall.getFunction();
2586  // Check if the caller function is recursive itself.
2587  for (User *U : Caller->users()) {
2588  CallBase *Call = dyn_cast<CallBase>(U);
2589  if (Call && Call->getFunction() == Caller) {
2590  IsCallerRecursive = true;
2591  break;
2592  }
2593  }
2594 
2595  // Populate our simplified values by mapping from function arguments to call
2596  // arguments with known important simplifications.
2597  auto CAI = CandidateCall.arg_begin();
2598  for (Argument &FAI : F.args()) {
2599  assert(CAI != CandidateCall.arg_end());
2600  if (Constant *C = dyn_cast<Constant>(CAI))
2601  SimplifiedValues[&FAI] = C;
2602 
2603  Value *PtrArg = *CAI;
2604  if (ConstantInt *C = stripAndComputeInBoundsConstantOffsets(PtrArg)) {
2605  ConstantOffsetPtrs[&FAI] = std::make_pair(PtrArg, C->getValue());
2606 
2607  // We can SROA any pointer arguments derived from alloca instructions.
2608  if (auto *SROAArg = dyn_cast<AllocaInst>(PtrArg)) {
2609  SROAArgValues[&FAI] = SROAArg;
2610  onInitializeSROAArg(SROAArg);
2611  EnabledSROAAllocas.insert(SROAArg);
2612  }
2613  }
2614  ++CAI;
2615  }
2616  NumConstantArgs = SimplifiedValues.size();
2617  NumConstantOffsetPtrArgs = ConstantOffsetPtrs.size();
2618  NumAllocaArgs = SROAArgValues.size();
2619 
2620  // FIXME: If a caller has multiple calls to a callee, we end up recomputing
2621  // the ephemeral values multiple times (and they're completely determined by
2622  // the callee, so this is purely duplicate work).
2623  SmallPtrSet<const Value *, 32> EphValues;
2624  CodeMetrics::collectEphemeralValues(&F, &GetAssumptionCache(F), EphValues);
2625 
2626  // The worklist of live basic blocks in the callee *after* inlining. We avoid
2627  // adding basic blocks of the callee which can be proven to be dead for this
2628  // particular call site in order to get more accurate cost estimates. This
2629  // requires a somewhat heavyweight iteration pattern: we need to walk the
2630  // basic blocks in a breadth-first order as we insert live successors. To
2631  // accomplish this, prioritizing for small iterations because we exit after
2632  // crossing our threshold, we use a small-size optimized SetVector.
2633  typedef SetVector<BasicBlock *, SmallVector<BasicBlock *, 16>,
2634  SmallPtrSet<BasicBlock *, 16>>
2635  BBSetVector;
2636  BBSetVector BBWorklist;
2637  BBWorklist.insert(&F.getEntryBlock());
2638 
2639  // Note that we *must not* cache the size; this loop grows the worklist.
2640  for (unsigned Idx = 0; Idx != BBWorklist.size(); ++Idx) {
2641  if (shouldStop())
2642  break;
2643 
2644  BasicBlock *BB = BBWorklist[Idx];
2645  if (BB->empty())
2646  continue;
2647 
2648  onBlockStart(BB);
2649 
2650  // Disallow inlining a blockaddress with uses other than strictly callbr.
2651  // A blockaddress only has defined behavior for an indirect branch in the
2652  // same function, and we do not currently support inlining indirect
2653  // branches. But, the inliner may not see an indirect branch that ends up
2654  // being dead code at a particular call site. If the blockaddress escapes
2655  // the function, e.g., via a global variable, inlining may lead to an
2656  // invalid cross-function reference.
2657  // FIXME: pr/39560: continue relaxing this overt restriction.
2658  if (BB->hasAddressTaken())
2659  for (User *U : BlockAddress::get(&*BB)->users())
2660  if (!isa<CallBrInst>(*U))
2661  return InlineResult::failure("blockaddress used outside of callbr");
2662 
2663  // Analyze the cost of this block. If we blow through the threshold, this
2664  // returns false, and we can bail out.
2665  InlineResult IR = analyzeBlock(BB, EphValues);
2666  if (!IR.isSuccess())
2667  return IR;
2668 
2669  Instruction *TI = BB->getTerminator();
2670 
2671  // Add in the live successors by first checking whether we have terminator
2672  // that may be simplified based on the values simplified by this call.
2673  if (BranchInst *BI = dyn_cast<BranchInst>(TI)) {
2674  if (BI->isConditional()) {
2675  Value *Cond = BI->getCondition();
2676  if (ConstantInt *SimpleCond =
2677  dyn_cast_or_null<ConstantInt>(SimplifiedValues.lookup(Cond))) {
2678  BasicBlock *NextBB = BI->getSuccessor(SimpleCond->isZero() ? 1 : 0);
2679  BBWorklist.insert(NextBB);
2680  KnownSuccessors[BB] = NextBB;
2681  findDeadBlocks(BB, NextBB);
2682  continue;
2683  }
2684  }
2685  } else if (SwitchInst *SI = dyn_cast<SwitchInst>(TI)) {
2686  Value *Cond = SI->getCondition();
2687  if (ConstantInt *SimpleCond =
2688  dyn_cast_or_null<ConstantInt>(SimplifiedValues.lookup(Cond))) {
2689  BasicBlock *NextBB = SI->findCaseValue(SimpleCond)->getCaseSuccessor();
2690  BBWorklist.insert(NextBB);
2691  KnownSuccessors[BB] = NextBB;
2692  findDeadBlocks(BB, NextBB);
2693  continue;
2694  }
2695  }
2696 
2697  // If we're unable to select a particular successor, just count all of
2698  // them.
2699  for (unsigned TIdx = 0, TSize = TI->getNumSuccessors(); TIdx != TSize;
2700  ++TIdx)
2701  BBWorklist.insert(TI->getSuccessor(TIdx));
2702 
2703  onBlockAnalyzed(BB);
2704  }
2705 
2706  bool OnlyOneCallAndLocalLinkage = F.hasLocalLinkage() && F.hasOneLiveUse() &&
2707  &F == CandidateCall.getCalledFunction();
2708  // If this is a noduplicate call, we can still inline as long as
2709  // inlining this would cause the removal of the caller (so the instruction
2710  // is not actually duplicated, just moved).
2711  if (!OnlyOneCallAndLocalLinkage && ContainsNoDuplicateCall)
2712  return InlineResult::failure("noduplicate");
2713 
2714  return finalizeAnalysis();
2715 }
2716 
2717 void InlineCostCallAnalyzer::print(raw_ostream &OS) {
2718 #define DEBUG_PRINT_STAT(x) OS << " " #x ": " << x << "\n"
2719  if (PrintInstructionComments)
2720  F.print(OS, &Writer);
2721  DEBUG_PRINT_STAT(NumConstantArgs);
2722  DEBUG_PRINT_STAT(NumConstantOffsetPtrArgs);
2723  DEBUG_PRINT_STAT(NumAllocaArgs);
2724  DEBUG_PRINT_STAT(NumConstantPtrCmps);
2725  DEBUG_PRINT_STAT(NumConstantPtrDiffs);
2726  DEBUG_PRINT_STAT(NumInstructionsSimplified);
2727  DEBUG_PRINT_STAT(NumInstructions);
2728  DEBUG_PRINT_STAT(SROACostSavings);
2729  DEBUG_PRINT_STAT(SROACostSavingsLost);
2730  DEBUG_PRINT_STAT(LoadEliminationCost);
2731  DEBUG_PRINT_STAT(ContainsNoDuplicateCall);
2732  DEBUG_PRINT_STAT(Cost);
2733  DEBUG_PRINT_STAT(Threshold);
2734 #undef DEBUG_PRINT_STAT
2735 }
2736 
2737 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2738 /// Dump stats about this call's analysis.
2739 LLVM_DUMP_METHOD void InlineCostCallAnalyzer::dump() { print(dbgs()); }
2740 #endif
2741 
2742 /// Test that there are no attribute conflicts between Caller and Callee
2743 /// that prevent inlining.
2744 static bool functionsHaveCompatibleAttributes(
2745  Function *Caller, Function *Callee, TargetTransformInfo &TTI,
2746  function_ref<const TargetLibraryInfo &(Function &)> &GetTLI) {
2747  // Note that CalleeTLI must be a copy, not a reference. The legacy pass manager
2748  // caches the most recently created TLI in the TargetLibraryInfoWrapperPass
2749  // object, and always returns the same object (which is overwritten on each
2750  // GetTLI call). Therefore we copy the first result.
2751  auto CalleeTLI = GetTLI(*Callee);
2752  return (IgnoreTTIInlineCompatible ||
2753  TTI.areInlineCompatible(Caller, Callee)) &&
2754  GetTLI(*Caller).areInlineCompatible(CalleeTLI,
2755  InlineCallerSupersetNoBuiltin) &&
2756  AttributeFuncs::areInlineCompatible(*Caller, *Callee);
2757 }
2758 
2759 int llvm::getCallsiteCost(CallBase &Call, const DataLayout &DL) {
2760  int Cost = 0;
2761  for (unsigned I = 0, E = Call.arg_size(); I != E; ++I) {
2762  if (Call.isByValArgument(I)) {
2763  // We approximate the number of loads and stores needed by dividing the
2764  // size of the byval type by the target's pointer size.
2765  PointerType *PTy = cast<PointerType>(Call.getArgOperand(I)->getType());
2766  unsigned TypeSize = DL.getTypeSizeInBits(Call.getParamByValType(I));
2767  unsigned AS = PTy->getAddressSpace();
2768  unsigned PointerSize = DL.getPointerSizeInBits(AS);
2769  // Ceiling division.
2770  unsigned NumStores = (TypeSize + PointerSize - 1) / PointerSize;
2771 
2772  // If it generates more than 8 stores it is likely to be expanded as an
2773  // inline memcpy, so we take that as an upper bound. Otherwise we assume
2774  // one load and one store per word copied.
2775  // FIXME: The maxStoresPerMemcpy setting from the target should be used
2776  // here instead of a magic number of 8, but it's not available via
2777  // DataLayout.
2778  NumStores = std::min(NumStores, 8U);
2779 
2780  Cost += 2 * NumStores * InlineConstants::InstrCost;
2781  } else {
2782  // For non-byval arguments subtract off one instruction per call
2783  // argument.
2784  Cost += InlineConstants::InstrCost;
2785  }
2786  }
2787  // The call instruction also disappears after inlining.
2788  Cost += InlineConstants::InstrCost + CallPenalty;
2789  return Cost;
2790 }
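A worked instance of the byval branch with assumed sizes: a 32-byte struct on a 64-bit target needs ceil(256/64) = 4 word copies, credited as 4 loads plus 4 stores.

#include <algorithm>

// Worked example; TypeSize, PointerSize, and the per-instruction cost are
// assumed values, and the 8-store cap mirrors the code above.
int byvalExample() {
  unsigned TypeSize = 256;   // bits, hypothetical byval struct
  unsigned PointerSize = 64; // bits, hypothetical target
  unsigned NumStores = (TypeSize + PointerSize - 1) / PointerSize; // 4
  NumStores = std::min(NumStores, 8U);
  const int AssumedInstrCost = 5; // stand-in for InlineConstants::InstrCost
  return 2 * NumStores * AssumedInstrCost; // 40 units of callsite savings
}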
2791 
2792 InlineCost llvm::getInlineCost(
2793  CallBase &Call, const InlineParams &Params, TargetTransformInfo &CalleeTTI,
2794  function_ref<AssumptionCache &(Function &)> GetAssumptionCache,
2795  function_ref<const TargetLibraryInfo &(Function &)> GetTLI,
2796  function_ref<BlockFrequencyInfo &(Function &)> GetBFI,
2797  ProfileSummaryInfo *PSI, OptimizationRemarkEmitter *ORE) {
2798  return getInlineCost(Call, Call.getCalledFunction(), Params, CalleeTTI,
2799  GetAssumptionCache, GetTLI, GetBFI, PSI, ORE);
2800 }
2801 
2802 Optional<int> llvm::getInliningCostEstimate(
2803  CallBase &Call, TargetTransformInfo &CalleeTTI,
2804  function_ref<AssumptionCache &(Function &)> GetAssumptionCache,
2805  function_ref<BlockFrequencyInfo &(Function &)> GetBFI,
2806  ProfileSummaryInfo *PSI, OptimizationRemarkEmitter *ORE) {
2807  const InlineParams Params = {/* DefaultThreshold*/ 0,
2808  /*HintThreshold*/ {},
2809  /*ColdThreshold*/ {},
2810  /*OptSizeThreshold*/ {},
2811  /*OptMinSizeThreshold*/ {},
2812  /*HotCallSiteThreshold*/ {},
2813  /*LocallyHotCallSiteThreshold*/ {},
2814  /*ColdCallSiteThreshold*/ {},
2815  /*ComputeFullInlineCost*/ true,
2816  /*EnableDeferral*/ true};
2817 
2818  InlineCostCallAnalyzer CA(*Call.getCalledFunction(), Call, Params, CalleeTTI,
2819  GetAssumptionCache, GetBFI, PSI, ORE, true,
2820  /*IgnoreThreshold*/ true);
2821  auto R = CA.analyze();
2822  if (!R.isSuccess())
2823  return None;
2824  return CA.getCost();
2825 }
2826 
2827 Optional<InlineCostFeatures> llvm::getInliningCostFeatures(
2828  CallBase &Call, TargetTransformInfo &CalleeTTI,
2829  function_ref<AssumptionCache &(Function &)> GetAssumptionCache,
2830  function_ref<BlockFrequencyInfo &(Function &)> GetBFI,
2831  ProfileSummaryInfo *PSI, OptimizationRemarkEmitter *ORE) {
2832  InlineCostFeaturesAnalyzer CFA(CalleeTTI, GetAssumptionCache, GetBFI, PSI,
2833  ORE, *Call.getCalledFunction(), Call);
2834  auto R = CFA.analyze();
2835  if (!R.isSuccess())
2836  return None;
2837  return CFA.features();
2838 }
2839 
2840 Optional<InlineResult> llvm::getAttributeBasedInliningDecision(
2841  CallBase &Call, Function *Callee, TargetTransformInfo &CalleeTTI,
2842  function_ref<const TargetLibraryInfo &(Function &)> GetTLI) {
2843 
2844  // Cannot inline indirect calls.
2845  if (!Callee)
2846  return InlineResult::failure("indirect call");
2847 
2848  // When a callee coroutine function is inlined into a caller coroutine
2849  // function before the coro-split pass, the coro-early pass cannot handle
2850  // it well. So we do not inline a coroutine function that has not yet been
2851  // split.
2852  if (Callee->isPresplitCoroutine())
2853  return InlineResult::failure("unsplited coroutine call");
2854 
2855  // Never inline calls with byval arguments that do not have the alloca
2856  // address space. Since byval arguments can be replaced with a copy to an
2857  // alloca, the inlined code would need to be adjusted to handle the fact
2858  // that the argument is in the alloca address space (so it is a little bit
2859  // complicated to solve).
2860  unsigned AllocaAS = Callee->getParent()->getDataLayout().getAllocaAddrSpace();
2861  for (unsigned I = 0, E = Call.arg_size(); I != E; ++I)
2862  if (Call.isByValArgument(I)) {
2863  PointerType *PTy = cast<PointerType>(Call.getArgOperand(I)->getType());
2864  if (PTy->getAddressSpace() != AllocaAS)
2865  return InlineResult::failure("byval arguments without alloca"
2866  " address space");
2867  }
2868 
2869  // Calls to functions with always-inline attributes should be inlined
2870  // whenever possible.
2871  if (Call.hasFnAttr(Attribute::AlwaysInline)) {
2872  if (Call.getAttributes().hasFnAttr(Attribute::NoInline))
2873  return InlineResult::failure("noinline call site attribute");
2874 
2875  auto IsViable = isInlineViable(*Callee);
2876  if (IsViable.isSuccess())
2877  return InlineResult::success();
2878  return InlineResult::failure(IsViable.getFailureReason());
2879  }
2880 
2881  // Never inline functions with conflicting attributes (unless callee has
2882  // always-inline attribute).
2883  Function *Caller = Call.getCaller();
2884  if (!functionsHaveCompatibleAttributes(Caller, Callee, CalleeTTI, GetTLI))
2885  return InlineResult::failure("conflicting attributes");
2886 
2887  // Don't inline this call if the caller has the optnone attribute.
2888  if (Caller->hasOptNone())
2889  return InlineResult::failure("optnone attribute");
2890 
2891  // Don't inline a function that treats null pointer as valid into a caller
2892  // that does not have this attribute.
2893  if (!Caller->nullPointerIsDefined() && Callee->nullPointerIsDefined())
2894  return InlineResult::failure("nullptr definitions incompatible");
2895 
2896  // Don't inline functions which can be interposed at link-time.
2897  if (Callee->isInterposable())
2898  return InlineResult::failure("interposable");
2899 
2900  // Don't inline functions marked noinline.
2901  if (Callee->hasFnAttribute(Attribute::NoInline))
2902  return InlineResult::failure("noinline function attribute");
2903 
2904  // Don't inline call sites marked noinline.
2905  if (Call.isNoInline())
2906  return InlineResult::failure("noinline call site attribute");
2907 
2908  return None;
2909 }
2910 
2911 InlineCost llvm::getInlineCost(
2912  CallBase &Call, Function *Callee, const InlineParams &Params,
2913  TargetTransformInfo &CalleeTTI,
2914  function_ref<AssumptionCache &(Function &)> GetAssumptionCache,
2915  function_ref<const TargetLibraryInfo &(Function &)> GetTLI,
2916  function_ref<BlockFrequencyInfo &(Function &)> GetBFI,
2917  ProfileSummaryInfo *PSI, OptimizationRemarkEmitter *ORE) {
2918 
2919  auto UserDecision =
2920  llvm::getAttributeBasedInliningDecision(Call, Callee, CalleeTTI, GetTLI);
2921 
2922  if (UserDecision.hasValue()) {
2923  if (UserDecision->isSuccess())
2924  return llvm::InlineCost::getAlways("always inline attribute");
2925  return llvm::InlineCost::getNever(UserDecision->getFailureReason());
2926  }
2927 
2928  LLVM_DEBUG(llvm::dbgs() << " Analyzing call of " << Callee->getName()
2929  << "... (caller:" << Call.getCaller()->getName()
2930  << ")\n");
2931 
2932  InlineCostCallAnalyzer CA(*Callee, Call, Params, CalleeTTI,
2933  GetAssumptionCache, GetBFI, PSI, ORE);
2934  InlineResult ShouldInline = CA.analyze();
2935 
2936  LLVM_DEBUG(CA.dump());
2937 
2938  // Always make cost benefit based decision explicit.
2939  // We use always/never here since threshold is not meaningful,
2940  // as it's not what drives cost-benefit analysis.
2941  if (CA.wasDecidedByCostBenefit()) {
2942  if (ShouldInline.isSuccess())
2943  return InlineCost::getAlways("benefit over cost",
2944  CA.getCostBenefitPair());
2945  else
2946  return InlineCost::getNever("cost over benefit", CA.getCostBenefitPair());
2947  }
2948 
2949  if (CA.wasDecidedByCostThreshold())
2950  return InlineCost::get(CA.getCost(), CA.getThreshold());
2951 
2952  // No details on how the decision was made, simply return always or never.
2953  return ShouldInline.isSuccess()
2954  ? InlineCost::getAlways("empty function")
2955  : InlineCost::getNever(ShouldInline.getFailureReason());
2956 }
2957 
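// A minimal usage sketch for the function above, assuming `CB` and the
// analysis getters are in scope; this uses the convenience overload that
// derives the callee from the call site. InlineCost's explicit operator
// bool() tests Cost < Threshold.
//
//   InlineCost IC = getInlineCost(CB, getInlineParams(), CalleeTTI,
//                                 GetAssumptionCache, GetTLI);
//   if (IC)
//     ; // profitable (or forced) to inline.
//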
2958 InlineResult llvm::isInlineViable(Function &F) {
2959  bool ReturnsTwice = F.hasFnAttribute(Attribute::ReturnsTwice);
2960  for (BasicBlock &BB : F) {
2961  // Disallow inlining of functions which contain indirect branches.
2962  if (isa<IndirectBrInst>(BB.getTerminator()))
2963  return InlineResult::failure("contains indirect branches");
2964 
2965  // Disallow inlining of blockaddresses which are used by non-callbr
2966  // instructions.
2967  if (BB.hasAddressTaken())
2968  for (User *U : BlockAddress::get(&BB)->users())
2969  if (!isa<CallBrInst>(*U))
2970  return InlineResult::failure("blockaddress used outside of callbr");
2971 
2972  for (auto &II : BB) {
2973  CallBase *Call = dyn_cast<CallBase>(&II);
2974  if (!Call)
2975  continue;
2976 
2977  // Disallow recursive calls.
2978  Function *Callee = Call->getCalledFunction();
2979  if (&F == Callee)
2980  return InlineResult::failure("recursive call");
2981 
2982  // Disallow calls which expose returns-twice to a function not previously
2983  // attributed as such.
2984  if (!ReturnsTwice && isa<CallInst>(Call) &&
2985  cast<CallInst>(Call)->canReturnTwice())
2986  return InlineResult::failure("exposes returns-twice attribute");
2987 
2988  if (Callee)
2989  switch (Callee->getIntrinsicID()) {
2990  default:
2991  break;
2992  case llvm::Intrinsic::icall_branch_funnel:
2993  // Disallow inlining of @llvm.icall.branch.funnel because current
2994  // backend can't separate call targets from call arguments.
2995  return InlineResult::failure(
2996  "disallowed inlining of @llvm.icall.branch.funnel");
2997  case llvm::Intrinsic::localescape:
2998  // Disallow inlining functions that call @llvm.localescape. Doing this
2999  // correctly would require major changes to the inliner.
3000  return InlineResult::failure(
3001  "disallowed inlining of @llvm.localescape");
3002  case llvm::Intrinsic::vastart:
3003  // Disallow inlining of functions that initialize VarArgs with
3004  // va_start.
3005  return InlineResult::failure(
3006  "contains VarArgs initialized with va_start");
3007  }
3008  }
3009  }
3010 
3011  return InlineResult::success();
3012 }
3013 
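// A minimal usage sketch for the structural filter above (the same check the
// always-inline path consults), assuming a Function `F` in scope:
//
//   InlineResult Viable = isInlineViable(F);
//   if (!Viable.isSuccess())
//     errs() << "not viable: " << Viable.getFailureReason() << "\n";
//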
3014 // APIs to create InlineParams based on command line flags and/or other
3015 // parameters.
3016 
3017 InlineParams llvm::getInlineParams(int Threshold) {
3018  InlineParams Params;
3019 
3020  // This field is the threshold to use for a callee by default. This is
3021  // derived from one or more of:
3022  // * optimization or size-optimization levels,
3023  // * a value passed to createFunctionInliningPass function, or
3024  // * the -inline-threshold flag.
3025  // If the -inline-threshold flag is explicitly specified, that is used
3026  // irrespective of anything else.
3027  if (InlineThreshold.getNumOccurrences() > 0)
3028  Params.DefaultThreshold = InlineThreshold;
3029  else
3030  Params.DefaultThreshold = Threshold;
3031 
3032  // Set the HintThreshold knob from the -inlinehint-threshold.
3033  Params.HintThreshold = HintThreshold;
3034 
3035  // Set the HotCallSiteThreshold knob from the -hot-callsite-threshold.
3036  Params.HotCallSiteThreshold = HotCallSiteThreshold;
3037 
3038  // If the -locally-hot-callsite-threshold is explicitly specified, use it to
3039  // populate LocallyHotCallSiteThreshold. Later, we populate
3040  // Params.LocallyHotCallSiteThreshold from -locally-hot-callsite-threshold if
3041  // we know that optimization level is O3 (in the getInlineParams variant that
3042  // takes the opt and size levels).
3043  // FIXME: Remove this check (and make the assignment unconditional) after
3044  // addressing size regression issues at O2.
3045  if (LocallyHotCallSiteThreshold.getNumOccurrences() > 0)
3046  Params.LocallyHotCallSiteThreshold = LocallyHotCallSiteThreshold;
3047 
3048  // Set the ColdCallSiteThreshold knob from the
3049  // -inline-cold-callsite-threshold.
3050  Params.ColdCallSiteThreshold = ColdCallSiteThreshold;
3051 
3052  // Set the OptMinSizeThreshold and OptSizeThreshold params only if the
3053  // -inlinehint-threshold commandline option is not explicitly given. If that
3054  // option is present, then its value applies even for callees with size and
3055  // minsize attributes.
3056  // If the -inline-threshold is not specified, set the ColdThreshold from the
3057  // -inlinecold-threshold even if it is not explicitly passed. If
3058  // -inline-threshold is specified, then -inlinecold-threshold needs to be
3059  // explicitly specified to set the ColdThreshold knob
3060  if (InlineThreshold.getNumOccurrences() == 0) {
3061  Params.OptMinSizeThreshold = InlineConstants::OptMinSizeThreshold;
3062  Params.OptSizeThreshold = InlineConstants::OptSizeThreshold;
3063  Params.ColdThreshold = ColdThreshold;
3064  } else if (ColdThreshold.getNumOccurrences() > 0) {
3065  Params.ColdThreshold = ColdThreshold;
3066  }
3067  return Params;
3068 }
3069 
3070 InlineParams llvm::getInlineParams() {
3071  return getInlineParams(DefaultThreshold);
3072 }
3073 
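// A minimal sketch of the precedence implemented above, assuming stock flag
// defaults (-inline-threshold defaults to 225 and only wins when passed
// explicitly):
//
//   InlineParams P = getInlineParams();    // DefaultThreshold == 225
//   InlineParams Q = getInlineParams(100); // DefaultThreshold == 100, unless
//                                          // -inline-threshold was given on
//                                          // the command line
//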
3074 // Compute the default threshold for inlining based on the opt level and the
3075 // size opt level.
3076 static int computeThresholdFromOptLevels(unsigned OptLevel,
3077  unsigned SizeOptLevel) {
3078  if (OptLevel > 2)
3079  return InlineConstants::OptAggressiveThreshold;
3080  if (SizeOptLevel == 1) // -Os
3081  return InlineConstants::OptSizeThreshold;
3082  if (SizeOptLevel == 2) // -Oz
3083  return InlineConstants::OptMinSizeThreshold;
3084  return DefaultThreshold;
3085 }
3086 
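// Worked examples of the mapping above, using the InlineConstants values
// declared in InlineCost.h (OptAggressiveThreshold 250, OptSizeThreshold 50,
// OptMinSizeThreshold 5) and the default -inlinedefault-threshold of 225:
//
//   computeThresholdFromOptLevels(3, 0) == 250  // -O3
//   computeThresholdFromOptLevels(2, 1) == 50   // -Os
//   computeThresholdFromOptLevels(2, 2) == 5    // -Oz
//   computeThresholdFromOptLevels(2, 0) == 225  // plain -O2
//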
3087 InlineParams llvm::getInlineParams(unsigned OptLevel, unsigned SizeOptLevel) {
3088  auto Params =
3089  getInlineParams(computeThresholdFromOptLevels(OptLevel, SizeOptLevel));
3090  // At O3, use the value of -locally-hot-callsite-threshold option to populate
3091  // Params.LocallyHotCallSiteThreshold. Below O3, this flag has effect only
3092  // when it is specified explicitly.
3093  if (OptLevel > 2)
3094  Params.LocallyHotCallSiteThreshold = LocallyHotCallSiteThreshold;
3095  return Params;
3096 }
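
// A minimal sketch of the O3-only behavior above, assuming no inlining flags
// were passed: at -O3 the locally-hot knob is seeded from
// -locally-hot-callsite-threshold (default 525); below -O3 it stays unset
// unless that flag is given explicitly.
//
//   InlineParams AtO3 = getInlineParams(3, 0);
//   // AtO3.LocallyHotCallSiteThreshold holds 525.
//   InlineParams AtO2 = getInlineParams(2, 0);
//   // AtO2.LocallyHotCallSiteThreshold is None.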
3097 
3098 PreservedAnalyses
3099 InlineCostAnnotationPrinterPass::run(Function &F,
3100  FunctionAnalysisManager &FAM) {
3101  PrintInstructionComments = true;
3102  std::function<AssumptionCache &(Function &)> GetAssumptionCache =
3103  [&](Function &F) -> AssumptionCache & {
3104  return FAM.getResult<AssumptionAnalysis>(F);
3105  };
3106  Module *M = F.getParent();
3107  ProfileSummaryInfo PSI(*M);
3108  DataLayout DL(M);
3109  TargetTransformInfo TTI(DL);
3110  // FIXME: Redesign the usage of InlineParams to expand the scope of this pass.
3111  // In the current implementation, the type of InlineParams doesn't matter as
3112  // the pass serves only for verification of inliner's decisions.
3113  // We can add a flag which determines InlineParams for this run. Right now,
3114  // the default InlineParams are used.
3115  const InlineParams Params = llvm::getInlineParams();
3116  for (BasicBlock &BB : F) {
3117  for (Instruction &I : BB) {
3118  if (CallInst *CI = dyn_cast<CallInst>(&I)) {
3119  Function *CalledFunction = CI->getCalledFunction();
3120  if (!CalledFunction || CalledFunction->isDeclaration())
3121  continue;
3122  OptimizationRemarkEmitter ORE(CalledFunction);
3123  InlineCostCallAnalyzer ICCA(*CalledFunction, *CI, Params, TTI,
3124  GetAssumptionCache, nullptr, &PSI, &ORE);
3125  ICCA.analyze();
3126  OS << " Analyzing call of " << CalledFunction->getName()
3127  << "... (caller:" << CI->getCaller()->getName() << ")\n";
3128  ICCA.print(OS);
3129  OS << "\n";
3130  }
3131  }
3132  }
3133  return PreservedAnalyses::all();
3134 }
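
// A usage sketch for the printer pass above: under the new pass manager it
// can be driven from opt, assuming the registered pass name
// "print<inline-cost>" from PassRegistry.def (run() forces the
// PrintInstructionComments flag on, so no extra option is needed):
//
//   opt -passes='print<inline-cost>' -disable-output input.ll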