//===- InlineCost.cpp - Cost analysis for inliner -------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file implements inline cost analysis.
//
//===----------------------------------------------------------------------===//

#include "llvm/Analysis/InlineCost.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Analysis/BlockFrequencyInfo.h"
#include "llvm/Analysis/CFG.h"
#include "llvm/Analysis/CodeMetrics.h"
#include "llvm/Analysis/ConstantFolding.h"
#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/ProfileSummaryInfo.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/Config/llvm-config.h"
#include "llvm/IR/AssemblyAnnotationWriter.h"
#include "llvm/IR/CallingConv.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/GetElementPtrTypeIterator.h"
#include "llvm/IR/GlobalAlias.h"
#include "llvm/IR/InstVisitor.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Operator.h"
#include "llvm/IR/PatternMatch.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/FormattedStream.h"
#include "llvm/Support/raw_ostream.h"

using namespace llvm;

#define DEBUG_TYPE "inline-cost"

STATISTIC(NumCallsAnalyzed, "Number of call sites analyzed");

static cl::opt<int>
    DefaultThreshold("inlinedefault-threshold", cl::Hidden, cl::init(225),
                     cl::ZeroOrMore,
                     cl::desc("Default amount of inlining to perform"));

static cl::opt<bool> PrintInstructionComments(
    "print-instruction-comments", cl::Hidden, cl::init(false),
    cl::desc("Prints comments for instruction based on inline cost analysis"));

static cl::opt<int> InlineThreshold(
    "inline-threshold", cl::Hidden, cl::init(225), cl::ZeroOrMore,
    cl::desc("Control the amount of inlining to perform (default = 225)"));

static cl::opt<int> HintThreshold(
    "inlinehint-threshold", cl::Hidden, cl::init(325), cl::ZeroOrMore,
    cl::desc("Threshold for inlining functions with inline hint"));

static cl::opt<int>
    ColdCallSiteThreshold("inline-cold-callsite-threshold", cl::Hidden,
                          cl::init(45), cl::ZeroOrMore,
                          cl::desc("Threshold for inlining cold callsites"));

static cl::opt<bool> InlineEnableCostBenefitAnalysis(
    "inline-enable-cost-benefit-analysis", cl::Hidden, cl::init(false),
    cl::desc("Enable the cost-benefit analysis for the inliner"));

static cl::opt<int> InlineSavingsMultiplier(
    "inline-savings-multiplier", cl::Hidden, cl::init(8), cl::ZeroOrMore,
    cl::desc("Multiplier to multiply cycle savings by during inlining"));

static cl::opt<int>
    InlineSizeAllowance("inline-size-allowance", cl::Hidden, cl::init(100),
                        cl::ZeroOrMore,
                        cl::desc("The maximum size of a callee that gets "
                                 "inlined without sufficient cycle savings"));

// We introduce this threshold to help performance of instrumentation based
// PGO before we actually hook up inliner with analysis passes such as BPI and
// BFI.
static cl::opt<int> ColdThreshold(
    "inlinecold-threshold", cl::Hidden, cl::init(45), cl::ZeroOrMore,
    cl::desc("Threshold for inlining functions with cold attribute"));

static cl::opt<int>
    HotCallSiteThreshold("hot-callsite-threshold", cl::Hidden, cl::init(3000),
                         cl::ZeroOrMore,
                         cl::desc("Threshold for hot callsites"));

static cl::opt<int> LocallyHotCallSiteThreshold(
    "locally-hot-callsite-threshold", cl::Hidden, cl::init(525), cl::ZeroOrMore,
    cl::desc("Threshold for locally hot callsites"));

static cl::opt<int> ColdCallSiteRelFreq(
    "cold-callsite-rel-freq", cl::Hidden, cl::init(2), cl::ZeroOrMore,
    cl::desc("Maximum block frequency, expressed as a percentage of caller's "
             "entry frequency, for a callsite to be cold in the absence of "
             "profile information."));

static cl::opt<int> HotCallSiteRelFreq(
    "hot-callsite-rel-freq", cl::Hidden, cl::init(60), cl::ZeroOrMore,
    cl::desc("Minimum block frequency, expressed as a multiple of caller's "
             "entry frequency, for a callsite to be hot in the absence of "
             "profile information."));

static cl::opt<int> CallPenalty(
    "inline-call-penalty", cl::Hidden, cl::init(25),
    cl::desc("Call penalty that is applied per callsite when inlining"));

static cl::opt<bool> OptComputeFullInlineCost(
    "inline-cost-full", cl::Hidden, cl::init(false), cl::ZeroOrMore,
    cl::desc("Compute the full inline cost of a call site even when the cost "
             "exceeds the threshold."));

static cl::opt<bool> InlineCallerSupersetNoBuiltin(
    "inline-caller-superset-nobuiltin", cl::Hidden, cl::init(true),
    cl::ZeroOrMore,
    cl::desc("Allow inlining when caller has a superset of callee's nobuiltin "
             "attributes."));

static cl::opt<bool> DisableGEPConstOperand(
    "disable-gep-const-evaluation", cl::Hidden, cl::init(false),
    cl::desc("Disables evaluation of GetElementPtr with constant operands"));
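
// For reference, these are ordinary cl::opt flags, so the heuristics above
// can be tuned from the command line without rebuilding. An illustrative
// invocation (not taken from this file) would be:
//   opt -passes=inline -inline-threshold=500 -S input.ll -o output.ll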

namespace {
class InlineCostCallAnalyzer;

/// This function behaves more like CallBase::hasFnAttr: when it looks for the
/// requested attribute, it checks both the call instruction and the called
/// function (if it's available and operand bundles don't prohibit that).
Attribute getFnAttr(CallBase &CB, StringRef AttrKind) {
  Attribute CallAttr = CB.getFnAttr(AttrKind);
  if (CallAttr.isValid())
    return CallAttr;

  // Operand bundles override attributes on the called function, but don't
  // override attributes directly present on the call instruction.
  if (!CB.isFnAttrDisallowedByOpBundle(AttrKind))
    if (const Function *F = CB.getCalledFunction())
      return F->getFnAttribute(AttrKind);

  return {};
}

Optional<int> getStringFnAttrAsInt(CallBase &CB, StringRef AttrKind) {
  Attribute Attr = getFnAttr(CB, AttrKind);
  int AttrValue;
  if (Attr.getValueAsString().getAsInteger(10, AttrValue))
    return None;
  return AttrValue;
}
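
// Illustrative use of the helpers above (hypothetical IR, not from this
// file): given a call site carrying "function-inline-cost"="15" as a string
// function attribute, getStringFnAttrAsInt(CB, "function-inline-cost")
// returns 15, and None when the attribute is absent or not a base-10 integer.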

// This struct is used to store information about the inline cost of a
// particular instruction.
struct InstructionCostDetail {
  int CostBefore = 0;
  int CostAfter = 0;
  int ThresholdBefore = 0;
  int ThresholdAfter = 0;

  int getThresholdDelta() const { return ThresholdAfter - ThresholdBefore; }

  int getCostDelta() const { return CostAfter - CostBefore; }

  bool hasThresholdChanged() const { return ThresholdAfter != ThresholdBefore; }
};

class InlineCostAnnotationWriter : public AssemblyAnnotationWriter {
private:
  InlineCostCallAnalyzer *const ICCA;

public:
  InlineCostAnnotationWriter(InlineCostCallAnalyzer *ICCA) : ICCA(ICCA) {}
  virtual void emitInstructionAnnot(const Instruction *I,
                                    formatted_raw_ostream &OS) override;
};

/// Carry out call site analysis, in order to evaluate inlinability.
/// NOTE: the type is currently used as implementation detail of functions such
/// as llvm::getInlineCost. Note the function_ref constructor parameters - the
/// expectation is that they come from the outer scope, from the wrapper
/// functions. If we want to support constructing CallAnalyzer objects where
/// lambdas are provided inline at construction, or where the object needs to
/// otherwise survive past the scope of the provided functions, we need to
/// revisit the argument types.
class CallAnalyzer : public InstVisitor<CallAnalyzer, bool> {
  typedef InstVisitor<CallAnalyzer, bool> Base;
  friend class InstVisitor<CallAnalyzer, bool>;

protected:
  virtual ~CallAnalyzer() {}
  /// The TargetTransformInfo available for this compilation.
  const TargetTransformInfo &TTI;

  /// Getter for the cache of @llvm.assume intrinsics.
  function_ref<AssumptionCache &(Function &)> GetAssumptionCache;

  /// Getter for BlockFrequencyInfo.
  function_ref<BlockFrequencyInfo &(Function &)> GetBFI;

  /// Profile summary information.
  ProfileSummaryInfo *PSI;

  /// The called function.
  Function &F;

  // Cache the DataLayout since we use it a lot.
  const DataLayout &DL;

  /// The OptimizationRemarkEmitter available for this compilation.
  OptimizationRemarkEmitter *ORE;

  /// The candidate callsite being analyzed. Please do not use this to do
  /// analysis in the caller function; we want the inline cost query to be
  /// easily cacheable. Instead, use the cover function paramHasAttr.
  CallBase &CandidateCall;

  /// Extension points for handling callsite features.
  /// Called before a basic block was analyzed.
  virtual void onBlockStart(const BasicBlock *BB) {}

  /// Called after a basic block was analyzed.
  virtual void onBlockAnalyzed(const BasicBlock *BB) {}

  /// Called before an instruction was analyzed.
  virtual void onInstructionAnalysisStart(const Instruction *I) {}

  /// Called after an instruction was analyzed.
  virtual void onInstructionAnalysisFinish(const Instruction *I) {}

  /// Called at the end of the analysis of the callsite. Return the outcome of
  /// the analysis, i.e. 'InlineResult(true)' if the inlining may happen, or
  /// the reason it can't.
  virtual InlineResult finalizeAnalysis() { return InlineResult::success(); }
  /// Called when we're about to start processing a basic block, and every time
  /// we are done processing an instruction. Return true if there is no point in
  /// continuing the analysis (e.g. we've determined already the call site is
  /// too expensive to inline).
  virtual bool shouldStop() { return false; }

  /// Called before the analysis of the callee body starts (with callsite
  /// contexts propagated). It checks callsite-specific information. Return a
  /// reason analysis can't continue if that's the case, or 'true' if it may
  /// continue.
  virtual InlineResult onAnalysisStart() { return InlineResult::success(); }
  /// Called if the analysis engine decides SROA cannot be done for the given
  /// alloca.
  virtual void onDisableSROA(AllocaInst *Arg) {}

  /// Called when the analysis engine determines load elimination won't happen.
  virtual void onDisableLoadElimination() {}

  /// Called when we visit a CallBase, before the analysis starts. Return false
  /// to stop further processing of the instruction.
  virtual bool onCallBaseVisitStart(CallBase &Call) { return true; }

  /// Called to account for a call.
  virtual void onCallPenalty() {}

  /// Called to account for the expectation the inlining would result in a load
  /// elimination.
  virtual void onLoadEliminationOpportunity() {}

  /// Called to account for the cost of argument setup for the Call in the
  /// callee's body (not the callsite currently under analysis).
  virtual void onCallArgumentSetup(const CallBase &Call) {}

  /// Called to account for a load relative intrinsic.
  virtual void onLoadRelativeIntrinsic() {}

  /// Called to account for a lowered call.
  virtual void onLoweredCall(Function *F, CallBase &Call, bool IsIndirectCall) {
  }

  /// Account for a jump table of given size. Return false to stop further
  /// processing of the switch instruction.
  virtual bool onJumpTable(unsigned JumpTableSize) { return true; }

  /// Account for a case cluster of given size. Return false to stop further
  /// processing of the instruction.
  virtual bool onCaseCluster(unsigned NumCaseCluster) { return true; }

  /// Called at the end of processing a switch instruction, with the given
  /// number of case clusters.
  virtual void onFinalizeSwitch(unsigned JumpTableSize,
                                unsigned NumCaseCluster) {}

  /// Called to account for any other instruction not specifically accounted
  /// for.
  virtual void onMissedSimplification() {}

  /// Start accounting potential benefits due to SROA for the given alloca.
  virtual void onInitializeSROAArg(AllocaInst *Arg) {}

  /// Account SROA savings for the AllocaInst value.
  virtual void onAggregateSROAUse(AllocaInst *V) {}

  bool handleSROA(Value *V, bool DoNotDisable) {
    // Check for SROA candidates in comparisons.
    if (auto *SROAArg = getSROAArgForValueOrNull(V)) {
      if (DoNotDisable) {
        onAggregateSROAUse(SROAArg);
        return true;
      }
      disableSROAForArg(SROAArg);
    }
    return false;
  }

  bool IsCallerRecursive = false;
  bool IsRecursiveCall = false;
  bool ExposesReturnsTwice = false;
  bool HasDynamicAlloca = false;
  bool ContainsNoDuplicateCall = false;
  bool HasReturn = false;
  bool HasIndirectBr = false;
  bool HasUninlineableIntrinsic = false;
  bool InitsVargArgs = false;

  /// Number of bytes allocated statically by the callee.
  uint64_t AllocatedSize = 0;
  unsigned NumInstructions = 0;
  unsigned NumVectorInstructions = 0;

  /// While we walk the potentially-inlined instructions, we build up and
  /// maintain a mapping of simplified values specific to this callsite. The
  /// idea is to propagate any special information we have about arguments to
  /// this call through the inlinable section of the function, and account for
  /// likely simplifications post-inlining. The most important aspect we track
  /// is CFG altering simplifications -- when we prove a basic block dead, that
  /// can cause dramatic shifts in the cost of inlining a function.
  DenseMap<Value *, Constant *> SimplifiedValues;

  /// Keep track of the values which map back (through function arguments) to
  /// allocas on the caller stack which could be simplified through SROA.
  DenseMap<Value *, AllocaInst *> SROAArgValues;

  /// Keep track of Allocas for which we believe we may get SROA optimization.
  DenseSet<AllocaInst *> EnabledSROAAllocas;

  /// Keep track of values which map to a pointer base and constant offset.
  DenseMap<Value *, std::pair<Value *, APInt>> ConstantOffsetPtrs;

  /// Keep track of dead blocks due to the constant arguments.
  SetVector<BasicBlock *> DeadBlocks;

  /// The mapping of the blocks to their known unique successors due to the
  /// constant arguments.
  DenseMap<BasicBlock *, BasicBlock *> KnownSuccessors;

  /// Model the elimination of repeated loads that is expected to happen
  /// whenever we simplify away the stores that would otherwise cause them to
  /// be loads.
  bool EnableLoadElimination;

  /// Whether we allow inlining for recursive calls.
  bool AllowRecursiveCall;

  SmallPtrSet<Value *, 16> LoadAddrSet;

  AllocaInst *getSROAArgForValueOrNull(Value *V) const {
    auto It = SROAArgValues.find(V);
    if (It == SROAArgValues.end() || EnabledSROAAllocas.count(It->second) == 0)
      return nullptr;
    return It->second;
  }

  // Custom simplification helper routines.
  bool isAllocaDerivedArg(Value *V);
  void disableSROAForArg(AllocaInst *SROAArg);
  void disableSROA(Value *V);
  void findDeadBlocks(BasicBlock *CurrBB, BasicBlock *NextBB);
  void disableLoadElimination();
  bool isGEPFree(GetElementPtrInst &GEP);
  bool canFoldInboundsGEP(GetElementPtrInst &I);
  bool accumulateGEPOffset(GEPOperator &GEP, APInt &Offset);
  bool simplifyCallSite(Function *F, CallBase &Call);
  template <typename Callable>
  bool simplifyInstruction(Instruction &I, Callable Evaluate);
  bool simplifyIntrinsicCallIsConstant(CallBase &CB);
  ConstantInt *stripAndComputeInBoundsConstantOffsets(Value *&V);

  /// Return true if the given argument to the function being considered for
  /// inlining has the given attribute set either at the call site or the
  /// function declaration. Primarily used to inspect call site specific
  /// attributes since these can be more precise than the ones on the callee
  /// itself.
  bool paramHasAttr(Argument *A, Attribute::AttrKind Attr);

  /// Return true if the given value is known non null within the callee if
  /// inlined through this particular callsite.
  bool isKnownNonNullInCallee(Value *V);

  /// Return true if size growth is allowed when inlining the callee at \p Call.
  bool allowSizeGrowth(CallBase &Call);

  // Custom analysis routines.
  InlineResult analyzeBlock(BasicBlock *BB,
                            SmallPtrSetImpl<const Value *> &EphValues);

  // Disable several entry points to the visitor so we don't accidentally use
  // them by declaring but not defining them here.
  void visit(Module *);
  void visit(Module &);
  void visit(Function *);
  void visit(Function &);
  void visit(BasicBlock *);
  void visit(BasicBlock &);

  // Provide base case for our instruction visit.
  bool visitInstruction(Instruction &I);

  // Our visit overrides.
  bool visitAlloca(AllocaInst &I);
  bool visitPHI(PHINode &I);
  bool visitGetElementPtr(GetElementPtrInst &I);
  bool visitBitCast(BitCastInst &I);
  bool visitPtrToInt(PtrToIntInst &I);
  bool visitIntToPtr(IntToPtrInst &I);
  bool visitCastInst(CastInst &I);
  bool visitCmpInst(CmpInst &I);
  bool visitSub(BinaryOperator &I);
  bool visitBinaryOperator(BinaryOperator &I);
  bool visitFNeg(UnaryOperator &I);
  bool visitLoad(LoadInst &I);
  bool visitStore(StoreInst &I);
  bool visitExtractValue(ExtractValueInst &I);
  bool visitInsertValue(InsertValueInst &I);
  bool visitCallBase(CallBase &Call);
  bool visitReturnInst(ReturnInst &RI);
  bool visitBranchInst(BranchInst &BI);
  bool visitSelectInst(SelectInst &SI);
  bool visitSwitchInst(SwitchInst &SI);
  bool visitIndirectBrInst(IndirectBrInst &IBI);
  bool visitResumeInst(ResumeInst &RI);
  bool visitCleanupReturnInst(CleanupReturnInst &RI);
  bool visitCatchReturnInst(CatchReturnInst &RI);
  bool visitUnreachableInst(UnreachableInst &I);

public:
  CallAnalyzer(Function &Callee, CallBase &Call, const TargetTransformInfo &TTI,
               function_ref<AssumptionCache &(Function &)> GetAssumptionCache,
               function_ref<BlockFrequencyInfo &(Function &)> GetBFI = nullptr,
               ProfileSummaryInfo *PSI = nullptr,
               OptimizationRemarkEmitter *ORE = nullptr)
      : TTI(TTI), GetAssumptionCache(GetAssumptionCache), GetBFI(GetBFI),
        PSI(PSI), F(Callee), DL(F.getParent()->getDataLayout()), ORE(ORE),
        CandidateCall(Call), EnableLoadElimination(true),
        AllowRecursiveCall(false) {}

  InlineResult analyze();

  Optional<Constant *> getSimplifiedValue(Instruction *I) {
    if (SimplifiedValues.find(I) != SimplifiedValues.end())
      return SimplifiedValues[I];
    return None;
  }

  // Keep a bunch of stats about the cost savings found so we can print them
  // out when debugging.
  unsigned NumConstantArgs = 0;
  unsigned NumConstantOffsetPtrArgs = 0;
  unsigned NumAllocaArgs = 0;
  unsigned NumConstantPtrCmps = 0;
  unsigned NumConstantPtrDiffs = 0;
  unsigned NumInstructionsSimplified = 0;

  void dump();
};
480 
481 // Considering forming a binary search, we should find the number of nodes
482 // which is same as the number of comparisons when lowered. For a given
483 // number of clusters, n, we can define a recursive function, f(n), to find
484 // the number of nodes in the tree. The recursion is :
485 // f(n) = 1 + f(n/2) + f (n - n/2), when n > 3,
486 // and f(n) = n, when n <= 3.
487 // This will lead a binary tree where the leaf should be either f(2) or f(3)
488 // when n > 3. So, the number of comparisons from leaves should be n, while
489 // the number of non-leaf should be :
490 // 2^(log2(n) - 1) - 1
491 // = 2^log2(n) * 2^-1 - 1
492 // = n / 2 - 1.
493 // Considering comparisons from leaf and non-leaf nodes, we can estimate the
494 // number of comparisons in a simple closed form :
495 // n + n / 2 - 1 = n * 3 / 2 - 1
496 int64_t getExpectedNumberOfCompare(int NumCaseCluster) {
497  return 3 * static_cast<int64_t>(NumCaseCluster) / 2 - 1;
498 }
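
// For instance, with NumCaseCluster = 8 the estimate is 8 * 3 / 2 - 1 = 11
// comparisons: 8 contributed by the leaves plus 8 / 2 - 1 = 3 by the internal
// nodes of the search tree.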

/// FIXME: if it is necessary to derive from InlineCostCallAnalyzer, note
/// the FIXME in onLoweredCall, when instantiating an InlineCostCallAnalyzer
class InlineCostCallAnalyzer final : public CallAnalyzer {
  const int CostUpperBound = INT_MAX - InlineConstants::InstrCost - 1;
  const bool ComputeFullInlineCost;
  int LoadEliminationCost = 0;
  /// Bonus to be applied when percentage of vector instructions in callee is
  /// high (see more details in updateThreshold).
  int VectorBonus = 0;
  /// Bonus to be applied when the callee has only one reachable basic block.
  int SingleBBBonus = 0;

  /// Tunable parameters that control the analysis.
  const InlineParams &Params;

  // This DenseMap stores the delta change in cost and threshold after
  // accounting for the given instruction. The map is filled only with the
  // flag PrintInstructionComments on.
  DenseMap<const Instruction *, InstructionCostDetail> InstructionCostDetailMap;

  /// Upper bound for the inlining cost. Bonuses are being applied to account
  /// for speculative "expected profit" of the inlining decision.
  int Threshold = 0;

  /// Attempt to evaluate indirect calls to boost their inline cost.
  const bool BoostIndirectCalls;

  /// Ignore the threshold when finalizing analysis.
  const bool IgnoreThreshold;

  // True if the cost-benefit-analysis-based inliner is enabled.
  const bool CostBenefitAnalysisEnabled;

  /// Inlining cost measured in abstract units, accounts for all the
  /// instructions expected to be executed for a given function invocation.
  /// Instructions that are statically proven to be dead based on call-site
  /// arguments are not counted here.
  int Cost = 0;

  // The cumulative cost at the beginning of the basic block being analyzed. At
  // the end of analyzing each basic block, "Cost - CostAtBBStart" represents
  // the size of that basic block.
  int CostAtBBStart = 0;

  // The static size of live but cold basic blocks. This is "static" in the
  // sense that it's not weighted by profile counts at all.
  int ColdSize = 0;

  // Whether inlining is decided by cost-threshold analysis.
  bool DecidedByCostThreshold = false;

  // Whether inlining is decided by cost-benefit analysis.
  bool DecidedByCostBenefit = false;

  // The cost-benefit pair computed by cost-benefit analysis.
  Optional<CostBenefitPair> CostBenefit = None;

  bool SingleBB = true;

  unsigned SROACostSavings = 0;
  unsigned SROACostSavingsLost = 0;

  /// The mapping of caller Alloca values to their accumulated cost savings. If
  /// we have to disable SROA for one of the allocas, this tells us how much
  /// cost must be added.
  DenseMap<AllocaInst *, int> SROAArgCosts;

  /// Return true if \p Call is a cold callsite.
  bool isColdCallSite(CallBase &Call, BlockFrequencyInfo *CallerBFI);

  /// Update Threshold based on callsite properties such as callee
  /// attributes and callee hotness for PGO builds. The Callee is explicitly
  /// passed to support analyzing indirect calls whose target is inferred by
  /// analysis.
  void updateThreshold(CallBase &Call, Function &Callee);
  /// Return a higher threshold if \p Call is a hot callsite.
  Optional<int> getHotCallSiteThreshold(CallBase &Call,
                                        BlockFrequencyInfo *CallerBFI);

  /// Handle a capped 'int' increment for Cost.
  void addCost(int64_t Inc, int64_t UpperBound = INT_MAX) {
    assert(UpperBound > 0 && UpperBound <= INT_MAX && "invalid upper bound");
    Cost = std::min<int>(UpperBound, Cost + Inc);
  }

  void onDisableSROA(AllocaInst *Arg) override {
    auto CostIt = SROAArgCosts.find(Arg);
    if (CostIt == SROAArgCosts.end())
      return;
    addCost(CostIt->second);
    SROACostSavings -= CostIt->second;
    SROACostSavingsLost += CostIt->second;
    SROAArgCosts.erase(CostIt);
  }

  void onDisableLoadElimination() override {
    addCost(LoadEliminationCost);
    LoadEliminationCost = 0;
  }

  bool onCallBaseVisitStart(CallBase &Call) override {
    if (Optional<int> AttrCallThresholdBonus =
            getStringFnAttrAsInt(Call, "call-threshold-bonus"))
      Threshold += *AttrCallThresholdBonus;

    if (Optional<int> AttrCallCost =
            getStringFnAttrAsInt(Call, "call-inline-cost")) {
      addCost(*AttrCallCost);
      // Prevent further processing of the call since we want to override its
      // inline cost, not just add to it.
      return false;
    }
    return true;
  }

  void onCallPenalty() override { addCost(CallPenalty); }
  void onCallArgumentSetup(const CallBase &Call) override {
    // Pay the price of the argument setup. We account for the average 1
    // instruction per call argument setup here.
    addCost(Call.arg_size() * InlineConstants::InstrCost);
  }
  void onLoadRelativeIntrinsic() override {
    // This is normally lowered to 4 LLVM instructions.
    addCost(3 * InlineConstants::InstrCost);
  }
  void onLoweredCall(Function *F, CallBase &Call,
                     bool IsIndirectCall) override {
    // We account for the average 1 instruction per call argument setup here.
    addCost(Call.arg_size() * InlineConstants::InstrCost);

    // If we have a constant that we are calling as a function, we can peer
    // through it and see the function target. This happens not infrequently
    // during devirtualization and so we want to give it a hefty bonus for
    // inlining, but cap that bonus in the event that inlining wouldn't pan out.
    // Pretend to inline the function, with a custom threshold.
    if (IsIndirectCall && BoostIndirectCalls) {
      auto IndirectCallParams = Params;
      IndirectCallParams.DefaultThreshold =
          InlineConstants::IndirectCallThreshold;
      /// FIXME: if InlineCostCallAnalyzer is derived from, this may need
      /// to instantiate the derived class.
      InlineCostCallAnalyzer CA(*F, Call, IndirectCallParams, TTI,
                                GetAssumptionCache, GetBFI, PSI, ORE, false);
      if (CA.analyze().isSuccess()) {
        // We were able to inline the indirect call! Subtract the cost from the
        // threshold to get the bonus we want to apply, but don't go below zero.
        Cost -= std::max(0, CA.getThreshold() - CA.getCost());
      }
    } else
      // Otherwise simply add the cost for merely making the call.
      addCost(CallPenalty);
  }

  void onFinalizeSwitch(unsigned JumpTableSize,
                        unsigned NumCaseCluster) override {
    // If suitable for a jump table, consider the cost for the table size and
    // branch to destination.
    // Maximum valid cost increase in this function.
    if (JumpTableSize) {
      int64_t JTCost =
          static_cast<int64_t>(JumpTableSize) * InlineConstants::InstrCost +
          4 * InlineConstants::InstrCost;

      addCost(JTCost, static_cast<int64_t>(CostUpperBound));
      return;
    }

    if (NumCaseCluster <= 3) {
      // Suppose a comparison includes one compare and one conditional branch.
      addCost(NumCaseCluster * 2 * InlineConstants::InstrCost);
      return;
    }

    int64_t ExpectedNumberOfCompare =
        getExpectedNumberOfCompare(NumCaseCluster);
    int64_t SwitchCost =
        ExpectedNumberOfCompare * 2 * InlineConstants::InstrCost;

    addCost(SwitchCost, static_cast<int64_t>(CostUpperBound));
  }
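
  // As a rough illustration of the formulas above: a switch lowered to a
  // jump table of size 10 is charged (10 + 4) * InstrCost, while 8 case
  // clusters with no jump table cost 11 * 2 * InstrCost (a compare plus a
  // branch for each of the 3 * 8 / 2 - 1 = 11 expected comparisons).
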
  void onMissedSimplification() override {
    addCost(InlineConstants::InstrCost);
  }

  void onInitializeSROAArg(AllocaInst *Arg) override {
    assert(Arg != nullptr &&
           "Should not initialize SROA costs for null value.");
    SROAArgCosts[Arg] = 0;
  }

  void onAggregateSROAUse(AllocaInst *SROAArg) override {
    auto CostIt = SROAArgCosts.find(SROAArg);
    assert(CostIt != SROAArgCosts.end() &&
           "expected this argument to have a cost");
    CostIt->second += InlineConstants::InstrCost;
    SROACostSavings += InlineConstants::InstrCost;
  }

  void onBlockStart(const BasicBlock *BB) override { CostAtBBStart = Cost; }

  void onBlockAnalyzed(const BasicBlock *BB) override {
    if (CostBenefitAnalysisEnabled) {
      // Keep track of the static size of live but cold basic blocks. For now,
      // we define a cold basic block to be one that's never executed.
      assert(GetBFI && "GetBFI must be available");
      BlockFrequencyInfo *BFI = &(GetBFI(F));
      assert(BFI && "BFI must be available");
      auto ProfileCount = BFI->getBlockProfileCount(BB);
      assert(ProfileCount.hasValue());
      if (ProfileCount.getValue() == 0)
        ColdSize += Cost - CostAtBBStart;
    }

    auto *TI = BB->getTerminator();
    // If we had any successors at this point, then post-inlining is likely to
    // have them as well. Note that we assume any basic blocks which existed
    // due to branches or switches which folded above will also fold after
    // inlining.
    if (SingleBB && TI->getNumSuccessors() > 1) {
      // Take off the bonus we applied to the threshold.
      Threshold -= SingleBBBonus;
      SingleBB = false;
    }
  }

  void onInstructionAnalysisStart(const Instruction *I) override {
    // This function is called to store the initial cost of inlining before
    // the given instruction was assessed.
    if (!PrintInstructionComments)
      return;
    InstructionCostDetailMap[I].CostBefore = Cost;
    InstructionCostDetailMap[I].ThresholdBefore = Threshold;
  }

  void onInstructionAnalysisFinish(const Instruction *I) override {
    // This function is called to find new values of cost and threshold after
    // the instruction has been assessed.
    if (!PrintInstructionComments)
      return;
    InstructionCostDetailMap[I].CostAfter = Cost;
    InstructionCostDetailMap[I].ThresholdAfter = Threshold;
  }

  bool isCostBenefitAnalysisEnabled() {
    if (!PSI || !PSI->hasProfileSummary())
      return false;

    if (!GetBFI)
      return false;

    if (InlineEnableCostBenefitAnalysis.getNumOccurrences()) {
      // Honor the explicit request from the user.
      if (!InlineEnableCostBenefitAnalysis)
        return false;
    } else {
      // Otherwise, require instrumentation profile.
      if (!PSI->hasInstrumentationProfile())
        return false;
    }

    auto *Caller = CandidateCall.getParent()->getParent();
    if (!Caller->getEntryCount())
      return false;

    BlockFrequencyInfo *CallerBFI = &(GetBFI(*Caller));
    if (!CallerBFI)
      return false;

    // For now, limit to hot call site.
    if (!PSI->isHotCallSite(CandidateCall, CallerBFI))
      return false;

    // Make sure we have a nonzero entry count.
    auto EntryCount = F.getEntryCount();
    if (!EntryCount || !EntryCount->getCount())
      return false;

    BlockFrequencyInfo *CalleeBFI = &(GetBFI(F));
    if (!CalleeBFI)
      return false;

    return true;
  }

  // Determine whether we should inline the given call site, taking into
  // account both the size cost and the cycle savings. Return None if we don't
  // have sufficient profiling information to determine.
  Optional<bool> costBenefitAnalysis() {
    if (!CostBenefitAnalysisEnabled)
      return None;

    // buildInlinerPipeline in the pass builder sets HotCallSiteThreshold to 0
    // for the prelink phase of the AutoFDO + ThinLTO build. Honor the logic by
    // falling back to the cost-based metric.
    // TODO: Improve this hacky condition.
    if (Threshold == 0)
      return None;

    assert(GetBFI);
    BlockFrequencyInfo *CalleeBFI = &(GetBFI(F));
    assert(CalleeBFI);

    // The cycle savings expressed as the sum of InlineConstants::InstrCost
    // multiplied by the estimated dynamic count of each instruction we can
    // avoid. Savings come from the call site cost, such as argument setup and
    // the call instruction, as well as the instructions that are folded.
    //
    // We use 128-bit APInt here to avoid potential overflow. This variable
    // should stay well below 10^24 (or 2^80) in practice. This "worst" case
    // assumes that we can avoid or fold a billion instructions, each with a
    // profile count of 10^15 -- roughly the number of cycles for a 24-hour
    // period on a 4GHz machine.
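    // (Checking that bound: 10^9 instructions times a count of 10^15 gives
    // 10^24 saved cycles, and 10^24 < 2^80, far below the 2^128 capacity of
    // the accumulator.)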
    APInt CycleSavings(128, 0);

    for (auto &BB : F) {
      APInt CurrentSavings(128, 0);
      for (auto &I : BB) {
        if (BranchInst *BI = dyn_cast<BranchInst>(&I)) {
          // Count a conditional branch as savings if it becomes unconditional.
          if (BI->isConditional() &&
              isa_and_nonnull<ConstantInt>(
                  SimplifiedValues.lookup(BI->getCondition()))) {
            CurrentSavings += InlineConstants::InstrCost;
          }
        } else if (Value *V = dyn_cast<Value>(&I)) {
          // Count an instruction as savings if we can fold it.
          if (SimplifiedValues.count(V)) {
            CurrentSavings += InlineConstants::InstrCost;
          }
        }
      }

      auto ProfileCount = CalleeBFI->getBlockProfileCount(&BB);
      assert(ProfileCount.hasValue());
      CurrentSavings *= ProfileCount.getValue();
      CycleSavings += CurrentSavings;
    }

    // Compute the cycle savings per call.
    auto EntryProfileCount = F.getEntryCount();
    assert(EntryProfileCount.hasValue() && EntryProfileCount->getCount());
    auto EntryCount = EntryProfileCount->getCount();
    CycleSavings += EntryCount / 2;
    CycleSavings = CycleSavings.udiv(EntryCount);

    // Compute the total savings for the call site.
    auto *CallerBB = CandidateCall.getParent();
    BlockFrequencyInfo *CallerBFI = &(GetBFI(*(CallerBB->getParent())));
    CycleSavings += getCallsiteCost(this->CandidateCall, DL);
    CycleSavings *= CallerBFI->getBlockProfileCount(CallerBB).getValue();

    // Remove the cost of the cold basic blocks.
    int Size = Cost - ColdSize;

    // Allow tiny callees to be inlined regardless of whether they meet the
    // savings threshold.
    Size = Size > InlineSizeAllowance ? Size - InlineSizeAllowance : 1;

    CostBenefit.emplace(APInt(128, Size), CycleSavings);

    // Return true if the savings justify the cost of inlining. Specifically,
    // we evaluate the following inequality:
    //
    //  CycleSavings      PSI->getOrCompHotCountThreshold()
    // -------------- >= -----------------------------------
    //       Size               InlineSavingsMultiplier
    //
    // Note that the left hand side is specific to a call site. The right hand
    // side is a constant for the entire executable.
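    //
    // For example, with the default InlineSavingsMultiplier of 8 and a hot
    // count threshold of 16000 (an assumed figure; it comes from the profile
    // summary in practice), a call site saving 2000 cycles per invocation is
    // approved as long as its size cost is at most 2000 * 8 / 16000 = 1.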
    APInt LHS = CycleSavings;
    LHS *= InlineSavingsMultiplier;
    APInt RHS(128, PSI->getOrCompHotCountThreshold());
    RHS *= Size;
    return LHS.uge(RHS);
  }

  InlineResult finalizeAnalysis() override {
    // Loops generally act a lot like calls in that they act like barriers to
    // movement, require a certain amount of setup, etc. So when optimising for
    // size, we penalise any call sites that perform loops. We do this after all
    // other costs here, so will likely only be dealing with relatively small
    // functions (and hence DT and LI will hopefully be cheap).
    auto *Caller = CandidateCall.getFunction();
    if (Caller->hasMinSize()) {
      DominatorTree DT(F);
      LoopInfo LI(DT);
      int NumLoops = 0;
      for (Loop *L : LI) {
        // Ignore loops that will not be executed.
        if (DeadBlocks.count(L->getHeader()))
          continue;
        NumLoops++;
      }
      addCost(NumLoops * InlineConstants::LoopPenalty);
    }

    // We applied the maximum possible vector bonus at the beginning. Now,
    // subtract the excess bonus, if any, from the Threshold before
    // comparing against Cost.
    if (NumVectorInstructions <= NumInstructions / 10)
      Threshold -= VectorBonus;
    else if (NumVectorInstructions <= NumInstructions / 2)
      Threshold -= VectorBonus / 2;

    if (Optional<int> AttrCost =
            getStringFnAttrAsInt(CandidateCall, "function-inline-cost"))
      Cost = *AttrCost;

    if (Optional<int> AttrThreshold =
            getStringFnAttrAsInt(CandidateCall, "function-inline-threshold"))
      Threshold = *AttrThreshold;

    if (auto Result = costBenefitAnalysis()) {
      DecidedByCostBenefit = true;
      if (Result.getValue())
        return InlineResult::success();
      else
        return InlineResult::failure("Cost over threshold.");
    }

    if (IgnoreThreshold)
      return InlineResult::success();

    DecidedByCostThreshold = true;
    return Cost < std::max(1, Threshold)
               ? InlineResult::success()
               : InlineResult::failure("Cost over threshold.");
  }

  bool shouldStop() override {
    if (IgnoreThreshold || ComputeFullInlineCost)
      return false;
    // Bail out the moment we cross the threshold. This means we'll under-count
    // the cost, but only when undercounting doesn't matter.
    if (Cost < Threshold)
      return false;
    DecidedByCostThreshold = true;
    return true;
  }

  void onLoadEliminationOpportunity() override {
    LoadEliminationCost += InlineConstants::InstrCost;
  }

  InlineResult onAnalysisStart() override {
    // Perform some tweaks to the cost and threshold based on the direct
    // callsite information.

    // We want to more aggressively inline vector-dense kernels, so up the
    // threshold, and we'll lower it if the % of vector instructions gets too
    // low. Note that these bonuses are somewhat arbitrary and evolved over
    // time by accident as much as because they are principled bonuses.
    //
    // FIXME: It would be nice to remove all such bonuses. At least it would be
    // nice to base the bonus values on something more scientific.
    assert(NumInstructions == 0);
    assert(NumVectorInstructions == 0);

    // Update the threshold based on callsite properties.
    updateThreshold(CandidateCall, F);

    // While Threshold depends on commandline options that can take negative
    // values, we want to enforce the invariant that the computed threshold and
    // bonuses are non-negative.
    assert(Threshold >= 0);
    assert(SingleBBBonus >= 0);
    assert(VectorBonus >= 0);

    // Speculatively apply all possible bonuses to Threshold. If cost exceeds
    // this Threshold any time, and cost cannot decrease, we can stop processing
    // the rest of the function body.
    Threshold += (SingleBBBonus + VectorBonus);

    // Give out bonuses for the callsite, as the instructions setting them up
    // will be gone after inlining.
    addCost(-getCallsiteCost(this->CandidateCall, DL));

    // If this function uses the coldcc calling convention, prefer not to
    // inline it.
    if (F.getCallingConv() == CallingConv::Cold)
      Cost += InlineConstants::ColdccPenalty;

    // Check if we're done. This can happen due to bonuses and penalties.
    if (Cost >= Threshold && !ComputeFullInlineCost)
      return InlineResult::failure("high cost");

    return InlineResult::success();
  }

public:
  InlineCostCallAnalyzer(
      Function &Callee, CallBase &Call, const InlineParams &Params,
      const TargetTransformInfo &TTI,
      function_ref<AssumptionCache &(Function &)> GetAssumptionCache,
      function_ref<BlockFrequencyInfo &(Function &)> GetBFI = nullptr,
      ProfileSummaryInfo *PSI = nullptr,
      OptimizationRemarkEmitter *ORE = nullptr, bool BoostIndirect = true,
      bool IgnoreThreshold = false)
      : CallAnalyzer(Callee, Call, TTI, GetAssumptionCache, GetBFI, PSI, ORE),
        ComputeFullInlineCost(OptComputeFullInlineCost ||
                              Params.ComputeFullInlineCost || ORE ||
                              isCostBenefitAnalysisEnabled()),
        Params(Params), Threshold(Params.DefaultThreshold),
        BoostIndirectCalls(BoostIndirect), IgnoreThreshold(IgnoreThreshold),
        CostBenefitAnalysisEnabled(isCostBenefitAnalysisEnabled()),
        Writer(this) {
    AllowRecursiveCall = Params.AllowRecursiveCall.getValue();
  }

  /// Annotation Writer for instruction details.
  InlineCostAnnotationWriter Writer;

  void dump();

  // Prints the same analysis as dump(), but its definition is not dependent
  // on the build.
  void print(raw_ostream &OS);

  Optional<InstructionCostDetail> getCostDetails(const Instruction *I) {
    if (InstructionCostDetailMap.find(I) != InstructionCostDetailMap.end())
      return InstructionCostDetailMap[I];
    return None;
  }

  virtual ~InlineCostCallAnalyzer() {}
  int getThreshold() const { return Threshold; }
  int getCost() const { return Cost; }
  Optional<CostBenefitPair> getCostBenefitPair() { return CostBenefit; }
  bool wasDecidedByCostBenefit() const { return DecidedByCostBenefit; }
  bool wasDecidedByCostThreshold() const { return DecidedByCostThreshold; }
};

class InlineCostFeaturesAnalyzer final : public CallAnalyzer {
private:
  InlineCostFeatures Cost = {};

  // FIXME: These constants are taken from the heuristic-based cost visitor.
  // These should be removed entirely in a later revision to avoid reliance on
  // heuristics in the ML inliner.
  static constexpr int JTCostMultiplier = 4;
  static constexpr int CaseClusterCostMultiplier = 2;
  static constexpr int SwitchCostMultiplier = 2;

  // FIXME: These are taken from the heuristic-based cost visitor: we should
  // eventually abstract these to the CallAnalyzer to avoid duplication.
  unsigned SROACostSavingOpportunities = 0;
  int VectorBonus = 0;
  int SingleBBBonus = 0;
  int Threshold = 5;

  DenseMap<AllocaInst *, unsigned> SROACosts;

  void increment(InlineCostFeatureIndex Feature, int64_t Delta = 1) {
    Cost[static_cast<size_t>(Feature)] += Delta;
  }

  void set(InlineCostFeatureIndex Feature, int64_t Value) {
    Cost[static_cast<size_t>(Feature)] = Value;
  }

  void onDisableSROA(AllocaInst *Arg) override {
    auto CostIt = SROACosts.find(Arg);
    if (CostIt == SROACosts.end())
      return;

    increment(InlineCostFeatureIndex::SROALosses, CostIt->second);
    SROACostSavingOpportunities -= CostIt->second;
    SROACosts.erase(CostIt);
  }

  void onDisableLoadElimination() override {
    set(InlineCostFeatureIndex::LoadElimination, 1);
  }

  void onCallPenalty() override {
    increment(InlineCostFeatureIndex::CallPenalty, CallPenalty);
  }

  void onCallArgumentSetup(const CallBase &Call) override {
    increment(InlineCostFeatureIndex::CallArgumentSetup,
              Call.arg_size() * InlineConstants::InstrCost);
  }

  void onLoadRelativeIntrinsic() override {
    increment(InlineCostFeatureIndex::LoadRelativeIntrinsic,
              3 * InlineConstants::InstrCost);
  }

  void onLoweredCall(Function *F, CallBase &Call,
                     bool IsIndirectCall) override {
    increment(InlineCostFeatureIndex::LoweredCallArgSetup,
              Call.arg_size() * InlineConstants::InstrCost);

    if (IsIndirectCall) {
      InlineParams IndirectCallParams = {/* DefaultThreshold*/ 0,
                                         /*HintThreshold*/ {},
                                         /*ColdThreshold*/ {},
                                         /*OptSizeThreshold*/ {},
                                         /*OptMinSizeThreshold*/ {},
                                         /*HotCallSiteThreshold*/ {},
                                         /*LocallyHotCallSiteThreshold*/ {},
                                         /*ColdCallSiteThreshold*/ {},
                                         /*ComputeFullInlineCost*/ true,
                                         /*EnableDeferral*/ true};
      IndirectCallParams.DefaultThreshold =
          InlineConstants::IndirectCallThreshold;

      InlineCostCallAnalyzer CA(*F, Call, IndirectCallParams, TTI,
                                GetAssumptionCache, GetBFI, PSI, ORE, false,
                                true);
      if (CA.analyze().isSuccess()) {
        increment(InlineCostFeatureIndex::NestedInlineCostEstimate,
                  CA.getCost());
        increment(InlineCostFeatureIndex::NestedInlines, 1);
      }
    } else {
      onCallPenalty();
    }
  }

  void onFinalizeSwitch(unsigned JumpTableSize,
                        unsigned NumCaseCluster) override {

    if (JumpTableSize) {
      int64_t JTCost =
          static_cast<int64_t>(JumpTableSize) * InlineConstants::InstrCost +
          JTCostMultiplier * InlineConstants::InstrCost;
      increment(InlineCostFeatureIndex::JumpTablePenalty, JTCost);
      return;
    }

    if (NumCaseCluster <= 3) {
      increment(InlineCostFeatureIndex::CaseClusterPenalty,
                NumCaseCluster * CaseClusterCostMultiplier *
                    InlineConstants::InstrCost);
      return;
    }

    int64_t ExpectedNumberOfCompare =
        getExpectedNumberOfCompare(NumCaseCluster);

    int64_t SwitchCost = ExpectedNumberOfCompare * SwitchCostMultiplier *
                         InlineConstants::InstrCost;
    increment(InlineCostFeatureIndex::SwitchPenalty, SwitchCost);
  }

  void onMissedSimplification() override {
    increment(InlineCostFeatureIndex::UnsimplifiedCommonInstructions,
              InlineConstants::InstrCost);
  }

  void onInitializeSROAArg(AllocaInst *Arg) override { SROACosts[Arg] = 0; }
  void onAggregateSROAUse(AllocaInst *Arg) override {
    SROACosts.find(Arg)->second += InlineConstants::InstrCost;
    SROACostSavingOpportunities += InlineConstants::InstrCost;
  }

  void onBlockAnalyzed(const BasicBlock *BB) override {
    if (BB->getTerminator()->getNumSuccessors() > 1)
      set(InlineCostFeatureIndex::IsMultipleBlocks, 1);
    Threshold -= SingleBBBonus;
  }

  InlineResult finalizeAnalysis() override {
    auto *Caller = CandidateCall.getFunction();
    if (Caller->hasMinSize()) {
      DominatorTree DT(F);
      LoopInfo LI(DT);
      for (Loop *L : LI) {
        // Ignore loops that will not be executed.
        if (DeadBlocks.count(L->getHeader()))
          continue;
        increment(InlineCostFeatureIndex::NumLoops,
                  InlineConstants::LoopPenalty);
      }
    }
    set(InlineCostFeatureIndex::DeadBlocks, DeadBlocks.size());
    set(InlineCostFeatureIndex::SimplifiedInstructions,
        NumInstructionsSimplified);
    set(InlineCostFeatureIndex::ConstantArgs, NumConstantArgs);
    set(InlineCostFeatureIndex::ConstantOffsetPtrArgs,
        NumConstantOffsetPtrArgs);
    set(InlineCostFeatureIndex::SROASavings, SROACostSavingOpportunities);

    if (NumVectorInstructions <= NumInstructions / 10)
      Threshold -= VectorBonus;
    else if (NumVectorInstructions <= NumInstructions / 2)
      Threshold -= VectorBonus / 2;

    set(InlineCostFeatureIndex::Threshold, Threshold);

    return InlineResult::success();
  }

  bool shouldStop() override { return false; }

  void onLoadEliminationOpportunity() override {
    increment(InlineCostFeatureIndex::LoadElimination, 1);
  }

  InlineResult onAnalysisStart() override {
    increment(InlineCostFeatureIndex::CallSiteCost,
              -1 * getCallsiteCost(this->CandidateCall, DL));

    set(InlineCostFeatureIndex::ColdCcPenalty,
        (F.getCallingConv() == CallingConv::Cold));

    // FIXME: we shouldn't repeat this logic in both the Features and Cost
    // analyzer - instead, we should abstract it to a common method in the
    // CallAnalyzer.
    int SingleBBBonusPercent = 50;
    int VectorBonusPercent = TTI.getInlinerVectorBonusPercent();
    Threshold += TTI.adjustInliningThreshold(&CandidateCall);
    Threshold *= TTI.getInliningThresholdMultiplier();
    SingleBBBonus = Threshold * SingleBBBonusPercent / 100;
    VectorBonus = Threshold * VectorBonusPercent / 100;
    Threshold += (SingleBBBonus + VectorBonus);

    return InlineResult::success();
  }
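
  // As a rough numeric sketch of the above: starting from the initial
  // Threshold of 5, with an assumed TTI multiplier of 1, no callsite
  // adjustment, the 50% single-BB bonus, and a 150% vector bonus (a common
  // default), SingleBBBonus = 2 and VectorBonus = 7, giving a speculative
  // Threshold of 5 + 2 + 7 = 14.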

public:
  InlineCostFeaturesAnalyzer(
      const TargetTransformInfo &TTI,
      function_ref<AssumptionCache &(Function &)> &GetAssumptionCache,
      function_ref<BlockFrequencyInfo &(Function &)> GetBFI,
      ProfileSummaryInfo *PSI, OptimizationRemarkEmitter *ORE,
      Function &Callee, CallBase &Call)
      : CallAnalyzer(Callee, Call, TTI, GetAssumptionCache, GetBFI, PSI) {}

  const InlineCostFeatures &features() const { return Cost; }
};

} // namespace

/// Test whether the given value is an Alloca-derived function argument.
bool CallAnalyzer::isAllocaDerivedArg(Value *V) {
  return SROAArgValues.count(V);
}

void CallAnalyzer::disableSROAForArg(AllocaInst *SROAArg) {
  onDisableSROA(SROAArg);
  EnabledSROAAllocas.erase(SROAArg);
  disableLoadElimination();
}

void InlineCostAnnotationWriter::emitInstructionAnnot(
    const Instruction *I, formatted_raw_ostream &OS) {
  // The cost of inlining the given instruction is always printed.
  // The threshold delta is printed only when it is non-zero, which happens
  // when we decided to give a bonus at a particular instruction.
  Optional<InstructionCostDetail> Record = ICCA->getCostDetails(I);
  if (!Record)
    OS << "; No analysis for the instruction";
  else {
    OS << "; cost before = " << Record->CostBefore
       << ", cost after = " << Record->CostAfter
       << ", threshold before = " << Record->ThresholdBefore
       << ", threshold after = " << Record->ThresholdAfter << ", ";
    OS << "cost delta = " << Record->getCostDelta();
    if (Record->hasThresholdChanged())
      OS << ", threshold delta = " << Record->getThresholdDelta();
  }
  auto C = ICCA->getSimplifiedValue(const_cast<Instruction *>(I));
  if (C) {
    OS << ", simplified to ";
    C.getValue()->print(OS, true);
  }
  OS << "\n";
}

/// If 'V' maps to a SROA candidate, disable SROA for it.
void CallAnalyzer::disableSROA(Value *V) {
  if (auto *SROAArg = getSROAArgForValueOrNull(V)) {
    disableSROAForArg(SROAArg);
  }
}

void CallAnalyzer::disableLoadElimination() {
  if (EnableLoadElimination) {
    onDisableLoadElimination();
    EnableLoadElimination = false;
  }
}

/// Accumulate a constant GEP offset into an APInt if possible.
///
/// Returns false if unable to compute the offset for any reason. Respects any
/// simplified values known during the analysis of this callsite.
bool CallAnalyzer::accumulateGEPOffset(GEPOperator &GEP, APInt &Offset) {
  unsigned IntPtrWidth = DL.getIndexTypeSizeInBits(GEP.getType());
  assert(IntPtrWidth == Offset.getBitWidth());

  for (gep_type_iterator GTI = gep_type_begin(GEP), GTE = gep_type_end(GEP);
       GTI != GTE; ++GTI) {
    ConstantInt *OpC = dyn_cast<ConstantInt>(GTI.getOperand());
    if (!OpC)
      if (Constant *SimpleOp = SimplifiedValues.lookup(GTI.getOperand()))
        OpC = dyn_cast<ConstantInt>(SimpleOp);
    if (!OpC)
      return false;
    if (OpC->isZero())
      continue;

    // Handle a struct index, which adds its field offset to the pointer.
    if (StructType *STy = GTI.getStructTypeOrNull()) {
      unsigned ElementIdx = OpC->getZExtValue();
      const StructLayout *SL = DL.getStructLayout(STy);
      Offset += APInt(IntPtrWidth, SL->getElementOffset(ElementIdx));
      continue;
    }

    APInt TypeSize(IntPtrWidth, DL.getTypeAllocSize(GTI.getIndexedType()));
    Offset += OpC->getValue().sextOrTrunc(IntPtrWidth) * TypeSize;
  }
  return true;
}
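
// A worked example of the accumulation above (hypothetical IR): for
//   %f = getelementptr inbounds {i32, i64}, ptr %base, i32 0, i32 1
// the struct-index step adds the layout offset of field 1, which is 8 bytes
// under a typical 64-bit DataLayout (i64 aligned to 8), so Offset becomes 8.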

/// Use TTI to check whether a GEP is free.
///
/// Respects any simplified values known during the analysis of this callsite.
bool CallAnalyzer::isGEPFree(GetElementPtrInst &GEP) {
  SmallVector<Value *, 4> Operands;
  Operands.push_back(GEP.getOperand(0));
  for (const Use &Op : GEP.indices())
    if (Constant *SimpleOp = SimplifiedValues.lookup(Op))
      Operands.push_back(SimpleOp);
    else
      Operands.push_back(Op);
  return TTI.getUserCost(&GEP, Operands,
                         TargetTransformInfo::TCK_SizeAndLatency) ==
         TargetTransformInfo::TCC_Free;
}

bool CallAnalyzer::visitAlloca(AllocaInst &I) {
  disableSROA(I.getOperand(0));

  // Check whether inlining will turn a dynamic alloca into a static
  // alloca and handle that case.
  if (I.isArrayAllocation()) {
    Constant *Size = SimplifiedValues.lookup(I.getArraySize());
    if (auto *AllocSize = dyn_cast_or_null<ConstantInt>(Size)) {
      // Sometimes a dynamic alloca could be converted into a static alloca
      // after this constant prop, and become a huge static alloca on an
      // unconditional CFG path. Avoid inlining if this is going to happen above
      // a threshold.
      // FIXME: If the threshold is removed or lowered too much, we could end up
      // being too pessimistic and prevent inlining non-problematic code. This
      // could result in unintended perf regressions. A better overall strategy
      // is needed to track stack usage during inlining.
      Type *Ty = I.getAllocatedType();
      AllocatedSize = SaturatingMultiplyAdd(
          AllocSize->getLimitedValue(),
          DL.getTypeAllocSize(Ty).getKnownMinSize(), AllocatedSize);
      if (AllocatedSize > InlineConstants::MaxSimplifiedDynamicAllocaToInline)
        HasDynamicAlloca = true;
      return false;
    }
  }

  // Accumulate the allocated size.
  if (I.isStaticAlloca()) {
    Type *Ty = I.getAllocatedType();
    AllocatedSize =
        SaturatingAdd(DL.getTypeAllocSize(Ty).getKnownMinSize(), AllocatedSize);
  }

  // FIXME: This is overly conservative. Dynamic allocas are inefficient for
  // a variety of reasons, and so we would like to not inline them into
  // functions which don't currently have a dynamic alloca. This simply
  // disables inlining altogether in the presence of a dynamic alloca.
  if (!I.isStaticAlloca())
    HasDynamicAlloca = true;

  return false;
}
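
// To illustrate the constant-propagation path above (hypothetical IR): for a
// callee containing "%buf = alloca i8, i32 %n" where the call site passes
// %n = 16, SimplifiedValues lets this be treated as a 16-byte static alloca;
// a huge constant instead sets HasDynamicAlloca to discourage the inline.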

bool CallAnalyzer::visitPHI(PHINode &I) {
  // FIXME: We need to propagate SROA *disabling* through phi nodes, even
  // though we don't want to propagate its bonuses. The idea is to disable
  // SROA if it *might* be used in an inappropriate manner.

  // Phi nodes are always zero-cost.
  // FIXME: Pointer sizes may differ between different address spaces, so do we
  // need to use correct address space in the call to getPointerSizeInBits here?
  // Or could we skip the getPointerSizeInBits call completely? As far as I can
  // see the ZeroOffset is used as a dummy value, so we can probably use any
  // bit width for the ZeroOffset?
  APInt ZeroOffset = APInt::getZero(DL.getPointerSizeInBits(0));
  bool CheckSROA = I.getType()->isPointerTy();

  // Track the constant or pointer with constant offset we've seen so far.
  Constant *FirstC = nullptr;
  std::pair<Value *, APInt> FirstBaseAndOffset = {nullptr, ZeroOffset};
  Value *FirstV = nullptr;

  for (unsigned i = 0, e = I.getNumIncomingValues(); i != e; ++i) {
    BasicBlock *Pred = I.getIncomingBlock(i);
    // If the incoming block is dead, skip the incoming block.
    if (DeadBlocks.count(Pred))
      continue;
    // If the parent block of phi is not the known successor of the incoming
    // block, skip the incoming block.
    BasicBlock *KnownSuccessor = KnownSuccessors[Pred];
    if (KnownSuccessor && KnownSuccessor != I.getParent())
      continue;

    Value *V = I.getIncomingValue(i);
    // If the incoming value is this phi itself, skip the incoming value.
    if (&I == V)
      continue;

    Constant *C = dyn_cast<Constant>(V);
    if (!C)
      C = SimplifiedValues.lookup(V);

    std::pair<Value *, APInt> BaseAndOffset = {nullptr, ZeroOffset};
    if (!C && CheckSROA)
      BaseAndOffset = ConstantOffsetPtrs.lookup(V);

    if (!C && !BaseAndOffset.first)
      // The incoming value is neither a constant nor a pointer with constant
      // offset, exit early.
      return true;

    if (FirstC) {
      if (FirstC == C)
        // If we've seen a constant incoming value before and it is the same
        // constant we see this time, continue checking the next incoming value.
        continue;
      // Otherwise early exit because we either see a different constant or saw
      // a constant before but we have a pointer with constant offset this time.
      return true;
    }

    if (FirstV) {
      // The same logic as above, but check pointer with constant offset here.
      if (FirstBaseAndOffset == BaseAndOffset)
        continue;
      return true;
    }

    if (C) {
      // This is the 1st time we've seen a constant, record it.
      FirstC = C;
      continue;
    }

    // The remaining case is that this is the 1st time we've seen a pointer
    // with constant offset, record it.
    FirstV = V;
    FirstBaseAndOffset = BaseAndOffset;
  }

  // Check if we can map phi to a constant.
  if (FirstC) {
    SimplifiedValues[&I] = FirstC;
    return true;
  }

  // Check if we can map phi to a pointer with constant offset.
  if (FirstBaseAndOffset.first) {
    ConstantOffsetPtrs[&I] = FirstBaseAndOffset;

    if (auto *SROAArg = getSROAArgForValueOrNull(FirstV))
      SROAArgValues[&I] = SROAArg;
  }

  return true;
}
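
// A sketch of the folding case handled above (hypothetical IR): when every
// live incoming value resolves to the same constant, as in
//   %p = phi i32 [ 4, %bb1 ], [ 4, %bb2 ]
// the phi is recorded in SimplifiedValues as the constant 4, so downstream
// users can simplify as if it were gone.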

/// Check we can fold GEPs of constant-offset call site argument pointers.
/// This requires target data and inbounds GEPs.
///
/// \return true if the specified GEP can be folded.
bool CallAnalyzer::canFoldInboundsGEP(GetElementPtrInst &I) {
  // Check if we have a base + offset for the pointer.
  std::pair<Value *, APInt> BaseAndOffset =
      ConstantOffsetPtrs.lookup(I.getPointerOperand());
  if (!BaseAndOffset.first)
    return false;

  // Check if the offset of this GEP is constant, and if so accumulate it
  // into Offset.
  if (!accumulateGEPOffset(cast<GEPOperator>(I), BaseAndOffset.second))
    return false;

  // Add the result as a new mapping to Base + Offset.
  ConstantOffsetPtrs[&I] = BaseAndOffset;

  return true;
}
1492 
1493 bool CallAnalyzer::visitGetElementPtr(GetElementPtrInst &I) {
1494  auto *SROAArg = getSROAArgForValueOrNull(I.getPointerOperand());
1495 
1496  // Lambda to check whether a GEP's indices are all constant.
1497  auto IsGEPOffsetConstant = [&](GetElementPtrInst &GEP) {
1498  for (const Use &Op : GEP.indices())
1499  if (!isa<Constant>(Op) && !SimplifiedValues.lookup(Op))
1500  return false;
1501  return true;
1502  };
1503 
1505  if (simplifyInstruction(I, [&](SmallVectorImpl<Constant *> &COps) {
1506  SmallVector<Constant *, 2> Indices;
1507  for (unsigned int Index = 1; Index < COps.size(); ++Index)
1508  Indices.push_back(COps[Index]);
1509  return ConstantExpr::getGetElementPtr(
1510  I.getSourceElementType(), COps[0], Indices, I.isInBounds());
1511  }))
1512  return true;
1513 
1514  if ((I.isInBounds() && canFoldInboundsGEP(I)) || IsGEPOffsetConstant(I)) {
1515  if (SROAArg)
1516  SROAArgValues[&I] = SROAArg;
1517 
1518  // Constant GEPs are modeled as free.
1519  return true;
1520  }
1521 
1522  // Variable GEPs will require math and will disable SROA.
1523  if (SROAArg)
1524  disableSROAForArg(SROAArg);
1525  return isGEPFree(I);
1526 }
1527 
1528 /// Simplify \p I if its operands are constants and update SimplifiedValues.
1529 /// \p Evaluate is a callable specific to instruction type that evaluates the
1530 /// instruction when all the operands are constants.
1531 template <typename Callable>
1532 bool CallAnalyzer::simplifyInstruction(Instruction &I, Callable Evaluate) {
1533  SmallVector<Constant *, 2> COps;
1534  for (Value *Op : I.operands()) {
1535  Constant *COp = dyn_cast<Constant>(Op);
1536  if (!COp)
1537  COp = SimplifiedValues.lookup(Op);
1538  if (!COp)
1539  return false;
1540  COps.push_back(COp);
1541  }
1542  auto *C = Evaluate(COps);
1543  if (!C)
1544  return false;
1545  SimplifiedValues[&I] = C;
1546  return true;
1547 }
1548 
1549 /// Try to simplify a call to llvm.is.constant.
1550 ///
1551 /// Duplicate the argument checking from CallAnalyzer::simplifyCallSite since
1552 /// we expect calls of this specific intrinsic to be infrequent.
1553 ///
1554 /// FIXME: Given that we know CB's parent (F) caller
1555 /// (CandidateCall->getParent()->getParent()), we might be able to determine
1556 /// whether inlining F into F's caller would change how the call to
1557 /// llvm.is.constant would evaluate.
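/// For example, once this analysis has mapped the argument to a constant, a
/// call like `%r = call i1 @llvm.is.constant.i32(i32 %x)` is recorded below
/// as simplifying to `true` (illustrative IR, not from this file).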
1558 bool CallAnalyzer::simplifyIntrinsicCallIsConstant(CallBase &CB) {
1559  Value *Arg = CB.getArgOperand(0);
1560  auto *C = dyn_cast<Constant>(Arg);
1561 
1562  if (!C)
1563  C = dyn_cast_or_null<Constant>(SimplifiedValues.lookup(Arg));
1564 
1565  Type *RT = CB.getFunctionType()->getReturnType();
1566  SimplifiedValues[&CB] = ConstantInt::get(RT, C ? 1 : 0);
1567  return true;
1568 }
1569 
1570 bool CallAnalyzer::visitBitCast(BitCastInst &I) {
1571  // Propagate constants through bitcasts.
1572  if (simplifyInstruction(I, [&](SmallVectorImpl<Constant *> &COps) {
1573  return ConstantExpr::getBitCast(COps[0], I.getType());
1574  }))
1575  return true;
1576 
1577  // Track base/offsets through casts
1578  std::pair<Value *, APInt> BaseAndOffset =
1579  ConstantOffsetPtrs.lookup(I.getOperand(0));
1580  // Casts don't change the offset, just wrap it up.
1581  if (BaseAndOffset.first)
1582  ConstantOffsetPtrs[&I] = BaseAndOffset;
1583 
1584  // Also look for SROA candidates here.
1585  if (auto *SROAArg = getSROAArgForValueOrNull(I.getOperand(0)))
1586  SROAArgValues[&I] = SROAArg;
1587 
1588  // Bitcasts are always zero cost.
1589  return true;
1590 }
1591 
1592 bool CallAnalyzer::visitPtrToInt(PtrToIntInst &I) {
1593  // Propagate constants through ptrtoint.
1594  if (simplifyInstruction(I, [&](SmallVectorImpl<Constant *> &COps) {
1595  return ConstantExpr::getPtrToInt(COps[0], I.getType());
1596  }))
1597  return true;
1598 
1599  // Track base/offset pairs when converted to a plain integer provided the
1600  // integer is large enough to represent the pointer.
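  // For example, on a target with 64-bit pointers, `ptrtoint i8* %p to i64`
  // keeps any known (base, offset) pair for %p alive, whereas a truncating
  // `ptrtoint i8* %p to i32` drops it (illustrative).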
1601  unsigned IntegerSize = I.getType()->getScalarSizeInBits();
1602  unsigned AS = I.getOperand(0)->getType()->getPointerAddressSpace();
1603  if (IntegerSize == DL.getPointerSizeInBits(AS)) {
1604  std::pair<Value *, APInt> BaseAndOffset =
1605  ConstantOffsetPtrs.lookup(I.getOperand(0));
1606  if (BaseAndOffset.first)
1607  ConstantOffsetPtrs[&I] = BaseAndOffset;
1608  }
1609 
1610  // This is really weird. Technically, ptrtoint will disable SROA. However,
1611  // unless that ptrtoint is *used* somewhere in the live basic blocks after
1612  // inlining, it will be nuked, and SROA should proceed. All of the uses which
1613  // would block SROA would also block SROA if applied directly to a pointer,
1614  // and so we can just add the integer in here. The only places where SROA is
1615  // preserved either cannot fire on an integer, or won't in-and-of themselves
1616  // disable SROA (ext) w/o some later use that we would see and disable.
1617  if (auto *SROAArg = getSROAArgForValueOrNull(I.getOperand(0)))
1618  SROAArgValues[&I] = SROAArg;
1619 
1620  return TTI.getUserCost(&I, TargetTransformInfo::TCK_SizeAndLatency) ==
1621  TargetTransformInfo::TCC_Free;
1622 }
1623 
1624 bool CallAnalyzer::visitIntToPtr(IntToPtrInst &I) {
1625  // Propagate constants through inttoptr.
1626  if (simplifyInstruction(I, [&](SmallVectorImpl<Constant *> &COps) {
1627  return ConstantExpr::getIntToPtr(COps[0], I.getType());
1628  }))
1629  return true;
1630 
1631  // Track base/offset pairs when round-tripped through a pointer without
1632  // modifications provided the integer is not too large.
1633  Value *Op = I.getOperand(0);
1634  unsigned IntegerSize = Op->getType()->getScalarSizeInBits();
1635  if (IntegerSize <= DL.getPointerTypeSizeInBits(I.getType())) {
1636  std::pair<Value *, APInt> BaseAndOffset = ConstantOffsetPtrs.lookup(Op);
1637  if (BaseAndOffset.first)
1638  ConstantOffsetPtrs[&I] = BaseAndOffset;
1639  }
1640 
1641  // "Propagate" SROA here in the same manner as we do for ptrtoint above.
1642  if (auto *SROAArg = getSROAArgForValueOrNull(Op))
1643  SROAArgValues[&I] = SROAArg;
1644 
1645  return TTI.getUserCost(&I, TargetTransformInfo::TCK_SizeAndLatency) ==
1646  TargetTransformInfo::TCC_Free;
1647 }
1648 
1649 bool CallAnalyzer::visitCastInst(CastInst &I) {
1650  // Propagate constants through casts.
1651  if (simplifyInstruction(I, [&](SmallVectorImpl<Constant *> &COps) {
1652  return ConstantExpr::getCast(I.getOpcode(), COps[0], I.getType());
1653  }))
1654  return true;
1655 
1656  // Disable SROA in the face of arbitrary casts we don't explicitly list
1657  // elsewhere.
1658  disableSROA(I.getOperand(0));
1659 
1660  // If this is a floating-point cast, and the target says this operation
1661  // is expensive, this may eventually become a library call. Treat the cost
1662  // as such.
1663  switch (I.getOpcode()) {
1664  case Instruction::FPTrunc:
1665  case Instruction::FPExt:
1666  case Instruction::UIToFP:
1667  case Instruction::SIToFP:
1668  case Instruction::FPToUI:
1669  case Instruction::FPToSI:
1670  if (TTI.getFPOpCost(I.getType()) == TargetTransformInfo::TCC_Expensive)
1671  onCallPenalty();
1672  break;
1673  default:
1674  break;
1675  }
1676 
1677  return TTI.getUserCost(&I, TargetTransformInfo::TCK_SizeAndLatency) ==
1678  TargetTransformInfo::TCC_Free;
1679 }
1680 
1681 bool CallAnalyzer::paramHasAttr(Argument *A, Attribute::AttrKind Attr) {
1682  return CandidateCall.paramHasAttr(A->getArgNo(), Attr);
1683 }
1684 
1685 bool CallAnalyzer::isKnownNonNullInCallee(Value *V) {
1686  // Does the *call site* have the NonNull attribute set on an argument? We
1687  // use the attribute on the call site to memoize any analysis done in the
1688  // caller. This will also trip if the callee function has a non-null
1689  // parameter attribute, but that's a less interesting case because hopefully
1690  // the callee would already have been simplified based on that.
1691  if (Argument *A = dyn_cast<Argument>(V))
1692  if (paramHasAttr(A, Attribute::NonNull))
1693  return true;
1694 
1695  // Is this an alloca in the caller? This is distinct from the attribute case
1696  // above because attributes aren't updated within the inliner itself and we
1697  // always want to catch the alloca derived case.
1698  if (isAllocaDerivedArg(V))
1699  // We can actually predict the result of comparisons between an
1700  // alloca-derived value and null. Note that this fires regardless of
1701  // SROA firing.
1702  return true;
1703 
1704  return false;
1705 }
1706 
1707 bool CallAnalyzer::allowSizeGrowth(CallBase &Call) {
1708  // If the normal destination of the invoke or the parent block of the call
1709  // site is unreachable-terminated, there is little point in inlining this
1710  // unless there is literally zero cost.
1711  // FIXME: Note that it is possible that an unreachable-terminated block has a
1712  // hot entry. For example, in the scenario below, inlining hot_call_X() may
1713  // be beneficial:
1714  // main() {
1715  // hot_call_1();
1716  // ...
1717  // hot_call_N()
1718  // exit(0);
1719  // }
1720  // For now, we are not handling this corner case here as it is rare in real
1721  // code. In future, we should elaborate this based on BPI and BFI in more
1722  // general threshold adjusting heuristics in updateThreshold().
1723  if (InvokeInst *II = dyn_cast<InvokeInst>(&Call)) {
1724  if (isa<UnreachableInst>(II->getNormalDest()->getTerminator()))
1725  return false;
1726  } else if (isa<UnreachableInst>(Call.getParent()->getTerminator()))
1727  return false;
1728 
1729  return true;
1730 }
1731 
1732 bool InlineCostCallAnalyzer::isColdCallSite(CallBase &Call,
1733  BlockFrequencyInfo *CallerBFI) {
1734  // If global profile summary is available, then callsite's coldness is
1735  // determined based on that.
1736  if (PSI && PSI->hasProfileSummary())
1737  return PSI->isColdCallSite(Call, CallerBFI);
1738 
1739  // Otherwise we need BFI to be available.
1740  if (!CallerBFI)
1741  return false;
1742 
1743  // Determine if the callsite is cold relative to caller's entry. We could
1744  // potentially cache the computation of scaled entry frequency, but the added
1745  // complexity is not worth it unless this scaling shows up high in the
1746  // profiles.
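  // E.g. with the default -cold-callsite-rel-freq of 2, the callsite block
  // must execute at most 2% as often as the caller's entry block to count as
  // cold (illustrative arithmetic based on the flag's default).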
1747  const BranchProbability ColdProb(ColdCallSiteRelFreq, 100);
1748  auto CallSiteBB = Call.getParent();
1749  auto CallSiteFreq = CallerBFI->getBlockFreq(CallSiteBB);
1750  auto CallerEntryFreq =
1751  CallerBFI->getBlockFreq(&(Call.getCaller()->getEntryBlock()));
1752  return CallSiteFreq < CallerEntryFreq * ColdProb;
1753 }
1754 
1755 Optional<int>
1756 InlineCostCallAnalyzer::getHotCallSiteThreshold(CallBase &Call,
1757  BlockFrequencyInfo *CallerBFI) {
1758 
1759  // If global profile summary is available, then callsite's hotness is
1760  // determined based on that.
1761  if (PSI && PSI->hasProfileSummary() && PSI->isHotCallSite(Call, CallerBFI))
1762  return Params.HotCallSiteThreshold;
1763 
1764  // Otherwise we need BFI to be available and to have a locally hot callsite
1765  // threshold.
1766  if (!CallerBFI || !Params.LocallyHotCallSiteThreshold)
1767  return None;
1768 
1769  // Determine if the callsite is hot relative to caller's entry. We could
1770  // potentially cache the computation of scaled entry frequency, but the added
1771  // complexity is not worth it unless this scaling shows up high in the
1772  // profiles.
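  // E.g. with the default -hot-callsite-rel-freq of 60, the callsite block
  // must execute at least 60x as often as the caller's entry block before the
  // locally hot threshold applies (illustrative arithmetic).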
1773  auto CallSiteBB = Call.getParent();
1774  auto CallSiteFreq = CallerBFI->getBlockFreq(CallSiteBB).getFrequency();
1775  auto CallerEntryFreq = CallerBFI->getEntryFreq();
1776  if (CallSiteFreq >= CallerEntryFreq * HotCallSiteRelFreq)
1777  return Params.LocallyHotCallSiteThreshold;
1778 
1779  // Otherwise treat it normally.
1780  return None;
1781 }
1782 
1783 void InlineCostCallAnalyzer::updateThreshold(CallBase &Call, Function &Callee) {
1784  // If no size growth is allowed for this inlining, set Threshold to 0.
1785  if (!allowSizeGrowth(Call)) {
1786  Threshold = 0;
1787  return;
1788  }
1789 
1790  Function *Caller = Call.getCaller();
1791 
1792  // return min(A, B) if B is valid.
1793  auto MinIfValid = [](int A, Optional<int> B) {
1794  return B ? std::min(A, B.getValue()) : A;
1795  };
1796 
1797  // return max(A, B) if B is valid.
1798  auto MaxIfValid = [](int A, Optional<int> B) {
1799  return B ? std::max(A, B.getValue()) : A;
1800  };
1801 
1802  // Various bonus percentages. These are multiplied by Threshold to get the
1803  // bonus values.
1804  // SingleBBBonus: This bonus is applied if the callee has a single reachable
1805  // basic block at the given callsite context. This is speculatively applied
1806  // and withdrawn if more than one basic block is seen.
1807  //
1808  // LastCallToStaticBonus: This large bonus is applied to ensure the inlining
1809  // of the last call to a static function as inlining such functions is
1810  // guaranteed to reduce code size.
1811  //
1812  // These bonus percentages may be set to 0 based on properties of the caller
1813  // and the callsite.
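  // E.g. with Threshold = 225 and SingleBBBonusPercent = 50, a single-block
  // callee is effectively measured against 225 + 112 = 337 (illustrative
  // arithmetic; the bonus is withdrawn if a second live block is seen).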
1814  int SingleBBBonusPercent = 50;
1815  int VectorBonusPercent = TTI.getInlinerVectorBonusPercent();
1816  int LastCallToStaticBonus = InlineConstants::LastCallToStaticBonus;
1817 
1818  // Lambda to set all the above bonus and bonus percentages to 0.
1819  auto DisallowAllBonuses = [&]() {
1820  SingleBBBonusPercent = 0;
1821  VectorBonusPercent = 0;
1822  LastCallToStaticBonus = 0;
1823  };
1824 
1825  // Use the OptMinSizeThreshold or OptSizeThreshold knob if they are available
1826  // and reduce the threshold if the caller has the necessary attribute.
1827  if (Caller->hasMinSize()) {
1828  Threshold = MinIfValid(Threshold, Params.OptMinSizeThreshold);
1829  // For minsize, we want to disable the single BB bonus and the vector
1830  // bonuses, but not the last-call-to-static bonus. Inlining the last call to
1831  // a static function will, at the minimum, eliminate the parameter setup and
1832  // call/return instructions.
1833  SingleBBBonusPercent = 0;
1834  VectorBonusPercent = 0;
1835  } else if (Caller->hasOptSize())
1836  Threshold = MinIfValid(Threshold, Params.OptSizeThreshold);
1837 
1838  // Adjust the threshold based on inlinehint attribute and profile based
1839  // hotness information if the caller does not have MinSize attribute.
1840  if (!Caller->hasMinSize()) {
1841  if (Callee.hasFnAttribute(Attribute::InlineHint))
1842  Threshold = MaxIfValid(Threshold, Params.HintThreshold);
1843 
1844  // FIXME: After switching to the new passmanager, simplify the logic below
1845  // by checking only the callsite hotness/coldness as we will reliably
1846  // have local profile information.
1847  //
1848  // Callsite hotness and coldness can be determined if sample profile is
1849  // used (which adds hotness metadata to calls) or if caller's
1850  // BlockFrequencyInfo is available.
1851  BlockFrequencyInfo *CallerBFI = GetBFI ? &(GetBFI(*Caller)) : nullptr;
1852  auto HotCallSiteThreshold = getHotCallSiteThreshold(Call, CallerBFI);
1853  if (!Caller->hasOptSize() && HotCallSiteThreshold) {
1854  LLVM_DEBUG(dbgs() << "Hot callsite.\n");
1855  // FIXME: This should update the threshold only if it exceeds the
1856  // current threshold, but AutoFDO + ThinLTO currently relies on this
1857  // behavior to prevent inlining of hot callsites during ThinLTO
1858  // compile phase.
1859  Threshold = HotCallSiteThreshold.getValue();
1860  } else if (isColdCallSite(Call, CallerBFI)) {
1861  LLVM_DEBUG(dbgs() << "Cold callsite.\n");
1862  // Do not apply bonuses for a cold callsite including the
1863  // LastCallToStatic bonus. While this bonus might result in code size
1864  // reduction, it can cause the size of a non-cold caller to increase
1865  // preventing it from being inlined.
1866  DisallowAllBonuses();
1867  Threshold = MinIfValid(Threshold, Params.ColdCallSiteThreshold);
1868  } else if (PSI) {
1869  // Use callee's global profile information only if we have no way of
1870  // determining this via callsite information.
1871  if (PSI->isFunctionEntryHot(&Callee)) {
1872  LLVM_DEBUG(dbgs() << "Hot callee.\n");
1873  // If callsite hotness can not be determined, we may still know
1874  // that the callee is hot and treat it as a weaker hint for threshold
1875  // increase.
1876  Threshold = MaxIfValid(Threshold, Params.HintThreshold);
1877  } else if (PSI->isFunctionEntryCold(&Callee)) {
1878  LLVM_DEBUG(dbgs() << "Cold callee.\n");
1879  // Do not apply bonuses for a cold callee including the
1880  // LastCallToStatic bonus. While this bonus might result in code size
1881  // reduction, it can cause the size of a non-cold caller to increase
1882  // preventing it from being inlined.
1883  DisallowAllBonuses();
1884  Threshold = MinIfValid(Threshold, Params.ColdThreshold);
1885  }
1886  }
1887  }
1888 
1889  Threshold += TTI.adjustInliningThreshold(&Call);
1890 
1891  // Finally, take the target-specific inlining threshold multiplier into
1892  // account.
1893  Threshold *= TTI.getInliningThresholdMultiplier();
1894 
1895  SingleBBBonus = Threshold * SingleBBBonusPercent / 100;
1896  VectorBonus = Threshold * VectorBonusPercent / 100;
1897 
1898  bool OnlyOneCallAndLocalLinkage = F.hasLocalLinkage() && F.hasOneLiveUse() &&
1899  &F == Call.getCalledFunction();
1900  // If there is only one call of the function, and it has internal linkage,
1901  // the cost of inlining it drops dramatically. It may seem odd to update
1902  // Cost in updateThreshold, but the bonus depends on the logic in this method.
1903  if (OnlyOneCallAndLocalLinkage)
1904  Cost -= LastCallToStaticBonus;
1905 }
1906 
1907 bool CallAnalyzer::visitCmpInst(CmpInst &I) {
1908  Value *LHS = I.getOperand(0), *RHS = I.getOperand(1);
1909  // First try to handle simplified comparisons.
1910  if (simplifyInstruction(I, [&](SmallVectorImpl<Constant *> &COps) {
1911  return ConstantExpr::getCompare(I.getPredicate(), COps[0], COps[1]);
1912  }))
1913  return true;
1914 
1915  if (I.getOpcode() == Instruction::FCmp)
1916  return false;
1917 
1918  // Otherwise look for a comparison between constant offset pointers with
1919  // a common base.
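  // E.g. if both operands are GEPs off the same alloca-derived argument with
  // byte offsets 4 and 8, an `icmp eq` of the two folds to false
  // (illustrative).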
1920  Value *LHSBase, *RHSBase;
1921  APInt LHSOffset, RHSOffset;
1922  std::tie(LHSBase, LHSOffset) = ConstantOffsetPtrs.lookup(LHS);
1923  if (LHSBase) {
1924  std::tie(RHSBase, RHSOffset) = ConstantOffsetPtrs.lookup(RHS);
1925  if (RHSBase && LHSBase == RHSBase) {
1926  // We have common bases, fold the icmp to a constant based on the
1927  // offsets.
1928  Constant *CLHS = ConstantInt::get(LHS->getContext(), LHSOffset);
1929  Constant *CRHS = ConstantInt::get(RHS->getContext(), RHSOffset);
1930  if (Constant *C = ConstantExpr::getICmp(I.getPredicate(), CLHS, CRHS)) {
1931  SimplifiedValues[&I] = C;
1932  ++NumConstantPtrCmps;
1933  return true;
1934  }
1935  }
1936  }
1937 
1938  // If the comparison is an equality comparison with null, we can simplify it
1939  // if we know the value (argument) can't be null
1940  if (I.isEquality() && isa<ConstantPointerNull>(I.getOperand(1)) &&
1941  isKnownNonNullInCallee(I.getOperand(0))) {
1942  bool IsNotEqual = I.getPredicate() == CmpInst::ICMP_NE;
1943  SimplifiedValues[&I] = IsNotEqual ? ConstantInt::getTrue(I.getType())
1944  : ConstantInt::getFalse(I.getType());
1945  return true;
1946  }
1947  return handleSROA(I.getOperand(0), isa<ConstantPointerNull>(I.getOperand(1)));
1948 }
1949 
1950 bool CallAnalyzer::visitSub(BinaryOperator &I) {
1951  // Try to handle a special case: we can fold computing the difference of two
1952  // constant-related pointers.
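  // E.g. `sub (ptrtoint (gep i32, i32* %p, i64 3)), (ptrtoint i32* %p)` folds
  // to the constant 12 once both sides are known to share the base %p
  // (illustrative IR).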
1953  Value *LHS = I.getOperand(0), *RHS = I.getOperand(1);
1954  Value *LHSBase, *RHSBase;
1955  APInt LHSOffset, RHSOffset;
1956  std::tie(LHSBase, LHSOffset) = ConstantOffsetPtrs.lookup(LHS);
1957  if (LHSBase) {
1958  std::tie(RHSBase, RHSOffset) = ConstantOffsetPtrs.lookup(RHS);
1959  if (RHSBase && LHSBase == RHSBase) {
1960  // We have common bases, fold the subtract to a constant based on the
1961  // offsets.
1962  Constant *CLHS = ConstantInt::get(LHS->getContext(), LHSOffset);
1963  Constant *CRHS = ConstantInt::get(RHS->getContext(), RHSOffset);
1964  if (Constant *C = ConstantExpr::getSub(CLHS, CRHS)) {
1965  SimplifiedValues[&I] = C;
1966  ++NumConstantPtrDiffs;
1967  return true;
1968  }
1969  }
1970  }
1971 
1972  // Otherwise, fall back to the generic logic for simplifying and handling
1973  // instructions.
1974  return Base::visitSub(I);
1975 }
1976 
1977 bool CallAnalyzer::visitBinaryOperator(BinaryOperator &I) {
1978  Value *LHS = I.getOperand(0), *RHS = I.getOperand(1);
1979  Constant *CLHS = dyn_cast<Constant>(LHS);
1980  if (!CLHS)
1981  CLHS = SimplifiedValues.lookup(LHS);
1982  Constant *CRHS = dyn_cast<Constant>(RHS);
1983  if (!CRHS)
1984  CRHS = SimplifiedValues.lookup(RHS);
1985 
1986  Value *SimpleV = nullptr;
1987  if (auto FI = dyn_cast<FPMathOperator>(&I))
1988  SimpleV = SimplifyBinOp(I.getOpcode(), CLHS ? CLHS : LHS, CRHS ? CRHS : RHS,
1989  FI->getFastMathFlags(), DL);
1990  else
1991  SimpleV =
1992  SimplifyBinOp(I.getOpcode(), CLHS ? CLHS : LHS, CRHS ? CRHS : RHS, DL);
1993 
1994  if (Constant *C = dyn_cast_or_null<Constant>(SimpleV))
1995  SimplifiedValues[&I] = C;
1996 
1997  if (SimpleV)
1998  return true;
1999 
2000  // Disable any SROA on arguments to arbitrary, unsimplified binary operators.
2001  disableSROA(LHS);
2002  disableSROA(RHS);
2003 
2004  // If the instruction is floating point, and the target says this operation
2005  // is expensive, this may eventually become a library call. Treat the cost
2006  // as such. Unless it's fneg which can be implemented with an xor.
2007  using namespace llvm::PatternMatch;
2008  if (I.getType()->isFloatingPointTy() &&
2009  TTI.getFPOpCost(I.getType()) == TargetTransformInfo::TCC_Expensive &&
2010  !match(&I, m_FNeg(m_Value())))
2011  onCallPenalty();
2012 
2013  return false;
2014 }
2015 
2016 bool CallAnalyzer::visitFNeg(UnaryOperator &I) {
2017  Value *Op = I.getOperand(0);
2018  Constant *COp = dyn_cast<Constant>(Op);
2019  if (!COp)
2020  COp = SimplifiedValues.lookup(Op);
2021 
2022  Value *SimpleV = SimplifyFNegInst(
2023  COp ? COp : Op, cast<FPMathOperator>(I).getFastMathFlags(), DL);
2024 
2025  if (Constant *C = dyn_cast_or_null<Constant>(SimpleV))
2026  SimplifiedValues[&I] = C;
2027 
2028  if (SimpleV)
2029  return true;
2030 
2031  // Disable any SROA on arguments to arbitrary, unsimplified fneg.
2032  disableSROA(Op);
2033 
2034  return false;
2035 }
2036 
2037 bool CallAnalyzer::visitLoad(LoadInst &I) {
2038  if (handleSROA(I.getPointerOperand(), I.isSimple()))
2039  return true;
2040 
2041  // If the data is already loaded from this address and hasn't been clobbered
2042  // by any stores or calls, this load is likely to be redundant and can be
2043  // eliminated.
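  // E.g. two unordered loads of the same address with no intervening store or
  // call: the second load is credited as an elimination opportunity below
  // (illustrative).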
2044  if (EnableLoadElimination &&
2045  !LoadAddrSet.insert(I.getPointerOperand()).second && I.isUnordered()) {
2046  onLoadEliminationOpportunity();
2047  return true;
2048  }
2049 
2050  return false;
2051 }
2052 
2053 bool CallAnalyzer::visitStore(StoreInst &I) {
2054  if (handleSROA(I.getPointerOperand(), I.isSimple()))
2055  return true;
2056 
2057  // The store can potentially clobber loads and prevent repeated loads from
2058  // being eliminated.
2059  // FIXME:
2060  // 1. We can probably keep an initial set of eliminatable loads substracted
2061  // from the cost even when we finally see a store. We just need to disable
2062  // *further* accumulation of elimination savings.
2063  // 2. We should probably at some point thread MemorySSA for the callee into
2064  // this and then use that to actually compute *really* precise savings.
2065  disableLoadElimination();
2066  return false;
2067 }
2068 
2069 bool CallAnalyzer::visitExtractValue(ExtractValueInst &I) {
2070  // Constant folding for extract value is trivial.
2071  if (simplifyInstruction(I, [&](SmallVectorImpl<Constant *> &COps) {
2072  return ConstantExpr::getExtractValue(COps[0], I.getIndices());
2073  }))
2074  return true;
2075 
2076  // SROA can't look through these, but they may be free.
2077  return Base::visitExtractValue(I);
2078 }
2079 
2080 bool CallAnalyzer::visitInsertValue(InsertValueInst &I) {
2081  // Constant folding for insert value is trivial.
2082  if (simplifyInstruction(I, [&](SmallVectorImpl<Constant *> &COps) {
2083  return ConstantExpr::getInsertValue(/*AggregateOperand*/ COps[0],
2084  /*InsertedValueOperand*/ COps[1],
2085  I.getIndices());
2086  }))
2087  return true;
2088 
2089  // SROA can't look through these, but they may be free.
2090  return Base::visitInsertValue(I);
2091 }
2092 
2093 /// Try to simplify a call site.
2094 ///
2095 /// Takes a concrete function and callsite and tries to actually simplify it by
2096 /// analyzing the arguments and call itself with instsimplify. Returns true if
2097 /// it has simplified the callsite to some other entity (a constant), making it
2098 /// free.
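/// For example, `call double @llvm.sqrt.f64(double 4.0)` constant-folds to
/// 2.0 once the argument is known to be constant at this call site
/// (illustrative; whether a given call folds is up to ConstantFoldCall).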
2099 bool CallAnalyzer::simplifyCallSite(Function *F, CallBase &Call) {
2100  // FIXME: Using the instsimplify logic directly for this is inefficient
2101  // because we have to continually rebuild the argument list even when no
2102  // simplifications can be performed. Until that is fixed with remapping
2103  // inside of instsimplify, directly constant fold calls here.
2104  if (!canConstantFoldCallTo(&Call, F))
2105  return false;
2106 
2107  // Try to re-map the arguments to constants.
2108  SmallVector<Constant *, 4> ConstantArgs;
2109  ConstantArgs.reserve(Call.arg_size());
2110  for (Value *I : Call.args()) {
2111  Constant *C = dyn_cast<Constant>(I);
2112  if (!C)
2113  C = dyn_cast_or_null<Constant>(SimplifiedValues.lookup(I));
2114  if (!C)
2115  return false; // This argument doesn't map to a constant.
2116 
2117  ConstantArgs.push_back(C);
2118  }
2119  if (Constant *C = ConstantFoldCall(&Call, F, ConstantArgs)) {
2120  SimplifiedValues[&Call] = C;
2121  return true;
2122  }
2123 
2124  return false;
2125 }
2126 
2127 bool CallAnalyzer::visitCallBase(CallBase &Call) {
2128  if (!onCallBaseVisitStart(Call))
2129  return true;
2130 
2131  if (Call.hasFnAttr(Attribute::ReturnsTwice) &&
2132  !F.hasFnAttribute(Attribute::ReturnsTwice)) {
2133  // This aborts the entire analysis.
2134  ExposesReturnsTwice = true;
2135  return false;
2136  }
2137  if (isa<CallInst>(Call) && cast<CallInst>(Call).cannotDuplicate())
2138  ContainsNoDuplicateCall = true;
2139 
2140  Value *Callee = Call.getCalledOperand();
2141  Function *F = dyn_cast_or_null<Function>(Callee);
2142  bool IsIndirectCall = !F;
2143  if (IsIndirectCall) {
2144  // Check if this happens to be an indirect function call to a known function
2145  // in this inline context. If not, we've done all we can.
2146  F = dyn_cast_or_null<Function>(SimplifiedValues.lookup(Callee));
2147  if (!F) {
2148  onCallArgumentSetup(Call);
2149 
2150  if (!Call.onlyReadsMemory())
2151  disableLoadElimination();
2152  return Base::visitCallBase(Call);
2153  }
2154  }
2155 
2156  assert(F && "Expected a call to a known function");
2157 
2158  // When we have a concrete function, first try to simplify it directly.
2159  if (simplifyCallSite(F, Call))
2160  return true;
2161 
2162  // Next check if it is an intrinsic we know about.
2163  // FIXME: Lift this into part of the InstVisitor.
2164  if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(&Call)) {
2165  switch (II->getIntrinsicID()) {
2166  default:
2167  if (!Call.onlyReadsMemory() && !isAssumeLikeIntrinsic(II))
2168  disableLoadElimination();
2169  return Base::visitCallBase(Call);
2170 
2171  case Intrinsic::load_relative:
2172  onLoadRelativeIntrinsic();
2173  return false;
2174 
2175  case Intrinsic::memset:
2176  case Intrinsic::memcpy:
2177  case Intrinsic::memmove:
2178  disableLoadElimination();
2179  // SROA can usually chew through these intrinsics, but they aren't free.
2180  return false;
2181  case Intrinsic::icall_branch_funnel:
2182  case Intrinsic::localescape:
2183  HasUninlineableIntrinsic = true;
2184  return false;
2185  case Intrinsic::vastart:
2186  InitsVargArgs = true;
2187  return false;
2188  case Intrinsic::launder_invariant_group:
2189  case Intrinsic::strip_invariant_group:
2190  if (auto *SROAArg = getSROAArgForValueOrNull(II->getOperand(0)))
2191  SROAArgValues[II] = SROAArg;
2192  return true;
2193  case Intrinsic::is_constant:
2194  return simplifyIntrinsicCallIsConstant(Call);
2195  }
2196  }
2197 
2198  if (F == Call.getFunction()) {
2199  // This flag will fully abort the analysis, so don't bother with anything
2200  // else.
2201  IsRecursiveCall = true;
2202  if (!AllowRecursiveCall)
2203  return false;
2204  }
2205 
2206  if (TTI.isLoweredToCall(F)) {
2207  onLoweredCall(F, Call, IsIndirectCall);
2208  }
2209 
2210  if (!(Call.onlyReadsMemory() || (IsIndirectCall && F->onlyReadsMemory())))
2211  disableLoadElimination();
2212  return Base::visitCallBase(Call);
2213 }
2214 
2215 bool CallAnalyzer::visitReturnInst(ReturnInst &RI) {
2216  // At least one return instruction will be free after inlining.
2217  bool Free = !HasReturn;
2218  HasReturn = true;
2219  return Free;
2220 }
2221 
2222 bool CallAnalyzer::visitBranchInst(BranchInst &BI) {
2223  // We model unconditional branches as essentially free -- they really
2224  // shouldn't exist at all, but handling them makes the behavior of the
2225  // inliner more regular and predictable. Interestingly, conditional branches
2226  // which will fold away are also free.
2227  return BI.isUnconditional() || isa<ConstantInt>(BI.getCondition()) ||
2228  isa_and_nonnull<ConstantInt>(
2229  SimplifiedValues.lookup(BI.getCondition()));
2230 }
2231 
2232 bool CallAnalyzer::visitSelectInst(SelectInst &SI) {
2233  bool CheckSROA = SI.getType()->isPointerTy();
2234  Value *TrueVal = SI.getTrueValue();
2235  Value *FalseVal = SI.getFalseValue();
2236 
2237  Constant *TrueC = dyn_cast<Constant>(TrueVal);
2238  if (!TrueC)
2239  TrueC = SimplifiedValues.lookup(TrueVal);
2240  Constant *FalseC = dyn_cast<Constant>(FalseVal);
2241  if (!FalseC)
2242  FalseC = SimplifiedValues.lookup(FalseVal);
2243  Constant *CondC =
2244  dyn_cast_or_null<Constant>(SimplifiedValues.lookup(SI.getCondition()));
2245 
2246  if (!CondC) {
2247  // Select C, X, X => X
2248  if (TrueC == FalseC && TrueC) {
2249  SimplifiedValues[&SI] = TrueC;
2250  return true;
2251  }
2252 
2253  if (!CheckSROA)
2254  return Base::visitSelectInst(SI);
2255 
2256  std::pair<Value *, APInt> TrueBaseAndOffset =
2257  ConstantOffsetPtrs.lookup(TrueVal);
2258  std::pair<Value *, APInt> FalseBaseAndOffset =
2259  ConstantOffsetPtrs.lookup(FalseVal);
2260  if (TrueBaseAndOffset == FalseBaseAndOffset && TrueBaseAndOffset.first) {
2261  ConstantOffsetPtrs[&SI] = TrueBaseAndOffset;
2262 
2263  if (auto *SROAArg = getSROAArgForValueOrNull(TrueVal))
2264  SROAArgValues[&SI] = SROAArg;
2265  return true;
2266  }
2267 
2268  return Base::visitSelectInst(SI);
2269  }
2270 
2271  // Select condition is a constant.
2272  Value *SelectedV = CondC->isAllOnesValue() ? TrueVal
2273  : (CondC->isNullValue()) ? FalseVal
2274  : nullptr;
2275  if (!SelectedV) {
2276  // Condition is a vector constant that is not all 1s or all 0s. If all
2277  // operands are constants, ConstantExpr::getSelect() can handle the cases
2278  // such as select vectors.
2279  if (TrueC && FalseC) {
2280  if (auto *C = ConstantExpr::getSelect(CondC, TrueC, FalseC)) {
2281  SimplifiedValues[&SI] = C;
2282  return true;
2283  }
2284  }
2285  return Base::visitSelectInst(SI);
2286  }
2287 
2288  // Condition is either all 1s or all 0s. SI can be simplified.
2289  if (Constant *SelectedC = dyn_cast<Constant>(SelectedV)) {
2290  SimplifiedValues[&SI] = SelectedC;
2291  return true;
2292  }
2293 
2294  if (!CheckSROA)
2295  return true;
2296 
2297  std::pair<Value *, APInt> BaseAndOffset =
2298  ConstantOffsetPtrs.lookup(SelectedV);
2299  if (BaseAndOffset.first) {
2300  ConstantOffsetPtrs[&SI] = BaseAndOffset;
2301 
2302  if (auto *SROAArg = getSROAArgForValueOrNull(SelectedV))
2303  SROAArgValues[&SI] = SROAArg;
2304  }
2305 
2306  return true;
2307 }
2308 
2309 bool CallAnalyzer::visitSwitchInst(SwitchInst &SI) {
2310  // We model unconditional switches as free, see the comments on handling
2311  // branches.
2312  if (isa<ConstantInt>(SI.getCondition()))
2313  return true;
2314  if (Value *V = SimplifiedValues.lookup(SI.getCondition()))
2315  if (isa<ConstantInt>(V))
2316  return true;
2317 
2318  // Assume the most general case where the switch is lowered into
2319  // either a jump table, bit test, or a balanced binary tree consisting of
2320  // case clusters without merging adjacent clusters with the same
2321  // destination. We do not consider the switches that are lowered with a mix
2322  // of jump table/bit test/binary search tree. The cost of the switch is
2323  // proportional to the size of the tree or the size of jump table range.
2324  //
2325  // NB: We convert large switches which are just used to initialize large phi
2326  // nodes to lookup tables instead in simplifycfg, so this shouldn't prevent
2327  // inlining those. It will prevent inlining in cases where the optimization
2328  // does not (yet) fire.
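  // E.g. a dense switch over cases 0..15 would be costed via a 16-entry jump
  // table, while four widely scattered cases would be costed as a depth-2
  // binary search tree (illustrative of the two regimes considered here).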
2329 
2330  unsigned JumpTableSize = 0;
2331  BlockFrequencyInfo *BFI = GetBFI ? &(GetBFI(F)) : nullptr;
2332  unsigned NumCaseCluster =
2333  TTI.getEstimatedNumberOfCaseClusters(SI, JumpTableSize, PSI, BFI);
2334 
2335  onFinalizeSwitch(JumpTableSize, NumCaseCluster);
2336  return false;
2337 }
2338 
2339 bool CallAnalyzer::visitIndirectBrInst(IndirectBrInst &IBI) {
2340  // We never want to inline functions that contain an indirectbr. This is
2341  // incorrect because all the blockaddress's (in static global initializers
2342  // for example) would be referring to the original function, and this
2343  // indirect jump would jump from the inlined copy of the function into the
2344  // original function which is extremely undefined behavior.
2345  // FIXME: This logic isn't really right; we can safely inline functions with
2346  // indirectbr's as long as no other function or global references the
2347  // blockaddress of a block within the current function.
2348  HasIndirectBr = true;
2349  return false;
2350 }
2351 
2352 bool CallAnalyzer::visitResumeInst(ResumeInst &RI) {
2353  // FIXME: It's not clear that a single instruction is an accurate model for
2354  // the inline cost of a resume instruction.
2355  return false;
2356 }
2357 
2358 bool CallAnalyzer::visitCleanupReturnInst(CleanupReturnInst &CRI) {
2359  // FIXME: It's not clear that a single instruction is an accurate model for
2360  // the inline cost of a cleanupret instruction.
2361  return false;
2362 }
2363 
2364 bool CallAnalyzer::visitCatchReturnInst(CatchReturnInst &CRI) {
2365  // FIXME: It's not clear that a single instruction is an accurate model for
2366  // the inline cost of a catchret instruction.
2367  return false;
2368 }
2369 
2370 bool CallAnalyzer::visitUnreachableInst(UnreachableInst &I) {
2371  // FIXME: It might be reasonable to discount the cost of instructions leading
2372  // to unreachable as they have the lowest possible impact on both runtime and
2373  // code size.
2374  return true; // No actual code is needed for unreachable.
2375 }
2376 
2377 bool CallAnalyzer::visitInstruction(Instruction &I) {
2378  // Some instructions are free. All of the free intrinsics can also be
2379  // handled by SROA, etc.
2380  if (TargetTransformInfo::TCC_Free ==
2381  TTI.getUserCost(&I, TargetTransformInfo::TCK_SizeAndLatency))
2382  return true;
2383 
2384  // We found something we don't understand or can't handle. Mark any SROA-able
2385  // values in the operand list as no longer viable.
2386  for (const Use &Op : I.operands())
2387  disableSROA(Op);
2388 
2389  return false;
2390 }
2391 
2392 /// Analyze a basic block for its contribution to the inline cost.
2393 ///
2394 /// This method walks the analyzer over every instruction in the given basic
2395 /// block and accounts for their cost during inlining at this callsite. It
2396 /// aborts early if the threshold has been exceeded or an impossible to inline
2397 /// construct has been detected. It returns false if inlining is no longer
2398 /// viable, and true if inlining remains viable.
2399 InlineResult
2400 CallAnalyzer::analyzeBlock(BasicBlock *BB,
2401  SmallPtrSetImpl<const Value *> &EphValues) {
2402  for (Instruction &I : *BB) {
2403  // FIXME: Currently, the number of instructions in a function, regardless of
2404  // our ability to simplify them during inlining to constants or dead code,
2405  // is actually used by the vector bonus heuristic. As long as that's true,
2406  // we have to special case debug intrinsics here to prevent differences in
2407  // inlining due to debug symbols. Eventually, the number of unsimplified
2408  // instructions shouldn't factor into the cost computation, but until then,
2409  // hack around it here.
2410  // Similarly, skip pseudo-probes.
2411  if (I.isDebugOrPseudoInst())
2412  continue;
2413 
2414  // Skip ephemeral values.
2415  if (EphValues.count(&I))
2416  continue;
2417 
2418  ++NumInstructions;
2419  if (isa<ExtractElementInst>(I) || I.getType()->isVectorTy())
2420  ++NumVectorInstructions;
2421 
2422  // If the instruction simplified to a constant, there is no cost to this
2423  // instruction. Visit the instructions using our InstVisitor to account for
2424  // all of the per-instruction logic. The visit tree returns true if we
2425  // consumed the instruction in any way, and false if the instruction's base
2426  // cost should count against inlining.
2427  onInstructionAnalysisStart(&I);
2428 
2429  if (Base::visit(&I))
2430  ++NumInstructionsSimplified;
2431  else
2432  onMissedSimplification();
2433 
2434  onInstructionAnalysisFinish(&I);
2435  using namespace ore;
2436  // If visiting this instruction detected an uninlinable pattern, abort.
2437  InlineResult IR = InlineResult::success();
2438  if (IsRecursiveCall && !AllowRecursiveCall)
2439  IR = InlineResult::failure("recursive");
2440  else if (ExposesReturnsTwice)
2441  IR = InlineResult::failure("exposes returns twice");
2442  else if (HasDynamicAlloca)
2443  IR = InlineResult::failure("dynamic alloca");
2444  else if (HasIndirectBr)
2445  IR = InlineResult::failure("indirect branch");
2446  else if (HasUninlineableIntrinsic)
2447  IR = InlineResult::failure("uninlinable intrinsic");
2448  else if (InitsVargArgs)
2449  IR = InlineResult::failure("varargs");
2450  if (!IR.isSuccess()) {
2451  if (ORE)
2452  ORE->emit([&]() {
2453  return OptimizationRemarkMissed(DEBUG_TYPE, "NeverInline",
2454  &CandidateCall)
2455  << NV("Callee", &F) << " has uninlinable pattern ("
2456  << NV("InlineResult", IR.getFailureReason())
2457  << ") and cost is not fully computed";
2458  });
2459  return IR;
2460  }
2461 
2462  // If the caller is a recursive function then we don't want to inline
2463  // functions which allocate a lot of stack space because it would increase
2464  // the caller stack usage dramatically.
2465  if (IsCallerRecursive &&
2466  AllocatedSize > InlineConstants::TotalAllocaSizeRecursiveCaller) {
2467  auto IR =
2468  InlineResult::failure("recursive and allocates too much stack space");
2469  if (ORE)
2470  ORE->emit([&]() {
2471  return OptimizationRemarkMissed(DEBUG_TYPE, "NeverInline",
2472  &CandidateCall)
2473  << NV("Callee", &F) << " is "
2474  << NV("InlineResult", IR.getFailureReason())
2475  << ". Cost is not fully computed";
2476  });
2477  return IR;
2478  }
2479 
2480  if (shouldStop())
2481  return InlineResult::failure(
2482  "Call site analysis is not favorable to inlining.");
2483  }
2484 
2485  return InlineResult::success();
2486 }
2487 
2488 /// Compute the base pointer and cumulative constant offsets for V.
2489 ///
2490 /// This strips all constant offsets off of V, leaving it the base pointer, and
2491 /// accumulates the total constant offset applied in the returned constant. It
2492 /// returns 0 if V is not a pointer, and returns the constant '0' if there are
2493 /// no constant offsets applied.
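/// For example, walking V = bitcast(gep inbounds(gep inbounds(p, 4), 8))
/// rewrites V to p and returns the constant 12 (illustrative; only inbounds
/// GEPs, bitcasts, and non-interposable aliases are looked through).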
2494 ConstantInt *CallAnalyzer::stripAndComputeInBoundsConstantOffsets(Value *&V) {
2495  if (!V->getType()->isPointerTy())
2496  return nullptr;
2497 
2498  unsigned AS = V->getType()->getPointerAddressSpace();
2499  unsigned IntPtrWidth = DL.getIndexSizeInBits(AS);
2500  APInt Offset = APInt::getZero(IntPtrWidth);
2501 
2502  // Even though we don't look through PHI nodes, we could be called on an
2503  // instruction in an unreachable block, which may be on a cycle.
2504  SmallPtrSet<Value *, 4> Visited;
2505  Visited.insert(V);
2506  do {
2507  if (GEPOperator *GEP = dyn_cast<GEPOperator>(V)) {
2508  if (!GEP->isInBounds() || !accumulateGEPOffset(*GEP, Offset))
2509  return nullptr;
2510  V = GEP->getPointerOperand();
2511  } else if (Operator::getOpcode(V) == Instruction::BitCast) {
2512  V = cast<Operator>(V)->getOperand(0);
2513  } else if (GlobalAlias *GA = dyn_cast<GlobalAlias>(V)) {
2514  if (GA->isInterposable())
2515  break;
2516  V = GA->getAliasee();
2517  } else {
2518  break;
2519  }
2520  assert(V->getType()->isPointerTy() && "Unexpected operand type!");
2521  } while (Visited.insert(V).second);
2522 
2523  Type *IdxPtrTy = DL.getIndexType(V->getType());
2524  return cast<ConstantInt>(ConstantInt::get(IdxPtrTy, Offset));
2525 }
2526 
2527 /// Find dead blocks due to deleted CFG edges during inlining.
2528 ///
2529 /// If we know the successor of the current block, \p CurrBB, has to be \p
2530 /// NextBB, the other successors of \p CurrBB are dead if these successors have
2531 /// no live incoming CFG edges. If one block is found to be dead, we can
2532 /// continue growing the dead block list by checking the successors of the dead
2533 /// blocks to see if all their incoming edges are dead or not.
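/// E.g. given CurrBB -> {NextBB, B} and B -> C: once the edge to B is known
/// dead, B dies if it has no other live predecessors, and C is then
/// re-examined the same way (illustrative CFG).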
2534 void CallAnalyzer::findDeadBlocks(BasicBlock *CurrBB, BasicBlock *NextBB) {
2535  auto IsEdgeDead = [&](BasicBlock *Pred, BasicBlock *Succ) {
2536  // A CFG edge is dead if the predecessor is dead or the predecessor has a
2537  // known successor which is not the one under exam.
2538  return (DeadBlocks.count(Pred) ||
2539  (KnownSuccessors[Pred] && KnownSuccessors[Pred] != Succ));
2540  };
2541 
2542  auto IsNewlyDead = [&](BasicBlock *BB) {
2543  // If all the edges to a block are dead, the block is also dead.
2544  return (!DeadBlocks.count(BB) &&
2545  llvm::all_of(predecessors(BB),
2546  [&](BasicBlock *P) { return IsEdgeDead(P, BB); }));
2547  };
2548 
2549  for (BasicBlock *Succ : successors(CurrBB)) {
2550  if (Succ == NextBB || !IsNewlyDead(Succ))
2551  continue;
2552  SmallVector<BasicBlock *, 4> NewDead;
2553  NewDead.push_back(Succ);
2554  while (!NewDead.empty()) {
2555  BasicBlock *Dead = NewDead.pop_back_val();
2556  if (DeadBlocks.insert(Dead))
2557  // Continue growing the dead block lists.
2558  for (BasicBlock *S : successors(Dead))
2559  if (IsNewlyDead(S))
2560  NewDead.push_back(S);
2561  }
2562  }
2563 }
2564 
2565 /// Analyze a call site for potential inlining.
2566 ///
2567 /// Returns true if inlining this call is viable, and false if it is not
2568 /// viable. It computes the cost and adjusts the threshold based on numerous
2569 /// factors and heuristics. If this method returns false but the computed cost
2570 /// is below the computed threshold, then inlining was forcibly disabled by
2571 /// some artifact of the routine.
2572 InlineResult CallAnalyzer::analyze() {
2573  ++NumCallsAnalyzed;
2574 
2575  auto Result = onAnalysisStart();
2576  if (!Result.isSuccess())
2577  return Result;
2578 
2579  if (F.empty())
2580  return InlineResult::success();
2581 
2582  Function *Caller = CandidateCall.getFunction();
2583  // Check if the caller function is recursive itself.
2584  for (User *U : Caller->users()) {
2585  CallBase *Call = dyn_cast<CallBase>(U);
2586  if (Call && Call->getFunction() == Caller) {
2587  IsCallerRecursive = true;
2588  break;
2589  }
2590  }
2591 
2592  // Populate our simplified values by mapping from function arguments to call
2593  // arguments with known important simplifications.
2594  auto CAI = CandidateCall.arg_begin();
2595  for (Argument &FAI : F.args()) {
2596  assert(CAI != CandidateCall.arg_end());
2597  if (Constant *C = dyn_cast<Constant>(CAI))
2598  SimplifiedValues[&FAI] = C;
2599 
2600  Value *PtrArg = *CAI;
2601  if (ConstantInt *C = stripAndComputeInBoundsConstantOffsets(PtrArg)) {
2602  ConstantOffsetPtrs[&FAI] = std::make_pair(PtrArg, C->getValue());
2603 
2604  // We can SROA any pointer arguments derived from alloca instructions.
2605  if (auto *SROAArg = dyn_cast<AllocaInst>(PtrArg)) {
2606  SROAArgValues[&FAI] = SROAArg;
2607  onInitializeSROAArg(SROAArg);
2608  EnabledSROAAllocas.insert(SROAArg);
2609  }
2610  }
2611  ++CAI;
2612  }
2613  NumConstantArgs = SimplifiedValues.size();
2614  NumConstantOffsetPtrArgs = ConstantOffsetPtrs.size();
2615  NumAllocaArgs = SROAArgValues.size();
2616 
2617  // FIXME: If a caller has multiple calls to a callee, we end up recomputing
2618  // the ephemeral values multiple times (and they're completely determined by
2619  // the callee, so this is purely duplicate work).
2620  SmallPtrSet<const Value *, 32> EphValues;
2621  CodeMetrics::collectEphemeralValues(&F, &GetAssumptionCache(F), EphValues);
2622 
2623  // The worklist of live basic blocks in the callee *after* inlining. We avoid
2624  // adding basic blocks of the callee which can be proven to be dead for this
2625  // particular call site in order to get more accurate cost estimates. This
2626  // requires a somewhat heavyweight iteration pattern: we need to walk the
2627  // basic blocks in a breadth-first order as we insert live successors. To
2628  // accomplish this, while optimizing for small iteration counts because we
2629  // exit after crossing our threshold, we use a small-size optimized SetVector.
2630  typedef SetVector<BasicBlock *, SmallVector<BasicBlock *, 16>,
2631  SmallPtrSet<BasicBlock *, 16>>
2632  BBSetVector;
2633  BBSetVector BBWorklist;
2634  BBWorklist.insert(&F.getEntryBlock());
2635 
2636  // Note that we *must not* cache the size, this loop grows the worklist.
2637  for (unsigned Idx = 0; Idx != BBWorklist.size(); ++Idx) {
2638  if (shouldStop())
2639  break;
2640 
2641  BasicBlock *BB = BBWorklist[Idx];
2642  if (BB->empty())
2643  continue;
2644 
2645  onBlockStart(BB);
2646 
2647  // Disallow inlining a blockaddress with uses other than strictly callbr.
2648  // A blockaddress only has defined behavior for an indirect branch in the
2649  // same function, and we do not currently support inlining indirect
2650  // branches. But, the inliner may not see an indirect branch that ends up
2651  // being dead code at a particular call site. If the blockaddress escapes
2652  // the function, e.g., via a global variable, inlining may lead to an
2653  // invalid cross-function reference.
2654  // FIXME: pr/39560: continue relaxing this overt restriction.
2655  if (BB->hasAddressTaken())
2656  for (User *U : BlockAddress::get(&*BB)->users())
2657  if (!isa<CallBrInst>(*U))
2658  return InlineResult::failure("blockaddress used outside of callbr");
2659 
2660  // Analyze the cost of this block. If we blow through the threshold, this
2661  // returns false, and we can bail on out.
2662  InlineResult IR = analyzeBlock(BB, EphValues);
2663  if (!IR.isSuccess())
2664  return IR;
2665 
2666  Instruction *TI = BB->getTerminator();
2667 
2668  // Add in the live successors by first checking whether we have terminator
2669  // that may be simplified based on the values simplified by this call.
2670  if (BranchInst *BI = dyn_cast<BranchInst>(TI)) {
2671  if (BI->isConditional()) {
2672  Value *Cond = BI->getCondition();
2673  if (ConstantInt *SimpleCond =
2674  dyn_cast_or_null<ConstantInt>(SimplifiedValues.lookup(Cond))) {
2675  BasicBlock *NextBB = BI->getSuccessor(SimpleCond->isZero() ? 1 : 0);
2676  BBWorklist.insert(NextBB);
2677  KnownSuccessors[BB] = NextBB;
2678  findDeadBlocks(BB, NextBB);
2679  continue;
2680  }
2681  }
2682  } else if (SwitchInst *SI = dyn_cast<SwitchInst>(TI)) {
2683  Value *Cond = SI->getCondition();
2684  if (ConstantInt *SimpleCond =
2685  dyn_cast_or_null<ConstantInt>(SimplifiedValues.lookup(Cond))) {
2686  BasicBlock *NextBB = SI->findCaseValue(SimpleCond)->getCaseSuccessor();
2687  BBWorklist.insert(NextBB);
2688  KnownSuccessors[BB] = NextBB;
2689  findDeadBlocks(BB, NextBB);
2690  continue;
2691  }
2692  }
2693 
2694  // If we're unable to select a particular successor, just count all of
2695  // them.
2696  for (unsigned TIdx = 0, TSize = TI->getNumSuccessors(); TIdx != TSize;
2697  ++TIdx)
2698  BBWorklist.insert(TI->getSuccessor(TIdx));
2699 
2700  onBlockAnalyzed(BB);
2701  }
2702 
2703  bool OnlyOneCallAndLocalLinkage = F.hasLocalLinkage() && F.hasOneLiveUse() &&
2704  &F == CandidateCall.getCalledFunction();
2705  // If this is a noduplicate call, we can still inline as long as
2706  // inlining this would cause the removal of the caller (so the instruction
2707  // is not actually duplicated, just moved).
2708  if (!OnlyOneCallAndLocalLinkage && ContainsNoDuplicateCall)
2709  return InlineResult::failure("noduplicate");
2710 
2711  return finalizeAnalysis();
2712 }
2713 
2714 void InlineCostCallAnalyzer::print(raw_ostream &OS) {
2715 #define DEBUG_PRINT_STAT(x) OS << " " #x ": " << x << "\n"
2716  if (PrintInstructionComments)
2717  F.print(OS, &Writer);
2718  DEBUG_PRINT_STAT(NumConstantArgs);
2719  DEBUG_PRINT_STAT(NumConstantOffsetPtrArgs);
2720  DEBUG_PRINT_STAT(NumAllocaArgs);
2721  DEBUG_PRINT_STAT(NumConstantPtrCmps);
2722  DEBUG_PRINT_STAT(NumConstantPtrDiffs);
2723  DEBUG_PRINT_STAT(NumInstructionsSimplified);
2724  DEBUG_PRINT_STAT(NumInstructions);
2725  DEBUG_PRINT_STAT(SROACostSavings);
2726  DEBUG_PRINT_STAT(SROACostSavingsLost);
2727  DEBUG_PRINT_STAT(LoadEliminationCost);
2728  DEBUG_PRINT_STAT(ContainsNoDuplicateCall);
2729  DEBUG_PRINT_STAT(Cost);
2730  DEBUG_PRINT_STAT(Threshold);
2731 #undef DEBUG_PRINT_STAT
2732 }
2733 
2734 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2735 /// Dump stats about this call's analysis.
2736 LLVM_DUMP_METHOD void InlineCostCallAnalyzer::dump() { print(dbgs()); }
2737 #endif
2738 
2739 /// Test that there are no attribute conflicts between Caller and Callee
2740 /// that prevent inlining.
2741 static bool functionsHaveCompatibleAttributes(
2742  Function *Caller, Function *Callee, TargetTransformInfo &TTI,
2743  function_ref<const TargetLibraryInfo &(Function &)> &GetTLI) {
2744  // Note that CalleeTLI must be a copy not a reference. The legacy pass manager
2745  // caches the most recently created TLI in the TargetLibraryInfoWrapperPass
2746  // object, and always returns the same object (which is overwritten on each
2747  // GetTLI call). Therefore we copy the first result.
2748  auto CalleeTLI = GetTLI(*Callee);
2749  return TTI.areInlineCompatible(Caller, Callee) &&
2750  GetTLI(*Caller).areInlineCompatible(CalleeTLI,
2751  InlineCallerSupersetNoBuiltin) &&
2752  AttributeFuncs::areInlineCompatible(*Caller, *Callee);
2753 }
2754 
2755 int llvm::getCallsiteCost(CallBase &Call, const DataLayout &DL) {
2756  int Cost = 0;
2757  for (unsigned I = 0, E = Call.arg_size(); I != E; ++I) {
2758  if (Call.isByValArgument(I)) {
2759  // We approximate the number of loads and stores needed by dividing the
2760  // size of the byval type by the target's pointer size.
2761  PointerType *PTy = cast<PointerType>(Call.getArgOperand(I)->getType());
2762  unsigned TypeSize = DL.getTypeSizeInBits(Call.getParamByValType(I));
2763  unsigned AS = PTy->getAddressSpace();
2764  unsigned PointerSize = DL.getPointerSizeInBits(AS);
2765  // Ceiling division.
2766  unsigned NumStores = (TypeSize + PointerSize - 1) / PointerSize;
2767 
2768  // If it generates more than 8 stores it is likely to be expanded as an
2769  // inline memcpy so we take that as an upper bound. Otherwise we assume
2770  // one load and one store per word copied.
2771  // FIXME: The maxStoresPerMemcpy setting from the target should be used
2772  // here instead of a magic number of 8, but it's not available via
2773  // DataLayout.
2774  NumStores = std::min(NumStores, 8U);
2775 
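      // Each word copied is modeled as one load plus one store, hence the
      // factor of two below; e.g. a 64-byte byval on a 64-bit target gives
      // NumStores = 8 and a cost of 16 * InstrCost (illustrative arithmetic).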
2776  Cost += 2 * NumStores * InlineConstants::InstrCost;
2777  } else {
2778  // For non-byval arguments subtract off one instruction per call
2779  // argument.
2780  Cost += InlineConstants::InstrCost;
2781  }
2782  }
2783  // The call instruction also disappears after inlining.
2784  Cost += InlineConstants::InstrCost + InlineConstants::CallPenalty;
2785  return Cost;
2786 }
2787 
2788 InlineCost llvm::getInlineCost(
2789  CallBase &Call, const InlineParams &Params, TargetTransformInfo &CalleeTTI,
2790  function_ref<AssumptionCache &(Function &)> GetAssumptionCache,
2791  function_ref<const TargetLibraryInfo &(Function &)> GetTLI,
2792  function_ref<BlockFrequencyInfo &(Function &)> GetBFI,
2793  ProfileSummaryInfo *PSI, OptimizationRemarkEmitter *ORE) {
2794  return getInlineCost(Call, Call.getCalledFunction(), Params, CalleeTTI,
2795  GetAssumptionCache, GetTLI, GetBFI, PSI, ORE);
2796 }
2797 
2798 Optional<int> llvm::getInliningCostEstimate(
2799  CallBase &Call, TargetTransformInfo &CalleeTTI,
2800  function_ref<AssumptionCache &(Function &)> GetAssumptionCache,
2801  function_ref<BlockFrequencyInfo &(Function &)> GetBFI,
2802  ProfileSummaryInfo *PSI, OptimizationRemarkEmitter *ORE) {
2803  const InlineParams Params = {/* DefaultThreshold*/ 0,
2804  /*HintThreshold*/ {},
2805  /*ColdThreshold*/ {},
2806  /*OptSizeThreshold*/ {},
2807  /*OptMinSizeThreshold*/ {},
2808  /*HotCallSiteThreshold*/ {},
2809  /*LocallyHotCallSiteThreshold*/ {},
2810  /*ColdCallSiteThreshold*/ {},
2811  /*ComputeFullInlineCost*/ true,
2812  /*EnableDeferral*/ true};
2813 
2814  InlineCostCallAnalyzer CA(*Call.getCalledFunction(), Call, Params, CalleeTTI,
2815  GetAssumptionCache, GetBFI, PSI, ORE, true,
2816  /*IgnoreThreshold*/ true);
2817  auto R = CA.analyze();
2818  if (!R.isSuccess())
2819  return None;
2820  return CA.getCost();
2821 }
2822 
2823 Optional<InlineCostFeatures> llvm::getInliningCostFeatures(
2824  CallBase &Call, TargetTransformInfo &CalleeTTI,
2825  function_ref<AssumptionCache &(Function &)> GetAssumptionCache,
2826  function_ref<BlockFrequencyInfo &(Function &)> GetBFI,
2827  ProfileSummaryInfo *PSI, OptimizationRemarkEmitter *ORE) {
2828  InlineCostFeaturesAnalyzer CFA(CalleeTTI, GetAssumptionCache, GetBFI, PSI,
2829  ORE, *Call.getCalledFunction(), Call);
2830  auto R = CFA.analyze();
2831  if (!R.isSuccess())
2832  return None;
2833  return CFA.features();
2834 }
2835 
2836 Optional<InlineResult> llvm::getAttributeBasedInliningDecision(
2837  CallBase &Call, Function *Callee, TargetTransformInfo &CalleeTTI,
2838  function_ref<const TargetLibraryInfo &(Function &)> GetTLI) {
2839 
2840  // Cannot inline indirect calls.
2841  if (!Callee)
2842  return InlineResult::failure("indirect call");
2843 
2844  // When a callee coroutine function is inlined into a caller coroutine
2845  // function before the coro-split pass, the coro-early pass cannot handle
2846  // this well. So we don't inline the coroutine function if it has not yet
2847  // been split.
2848  if (Callee->isPresplitCoroutine())
2849  return InlineResult::failure("unsplited coroutine call");
2850 
2851  // Never inline calls with byval arguments that do not have the alloca
2852  // address space. Since byval arguments can be replaced with a copy to an
2853  // alloca, the inlined code would need to be adjusted to handle that the
2854  // argument is in the alloca address space (so it is a little bit complicated
2855  // to solve).
2856  unsigned AllocaAS = Callee->getParent()->getDataLayout().getAllocaAddrSpace();
2857  for (unsigned I = 0, E = Call.arg_size(); I != E; ++I)
2858  if (Call.isByValArgument(I)) {
2859  PointerType *PTy = cast<PointerType>(Call.getArgOperand(I)->getType());
2860  if (PTy->getAddressSpace() != AllocaAS)
2861  return InlineResult::failure("byval arguments without alloca"
2862  " address space");
2863  }
2864 
2865  // Calls to functions with always-inline attributes should be inlined
2866  // whenever possible.
2867  if (Call.hasFnAttr(Attribute::AlwaysInline)) {
2868  auto IsViable = isInlineViable(*Callee);
2869  if (IsViable.isSuccess())
2870  return InlineResult::success();
2871  return InlineResult::failure(IsViable.getFailureReason());
2872  }
2873 
2874  // Never inline functions with conflicting attributes (unless callee has
2875  // always-inline attribute).
2876  Function *Caller = Call.getCaller();
2877  if (!functionsHaveCompatibleAttributes(Caller, Callee, CalleeTTI, GetTLI))
2878  return InlineResult::failure("conflicting attributes");
2879 
2880  // Don't inline this call if the caller has the optnone attribute.
2881  if (Caller->hasOptNone())
2882  return InlineResult::failure("optnone attribute");
2883 
2884  // Don't inline a function that treats null pointer as valid into a caller
2885  // that does not have this attribute.
2886  if (!Caller->nullPointerIsDefined() && Callee->nullPointerIsDefined())
2887  return InlineResult::failure("nullptr definitions incompatible");
2888 
2889  // Don't inline functions which can be interposed at link-time.
2890  if (Callee->isInterposable())
2891  return InlineResult::failure("interposable");
2892 
2893  // Don't inline functions marked noinline.
2894  if (Callee->hasFnAttribute(Attribute::NoInline))
2895  return InlineResult::failure("noinline function attribute");
2896 
2897  // Don't inline call sites marked noinline.
2898  if (Call.isNoInline())
2899  return InlineResult::failure("noinline call site attribute");
2900 
2901  // Don't inline functions if one does not have any stack protector attribute
2902  // but the other does.
2903  if (Caller->hasStackProtectorFnAttr() && !Callee->hasStackProtectorFnAttr())
2904  return InlineResult::failure(
2905  "stack protected caller but callee requested no stack protector");
2906  if (Callee->hasStackProtectorFnAttr() && !Caller->hasStackProtectorFnAttr())
2907  return InlineResult::failure(
2908  "stack protected callee but caller requested no stack protector");
2909 
2910  return None;
2911 }
2912 
2913 InlineCost llvm::getInlineCost(
2914  CallBase &Call, Function *Callee, const InlineParams &Params,
2915  TargetTransformInfo &CalleeTTI,
2916  function_ref<AssumptionCache &(Function &)> GetAssumptionCache,
2917  function_ref<const TargetLibraryInfo &(Function &)> GetTLI,
2918  function_ref<BlockFrequencyInfo &(Function &)> GetBFI,
2919  ProfileSummaryInfo *PSI, OptimizationRemarkEmitter *ORE) {
2920 
2921  auto UserDecision =
2922  llvm::getAttributeBasedInliningDecision(Call, Callee, CalleeTTI, GetTLI);
2923 
2924  if (UserDecision.hasValue()) {
2925  if (UserDecision->isSuccess())
2926  return llvm::InlineCost::getAlways("always inline attribute");
2927  return llvm::InlineCost::getNever(UserDecision->getFailureReason());
2928  }
2929 
2930  LLVM_DEBUG(llvm::dbgs() << " Analyzing call of " << Callee->getName()
2931  << "... (caller:" << Call.getCaller()->getName()
2932  << ")\n");
2933 
2934  InlineCostCallAnalyzer CA(*Callee, Call, Params, CalleeTTI,
2935  GetAssumptionCache, GetBFI, PSI, ORE);
2936  InlineResult ShouldInline = CA.analyze();
2937 
2938  LLVM_DEBUG(CA.dump());
2939 
2940  // Always make cost benefit based decision explicit.
2941  // We use always/never here since threshold is not meaningful,
2942  // as it's not what drives cost-benefit analysis.
2943  if (CA.wasDecidedByCostBenefit()) {
2944  if (ShouldInline.isSuccess())
2945  return InlineCost::getAlways("benefit over cost",
2946  CA.getCostBenefitPair());
2947  else
2948  return InlineCost::getNever("cost over benefit", CA.getCostBenefitPair());
2949  }
2950 
2951  if (CA.wasDecidedByCostThreshold())
2952  return InlineCost::get(CA.getCost(), CA.getThreshold());
2953 
2954  // No details on how the decision was made; simply return always or never.
2955  return ShouldInline.isSuccess()
2956  ? InlineCost::getAlways("empty function")
2957  : InlineCost::getNever(ShouldInline.getFailureReason());
2958 }
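
For orientation, here is a minimal sketch of how a client pass might drive this entry point; the analysis-manager wiring (the FAM name and the two lambdas) is illustrative, not part of this file:

    // Hypothetical caller of llvm::getInlineCost (sketch only; FAM is assumed
    // to be a FunctionAnalysisManager available in the calling pass).
    auto GetAssumptionCache = [&](Function &F) -> AssumptionCache & {
      return FAM.getResult<AssumptionAnalysis>(F);
    };
    auto GetTLI = [&](Function &F) -> const TargetLibraryInfo & {
      return FAM.getResult<TargetLibraryAnalysis>(F);
    };
    InlineCost IC =
        llvm::getInlineCost(Call, Params, CalleeTTI, GetAssumptionCache, GetTLI);
    if (IC.isAlways())
      ; // user directive or cost-benefit analysis says always inline
    else if (IC.isNever())
      ; // user directive or cost-benefit analysis says never inline
    else
      ; // threshold-based: profitable iff IC.getCost() < IC.getThreshold()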
2959 
2960 InlineResult llvm::isInlineViable(Function &F) {
2961  bool ReturnsTwice = F.hasFnAttribute(Attribute::ReturnsTwice);
2962  for (BasicBlock &BB : F) {
2963  // Disallow inlining of functions which contain indirect branches.
2964  if (isa<IndirectBrInst>(BB.getTerminator()))
2965  return InlineResult::failure("contains indirect branches");
2966 
2967  // Disallow inlining of blockaddresses which are used by non-callbr
2968  // instructions.
2969  if (BB.hasAddressTaken())
2970  for (User *U : BlockAddress::get(&BB)->users())
2971  if (!isa<CallBrInst>(*U))
2972  return InlineResult::failure("blockaddress used outside of callbr");
2973 
2974  for (auto &II : BB) {
2975  CallBase *Call = dyn_cast<CallBase>(&II);
2976  if (!Call)
2977  continue;
2978 
2979  // Disallow recursive calls.
2980  Function *Callee = Call->getCalledFunction();
2981  if (&F == Callee)
2982  return InlineResult::failure("recursive call");
2983 
2984  // Disallow calls which expose returns-twice to a function not previously
2985  // attributed as such.
2986  if (!ReturnsTwice && isa<CallInst>(Call) &&
2987  cast<CallInst>(Call)->canReturnTwice())
2988  return InlineResult::failure("exposes returns-twice attribute");
2989 
2990  if (Callee)
2991  switch (Callee->getIntrinsicID()) {
2992  default:
2993  break;
2994  case llvm::Intrinsic::icall_branch_funnel:
2995  // Disallow inlining of @llvm.icall.branch.funnel because current
2996  // backend can't separate call targets from call arguments.
2997  return InlineResult::failure(
2998  "disallowed inlining of @llvm.icall.branch.funnel");
2999  case llvm::Intrinsic::localescape:
3000  // Disallow inlining functions that call @llvm.localescape. Doing this
3001  // correctly would require major changes to the inliner.
3002  return InlineResult::failure(
3003  "disallowed inlining of @llvm.localescape");
3004  case llvm::Intrinsic::vastart:
3005  // Disallow inlining of functions that initialize VarArgs with
3006  // va_start.
3007  return InlineResult::failure(
3008  "contains VarArgs initialized with va_start");
3009  }
3010  }
3011  }
3012 
3013  return InlineResult::success();
3014 }
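
Since isInlineViable is the cheap structural filter behind the always-inline path above, it can also be queried on its own; a short sketch (the stream choice is illustrative):

    // Hypothetical pre-check before paying for the full cost analysis.
    InlineResult Viable = llvm::isInlineViable(*Callee);
    if (!Viable.isSuccess())
      llvm::errs() << Callee->getName() << " is not inlinable: "
                   << Viable.getFailureReason() << "\n";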
3015 
3016 // APIs to create InlineParams based on command line flags and/or other
3017 // parameters.
3018 
3019 InlineParams llvm::getInlineParams(int Threshold) {
3020  InlineParams Params;
3021 
3022  // This field is the threshold to use for a callee by default. This is
3023  // derived from one or more of:
3024  // * optimization or size-optimization levels,
3025  // * a value passed to createFunctionInliningPass function, or
3026  // * the -inline-threshold flag.
3027  // If the -inline-threshold flag is explicitly specified, that is used
3028  // irrespective of anything else.
3029  if (InlineThreshold.getNumOccurrences() > 0)
3030  Params.DefaultThreshold = InlineThreshold;
3031  else
3032  Params.DefaultThreshold = Threshold;
3033 
3034  // Set the HintThreshold knob from the -inlinehint-threshold.
3035  Params.HintThreshold = HintThreshold;
3036 
3037  // Set the HotCallSiteThreshold knob from the -hot-callsite-threshold.
3038  Params.HotCallSiteThreshold = HotCallSiteThreshold;
3039 
3040  // If the -locally-hot-callsite-threshold is explicitly specified, use it to
3041  // populate LocallyHotCallSiteThreshold. Later, we populate
3042  // Params.LocallyHotCallSiteThreshold from -locally-hot-callsite-threshold if
3043  // we know that optimization level is O3 (in the getInlineParams variant that
3044  // takes the opt and size levels).
3045  // FIXME: Remove this check (and make the assignment unconditional) after
3046  // addressing size regression issues at O2.
3047  if (LocallyHotCallSiteThreshold.getNumOccurrences() > 0)
3048  Params.LocallyHotCallSiteThreshold = LocallyHotCallSiteThreshold;
3049 
3050  // Set the ColdCallSiteThreshold knob from the
3051  // -inline-cold-callsite-threshold.
3052  Params.ColdCallSiteThreshold = ColdCallSiteThreshold;
3053 
3054  // Set the OptMinSizeThreshold and OptSizeThreshold params only if the
3055  // -inlinehint-threshold commandline option is not explicitly given. If that
3056  // option is present, then its value applies even for callees with size and
3057  // minsize attributes.
3058  // If the -inline-threshold is not specified, set the ColdThreshold from the
3059  // -inlinecold-threshold even if it is not explicitly passed. If
3060  // -inline-threshold is specified, then -inlinecold-threshold needs to be
3061  // explicitly specified to set the ColdThreshold knob
3062  if (InlineThreshold.getNumOccurrences() == 0) {
3063  Params.OptMinSizeThreshold = InlineConstants::OptMinSizeThreshold;
3064  Params.OptSizeThreshold = InlineConstants::OptSizeThreshold;
3065  Params.ColdThreshold = ColdThreshold;
3066  } else if (ColdThreshold.getNumOccurrences() > 0) {
3067  Params.ColdThreshold = ColdThreshold;
3068  }
3069  return Params;
3070 }
3071 
3072 InlineParams llvm::getInlineParams() {
3073  return getInlineParams(DefaultThreshold);
3074 }
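
As the comments above spell out, an explicit -inline-threshold on the command line overrides the Threshold argument; a sketch of the observable behavior, assuming the flag was passed as -inline-threshold=500:

    // The argument loses to the explicitly given flag:
    InlineParams P = llvm::getInlineParams(/*Threshold=*/1000);
    // P.DefaultThreshold is 500 here, not 1000.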
3075 
3076 // Compute the default threshold for inlining based on the opt level and the
3077 // size opt level.
3078 static int computeThresholdFromOptLevels(unsigned OptLevel,
3079  unsigned SizeOptLevel) {
3080  if (OptLevel > 2)
3081  return InlineConstants::OptAggressiveThreshold;
3082  if (SizeOptLevel == 1) // -Os
3083  return InlineConstants::OptSizeThreshold;
3084  if (SizeOptLevel == 2) // -Oz
3085  return InlineConstants::OptMinSizeThreshold;
3086  return DefaultThreshold;
3087 }
3088 
3089 InlineParams llvm::getInlineParams(unsigned OptLevel, unsigned SizeOptLevel) {
3090  auto Params =
3091  getInlineParams(computeThresholdFromOptLevels(OptLevel, SizeOptLevel));
3092  // At O3, use the value of -locally-hot-callsite-threshold option to populate
3093  // Params.LocallyHotCallSiteThreshold. Below O3, this flag has effect only
3094  // when it is specified explicitly.
3095  if (OptLevel > 2)
3096  Params.LocallyHotCallSiteThreshold = LocallyHotCallSiteThreshold;
3097  return Params;
3098 }
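
Putting the two functions together, each familiar driver level maps to a starting threshold; a small illustration (the concrete constants are declared in llvm/Analysis/InlineCost.h):

    InlineParams AtO3 = getInlineParams(3, 0); // InlineConstants::OptAggressiveThreshold
    InlineParams AtOs = getInlineParams(2, 1); // InlineConstants::OptSizeThreshold
    InlineParams AtOz = getInlineParams(2, 2); // InlineConstants::OptMinSizeThreshold
    InlineParams AtO2 = getInlineParams(2, 0); // DefaultThreshold (225 unless overridden)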
3099 
3100 PreservedAnalyses
3101 InlineCostAnnotationPrinterPass::run(Function &F,
3102  FunctionAnalysisManager &FAM) {
3103  PrintInstructionComments = true;
3104  std::function<AssumptionCache &(Function &)> GetAssumptionCache =
3105  [&](Function &F) -> AssumptionCache & {
3106  return FAM.getResult<AssumptionAnalysis>(F);
3107  };
3108  Module *M = F.getParent();
3109  ProfileSummaryInfo PSI(*M);
3110  DataLayout DL(M);
3111  TargetTransformInfo TTI(DL);
3112  // FIXME: Redesign the usage of InlineParams to expand the scope of this pass.
3113  // In the current implementation, the type of InlineParams doesn't matter as
3114  // the pass serves only for verification of inliner's decisions.
3115  // We can add a flag which determines InlineParams for this run. Right now,
3116  // the default InlineParams are used.
3117  const InlineParams Params = llvm::getInlineParams();
3118  for (BasicBlock &BB : F) {
3119  for (Instruction &I : BB) {
3120  if (CallInst *CI = dyn_cast<CallInst>(&I)) {
3121  Function *CalledFunction = CI->getCalledFunction();
3122  if (!CalledFunction || CalledFunction->isDeclaration())
3123  continue;
3124  OptimizationRemarkEmitter ORE(CalledFunction);
3125  InlineCostCallAnalyzer ICCA(*CalledFunction, *CI, Params, TTI,
3126  GetAssumptionCache, nullptr, &PSI, &ORE);
3127  ICCA.analyze();
3128  OS << " Analyzing call of " << CalledFunction->getName()
3129  << "... (caller:" << CI->getCaller()->getName() << ")\n";
3130  ICCA.print(OS);
3131  OS << "\n";
3132  }
3133  }
3134  }
3135  return PreservedAnalyses::all();
3136 }
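
The annotation printer is an ordinary function pass; assuming the usual print<inline-cost> registration in PassRegistry.def, it can be constructed directly or driven from opt:

    // Direct use (sketch; F and FAM come from the surrounding pass pipeline).
    InlineCostAnnotationPrinterPass Printer(llvm::errs());
    PreservedAnalyses PA = Printer.run(F, FAM);
    // From the command line (illustrative):
    //   opt -passes='print<inline-cost>' -disable-output input.ll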