LLVM  9.0.0svn
InlineCost.cpp
Go to the documentation of this file.
1 //===- InlineCost.cpp - Cost analysis for inliner -------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file implements inline cost analysis.
10 //
11 //===----------------------------------------------------------------------===//
12 
14 #include "llvm/ADT/STLExtras.h"
15 #include "llvm/ADT/SetVector.h"
16 #include "llvm/ADT/SmallPtrSet.h"
17 #include "llvm/ADT/SmallVector.h"
18 #include "llvm/ADT/Statistic.h"
23 #include "llvm/Analysis/CFG.h"
25 #include "llvm/Analysis/LoopInfo.h"
29 #include "llvm/Config/llvm-config.h"
30 #include "llvm/IR/CallingConv.h"
31 #include "llvm/IR/DataLayout.h"
32 #include "llvm/IR/Dominators.h"
34 #include "llvm/IR/GlobalAlias.h"
35 #include "llvm/IR/InstVisitor.h"
36 #include "llvm/IR/IntrinsicInst.h"
37 #include "llvm/IR/Operator.h"
38 #include "llvm/IR/PatternMatch.h"
39 #include "llvm/Support/Debug.h"
41 
42 using namespace llvm;
43 
44 #define DEBUG_TYPE "inline-cost"
45 
46 STATISTIC(NumCallsAnalyzed, "Number of call sites analyzed");
47 
49  "inline-threshold", cl::Hidden, cl::init(225), cl::ZeroOrMore,
50  cl::desc("Control the amount of inlining to perform (default = 225)"));
51 
53  "inlinehint-threshold", cl::Hidden, cl::init(325), cl::ZeroOrMore,
54  cl::desc("Threshold for inlining functions with inline hint"));
55 
56 static cl::opt<int>
57  ColdCallSiteThreshold("inline-cold-callsite-threshold", cl::Hidden,
59  cl::desc("Threshold for inlining cold callsites"));
60 
61 // We introduce this threshold to help performance of instrumentation based
62 // PGO before we actually hook up inliner with analysis passes such as BPI and
63 // BFI.
65  "inlinecold-threshold", cl::Hidden, cl::init(45), cl::ZeroOrMore,
66  cl::desc("Threshold for inlining functions with cold attribute"));
67 
68 static cl::opt<int>
69  HotCallSiteThreshold("hot-callsite-threshold", cl::Hidden, cl::init(3000),
71  cl::desc("Threshold for hot callsites "));
72 
74  "locally-hot-callsite-threshold", cl::Hidden, cl::init(525), cl::ZeroOrMore,
75  cl::desc("Threshold for locally hot callsites "));
76 
78  "cold-callsite-rel-freq", cl::Hidden, cl::init(2), cl::ZeroOrMore,
79  cl::desc("Maximum block frequency, expressed as a percentage of caller's "
80  "entry frequency, for a callsite to be cold in the absence of "
81  "profile information."));
82 
84  "hot-callsite-rel-freq", cl::Hidden, cl::init(60), cl::ZeroOrMore,
85  cl::desc("Minimum block frequency, expressed as a multiple of caller's "
86  "entry frequency, for a callsite to be hot in the absence of "
87  "profile information."));
88 
90  "inline-cost-full", cl::Hidden, cl::init(false), cl::ZeroOrMore,
91  cl::desc("Compute the full inline cost of a call site even when the cost "
92  "exceeds the threshold."));
93 
94 namespace {
95 
96 class CallAnalyzer : public InstVisitor<CallAnalyzer, bool> {
98  friend class InstVisitor<CallAnalyzer, bool>;
99 
100  /// The TargetTransformInfo available for this compilation.
101  const TargetTransformInfo &TTI;
102 
103  /// Getter for the cache of @llvm.assume intrinsics.
104  std::function<AssumptionCache &(Function &)> &GetAssumptionCache;
105 
106  /// Getter for BlockFrequencyInfo
108 
109  /// Profile summary information.
110  ProfileSummaryInfo *PSI;
111 
112  /// The called function.
113  Function &F;
114 
115  // Cache the DataLayout since we use it a lot.
116  const DataLayout &DL;
117 
118  /// The OptimizationRemarkEmitter available for this compilation.
120 
121  /// The candidate callsite being analyzed. Please do not use this to do
122  /// analysis in the caller function; we want the inline cost query to be
123  /// easily cacheable. Instead, use the cover function paramHasAttr.
124  CallBase &CandidateCall;
125 
126  /// Tunable parameters that control the analysis.
127  const InlineParams &Params;
128 
129  /// Upper bound for the inlining cost. Bonuses are being applied to account
130  /// for speculative "expected profit" of the inlining decision.
131  int Threshold;
132 
133  /// Inlining cost measured in abstract units, accounts for all the
134  /// instructions expected to be executed for a given function invocation.
135  /// Instructions that are statically proven to be dead based on call-site
136  /// arguments are not counted here.
137  int Cost = 0;
138 
139  bool ComputeFullInlineCost;
140 
141  bool IsCallerRecursive = false;
142  bool IsRecursiveCall = false;
143  bool ExposesReturnsTwice = false;
144  bool HasDynamicAlloca = false;
145  bool ContainsNoDuplicateCall = false;
146  bool HasReturn = false;
147  bool HasIndirectBr = false;
148  bool HasUninlineableIntrinsic = false;
149  bool InitsVargArgs = false;
150 
151  /// Number of bytes allocated statically by the callee.
152  uint64_t AllocatedSize = 0;
153  unsigned NumInstructions = 0;
154  unsigned NumVectorInstructions = 0;
155 
156  /// Bonus to be applied when percentage of vector instructions in callee is
157  /// high (see more details in updateThreshold).
158  int VectorBonus = 0;
159  /// Bonus to be applied when the callee has only one reachable basic block.
160  int SingleBBBonus = 0;
161 
162  /// While we walk the potentially-inlined instructions, we build up and
163  /// maintain a mapping of simplified values specific to this callsite. The
164  /// idea is to propagate any special information we have about arguments to
165  /// this call through the inlinable section of the function, and account for
166  /// likely simplifications post-inlining. The most important aspect we track
167  /// is CFG altering simplifications -- when we prove a basic block dead, that
168  /// can cause dramatic shifts in the cost of inlining a function.
169  DenseMap<Value *, Constant *> SimplifiedValues;
170 
171  /// Keep track of the values which map back (through function arguments) to
172  /// allocas on the caller stack which could be simplified through SROA.
173  DenseMap<Value *, Value *> SROAArgValues;
174 
175  /// The mapping of caller Alloca values to their accumulated cost savings. If
176  /// we have to disable SROA for one of the allocas, this tells us how much
177  /// cost must be added.
178  DenseMap<Value *, int> SROAArgCosts;
179 
180  /// Keep track of values which map to a pointer base and constant offset.
182 
183  /// Keep track of dead blocks due to the constant arguments.
184  SetVector<BasicBlock *> DeadBlocks;
185 
186  /// The mapping of the blocks to their known unique successors due to the
187  /// constant arguments.
188  DenseMap<BasicBlock *, BasicBlock *> KnownSuccessors;
189 
190  /// Model the elimination of repeated loads that is expected to happen
191  /// whenever we simplify away the stores that would otherwise cause them to be
192  /// loads.
193  bool EnableLoadElimination;
194  SmallPtrSet<Value *, 16> LoadAddrSet;
195  int LoadEliminationCost = 0;
196 
197  // Custom simplification helper routines.
198  bool isAllocaDerivedArg(Value *V);
199  bool lookupSROAArgAndCost(Value *V, Value *&Arg,
201  void disableSROA(DenseMap<Value *, int>::iterator CostIt);
202  void disableSROA(Value *V);
203  void findDeadBlocks(BasicBlock *CurrBB, BasicBlock *NextBB);
204  void accumulateSROACost(DenseMap<Value *, int>::iterator CostIt,
205  int InstructionCost);
206  void disableLoadElimination();
207  bool isGEPFree(GetElementPtrInst &GEP);
208  bool canFoldInboundsGEP(GetElementPtrInst &I);
209  bool accumulateGEPOffset(GEPOperator &GEP, APInt &Offset);
210  bool simplifyCallSite(Function *F, CallBase &Call);
211  template <typename Callable>
212  bool simplifyInstruction(Instruction &I, Callable Evaluate);
213  ConstantInt *stripAndComputeInBoundsConstantOffsets(Value *&V);
214 
215  /// Return true if the given argument to the function being considered for
216  /// inlining has the given attribute set either at the call site or the
217  /// function declaration. Primarily used to inspect call site specific
218  /// attributes since these can be more precise than the ones on the callee
219  /// itself.
220  bool paramHasAttr(Argument *A, Attribute::AttrKind Attr);
221 
222  /// Return true if the given value is known non null within the callee if
223  /// inlined through this particular callsite.
224  bool isKnownNonNullInCallee(Value *V);
225 
226  /// Update Threshold based on callsite properties such as callee
227  /// attributes and callee hotness for PGO builds. The Callee is explicitly
228  /// passed to support analyzing indirect calls whose target is inferred by
229  /// analysis.
230  void updateThreshold(CallBase &Call, Function &Callee);
231 
232  /// Return true if size growth is allowed when inlining the callee at \p Call.
233  bool allowSizeGrowth(CallBase &Call);
234 
235  /// Return true if \p Call is a cold callsite.
236  bool isColdCallSite(CallBase &Call, BlockFrequencyInfo *CallerBFI);
237 
238  /// Return a higher threshold if \p Call is a hot callsite.
239  Optional<int> getHotCallSiteThreshold(CallBase &Call,
240  BlockFrequencyInfo *CallerBFI);
241 
242  // Custom analysis routines.
243  InlineResult analyzeBlock(BasicBlock *BB,
244  SmallPtrSetImpl<const Value *> &EphValues);
245 
246  /// Handle a capped 'int' increment for Cost.
247  void addCost(int64_t Inc, int64_t UpperBound = INT_MAX) {
248  assert(UpperBound > 0 && UpperBound <= INT_MAX && "invalid upper bound");
249  Cost = (int)std::min(UpperBound, Cost + Inc);
250  }
251 
252  // Disable several entry points to the visitor so we don't accidentally use
253  // them by declaring but not defining them here.
254  void visit(Module *);
255  void visit(Module &);
256  void visit(Function *);
257  void visit(Function &);
258  void visit(BasicBlock *);
259  void visit(BasicBlock &);
260 
261  // Provide base case for our instruction visit.
262  bool visitInstruction(Instruction &I);
263 
264  // Our visit overrides.
265  bool visitAlloca(AllocaInst &I);
266  bool visitPHI(PHINode &I);
267  bool visitGetElementPtr(GetElementPtrInst &I);
268  bool visitBitCast(BitCastInst &I);
269  bool visitPtrToInt(PtrToIntInst &I);
270  bool visitIntToPtr(IntToPtrInst &I);
271  bool visitCastInst(CastInst &I);
272  bool visitUnaryInstruction(UnaryInstruction &I);
273  bool visitCmpInst(CmpInst &I);
274  bool visitSub(BinaryOperator &I);
275  bool visitBinaryOperator(BinaryOperator &I);
276  bool visitFNeg(UnaryOperator &I);
277  bool visitLoad(LoadInst &I);
278  bool visitStore(StoreInst &I);
279  bool visitExtractValue(ExtractValueInst &I);
280  bool visitInsertValue(InsertValueInst &I);
281  bool visitCallBase(CallBase &Call);
282  bool visitReturnInst(ReturnInst &RI);
283  bool visitBranchInst(BranchInst &BI);
284  bool visitSelectInst(SelectInst &SI);
285  bool visitSwitchInst(SwitchInst &SI);
286  bool visitIndirectBrInst(IndirectBrInst &IBI);
287  bool visitResumeInst(ResumeInst &RI);
288  bool visitCleanupReturnInst(CleanupReturnInst &RI);
289  bool visitCatchReturnInst(CatchReturnInst &RI);
290  bool visitUnreachableInst(UnreachableInst &I);
291 
292 public:
293  CallAnalyzer(const TargetTransformInfo &TTI,
294  std::function<AssumptionCache &(Function &)> &GetAssumptionCache,
297  Function &Callee, CallBase &Call, const InlineParams &Params)
298  : TTI(TTI), GetAssumptionCache(GetAssumptionCache), GetBFI(GetBFI),
299  PSI(PSI), F(Callee), DL(F.getParent()->getDataLayout()), ORE(ORE),
300  CandidateCall(Call), Params(Params), Threshold(Params.DefaultThreshold),
301  ComputeFullInlineCost(OptComputeFullInlineCost ||
302  Params.ComputeFullInlineCost || ORE),
303  EnableLoadElimination(true) {}
304 
305  InlineResult analyzeCall(CallBase &Call);
306 
307  int getThreshold() { return Threshold; }
308  int getCost() { return Cost; }
309 
310  // Keep a bunch of stats about the cost savings found so we can print them
311  // out when debugging.
312  unsigned NumConstantArgs = 0;
313  unsigned NumConstantOffsetPtrArgs = 0;
314  unsigned NumAllocaArgs = 0;
315  unsigned NumConstantPtrCmps = 0;
316  unsigned NumConstantPtrDiffs = 0;
317  unsigned NumInstructionsSimplified = 0;
318  unsigned SROACostSavings = 0;
319  unsigned SROACostSavingsLost = 0;
320 
321  void dump();
322 };
323 
324 } // namespace
325 
326 /// Test whether the given value is an Alloca-derived function argument.
327 bool CallAnalyzer::isAllocaDerivedArg(Value *V) {
328  return SROAArgValues.count(V);
329 }
330 
331 /// Lookup the SROA-candidate argument and cost iterator which V maps to.
332 /// Returns false if V does not map to a SROA-candidate.
333 bool CallAnalyzer::lookupSROAArgAndCost(
335  if (SROAArgValues.empty() || SROAArgCosts.empty())
336  return false;
337 
338  DenseMap<Value *, Value *>::iterator ArgIt = SROAArgValues.find(V);
339  if (ArgIt == SROAArgValues.end())
340  return false;
341 
342  Arg = ArgIt->second;
343  CostIt = SROAArgCosts.find(Arg);
344  return CostIt != SROAArgCosts.end();
345 }
346 
347 /// Disable SROA for the candidate marked by this cost iterator.
348 ///
349 /// This marks the candidate as no longer viable for SROA, and adds the cost
350 /// savings associated with it back into the inline cost measurement.
351 void CallAnalyzer::disableSROA(DenseMap<Value *, int>::iterator CostIt) {
352  // If we're no longer able to perform SROA we need to undo its cost savings
353  // and prevent subsequent analysis.
354  addCost(CostIt->second);
355  SROACostSavings -= CostIt->second;
356  SROACostSavingsLost += CostIt->second;
357  SROAArgCosts.erase(CostIt);
358  disableLoadElimination();
359 }
360 
361 /// If 'V' maps to a SROA candidate, disable SROA for it.
362 void CallAnalyzer::disableSROA(Value *V) {
363  Value *SROAArg;
365  if (lookupSROAArgAndCost(V, SROAArg, CostIt))
366  disableSROA(CostIt);
367 }
368 
369 /// Accumulate the given cost for a particular SROA candidate.
370 void CallAnalyzer::accumulateSROACost(DenseMap<Value *, int>::iterator CostIt,
371  int InstructionCost) {
372  CostIt->second += InstructionCost;
373  SROACostSavings += InstructionCost;
374 }
375 
376 void CallAnalyzer::disableLoadElimination() {
377  if (EnableLoadElimination) {
378  addCost(LoadEliminationCost);
379  LoadEliminationCost = 0;
380  EnableLoadElimination = false;
381  }
382 }
383 
384 /// Accumulate a constant GEP offset into an APInt if possible.
385 ///
386 /// Returns false if unable to compute the offset for any reason. Respects any
387 /// simplified values known during the analysis of this callsite.
388 bool CallAnalyzer::accumulateGEPOffset(GEPOperator &GEP, APInt &Offset) {
389  unsigned IntPtrWidth = DL.getIndexTypeSizeInBits(GEP.getType());
390  assert(IntPtrWidth == Offset.getBitWidth());
391 
392  for (gep_type_iterator GTI = gep_type_begin(GEP), GTE = gep_type_end(GEP);
393  GTI != GTE; ++GTI) {
394  ConstantInt *OpC = dyn_cast<ConstantInt>(GTI.getOperand());
395  if (!OpC)
396  if (Constant *SimpleOp = SimplifiedValues.lookup(GTI.getOperand()))
397  OpC = dyn_cast<ConstantInt>(SimpleOp);
398  if (!OpC)
399  return false;
400  if (OpC->isZero())
401  continue;
402 
403  // Handle a struct index, which adds its field offset to the pointer.
404  if (StructType *STy = GTI.getStructTypeOrNull()) {
405  unsigned ElementIdx = OpC->getZExtValue();
406  const StructLayout *SL = DL.getStructLayout(STy);
407  Offset += APInt(IntPtrWidth, SL->getElementOffset(ElementIdx));
408  continue;
409  }
410 
411  APInt TypeSize(IntPtrWidth, DL.getTypeAllocSize(GTI.getIndexedType()));
412  Offset += OpC->getValue().sextOrTrunc(IntPtrWidth) * TypeSize;
413  }
414  return true;
415 }
416 
417 /// Use TTI to check whether a GEP is free.
418 ///
419 /// Respects any simplified values known during the analysis of this callsite.
420 bool CallAnalyzer::isGEPFree(GetElementPtrInst &GEP) {
421  SmallVector<Value *, 4> Operands;
422  Operands.push_back(GEP.getOperand(0));
423  for (User::op_iterator I = GEP.idx_begin(), E = GEP.idx_end(); I != E; ++I)
424  if (Constant *SimpleOp = SimplifiedValues.lookup(*I))
425  Operands.push_back(SimpleOp);
426  else
427  Operands.push_back(*I);
428  return TargetTransformInfo::TCC_Free == TTI.getUserCost(&GEP, Operands);
429 }
430 
431 bool CallAnalyzer::visitAlloca(AllocaInst &I) {
432  // Check whether inlining will turn a dynamic alloca into a static
433  // alloca and handle that case.
434  if (I.isArrayAllocation()) {
435  Constant *Size = SimplifiedValues.lookup(I.getArraySize());
436  if (auto *AllocSize = dyn_cast_or_null<ConstantInt>(Size)) {
437  Type *Ty = I.getAllocatedType();
438  AllocatedSize = SaturatingMultiplyAdd(
439  AllocSize->getLimitedValue(), DL.getTypeAllocSize(Ty), AllocatedSize);
440  return Base::visitAlloca(I);
441  }
442  }
443 
444  // Accumulate the allocated size.
445  if (I.isStaticAlloca()) {
446  Type *Ty = I.getAllocatedType();
447  AllocatedSize = SaturatingAdd(DL.getTypeAllocSize(Ty), AllocatedSize);
448  }
449 
450  // We will happily inline static alloca instructions.
451  if (I.isStaticAlloca())
452  return Base::visitAlloca(I);
453 
454  // FIXME: This is overly conservative. Dynamic allocas are inefficient for
455  // a variety of reasons, and so we would like to not inline them into
456  // functions which don't currently have a dynamic alloca. This simply
457  // disables inlining altogether in the presence of a dynamic alloca.
458  HasDynamicAlloca = true;
459  return false;
460 }
461 
462 bool CallAnalyzer::visitPHI(PHINode &I) {
463  // FIXME: We need to propagate SROA *disabling* through phi nodes, even
464  // though we don't want to propagate it's bonuses. The idea is to disable
465  // SROA if it *might* be used in an inappropriate manner.
466 
467  // Phi nodes are always zero-cost.
468  // FIXME: Pointer sizes may differ between different address spaces, so do we
469  // need to use correct address space in the call to getPointerSizeInBits here?
470  // Or could we skip the getPointerSizeInBits call completely? As far as I can
471  // see the ZeroOffset is used as a dummy value, so we can probably use any
472  // bit width for the ZeroOffset?
473  APInt ZeroOffset = APInt::getNullValue(DL.getPointerSizeInBits(0));
474  bool CheckSROA = I.getType()->isPointerTy();
475 
476  // Track the constant or pointer with constant offset we've seen so far.
477  Constant *FirstC = nullptr;
478  std::pair<Value *, APInt> FirstBaseAndOffset = {nullptr, ZeroOffset};
479  Value *FirstV = nullptr;
480 
481  for (unsigned i = 0, e = I.getNumIncomingValues(); i != e; ++i) {
482  BasicBlock *Pred = I.getIncomingBlock(i);
483  // If the incoming block is dead, skip the incoming block.
484  if (DeadBlocks.count(Pred))
485  continue;
486  // If the parent block of phi is not the known successor of the incoming
487  // block, skip the incoming block.
488  BasicBlock *KnownSuccessor = KnownSuccessors[Pred];
489  if (KnownSuccessor && KnownSuccessor != I.getParent())
490  continue;
491 
492  Value *V = I.getIncomingValue(i);
493  // If the incoming value is this phi itself, skip the incoming value.
494  if (&I == V)
495  continue;
496 
497  Constant *C = dyn_cast<Constant>(V);
498  if (!C)
499  C = SimplifiedValues.lookup(V);
500 
501  std::pair<Value *, APInt> BaseAndOffset = {nullptr, ZeroOffset};
502  if (!C && CheckSROA)
503  BaseAndOffset = ConstantOffsetPtrs.lookup(V);
504 
505  if (!C && !BaseAndOffset.first)
506  // The incoming value is neither a constant nor a pointer with constant
507  // offset, exit early.
508  return true;
509 
510  if (FirstC) {
511  if (FirstC == C)
512  // If we've seen a constant incoming value before and it is the same
513  // constant we see this time, continue checking the next incoming value.
514  continue;
515  // Otherwise early exit because we either see a different constant or saw
516  // a constant before but we have a pointer with constant offset this time.
517  return true;
518  }
519 
520  if (FirstV) {
521  // The same logic as above, but check pointer with constant offset here.
522  if (FirstBaseAndOffset == BaseAndOffset)
523  continue;
524  return true;
525  }
526 
527  if (C) {
528  // This is the 1st time we've seen a constant, record it.
529  FirstC = C;
530  continue;
531  }
532 
533  // The remaining case is that this is the 1st time we've seen a pointer with
534  // constant offset, record it.
535  FirstV = V;
536  FirstBaseAndOffset = BaseAndOffset;
537  }
538 
539  // Check if we can map phi to a constant.
540  if (FirstC) {
541  SimplifiedValues[&I] = FirstC;
542  return true;
543  }
544 
545  // Check if we can map phi to a pointer with constant offset.
546  if (FirstBaseAndOffset.first) {
547  ConstantOffsetPtrs[&I] = FirstBaseAndOffset;
548 
549  Value *SROAArg;
551  if (lookupSROAArgAndCost(FirstV, SROAArg, CostIt))
552  SROAArgValues[&I] = SROAArg;
553  }
554 
555  return true;
556 }
557 
558 /// Check we can fold GEPs of constant-offset call site argument pointers.
559 /// This requires target data and inbounds GEPs.
560 ///
561 /// \return true if the specified GEP can be folded.
562 bool CallAnalyzer::canFoldInboundsGEP(GetElementPtrInst &I) {
563  // Check if we have a base + offset for the pointer.
564  std::pair<Value *, APInt> BaseAndOffset =
565  ConstantOffsetPtrs.lookup(I.getPointerOperand());
566  if (!BaseAndOffset.first)
567  return false;
568 
569  // Check if the offset of this GEP is constant, and if so accumulate it
570  // into Offset.
571  if (!accumulateGEPOffset(cast<GEPOperator>(I), BaseAndOffset.second))
572  return false;
573 
574  // Add the result as a new mapping to Base + Offset.
575  ConstantOffsetPtrs[&I] = BaseAndOffset;
576 
577  return true;
578 }
579 
580 bool CallAnalyzer::visitGetElementPtr(GetElementPtrInst &I) {
581  Value *SROAArg;
583  bool SROACandidate =
584  lookupSROAArgAndCost(I.getPointerOperand(), SROAArg, CostIt);
585 
586  // Lambda to check whether a GEP's indices are all constant.
587  auto IsGEPOffsetConstant = [&](GetElementPtrInst &GEP) {
588  for (User::op_iterator I = GEP.idx_begin(), E = GEP.idx_end(); I != E; ++I)
589  if (!isa<Constant>(*I) && !SimplifiedValues.lookup(*I))
590  return false;
591  return true;
592  };
593 
594  if ((I.isInBounds() && canFoldInboundsGEP(I)) || IsGEPOffsetConstant(I)) {
595  if (SROACandidate)
596  SROAArgValues[&I] = SROAArg;
597 
598  // Constant GEPs are modeled as free.
599  return true;
600  }
601 
602  // Variable GEPs will require math and will disable SROA.
603  if (SROACandidate)
604  disableSROA(CostIt);
605  return isGEPFree(I);
606 }
607 
608 /// Simplify \p I if its operands are constants and update SimplifiedValues.
609 /// \p Evaluate is a callable specific to instruction type that evaluates the
610 /// instruction when all the operands are constants.
611 template <typename Callable>
612 bool CallAnalyzer::simplifyInstruction(Instruction &I, Callable Evaluate) {
614  for (Value *Op : I.operands()) {
615  Constant *COp = dyn_cast<Constant>(Op);
616  if (!COp)
617  COp = SimplifiedValues.lookup(Op);
618  if (!COp)
619  return false;
620  COps.push_back(COp);
621  }
622  auto *C = Evaluate(COps);
623  if (!C)
624  return false;
625  SimplifiedValues[&I] = C;
626  return true;
627 }
628 
629 bool CallAnalyzer::visitBitCast(BitCastInst &I) {
630  // Propagate constants through bitcasts.
631  if (simplifyInstruction(I, [&](SmallVectorImpl<Constant *> &COps) {
632  return ConstantExpr::getBitCast(COps[0], I.getType());
633  }))
634  return true;
635 
636  // Track base/offsets through casts
637  std::pair<Value *, APInt> BaseAndOffset =
638  ConstantOffsetPtrs.lookup(I.getOperand(0));
639  // Casts don't change the offset, just wrap it up.
640  if (BaseAndOffset.first)
641  ConstantOffsetPtrs[&I] = BaseAndOffset;
642 
643  // Also look for SROA candidates here.
644  Value *SROAArg;
646  if (lookupSROAArgAndCost(I.getOperand(0), SROAArg, CostIt))
647  SROAArgValues[&I] = SROAArg;
648 
649  // Bitcasts are always zero cost.
650  return true;
651 }
652 
653 bool CallAnalyzer::visitPtrToInt(PtrToIntInst &I) {
654  // Propagate constants through ptrtoint.
655  if (simplifyInstruction(I, [&](SmallVectorImpl<Constant *> &COps) {
656  return ConstantExpr::getPtrToInt(COps[0], I.getType());
657  }))
658  return true;
659 
660  // Track base/offset pairs when converted to a plain integer provided the
661  // integer is large enough to represent the pointer.
662  unsigned IntegerSize = I.getType()->getScalarSizeInBits();
663  unsigned AS = I.getOperand(0)->getType()->getPointerAddressSpace();
664  if (IntegerSize >= DL.getPointerSizeInBits(AS)) {
665  std::pair<Value *, APInt> BaseAndOffset =
666  ConstantOffsetPtrs.lookup(I.getOperand(0));
667  if (BaseAndOffset.first)
668  ConstantOffsetPtrs[&I] = BaseAndOffset;
669  }
670 
671  // This is really weird. Technically, ptrtoint will disable SROA. However,
672  // unless that ptrtoint is *used* somewhere in the live basic blocks after
673  // inlining, it will be nuked, and SROA should proceed. All of the uses which
674  // would block SROA would also block SROA if applied directly to a pointer,
675  // and so we can just add the integer in here. The only places where SROA is
676  // preserved either cannot fire on an integer, or won't in-and-of themselves
677  // disable SROA (ext) w/o some later use that we would see and disable.
678  Value *SROAArg;
680  if (lookupSROAArgAndCost(I.getOperand(0), SROAArg, CostIt))
681  SROAArgValues[&I] = SROAArg;
682 
683  return TargetTransformInfo::TCC_Free == TTI.getUserCost(&I);
684 }
685 
686 bool CallAnalyzer::visitIntToPtr(IntToPtrInst &I) {
687  // Propagate constants through ptrtoint.
688  if (simplifyInstruction(I, [&](SmallVectorImpl<Constant *> &COps) {
689  return ConstantExpr::getIntToPtr(COps[0], I.getType());
690  }))
691  return true;
692 
693  // Track base/offset pairs when round-tripped through a pointer without
694  // modifications provided the integer is not too large.
695  Value *Op = I.getOperand(0);
696  unsigned IntegerSize = Op->getType()->getScalarSizeInBits();
697  if (IntegerSize <= DL.getPointerTypeSizeInBits(I.getType())) {
698  std::pair<Value *, APInt> BaseAndOffset = ConstantOffsetPtrs.lookup(Op);
699  if (BaseAndOffset.first)
700  ConstantOffsetPtrs[&I] = BaseAndOffset;
701  }
702 
703  // "Propagate" SROA here in the same manner as we do for ptrtoint above.
704  Value *SROAArg;
706  if (lookupSROAArgAndCost(Op, SROAArg, CostIt))
707  SROAArgValues[&I] = SROAArg;
708 
709  return TargetTransformInfo::TCC_Free == TTI.getUserCost(&I);
710 }
711 
712 bool CallAnalyzer::visitCastInst(CastInst &I) {
713  // Propagate constants through casts.
714  if (simplifyInstruction(I, [&](SmallVectorImpl<Constant *> &COps) {
715  return ConstantExpr::getCast(I.getOpcode(), COps[0], I.getType());
716  }))
717  return true;
718 
719  // Disable SROA in the face of arbitrary casts we don't whitelist elsewhere.
720  disableSROA(I.getOperand(0));
721 
722  // If this is a floating-point cast, and the target says this operation
723  // is expensive, this may eventually become a library call. Treat the cost
724  // as such.
725  switch (I.getOpcode()) {
726  case Instruction::FPTrunc:
727  case Instruction::FPExt:
728  case Instruction::UIToFP:
729  case Instruction::SIToFP:
730  case Instruction::FPToUI:
731  case Instruction::FPToSI:
732  if (TTI.getFPOpCost(I.getType()) == TargetTransformInfo::TCC_Expensive)
734  break;
735  default:
736  break;
737  }
738 
739  return TargetTransformInfo::TCC_Free == TTI.getUserCost(&I);
740 }
741 
742 bool CallAnalyzer::visitUnaryInstruction(UnaryInstruction &I) {
743  Value *Operand = I.getOperand(0);
744  if (simplifyInstruction(I, [&](SmallVectorImpl<Constant *> &COps) {
745  return ConstantFoldInstOperands(&I, COps[0], DL);
746  }))
747  return true;
748 
749  // Disable any SROA on the argument to arbitrary unary instructions.
750  disableSROA(Operand);
751 
752  return false;
753 }
754 
755 bool CallAnalyzer::paramHasAttr(Argument *A, Attribute::AttrKind Attr) {
756  return CandidateCall.paramHasAttr(A->getArgNo(), Attr);
757 }
758 
759 bool CallAnalyzer::isKnownNonNullInCallee(Value *V) {
760  // Does the *call site* have the NonNull attribute set on an argument? We
761  // use the attribute on the call site to memoize any analysis done in the
762  // caller. This will also trip if the callee function has a non-null
763  // parameter attribute, but that's a less interesting case because hopefully
764  // the callee would already have been simplified based on that.
765  if (Argument *A = dyn_cast<Argument>(V))
766  if (paramHasAttr(A, Attribute::NonNull))
767  return true;
768 
769  // Is this an alloca in the caller? This is distinct from the attribute case
770  // above because attributes aren't updated within the inliner itself and we
771  // always want to catch the alloca derived case.
772  if (isAllocaDerivedArg(V))
773  // We can actually predict the result of comparisons between an
774  // alloca-derived value and null. Note that this fires regardless of
775  // SROA firing.
776  return true;
777 
778  return false;
779 }
780 
781 bool CallAnalyzer::allowSizeGrowth(CallBase &Call) {
782  // If the normal destination of the invoke or the parent block of the call
783  // site is unreachable-terminated, there is little point in inlining this
784  // unless there is literally zero cost.
785  // FIXME: Note that it is possible that an unreachable-terminated block has a
786  // hot entry. For example, in below scenario inlining hot_call_X() may be
787  // beneficial :
788  // main() {
789  // hot_call_1();
790  // ...
791  // hot_call_N()
792  // exit(0);
793  // }
794  // For now, we are not handling this corner case here as it is rare in real
795  // code. In future, we should elaborate this based on BPI and BFI in more
796  // general threshold adjusting heuristics in updateThreshold().
797  if (InvokeInst *II = dyn_cast<InvokeInst>(&Call)) {
798  if (isa<UnreachableInst>(II->getNormalDest()->getTerminator()))
799  return false;
800  } else if (isa<UnreachableInst>(Call.getParent()->getTerminator()))
801  return false;
802 
803  return true;
804 }
805 
807  BlockFrequencyInfo *CallerBFI) {
808  // If global profile summary is available, then callsite's coldness is
809  // determined based on that.
810  if (PSI && PSI->hasProfileSummary())
811  return PSI->isColdCallSite(CallSite(&Call), CallerBFI);
812 
813  // Otherwise we need BFI to be available.
814  if (!CallerBFI)
815  return false;
816 
817  // Determine if the callsite is cold relative to caller's entry. We could
818  // potentially cache the computation of scaled entry frequency, but the added
819  // complexity is not worth it unless this scaling shows up high in the
820  // profiles.
821  const BranchProbability ColdProb(ColdCallSiteRelFreq, 100);
822  auto CallSiteBB = Call.getParent();
823  auto CallSiteFreq = CallerBFI->getBlockFreq(CallSiteBB);
824  auto CallerEntryFreq =
825  CallerBFI->getBlockFreq(&(Call.getCaller()->getEntryBlock()));
826  return CallSiteFreq < CallerEntryFreq * ColdProb;
827 }
828 
830 CallAnalyzer::getHotCallSiteThreshold(CallBase &Call,
831  BlockFrequencyInfo *CallerBFI) {
832 
833  // If global profile summary is available, then callsite's hotness is
834  // determined based on that.
835  if (PSI && PSI->hasProfileSummary() &&
836  PSI->isHotCallSite(CallSite(&Call), CallerBFI))
837  return Params.HotCallSiteThreshold;
838 
839  // Otherwise we need BFI to be available and to have a locally hot callsite
840  // threshold.
841  if (!CallerBFI || !Params.LocallyHotCallSiteThreshold)
842  return None;
843 
844  // Determine if the callsite is hot relative to caller's entry. We could
845  // potentially cache the computation of scaled entry frequency, but the added
846  // complexity is not worth it unless this scaling shows up high in the
847  // profiles.
848  auto CallSiteBB = Call.getParent();
849  auto CallSiteFreq = CallerBFI->getBlockFreq(CallSiteBB).getFrequency();
850  auto CallerEntryFreq = CallerBFI->getEntryFreq();
851  if (CallSiteFreq >= CallerEntryFreq * HotCallSiteRelFreq)
852  return Params.LocallyHotCallSiteThreshold;
853 
854  // Otherwise treat it normally.
855  return None;
856 }
857 
858 void CallAnalyzer::updateThreshold(CallBase &Call, Function &Callee) {
859  // If no size growth is allowed for this inlining, set Threshold to 0.
860  if (!allowSizeGrowth(Call)) {
861  Threshold = 0;
862  return;
863  }
864 
865  Function *Caller = Call.getCaller();
866 
867  // return min(A, B) if B is valid.
868  auto MinIfValid = [](int A, Optional<int> B) {
869  return B ? std::min(A, B.getValue()) : A;
870  };
871 
872  // return max(A, B) if B is valid.
873  auto MaxIfValid = [](int A, Optional<int> B) {
874  return B ? std::max(A, B.getValue()) : A;
875  };
876 
877  // Various bonus percentages. These are multiplied by Threshold to get the
878  // bonus values.
879  // SingleBBBonus: This bonus is applied if the callee has a single reachable
880  // basic block at the given callsite context. This is speculatively applied
881  // and withdrawn if more than one basic block is seen.
882  //
883  // LstCallToStaticBonus: This large bonus is applied to ensure the inlining
884  // of the last call to a static function as inlining such functions is
885  // guaranteed to reduce code size.
886  //
887  // These bonus percentages may be set to 0 based on properties of the caller
888  // and the callsite.
889  int SingleBBBonusPercent = 50;
890  int VectorBonusPercent = TTI.getInlinerVectorBonusPercent();
892 
893  // Lambda to set all the above bonus and bonus percentages to 0.
894  auto DisallowAllBonuses = [&]() {
895  SingleBBBonusPercent = 0;
896  VectorBonusPercent = 0;
897  LastCallToStaticBonus = 0;
898  };
899 
900  // Use the OptMinSizeThreshold or OptSizeThreshold knob if they are available
901  // and reduce the threshold if the caller has the necessary attribute.
902  if (Caller->hasMinSize()) {
903  Threshold = MinIfValid(Threshold, Params.OptMinSizeThreshold);
904  // For minsize, we want to disable the single BB bonus and the vector
905  // bonuses, but not the last-call-to-static bonus. Inlining the last call to
906  // a static function will, at the minimum, eliminate the parameter setup and
907  // call/return instructions.
908  SingleBBBonusPercent = 0;
909  VectorBonusPercent = 0;
910  } else if (Caller->hasOptSize())
911  Threshold = MinIfValid(Threshold, Params.OptSizeThreshold);
912 
913  // Adjust the threshold based on inlinehint attribute and profile based
914  // hotness information if the caller does not have MinSize attribute.
915  if (!Caller->hasMinSize()) {
916  if (Callee.hasFnAttribute(Attribute::InlineHint))
917  Threshold = MaxIfValid(Threshold, Params.HintThreshold);
918 
919  // FIXME: After switching to the new passmanager, simplify the logic below
920  // by checking only the callsite hotness/coldness as we will reliably
921  // have local profile information.
922  //
923  // Callsite hotness and coldness can be determined if sample profile is
924  // used (which adds hotness metadata to calls) or if caller's
925  // BlockFrequencyInfo is available.
926  BlockFrequencyInfo *CallerBFI = GetBFI ? &((*GetBFI)(*Caller)) : nullptr;
927  auto HotCallSiteThreshold = getHotCallSiteThreshold(Call, CallerBFI);
928  if (!Caller->hasOptSize() && HotCallSiteThreshold) {
929  LLVM_DEBUG(dbgs() << "Hot callsite.\n");
930  // FIXME: This should update the threshold only if it exceeds the
931  // current threshold, but AutoFDO + ThinLTO currently relies on this
932  // behavior to prevent inlining of hot callsites during ThinLTO
933  // compile phase.
934  Threshold = HotCallSiteThreshold.getValue();
935  } else if (isColdCallSite(Call, CallerBFI)) {
936  LLVM_DEBUG(dbgs() << "Cold callsite.\n");
937  // Do not apply bonuses for a cold callsite including the
938  // LastCallToStatic bonus. While this bonus might result in code size
939  // reduction, it can cause the size of a non-cold caller to increase
940  // preventing it from being inlined.
941  DisallowAllBonuses();
942  Threshold = MinIfValid(Threshold, Params.ColdCallSiteThreshold);
943  } else if (PSI) {
944  // Use callee's global profile information only if we have no way of
945  // determining this via callsite information.
946  if (PSI->isFunctionEntryHot(&Callee)) {
947  LLVM_DEBUG(dbgs() << "Hot callee.\n");
948  // If callsite hotness can not be determined, we may still know
949  // that the callee is hot and treat it as a weaker hint for threshold
950  // increase.
951  Threshold = MaxIfValid(Threshold, Params.HintThreshold);
952  } else if (PSI->isFunctionEntryCold(&Callee)) {
953  LLVM_DEBUG(dbgs() << "Cold callee.\n");
954  // Do not apply bonuses for a cold callee including the
955  // LastCallToStatic bonus. While this bonus might result in code size
956  // reduction, it can cause the size of a non-cold caller to increase
957  // preventing it from being inlined.
958  DisallowAllBonuses();
959  Threshold = MinIfValid(Threshold, Params.ColdThreshold);
960  }
961  }
962  }
963 
964  // Finally, take the target-specific inlining threshold multiplier into
965  // account.
966  Threshold *= TTI.getInliningThresholdMultiplier();
967 
968  SingleBBBonus = Threshold * SingleBBBonusPercent / 100;
969  VectorBonus = Threshold * VectorBonusPercent / 100;
970 
971  bool OnlyOneCallAndLocalLinkage =
972  F.hasLocalLinkage() && F.hasOneUse() && &F == Call.getCalledFunction();
973  // If there is only one call of the function, and it has internal linkage,
974  // the cost of inlining it drops dramatically. It may seem odd to update
975  // Cost in updateThreshold, but the bonus depends on the logic in this method.
976  if (OnlyOneCallAndLocalLinkage)
977  Cost -= LastCallToStaticBonus;
978 }
979 
980 bool CallAnalyzer::visitCmpInst(CmpInst &I) {
981  Value *LHS = I.getOperand(0), *RHS = I.getOperand(1);
982  // First try to handle simplified comparisons.
983  if (simplifyInstruction(I, [&](SmallVectorImpl<Constant *> &COps) {
984  return ConstantExpr::getCompare(I.getPredicate(), COps[0], COps[1]);
985  }))
986  return true;
987 
988  if (I.getOpcode() == Instruction::FCmp)
989  return false;
990 
991  // Otherwise look for a comparison between constant offset pointers with
992  // a common base.
993  Value *LHSBase, *RHSBase;
994  APInt LHSOffset, RHSOffset;
995  std::tie(LHSBase, LHSOffset) = ConstantOffsetPtrs.lookup(LHS);
996  if (LHSBase) {
997  std::tie(RHSBase, RHSOffset) = ConstantOffsetPtrs.lookup(RHS);
998  if (RHSBase && LHSBase == RHSBase) {
999  // We have common bases, fold the icmp to a constant based on the
1000  // offsets.
1001  Constant *CLHS = ConstantInt::get(LHS->getContext(), LHSOffset);
1002  Constant *CRHS = ConstantInt::get(RHS->getContext(), RHSOffset);
1003  if (Constant *C = ConstantExpr::getICmp(I.getPredicate(), CLHS, CRHS)) {
1004  SimplifiedValues[&I] = C;
1005  ++NumConstantPtrCmps;
1006  return true;
1007  }
1008  }
1009  }
1010 
1011  // If the comparison is an equality comparison with null, we can simplify it
1012  // if we know the value (argument) can't be null
1013  if (I.isEquality() && isa<ConstantPointerNull>(I.getOperand(1)) &&
1014  isKnownNonNullInCallee(I.getOperand(0))) {
1015  bool IsNotEqual = I.getPredicate() == CmpInst::ICMP_NE;
1016  SimplifiedValues[&I] = IsNotEqual ? ConstantInt::getTrue(I.getType())
1018  return true;
1019  }
1020  // Finally check for SROA candidates in comparisons.
1021  Value *SROAArg;
1023  if (lookupSROAArgAndCost(I.getOperand(0), SROAArg, CostIt)) {
1024  if (isa<ConstantPointerNull>(I.getOperand(1))) {
1025  accumulateSROACost(CostIt, InlineConstants::InstrCost);
1026  return true;
1027  }
1028 
1029  disableSROA(CostIt);
1030  }
1031 
1032  return false;
1033 }
1034 
1035 bool CallAnalyzer::visitSub(BinaryOperator &I) {
1036  // Try to handle a special case: we can fold computing the difference of two
1037  // constant-related pointers.
1038  Value *LHS = I.getOperand(0), *RHS = I.getOperand(1);
1039  Value *LHSBase, *RHSBase;
1040  APInt LHSOffset, RHSOffset;
1041  std::tie(LHSBase, LHSOffset) = ConstantOffsetPtrs.lookup(LHS);
1042  if (LHSBase) {
1043  std::tie(RHSBase, RHSOffset) = ConstantOffsetPtrs.lookup(RHS);
1044  if (RHSBase && LHSBase == RHSBase) {
1045  // We have common bases, fold the subtract to a constant based on the
1046  // offsets.
1047  Constant *CLHS = ConstantInt::get(LHS->getContext(), LHSOffset);
1048  Constant *CRHS = ConstantInt::get(RHS->getContext(), RHSOffset);
1049  if (Constant *C = ConstantExpr::getSub(CLHS, CRHS)) {
1050  SimplifiedValues[&I] = C;
1051  ++NumConstantPtrDiffs;
1052  return true;
1053  }
1054  }
1055  }
1056 
1057  // Otherwise, fall back to the generic logic for simplifying and handling
1058  // instructions.
1059  return Base::visitSub(I);
1060 }
1061 
1062 bool CallAnalyzer::visitBinaryOperator(BinaryOperator &I) {
1063  Value *LHS = I.getOperand(0), *RHS = I.getOperand(1);
1064  Constant *CLHS = dyn_cast<Constant>(LHS);
1065  if (!CLHS)
1066  CLHS = SimplifiedValues.lookup(LHS);
1067  Constant *CRHS = dyn_cast<Constant>(RHS);
1068  if (!CRHS)
1069  CRHS = SimplifiedValues.lookup(RHS);
1070 
1071  Value *SimpleV = nullptr;
1072  if (auto FI = dyn_cast<FPMathOperator>(&I))
1073  SimpleV = SimplifyFPBinOp(I.getOpcode(), CLHS ? CLHS : LHS,
1074  CRHS ? CRHS : RHS, FI->getFastMathFlags(), DL);
1075  else
1076  SimpleV =
1077  SimplifyBinOp(I.getOpcode(), CLHS ? CLHS : LHS, CRHS ? CRHS : RHS, DL);
1078 
1079  if (Constant *C = dyn_cast_or_null<Constant>(SimpleV))
1080  SimplifiedValues[&I] = C;
1081 
1082  if (SimpleV)
1083  return true;
1084 
1085  // Disable any SROA on arguments to arbitrary, unsimplified binary operators.
1086  disableSROA(LHS);
1087  disableSROA(RHS);
1088 
1089  // If the instruction is floating point, and the target says this operation
1090  // is expensive, this may eventually become a library call. Treat the cost
1091  // as such. Unless it's fneg which can be implemented with an xor.
1092  using namespace llvm::PatternMatch;
1093  if (I.getType()->isFloatingPointTy() &&
1094  TTI.getFPOpCost(I.getType()) == TargetTransformInfo::TCC_Expensive &&
1095  !match(&I, m_FNeg(m_Value())))
1097 
1098  return false;
1099 }
1100 
1101 bool CallAnalyzer::visitFNeg(UnaryOperator &I) {
1102  Value *Op = I.getOperand(0);
1103  Constant *COp = dyn_cast<Constant>(Op);
1104  if (!COp)
1105  COp = SimplifiedValues.lookup(Op);
1106 
1107  Value *SimpleV = SimplifyFNegInst(COp ? COp : Op,
1108  cast<FPMathOperator>(I).getFastMathFlags(),
1109  DL);
1110 
1111  if (Constant *C = dyn_cast_or_null<Constant>(SimpleV))
1112  SimplifiedValues[&I] = C;
1113 
1114  if (SimpleV)
1115  return true;
1116 
1117  // Disable any SROA on arguments to arbitrary, unsimplified fneg.
1118  disableSROA(Op);
1119 
1120  return false;
1121 }
1122 
1123 bool CallAnalyzer::visitLoad(LoadInst &I) {
1124  Value *SROAArg;
1126  if (lookupSROAArgAndCost(I.getPointerOperand(), SROAArg, CostIt)) {
1127  if (I.isSimple()) {
1128  accumulateSROACost(CostIt, InlineConstants::InstrCost);
1129  return true;
1130  }
1131 
1132  disableSROA(CostIt);
1133  }
1134 
1135  // If the data is already loaded from this address and hasn't been clobbered
1136  // by any stores or calls, this load is likely to be redundant and can be
1137  // eliminated.
1138  if (EnableLoadElimination &&
1139  !LoadAddrSet.insert(I.getPointerOperand()).second && I.isUnordered()) {
1140  LoadEliminationCost += InlineConstants::InstrCost;
1141  return true;
1142  }
1143 
1144  return false;
1145 }
1146 
1147 bool CallAnalyzer::visitStore(StoreInst &I) {
1148  Value *SROAArg;
1150  if (lookupSROAArgAndCost(I.getPointerOperand(), SROAArg, CostIt)) {
1151  if (I.isSimple()) {
1152  accumulateSROACost(CostIt, InlineConstants::InstrCost);
1153  return true;
1154  }
1155 
1156  disableSROA(CostIt);
1157  }
1158 
1159  // The store can potentially clobber loads and prevent repeated loads from
1160  // being eliminated.
1161  // FIXME:
1162  // 1. We can probably keep an initial set of eliminatable loads substracted
1163  // from the cost even when we finally see a store. We just need to disable
1164  // *further* accumulation of elimination savings.
1165  // 2. We should probably at some point thread MemorySSA for the callee into
1166  // this and then use that to actually compute *really* precise savings.
1167  disableLoadElimination();
1168  return false;
1169 }
1170 
1171 bool CallAnalyzer::visitExtractValue(ExtractValueInst &I) {
1172  // Constant folding for extract value is trivial.
1173  if (simplifyInstruction(I, [&](SmallVectorImpl<Constant *> &COps) {
1174  return ConstantExpr::getExtractValue(COps[0], I.getIndices());
1175  }))
1176  return true;
1177 
1178  // SROA can look through these but give them a cost.
1179  return false;
1180 }
1181 
1182 bool CallAnalyzer::visitInsertValue(InsertValueInst &I) {
1183  // Constant folding for insert value is trivial.
1184  if (simplifyInstruction(I, [&](SmallVectorImpl<Constant *> &COps) {
1185  return ConstantExpr::getInsertValue(/*AggregateOperand*/ COps[0],
1186  /*InsertedValueOperand*/ COps[1],
1187  I.getIndices());
1188  }))
1189  return true;
1190 
1191  // SROA can look through these but give them a cost.
1192  return false;
1193 }
1194 
1195 /// Try to simplify a call site.
1196 ///
1197 /// Takes a concrete function and callsite and tries to actually simplify it by
1198 /// analyzing the arguments and call itself with instsimplify. Returns true if
1199 /// it has simplified the callsite to some other entity (a constant), making it
1200 /// free.
1201 bool CallAnalyzer::simplifyCallSite(Function *F, CallBase &Call) {
1202  // FIXME: Using the instsimplify logic directly for this is inefficient
1203  // because we have to continually rebuild the argument list even when no
1204  // simplifications can be performed. Until that is fixed with remapping
1205  // inside of instsimplify, directly constant fold calls here.
1206  if (!canConstantFoldCallTo(&Call, F))
1207  return false;
1208 
1209  // Try to re-map the arguments to constants.
1210  SmallVector<Constant *, 4> ConstantArgs;
1211  ConstantArgs.reserve(Call.arg_size());
1212  for (Value *I : Call.args()) {
1213  Constant *C = dyn_cast<Constant>(I);
1214  if (!C)
1215  C = dyn_cast_or_null<Constant>(SimplifiedValues.lookup(I));
1216  if (!C)
1217  return false; // This argument doesn't map to a constant.
1218 
1219  ConstantArgs.push_back(C);
1220  }
1221  if (Constant *C = ConstantFoldCall(&Call, F, ConstantArgs)) {
1222  SimplifiedValues[&Call] = C;
1223  return true;
1224  }
1225 
1226  return false;
1227 }
1228 
1229 bool CallAnalyzer::visitCallBase(CallBase &Call) {
1230  if (Call.hasFnAttr(Attribute::ReturnsTwice) &&
1231  !F.hasFnAttribute(Attribute::ReturnsTwice)) {
1232  // This aborts the entire analysis.
1233  ExposesReturnsTwice = true;
1234  return false;
1235  }
1236  if (isa<CallInst>(Call) && cast<CallInst>(Call).cannotDuplicate())
1237  ContainsNoDuplicateCall = true;
1238 
1239  if (Function *F = Call.getCalledFunction()) {
1240  // When we have a concrete function, first try to simplify it directly.
1241  if (simplifyCallSite(F, Call))
1242  return true;
1243 
1244  // Next check if it is an intrinsic we know about.
1245  // FIXME: Lift this into part of the InstVisitor.
1246  if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(&Call)) {
1247  switch (II->getIntrinsicID()) {
1248  default:
1249  if (!Call.onlyReadsMemory() && !isAssumeLikeIntrinsic(II))
1250  disableLoadElimination();
1251  return Base::visitCallBase(Call);
1252 
1253  case Intrinsic::load_relative:
1254  // This is normally lowered to 4 LLVM instructions.
1255  addCost(3 * InlineConstants::InstrCost);
1256  return false;
1257 
1258  case Intrinsic::memset:
1259  case Intrinsic::memcpy:
1260  case Intrinsic::memmove:
1261  disableLoadElimination();
1262  // SROA can usually chew through these intrinsics, but they aren't free.
1263  return false;
1264  case Intrinsic::icall_branch_funnel:
1265  case Intrinsic::localescape:
1266  HasUninlineableIntrinsic = true;
1267  return false;
1268  case Intrinsic::vastart:
1269  InitsVargArgs = true;
1270  return false;
1271  }
1272  }
1273 
1274  if (F == Call.getFunction()) {
1275  // This flag will fully abort the analysis, so don't bother with anything
1276  // else.
1277  IsRecursiveCall = true;
1278  return false;
1279  }
1280 
1281  if (TTI.isLoweredToCall(F)) {
1282  // We account for the average 1 instruction per call argument setup
1283  // here.
1284  addCost(Call.arg_size() * InlineConstants::InstrCost);
1285 
1286  // Everything other than inline ASM will also have a significant cost
1287  // merely from making the call.
1288  if (!isa<InlineAsm>(Call.getCalledValue()))
1290  }
1291 
1292  if (!Call.onlyReadsMemory())
1293  disableLoadElimination();
1294  return Base::visitCallBase(Call);
1295  }
1296 
1297  // Otherwise we're in a very special case -- an indirect function call. See
1298  // if we can be particularly clever about this.
1299  Value *Callee = Call.getCalledValue();
1300 
1301  // First, pay the price of the argument setup. We account for the average
1302  // 1 instruction per call argument setup here.
1303  addCost(Call.arg_size() * InlineConstants::InstrCost);
1304 
1305  // Next, check if this happens to be an indirect function call to a known
1306  // function in this inline context. If not, we've done all we can.
1307  Function *F = dyn_cast_or_null<Function>(SimplifiedValues.lookup(Callee));
1308  if (!F) {
1309  if (!Call.onlyReadsMemory())
1310  disableLoadElimination();
1311  return Base::visitCallBase(Call);
1312  }
1313 
1314  // If we have a constant that we are calling as a function, we can peer
1315  // through it and see the function target. This happens not infrequently
1316  // during devirtualization and so we want to give it a hefty bonus for
1317  // inlining, but cap that bonus in the event that inlining wouldn't pan
1318  // out. Pretend to inline the function, with a custom threshold.
1319  auto IndirectCallParams = Params;
1321  CallAnalyzer CA(TTI, GetAssumptionCache, GetBFI, PSI, ORE, *F, Call,
1322  IndirectCallParams);
1323  if (CA.analyzeCall(Call)) {
1324  // We were able to inline the indirect call! Subtract the cost from the
1325  // threshold to get the bonus we want to apply, but don't go below zero.
1326  Cost -= std::max(0, CA.getThreshold() - CA.getCost());
1327  }
1328 
1329  if (!F->onlyReadsMemory())
1330  disableLoadElimination();
1331  return Base::visitCallBase(Call);
1332 }
1333 
1334 bool CallAnalyzer::visitReturnInst(ReturnInst &RI) {
1335  // At least one return instruction will be free after inlining.
1336  bool Free = !HasReturn;
1337  HasReturn = true;
1338  return Free;
1339 }
1340 
1341 bool CallAnalyzer::visitBranchInst(BranchInst &BI) {
1342  // We model unconditional branches as essentially free -- they really
1343  // shouldn't exist at all, but handling them makes the behavior of the
1344  // inliner more regular and predictable. Interestingly, conditional branches
1345  // which will fold away are also free.
1346  return BI.isUnconditional() || isa<ConstantInt>(BI.getCondition()) ||
1347  dyn_cast_or_null<ConstantInt>(
1348  SimplifiedValues.lookup(BI.getCondition()));
1349 }
1350 
1351 bool CallAnalyzer::visitSelectInst(SelectInst &SI) {
1352  bool CheckSROA = SI.getType()->isPointerTy();
1353  Value *TrueVal = SI.getTrueValue();
1354  Value *FalseVal = SI.getFalseValue();
1355 
1356  Constant *TrueC = dyn_cast<Constant>(TrueVal);
1357  if (!TrueC)
1358  TrueC = SimplifiedValues.lookup(TrueVal);
1359  Constant *FalseC = dyn_cast<Constant>(FalseVal);
1360  if (!FalseC)
1361  FalseC = SimplifiedValues.lookup(FalseVal);
1362  Constant *CondC =
1363  dyn_cast_or_null<Constant>(SimplifiedValues.lookup(SI.getCondition()));
1364 
1365  if (!CondC) {
1366  // Select C, X, X => X
1367  if (TrueC == FalseC && TrueC) {
1368  SimplifiedValues[&SI] = TrueC;
1369  return true;
1370  }
1371 
1372  if (!CheckSROA)
1373  return Base::visitSelectInst(SI);
1374 
1375  std::pair<Value *, APInt> TrueBaseAndOffset =
1376  ConstantOffsetPtrs.lookup(TrueVal);
1377  std::pair<Value *, APInt> FalseBaseAndOffset =
1378  ConstantOffsetPtrs.lookup(FalseVal);
1379  if (TrueBaseAndOffset == FalseBaseAndOffset && TrueBaseAndOffset.first) {
1380  ConstantOffsetPtrs[&SI] = TrueBaseAndOffset;
1381 
1382  Value *SROAArg;
1384  if (lookupSROAArgAndCost(TrueVal, SROAArg, CostIt))
1385  SROAArgValues[&SI] = SROAArg;
1386  return true;
1387  }
1388 
1389  return Base::visitSelectInst(SI);
1390  }
1391 
1392  // Select condition is a constant.
1393  Value *SelectedV = CondC->isAllOnesValue()
1394  ? TrueVal
1395  : (CondC->isNullValue()) ? FalseVal : nullptr;
1396  if (!SelectedV) {
1397  // Condition is a vector constant that is not all 1s or all 0s. If all
1398  // operands are constants, ConstantExpr::getSelect() can handle the cases
1399  // such as select vectors.
1400  if (TrueC && FalseC) {
1401  if (auto *C = ConstantExpr::getSelect(CondC, TrueC, FalseC)) {
1402  SimplifiedValues[&SI] = C;
1403  return true;
1404  }
1405  }
1406  return Base::visitSelectInst(SI);
1407  }
1408 
1409  // Condition is either all 1s or all 0s. SI can be simplified.
1410  if (Constant *SelectedC = dyn_cast<Constant>(SelectedV)) {
1411  SimplifiedValues[&SI] = SelectedC;
1412  return true;
1413  }
1414 
1415  if (!CheckSROA)
1416  return true;
1417 
1418  std::pair<Value *, APInt> BaseAndOffset =
1419  ConstantOffsetPtrs.lookup(SelectedV);
1420  if (BaseAndOffset.first) {
1421  ConstantOffsetPtrs[&SI] = BaseAndOffset;
1422 
1423  Value *SROAArg;
1425  if (lookupSROAArgAndCost(SelectedV, SROAArg, CostIt))
1426  SROAArgValues[&SI] = SROAArg;
1427  }
1428 
1429  return true;
1430 }
1431 
1432 bool CallAnalyzer::visitSwitchInst(SwitchInst &SI) {
1433  // We model unconditional switches as free, see the comments on handling
1434  // branches.
1435  if (isa<ConstantInt>(SI.getCondition()))
1436  return true;
1437  if (Value *V = SimplifiedValues.lookup(SI.getCondition()))
1438  if (isa<ConstantInt>(V))
1439  return true;
1440 
1441  // Assume the most general case where the switch is lowered into
1442  // either a jump table, bit test, or a balanced binary tree consisting of
1443  // case clusters without merging adjacent clusters with the same
1444  // destination. We do not consider the switches that are lowered with a mix
1445  // of jump table/bit test/binary search tree. The cost of the switch is
1446  // proportional to the size of the tree or the size of jump table range.
1447  //
1448  // NB: We convert large switches which are just used to initialize large phi
1449  // nodes to lookup tables instead in simplify-cfg, so this shouldn't prevent
1450  // inlining those. It will prevent inlining in cases where the optimization
1451  // does not (yet) fire.
1452 
1453  // Maximum valid cost increased in this function.
1454  int CostUpperBound = INT_MAX - InlineConstants::InstrCost - 1;
1455 
1456  // Exit early for a large switch, assuming one case needs at least one
1457  // instruction.
1458  // FIXME: This is not true for a bit test, but ignore such case for now to
1459  // save compile-time.
1460  int64_t CostLowerBound =
1461  std::min((int64_t)CostUpperBound,
1462  (int64_t)SI.getNumCases() * InlineConstants::InstrCost + Cost);
1463 
1464  if (CostLowerBound > Threshold && !ComputeFullInlineCost) {
1465  addCost((int64_t)SI.getNumCases() * InlineConstants::InstrCost);
1466  return false;
1467  }
1468 
1469  unsigned JumpTableSize = 0;
1470  unsigned NumCaseCluster =
1471  TTI.getEstimatedNumberOfCaseClusters(SI, JumpTableSize);
1472 
1473  // If suitable for a jump table, consider the cost for the table size and
1474  // branch to destination.
1475  if (JumpTableSize) {
1476  int64_t JTCost = (int64_t)JumpTableSize * InlineConstants::InstrCost +
1477  4 * InlineConstants::InstrCost;
1478 
1479  addCost(JTCost, (int64_t)CostUpperBound);
1480  return false;
1481  }
1482 
1483  // Considering forming a binary search, we should find the number of nodes
1484  // which is same as the number of comparisons when lowered. For a given
1485  // number of clusters, n, we can define a recursive function, f(n), to find
1486  // the number of nodes in the tree. The recursion is :
1487  // f(n) = 1 + f(n/2) + f (n - n/2), when n > 3,
1488  // and f(n) = n, when n <= 3.
1489  // This will lead a binary tree where the leaf should be either f(2) or f(3)
1490  // when n > 3. So, the number of comparisons from leaves should be n, while
1491  // the number of non-leaf should be :
1492  // 2^(log2(n) - 1) - 1
1493  // = 2^log2(n) * 2^-1 - 1
1494  // = n / 2 - 1.
1495  // Considering comparisons from leaf and non-leaf nodes, we can estimate the
1496  // number of comparisons in a simple closed form :
1497  // n + n / 2 - 1 = n * 3 / 2 - 1
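 // Worked example of the estimate above: for n = 8 clusters the recursion
 // gives f(8) = 1 + f(4) + f(4) = 1 + 2 * (1 + f(2) + f(2)) = 11, and the
 // closed form gives 8 * 3 / 2 - 1 = 11 as well. The closed form is only an
 // estimate; for some cluster counts (e.g. n = 6, where f(6) = 7 but the
 // closed form gives 8) it slightly overcounts.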
1498  if (NumCaseCluster <= 3) {
1499  // Suppose a comparison includes one compare and one conditional branch.
1500  addCost(NumCaseCluster * 2 * InlineConstants::InstrCost);
1501  return false;
1502  }
1503 
1504  int64_t ExpectedNumberOfCompare = 3 * (int64_t)NumCaseCluster / 2 - 1;
1505  int64_t SwitchCost =
1506  ExpectedNumberOfCompare * 2 * InlineConstants::InstrCost;
1507 
1508  addCost(SwitchCost, (int64_t)CostUpperBound);
1509  return false;
1510 }
1511 
1512 bool CallAnalyzer::visitIndirectBrInst(IndirectBrInst &IBI) {
1513  // We never want to inline functions that contain an indirectbr. This is
1514  // incorrect because all the blockaddress's (in static global initializers
1515  // for example) would be referring to the original function, and this
1516  // indirect jump would jump from the inlined copy of the function into the
1517  // original function which is extremely undefined behavior.
1518  // FIXME: This logic isn't really right; we can safely inline functions with
1519  // indirectbr's as long as no other function or global references the
1520  // blockaddress of a block within the current function.
1521  HasIndirectBr = true;
1522  return false;
1523 }
1524 
1525 bool CallAnalyzer::visitResumeInst(ResumeInst &RI) {
1526  // FIXME: It's not clear that a single instruction is an accurate model for
1527  // the inline cost of a resume instruction.
1528  return false;
1529 }
1530 
1531 bool CallAnalyzer::visitCleanupReturnInst(CleanupReturnInst &CRI) {
1532  // FIXME: It's not clear that a single instruction is an accurate model for
1533  // the inline cost of a cleanupret instruction.
1534  return false;
1535 }
1536 
1537 bool CallAnalyzer::visitCatchReturnInst(CatchReturnInst &CRI) {
1538  // FIXME: It's not clear that a single instruction is an accurate model for
1539  // the inline cost of a catchret instruction.
1540  return false;
1541 }
1542 
1543 bool CallAnalyzer::visitUnreachableInst(UnreachableInst &I) {
1544  // FIXME: It might be reasonable to discount the cost of instructions leading
1545  // to unreachable as they have the lowest possible impact on both runtime and
1546  // code size.
1547  return true; // No actual code is needed for unreachable.
1548 }
1549 
1550 bool CallAnalyzer::visitInstruction(Instruction &I) {
1551  // Some instructions are free. All of the free intrinsics can also be
1552  // handled by SROA, etc.
1553  if (TargetTransformInfo::TCC_Free == TTI.getUserCost(&I))
1554  return true;
1555 
1556  // We found something we don't understand or can't handle. Mark any SROA-able
1557  // values in the operand list as no longer viable.
1558  for (User::op_iterator OI = I.op_begin(), OE = I.op_end(); OI != OE; ++OI)
1559  disableSROA(*OI);
1560 
1561  return false;
1562 }
1563 
1564 /// Analyze a basic block for its contribution to the inline cost.
1565 ///
1566 /// This method walks the analyzer over every instruction in the given basic
1567 /// block and accounts for their cost during inlining at this callsite. It
1568 /// aborts early if the threshold has been exceeded or an impossible to inline
1569 /// construct has been detected. It returns false if inlining is no longer
1570 /// viable, and true if inlining remains viable.
1571 InlineResult
1572 CallAnalyzer::analyzeBlock(BasicBlock *BB,
1573  SmallPtrSetImpl<const Value *> &EphValues) {
1574  for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I) {
1575  // FIXME: Currently, the number of instructions in a function, regardless of
1576  // our ability to simplify them during inlining to constants or dead code,
1577  // is actually used by the vector bonus heuristic. As long as that's true,
1578  // we have to special case debug intrinsics here to prevent differences in
1579  // inlining due to debug symbols. Eventually, the number of unsimplified
1580  // instructions shouldn't factor into the cost computation, but until then,
1581  // hack around it here.
1582  if (isa<DbgInfoIntrinsic>(I))
1583  continue;
1584 
1585  // Skip ephemeral values.
1586  if (EphValues.count(&*I))
1587  continue;
1588 
1589  ++NumInstructions;
1590  if (isa<ExtractElementInst>(I) || I->getType()->isVectorTy())
1591  ++NumVectorInstructions;
1592 
1593  // If the instruction simplified to a constant, there is no cost to this
1594  // instruction. Visit the instructions using our InstVisitor to account for
1595  // all of the per-instruction logic. The visit tree returns true if we
1596  // consumed the instruction in any way, and false if the instruction's base
1597  // cost should count against inlining.
1598  if (Base::visit(&*I))
1599  ++NumInstructionsSimplified;
1600  else
1601  addCost(InlineConstants::InstrCost);
1602 
1603  using namespace ore;
1604  // If visiting this instruction detected an uninlinable pattern, abort.
1605  InlineResult IR;
1606  if (IsRecursiveCall)
1607  IR = "recursive";
1608  else if (ExposesReturnsTwice)
1609  IR = "exposes returns twice";
1610  else if (HasDynamicAlloca)
1611  IR = "dynamic alloca";
1612  else if (HasIndirectBr)
1613  IR = "indirect branch";
1614  else if (HasUninlineableIntrinsic)
1615  IR = "uninlinable intrinsic";
1616  else if (InitsVargArgs)
1617  IR = "varargs";
1618  if (!IR) {
1619  if (ORE)
1620  ORE->emit([&]() {
1621  return OptimizationRemarkMissed(DEBUG_TYPE, "NeverInline",
1622  &CandidateCall)
1623  << NV("Callee", &F) << " has uninlinable pattern ("
1624  << NV("InlineResult", IR.message)
1625  << ") and cost is not fully computed";
1626  });
1627  return IR;
1628  }
1629 
1630  // If the caller is a recursive function then we don't want to inline
1631  // functions which allocate a lot of stack space because it would increase
1632  // the caller stack usage dramatically.
1633  if (IsCallerRecursive &&
1634  AllocatedSize > InlineConstants::TotalAllocaSizeRecursiveCaller) {
1635  InlineResult IR = "recursive and allocates too much stack space";
1636  if (ORE)
1637  ORE->emit([&]() {
1638  return OptimizationRemarkMissed(DEBUG_TYPE, "NeverInline",
1639  &CandidateCall)
1640  << NV("Callee", &F) << " is " << NV("InlineResult", IR.message)
1641  << ". Cost is not fully computed";
1642  });
1643  return IR;
1644  }
1645 
1646  // Check if we've passed the maximum possible threshold so we don't spin in
1647  // huge basic blocks that will never inline.
1648  if (Cost >= Threshold && !ComputeFullInlineCost)
1649  return false;
1650  }
1651 
1652  return true;
1653 }
1654 
1655 /// Compute the base pointer and cumulative constant offsets for V.
1656 ///
1657 /// This strips all constant offsets off of V, leaving it the base pointer, and
1658 /// accumulates the total constant offset applied in the returned constant. It
1659 /// returns null if V is not a pointer, and returns the constant '0' if there are
1660 /// no constant offsets applied.
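1660 /// For example, given
1660 ///   %p = getelementptr inbounds [4 x i32], [4 x i32]* %a, i64 0, i64 2
1660 /// passing %p here rewrites V to %a and returns the constant 8 (two i32
1660 /// elements at 4 bytes each).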
1661 ConstantInt *CallAnalyzer::stripAndComputeInBoundsConstantOffsets(Value *&V) {
1662  if (!V->getType()->isPointerTy())
1663  return nullptr;
1664 
1665  unsigned AS = V->getType()->getPointerAddressSpace();
1666  unsigned IntPtrWidth = DL.getIndexSizeInBits(AS);
1667  APInt Offset = APInt::getNullValue(IntPtrWidth);
1668 
1669  // Even though we don't look through PHI nodes, we could be called on an
1670  // instruction in an unreachable block, which may be on a cycle.
1671  SmallPtrSet<Value *, 4> Visited;
1672  Visited.insert(V);
1673  do {
1674  if (GEPOperator *GEP = dyn_cast<GEPOperator>(V)) {
1675  if (!GEP->isInBounds() || !accumulateGEPOffset(*GEP, Offset))
1676  return nullptr;
1677  V = GEP->getPointerOperand();
1678  } else if (Operator::getOpcode(V) == Instruction::BitCast) {
1679  V = cast<Operator>(V)->getOperand(0);
1680  } else if (GlobalAlias *GA = dyn_cast<GlobalAlias>(V)) {
1681  if (GA->isInterposable())
1682  break;
1683  V = GA->getAliasee();
1684  } else {
1685  break;
1686  }
1687  assert(V->getType()->isPointerTy() && "Unexpected operand type!");
1688  } while (Visited.insert(V).second);
1689 
1690  Type *IntPtrTy = DL.getIntPtrType(V->getContext(), AS);
1691  return cast<ConstantInt>(ConstantInt::get(IntPtrTy, Offset));
1692 }
1693 
1694 /// Find dead blocks due to deleted CFG edges during inlining.
1695 ///
1696 /// If we know the successor of the current block, \p CurrBB, has to be \p
1697 /// NextBB, the other successors of \p CurrBB are dead if these successors have
1698 /// no live incoming CFG edges. If one block is found to be dead, we can
1699 /// continue growing the dead block list by checking the successors of the dead
1700 /// blocks to see if all their incoming edges are dead or not.
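1700 /// For example, if CurrBB ends in a conditional branch to either NextBB or
1700 /// ElseBB and the condition has been folded so that only NextBB can be taken,
1700 /// then ElseBB is dead provided CurrBB was its only live predecessor, and any
1700 /// successor reachable only through ElseBB becomes dead in turn.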
1701 void CallAnalyzer::findDeadBlocks(BasicBlock *CurrBB, BasicBlock *NextBB) {
1702  auto IsEdgeDead = [&](BasicBlock *Pred, BasicBlock *Succ) {
1703  // A CFG edge is dead if the predecessor is dead or the predecessor has a
1704  // known successor which is not the one under exam.
1705  return (DeadBlocks.count(Pred) ||
1706  (KnownSuccessors[Pred] && KnownSuccessors[Pred] != Succ));
1707  };
1708 
1709  auto IsNewlyDead = [&](BasicBlock *BB) {
1710  // If all the edges to a block are dead, the block is also dead.
1711  return (!DeadBlocks.count(BB) &&
1712  llvm::all_of(predecessors(BB),
1713  [&](BasicBlock *P) { return IsEdgeDead(P, BB); }));
1714  };
1715 
1716  for (BasicBlock *Succ : successors(CurrBB)) {
1717  if (Succ == NextBB || !IsNewlyDead(Succ))
1718  continue;
1719  SmallVector<BasicBlock *, 4> NewDead;
1720  NewDead.push_back(Succ);
1721  while (!NewDead.empty()) {
1722  BasicBlock *Dead = NewDead.pop_back_val();
1723  if (DeadBlocks.insert(Dead))
1724  // Continue growing the dead block lists.
1725  for (BasicBlock *S : successors(Dead))
1726  if (IsNewlyDead(S))
1727  NewDead.push_back(S);
1728  }
1729  }
1730 }
1731 
1732 /// Analyze a call site for potential inlining.
1733 ///
1734 /// Returns true if inlining this call is viable, and false if it is not
1735 /// viable. It computes the cost and adjusts the threshold based on numerous
1736 /// factors and heuristics. If this method returns false but the computed cost
1737 /// is below the computed threshold, then inlining was forcibly disabled by
1738 /// some artifact of the routine.
1739 InlineResult CallAnalyzer::analyzeCall(CallBase &Call) {
1740  ++NumCallsAnalyzed;
1741 
1742  // Perform some tweaks to the cost and threshold based on the direct
1743  // callsite information.
1744 
1745  // We want to more aggressively inline vector-dense kernels, so up the
1746  // threshold, and we'll lower it if the % of vector instructions gets too
1747  // low. Note that these bonuses are some what arbitrary and evolved over time
1748  // by accident as much as because they are principled bonuses.
1749  //
1750  // FIXME: It would be nice to remove all such bonuses. At least it would be
1751  // nice to base the bonus values on something more scientific.
1752  assert(NumInstructions == 0);
1753  assert(NumVectorInstructions == 0);
1754 
1755  // Update the threshold based on callsite properties
1756  updateThreshold(Call, F);
1757 
1758  // While Threshold depends on commandline options that can take negative
1759  // values, we want to enforce the invariant that the computed threshold and
1760  // bonuses are non-negative.
1761  assert(Threshold >= 0);
1762  assert(SingleBBBonus >= 0);
1763  assert(VectorBonus >= 0);
1764 
1765  // Speculatively apply all possible bonuses to Threshold. If cost exceeds
1766  // this Threshold any time, and cost cannot decrease, we can stop processing
1767  // the rest of the function body.
1768  Threshold += (SingleBBBonus + VectorBonus);
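 // Illustrative arithmetic (hypothetical numbers, not taken from this file):
 // with Threshold = 225, SingleBBBonus = 112 and VectorBonus = 337, the
 // working threshold becomes 674. If Cost ever reaches 674 the analysis can
 // stop early; any unused bonuses are taken back out before the final
 // Cost < Threshold comparison at the end of this function.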
1769 
1770  // Give out bonuses for the callsite, as the instructions setting them up
1771  // will be gone after inlining.
1772  addCost(-getCallsiteCost(Call, DL));
1773 
1774  // If this function uses the coldcc calling convention, prefer not to inline
1775  // it.
1776  if (F.getCallingConv() == CallingConv::Cold)
1777  addCost(InlineConstants::ColdccPenalty);
1778 
1779  // Check if we're done. This can happen due to bonuses and penalties.
1780  if (Cost >= Threshold && !ComputeFullInlineCost)
1781  return "high cost";
1782 
1783  if (F.empty())
1784  return true;
1785 
1786  Function *Caller = Call.getFunction();
1787  // Check if the caller function is recursive itself.
1788  for (User *U : Caller->users()) {
1789  CallBase *Call = dyn_cast<CallBase>(U);
1790  if (Call && Call->getFunction() == Caller) {
1791  IsCallerRecursive = true;
1792  break;
1793  }
1794  }
1795 
1796  // Populate our simplified values by mapping from function arguments to call
1797  // arguments with known important simplifications.
1798  auto CAI = Call.arg_begin();
1799  for (Function::arg_iterator FAI = F.arg_begin(), FAE = F.arg_end();
1800  FAI != FAE; ++FAI, ++CAI) {
1801  assert(CAI != Call.arg_end());
1802  if (Constant *C = dyn_cast<Constant>(CAI))
1803  SimplifiedValues[&*FAI] = C;
1804 
1805  Value *PtrArg = *CAI;
1806  if (ConstantInt *C = stripAndComputeInBoundsConstantOffsets(PtrArg)) {
1807  ConstantOffsetPtrs[&*FAI] = std::make_pair(PtrArg, C->getValue());
1808 
1809  // We can SROA any pointer arguments derived from alloca instructions.
1810  if (isa<AllocaInst>(PtrArg)) {
1811  SROAArgValues[&*FAI] = PtrArg;
1812  SROAArgCosts[PtrArg] = 0;
1813  }
1814  }
1815  }
1816  NumConstantArgs = SimplifiedValues.size();
1817  NumConstantOffsetPtrArgs = ConstantOffsetPtrs.size();
1818  NumAllocaArgs = SROAArgValues.size();
1819 
1820  // FIXME: If a caller has multiple calls to a callee, we end up recomputing
1821  // the ephemeral values multiple times (and they're completely determined by
1822  // the callee, so this is purely duplicate work).
1823  SmallPtrSet<const Value *, 32> EphValues;
1824  CodeMetrics::collectEphemeralValues(&F, &GetAssumptionCache(F), EphValues);
1825 
1826  // The worklist of live basic blocks in the callee *after* inlining. We avoid
1827  // adding basic blocks of the callee which can be proven to be dead for this
1828  // particular call site in order to get more accurate cost estimates. This
1829  // requires a somewhat heavyweight iteration pattern: we need to walk the
1830  // basic blocks in a breadth-first order as we insert live successors. To
1831  // accomplish this, prioritizing for small iterations because we exit after
1832  // crossing our threshold, we use a small-size optimized SetVector.
1833  typedef SetVector<BasicBlock *, SmallVector<BasicBlock *, 16>,
1834  SmallPtrSet<BasicBlock *, 16>>
1835  BBSetVector;
1836  BBSetVector BBWorklist;
1837  BBWorklist.insert(&F.getEntryBlock());
1838  bool SingleBB = true;
1839  // Note that we *must not* cache the size, this loop grows the worklist.
1840  for (unsigned Idx = 0; Idx != BBWorklist.size(); ++Idx) {
1841  // Bail out the moment we cross the threshold. This means we'll under-count
1842  // the cost, but only when undercounting doesn't matter.
1843  if (Cost >= Threshold && !ComputeFullInlineCost)
1844  break;
1845 
1846  BasicBlock *BB = BBWorklist[Idx];
1847  if (BB->empty())
1848  continue;
1849 
1850  // Disallow inlining a blockaddress with uses other than strictly callbr.
1851  // A blockaddress only has defined behavior for an indirect branch in the
1852  // same function, and we do not currently support inlining indirect
1853  // branches. But, the inliner may not see an indirect branch that ends up
1854  // being dead code at a particular call site. If the blockaddress escapes
1855  // the function, e.g., via a global variable, inlining may lead to an
1856  // invalid cross-function reference.
1857  // FIXME: pr/39560: continue relaxing this overt restriction.
1858  if (BB->hasAddressTaken())
1859  for (User *U : BlockAddress::get(&*BB)->users())
1860  if (!isa<CallBrInst>(*U))
1861  return "blockaddress used outside of callbr";
1862 
1863  // Analyze the cost of this block. If we blow through the threshold, this
1864  // returns false, and we can bail out.
1865  InlineResult IR = analyzeBlock(BB, EphValues);
1866  if (!IR)
1867  return IR;
1868 
1869  Instruction *TI = BB->getTerminator();
1870 
1871  // Add in the live successors by first checking whether we have terminator
1872  // that may be simplified based on the values simplified by this call.
1873  if (BranchInst *BI = dyn_cast<BranchInst>(TI)) {
1874  if (BI->isConditional()) {
1875  Value *Cond = BI->getCondition();
1876  if (ConstantInt *SimpleCond =
1877  dyn_cast_or_null<ConstantInt>(SimplifiedValues.lookup(Cond))) {
1878  BasicBlock *NextBB = BI->getSuccessor(SimpleCond->isZero() ? 1 : 0);
1879  BBWorklist.insert(NextBB);
1880  KnownSuccessors[BB] = NextBB;
1881  findDeadBlocks(BB, NextBB);
1882  continue;
1883  }
1884  }
1885  } else if (SwitchInst *SI = dyn_cast<SwitchInst>(TI)) {
1886  Value *Cond = SI->getCondition();
1887  if (ConstantInt *SimpleCond =
1888  dyn_cast_or_null<ConstantInt>(SimplifiedValues.lookup(Cond))) {
1889  BasicBlock *NextBB = SI->findCaseValue(SimpleCond)->getCaseSuccessor();
1890  BBWorklist.insert(NextBB);
1891  KnownSuccessors[BB] = NextBB;
1892  findDeadBlocks(BB, NextBB);
1893  continue;
1894  }
1895  }
1896 
1897  // If we're unable to select a particular successor, just count all of
1898  // them.
1899  for (unsigned TIdx = 0, TSize = TI->getNumSuccessors(); TIdx != TSize;
1900  ++TIdx)
1901  BBWorklist.insert(TI->getSuccessor(TIdx));
1902 
1903  // If we had any successors at this point, then post-inlining is likely to
1904  // have them as well. Note that we assume any basic blocks which existed
1905  // due to branches or switches which folded above will also fold after
1906  // inlining.
1907  if (SingleBB && TI->getNumSuccessors() > 1) {
1908  // Take off the bonus we applied to the threshold.
1909  Threshold -= SingleBBBonus;
1910  SingleBB = false;
1911  }
1912  }
1913 
1914  bool OnlyOneCallAndLocalLinkage =
1915  F.hasLocalLinkage() && F.hasOneUse() && &F == Call.getCalledFunction();
1916  // If this is a noduplicate call, we can still inline as long as
1917  // inlining this would cause the removal of the caller (so the instruction
1918  // is not actually duplicated, just moved).
1919  if (!OnlyOneCallAndLocalLinkage && ContainsNoDuplicateCall)
1920  return "noduplicate";
1921 
1922  // Loops generally act a lot like calls in that they act like barriers to
1923  // movement, require a certain amount of setup, etc. So when optimising for
1924  // size, we penalise any call sites that perform loops. We do this after all
1925  // other costs here, so will likely only be dealing with relatively small
1926  // functions (and hence DT and LI will hopefully be cheap).
1927  if (Caller->hasMinSize()) {
1928  DominatorTree DT(F);
1929  LoopInfo LI(DT);
1930  int NumLoops = 0;
1931  for (Loop *L : LI) {
1932  // Ignore loops that will not be executed
1933  if (DeadBlocks.count(L->getHeader()))
1934  continue;
1935  NumLoops++;
1936  }
1937  addCost(NumLoops * InlineConstants::CallPenalty);
1938  }
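 // For instance, if the caller is compiled for minimum size and two loops in
 // the callee remain live after the simplifications above, this charges
 // 2 * InlineConstants::CallPenalty, on the assumption that each loop costs
 // roughly as much as a call would.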
1939 
1940  // We applied the maximum possible vector bonus at the beginning. Now,
1941  // subtract the excess bonus, if any, from the Threshold before
1942  // comparing against Cost.
1943  if (NumVectorInstructions <= NumInstructions / 10)
1944  Threshold -= VectorBonus;
1945  else if (NumVectorInstructions <= NumInstructions / 2)
1946  Threshold -= VectorBonus/2;
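 // Hypothetical example: with VectorBonus = 337, a callee of 100 analyzed
 // instructions keeps the full bonus only if more than 50 of them are vector
 // instructions; at 11-50 vector instructions half the bonus (168) is taken
 // back, and at 10 or fewer the whole bonus is removed.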
1947 
1948  return Cost < std::max(1, Threshold);
1949 }
1950 
1951 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1952 /// Dump stats about this call's analysis.
1953 LLVM_DUMP_METHOD void CallAnalyzer::dump() {
1954 #define DEBUG_PRINT_STAT(x) dbgs() << " " #x ": " << x << "\n"
1955  DEBUG_PRINT_STAT(NumConstantArgs);
1956  DEBUG_PRINT_STAT(NumConstantOffsetPtrArgs);
1957  DEBUG_PRINT_STAT(NumAllocaArgs);
1958  DEBUG_PRINT_STAT(NumConstantPtrCmps);
1959  DEBUG_PRINT_STAT(NumConstantPtrDiffs);
1960  DEBUG_PRINT_STAT(NumInstructionsSimplified);
1961  DEBUG_PRINT_STAT(NumInstructions);
1962  DEBUG_PRINT_STAT(SROACostSavings);
1963  DEBUG_PRINT_STAT(SROACostSavingsLost);
1964  DEBUG_PRINT_STAT(LoadEliminationCost);
1965  DEBUG_PRINT_STAT(ContainsNoDuplicateCall);
1966  DEBUG_PRINT_STAT(Cost);
1967  DEBUG_PRINT_STAT(Threshold);
1968 #undef DEBUG_PRINT_STAT
1969 }
1970 #endif
1971 
1972 /// Test that there are no attribute conflicts between Caller and Callee
1973 /// that prevent inlining.
1974 static bool functionsHaveCompatibleAttributes(Function *Caller,
1975  Function *Callee,
1976  TargetTransformInfo &TTI) {
1977  return TTI.areInlineCompatible(Caller, Callee) &&
1978  AttributeFuncs::areInlineCompatible(*Caller, *Callee);
1979 }
1980 
1981 int llvm::getCallsiteCost(CallBase &Call, const DataLayout &DL) {
1982  int Cost = 0;
1983  for (unsigned I = 0, E = Call.arg_size(); I != E; ++I) {
1984  if (Call.isByValArgument(I)) {
1985  // We approximate the number of loads and stores needed by dividing the
1986  // size of the byval type by the target's pointer size.
1987  PointerType *PTy = cast<PointerType>(Call.getArgOperand(I)->getType());
1988  unsigned TypeSize = DL.getTypeSizeInBits(PTy->getElementType());
1989  unsigned AS = PTy->getAddressSpace();
1990  unsigned PointerSize = DL.getPointerSizeInBits(AS);
1991  // Ceiling division.
1992  unsigned NumStores = (TypeSize + PointerSize - 1) / PointerSize;
1993 
1994  // If it generates more than 8 stores it is likely to be expanded as an
1995  // inline memcpy so we take that as an upper bound. Otherwise we assume
1996  // one load and one store per word copied.
1997  // FIXME: The maxStoresPerMemcpy setting from the target should be used
1998  // here instead of a magic number of 8, but it's not available via
1999  // DataLayout.
2000  NumStores = std::min(NumStores, 8U);
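 // Worked example: a byval argument of a 256-bit struct with 64-bit pointers
 // needs (256 + 63) / 64 = 4 stores, so the statement below charges
 // 2 * 4 * InlineConstants::InstrCost for the implied copy; a 4096-bit type
 // would compute 64 stores but is clamped to 8 by the line above.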
2001 
2002  Cost += 2 * NumStores * InlineConstants::InstrCost;
2003  } else {
2004  // For non-byval arguments subtract off one instruction per call
2005  // argument.
2006  Cost += InlineConstants::InstrCost;
2007  }
2008  }
2009  // The call instruction also disappears after inlining.
2010  Cost += InlineConstants::InstrCost + InlineConstants::CallPenalty;
2011  return Cost;
2012 }
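 // Putting the pieces together, assuming InlineConstants::InstrCost is 5 and
 // InlineConstants::CallPenalty is 25 (the values this revision is believed
 // to use): a call passing three scalar arguments is credited
 // 3 * 5 + 5 + 25 = 45, which analyzeCall subtracts from Cost because the
 // argument setup instructions and the call itself vanish after inlining.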
2013 
2014 InlineCost llvm::getInlineCost(
2015  CallBase &Call, const InlineParams &Params, TargetTransformInfo &CalleeTTI,
2016  std::function<AssumptionCache &(Function &)> &GetAssumptionCache,
2017  Optional<function_ref<BlockFrequencyInfo &(Function &)>> GetBFI,
2018  ProfileSummaryInfo *PSI, OptimizationRemarkEmitter *ORE) {
2019  return getInlineCost(Call, Call.getCalledFunction(), Params, CalleeTTI,
2020  GetAssumptionCache, GetBFI, PSI, ORE);
2021 }
2022 
2023 InlineCost llvm::getInlineCost(
2024  CallBase &Call, Function *Callee, const InlineParams &Params,
2025  TargetTransformInfo &CalleeTTI,
2026  std::function<AssumptionCache &(Function &)> &GetAssumptionCache,
2027  Optional<function_ref<BlockFrequencyInfo &(Function &)>> GetBFI,
2028  ProfileSummaryInfo *PSI, OptimizationRemarkEmitter *ORE) {
2029 
2030  // Cannot inline indirect calls.
2031  if (!Callee)
2032  return llvm::InlineCost::getNever("indirect call");
2033 
2034  // Never inline calls with byval arguments that do not have the alloca
2035  // address space. Since byval arguments can be replaced with a copy to an
2036  // alloca, the inlined code would need to be adjusted to handle that the
2037  // argument is in the alloca address space (so it is a little bit complicated
2038  // to solve).
2039  unsigned AllocaAS = Callee->getParent()->getDataLayout().getAllocaAddrSpace();
2040  for (unsigned I = 0, E = Call.arg_size(); I != E; ++I)
2041  if (Call.isByValArgument(I)) {
2042  PointerType *PTy = cast<PointerType>(Call.getArgOperand(I)->getType());
2043  if (PTy->getAddressSpace() != AllocaAS)
2044  return llvm::InlineCost::getNever("byval arguments without alloca"
2045  " address space");
2046  }
2047 
2048  // Calls to functions with always-inline attributes should be inlined
2049  // whenever possible.
2050  if (Call.hasFnAttr(Attribute::AlwaysInline)) {
2051  auto IsViable = isInlineViable(*Callee);
2052  if (IsViable)
2053  return llvm::InlineCost::getAlways("always inline attribute");
2054  return llvm::InlineCost::getNever(IsViable.message);
2055  }
2056 
2057  // Never inline functions with conflicting attributes (unless callee has
2058  // always-inline attribute).
2059  Function *Caller = Call.getCaller();
2060  if (!functionsHaveCompatibleAttributes(Caller, Callee, CalleeTTI))
2061  return llvm::InlineCost::getNever("conflicting attributes");
2062 
2063  // Don't inline this call if the caller has the optnone attribute.
2064  if (Caller->hasOptNone())
2065  return llvm::InlineCost::getNever("optnone attribute");
2066 
2067  // Don't inline a function that treats null pointer as valid into a caller
2068  // that does not have this attribute.
2069  if (!Caller->nullPointerIsDefined() && Callee->nullPointerIsDefined())
2070  return llvm::InlineCost::getNever("nullptr definitions incompatible");
2071 
2072  // Don't inline functions which can be interposed at link-time.
2073  if (Callee->isInterposable())
2074  return llvm::InlineCost::getNever("interposable");
2075 
2076  // Don't inline functions marked noinline.
2077  if (Callee->hasFnAttribute(Attribute::NoInline))
2078  return llvm::InlineCost::getNever("noinline function attribute");
2079 
2080  // Don't inline call sites marked noinline.
2081  if (Call.isNoInline())
2082  return llvm::InlineCost::getNever("noinline call site attribute");
2083 
2084  LLVM_DEBUG(llvm::dbgs() << " Analyzing call of " << Callee->getName()
2085  << "... (caller:" << Caller->getName() << ")\n");
2086 
2087  CallAnalyzer CA(CalleeTTI, GetAssumptionCache, GetBFI, PSI, ORE, *Callee,
2088  Call, Params);
2089  InlineResult ShouldInline = CA.analyzeCall(Call);
2090 
2091  LLVM_DEBUG(CA.dump());
2092 
2093  // Check if there was a reason to force inlining or no inlining.
2094  if (!ShouldInline && CA.getCost() < CA.getThreshold())
2095  return InlineCost::getNever(ShouldInline.message);
2096  if (ShouldInline && CA.getCost() >= CA.getThreshold())
2097  return InlineCost::getAlways("empty function");
2098 
2099  return llvm::InlineCost::get(CA.getCost(), CA.getThreshold());
2100 }
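 // A minimal usage sketch (illustrative only; the in-tree inliners, e.g.
 // lib/Transforms/IPO/Inliner.cpp, wire these callbacks up from real
 // analyses, and doInlining below is a hypothetical client helper):
 //
 //   std::function<AssumptionCache &(Function &)> GetAssumptionCache =
 //       [&](Function &F) -> AssumptionCache & {
 //         return FAM.getResult<AssumptionAnalysis>(F);
 //       };
 //   InlineCost IC = getInlineCost(CB, getInlineParams(), CalleeTTI,
 //                                 GetAssumptionCache, /*GetBFI=*/None,
 //                                 PSI, &ORE);
 //   if (IC)            // implicit "is the cost under the threshold?"
 //     doInlining(CB);  // hypothetical helper in the client pass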
2101 
2102 InlineResult llvm::isInlineViable(Function &F) {
2103  bool ReturnsTwice = F.hasFnAttribute(Attribute::ReturnsTwice);
2104  for (Function::iterator BI = F.begin(), BE = F.end(); BI != BE; ++BI) {
2105  // Disallow inlining of functions which contain indirect branches.
2106  if (isa<IndirectBrInst>(BI->getTerminator()))
2107  return "contains indirect branches";
2108 
2109  // Disallow inlining of blockaddresses which are used by non-callbr
2110  // instructions.
2111  if (BI->hasAddressTaken())
2112  for (User *U : BlockAddress::get(&*BI)->users())
2113  if (!isa<CallBrInst>(*U))
2114  return "blockaddress used outside of callbr";
2115 
2116  for (auto &II : *BI) {
2117  CallBase *Call = dyn_cast<CallBase>(&II);
2118  if (!Call)
2119  continue;
2120 
2121  // Disallow recursive calls.
2122  if (&F == Call->getCalledFunction())
2123  return "recursive call";
2124 
2125  // Disallow calls which expose returns-twice to a function not previously
2126  // attributed as such.
2127  if (!ReturnsTwice && isa<CallInst>(Call) &&
2128  cast<CallInst>(Call)->canReturnTwice())
2129  return "exposes returns-twice attribute";
2130 
2131  if (Call->getCalledFunction())
2132  switch (Call->getCalledFunction()->getIntrinsicID()) {
2133  default:
2134  break;
2135  // Disallow inlining of @llvm.icall.branch.funnel because current
2136  // backend can't separate call targets from call arguments.
2137  case llvm::Intrinsic::icall_branch_funnel:
2138  return "disallowed inlining of @llvm.icall.branch.funnel";
2139  // Disallow inlining functions that call @llvm.localescape. Doing this
2140  // correctly would require major changes to the inliner.
2141  case llvm::Intrinsic::localescape:
2142  return "disallowed inlining of @llvm.localescape";
2143  // Disallow inlining of functions that initialize VarArgs with va_start.
2144  case llvm::Intrinsic::vastart:
2145  return "contains VarArgs initialized with va_start";
2146  }
2147  }
2148  }
2149 
2150  return true;
2151 }
2152 
2153 // APIs to create InlineParams based on command line flags and/or other
2154 // parameters.
2155 
2156 InlineParams llvm::getInlineParams(int Threshold) {
2157  InlineParams Params;
2158 
2159  // This field is the threshold to use for a callee by default. This is
2160  // derived from one or more of:
2161  // * optimization or size-optimization levels,
2162  // * a value passed to createFunctionInliningPass function, or
2163  // * the -inline-threshold flag.
2164  // If the -inline-threshold flag is explicitly specified, that is used
2165  // irrespective of anything else.
2166  if (InlineThreshold.getNumOccurrences() > 0)
2167  Params.DefaultThreshold = InlineThreshold;
2168  else
2169  Params.DefaultThreshold = Threshold;
2170 
2171  // Set the HintThreshold knob from the -inlinehint-threshold.
2172  Params.HintThreshold = HintThreshold;
2173 
2174  // Set the HotCallSiteThreshold knob from the -hot-callsite-threshold.
2175  Params.HotCallSiteThreshold = HotCallSiteThreshold;
2176 
2177  // If the -locally-hot-callsite-threshold is explicitly specified, use it to
2178  // populate LocallyHotCallSiteThreshold. Later, we populate
2179  // Params.LocallyHotCallSiteThreshold from -locally-hot-callsite-threshold if
2180  // we know that optimization level is O3 (in the getInlineParams variant that
2181  // takes the opt and size levels).
2182  // FIXME: Remove this check (and make the assignment unconditional) after
2183  // addressing size regression issues at O2.
2184  if (LocallyHotCallSiteThreshold.getNumOccurrences() > 0)
2185  Params.LocallyHotCallSiteThreshold = LocallyHotCallSiteThreshold;
2186 
2187  // Set the ColdCallSiteThreshold knob from the -inline-cold-callsite-threshold.
2188  Params.ColdCallSiteThreshold = ColdCallSiteThreshold;
2189 
2190  // Set the OptMinSizeThreshold and OptSizeThreshold params only if the
2191  // -inlinehint-threshold commandline option is not explicitly given. If that
2192  // option is present, then its value applies even for callees with size and
2193  // minsize attributes.
2194  // If the -inline-threshold is not specified, set the ColdThreshold from the
2195  // -inlinecold-threshold even if it is not explicitly passed. If
2196  // -inline-threshold is specified, then -inlinecold-threshold needs to be
2197  // explicitly specified to set the ColdThreshold knob
2198  if (InlineThreshold.getNumOccurrences() == 0) {
2199  Params.OptMinSizeThreshold = InlineConstants::OptMinSizeThreshold;
2200  Params.OptSizeThreshold = InlineConstants::OptSizeThreshold;
2201  Params.ColdThreshold = ColdThreshold;
2202  } else if (ColdThreshold.getNumOccurrences() > 0) {
2203  Params.ColdThreshold = ColdThreshold;
2204  }
2205  return Params;
2206 }
2207 
2208 InlineParams llvm::getInlineParams() {
2209  return getInlineParams(InlineThreshold);
2210 }
2211 
2212 // Compute the default threshold for inlining based on the opt level and the
2213 // size opt level.
2214 static int computeThresholdFromOptLevels(unsigned OptLevel,
2215  unsigned SizeOptLevel) {
2216  if (OptLevel > 2)
2217  return InlineConstants::OptAggressiveThreshold;
2218  if (SizeOptLevel == 1) // -Os
2219  return InlineConstants::OptSizeThreshold;
2220  if (SizeOptLevel == 2) // -Oz
2221  return InlineConstants::OptMinSizeThreshold;
2222  return InlineThreshold;
2223 }
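 // Under the assumption that InlineCost.h defines OptAggressiveThreshold = 275,
 // OptSizeThreshold = 50 and OptMinSizeThreshold = 5 (the values this revision
 // is believed to use), this maps -O3 to 275, -Os to 50, -Oz to 5, and
 // everything else to the value of -inline-threshold (225 by default).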
2224 
2225 InlineParams llvm::getInlineParams(unsigned OptLevel, unsigned SizeOptLevel) {
2226  auto Params =
2227  getInlineParams(computeThresholdFromOptLevels(OptLevel, SizeOptLevel));
2228  // At O3, use the value of -locally-hot-callsite-threshold option to populate
2229  // Params.LocallyHotCallSiteThreshold. Below O3, this flag has effect only
2230  // when it is specified explicitly.
2231  if (OptLevel > 2)
2232  Params.LocallyHotCallSiteThreshold = LocallyHotCallSiteThreshold;
2233  return Params;
2234 }