LLVM 9.0.0svn
InlineCost.cpp
1 //===- InlineCost.cpp - Cost analysis for inliner -------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file implements inline cost analysis.
10 //
11 //===----------------------------------------------------------------------===//
12 
13 #include "llvm/Analysis/InlineCost.h"
14 #include "llvm/ADT/STLExtras.h"
15 #include "llvm/ADT/SetVector.h"
16 #include "llvm/ADT/SmallPtrSet.h"
17 #include "llvm/ADT/SmallVector.h"
18 #include "llvm/ADT/Statistic.h"
23 #include "llvm/Analysis/CFG.h"
25 #include "llvm/Analysis/LoopInfo.h"
29 #include "llvm/Config/llvm-config.h"
30 #include "llvm/IR/CallingConv.h"
31 #include "llvm/IR/DataLayout.h"
32 #include "llvm/IR/Dominators.h"
33 #include "llvm/IR/GetElementPtrTypeIterator.h"
34 #include "llvm/IR/GlobalAlias.h"
35 #include "llvm/IR/InstVisitor.h"
36 #include "llvm/IR/IntrinsicInst.h"
37 #include "llvm/IR/Operator.h"
38 #include "llvm/Support/Debug.h"
40 
41 using namespace llvm;
42 
43 #define DEBUG_TYPE "inline-cost"
44 
45 STATISTIC(NumCallsAnalyzed, "Number of call sites analyzed");
46 
47 static cl::opt<int> InlineThreshold(
48  "inline-threshold", cl::Hidden, cl::init(225), cl::ZeroOrMore,
49  cl::desc("Control the amount of inlining to perform (default = 225)"));
50 
51 static cl::opt<int> HintThreshold(
52  "inlinehint-threshold", cl::Hidden, cl::init(325), cl::ZeroOrMore,
53  cl::desc("Threshold for inlining functions with inline hint"));
54 
55 static cl::opt<int>
56  ColdCallSiteThreshold("inline-cold-callsite-threshold", cl::Hidden,
57  cl::init(45), cl::ZeroOrMore,
58  cl::desc("Threshold for inlining cold callsites"));
59 
60 // We introduce this threshold to help performance of instrumentation based
61 // PGO before we actually hook up inliner with analysis passes such as BPI and
62 // BFI.
63 static cl::opt<int> ColdThreshold(
64  "inlinecold-threshold", cl::Hidden, cl::init(45), cl::ZeroOrMore,
65  cl::desc("Threshold for inlining functions with cold attribute"));
66 
67 static cl::opt<int>
68  HotCallSiteThreshold("hot-callsite-threshold", cl::Hidden, cl::init(3000),
69  cl::ZeroOrMore,
70  cl::desc("Threshold for hot callsites "));
71 
72 static cl::opt<int> LocallyHotCallSiteThreshold(
73  "locally-hot-callsite-threshold", cl::Hidden, cl::init(525), cl::ZeroOrMore,
74  cl::desc("Threshold for locally hot callsites "));
75 
76 static cl::opt<int> ColdCallSiteRelFreq(
77  "cold-callsite-rel-freq", cl::Hidden, cl::init(2), cl::ZeroOrMore,
78  cl::desc("Maximum block frequency, expressed as a percentage of caller's "
79  "entry frequency, for a callsite to be cold in the absence of "
80  "profile information."));
81 
82 static cl::opt<int> HotCallSiteRelFreq(
83  "hot-callsite-rel-freq", cl::Hidden, cl::init(60), cl::ZeroOrMore,
84  cl::desc("Minimum block frequency, expressed as a multiple of caller's "
85  "entry frequency, for a callsite to be hot in the absence of "
86  "profile information."));
87 
88 static cl::opt<bool> OptComputeFullInlineCost(
89  "inline-cost-full", cl::Hidden, cl::init(false), cl::ZeroOrMore,
90  cl::desc("Compute the full inline cost of a call site even when the cost "
91  "exceeds the threshold."));
92 
93 namespace {
94 
95 class CallAnalyzer : public InstVisitor<CallAnalyzer, bool> {
96  typedef InstVisitor<CallAnalyzer, bool> Base;
97  friend class InstVisitor<CallAnalyzer, bool>;
98 
99  /// The TargetTransformInfo available for this compilation.
100  const TargetTransformInfo &TTI;
101 
102  /// Getter for the cache of @llvm.assume intrinsics.
103  std::function<AssumptionCache &(Function &)> &GetAssumptionCache;
104 
105  /// Getter for BlockFrequencyInfo
106  Optional<function_ref<BlockFrequencyInfo &(Function &)>> &GetBFI;
107 
108  /// Profile summary information.
109  ProfileSummaryInfo *PSI;
110 
111  /// The called function.
112  Function &F;
113 
114  // Cache the DataLayout since we use it a lot.
115  const DataLayout &DL;
116 
117  /// The OptimizationRemarkEmitter available for this compilation.
118  OptimizationRemarkEmitter *ORE;
119 
120  /// The candidate callsite being analyzed. Please do not use this to do
121  /// analysis in the caller function; we want the inline cost query to be
122  /// easily cacheable. Instead, use the cover function paramHasAttr.
123  CallBase &CandidateCall;
124 
125  /// Tunable parameters that control the analysis.
126  const InlineParams &Params;
127 
128  /// Upper bound for the inlining cost. Bonuses are being applied to account
129  /// for speculative "expected profit" of the inlining decision.
130  int Threshold;
131 
132  /// Inlining cost measured in abstract units, accounts for all the
133  /// instructions expected to be executed for a given function invocation.
134  /// Instructions that are statically proven to be dead based on call-site
135  /// arguments are not counted here.
136  int Cost = 0;
137 
138  bool ComputeFullInlineCost;
139 
140  bool IsCallerRecursive = false;
141  bool IsRecursiveCall = false;
142  bool ExposesReturnsTwice = false;
143  bool HasDynamicAlloca = false;
144  bool ContainsNoDuplicateCall = false;
145  bool HasReturn = false;
146  bool HasIndirectBr = false;
147  bool HasUninlineableIntrinsic = false;
148  bool InitsVargArgs = false;
149 
150  /// Number of bytes allocated statically by the callee.
151  uint64_t AllocatedSize = 0;
152  unsigned NumInstructions = 0;
153  unsigned NumVectorInstructions = 0;
154 
155  /// Bonus to be applied when percentage of vector instructions in callee is
156  /// high (see more details in updateThreshold).
157  int VectorBonus = 0;
158  /// Bonus to be applied when the callee has only one reachable basic block.
159  int SingleBBBonus = 0;
160 
161  /// While we walk the potentially-inlined instructions, we build up and
162  /// maintain a mapping of simplified values specific to this callsite. The
163  /// idea is to propagate any special information we have about arguments to
164  /// this call through the inlinable section of the function, and account for
165  /// likely simplifications post-inlining. The most important aspect we track
166  /// is CFG altering simplifications -- when we prove a basic block dead, that
167  /// can cause dramatic shifts in the cost of inlining a function.
168  DenseMap<Value *, Constant *> SimplifiedValues;
169 
170  /// Keep track of the values which map back (through function arguments) to
171  /// allocas on the caller stack which could be simplified through SROA.
172  DenseMap<Value *, Value *> SROAArgValues;
173 
174  /// The mapping of caller Alloca values to their accumulated cost savings. If
175  /// we have to disable SROA for one of the allocas, this tells us how much
176  /// cost must be added.
177  DenseMap<Value *, int> SROAArgCosts;
178 
179  /// Keep track of values which map to a pointer base and constant offset.
180  DenseMap<Value *, std::pair<Value *, APInt>> ConstantOffsetPtrs;
181 
182  /// Keep track of dead blocks due to the constant arguments.
183  SetVector<BasicBlock *> DeadBlocks;
184 
185  /// The mapping of the blocks to their known unique successors due to the
186  /// constant arguments.
187  DenseMap<BasicBlock *, BasicBlock *> KnownSuccessors;
188 
189  /// Model the elimination of repeated loads that is expected to happen
190  /// whenever we simplify away the stores that would otherwise cause them to be
191  /// loads.
192  bool EnableLoadElimination;
193  SmallPtrSet<Value *, 16> LoadAddrSet;
194  int LoadEliminationCost = 0;
195 
196  // Custom simplification helper routines.
197  bool isAllocaDerivedArg(Value *V);
198  bool lookupSROAArgAndCost(Value *V, Value *&Arg,
199  DenseMap<Value *, int>::iterator &CostIt);
200  void disableSROA(DenseMap<Value *, int>::iterator CostIt);
201  void disableSROA(Value *V);
202  void findDeadBlocks(BasicBlock *CurrBB, BasicBlock *NextBB);
203  void accumulateSROACost(DenseMap<Value *, int>::iterator CostIt,
204  int InstructionCost);
205  void disableLoadElimination();
206  bool isGEPFree(GetElementPtrInst &GEP);
207  bool canFoldInboundsGEP(GetElementPtrInst &I);
208  bool accumulateGEPOffset(GEPOperator &GEP, APInt &Offset);
209  bool simplifyCallSite(Function *F, CallBase &Call);
210  template <typename Callable>
211  bool simplifyInstruction(Instruction &I, Callable Evaluate);
212  ConstantInt *stripAndComputeInBoundsConstantOffsets(Value *&V);
213 
214  /// Return true if the given argument to the function being considered for
215  /// inlining has the given attribute set either at the call site or the
216  /// function declaration. Primarily used to inspect call site specific
217  /// attributes since these can be more precise than the ones on the callee
218  /// itself.
219  bool paramHasAttr(Argument *A, Attribute::AttrKind Attr);
220 
221  /// Return true if the given value is known non null within the callee if
222  /// inlined through this particular callsite.
223  bool isKnownNonNullInCallee(Value *V);
224 
225  /// Update Threshold based on callsite properties such as callee
226  /// attributes and callee hotness for PGO builds. The Callee is explicitly
227  /// passed to support analyzing indirect calls whose target is inferred by
228  /// analysis.
229  void updateThreshold(CallBase &Call, Function &Callee);
230 
231  /// Return true if size growth is allowed when inlining the callee at \p Call.
232  bool allowSizeGrowth(CallBase &Call);
233 
234  /// Return true if \p Call is a cold callsite.
235  bool isColdCallSite(CallBase &Call, BlockFrequencyInfo *CallerBFI);
236 
237  /// Return a higher threshold if \p Call is a hot callsite.
238  Optional<int> getHotCallSiteThreshold(CallBase &Call,
239  BlockFrequencyInfo *CallerBFI);
240 
241  // Custom analysis routines.
242  InlineResult analyzeBlock(BasicBlock *BB,
243  SmallPtrSetImpl<const Value *> &EphValues);
244 
245  /// Handle a capped 'int' increment for Cost.
246  void addCost(int64_t Inc, int64_t UpperBound = INT_MAX) {
247  assert(UpperBound > 0 && UpperBound <= INT_MAX && "invalid upper bound");
248  Cost = (int)std::min(UpperBound, Cost + Inc);
249  }
250 
251  // Disable several entry points to the visitor so we don't accidentally use
252  // them by declaring but not defining them here.
253  void visit(Module *);
254  void visit(Module &);
255  void visit(Function *);
256  void visit(Function &);
257  void visit(BasicBlock *);
258  void visit(BasicBlock &);
259 
260  // Provide base case for our instruction visit.
261  bool visitInstruction(Instruction &I);
262 
263  // Our visit overrides.
264  bool visitAlloca(AllocaInst &I);
265  bool visitPHI(PHINode &I);
266  bool visitGetElementPtr(GetElementPtrInst &I);
267  bool visitBitCast(BitCastInst &I);
268  bool visitPtrToInt(PtrToIntInst &I);
269  bool visitIntToPtr(IntToPtrInst &I);
270  bool visitCastInst(CastInst &I);
271  bool visitUnaryInstruction(UnaryInstruction &I);
272  bool visitCmpInst(CmpInst &I);
273  bool visitSub(BinaryOperator &I);
274  bool visitBinaryOperator(BinaryOperator &I);
275  bool visitLoad(LoadInst &I);
276  bool visitStore(StoreInst &I);
277  bool visitExtractValue(ExtractValueInst &I);
278  bool visitInsertValue(InsertValueInst &I);
279  bool visitCallBase(CallBase &Call);
280  bool visitReturnInst(ReturnInst &RI);
281  bool visitBranchInst(BranchInst &BI);
282  bool visitSelectInst(SelectInst &SI);
283  bool visitSwitchInst(SwitchInst &SI);
284  bool visitIndirectBrInst(IndirectBrInst &IBI);
285  bool visitResumeInst(ResumeInst &RI);
286  bool visitCleanupReturnInst(CleanupReturnInst &RI);
287  bool visitCatchReturnInst(CatchReturnInst &RI);
288  bool visitUnreachableInst(UnreachableInst &I);
289 
290 public:
291  CallAnalyzer(const TargetTransformInfo &TTI,
292  std::function<AssumptionCache &(Function &)> &GetAssumptionCache,
293  Optional<function_ref<BlockFrequencyInfo &(Function &)>> &GetBFI,
294  ProfileSummaryInfo *PSI, OptimizationRemarkEmitter *ORE,
295  Function &Callee, CallBase &Call, const InlineParams &Params)
296  : TTI(TTI), GetAssumptionCache(GetAssumptionCache), GetBFI(GetBFI),
297  PSI(PSI), F(Callee), DL(F.getParent()->getDataLayout()), ORE(ORE),
298  CandidateCall(Call), Params(Params), Threshold(Params.DefaultThreshold),
299  ComputeFullInlineCost(OptComputeFullInlineCost ||
300  Params.ComputeFullInlineCost || ORE),
301  EnableLoadElimination(true) {}
302 
303  InlineResult analyzeCall(CallBase &Call);
304 
305  int getThreshold() { return Threshold; }
306  int getCost() { return Cost; }
307 
308  // Keep a bunch of stats about the cost savings found so we can print them
309  // out when debugging.
310  unsigned NumConstantArgs = 0;
311  unsigned NumConstantOffsetPtrArgs = 0;
312  unsigned NumAllocaArgs = 0;
313  unsigned NumConstantPtrCmps = 0;
314  unsigned NumConstantPtrDiffs = 0;
315  unsigned NumInstructionsSimplified = 0;
316  unsigned SROACostSavings = 0;
317  unsigned SROACostSavingsLost = 0;
318 
319  void dump();
320 };
321 
322 } // namespace
323 
324 /// Test whether the given value is an Alloca-derived function argument.
325 bool CallAnalyzer::isAllocaDerivedArg(Value *V) {
326  return SROAArgValues.count(V);
327 }
328 
329 /// Lookup the SROA-candidate argument and cost iterator which V maps to.
330 /// Returns false if V does not map to a SROA-candidate.
331 bool CallAnalyzer::lookupSROAArgAndCost(
332  Value *V, Value *&Arg, DenseMap<Value *, int>::iterator &CostIt) {
333  if (SROAArgValues.empty() || SROAArgCosts.empty())
334  return false;
335 
336  DenseMap<Value *, Value *>::iterator ArgIt = SROAArgValues.find(V);
337  if (ArgIt == SROAArgValues.end())
338  return false;
339 
340  Arg = ArgIt->second;
341  CostIt = SROAArgCosts.find(Arg);
342  return CostIt != SROAArgCosts.end();
343 }
344 
345 /// Disable SROA for the candidate marked by this cost iterator.
346 ///
347 /// This marks the candidate as no longer viable for SROA, and adds the cost
348 /// savings associated with it back into the inline cost measurement.
349 void CallAnalyzer::disableSROA(DenseMap<Value *, int>::iterator CostIt) {
350  // If we're no longer able to perform SROA we need to undo its cost savings
351  // and prevent subsequent analysis.
352  addCost(CostIt->second);
353  SROACostSavings -= CostIt->second;
354  SROACostSavingsLost += CostIt->second;
355  SROAArgCosts.erase(CostIt);
356  disableLoadElimination();
357 }
358 
359 /// If 'V' maps to a SROA candidate, disable SROA for it.
360 void CallAnalyzer::disableSROA(Value *V) {
361  Value *SROAArg;
362  DenseMap<Value *, int>::iterator CostIt;
363  if (lookupSROAArgAndCost(V, SROAArg, CostIt))
364  disableSROA(CostIt);
365 }
366 
367 /// Accumulate the given cost for a particular SROA candidate.
368 void CallAnalyzer::accumulateSROACost(DenseMap<Value *, int>::iterator CostIt,
369  int InstructionCost) {
370  CostIt->second += InstructionCost;
371  SROACostSavings += InstructionCost;
372 }
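// Editorial note, not part of the original source: the routines above
// implement speculative SROA bookkeeping. A simple load or store through an
// SROA-able argument credits InlineConstants::InstrCost to that alloca's
// entry via accumulateSROACost(); if the alloca later escapes through an
// instruction the analyzer cannot handle, disableSROA() charges the whole
// accumulated saving back onto Cost through addCost().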
373 
374 void CallAnalyzer::disableLoadElimination() {
375  if (EnableLoadElimination) {
376  addCost(LoadEliminationCost);
377  LoadEliminationCost = 0;
378  EnableLoadElimination = false;
379  }
380 }
381 
382 /// Accumulate a constant GEP offset into an APInt if possible.
383 ///
384 /// Returns false if unable to compute the offset for any reason. Respects any
385 /// simplified values known during the analysis of this callsite.
386 bool CallAnalyzer::accumulateGEPOffset(GEPOperator &GEP, APInt &Offset) {
387  unsigned IntPtrWidth = DL.getIndexTypeSizeInBits(GEP.getType());
388  assert(IntPtrWidth == Offset.getBitWidth());
389 
390  for (gep_type_iterator GTI = gep_type_begin(GEP), GTE = gep_type_end(GEP);
391  GTI != GTE; ++GTI) {
392  ConstantInt *OpC = dyn_cast<ConstantInt>(GTI.getOperand());
393  if (!OpC)
394  if (Constant *SimpleOp = SimplifiedValues.lookup(GTI.getOperand()))
395  OpC = dyn_cast<ConstantInt>(SimpleOp);
396  if (!OpC)
397  return false;
398  if (OpC->isZero())
399  continue;
400 
401  // Handle a struct index, which adds its field offset to the pointer.
402  if (StructType *STy = GTI.getStructTypeOrNull()) {
403  unsigned ElementIdx = OpC->getZExtValue();
404  const StructLayout *SL = DL.getStructLayout(STy);
405  Offset += APInt(IntPtrWidth, SL->getElementOffset(ElementIdx));
406  continue;
407  }
408 
409  APInt TypeSize(IntPtrWidth, DL.getTypeAllocSize(GTI.getIndexedType()));
410  Offset += OpC->getValue().sextOrTrunc(IntPtrWidth) * TypeSize;
411  }
412  return true;
413 }
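// Editorial worked example, not part of the original source (assumes a
// typical 64-bit DataLayout): for
//   %p = getelementptr inbounds {i32, i64}, {i32, i64}* %s, i64 1, i32 1
// the first index steps over one whole struct (alloc size 16), and the
// struct index adds the field-1 offset of 8, so the loop above accumulates
// Offset = 1 * 16 + 8 = 24.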
414 
415 /// Use TTI to check whether a GEP is free.
416 ///
417 /// Respects any simplified values known during the analysis of this callsite.
418 bool CallAnalyzer::isGEPFree(GetElementPtrInst &GEP) {
419  SmallVector<Value *, 4> Operands;
420  Operands.push_back(GEP.getOperand(0));
421  for (User::op_iterator I = GEP.idx_begin(), E = GEP.idx_end(); I != E; ++I)
422  if (Constant *SimpleOp = SimplifiedValues.lookup(*I))
423  Operands.push_back(SimpleOp);
424  else
425  Operands.push_back(*I);
426  return TargetTransformInfo::TCC_Free == TTI.getUserCost(&GEP, Operands);
427 }
428 
429 bool CallAnalyzer::visitAlloca(AllocaInst &I) {
430  // Check whether inlining will turn a dynamic alloca into a static
431  // alloca and handle that case.
432  if (I.isArrayAllocation()) {
433  Constant *Size = SimplifiedValues.lookup(I.getArraySize());
434  if (auto *AllocSize = dyn_cast_or_null<ConstantInt>(Size)) {
435  Type *Ty = I.getAllocatedType();
436  AllocatedSize = SaturatingMultiplyAdd(
437  AllocSize->getLimitedValue(), DL.getTypeAllocSize(Ty), AllocatedSize);
438  return Base::visitAlloca(I);
439  }
440  }
441 
442  // Accumulate the allocated size.
443  if (I.isStaticAlloca()) {
444  Type *Ty = I.getAllocatedType();
445  AllocatedSize = SaturatingAdd(DL.getTypeAllocSize(Ty), AllocatedSize);
446  }
447 
448  // We will happily inline static alloca instructions.
449  if (I.isStaticAlloca())
450  return Base::visitAlloca(I);
451 
452  // FIXME: This is overly conservative. Dynamic allocas are inefficient for
453  // a variety of reasons, and so we would like to not inline them into
454  // functions which don't currently have a dynamic alloca. This simply
455  // disables inlining altogether in the presence of a dynamic alloca.
456  HasDynamicAlloca = true;
457  return false;
458 }
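// Editorial note, not part of the original source: given
//   %buf = alloca i32, i32 %n
// where %n has been simplified to the constant 100 at this call site, the
// array-allocation path above adds 100 * 4 = 400 bytes to AllocatedSize and
// defers to the base visitor; if %n stays unknown, HasDynamicAlloca is set
// and the callsite is reported as not viable for inlining.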
459 
460 bool CallAnalyzer::visitPHI(PHINode &I) {
461  // FIXME: We need to propagate SROA *disabling* through phi nodes, even
462  // though we don't want to propagate its bonuses. The idea is to disable
463  // SROA if it *might* be used in an inappropriate manner.
464 
465  // Phi nodes are always zero-cost.
466  // FIXME: Pointer sizes may differ between different address spaces, so do we
467  // need to use correct address space in the call to getPointerSizeInBits here?
468  // Or could we skip the getPointerSizeInBits call completely? As far as I can
469  // see the ZeroOffset is used as a dummy value, so we can probably use any
470  // bit width for the ZeroOffset?
471  APInt ZeroOffset = APInt::getNullValue(DL.getPointerSizeInBits(0));
472  bool CheckSROA = I.getType()->isPointerTy();
473 
474  // Track the constant or pointer with constant offset we've seen so far.
475  Constant *FirstC = nullptr;
476  std::pair<Value *, APInt> FirstBaseAndOffset = {nullptr, ZeroOffset};
477  Value *FirstV = nullptr;
478 
479  for (unsigned i = 0, e = I.getNumIncomingValues(); i != e; ++i) {
480  BasicBlock *Pred = I.getIncomingBlock(i);
481  // If the incoming block is dead, skip the incoming block.
482  if (DeadBlocks.count(Pred))
483  continue;
484  // If the parent block of phi is not the known successor of the incoming
485  // block, skip the incoming block.
486  BasicBlock *KnownSuccessor = KnownSuccessors[Pred];
487  if (KnownSuccessor && KnownSuccessor != I.getParent())
488  continue;
489 
490  Value *V = I.getIncomingValue(i);
491  // If the incoming value is this phi itself, skip the incoming value.
492  if (&I == V)
493  continue;
494 
495  Constant *C = dyn_cast<Constant>(V);
496  if (!C)
497  C = SimplifiedValues.lookup(V);
498 
499  std::pair<Value *, APInt> BaseAndOffset = {nullptr, ZeroOffset};
500  if (!C && CheckSROA)
501  BaseAndOffset = ConstantOffsetPtrs.lookup(V);
502 
503  if (!C && !BaseAndOffset.first)
504  // The incoming value is neither a constant nor a pointer with constant
505  // offset, exit early.
506  return true;
507 
508  if (FirstC) {
509  if (FirstC == C)
510  // If we've seen a constant incoming value before and it is the same
511  // constant we see this time, continue checking the next incoming value.
512  continue;
513  // Otherwise early exit because we either see a different constant or saw
514  // a constant before but we have a pointer with constant offset this time.
515  return true;
516  }
517 
518  if (FirstV) {
519  // The same logic as above, but check pointer with constant offset here.
520  if (FirstBaseAndOffset == BaseAndOffset)
521  continue;
522  return true;
523  }
524 
525  if (C) {
526  // This is the 1st time we've seen a constant, record it.
527  FirstC = C;
528  continue;
529  }
530 
531  // The remaining case is that this is the 1st time we've seen a pointer with
532  // constant offset, record it.
533  FirstV = V;
534  FirstBaseAndOffset = BaseAndOffset;
535  }
536 
537  // Check if we can map phi to a constant.
538  if (FirstC) {
539  SimplifiedValues[&I] = FirstC;
540  return true;
541  }
542 
543  // Check if we can map phi to a pointer with constant offset.
544  if (FirstBaseAndOffset.first) {
545  ConstantOffsetPtrs[&I] = FirstBaseAndOffset;
546 
547  Value *SROAArg;
548  DenseMap<Value *, int>::iterator CostIt;
549  if (lookupSROAArgAndCost(FirstV, SROAArg, CostIt))
550  SROAArgValues[&I] = SROAArg;
551  }
552 
553  return true;
554 }
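// Editorial note, not part of the original source: if every live incoming
// value of the phi is the same constant (say i32 7 after dead-block and
// known-successor pruning), the loop above records SimplifiedValues[&I] = 7;
// if they all share one pointer base and offset instead, the phi inherits
// that ConstantOffsetPtrs entry and, when the base is an SROA candidate, its
// SROAArgValues entry as well.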
555 
556 /// Check we can fold GEPs of constant-offset call site argument pointers.
557 /// This requires target data and inbounds GEPs.
558 ///
559 /// \return true if the specified GEP can be folded.
560 bool CallAnalyzer::canFoldInboundsGEP(GetElementPtrInst &I) {
561  // Check if we have a base + offset for the pointer.
562  std::pair<Value *, APInt> BaseAndOffset =
563  ConstantOffsetPtrs.lookup(I.getPointerOperand());
564  if (!BaseAndOffset.first)
565  return false;
566 
567  // Check if the offset of this GEP is constant, and if so accumulate it
568  // into Offset.
569  if (!accumulateGEPOffset(cast<GEPOperator>(I), BaseAndOffset.second))
570  return false;
571 
572  // Add the result as a new mapping to Base + Offset.
573  ConstantOffsetPtrs[&I] = BaseAndOffset;
574 
575  return true;
576 }
577 
578 bool CallAnalyzer::visitGetElementPtr(GetElementPtrInst &I) {
579  Value *SROAArg;
580  DenseMap<Value *, int>::iterator CostIt;
581  bool SROACandidate =
582  lookupSROAArgAndCost(I.getPointerOperand(), SROAArg, CostIt);
583 
584  // Lambda to check whether a GEP's indices are all constant.
585  auto IsGEPOffsetConstant = [&](GetElementPtrInst &GEP) {
586  for (User::op_iterator I = GEP.idx_begin(), E = GEP.idx_end(); I != E; ++I)
587  if (!isa<Constant>(*I) && !SimplifiedValues.lookup(*I))
588  return false;
589  return true;
590  };
591 
592  if ((I.isInBounds() && canFoldInboundsGEP(I)) || IsGEPOffsetConstant(I)) {
593  if (SROACandidate)
594  SROAArgValues[&I] = SROAArg;
595 
596  // Constant GEPs are modeled as free.
597  return true;
598  }
599 
600  // Variable GEPs will require math and will disable SROA.
601  if (SROACandidate)
602  disableSROA(CostIt);
603  return isGEPFree(I);
604 }
605 
606 /// Simplify \p I if its operands are constants and update SimplifiedValues.
607 /// \p Evaluate is a callable specific to instruction type that evaluates the
608 /// instruction when all the operands are constants.
609 template <typename Callable>
610 bool CallAnalyzer::simplifyInstruction(Instruction &I, Callable Evaluate) {
611  SmallVector<Constant *, 2> COps;
612  for (Value *Op : I.operands()) {
613  Constant *COp = dyn_cast<Constant>(Op);
614  if (!COp)
615  COp = SimplifiedValues.lookup(Op);
616  if (!COp)
617  return false;
618  COps.push_back(COp);
619  }
620  auto *C = Evaluate(COps);
621  if (!C)
622  return false;
623  SimplifiedValues[&I] = C;
624  return true;
625 }
626 
627 bool CallAnalyzer::visitBitCast(BitCastInst &I) {
628  // Propagate constants through bitcasts.
629  if (simplifyInstruction(I, [&](SmallVectorImpl<Constant *> &COps) {
630  return ConstantExpr::getBitCast(COps[0], I.getType());
631  }))
632  return true;
633 
634  // Track base/offsets through casts
635  std::pair<Value *, APInt> BaseAndOffset =
636  ConstantOffsetPtrs.lookup(I.getOperand(0));
637  // Casts don't change the offset, just wrap it up.
638  if (BaseAndOffset.first)
639  ConstantOffsetPtrs[&I] = BaseAndOffset;
640 
641  // Also look for SROA candidates here.
642  Value *SROAArg;
643  DenseMap<Value *, int>::iterator CostIt;
644  if (lookupSROAArgAndCost(I.getOperand(0), SROAArg, CostIt))
645  SROAArgValues[&I] = SROAArg;
646 
647  // Bitcasts are always zero cost.
648  return true;
649 }
650 
651 bool CallAnalyzer::visitPtrToInt(PtrToIntInst &I) {
652  // Propagate constants through ptrtoint.
653  if (simplifyInstruction(I, [&](SmallVectorImpl<Constant *> &COps) {
654  return ConstantExpr::getPtrToInt(COps[0], I.getType());
655  }))
656  return true;
657 
658  // Track base/offset pairs when converted to a plain integer provided the
659  // integer is large enough to represent the pointer.
660  unsigned IntegerSize = I.getType()->getScalarSizeInBits();
661  unsigned AS = I.getOperand(0)->getType()->getPointerAddressSpace();
662  if (IntegerSize >= DL.getPointerSizeInBits(AS)) {
663  std::pair<Value *, APInt> BaseAndOffset =
664  ConstantOffsetPtrs.lookup(I.getOperand(0));
665  if (BaseAndOffset.first)
666  ConstantOffsetPtrs[&I] = BaseAndOffset;
667  }
668 
669  // This is really weird. Technically, ptrtoint will disable SROA. However,
670  // unless that ptrtoint is *used* somewhere in the live basic blocks after
671  // inlining, it will be nuked, and SROA should proceed. All of the uses which
672  // would block SROA would also block SROA if applied directly to a pointer,
673  // and so we can just add the integer in here. The only places where SROA is
674  // preserved either cannot fire on an integer, or won't in-and-of themselves
675  // disable SROA (ext) w/o some later use that we would see and disable.
676  Value *SROAArg;
678  if (lookupSROAArgAndCost(I.getOperand(0), SROAArg, CostIt))
679  SROAArgValues[&I] = SROAArg;
680 
681  return TargetTransformInfo::TCC_Free == TTI.getUserCost(&I);
682 }
683 
684 bool CallAnalyzer::visitIntToPtr(IntToPtrInst &I) {
685  // Propagate constants through inttoptr.
686  if (simplifyInstruction(I, [&](SmallVectorImpl<Constant *> &COps) {
687  return ConstantExpr::getIntToPtr(COps[0], I.getType());
688  }))
689  return true;
690 
691  // Track base/offset pairs when round-tripped through a pointer without
692  // modifications provided the integer is not too large.
693  Value *Op = I.getOperand(0);
694  unsigned IntegerSize = Op->getType()->getScalarSizeInBits();
695  if (IntegerSize <= DL.getPointerTypeSizeInBits(I.getType())) {
696  std::pair<Value *, APInt> BaseAndOffset = ConstantOffsetPtrs.lookup(Op);
697  if (BaseAndOffset.first)
698  ConstantOffsetPtrs[&I] = BaseAndOffset;
699  }
700 
701  // "Propagate" SROA here in the same manner as we do for ptrtoint above.
702  Value *SROAArg;
703  DenseMap<Value *, int>::iterator CostIt;
704  if (lookupSROAArgAndCost(Op, SROAArg, CostIt))
705  SROAArgValues[&I] = SROAArg;
706 
707  return TargetTransformInfo::TCC_Free == TTI.getUserCost(&I);
708 }
709 
710 bool CallAnalyzer::visitCastInst(CastInst &I) {
711  // Propagate constants through casts.
712  if (simplifyInstruction(I, [&](SmallVectorImpl<Constant *> &COps) {
713  return ConstantExpr::getCast(I.getOpcode(), COps[0], I.getType());
714  }))
715  return true;
716 
717  // Disable SROA in the face of arbitrary casts we don't whitelist elsewhere.
718  disableSROA(I.getOperand(0));
719 
720  // If this is a floating-point cast, and the target says this operation
721  // is expensive, this may eventually become a library call. Treat the cost
722  // as such.
723  switch (I.getOpcode()) {
724  case Instruction::FPTrunc:
725  case Instruction::FPExt:
726  case Instruction::UIToFP:
727  case Instruction::SIToFP:
728  case Instruction::FPToUI:
729  case Instruction::FPToSI:
730  if (TTI.getFPOpCost(I.getType()) == TargetTransformInfo::TCC_Expensive)
731  addCost(InlineConstants::CallPenalty);
732  break;
733  default:
734  break;
735  }
736 
737  return TargetTransformInfo::TCC_Free == TTI.getUserCost(&I);
738 }
739 
740 bool CallAnalyzer::visitUnaryInstruction(UnaryInstruction &I) {
741  Value *Operand = I.getOperand(0);
742  if (simplifyInstruction(I, [&](SmallVectorImpl<Constant *> &COps) {
743  return ConstantFoldInstOperands(&I, COps[0], DL);
744  }))
745  return true;
746 
747  // Disable any SROA on the argument to arbitrary unary operators.
748  disableSROA(Operand);
749 
750  return false;
751 }
752 
753 bool CallAnalyzer::paramHasAttr(Argument *A, Attribute::AttrKind Attr) {
754  return CandidateCall.paramHasAttr(A->getArgNo(), Attr);
755 }
756 
757 bool CallAnalyzer::isKnownNonNullInCallee(Value *V) {
758  // Does the *call site* have the NonNull attribute set on an argument? We
759  // use the attribute on the call site to memoize any analysis done in the
760  // caller. This will also trip if the callee function has a non-null
761  // parameter attribute, but that's a less interesting case because hopefully
762  // the callee would already have been simplified based on that.
763  if (Argument *A = dyn_cast<Argument>(V))
764  if (paramHasAttr(A, Attribute::NonNull))
765  return true;
766 
767  // Is this an alloca in the caller? This is distinct from the attribute case
768  // above because attributes aren't updated within the inliner itself and we
769  // always want to catch the alloca derived case.
770  if (isAllocaDerivedArg(V))
771  // We can actually predict the result of comparisons between an
772  // alloca-derived value and null. Note that this fires regardless of
773  // SROA firing.
774  return true;
775 
776  return false;
777 }
778 
779 bool CallAnalyzer::allowSizeGrowth(CallBase &Call) {
780  // If the normal destination of the invoke or the parent block of the call
781  // site is unreachable-terminated, there is little point in inlining this
782  // unless there is literally zero cost.
783  // FIXME: Note that it is possible that an unreachable-terminated block has a
784  // hot entry. For example, in below scenario inlining hot_call_X() may be
785  // beneficial :
786  // main() {
787  // hot_call_1();
788  // ...
789  // hot_call_N()
790  // exit(0);
791  // }
792  // For now, we are not handling this corner case here as it is rare in real
793  // code. In future, we should elaborate this based on BPI and BFI in more
794  // general threshold adjusting heuristics in updateThreshold().
795  if (InvokeInst *II = dyn_cast<InvokeInst>(&Call)) {
796  if (isa<UnreachableInst>(II->getNormalDest()->getTerminator()))
797  return false;
798  } else if (isa<UnreachableInst>(Call.getParent()->getTerminator()))
799  return false;
800 
801  return true;
802 }
803 
804 bool CallAnalyzer::isColdCallSite(CallBase &Call,
805  BlockFrequencyInfo *CallerBFI) {
806  // If global profile summary is available, then callsite's coldness is
807  // determined based on that.
808  if (PSI && PSI->hasProfileSummary())
809  return PSI->isColdCallSite(CallSite(&Call), CallerBFI);
810 
811  // Otherwise we need BFI to be available.
812  if (!CallerBFI)
813  return false;
814 
815  // Determine if the callsite is cold relative to caller's entry. We could
816  // potentially cache the computation of scaled entry frequency, but the added
817  // complexity is not worth it unless this scaling shows up high in the
818  // profiles.
819  const BranchProbability ColdProb(ColdCallSiteRelFreq, 100);
820  auto CallSiteBB = Call.getParent();
821  auto CallSiteFreq = CallerBFI->getBlockFreq(CallSiteBB);
822  auto CallerEntryFreq =
823  CallerBFI->getBlockFreq(&(Call.getCaller()->getEntryBlock()));
824  return CallSiteFreq < CallerEntryFreq * ColdProb;
825 }
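// Editorial worked example, not part of the original source: with the default
// cold-callsite-rel-freq of 2, a callsite whose block frequency is below 2%
// of the caller's entry frequency is treated as cold when no profile summary
// is available; e.g. an entry frequency of 1000 makes any callsite block with
// frequency under 20 cold.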
826 
827 Optional<int>
828 CallAnalyzer::getHotCallSiteThreshold(CallBase &Call,
829  BlockFrequencyInfo *CallerBFI) {
830 
831  // If global profile summary is available, then callsite's hotness is
832  // determined based on that.
833  if (PSI && PSI->hasProfileSummary() &&
834  PSI->isHotCallSite(CallSite(&Call), CallerBFI))
835  return Params.HotCallSiteThreshold;
836 
837  // Otherwise we need BFI to be available and to have a locally hot callsite
838  // threshold.
839  if (!CallerBFI || !Params.LocallyHotCallSiteThreshold)
840  return None;
841 
842  // Determine if the callsite is hot relative to caller's entry. We could
843  // potentially cache the computation of scaled entry frequency, but the added
844  // complexity is not worth it unless this scaling shows up high in the
845  // profiles.
846  auto CallSiteBB = Call.getParent();
847  auto CallSiteFreq = CallerBFI->getBlockFreq(CallSiteBB).getFrequency();
848  auto CallerEntryFreq = CallerBFI->getEntryFreq();
849  if (CallSiteFreq >= CallerEntryFreq * HotCallSiteRelFreq)
850  return Params.LocallyHotCallSiteThreshold;
851 
852  // Otherwise treat it normally.
853  return None;
854 }
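// Editorial worked example, not part of the original source: without a
// profile summary, a callsite counts as locally hot only when its block
// frequency reaches hot-callsite-rel-freq (default 60) times the caller's
// entry frequency; an entry frequency of 16 therefore needs a callsite
// frequency of at least 960 before the locally-hot threshold (525 by the
// flag above) is used.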
855 
856 void CallAnalyzer::updateThreshold(CallBase &Call, Function &Callee) {
857  // If no size growth is allowed for this inlining, set Threshold to 0.
858  if (!allowSizeGrowth(Call)) {
859  Threshold = 0;
860  return;
861  }
862 
863  Function *Caller = Call.getCaller();
864 
865  // return min(A, B) if B is valid.
866  auto MinIfValid = [](int A, Optional<int> B) {
867  return B ? std::min(A, B.getValue()) : A;
868  };
869 
870  // return max(A, B) if B is valid.
871  auto MaxIfValid = [](int A, Optional<int> B) {
872  return B ? std::max(A, B.getValue()) : A;
873  };
874 
875  // Various bonus percentages. These are multiplied by Threshold to get the
876  // bonus values.
877  // SingleBBBonus: This bonus is applied if the callee has a single reachable
878  // basic block at the given callsite context. This is speculatively applied
879  // and withdrawn if more than one basic block is seen.
880  //
881  // Vector bonuses: We want to more aggressively inline vector-dense kernels
882  // and apply this bonus based on the percentage of vector instructions. A
883  // bonus is applied if the vector instructions exceed 50% and half that amount
884  // is applied if it exceeds 10%. Note that these bonuses are somewhat
885  // arbitrary and evolved over time by accident as much as because they are
886  // principled bonuses.
887  // FIXME: It would be nice to base the bonus values on something more
888  // scientific.
889  //
890  // LastCallToStaticBonus: This large bonus is applied to ensure the inlining
891  // of the last call to a static function as inlining such functions is
892  // guaranteed to reduce code size.
893  //
894  // These bonus percentages may be set to 0 based on properties of the caller
895  // and the callsite.
896  int SingleBBBonusPercent = 50;
897  int VectorBonusPercent = 150;
898  int LastCallToStaticBonus = InlineConstants::LastCallToStaticBonus;
899 
900  // Lambda to set all the above bonus and bonus percentages to 0.
901  auto DisallowAllBonuses = [&]() {
902  SingleBBBonusPercent = 0;
903  VectorBonusPercent = 0;
904  LastCallToStaticBonus = 0;
905  };
906 
907  // Use the OptMinSizeThreshold or OptSizeThreshold knob if they are available
908  // and reduce the threshold if the caller has the necessary attribute.
909  if (Caller->hasMinSize()) {
910  Threshold = MinIfValid(Threshold, Params.OptMinSizeThreshold);
911  // For minsize, we want to disable the single BB bonus and the vector
912  // bonuses, but not the last-call-to-static bonus. Inlining the last call to
913  // a static function will, at the minimum, eliminate the parameter setup and
914  // call/return instructions.
915  SingleBBBonusPercent = 0;
916  VectorBonusPercent = 0;
917  } else if (Caller->hasOptSize())
918  Threshold = MinIfValid(Threshold, Params.OptSizeThreshold);
919 
920  // Adjust the threshold based on inlinehint attribute and profile based
921  // hotness information if the caller does not have MinSize attribute.
922  if (!Caller->hasMinSize()) {
923  if (Callee.hasFnAttribute(Attribute::InlineHint))
924  Threshold = MaxIfValid(Threshold, Params.HintThreshold);
925 
926  // FIXME: After switching to the new passmanager, simplify the logic below
927  // by checking only the callsite hotness/coldness as we will reliably
928  // have local profile information.
929  //
930  // Callsite hotness and coldness can be determined if sample profile is
931  // used (which adds hotness metadata to calls) or if caller's
932  // BlockFrequencyInfo is available.
933  BlockFrequencyInfo *CallerBFI = GetBFI ? &((*GetBFI)(*Caller)) : nullptr;
934  auto HotCallSiteThreshold = getHotCallSiteThreshold(Call, CallerBFI);
935  if (!Caller->hasOptSize() && HotCallSiteThreshold) {
936  LLVM_DEBUG(dbgs() << "Hot callsite.\n");
937  // FIXME: This should update the threshold only if it exceeds the
938  // current threshold, but AutoFDO + ThinLTO currently relies on this
939  // behavior to prevent inlining of hot callsites during ThinLTO
940  // compile phase.
941  Threshold = HotCallSiteThreshold.getValue();
942  } else if (isColdCallSite(Call, CallerBFI)) {
943  LLVM_DEBUG(dbgs() << "Cold callsite.\n");
944  // Do not apply bonuses for a cold callsite including the
945  // LastCallToStatic bonus. While this bonus might result in code size
946  // reduction, it can cause the size of a non-cold caller to increase
947  // preventing it from being inlined.
948  DisallowAllBonuses();
949  Threshold = MinIfValid(Threshold, Params.ColdCallSiteThreshold);
950  } else if (PSI) {
951  // Use callee's global profile information only if we have no way of
952  // determining this via callsite information.
953  if (PSI->isFunctionEntryHot(&Callee)) {
954  LLVM_DEBUG(dbgs() << "Hot callee.\n");
955  // If callsite hotness can not be determined, we may still know
956  // that the callee is hot and treat it as a weaker hint for threshold
957  // increase.
958  Threshold = MaxIfValid(Threshold, Params.HintThreshold);
959  } else if (PSI->isFunctionEntryCold(&Callee)) {
960  LLVM_DEBUG(dbgs() << "Cold callee.\n");
961  // Do not apply bonuses for a cold callee including the
962  // LastCallToStatic bonus. While this bonus might result in code size
963  // reduction, it can cause the size of a non-cold caller to increase
964  // preventing it from being inlined.
965  DisallowAllBonuses();
966  Threshold = MinIfValid(Threshold, Params.ColdThreshold);
967  }
968  }
969  }
970 
971  // Finally, take the target-specific inlining threshold multiplier into
972  // account.
973  Threshold *= TTI.getInliningThresholdMultiplier();
974 
975  SingleBBBonus = Threshold * SingleBBBonusPercent / 100;
976  VectorBonus = Threshold * VectorBonusPercent / 100;
977 
978  bool OnlyOneCallAndLocalLinkage =
979  F.hasLocalLinkage() && F.hasOneUse() && &F == Call.getCalledFunction();
980  // If there is only one call of the function, and it has internal linkage,
981  // the cost of inlining it drops dramatically. It may seem odd to update
982  // Cost in updateThreshold, but the bonus depends on the logic in this method.
983  if (OnlyOneCallAndLocalLinkage)
984  Cost -= LastCallToStaticBonus;
985 }
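// Editorial worked example, not part of the original source: for a callee
// carrying the inlinehint attribute, a caller built without -Os/-Oz, no
// profile data, and a TTI multiplier of 1, the default threshold of 225 is
// raised to the hint threshold of 325, giving SingleBBBonus = 325 * 50 / 100
// = 162 and VectorBonus = 325 * 150 / 100 = 487; analyzeCall applies these
// bonuses speculatively and withdraws them when the callee turns out to have
// several reachable blocks or few vector instructions.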
986 
987 bool CallAnalyzer::visitCmpInst(CmpInst &I) {
988  Value *LHS = I.getOperand(0), *RHS = I.getOperand(1);
989  // First try to handle simplified comparisons.
990  if (simplifyInstruction(I, [&](SmallVectorImpl<Constant *> &COps) {
991  return ConstantExpr::getCompare(I.getPredicate(), COps[0], COps[1]);
992  }))
993  return true;
994 
995  if (I.getOpcode() == Instruction::FCmp)
996  return false;
997 
998  // Otherwise look for a comparison between constant offset pointers with
999  // a common base.
1000  Value *LHSBase, *RHSBase;
1001  APInt LHSOffset, RHSOffset;
1002  std::tie(LHSBase, LHSOffset) = ConstantOffsetPtrs.lookup(LHS);
1003  if (LHSBase) {
1004  std::tie(RHSBase, RHSOffset) = ConstantOffsetPtrs.lookup(RHS);
1005  if (RHSBase && LHSBase == RHSBase) {
1006  // We have common bases, fold the icmp to a constant based on the
1007  // offsets.
1008  Constant *CLHS = ConstantInt::get(LHS->getContext(), LHSOffset);
1009  Constant *CRHS = ConstantInt::get(RHS->getContext(), RHSOffset);
1010  if (Constant *C = ConstantExpr::getICmp(I.getPredicate(), CLHS, CRHS)) {
1011  SimplifiedValues[&I] = C;
1012  ++NumConstantPtrCmps;
1013  return true;
1014  }
1015  }
1016  }
1017 
1018  // If the comparison is an equality comparison with null, we can simplify it
1019  // if we know the value (argument) can't be null
1020  if (I.isEquality() && isa<ConstantPointerNull>(I.getOperand(1)) &&
1021  isKnownNonNullInCallee(I.getOperand(0))) {
1022  bool IsNotEqual = I.getPredicate() == CmpInst::ICMP_NE;
1023  SimplifiedValues[&I] = IsNotEqual ? ConstantInt::getTrue(I.getType())
1024  : ConstantInt::getFalse(I.getType());
1025  return true;
1026  }
1027  // Finally check for SROA candidates in comparisons.
1028  Value *SROAArg;
1029  DenseMap<Value *, int>::iterator CostIt;
1030  if (lookupSROAArgAndCost(I.getOperand(0), SROAArg, CostIt)) {
1031  if (isa<ConstantPointerNull>(I.getOperand(1))) {
1032  accumulateSROACost(CostIt, InlineConstants::InstrCost);
1033  return true;
1034  }
1035 
1036  disableSROA(CostIt);
1037  }
1038 
1039  return false;
1040 }
1041 
1042 bool CallAnalyzer::visitSub(BinaryOperator &I) {
1043  // Try to handle a special case: we can fold computing the difference of two
1044  // constant-related pointers.
1045  Value *LHS = I.getOperand(0), *RHS = I.getOperand(1);
1046  Value *LHSBase, *RHSBase;
1047  APInt LHSOffset, RHSOffset;
1048  std::tie(LHSBase, LHSOffset) = ConstantOffsetPtrs.lookup(LHS);
1049  if (LHSBase) {
1050  std::tie(RHSBase, RHSOffset) = ConstantOffsetPtrs.lookup(RHS);
1051  if (RHSBase && LHSBase == RHSBase) {
1052  // We have common bases, fold the subtract to a constant based on the
1053  // offsets.
1054  Constant *CLHS = ConstantInt::get(LHS->getContext(), LHSOffset);
1055  Constant *CRHS = ConstantInt::get(RHS->getContext(), RHSOffset);
1056  if (Constant *C = ConstantExpr::getSub(CLHS, CRHS)) {
1057  SimplifiedValues[&I] = C;
1058  ++NumConstantPtrDiffs;
1059  return true;
1060  }
1061  }
1062  }
1063 
1064  // Otherwise, fall back to the generic logic for simplifying and handling
1065  // instructions.
1066  return Base::visitSub(I);
1067 }
1068 
1069 bool CallAnalyzer::visitBinaryOperator(BinaryOperator &I) {
1070  Value *LHS = I.getOperand(0), *RHS = I.getOperand(1);
1071  Constant *CLHS = dyn_cast<Constant>(LHS);
1072  if (!CLHS)
1073  CLHS = SimplifiedValues.lookup(LHS);
1074  Constant *CRHS = dyn_cast<Constant>(RHS);
1075  if (!CRHS)
1076  CRHS = SimplifiedValues.lookup(RHS);
1077 
1078  Value *SimpleV = nullptr;
1079  if (auto FI = dyn_cast<FPMathOperator>(&I))
1080  SimpleV = SimplifyFPBinOp(I.getOpcode(), CLHS ? CLHS : LHS,
1081  CRHS ? CRHS : RHS, FI->getFastMathFlags(), DL);
1082  else
1083  SimpleV =
1084  SimplifyBinOp(I.getOpcode(), CLHS ? CLHS : LHS, CRHS ? CRHS : RHS, DL);
1085 
1086  if (Constant *C = dyn_cast_or_null<Constant>(SimpleV))
1087  SimplifiedValues[&I] = C;
1088 
1089  if (SimpleV)
1090  return true;
1091 
1092  // Disable any SROA on arguments to arbitrary, unsimplified binary operators.
1093  disableSROA(LHS);
1094  disableSROA(RHS);
1095 
1096  // If the instruction is floating point, and the target says this operation
1097  // is expensive, this may eventually become a library call. Treat the cost
1098  // as such.
1099  if (I.getType()->isFloatingPointTy() &&
1100  TTI.getFPOpCost(I.getType()) == TargetTransformInfo::TCC_Expensive)
1101  addCost(InlineConstants::CallPenalty);
1102 
1103  return false;
1104 }
1105 
1106 bool CallAnalyzer::visitLoad(LoadInst &I) {
1107  Value *SROAArg;
1108  DenseMap<Value *, int>::iterator CostIt;
1109  if (lookupSROAArgAndCost(I.getPointerOperand(), SROAArg, CostIt)) {
1110  if (I.isSimple()) {
1111  accumulateSROACost(CostIt, InlineConstants::InstrCost);
1112  return true;
1113  }
1114 
1115  disableSROA(CostIt);
1116  }
1117 
1118  // If the data is already loaded from this address and hasn't been clobbered
1119  // by any stores or calls, this load is likely to be redundant and can be
1120  // eliminated.
1121  if (EnableLoadElimination &&
1122  !LoadAddrSet.insert(I.getPointerOperand()).second && I.isUnordered()) {
1123  LoadEliminationCost += InlineConstants::InstrCost;
1124  return true;
1125  }
1126 
1127  return false;
1128 }
1129 
1130 bool CallAnalyzer::visitStore(StoreInst &I) {
1131  Value *SROAArg;
1132  DenseMap<Value *, int>::iterator CostIt;
1133  if (lookupSROAArgAndCost(I.getPointerOperand(), SROAArg, CostIt)) {
1134  if (I.isSimple()) {
1135  accumulateSROACost(CostIt, InlineConstants::InstrCost);
1136  return true;
1137  }
1138 
1139  disableSROA(CostIt);
1140  }
1141 
1142  // The store can potentially clobber loads and prevent repeated loads from
1143  // being eliminated.
1144  // FIXME:
1145  // 1. We can probably keep an initial set of eliminatable loads subtracted
1146  // from the cost even when we finally see a store. We just need to disable
1147  // *further* accumulation of elimination savings.
1148  // 2. We should probably at some point thread MemorySSA for the callee into
1149  // this and then use that to actually compute *really* precise savings.
1150  disableLoadElimination();
1151  return false;
1152 }
1153 
1154 bool CallAnalyzer::visitExtractValue(ExtractValueInst &I) {
1155  // Constant folding for extract value is trivial.
1156  if (simplifyInstruction(I, [&](SmallVectorImpl<Constant *> &COps) {
1157  return ConstantExpr::getExtractValue(COps[0], I.getIndices());
1158  }))
1159  return true;
1160 
1161  // SROA can look through these but give them a cost.
1162  return false;
1163 }
1164 
1165 bool CallAnalyzer::visitInsertValue(InsertValueInst &I) {
1166  // Constant folding for insert value is trivial.
1167  if (simplifyInstruction(I, [&](SmallVectorImpl<Constant *> &COps) {
1168  return ConstantExpr::getInsertValue(/*AggregateOperand*/ COps[0],
1169  /*InsertedValueOperand*/ COps[1],
1170  I.getIndices());
1171  }))
1172  return true;
1173 
1174  // SROA can look through these but give them a cost.
1175  return false;
1176 }
1177 
1178 /// Try to simplify a call site.
1179 ///
1180 /// Takes a concrete function and callsite and tries to actually simplify it by
1181 /// analyzing the arguments and call itself with instsimplify. Returns true if
1182 /// it has simplified the callsite to some other entity (a constant), making it
1183 /// free.
1184 bool CallAnalyzer::simplifyCallSite(Function *F, CallBase &Call) {
1185  // FIXME: Using the instsimplify logic directly for this is inefficient
1186  // because we have to continually rebuild the argument list even when no
1187  // simplifications can be performed. Until that is fixed with remapping
1188  // inside of instsimplify, directly constant fold calls here.
1189  if (!canConstantFoldCallTo(&Call, F))
1190  return false;
1191 
1192  // Try to re-map the arguments to constants.
1193  SmallVector<Constant *, 4> ConstantArgs;
1194  ConstantArgs.reserve(Call.arg_size());
1195  for (Value *I : Call.args()) {
1196  Constant *C = dyn_cast<Constant>(I);
1197  if (!C)
1198  C = dyn_cast_or_null<Constant>(SimplifiedValues.lookup(I));
1199  if (!C)
1200  return false; // This argument doesn't map to a constant.
1201 
1202  ConstantArgs.push_back(C);
1203  }
1204  if (Constant *C = ConstantFoldCall(&Call, F, ConstantArgs)) {
1205  SimplifiedValues[&Call] = C;
1206  return true;
1207  }
1208 
1209  return false;
1210 }
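// Editorial note, not part of the original source: as an example, a call such
// as llvm.ctlz.i32(i32 %x, i1 true) whose argument %x maps to a constant in
// SimplifiedValues can be folded by ConstantFoldCall here; the folded result
// is cached in SimplifiedValues and the callsite is then modeled as free.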
1211 
1212 bool CallAnalyzer::visitCallBase(CallBase &Call) {
1213  if (Call.hasFnAttr(Attribute::ReturnsTwice) &&
1214  !F.hasFnAttribute(Attribute::ReturnsTwice)) {
1215  // This aborts the entire analysis.
1216  ExposesReturnsTwice = true;
1217  return false;
1218  }
1219  if (isa<CallInst>(Call) && cast<CallInst>(Call).cannotDuplicate())
1220  ContainsNoDuplicateCall = true;
1221 
1222  if (Function *F = Call.getCalledFunction()) {
1223  // When we have a concrete function, first try to simplify it directly.
1224  if (simplifyCallSite(F, Call))
1225  return true;
1226 
1227  // Next check if it is an intrinsic we know about.
1228  // FIXME: Lift this into part of the InstVisitor.
1229  if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(&Call)) {
1230  switch (II->getIntrinsicID()) {
1231  default:
1232  if (!Call.onlyReadsMemory() && !isAssumeLikeIntrinsic(II))
1233  disableLoadElimination();
1234  return Base::visitCallBase(Call);
1235 
1236  case Intrinsic::load_relative:
1237  // This is normally lowered to 4 LLVM instructions.
1238  addCost(3 * InlineConstants::InstrCost);
1239  return false;
1240 
1241  case Intrinsic::memset:
1242  case Intrinsic::memcpy:
1243  case Intrinsic::memmove:
1244  disableLoadElimination();
1245  // SROA can usually chew through these intrinsics, but they aren't free.
1246  return false;
1247  case Intrinsic::icall_branch_funnel:
1248  case Intrinsic::localescape:
1249  HasUninlineableIntrinsic = true;
1250  return false;
1251  case Intrinsic::vastart:
1252  InitsVargArgs = true;
1253  return false;
1254  }
1255  }
1256 
1257  if (F == Call.getFunction()) {
1258  // This flag will fully abort the analysis, so don't bother with anything
1259  // else.
1260  IsRecursiveCall = true;
1261  return false;
1262  }
1263 
1264  if (TTI.isLoweredToCall(F)) {
1265  // We account for the average 1 instruction per call argument setup
1266  // here.
1267  addCost(Call.arg_size() * InlineConstants::InstrCost);
1268 
1269  // Everything other than inline ASM will also have a significant cost
1270  // merely from making the call.
1271  if (!isa<InlineAsm>(Call.getCalledValue()))
1272  addCost(InlineConstants::CallPenalty);
1273  }
1274 
1275  if (!Call.onlyReadsMemory())
1276  disableLoadElimination();
1277  return Base::visitCallBase(Call);
1278  }
1279 
1280  // Otherwise we're in a very special case -- an indirect function call. See
1281  // if we can be particularly clever about this.
1282  Value *Callee = Call.getCalledValue();
1283 
1284  // First, pay the price of the argument setup. We account for the average
1285  // 1 instruction per call argument setup here.
1286  addCost(Call.arg_size() * InlineConstants::InstrCost);
1287 
1288  // Next, check if this happens to be an indirect function call to a known
1289  // function in this inline context. If not, we've done all we can.
1290  Function *F = dyn_cast_or_null<Function>(SimplifiedValues.lookup(Callee));
1291  if (!F) {
1292  if (!Call.onlyReadsMemory())
1293  disableLoadElimination();
1294  return Base::visitCallBase(Call);
1295  }
1296 
1297  // If we have a constant that we are calling as a function, we can peer
1298  // through it and see the function target. This happens not infrequently
1299  // during devirtualization and so we want to give it a hefty bonus for
1300  // inlining, but cap that bonus in the event that inlining wouldn't pan
1301  // out. Pretend to inline the function, with a custom threshold.
1302  auto IndirectCallParams = Params;
1303  IndirectCallParams.DefaultThreshold = InlineConstants::IndirectCallThreshold;
1304  CallAnalyzer CA(TTI, GetAssumptionCache, GetBFI, PSI, ORE, *F, Call,
1305  IndirectCallParams);
1306  if (CA.analyzeCall(Call)) {
1307  // We were able to inline the indirect call! Subtract the cost from the
1308  // threshold to get the bonus we want to apply, but don't go below zero.
1309  Cost -= std::max(0, CA.getThreshold() - CA.getCost());
1310  }
1311 
1312  if (!F->onlyReadsMemory())
1313  disableLoadElimination();
1314  return Base::visitCallBase(Call);
1315 }
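// Editorial note, not part of the original source: in the indirect-call path
// above, when SimplifiedValues has resolved the called value to a concrete
// function, a nested CallAnalyzer is run against it with the indirect-call
// threshold; if that nested analysis succeeds, the current Cost is credited
// with max(0, nested Threshold - nested Cost) as a speculative
// devirtualization bonus.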
1316 
1317 bool CallAnalyzer::visitReturnInst(ReturnInst &RI) {
1318  // At least one return instruction will be free after inlining.
1319  bool Free = !HasReturn;
1320  HasReturn = true;
1321  return Free;
1322 }
1323 
1324 bool CallAnalyzer::visitBranchInst(BranchInst &BI) {
1325  // We model unconditional branches as essentially free -- they really
1326  // shouldn't exist at all, but handling them makes the behavior of the
1327  // inliner more regular and predictable. Interestingly, conditional branches
1328  // which will fold away are also free.
1329  return BI.isUnconditional() || isa<ConstantInt>(BI.getCondition()) ||
1330  dyn_cast_or_null<ConstantInt>(
1331  SimplifiedValues.lookup(BI.getCondition()));
1332 }
1333 
1334 bool CallAnalyzer::visitSelectInst(SelectInst &SI) {
1335  bool CheckSROA = SI.getType()->isPointerTy();
1336  Value *TrueVal = SI.getTrueValue();
1337  Value *FalseVal = SI.getFalseValue();
1338 
1339  Constant *TrueC = dyn_cast<Constant>(TrueVal);
1340  if (!TrueC)
1341  TrueC = SimplifiedValues.lookup(TrueVal);
1342  Constant *FalseC = dyn_cast<Constant>(FalseVal);
1343  if (!FalseC)
1344  FalseC = SimplifiedValues.lookup(FalseVal);
1345  Constant *CondC =
1346  dyn_cast_or_null<Constant>(SimplifiedValues.lookup(SI.getCondition()));
1347 
1348  if (!CondC) {
1349  // Select C, X, X => X
1350  if (TrueC == FalseC && TrueC) {
1351  SimplifiedValues[&SI] = TrueC;
1352  return true;
1353  }
1354 
1355  if (!CheckSROA)
1356  return Base::visitSelectInst(SI);
1357 
1358  std::pair<Value *, APInt> TrueBaseAndOffset =
1359  ConstantOffsetPtrs.lookup(TrueVal);
1360  std::pair<Value *, APInt> FalseBaseAndOffset =
1361  ConstantOffsetPtrs.lookup(FalseVal);
1362  if (TrueBaseAndOffset == FalseBaseAndOffset && TrueBaseAndOffset.first) {
1363  ConstantOffsetPtrs[&SI] = TrueBaseAndOffset;
1364 
1365  Value *SROAArg;
1366  DenseMap<Value *, int>::iterator CostIt;
1367  if (lookupSROAArgAndCost(TrueVal, SROAArg, CostIt))
1368  SROAArgValues[&SI] = SROAArg;
1369  return true;
1370  }
1371 
1372  return Base::visitSelectInst(SI);
1373  }
1374 
1375  // Select condition is a constant.
1376  Value *SelectedV = CondC->isAllOnesValue()
1377  ? TrueVal
1378  : (CondC->isNullValue()) ? FalseVal : nullptr;
1379  if (!SelectedV) {
1380  // Condition is a vector constant that is not all 1s or all 0s. If all
1381  // operands are constants, ConstantExpr::getSelect() can handle the cases
1382  // such as select vectors.
1383  if (TrueC && FalseC) {
1384  if (auto *C = ConstantExpr::getSelect(CondC, TrueC, FalseC)) {
1385  SimplifiedValues[&SI] = C;
1386  return true;
1387  }
1388  }
1389  return Base::visitSelectInst(SI);
1390  }
1391 
1392  // Condition is either all 1s or all 0s. SI can be simplified.
1393  if (Constant *SelectedC = dyn_cast<Constant>(SelectedV)) {
1394  SimplifiedValues[&SI] = SelectedC;
1395  return true;
1396  }
1397 
1398  if (!CheckSROA)
1399  return true;
1400 
1401  std::pair<Value *, APInt> BaseAndOffset =
1402  ConstantOffsetPtrs.lookup(SelectedV);
1403  if (BaseAndOffset.first) {
1404  ConstantOffsetPtrs[&SI] = BaseAndOffset;
1405 
1406  Value *SROAArg;
1407  DenseMap<Value *, int>::iterator CostIt;
1408  if (lookupSROAArgAndCost(SelectedV, SROAArg, CostIt))
1409  SROAArgValues[&SI] = SROAArg;
1410  }
1411 
1412  return true;
1413 }
1414 
1415 bool CallAnalyzer::visitSwitchInst(SwitchInst &SI) {
1416  // We model unconditional switches as free, see the comments on handling
1417  // branches.
1418  if (isa<ConstantInt>(SI.getCondition()))
1419  return true;
1420  if (Value *V = SimplifiedValues.lookup(SI.getCondition()))
1421  if (isa<ConstantInt>(V))
1422  return true;
1423 
1424  // Assume the most general case where the switch is lowered into
1425  // either a jump table, bit test, or a balanced binary tree consisting of
1426  // case clusters without merging adjacent clusters with the same
1427  // destination. We do not consider the switches that are lowered with a mix
1428  // of jump table/bit test/binary search tree. The cost of the switch is
1429  // proportional to the size of the tree or the size of jump table range.
1430  //
1431  // NB: We convert large switches which are just used to initialize large phi
1432  // nodes to lookup tables instead in simplify-cfg, so this shouldn't prevent
1433  // inlining those. It will prevent inlining in cases where the optimization
1434  // does not (yet) fire.
1435 
1436  // Maximum valid cost increased in this function.
1437  int CostUpperBound = INT_MAX - InlineConstants::InstrCost - 1;
1438 
1439  // Exit early for a large switch, assuming one case needs at least one
1440  // instruction.
1441  // FIXME: This is not true for a bit test, but ignore such case for now to
1442  // save compile-time.
1443  int64_t CostLowerBound =
1444  std::min((int64_t)CostUpperBound,
1445  (int64_t)SI.getNumCases() * InlineConstants::InstrCost + Cost);
1446 
1447  if (CostLowerBound > Threshold && !ComputeFullInlineCost) {
1448  addCost((int64_t)SI.getNumCases() * InlineConstants::InstrCost);
1449  return false;
1450  }
1451 
1452  unsigned JumpTableSize = 0;
1453  unsigned NumCaseCluster =
1454  TTI.getEstimatedNumberOfCaseClusters(SI, JumpTableSize);
1455 
1456  // If suitable for a jump table, consider the cost for the table size and
1457  // branch to destination.
1458  if (JumpTableSize) {
1459  int64_t JTCost = (int64_t)JumpTableSize * InlineConstants::InstrCost +
1460  4 * InlineConstants::InstrCost;
1461 
1462  addCost(JTCost, (int64_t)CostUpperBound);
1463  return false;
1464  }
1465 
1466  // Considering forming a binary search, we should find the number of nodes
1467  // which is same as the number of comparisons when lowered. For a given
1468  // number of clusters, n, we can define a recursive function, f(n), to find
1469  // the number of nodes in the tree. The recursion is :
1470  // f(n) = 1 + f(n/2) + f (n - n/2), when n > 3,
1471  // and f(n) = n, when n <= 3.
1472  // This will lead a binary tree where the leaf should be either f(2) or f(3)
1473  // when n > 3. So, the number of comparisons from leaves should be n, while
1474  // the number of non-leaf should be :
1475  // 2^(log2(n) - 1) - 1
1476  // = 2^log2(n) * 2^-1 - 1
1477  // = n / 2 - 1.
1478  // Considering comparisons from leaf and non-leaf nodes, we can estimate the
1479  // number of comparisons in a simple closed form :
1480  // n + n / 2 - 1 = n * 3 / 2 - 1
1481  if (NumCaseCluster <= 3) {
1482  // Suppose a comparison includes one compare and one conditional branch.
1483  addCost(NumCaseCluster * 2 * InlineConstants::InstrCost);
1484  return false;
1485  }
1486 
1487  int64_t ExpectedNumberOfCompare = 3 * (int64_t)NumCaseCluster / 2 - 1;
1488  int64_t SwitchCost =
1489  ExpectedNumberOfCompare * 2 * InlineConstants::InstrCost;
1490 
1491  addCost(SwitchCost, (int64_t)CostUpperBound);
1492  return false;
1493 }
1494 
1495 bool CallAnalyzer::visitIndirectBrInst(IndirectBrInst &IBI) {
1496  // We never want to inline functions that contain an indirectbr. This is
1497  // incorrect because all the blockaddresses (in static global initializers
1498  // for example) would be referring to the original function, and this
1499  // indirect jump would jump from the inlined copy of the function into the
1500  // original function which is extremely undefined behavior.
1501  // FIXME: This logic isn't really right; we can safely inline functions with
1502  // indirectbr's as long as no other function or global references the
1503  // blockaddress of a block within the current function.
1504  HasIndirectBr = true;
1505  return false;
1506 }
1507 
1508 bool CallAnalyzer::visitResumeInst(ResumeInst &RI) {
1509  // FIXME: It's not clear that a single instruction is an accurate model for
1510  // the inline cost of a resume instruction.
1511  return false;
1512 }
1513 
1514 bool CallAnalyzer::visitCleanupReturnInst(CleanupReturnInst &CRI) {
1515  // FIXME: It's not clear that a single instruction is an accurate model for
1516  // the inline cost of a cleanupret instruction.
1517  return false;
1518 }
1519 
1520 bool CallAnalyzer::visitCatchReturnInst(CatchReturnInst &CRI) {
1521  // FIXME: It's not clear that a single instruction is an accurate model for
1522  // the inline cost of a catchret instruction.
1523  return false;
1524 }
1525 
1526 bool CallAnalyzer::visitUnreachableInst(UnreachableInst &I) {
1527  // FIXME: It might be reasonable to discount the cost of instructions leading
1528  // to unreachable as they have the lowest possible impact on both runtime and
1529  // code size.
1530  return true; // No actual code is needed for unreachable.
1531 }
1532 
1533 bool CallAnalyzer::visitInstruction(Instruction &I) {
1534  // Some instructions are free. All of the free intrinsics can also be
1535  // handled by SROA, etc.
1536  if (TargetTransformInfo::TCC_Free == TTI.getUserCost(&I))
1537  return true;
1538 
1539  // We found something we don't understand or can't handle. Mark any SROA-able
1540  // values in the operand list as no longer viable.
1541  for (User::op_iterator OI = I.op_begin(), OE = I.op_end(); OI != OE; ++OI)
1542  disableSROA(*OI);
1543 
1544  return false;
1545 }
1546 
1547 /// Analyze a basic block for its contribution to the inline cost.
1548 ///
1549 /// This method walks the analyzer over every instruction in the given basic
1550 /// block and accounts for their cost during inlining at this callsite. It
1551 /// aborts early if the threshold has been exceeded or an impossible to inline
1552 /// construct has been detected. It returns false if inlining is no longer
1553 /// viable, and true if inlining remains viable.
1554 InlineResult
1555 CallAnalyzer::analyzeBlock(BasicBlock *BB,
1556  SmallPtrSetImpl<const Value *> &EphValues) {
1557  for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I) {
1558  // FIXME: Currently, the number of instructions in a function, regardless of
1559  // our ability to simplify them to constants or dead code during inlining,
1560  // is actually used by the vector bonus heuristic. As long as that's true,
1561  // we have to special case debug intrinsics here to prevent differences in
1562  // inlining due to debug symbols. Eventually, the number of unsimplified
1563  // instructions shouldn't factor into the cost computation, but until then,
1564  // hack around it here.
1565  if (isa<DbgInfoIntrinsic>(I))
1566  continue;
1567 
1568  // Skip ephemeral values.
1569  if (EphValues.count(&*I))
1570  continue;
1571 
1572  ++NumInstructions;
1573  if (isa<ExtractElementInst>(I) || I->getType()->isVectorTy())
1574  ++NumVectorInstructions;
1575 
1576  // If the instruction simplified to a constant, there is no cost to this
1577  // instruction. Visit the instructions using our InstVisitor to account for
1578  // all of the per-instruction logic. The visit tree returns true if we
1579  // consumed the instruction in any way, and false if the instruction's base
1580  // cost should count against inlining.
1581  if (Base::visit(&*I))
1582  ++NumInstructionsSimplified;
1583  else
1584  addCost(InlineConstants::InstrCost);
1585 
1586  using namespace ore;
1587  // If visiting this instruction detected an uninlinable pattern, abort.
1588  InlineResult IR;
1589  if (IsRecursiveCall)
1590  IR = "recursive";
1591  else if (ExposesReturnsTwice)
1592  IR = "exposes returns twice";
1593  else if (HasDynamicAlloca)
1594  IR = "dynamic alloca";
1595  else if (HasIndirectBr)
1596  IR = "indirect branch";
1597  else if (HasUninlineableIntrinsic)
1598  IR = "uninlinable intrinsic";
1599  else if (InitsVargArgs)
1600  IR = "varargs";
1601  if (!IR) {
1602  if (ORE)
1603  ORE->emit([&]() {
1604  return OptimizationRemarkMissed(DEBUG_TYPE, "NeverInline",
1605  &CandidateCall)
1606  << NV("Callee", &F) << " has uninlinable pattern ("
1607  << NV("InlineResult", IR.message)
1608  << ") and cost is not fully computed";
1609  });
1610  return IR;
1611  }
1612 
1613  // If the caller is a recursive function then we don't want to inline
1614  // functions which allocate a lot of stack space because it would increase
1615  // the caller stack usage dramatically.
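  // (InlineConstants::TotalAllocaSizeRecursiveCaller is a small fixed budget;
  // 1024 bytes at the time of writing.)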
1616  if (IsCallerRecursive &&
1617  AllocatedSize > InlineConstants::TotalAllocaSizeRecursiveCaller) {
1618  InlineResult IR = "recursive and allocates too much stack space";
1619  if (ORE)
1620  ORE->emit([&]() {
1621  return OptimizationRemarkMissed(DEBUG_TYPE, "NeverInline",
1622  &CandidateCall)
1623  << NV("Callee", &F) << " is " << NV("InlineResult", IR.message)
1624  << ". Cost is not fully computed";
1625  });
1626  return IR;
1627  }
1628 
1629  // Check if we've passed the maximum possible threshold so we don't spin in
1630  // huge basic blocks that will never inline.
1631  if (Cost >= Threshold && !ComputeFullInlineCost)
1632  return false;
1633  }
1634 
1635  return true;
1636 }
1637 
1638 /// Compute the base pointer and cumulative constant offsets for V.
1639 ///
1640 /// This strips all constant offsets off of V, leaving it the base pointer, and
1641 /// accumulates the total constant offset applied in the returned constant. It
1642  /// returns null if V is not a pointer, and returns the constant '0' if there are
1643 /// no constant offsets applied.
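/// Illustrative example (not taken from a real test case): given
///   %p = getelementptr inbounds i32, i32* %base, i64 4
/// this rewrites V to %base and returns the constant 16, assuming a 4-byte
/// i32, since only inbounds GEPs, bitcasts and non-interposable aliases are
/// looked through.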
1644 ConstantInt *CallAnalyzer::stripAndComputeInBoundsConstantOffsets(Value *&V) {
1645  if (!V->getType()->isPointerTy())
1646  return nullptr;
1647 
1648  unsigned AS = V->getType()->getPointerAddressSpace();
1649  unsigned IntPtrWidth = DL.getIndexSizeInBits(AS);
1650  APInt Offset = APInt::getNullValue(IntPtrWidth);
1651 
1652  // Even though we don't look through PHI nodes, we could be called on an
1653  // instruction in an unreachable block, which may be on a cycle.
1654  SmallPtrSet<Value *, 4> Visited;
1655  Visited.insert(V);
1656  do {
1657  if (GEPOperator *GEP = dyn_cast<GEPOperator>(V)) {
1658  if (!GEP->isInBounds() || !accumulateGEPOffset(*GEP, Offset))
1659  return nullptr;
1660  V = GEP->getPointerOperand();
1661  } else if (Operator::getOpcode(V) == Instruction::BitCast) {
1662  V = cast<Operator>(V)->getOperand(0);
1663  } else if (GlobalAlias *GA = dyn_cast<GlobalAlias>(V)) {
1664  if (GA->isInterposable())
1665  break;
1666  V = GA->getAliasee();
1667  } else {
1668  break;
1669  }
1670  assert(V->getType()->isPointerTy() && "Unexpected operand type!");
1671  } while (Visited.insert(V).second);
1672 
1673  Type *IntPtrTy = DL.getIntPtrType(V->getContext(), AS);
1674  return cast<ConstantInt>(ConstantInt::get(IntPtrTy, Offset));
1675 }
1676 
1677 /// Find dead blocks due to deleted CFG edges during inlining.
1678 ///
1679 /// If we know the successor of the current block, \p CurrBB, has to be \p
1680 /// NextBB, the other successors of \p CurrBB are dead if these successors have
1681 /// no live incoming CFG edges. If one block is found to be dead, we can
1682 /// continue growing the dead block list by checking the successors of the dead
1683 /// blocks to see if all their incoming edges are dead or not.
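/// As a rough sketch: when a conditional branch in \p CurrBB folds to a
/// constant, analyzeCall records the taken successor in KnownSuccessors and
/// calls this function; the not-taken successor is then added to DeadBlocks
/// unless some other still-live block branches to it.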
1684 void CallAnalyzer::findDeadBlocks(BasicBlock *CurrBB, BasicBlock *NextBB) {
1685  auto IsEdgeDead = [&](BasicBlock *Pred, BasicBlock *Succ) {
1686  // A CFG edge is dead if the predecessor is dead or the predecessor has a
1687  // known successor which is not the one under exam.
1688  return (DeadBlocks.count(Pred) ||
1689  (KnownSuccessors[Pred] && KnownSuccessors[Pred] != Succ));
1690  };
1691 
1692  auto IsNewlyDead = [&](BasicBlock *BB) {
1693  // If all the edges to a block are dead, the block is also dead.
1694  return (!DeadBlocks.count(BB) &&
1695  llvm::all_of(predecessors(BB),
1696  [&](BasicBlock *P) { return IsEdgeDead(P, BB); }));
1697  };
1698 
1699  for (BasicBlock *Succ : successors(CurrBB)) {
1700  if (Succ == NextBB || !IsNewlyDead(Succ))
1701  continue;
1702  SmallVector<BasicBlock *, 4> NewDead;
1703  NewDead.push_back(Succ);
1704  while (!NewDead.empty()) {
1705  BasicBlock *Dead = NewDead.pop_back_val();
1706  if (DeadBlocks.insert(Dead))
1707  // Continue growing the dead block lists.
1708  for (BasicBlock *S : successors(Dead))
1709  if (IsNewlyDead(S))
1710  NewDead.push_back(S);
1711  }
1712  }
1713 }
1714 
1715 /// Analyze a call site for potential inlining.
1716 ///
1717 /// Returns true if inlining this call is viable, and false if it is not
1718 /// viable. It computes the cost and adjusts the threshold based on numerous
1719 /// factors and heuristics. If this method returns false but the computed cost
1720 /// is below the computed threshold, then inlining was forcibly disabled by
1721 /// some artifact of the routine.
1722 InlineResult CallAnalyzer::analyzeCall(CallBase &Call) {
1723  ++NumCallsAnalyzed;
1724 
1725  // Perform some tweaks to the cost and threshold based on the direct
1726  // callsite information.
1727 
1728  // We want to more aggressively inline vector-dense kernels, so up the
1729  // threshold, and we'll lower it if the % of vector instructions gets too
1730  // low. Note that these bonuses are somewhat arbitrary and evolved over time
1731  // by accident as much as because they are principled bonuses.
1732  //
1733  // FIXME: It would be nice to remove all such bonuses. At least it would be
1734  // nice to base the bonus values on something more scientific.
1735  assert(NumInstructions == 0);
1736  assert(NumVectorInstructions == 0);
1737 
1738  // Update the threshold based on callsite properties
1739  updateThreshold(Call, F);
1740 
1741  // While Threshold depends on commandline options that can take negative
1742  // values, we want to enforce the invariant that the computed threshold and
1743  // bonuses are non-negative.
1744  assert(Threshold >= 0);
1745  assert(SingleBBBonus >= 0);
1746  assert(VectorBonus >= 0);
1747 
1748  // Speculatively apply all possible bonuses to Threshold. If cost exceeds
1749  // this Threshold any time, and cost cannot decrease, we can stop processing
1750  // the rest of the function body.
1751  Threshold += (SingleBBBonus + VectorBonus);
1752 
1753  // Give out bonuses for the callsite, as the instructions setting them up
1754  // will be gone after inlining.
1755  addCost(-getCallsiteCost(Call, DL));
1756 
1757  // If this function uses the coldcc calling convention, prefer not to inline
1758  // it.
1759  if (F.getCallingConv() == CallingConv::Cold)
1760  Cost += InlineConstants::ColdccPenalty;
1761 
1762  // Check if we're done. This can happen due to bonuses and penalties.
1763  if (Cost >= Threshold && !ComputeFullInlineCost)
1764  return "high cost";
1765 
1766  if (F.empty())
1767  return true;
1768 
1769  Function *Caller = Call.getFunction();
1770  // Check if the caller function is recursive itself.
1771  for (User *U : Caller->users()) {
1772  CallBase *Call = dyn_cast<CallBase>(U);
1773  if (Call && Call->getFunction() == Caller) {
1774  IsCallerRecursive = true;
1775  break;
1776  }
1777  }
1778 
1779  // Populate our simplified values by mapping from function arguments to call
1780  // arguments with known important simplifications.
1781  auto CAI = Call.arg_begin();
1782  for (Function::arg_iterator FAI = F.arg_begin(), FAE = F.arg_end();
1783  FAI != FAE; ++FAI, ++CAI) {
1784  assert(CAI != Call.arg_end());
1785  if (Constant *C = dyn_cast<Constant>(CAI))
1786  SimplifiedValues[&*FAI] = C;
1787 
1788  Value *PtrArg = *CAI;
1789  if (ConstantInt *C = stripAndComputeInBoundsConstantOffsets(PtrArg)) {
1790  ConstantOffsetPtrs[&*FAI] = std::make_pair(PtrArg, C->getValue());
1791 
1792  // We can SROA any pointer arguments derived from alloca instructions.
1793  if (isa<AllocaInst>(PtrArg)) {
1794  SROAArgValues[&*FAI] = PtrArg;
1795  SROAArgCosts[PtrArg] = 0;
1796  }
1797  }
1798  }
1799  NumConstantArgs = SimplifiedValues.size();
1800  NumConstantOffsetPtrArgs = ConstantOffsetPtrs.size();
1801  NumAllocaArgs = SROAArgValues.size();
1802 
1803  // FIXME: If a caller has multiple calls to a callee, we end up recomputing
1804  // the ephemeral values multiple times (and they're completely determined by
1805  // the callee, so this is purely duplicate work).
1806  SmallPtrSet<const Value *, 32> EphValues;
1807  CodeMetrics::collectEphemeralValues(&F, &GetAssumptionCache(F), EphValues);
1808 
1809  // The worklist of live basic blocks in the callee *after* inlining. We avoid
1810  // adding basic blocks of the callee which can be proven to be dead for this
1811  // particular call site in order to get more accurate cost estimates. This
1812  // requires a somewhat heavyweight iteration pattern: we need to walk the
1813  // basic blocks in a breadth-first order as we insert live successors. To
1814  // accomplish this, and because we usually visit only a few blocks before
1815  // crossing our threshold and exiting, we use a small-size optimized SetVector.
1816  typedef SetVector<BasicBlock *, SmallVector<BasicBlock *, 16>,
1817  SmallPtrSet<BasicBlock *, 16>>
1818  BBSetVector;
1819  BBSetVector BBWorklist;
1820  BBWorklist.insert(&F.getEntryBlock());
1821  bool SingleBB = true;
1822  // Note that we *must not* cache the size, this loop grows the worklist.
1823  for (unsigned Idx = 0; Idx != BBWorklist.size(); ++Idx) {
1824  // Bail out the moment we cross the threshold. This means we'll under-count
1825  // the cost, but only when undercounting doesn't matter.
1826  if (Cost >= Threshold && !ComputeFullInlineCost)
1827  break;
1828 
1829  BasicBlock *BB = BBWorklist[Idx];
1830  if (BB->empty())
1831  continue;
1832 
1833  // Disallow inlining a blockaddress with uses other than strictly callbr.
1834  // A blockaddress only has defined behavior for an indirect branch in the
1835  // same function, and we do not currently support inlining indirect
1836  // branches. But, the inliner may not see an indirect branch that ends up
1837  // being dead code at a particular call site. If the blockaddress escapes
1838  // the function, e.g., via a global variable, inlining may lead to an
1839  // invalid cross-function reference.
1840  // FIXME: pr/39560: continue relaxing this overt restriction.
1841  if (BB->hasAddressTaken())
1842  for (User *U : BlockAddress::get(&*BB)->users())
1843  if (!isa<CallBrInst>(*U))
1844  return "blockaddress used outside of callbr";
1845 
1846  // Analyze the cost of this block. If we blow through the threshold, this
1847  // returns false, and we can bail out.
1848  InlineResult IR = analyzeBlock(BB, EphValues);
1849  if (!IR)
1850  return IR;
1851 
1852  Instruction *TI = BB->getTerminator();
1853 
1854  // Add in the live successors by first checking whether we have a terminator
1855  // that may be simplified based on the values simplified by this call.
1856  if (BranchInst *BI = dyn_cast<BranchInst>(TI)) {
1857  if (BI->isConditional()) {
1858  Value *Cond = BI->getCondition();
1859  if (ConstantInt *SimpleCond =
1860  dyn_cast_or_null<ConstantInt>(SimplifiedValues.lookup(Cond))) {
1861  BasicBlock *NextBB = BI->getSuccessor(SimpleCond->isZero() ? 1 : 0);
1862  BBWorklist.insert(NextBB);
1863  KnownSuccessors[BB] = NextBB;
1864  findDeadBlocks(BB, NextBB);
1865  continue;
1866  }
1867  }
1868  } else if (SwitchInst *SI = dyn_cast<SwitchInst>(TI)) {
1869  Value *Cond = SI->getCondition();
1870  if (ConstantInt *SimpleCond =
1871  dyn_cast_or_null<ConstantInt>(SimplifiedValues.lookup(Cond))) {
1872  BasicBlock *NextBB = SI->findCaseValue(SimpleCond)->getCaseSuccessor();
1873  BBWorklist.insert(NextBB);
1874  KnownSuccessors[BB] = NextBB;
1875  findDeadBlocks(BB, NextBB);
1876  continue;
1877  }
1878  }
1879 
1880  // If we're unable to select a particular successor, just count all of
1881  // them.
1882  for (unsigned TIdx = 0, TSize = TI->getNumSuccessors(); TIdx != TSize;
1883  ++TIdx)
1884  BBWorklist.insert(TI->getSuccessor(TIdx));
1885 
1886  // If we had any successors at this point, then post-inlining is likely to
1887  // have them as well. Note that we assume any basic blocks which existed
1888  // due to branches or switches which folded above will also fold after
1889  // inlining.
1890  if (SingleBB && TI->getNumSuccessors() > 1) {
1891  // Take off the bonus we applied to the threshold.
1892  Threshold -= SingleBBBonus;
1893  SingleBB = false;
1894  }
1895  }
1896 
1897  bool OnlyOneCallAndLocalLinkage =
1898  F.hasLocalLinkage() && F.hasOneUse() && &F == Call.getCalledFunction();
1899  // If this is a noduplicate call, we can still inline as long as
1900  // inlining this would cause the removal of the callee (so the instruction
1901  // is not actually duplicated, just moved).
1902  if (!OnlyOneCallAndLocalLinkage && ContainsNoDuplicateCall)
1903  return "noduplicate";
1904 
1905  // Loops generally act a lot like calls in that they act like barriers to
1906  // movement, require a certain amount of setup, etc. So when optimising for
1907  // size, we penalise any call sites that perform loops. We do this after all
1908  // other costs here, so will likely only be dealing with relatively small
1909  // functions (and hence DT and LI will hopefully be cheap).
1910  if (Caller->hasMinSize()) {
1911  DominatorTree DT(F);
1912  LoopInfo LI(DT);
1913  int NumLoops = 0;
1914  for (Loop *L : LI) {
1915  // Ignore loops that will not be executed
1916  if (DeadBlocks.count(L->getHeader()))
1917  continue;
1918  NumLoops++;
1919  }
1920  addCost(NumLoops * InlineConstants::CallPenalty);
1921  }
1922 
1923  // We applied the maximum possible vector bonus at the beginning. Now,
1924  // subtract the excess bonus, if any, from the Threshold before
1925  // comparing against Cost.
1926  if (NumVectorInstructions <= NumInstructions / 10)
1927  Threshold -= VectorBonus;
1928  else if (NumVectorInstructions <= NumInstructions / 2)
1929  Threshold -= VectorBonus/2;
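  // For example, a callee where only 5% of the counted instructions are vector
  // operations loses the entire speculative VectorBonus, one at 30% keeps half
  // of it, and one above 50% keeps the whole bonus.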
1930 
1931  return Cost < std::max(1, Threshold);
1932 }
1933 
1934 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1935 /// Dump stats about this call's analysis.
1936 LLVM_DUMP_METHOD void CallAnalyzer::dump() {
1937 #define DEBUG_PRINT_STAT(x) dbgs() << " " #x ": " << x << "\n"
1938  DEBUG_PRINT_STAT(NumConstantArgs);
1939  DEBUG_PRINT_STAT(NumConstantOffsetPtrArgs);
1940  DEBUG_PRINT_STAT(NumAllocaArgs);
1941  DEBUG_PRINT_STAT(NumConstantPtrCmps);
1942  DEBUG_PRINT_STAT(NumConstantPtrDiffs);
1943  DEBUG_PRINT_STAT(NumInstructionsSimplified);
1944  DEBUG_PRINT_STAT(NumInstructions);
1945  DEBUG_PRINT_STAT(SROACostSavings);
1946  DEBUG_PRINT_STAT(SROACostSavingsLost);
1947  DEBUG_PRINT_STAT(LoadEliminationCost);
1948  DEBUG_PRINT_STAT(ContainsNoDuplicateCall);
1949  DEBUG_PRINT_STAT(Cost);
1950  DEBUG_PRINT_STAT(Threshold);
1951 #undef DEBUG_PRINT_STAT
1952 }
1953 #endif
1954 
1955 /// Test that there are no attribute conflicts between Caller and Callee
1956 /// that prevent inlining.
1957 static bool functionsHaveCompatibleAttributes(Function *Caller,
1958  Function *Callee,
1959  TargetTransformInfo &TTI) {
1960  return TTI.areInlineCompatible(Caller, Callee) &&
1961  AttributeFuncs::areInlineCompatible(*Caller, *Callee);
1962 }
1963 
1964 int llvm::getCallsiteCost(CallBase &Call, const DataLayout &DL) {
1965  int Cost = 0;
1966  for (unsigned I = 0, E = Call.arg_size(); I != E; ++I) {
1967  if (Call.isByValArgument(I)) {
1968  // We approximate the number of loads and stores needed by dividing the
1969  // size of the byval type by the target's pointer size.
1970  PointerType *PTy = cast<PointerType>(Call.getArgOperand(I)->getType());
1971  unsigned TypeSize = DL.getTypeSizeInBits(PTy->getElementType());
1972  unsigned AS = PTy->getAddressSpace();
1973  unsigned PointerSize = DL.getPointerSizeInBits(AS);
1974  // Ceiling division.
1975  unsigned NumStores = (TypeSize + PointerSize - 1) / PointerSize;
1976 
1977  // If it generates more than 8 stores it is likely to be expanded as an
1978  // inline memcpy so we take that as an upper bound. Otherwise we assume
1979  // one load and one store per word copied.
1980  // FIXME: The maxStoresPerMemcpy setting from the target should be used
1981  // here instead of a magic number of 8, but it's not available via
1982  // DataLayout.
1983  NumStores = std::min(NumStores, 8U);
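  // For illustration: a 32-byte byval struct on a target with 64-bit pointers
  // needs ceil(256 / 64) = 4 word-sized copies, so it is credited
  // 2 * 4 * InlineConstants::InstrCost below (a load and a store per word).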
1984 
1985  Cost += 2 * NumStores * InlineConstants::InstrCost;
1986  } else {
1987  // For non-byval arguments subtract off one instruction per call
1988  // argument.
1989  Cost += InlineConstants::InstrCost;
1990  }
1991  }
1992  // The call instruction also disappears after inlining.
1993  Cost += InlineConstants::InstrCost + InlineConstants::CallPenalty;
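  // As a rough example, assuming the default constants at the time of writing
  // (InstrCost = 5, CallPenalty = 25): a call with two scalar arguments is
  // credited 2 * 5 + 5 + 25 = 40, which analyzeCall then subtracts from the
  // running cost via addCost(-getCallsiteCost(...)).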
1994  return Cost;
1995 }
1996 
1997 InlineCost llvm::getInlineCost(
1998  CallBase &Call, const InlineParams &Params, TargetTransformInfo &CalleeTTI,
1999  std::function<AssumptionCache &(Function &)> &GetAssumptionCache,
2000  Optional<function_ref<BlockFrequencyInfo &(Function &)>> GetBFI,
2001  ProfileSummaryInfo *PSI, OptimizationRemarkEmitter *ORE) {
2002  return getInlineCost(Call, Call.getCalledFunction(), Params, CalleeTTI,
2003  GetAssumptionCache, GetBFI, PSI, ORE);
2004 }
2005 
2006 InlineCost llvm::getInlineCost(
2007  CallBase &Call, Function *Callee, const InlineParams &Params,
2008  TargetTransformInfo &CalleeTTI,
2009  std::function<AssumptionCache &(Function &)> &GetAssumptionCache,
2010  Optional<function_ref<BlockFrequencyInfo &(Function &)>> GetBFI,
2011  ProfileSummaryInfo *PSI, OptimizationRemarkEmitter *ORE) {
2012 
2013  // Cannot inline indirect calls.
2014  if (!Callee)
2015  return llvm::InlineCost::getNever("indirect call");
2016 
2017  // Never inline calls with byval arguments that do not have the alloca
2018  // address space. Since byval arguments can be replaced with a copy to an
2019  // alloca, the inlined code would need to be adjusted to handle the fact
2020  // that the argument is now in the alloca address space (so it is a little
2021  // bit complicated to solve).
2022  unsigned AllocaAS = Callee->getParent()->getDataLayout().getAllocaAddrSpace();
2023  for (unsigned I = 0, E = Call.arg_size(); I != E; ++I)
2024  if (Call.isByValArgument(I)) {
2025  PointerType *PTy = cast<PointerType>(Call.getArgOperand(I)->getType());
2026  if (PTy->getAddressSpace() != AllocaAS)
2027  return llvm::InlineCost::getNever("byval arguments without alloca"
2028  " address space");
2029  }
2030 
2031  // Calls to functions with always-inline attributes should be inlined
2032  // whenever possible.
2033  if (Call.hasFnAttr(Attribute::AlwaysInline)) {
2034  auto IsViable = isInlineViable(*Callee);
2035  if (IsViable)
2036  return llvm::InlineCost::getAlways("always inline attribute");
2037  return llvm::InlineCost::getNever(IsViable.message);
2038  }
2039 
2040  // Never inline functions with conflicting attributes (unless callee has
2041  // always-inline attribute).
2042  Function *Caller = Call.getCaller();
2043  if (!functionsHaveCompatibleAttributes(Caller, Callee, CalleeTTI))
2044  return llvm::InlineCost::getNever("conflicting attributes");
2045 
2046  // Don't inline this call if the caller has the optnone attribute.
2047  if (Caller->hasOptNone())
2048  return llvm::InlineCost::getNever("optnone attribute");
2049 
2050  // Don't inline a function that treats null pointer as valid into a caller
2051  // that does not have this attribute.
2052  if (!Caller->nullPointerIsDefined() && Callee->nullPointerIsDefined())
2053  return llvm::InlineCost::getNever("nullptr definitions incompatible");
2054 
2055  // Don't inline functions which can be interposed at link-time.
2056  if (Callee->isInterposable())
2057  return llvm::InlineCost::getNever("interposable");
2058 
2059  // Don't inline functions marked noinline.
2060  if (Callee->hasFnAttribute(Attribute::NoInline))
2061  return llvm::InlineCost::getNever("noinline function attribute");
2062 
2063  // Don't inline call sites marked noinline.
2064  if (Call.isNoInline())
2065  return llvm::InlineCost::getNever("noinline call site attribute");
2066 
2067  LLVM_DEBUG(llvm::dbgs() << " Analyzing call of " << Callee->getName()
2068  << "... (caller:" << Caller->getName() << ")\n");
2069 
2070  CallAnalyzer CA(CalleeTTI, GetAssumptionCache, GetBFI, PSI, ORE, *Callee,
2071  Call, Params);
2072  InlineResult ShouldInline = CA.analyzeCall(Call);
2073 
2074  LLVM_DEBUG(CA.dump());
2075 
2076  // Check if there was a reason to force inlining or no inlining.
2077  if (!ShouldInline && CA.getCost() < CA.getThreshold())
2078  return InlineCost::getNever(ShouldInline.message);
2079  if (ShouldInline && CA.getCost() >= CA.getThreshold())
2080  return InlineCost::getAlways("empty function");
2081 
2082  return llvm::InlineCost::get(CA.getCost(), CA.getThreshold());
2083 }
2084 
2085 InlineResult llvm::isInlineViable(Function &F) {
2086  bool ReturnsTwice = F.hasFnAttribute(Attribute::ReturnsTwice);
2087  for (Function::iterator BI = F.begin(), BE = F.end(); BI != BE; ++BI) {
2088  // Disallow inlining of functions which contain indirect branches.
2089  if (isa<IndirectBrInst>(BI->getTerminator()))
2090  return "contains indirect branches";
2091 
2092  // Disallow inlining of blockaddresses which are used by non-callbr
2093  // instructions.
2094  if (BI->hasAddressTaken())
2095  for (User *U : BlockAddress::get(&*BI)->users())
2096  if (!isa<CallBrInst>(*U))
2097  return "blockaddress used outside of callbr";
2098 
2099  for (auto &II : *BI) {
2100  CallBase *Call = dyn_cast<CallBase>(&II);
2101  if (!Call)
2102  continue;
2103 
2104  // Disallow recursive calls.
2105  if (&F == Call->getCalledFunction())
2106  return "recursive call";
2107 
2108  // Disallow calls which expose returns-twice to a function not previously
2109  // attributed as such.
2110  if (!ReturnsTwice && isa<CallInst>(Call) &&
2111  cast<CallInst>(Call)->canReturnTwice())
2112  return "exposes returns-twice attribute";
2113 
2114  if (Call->getCalledFunction())
2115  switch (Call->getCalledFunction()->getIntrinsicID()) {
2116  default:
2117  break;
2118  // Disallow inlining of @llvm.icall.branch.funnel because the current
2119  // backend can't separate call targets from call arguments.
2120  case llvm::Intrinsic::icall_branch_funnel:
2121  return "disallowed inlining of @llvm.icall.branch.funnel";
2122  // Disallow inlining functions that call @llvm.localescape. Doing this
2123  // correctly would require major changes to the inliner.
2124  case llvm::Intrinsic::localescape:
2125  return "disallowed inlining of @llvm.localescape";
2126  // Disallow inlining of functions that initialize VarArgs with va_start.
2127  case llvm::Intrinsic::vastart:
2128  return "contains VarArgs initialized with va_start";
2129  }
2130  }
2131  }
2132 
2133  return true;
2134 }
2135 
2136 // APIs to create InlineParams based on command line flags and/or other
2137 // parameters.
2138 
2139 InlineParams llvm::getInlineParams(int Threshold) {
2140  InlineParams Params;
2141 
2142  // This field is the threshold to use for a callee by default. This is
2143  // derived from one or more of:
2144  // * optimization or size-optimization levels,
2145  // * a value passed to createFunctionInliningPass function, or
2146  // * the -inline-threshold flag.
2147  // If the -inline-threshold flag is explicitly specified, that is used
2148  // irrespective of anything else.
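 // For example (hypothetical invocation): passing -inline-threshold=500 on the
 // command line makes 500 the DefaultThreshold here, even if a different value
 // was handed to createFunctionInliningPass.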
2149  if (InlineThreshold.getNumOccurrences() > 0)
2150  Params.DefaultThreshold = InlineThreshold;
2151  else
2152  Params.DefaultThreshold = Threshold;
2153 
2154  // Set the HintThreshold knob from the -inlinehint-threshold.
2155  Params.HintThreshold = HintThreshold;
2156 
2157  // Set the HotCallSiteThreshold knob from the -hot-callsite-threshold.
2158  Params.HotCallSiteThreshold = HotCallSiteThreshold;
2159 
2160  // If the -locally-hot-callsite-threshold is explicitly specified, use it to
2161  // populate LocallyHotCallSiteThreshold. Later, we populate
2162  // Params.LocallyHotCallSiteThreshold from -locally-hot-callsite-threshold if
2163  // we know that optimization level is O3 (in the getInlineParams variant that
2164  // takes the opt and size levels).
2165  // FIXME: Remove this check (and make the assignment unconditional) after
2166  // addressing size regression issues at O2.
2167  if (LocallyHotCallSiteThreshold.getNumOccurrences() > 0)
2168  Params.LocallyHotCallSiteThreshold = LocallyHotCallSiteThreshold;
2169 
2170  // Set the ColdCallSiteThreshold knob from the -inline-cold-callsite-threshold.
2171  Params.ColdCallSiteThreshold = ColdCallSiteThreshold;
2172 
2173  // Set the OptMinSizeThreshold and OptSizeThreshold params only if the
2174  // -inline-threshold commandline option is not explicitly given. If that
2175  // option is present, then its value applies even for callees with size and
2176  // minsize attributes.
2177  // If the -inline-threshold is not specified, set the ColdThreshold from the
2178  // -inlinecold-threshold even if it is not explicitly passed. If
2179  // -inline-threshold is specified, then -inlinecold-threshold needs to be
2180  // explicitly specified to set the ColdThreshold knob
2181  if (InlineThreshold.getNumOccurrences() == 0) {
2182  Params.OptMinSizeThreshold = InlineConstants::OptMinSizeThreshold;
2183  Params.OptSizeThreshold = InlineConstants::OptSizeThreshold;
2184  Params.ColdThreshold = ColdThreshold;
2185  } else if (ColdThreshold.getNumOccurrences() > 0) {
2186  Params.ColdThreshold = ColdThreshold;
2187  }
2188  return Params;
2189 }
2190 
2191 InlineParams llvm::getInlineParams() {
2192  return getInlineParams(InlineThreshold);
2193 }
2194 
2195 // Compute the default threshold for inlining based on the opt level and the
2196 // size opt level.
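// For instance, computeThresholdFromOptLevels(3, 0) (i.e. -O3 with no size
// optimization) yields InlineConstants::OptAggressiveThreshold below, while a
// nonzero size level selects the -Os/-Oz constants instead of the
// -inline-threshold default.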
2197 static int computeThresholdFromOptLevels(unsigned OptLevel,
2198  unsigned SizeOptLevel) {
2199  if (OptLevel > 2)
2200  return InlineConstants::OptAggressiveThreshold;
2201  if (SizeOptLevel == 1) // -Os
2202  return InlineConstants::OptSizeThreshold;
2203  if (SizeOptLevel == 2) // -Oz
2204  return InlineConstants::OptMinSizeThreshold;
2205  return InlineThreshold;
2206 }
2207 
2208 InlineParams llvm::getInlineParams(unsigned OptLevel, unsigned SizeOptLevel) {
2209  auto Params =
2210  getInlineParams(computeThresholdFromOptLevels(OptLevel, SizeOptLevel));
2211  // At O3, use the value of -locally-hot-callsite-threshold option to populate
2212  // Params.LocallyHotCallSiteThreshold. Below O3, this flag has effect only
2213  // when it is specified explicitly.
2214  if (OptLevel > 2)
2215  Params.LocallyHotCallSiteThreshold = LocallyHotCallSiteThreshold;
2216  return Params;
2217 }