//===- InlineCost.cpp - Cost analysis for inliner -------------------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file implements inline cost analysis.
//
//===----------------------------------------------------------------------===//

#include "llvm/Analysis/InlineCost.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Analysis/BlockFrequencyInfo.h"
#include "llvm/Analysis/CodeMetrics.h"
#include "llvm/Analysis/ConstantFolding.h"
#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/Analysis/ProfileSummaryInfo.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/IR/CallSite.h"
#include "llvm/IR/CallingConv.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/GetElementPtrTypeIterator.h"
#include "llvm/IR/GlobalAlias.h"
#include "llvm/IR/InstVisitor.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Operator.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"

using namespace llvm;

#define DEBUG_TYPE "inline-cost"

STATISTIC(NumCallsAnalyzed, "Number of call sites analyzed");

static cl::opt<int> InlineThreshold(
    "inline-threshold", cl::Hidden, cl::init(225), cl::ZeroOrMore,
    cl::desc("Control the amount of inlining to perform (default = 225)"));

static cl::opt<int> HintThreshold(
    "inlinehint-threshold", cl::Hidden, cl::init(325),
    cl::desc("Threshold for inlining functions with inline hint"));

static cl::opt<int>
    ColdCallSiteThreshold("inline-cold-callsite-threshold", cl::Hidden,
                          cl::init(45),
                          cl::desc("Threshold for inlining cold callsites"));

// We introduce this threshold to help performance of instrumentation based
// PGO before we actually hook up inliner with analysis passes such as BPI and
// BFI.
static cl::opt<int> ColdThreshold(
    "inlinecold-threshold", cl::Hidden, cl::init(45),
    cl::desc("Threshold for inlining functions with cold attribute"));

static cl::opt<int>
    HotCallSiteThreshold("hot-callsite-threshold", cl::Hidden, cl::init(3000),
                         cl::ZeroOrMore,
                         cl::desc("Threshold for hot callsites"));

static cl::opt<int> ColdCallSiteRelFreq(
    "cold-callsite-rel-freq", cl::Hidden, cl::init(2), cl::ZeroOrMore,
    cl::desc("Maximum block frequency, expressed as a percentage of caller's "
             "entry frequency, for a callsite to be cold in the absence of "
             "profile information."));

namespace {

class CallAnalyzer : public InstVisitor<CallAnalyzer, bool> {
  typedef InstVisitor<CallAnalyzer, bool> Base;
  friend class InstVisitor<CallAnalyzer, bool>;

  /// The TargetTransformInfo available for this compilation.
  const TargetTransformInfo &TTI;

  /// Getter for the cache of @llvm.assume intrinsics.
  std::function<AssumptionCache &(Function &)> &GetAssumptionCache;

  /// Getter for BlockFrequencyInfo.
  Optional<function_ref<BlockFrequencyInfo &(Function &)>> &GetBFI;

  /// Profile summary information.
  ProfileSummaryInfo *PSI;

  /// The called function.
  Function &F;

  // Cache the DataLayout since we use it a lot.
  const DataLayout &DL;

  /// The candidate callsite being analyzed. Please do not use this to do
  /// analysis in the caller function; we want the inline cost query to be
  /// easily cacheable. Instead, use the cover function paramHasAttr.
  CallSite CandidateCS;

  /// Tunable parameters that control the analysis.
  const InlineParams &Params;

  int Threshold;
  int Cost;

  bool IsCallerRecursive;
  bool IsRecursiveCall;
  bool ExposesReturnsTwice;
  bool HasDynamicAlloca;
  bool ContainsNoDuplicateCall;
  bool HasReturn;
  bool HasIndirectBr;
  bool HasFrameEscape;

  /// Number of bytes allocated statically by the callee.
  uint64_t AllocatedSize;
  unsigned NumInstructions, NumVectorInstructions;
  int FiftyPercentVectorBonus, TenPercentVectorBonus;
  int VectorBonus;

  /// While we walk the potentially-inlined instructions, we build up and
  /// maintain a mapping of simplified values specific to this callsite. The
  /// idea is to propagate any special information we have about arguments to
  /// this call through the inlinable section of the function, and account for
  /// likely simplifications post-inlining. The most important aspect we track
  /// is CFG altering simplifications -- when we prove a basic block dead, that
  /// can cause dramatic shifts in the cost of inlining a function.
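  /// For example (illustrative), if the callsite passes the constant true for
  /// an i1 flag argument, a branch on that flag inside the callee folds to an
  /// unconditional branch and the dead block contributes no cost.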
  DenseMap<Value *, Constant *> SimplifiedValues;

  /// Keep track of the values which map back (through function arguments) to
  /// allocas on the caller stack which could be simplified through SROA.
  DenseMap<Value *, Value *> SROAArgValues;

  /// The mapping of caller Alloca values to their accumulated cost savings. If
  /// we have to disable SROA for one of the allocas, this tells us how much
  /// cost must be added.
  DenseMap<Value *, int> SROAArgCosts;

  /// Keep track of values which map to a pointer base and constant offset.
  DenseMap<Value *, std::pair<Value *, APInt>> ConstantOffsetPtrs;

  // Custom simplification helper routines.
  bool isAllocaDerivedArg(Value *V);
  bool lookupSROAArgAndCost(Value *V, Value *&Arg,
                            DenseMap<Value *, int>::iterator &CostIt);
  void disableSROA(DenseMap<Value *, int>::iterator CostIt);
  void disableSROA(Value *V);
  void accumulateSROACost(DenseMap<Value *, int>::iterator CostIt,
                          int InstructionCost);
  bool isGEPFree(GetElementPtrInst &GEP);
  bool accumulateGEPOffset(GEPOperator &GEP, APInt &Offset);
  bool simplifyCallSite(Function *F, CallSite CS);
  template <typename Callable>
  bool simplifyInstruction(Instruction &I, Callable Evaluate);
  ConstantInt *stripAndComputeInBoundsConstantOffsets(Value *&V);

  /// Return true if the given argument to the function being considered for
  /// inlining has the given attribute set either at the call site or the
  /// function declaration. Primarily used to inspect call site specific
  /// attributes since these can be more precise than the ones on the callee
  /// itself.
  bool paramHasAttr(Argument *A, Attribute::AttrKind Attr);

  /// Return true if the given value is known non null within the callee if
  /// inlined through this particular callsite.
  bool isKnownNonNullInCallee(Value *V);

  /// Update Threshold based on callsite properties such as callee
  /// attributes and callee hotness for PGO builds. The Callee is explicitly
  /// passed to support analyzing indirect calls whose target is inferred by
  /// analysis.
  void updateThreshold(CallSite CS, Function &Callee);

  /// Return true if size growth is allowed when inlining the callee at CS.
  bool allowSizeGrowth(CallSite CS);

  /// Return true if \p CS is a cold callsite.
  bool isColdCallSite(CallSite CS, BlockFrequencyInfo *CallerBFI);

  // Custom analysis routines.
  bool analyzeBlock(BasicBlock *BB, SmallPtrSetImpl<const Value *> &EphValues);

  // Disable several entry points to the visitor so we don't accidentally use
  // them by declaring but not defining them here.
  void visit(Module *);
  void visit(Module &);
  void visit(Function *);
  void visit(Function &);
  void visit(BasicBlock *);
  void visit(BasicBlock &);

  // Provide base case for our instruction visit.
  bool visitInstruction(Instruction &I);

  // Our visit overrides.
  bool visitAlloca(AllocaInst &I);
  bool visitPHI(PHINode &I);
  bool visitGetElementPtr(GetElementPtrInst &I);
  bool visitBitCast(BitCastInst &I);
  bool visitPtrToInt(PtrToIntInst &I);
  bool visitIntToPtr(IntToPtrInst &I);
  bool visitCastInst(CastInst &I);
  bool visitUnaryInstruction(UnaryInstruction &I);
  bool visitCmpInst(CmpInst &I);
  bool visitSub(BinaryOperator &I);
  bool visitBinaryOperator(BinaryOperator &I);
  bool visitLoad(LoadInst &I);
  bool visitStore(StoreInst &I);
  bool visitExtractValue(ExtractValueInst &I);
  bool visitInsertValue(InsertValueInst &I);
  bool visitCallSite(CallSite CS);
  bool visitReturnInst(ReturnInst &RI);
  bool visitBranchInst(BranchInst &BI);
  bool visitSwitchInst(SwitchInst &SI);
  bool visitIndirectBrInst(IndirectBrInst &IBI);
  bool visitResumeInst(ResumeInst &RI);
  bool visitCleanupReturnInst(CleanupReturnInst &RI);
  bool visitCatchReturnInst(CatchReturnInst &RI);
  bool visitUnreachableInst(UnreachableInst &I);

public:
  CallAnalyzer(const TargetTransformInfo &TTI,
               std::function<AssumptionCache &(Function &)> &GetAssumptionCache,
               Optional<function_ref<BlockFrequencyInfo &(Function &)>> &GetBFI,
               ProfileSummaryInfo *PSI, Function &Callee, CallSite CSArg,
               const InlineParams &Params)
      : TTI(TTI), GetAssumptionCache(GetAssumptionCache), GetBFI(GetBFI),
        PSI(PSI), F(Callee), DL(F.getParent()->getDataLayout()),
        CandidateCS(CSArg), Params(Params), Threshold(Params.DefaultThreshold),
        Cost(0), IsCallerRecursive(false), IsRecursiveCall(false),
        ExposesReturnsTwice(false), HasDynamicAlloca(false),
        ContainsNoDuplicateCall(false), HasReturn(false), HasIndirectBr(false),
        HasFrameEscape(false), AllocatedSize(0), NumInstructions(0),
        NumVectorInstructions(0), FiftyPercentVectorBonus(0),
        TenPercentVectorBonus(0), VectorBonus(0), NumConstantArgs(0),
        NumConstantOffsetPtrArgs(0), NumAllocaArgs(0), NumConstantPtrCmps(0),
        NumConstantPtrDiffs(0), NumInstructionsSimplified(0),
        SROACostSavings(0), SROACostSavingsLost(0) {}

  bool analyzeCall(CallSite CS);

  int getThreshold() { return Threshold; }
  int getCost() { return Cost; }

  // Keep a bunch of stats about the cost savings found so we can print them
  // out when debugging.
  unsigned NumConstantArgs;
  unsigned NumConstantOffsetPtrArgs;
  unsigned NumAllocaArgs;
  unsigned NumConstantPtrCmps;
  unsigned NumConstantPtrDiffs;
  unsigned NumInstructionsSimplified;
  unsigned SROACostSavings;
  unsigned SROACostSavingsLost;

  void dump();
};

} // namespace

/// \brief Test whether the given value is an Alloca-derived function argument.
bool CallAnalyzer::isAllocaDerivedArg(Value *V) {
  return SROAArgValues.count(V);
}

/// \brief Lookup the SROA-candidate argument and cost iterator which V maps to.
/// Returns false if V does not map to a SROA-candidate.
bool CallAnalyzer::lookupSROAArgAndCost(
    Value *V, Value *&Arg, DenseMap<Value *, int>::iterator &CostIt) {
  if (SROAArgValues.empty() || SROAArgCosts.empty())
    return false;

  DenseMap<Value *, Value *>::iterator ArgIt = SROAArgValues.find(V);
  if (ArgIt == SROAArgValues.end())
    return false;

  Arg = ArgIt->second;
  CostIt = SROAArgCosts.find(Arg);
  return CostIt != SROAArgCosts.end();
}

/// \brief Disable SROA for the candidate marked by this cost iterator.
///
/// This marks the candidate as no longer viable for SROA, and adds the cost
/// savings associated with it back into the inline cost measurement.
void CallAnalyzer::disableSROA(DenseMap<Value *, int>::iterator CostIt) {
  // If we're no longer able to perform SROA we need to undo its cost savings
  // and prevent subsequent analysis.
  Cost += CostIt->second;
  SROACostSavings -= CostIt->second;
  SROACostSavingsLost += CostIt->second;
  SROAArgCosts.erase(CostIt);
}

/// \brief If 'V' maps to a SROA candidate, disable SROA for it.
void CallAnalyzer::disableSROA(Value *V) {
  Value *SROAArg;
  DenseMap<Value *, int>::iterator CostIt;
  if (lookupSROAArgAndCost(V, SROAArg, CostIt))
    disableSROA(CostIt);
}

/// \brief Accumulate the given cost for a particular SROA candidate.
void CallAnalyzer::accumulateSROACost(DenseMap<Value *, int>::iterator CostIt,
                                      int InstructionCost) {
  CostIt->second += InstructionCost;
  SROACostSavings += InstructionCost;
}

/// \brief Accumulate a constant GEP offset into an APInt if possible.
///
/// Returns false if unable to compute the offset for any reason. Respects any
/// simplified values known during the analysis of this callsite.
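/// For example (illustrative), on a 64-bit target a GEP into { i32, i64 }
/// with indices (0, 1) adds the second field's offset of 8 bytes, while a
/// constant array index i into i64 elements adds i * 8 bytes.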
bool CallAnalyzer::accumulateGEPOffset(GEPOperator &GEP, APInt &Offset) {
  unsigned IntPtrWidth = DL.getPointerSizeInBits();
  assert(IntPtrWidth == Offset.getBitWidth());

  for (gep_type_iterator GTI = gep_type_begin(GEP), GTE = gep_type_end(GEP);
       GTI != GTE; ++GTI) {
    ConstantInt *OpC = dyn_cast<ConstantInt>(GTI.getOperand());
    if (!OpC)
      if (Constant *SimpleOp = SimplifiedValues.lookup(GTI.getOperand()))
        OpC = dyn_cast<ConstantInt>(SimpleOp);
    if (!OpC)
      return false;
    if (OpC->isZero())
      continue;

    // Handle a struct index, which adds its field offset to the pointer.
    if (StructType *STy = GTI.getStructTypeOrNull()) {
      unsigned ElementIdx = OpC->getZExtValue();
      const StructLayout *SL = DL.getStructLayout(STy);
      Offset += APInt(IntPtrWidth, SL->getElementOffset(ElementIdx));
      continue;
    }

    APInt TypeSize(IntPtrWidth, DL.getTypeAllocSize(GTI.getIndexedType()));
    Offset += OpC->getValue().sextOrTrunc(IntPtrWidth) * TypeSize;
  }
  return true;
}

/// \brief Use TTI to check whether a GEP is free.
///
/// Respects any simplified values known during the analysis of this callsite.
bool CallAnalyzer::isGEPFree(GetElementPtrInst &GEP) {
  SmallVector<Value *, 4> Indices;
  for (User::op_iterator I = GEP.idx_begin(), E = GEP.idx_end(); I != E; ++I)
    if (Constant *SimpleOp = SimplifiedValues.lookup(*I))
      Indices.push_back(SimpleOp);
    else
      Indices.push_back(*I);
  return TargetTransformInfo::TCC_Free ==
         TTI.getGEPCost(GEP.getSourceElementType(), GEP.getPointerOperand(),
                        Indices);
}

bool CallAnalyzer::visitAlloca(AllocaInst &I) {
  // Check whether inlining will turn a dynamic alloca into a static
  // alloca and handle that case.
  if (I.isArrayAllocation()) {
    Constant *Size = SimplifiedValues.lookup(I.getArraySize());
    if (auto *AllocSize = dyn_cast_or_null<ConstantInt>(Size)) {
      Type *Ty = I.getAllocatedType();
      AllocatedSize = SaturatingMultiplyAdd(
          AllocSize->getLimitedValue(), DL.getTypeAllocSize(Ty), AllocatedSize);
      return Base::visitAlloca(I);
    }
  }

  // Accumulate the allocated size.
  if (I.isStaticAlloca()) {
    Type *Ty = I.getAllocatedType();
    AllocatedSize = SaturatingAdd(DL.getTypeAllocSize(Ty), AllocatedSize);
  }

  // We will happily inline static alloca instructions.
  if (I.isStaticAlloca())
    return Base::visitAlloca(I);

  // FIXME: This is overly conservative. Dynamic allocas are inefficient for
  // a variety of reasons, and so we would like to not inline them into
  // functions which don't currently have a dynamic alloca. This simply
  // disables inlining altogether in the presence of a dynamic alloca.
  HasDynamicAlloca = true;
  return false;
}

bool CallAnalyzer::visitPHI(PHINode &I) {
  // FIXME: We should potentially be tracking values through phi nodes,
  // especially when they collapse to a single value due to deleted CFG edges
  // during inlining.

  // FIXME: We need to propagate SROA *disabling* through phi nodes, even
  // though we don't want to propagate its bonuses. The idea is to disable
  // SROA if it *might* be used in an inappropriate manner.

  // Phi nodes are always zero-cost.
  return true;
}

bool CallAnalyzer::visitGetElementPtr(GetElementPtrInst &I) {
  Value *SROAArg;
  DenseMap<Value *, int>::iterator CostIt;
  bool SROACandidate =
      lookupSROAArgAndCost(I.getPointerOperand(), SROAArg, CostIt);

  // Try to fold GEPs of constant-offset call site argument pointers. This
  // requires target data and inbounds GEPs.
  if (I.isInBounds()) {
    // Check if we have a base + offset for the pointer.
    Value *Ptr = I.getPointerOperand();
    std::pair<Value *, APInt> BaseAndOffset = ConstantOffsetPtrs.lookup(Ptr);
    if (BaseAndOffset.first) {
      // Check if the offset of this GEP is constant, and if so accumulate it
      // into Offset.
      if (!accumulateGEPOffset(cast<GEPOperator>(I), BaseAndOffset.second)) {
        // Non-constant GEPs aren't folded, and disable SROA.
        if (SROACandidate)
          disableSROA(CostIt);
        return isGEPFree(I);
      }

      // Add the result as a new mapping to Base + Offset.
      ConstantOffsetPtrs[&I] = BaseAndOffset;

      // Also handle SROA candidates here; we already know that the GEP is
      // all-constant indexed.
      if (SROACandidate)
        SROAArgValues[&I] = SROAArg;

      return true;
    }
  }

  // Lambda to check whether a GEP's indices are all constant.
  auto IsGEPOffsetConstant = [&](GetElementPtrInst &GEP) {
    for (User::op_iterator I = GEP.idx_begin(), E = GEP.idx_end(); I != E; ++I)
      if (!isa<Constant>(*I) && !SimplifiedValues.lookup(*I))
        return false;
    return true;
  };

  if (IsGEPOffsetConstant(I)) {
    if (SROACandidate)
      SROAArgValues[&I] = SROAArg;

    // Constant GEPs are modeled as free.
    return true;
  }

  // Variable GEPs will require math and will disable SROA.
  if (SROACandidate)
    disableSROA(CostIt);
  return isGEPFree(I);
}

/// Simplify \p I if its operands are constants and update SimplifiedValues.
/// \p Evaluate is a callable specific to instruction type that evaluates the
/// instruction when all the operands are constants.
template <typename Callable>
bool CallAnalyzer::simplifyInstruction(Instruction &I, Callable Evaluate) {
  SmallVector<Constant *, 2> COps;
  for (Value *Op : I.operands()) {
    Constant *COp = dyn_cast<Constant>(Op);
    if (!COp)
      COp = SimplifiedValues.lookup(Op);
    if (!COp)
      return false;
    COps.push_back(COp);
  }
  auto *C = Evaluate(COps);
  if (!C)
    return false;
  SimplifiedValues[&I] = C;
  return true;
}

bool CallAnalyzer::visitBitCast(BitCastInst &I) {
  // Propagate constants through bitcasts.
  if (simplifyInstruction(I, [&](SmallVectorImpl<Constant *> &COps) {
        return ConstantExpr::getBitCast(COps[0], I.getType());
      }))
    return true;

  // Track base/offsets through casts.
  std::pair<Value *, APInt> BaseAndOffset =
      ConstantOffsetPtrs.lookup(I.getOperand(0));
  // Casts don't change the offset, just wrap it up.
  if (BaseAndOffset.first)
    ConstantOffsetPtrs[&I] = BaseAndOffset;

  // Also look for SROA candidates here.
  Value *SROAArg;
  DenseMap<Value *, int>::iterator CostIt;
  if (lookupSROAArgAndCost(I.getOperand(0), SROAArg, CostIt))
    SROAArgValues[&I] = SROAArg;

  // Bitcasts are always zero cost.
  return true;
}

bool CallAnalyzer::visitPtrToInt(PtrToIntInst &I) {
  // Propagate constants through ptrtoint.
  if (simplifyInstruction(I, [&](SmallVectorImpl<Constant *> &COps) {
        return ConstantExpr::getPtrToInt(COps[0], I.getType());
      }))
    return true;

  // Track base/offset pairs when converted to a plain integer provided the
  // integer is large enough to represent the pointer.
  unsigned IntegerSize = I.getType()->getScalarSizeInBits();
  if (IntegerSize >= DL.getPointerSizeInBits()) {
    std::pair<Value *, APInt> BaseAndOffset =
        ConstantOffsetPtrs.lookup(I.getOperand(0));
    if (BaseAndOffset.first)
      ConstantOffsetPtrs[&I] = BaseAndOffset;
  }

  // This is really weird. Technically, ptrtoint will disable SROA. However,
  // unless that ptrtoint is *used* somewhere in the live basic blocks after
  // inlining, it will be nuked, and SROA should proceed. All of the uses which
  // would block SROA would also block SROA if applied directly to a pointer,
  // and so we can just add the integer in here. The only places where SROA is
  // preserved either cannot fire on an integer, or won't in-and-of themselves
  // disable SROA (ext) w/o some later use that we would see and disable.
  Value *SROAArg;
  DenseMap<Value *, int>::iterator CostIt;
  if (lookupSROAArgAndCost(I.getOperand(0), SROAArg, CostIt))
    SROAArgValues[&I] = SROAArg;

  return TargetTransformInfo::TCC_Free == TTI.getUserCost(&I);
}

bool CallAnalyzer::visitIntToPtr(IntToPtrInst &I) {
  // Propagate constants through inttoptr.
  if (simplifyInstruction(I, [&](SmallVectorImpl<Constant *> &COps) {
        return ConstantExpr::getIntToPtr(COps[0], I.getType());
      }))
    return true;

  // Track base/offset pairs when round-tripped through a pointer without
  // modifications provided the integer is not too large.
  Value *Op = I.getOperand(0);
  unsigned IntegerSize = Op->getType()->getScalarSizeInBits();
  if (IntegerSize <= DL.getPointerSizeInBits()) {
    std::pair<Value *, APInt> BaseAndOffset = ConstantOffsetPtrs.lookup(Op);
    if (BaseAndOffset.first)
      ConstantOffsetPtrs[&I] = BaseAndOffset;
  }

  // "Propagate" SROA here in the same manner as we do for ptrtoint above.
  Value *SROAArg;
  DenseMap<Value *, int>::iterator CostIt;
  if (lookupSROAArgAndCost(Op, SROAArg, CostIt))
    SROAArgValues[&I] = SROAArg;

  return TargetTransformInfo::TCC_Free == TTI.getUserCost(&I);
}

bool CallAnalyzer::visitCastInst(CastInst &I) {
  // Propagate constants through casts.
  if (simplifyInstruction(I, [&](SmallVectorImpl<Constant *> &COps) {
        return ConstantExpr::getCast(I.getOpcode(), COps[0], I.getType());
      }))
    return true;

  // Disable SROA in the face of arbitrary casts we don't whitelist elsewhere.
  disableSROA(I.getOperand(0));

  return TargetTransformInfo::TCC_Free == TTI.getUserCost(&I);
}

bool CallAnalyzer::visitUnaryInstruction(UnaryInstruction &I) {
  Value *Operand = I.getOperand(0);
  if (simplifyInstruction(I, [&](SmallVectorImpl<Constant *> &COps) {
        return ConstantFoldInstOperands(&I, COps[0], DL);
      }))
    return true;

  // Disable any SROA on the argument to arbitrary unary operators.
  disableSROA(Operand);

  return false;
}

bool CallAnalyzer::paramHasAttr(Argument *A, Attribute::AttrKind Attr) {
  return CandidateCS.paramHasAttr(A->getArgNo(), Attr);
}

bool CallAnalyzer::isKnownNonNullInCallee(Value *V) {
  // Does the *call site* have the NonNull attribute set on an argument? We
  // use the attribute on the call site to memoize any analysis done in the
  // caller. This will also trip if the callee function has a non-null
  // parameter attribute, but that's a less interesting case because hopefully
  // the callee would already have been simplified based on that.
  if (Argument *A = dyn_cast<Argument>(V))
    if (paramHasAttr(A, Attribute::NonNull))
      return true;

  // Is this an alloca in the caller? This is distinct from the attribute case
  // above because attributes aren't updated within the inliner itself and we
  // always want to catch the alloca derived case.
  if (isAllocaDerivedArg(V))
    // We can actually predict the result of comparisons between an
    // alloca-derived value and null. Note that this fires regardless of
    // SROA firing.
    return true;

  return false;
}

bool CallAnalyzer::allowSizeGrowth(CallSite CS) {
  // If the normal destination of the invoke or the parent block of the call
  // site is unreachable-terminated, there is little point in inlining this
  // unless there is literally zero cost.
  // FIXME: Note that it is possible that an unreachable-terminated block has a
  // hot entry. For example, in the scenario below, inlining hot_call_X() may
  // be beneficial:
  //   main() {
  //     hot_call_1();
  //     ...
  //     hot_call_N()
  //     exit(0);
  //   }
  // For now, we are not handling this corner case here as it is rare in real
  // code. In the future, we should elaborate on this based on BPI and BFI in
  // more general threshold adjusting heuristics in updateThreshold().
  Instruction *Instr = CS.getInstruction();
  if (InvokeInst *II = dyn_cast<InvokeInst>(Instr)) {
    if (isa<UnreachableInst>(II->getNormalDest()->getTerminator()))
      return false;
  } else if (isa<UnreachableInst>(Instr->getParent()->getTerminator()))
    return false;

  return true;
}

bool CallAnalyzer::isColdCallSite(CallSite CS, BlockFrequencyInfo *CallerBFI) {
  // If a global profile summary is available, then the callsite's coldness is
  // determined based on that.
  if (PSI->hasProfileSummary())
    return PSI->isColdCallSite(CS, CallerBFI);
  if (!CallerBFI)
    return false;

  // In the absence of a global profile summary, determine if the callsite is
  // cold relative to the caller's entry. We could potentially cache the
  // computation of scaled entry frequency, but the added complexity is not
  // worth it unless this scaling shows up high in the profiles.
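  // For example (illustrative), with the default ColdCallSiteRelFreq of 2, a
  // callsite whose block executes less than 2% as often as the caller's
  // entry block is treated as cold.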
  const BranchProbability ColdProb(ColdCallSiteRelFreq, 100);
  auto CallSiteBB = CS.getInstruction()->getParent();
  auto CallSiteFreq = CallerBFI->getBlockFreq(CallSiteBB);
  auto CallerEntryFreq =
      CallerBFI->getBlockFreq(&(CS.getCaller()->getEntryBlock()));
  return CallSiteFreq < CallerEntryFreq * ColdProb;
}

void CallAnalyzer::updateThreshold(CallSite CS, Function &Callee) {
  // If no size growth is allowed for this inlining, set Threshold to 0.
  if (!allowSizeGrowth(CS)) {
    Threshold = 0;
    return;
  }

  Function *Caller = CS.getCaller();

  // Return min(A, B) if B is valid.
  auto MinIfValid = [](int A, Optional<int> B) {
    return B ? std::min(A, B.getValue()) : A;
  };

  // Return max(A, B) if B is valid.
  auto MaxIfValid = [](int A, Optional<int> B) {
    return B ? std::max(A, B.getValue()) : A;
  };

  // Use the OptMinSizeThreshold or OptSizeThreshold knob if they are available
  // and reduce the threshold if the caller has the necessary attribute.
  if (Caller->optForMinSize())
    Threshold = MinIfValid(Threshold, Params.OptMinSizeThreshold);
  else if (Caller->optForSize())
    Threshold = MinIfValid(Threshold, Params.OptSizeThreshold);

  // Adjust the threshold based on the inlinehint attribute and profile-based
  // hotness information if the caller does not have the MinSize attribute.
  if (!Caller->optForMinSize()) {
    if (Callee.hasFnAttribute(Attribute::InlineHint))
      Threshold = MaxIfValid(Threshold, Params.HintThreshold);
    if (PSI) {
      BlockFrequencyInfo *CallerBFI = GetBFI ? &((*GetBFI)(*Caller)) : nullptr;
      // FIXME: After switching to the new passmanager, simplify the logic below
      // by checking only the callsite hotness/coldness. The check for CallerBFI
      // exists only because we do not have BFI available with the old PM.
      //
      // Use the callee's hotness information only if we have no way of
      // determining the callsite's hotness information. Callsite hotness can
      // be determined if a sample profile is used (which adds hotness metadata
      // to calls) or if the caller's BlockFrequencyInfo is available.
      if (CallerBFI || PSI->hasSampleProfile()) {
        if (PSI->isHotCallSite(CS, CallerBFI)) {
          DEBUG(dbgs() << "Hot callsite.\n");
          Threshold = Params.HotCallSiteThreshold.getValue();
        } else if (isColdCallSite(CS, CallerBFI)) {
          DEBUG(dbgs() << "Cold callsite.\n");
          Threshold = MinIfValid(Threshold, Params.ColdCallSiteThreshold);
        }
      } else {
        if (PSI->isFunctionEntryHot(&Callee)) {
          DEBUG(dbgs() << "Hot callee.\n");
          // If callsite hotness can not be determined, we may still know
          // that the callee is hot and treat it as a weaker hint for threshold
          // increase.
          Threshold = MaxIfValid(Threshold, Params.HintThreshold);
        } else if (PSI->isFunctionEntryCold(&Callee)) {
          DEBUG(dbgs() << "Cold callee.\n");
          Threshold = MinIfValid(Threshold, Params.ColdThreshold);
        }
      }
    }
  }

  // Finally, take the target-specific inlining threshold multiplier into
  // account.
  Threshold *= TTI.getInliningThresholdMultiplier();
}

bool CallAnalyzer::visitCmpInst(CmpInst &I) {
  Value *LHS = I.getOperand(0), *RHS = I.getOperand(1);
  // First try to handle simplified comparisons.
  if (simplifyInstruction(I, [&](SmallVectorImpl<Constant *> &COps) {
        return ConstantExpr::getCompare(I.getPredicate(), COps[0], COps[1]);
      }))
    return true;

  if (I.getOpcode() == Instruction::FCmp)
    return false;

  // Otherwise look for a comparison between constant offset pointers with
  // a common base.
  Value *LHSBase, *RHSBase;
  APInt LHSOffset, RHSOffset;
  std::tie(LHSBase, LHSOffset) = ConstantOffsetPtrs.lookup(LHS);
  if (LHSBase) {
    std::tie(RHSBase, RHSOffset) = ConstantOffsetPtrs.lookup(RHS);
    if (RHSBase && LHSBase == RHSBase) {
      // We have common bases, fold the icmp to a constant based on the
      // offsets.
      Constant *CLHS = ConstantInt::get(LHS->getContext(), LHSOffset);
      Constant *CRHS = ConstantInt::get(RHS->getContext(), RHSOffset);
      if (Constant *C = ConstantExpr::getICmp(I.getPredicate(), CLHS, CRHS)) {
        SimplifiedValues[&I] = C;
        ++NumConstantPtrCmps;
        return true;
      }
    }
  }

  // If the comparison is an equality comparison with null, we can simplify it
  // if we know the value (argument) can't be null.
  if (I.isEquality() && isa<ConstantPointerNull>(I.getOperand(1)) &&
      isKnownNonNullInCallee(I.getOperand(0))) {
    bool IsNotEqual = I.getPredicate() == CmpInst::ICMP_NE;
    SimplifiedValues[&I] = IsNotEqual ? ConstantInt::getTrue(I.getType())
                                      : ConstantInt::getFalse(I.getType());
    return true;
  }
  // Finally check for SROA candidates in comparisons.
  Value *SROAArg;
  DenseMap<Value *, int>::iterator CostIt;
  if (lookupSROAArgAndCost(I.getOperand(0), SROAArg, CostIt)) {
    if (isa<ConstantPointerNull>(I.getOperand(1))) {
      accumulateSROACost(CostIt, InlineConstants::InstrCost);
      return true;
    }

    disableSROA(CostIt);
  }

  return false;
}

bool CallAnalyzer::visitSub(BinaryOperator &I) {
  // Try to handle a special case: we can fold computing the difference of two
  // constant-related pointers.
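  // For example (illustrative), subtracting two ptrtoint values that track
  // the same base pointer at constant offsets 12 and 4 folds to the
  // constant 8.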
  Value *LHS = I.getOperand(0), *RHS = I.getOperand(1);
  Value *LHSBase, *RHSBase;
  APInt LHSOffset, RHSOffset;
  std::tie(LHSBase, LHSOffset) = ConstantOffsetPtrs.lookup(LHS);
  if (LHSBase) {
    std::tie(RHSBase, RHSOffset) = ConstantOffsetPtrs.lookup(RHS);
    if (RHSBase && LHSBase == RHSBase) {
      // We have common bases, fold the subtract to a constant based on the
      // offsets.
      Constant *CLHS = ConstantInt::get(LHS->getContext(), LHSOffset);
      Constant *CRHS = ConstantInt::get(RHS->getContext(), RHSOffset);
      if (Constant *C = ConstantExpr::getSub(CLHS, CRHS)) {
        SimplifiedValues[&I] = C;
        ++NumConstantPtrDiffs;
        return true;
      }
    }
  }

  // Otherwise, fall back to the generic logic for simplifying and handling
  // instructions.
  return Base::visitSub(I);
}

bool CallAnalyzer::visitBinaryOperator(BinaryOperator &I) {
  Value *LHS = I.getOperand(0), *RHS = I.getOperand(1);
  auto Evaluate = [&](SmallVectorImpl<Constant *> &COps) {
    Value *SimpleV = nullptr;
    if (auto FI = dyn_cast<FPMathOperator>(&I))
      SimpleV = SimplifyFPBinOp(I.getOpcode(), COps[0], COps[1],
                                FI->getFastMathFlags(), DL);
    else
      SimpleV = SimplifyBinOp(I.getOpcode(), COps[0], COps[1], DL);
    return dyn_cast_or_null<Constant>(SimpleV);
  };

  if (simplifyInstruction(I, Evaluate))
    return true;

  // Disable any SROA on arguments to arbitrary, unsimplified binary operators.
  disableSROA(LHS);
  disableSROA(RHS);

  return false;
}

bool CallAnalyzer::visitLoad(LoadInst &I) {
  Value *SROAArg;
  DenseMap<Value *, int>::iterator CostIt;
  if (lookupSROAArgAndCost(I.getPointerOperand(), SROAArg, CostIt)) {
    if (I.isSimple()) {
      accumulateSROACost(CostIt, InlineConstants::InstrCost);
      return true;
    }

    disableSROA(CostIt);
  }

  return false;
}

bool CallAnalyzer::visitStore(StoreInst &I) {
  Value *SROAArg;
  DenseMap<Value *, int>::iterator CostIt;
  if (lookupSROAArgAndCost(I.getPointerOperand(), SROAArg, CostIt)) {
    if (I.isSimple()) {
      accumulateSROACost(CostIt, InlineConstants::InstrCost);
      return true;
    }

    disableSROA(CostIt);
  }

  return false;
}

bool CallAnalyzer::visitExtractValue(ExtractValueInst &I) {
  // Constant folding for extract value is trivial.
  if (simplifyInstruction(I, [&](SmallVectorImpl<Constant *> &COps) {
        return ConstantExpr::getExtractValue(COps[0], I.getIndices());
      }))
    return true;

  // SROA can look through these but give them a cost.
  return false;
}

bool CallAnalyzer::visitInsertValue(InsertValueInst &I) {
  // Constant folding for insert value is trivial.
  if (simplifyInstruction(I, [&](SmallVectorImpl<Constant *> &COps) {
        return ConstantExpr::getInsertValue(/*AggregateOperand*/ COps[0],
                                            /*InsertedValueOperand*/ COps[1],
                                            I.getIndices());
      }))
    return true;

  // SROA can look through these but give them a cost.
  return false;
}

/// \brief Try to simplify a call site.
///
/// Takes a concrete function and callsite and tries to actually simplify it by
/// analyzing the arguments and call itself with instsimplify. Returns true if
/// it has simplified the callsite to some other entity (a constant), making it
/// free.
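/// For example (illustrative), a call to llvm.sqrt.f64 whose operand has been
/// simplified to the constant 4.0 folds to the constant 2.0 and becomes free.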
bool CallAnalyzer::simplifyCallSite(Function *F, CallSite CS) {
  // FIXME: Using the instsimplify logic directly for this is inefficient
  // because we have to continually rebuild the argument list even when no
  // simplifications can be performed. Until that is fixed with remapping
  // inside of instsimplify, directly constant fold calls here.
  if (!canConstantFoldCallTo(CS, F))
    return false;

  // Try to re-map the arguments to constants.
  SmallVector<Constant *, 4> ConstantArgs;
  ConstantArgs.reserve(CS.arg_size());
  for (CallSite::arg_iterator I = CS.arg_begin(), E = CS.arg_end(); I != E;
       ++I) {
    Constant *C = dyn_cast<Constant>(*I);
    if (!C)
      C = dyn_cast_or_null<Constant>(SimplifiedValues.lookup(*I));
    if (!C)
      return false; // This argument doesn't map to a constant.

    ConstantArgs.push_back(C);
  }
  if (Constant *C = ConstantFoldCall(CS, F, ConstantArgs)) {
    SimplifiedValues[CS.getInstruction()] = C;
    return true;
  }

  return false;
}

bool CallAnalyzer::visitCallSite(CallSite CS) {
  if (CS.hasFnAttr(Attribute::ReturnsTwice) &&
      !F.hasFnAttribute(Attribute::ReturnsTwice)) {
    // This aborts the entire analysis.
    ExposesReturnsTwice = true;
    return false;
  }
  if (CS.isCall() && cast<CallInst>(CS.getInstruction())->cannotDuplicate())
    ContainsNoDuplicateCall = true;

  if (Function *F = CS.getCalledFunction()) {
    // When we have a concrete function, first try to simplify it directly.
    if (simplifyCallSite(F, CS))
      return true;

    // Next check if it is an intrinsic we know about.
    // FIXME: Lift this into part of the InstVisitor.
    if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(CS.getInstruction())) {
      switch (II->getIntrinsicID()) {
      default:
        return Base::visitCallSite(CS);

      case Intrinsic::load_relative:
        // This is normally lowered to 4 LLVM instructions.
        Cost += 3 * InlineConstants::InstrCost;
        return false;

      case Intrinsic::memset:
      case Intrinsic::memcpy:
      case Intrinsic::memmove:
        // SROA can usually chew through these intrinsics, but they aren't free.
        return false;
      case Intrinsic::localescape:
        HasFrameEscape = true;
        return false;
      }
    }

    if (F == CS.getInstruction()->getParent()->getParent()) {
      // This flag will fully abort the analysis, so don't bother with anything
      // else.
      IsRecursiveCall = true;
      return false;
    }

    if (TTI.isLoweredToCall(F)) {
      // We account for the average 1 instruction per call argument setup
      // here.
      Cost += CS.arg_size() * InlineConstants::InstrCost;

      // Everything other than inline ASM will also have a significant cost
      // merely from making the call.
      if (!isa<InlineAsm>(CS.getCalledValue()))
        Cost += InlineConstants::CallPenalty;
    }

    return Base::visitCallSite(CS);
  }

  // Otherwise we're in a very special case -- an indirect function call. See
  // if we can be particularly clever about this.
  Value *Callee = CS.getCalledValue();

  // First, pay the price of the argument setup. We account for the average
  // 1 instruction per call argument setup here.
  Cost += CS.arg_size() * InlineConstants::InstrCost;

  // Next, check if this happens to be an indirect function call to a known
  // function in this inline context. If not, we've done all we can.
  Function *F = dyn_cast_or_null<Function>(SimplifiedValues.lookup(Callee));
  if (!F)
    return Base::visitCallSite(CS);

  // If we have a constant that we are calling as a function, we can peer
  // through it and see the function target. This happens not infrequently
  // during devirtualization and so we want to give it a hefty bonus for
  // inlining, but cap that bonus in the event that inlining wouldn't pan
  // out. Pretend to inline the function, with a custom threshold.
  auto IndirectCallParams = Params;
  IndirectCallParams.DefaultThreshold = InlineConstants::IndirectCallThreshold;
  CallAnalyzer CA(TTI, GetAssumptionCache, GetBFI, PSI, *F, CS,
                  IndirectCallParams);
  if (CA.analyzeCall(CS)) {
    // We were able to inline the indirect call! Subtract the cost from the
    // threshold to get the bonus we want to apply, but don't go below zero.
    Cost -= std::max(0, CA.getThreshold() - CA.getCost());
  }

  return Base::visitCallSite(CS);
}

bool CallAnalyzer::visitReturnInst(ReturnInst &RI) {
  // At least one return instruction will be free after inlining.
  bool Free = !HasReturn;
  HasReturn = true;
  return Free;
}

bool CallAnalyzer::visitBranchInst(BranchInst &BI) {
  // We model unconditional branches as essentially free -- they really
  // shouldn't exist at all, but handling them makes the behavior of the
  // inliner more regular and predictable. Interestingly, conditional branches
  // which will fold away are also free.
  return BI.isUnconditional() || isa<ConstantInt>(BI.getCondition()) ||
         dyn_cast_or_null<ConstantInt>(
             SimplifiedValues.lookup(BI.getCondition()));
}

bool CallAnalyzer::visitSwitchInst(SwitchInst &SI) {
  // We model unconditional switches as free, see the comments on handling
  // branches.
  if (isa<ConstantInt>(SI.getCondition()))
    return true;
  if (Value *V = SimplifiedValues.lookup(SI.getCondition()))
    if (isa<ConstantInt>(V))
      return true;

  // Assume the most general case where the switch is lowered into
  // either a jump table, bit test, or a balanced binary tree consisting of
  // case clusters without merging adjacent clusters with the same
  // destination. We do not consider the switches that are lowered with a mix
  // of jump table/bit test/binary search tree. The cost of the switch is
  // proportional to the size of the tree or the size of jump table range.
  //
  // NB: We convert large switches which are just used to initialize large phi
  // nodes to lookup tables instead in simplify-cfg, so this shouldn't prevent
  // inlining those. It will prevent inlining in cases where the optimization
  // does not (yet) fire.

  // Maximum valid cost increased in this function.
  int CostUpperBound = INT_MAX - InlineConstants::InstrCost - 1;

  // Exit early for a large switch, assuming one case needs at least one
  // instruction.
  // FIXME: This is not true for a bit test, but ignore such case for now to
  // save compile-time.
  int64_t CostLowerBound =
      std::min((int64_t)CostUpperBound,
               (int64_t)SI.getNumCases() * InlineConstants::InstrCost + Cost);

  if (CostLowerBound > Threshold) {
    Cost = CostLowerBound;
    return false;
  }

  unsigned JumpTableSize = 0;
  unsigned NumCaseCluster =
      TTI.getEstimatedNumberOfCaseClusters(SI, JumpTableSize);

  // If suitable for a jump table, consider the cost for the table size and
  // branch to destination.
  if (JumpTableSize) {
    int64_t JTCost = (int64_t)JumpTableSize * InlineConstants::InstrCost +
                     4 * InlineConstants::InstrCost;
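    // For example (illustrative), a 16-entry jump table is costed at
    // (16 + 4) * InstrCost, the fixed 4 approximating the range check and
    // indirect branch.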

    Cost = std::min((int64_t)CostUpperBound, JTCost + Cost);
    return false;
  }

  // Considering forming a binary search, we should find the number of nodes,
  // which is the same as the number of comparisons when lowered. For a given
  // number of clusters, n, we can define a recursive function, f(n), to find
  // the number of nodes in the tree. The recursion is:
  //   f(n) = 1 + f(n/2) + f(n - n/2), when n > 3,
  //   f(n) = n, when n <= 3.
  // This will lead to a binary tree where the leaf should be either f(2) or
  // f(3) when n > 3. So, the number of comparisons from leaves should be n,
  // while the number of non-leaf nodes should be:
  //   2^(log2(n) - 1) - 1
  //     = 2^log2(n) * 2^-1 - 1
  //     = n / 2 - 1.
  // Considering comparisons from leaf and non-leaf nodes, we can estimate the
  // number of comparisons in a simple closed form:
  //   n + n / 2 - 1 = n * 3 / 2 - 1
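  // For example (illustrative), n = 8 clusters gives f(8) = 1 + 2 * f(4) = 11
  // comparisons, matching 3 * 8 / 2 - 1 = 11, for a cost of
  // 11 * 2 * InstrCost (one compare plus one branch each).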
  if (NumCaseCluster <= 3) {
    // Suppose a comparison includes one compare and one conditional branch.
    Cost += NumCaseCluster * 2 * InlineConstants::InstrCost;
    return false;
  }

  int64_t ExpectedNumberOfCompare = 3 * (int64_t)NumCaseCluster / 2 - 1;
  int64_t SwitchCost =
      ExpectedNumberOfCompare * 2 * InlineConstants::InstrCost;

  Cost = std::min((int64_t)CostUpperBound, SwitchCost + Cost);
  return false;
}

bool CallAnalyzer::visitIndirectBrInst(IndirectBrInst &IBI) {
  // We never want to inline functions that contain an indirectbr. This is
  // incorrect because all the blockaddress's (in static global initializers
  // for example) would be referring to the original function, and this
  // indirect jump would jump from the inlined copy of the function into the
  // original function which is extremely undefined behavior.
  // FIXME: This logic isn't really right; we can safely inline functions with
  // indirectbr's as long as no other function or global references the
  // blockaddress of a block within the current function.
  HasIndirectBr = true;
  return false;
}

bool CallAnalyzer::visitResumeInst(ResumeInst &RI) {
  // FIXME: It's not clear that a single instruction is an accurate model for
  // the inline cost of a resume instruction.
  return false;
}

bool CallAnalyzer::visitCleanupReturnInst(CleanupReturnInst &CRI) {
  // FIXME: It's not clear that a single instruction is an accurate model for
  // the inline cost of a cleanupret instruction.
  return false;
}

bool CallAnalyzer::visitCatchReturnInst(CatchReturnInst &CRI) {
  // FIXME: It's not clear that a single instruction is an accurate model for
  // the inline cost of a catchret instruction.
  return false;
}

bool CallAnalyzer::visitUnreachableInst(UnreachableInst &I) {
  // FIXME: It might be reasonable to discount the cost of instructions leading
  // to unreachable as they have the lowest possible impact on both runtime and
  // code size.
  return true; // No actual code is needed for unreachable.
}

bool CallAnalyzer::visitInstruction(Instruction &I) {
  // Some instructions are free. All of the free intrinsics can also be
  // handled by SROA, etc.
  if (TargetTransformInfo::TCC_Free == TTI.getUserCost(&I))
    return true;

  // We found something we don't understand or can't handle. Mark any SROA-able
  // values in the operand list as no longer viable.
  for (User::op_iterator OI = I.op_begin(), OE = I.op_end(); OI != OE; ++OI)
    disableSROA(*OI);

  return false;
}

/// \brief Analyze a basic block for its contribution to the inline cost.
///
/// This method walks the analyzer over every instruction in the given basic
/// block and accounts for their cost during inlining at this callsite. It
/// aborts early if the threshold has been exceeded or an impossible to inline
/// construct has been detected. It returns false if inlining is no longer
/// viable, and true if inlining remains viable.
bool CallAnalyzer::analyzeBlock(BasicBlock *BB,
                                SmallPtrSetImpl<const Value *> &EphValues) {
  for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I) {
    // FIXME: Currently, the number of instructions in a function, regardless
    // of our ability to simplify them during inlining to constants or dead
    // code, is actually used by the vector bonus heuristic. As long as that's
    // true, we have to special case debug intrinsics here to prevent
    // differences in inlining due to debug symbols. Eventually, the number of
    // unsimplified instructions shouldn't factor into the cost computation,
    // but until then, hack around it here.
    if (isa<DbgInfoIntrinsic>(I))
      continue;

    // Skip ephemeral values.
    if (EphValues.count(&*I))
      continue;

    ++NumInstructions;
    if (isa<ExtractElementInst>(I) || I->getType()->isVectorTy())
      ++NumVectorInstructions;

    // If the instruction is floating point, and the target says this operation
    // is expensive or the function has the "use-soft-float" attribute, this
    // may eventually become a library call. Treat the cost as such.
    if (I->getType()->isFloatingPointTy()) {
      // If the function has the "use-soft-float" attribute, mark it as
      // expensive.
      if (TTI.getFPOpCost(I->getType()) == TargetTransformInfo::TCC_Expensive ||
          (F.getFnAttribute("use-soft-float").getValueAsString() == "true"))
        Cost += InlineConstants::CallPenalty;
    }

    // If the instruction simplified to a constant, there is no cost to this
    // instruction. Visit the instructions using our InstVisitor to account for
    // all of the per-instruction logic. The visit tree returns true if we
    // consumed the instruction in any way, and false if the instruction's base
    // cost should count against inlining.
    if (Base::visit(&*I))
      ++NumInstructionsSimplified;
    else
      Cost += InlineConstants::InstrCost;

    // If visiting this instruction detected an uninlinable pattern, abort.
    if (IsRecursiveCall || ExposesReturnsTwice || HasDynamicAlloca ||
        HasIndirectBr || HasFrameEscape)
      return false;

    // If the caller is a recursive function then we don't want to inline
    // functions which allocate a lot of stack space because it would increase
    // the caller stack usage dramatically.
    if (IsCallerRecursive &&
        AllocatedSize > InlineConstants::TotalAllocaSizeRecursiveCaller)
      return false;

    // Check if we've passed the maximum possible threshold so we don't spin in
    // huge basic blocks that will never inline.
    if (Cost > Threshold)
      return false;
  }

  return true;
}

/// \brief Compute the base pointer and cumulative constant offsets for V.
///
/// This strips all constant offsets off of V, leaving it the base pointer, and
/// accumulates the total constant offset applied in the returned constant. It
/// returns 0 if V is not a pointer, and returns the constant '0' if there are
/// no constant offsets applied.
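/// For example (illustrative), given a `getelementptr inbounds i32, i32*
/// %base, i64 4`, V is stripped back to %base and the returned constant is
/// 16 (4 elements at 4 bytes each).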
ConstantInt *CallAnalyzer::stripAndComputeInBoundsConstantOffsets(Value *&V) {
  if (!V->getType()->isPointerTy())
    return nullptr;

  unsigned IntPtrWidth = DL.getPointerSizeInBits();
  APInt Offset = APInt::getNullValue(IntPtrWidth);

  // Even though we don't look through PHI nodes, we could be called on an
  // instruction in an unreachable block, which may be on a cycle.
  SmallPtrSet<Value *, 4> Visited;
  Visited.insert(V);
  do {
    if (GEPOperator *GEP = dyn_cast<GEPOperator>(V)) {
      if (!GEP->isInBounds() || !accumulateGEPOffset(*GEP, Offset))
        return nullptr;
      V = GEP->getPointerOperand();
    } else if (Operator::getOpcode(V) == Instruction::BitCast) {
      V = cast<Operator>(V)->getOperand(0);
    } else if (GlobalAlias *GA = dyn_cast<GlobalAlias>(V)) {
      if (GA->isInterposable())
        break;
      V = GA->getAliasee();
    } else {
      break;
    }
    assert(V->getType()->isPointerTy() && "Unexpected operand type!");
  } while (Visited.insert(V).second);

  Type *IntPtrTy = DL.getIntPtrType(V->getContext());
  return cast<ConstantInt>(ConstantInt::get(IntPtrTy, Offset));
}

/// \brief Analyze a call site for potential inlining.
///
/// Returns true if inlining this call is viable, and false if it is not
/// viable. It computes the cost and adjusts the threshold based on numerous
/// factors and heuristics. If this method returns false but the computed cost
/// is below the computed threshold, then inlining was forcibly disabled by
/// some artifact of the routine.
bool CallAnalyzer::analyzeCall(CallSite CS) {
  ++NumCallsAnalyzed;

  // Perform some tweaks to the cost and threshold based on the direct
  // callsite information.

  // We want to more aggressively inline vector-dense kernels, so up the
  // threshold, and we'll lower it if the % of vector instructions gets too
  // low. Note that these bonuses are somewhat arbitrary and evolved over time
  // by accident as much as because they are principled bonuses.
  //
  // FIXME: It would be nice to remove all such bonuses. At least it would be
  // nice to base the bonus values on something more scientific.
  assert(NumInstructions == 0);
  assert(NumVectorInstructions == 0);

  // Update the threshold based on callsite properties.
  updateThreshold(CS, F);

  FiftyPercentVectorBonus = 3 * Threshold / 2;
  TenPercentVectorBonus = 3 * Threshold / 4;
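  // For example (illustrative), an updated Threshold of 225 yields a
  // FiftyPercentVectorBonus of 337 and a TenPercentVectorBonus of 168.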
1301 
1302  // Track whether the post-inlining function would have more than one basic
1303  // block. A single basic block is often intended for inlining. Balloon the
1304  // threshold by 50% until we pass the single-BB phase.
1305  bool SingleBB = true;
1306  int SingleBBBonus = Threshold / 2;
1307 
1308  // Speculatively apply all possible bonuses to Threshold. If cost exceeds
1309  // this Threshold any time, and cost cannot decrease, we can stop processing
1310  // the rest of the function body.
1311  Threshold += (SingleBBBonus + FiftyPercentVectorBonus);
1312 
1313  // Give out bonuses for the callsite, as the instructions setting them up
1314  // will be gone after inlining.
1315  Cost -= getCallsiteCost(CS, DL);
1316 
1317  // If there is only one call of the function, and it has internal linkage,
1318  // the cost of inlining it drops dramatically.
1319  bool OnlyOneCallAndLocalLinkage =
1320  F.hasLocalLinkage() && F.hasOneUse() && &F == CS.getCalledFunction();
1321  if (OnlyOneCallAndLocalLinkage)
1323 
1324  // If this function uses the coldcc calling convention, prefer not to inline
1325  // it.
1326  if (F.getCallingConv() == CallingConv::Cold)
1328 
1329  // Check if we're done. This can happen due to bonuses and penalties.
1330  if (Cost > Threshold)
1331  return false;
1332 
1333  if (F.empty())
1334  return true;
1335 
1336  Function *Caller = CS.getInstruction()->getParent()->getParent();
1337  // Check if the caller function is recursive itself.
1338  for (User *U : Caller->users()) {
1339  CallSite Site(U);
1340  if (!Site)
1341  continue;
1342  Instruction *I = Site.getInstruction();
1343  if (I->getParent()->getParent() == Caller) {
1344  IsCallerRecursive = true;
1345  break;
1346  }
1347  }
1348 
1349  // Populate our simplified values by mapping from function arguments to call
1350  // arguments with known important simplifications.
1351  CallSite::arg_iterator CAI = CS.arg_begin();
1352  for (Function::arg_iterator FAI = F.arg_begin(), FAE = F.arg_end();
1353  FAI != FAE; ++FAI, ++CAI) {
1354  assert(CAI != CS.arg_end());
1355  if (Constant *C = dyn_cast<Constant>(CAI))
1356  SimplifiedValues[&*FAI] = C;
1357 
1358  Value *PtrArg = *CAI;
1359  if (ConstantInt *C = stripAndComputeInBoundsConstantOffsets(PtrArg)) {
1360  ConstantOffsetPtrs[&*FAI] = std::make_pair(PtrArg, C->getValue());
1361 
1362  // We can SROA any pointer arguments derived from alloca instructions.
1363  if (isa<AllocaInst>(PtrArg)) {
1364  SROAArgValues[&*FAI] = PtrArg;
1365  SROAArgCosts[PtrArg] = 0;
1366  }
1367  }
1368  }
1369  NumConstantArgs = SimplifiedValues.size();
1370  NumConstantOffsetPtrArgs = ConstantOffsetPtrs.size();
1371  NumAllocaArgs = SROAArgValues.size();
1372 
1373  // FIXME: If a caller has multiple calls to a callee, we end up recomputing
1374  // the ephemeral values multiple times (and they're completely determined by
1375  // the callee, so this is purely duplicate work).
1377  CodeMetrics::collectEphemeralValues(&F, &GetAssumptionCache(F), EphValues);
1378 
1379  // The worklist of live basic blocks in the callee *after* inlining. We avoid
1380  // adding basic blocks of the callee which can be proven to be dead for this
1381  // particular call site in order to get more accurate cost estimates. This
1382  // requires a somewhat heavyweight iteration pattern: we need to walk the
1383  // basic blocks in a breadth-first order as we insert live successors. To
1384  // accomplish this, prioritizing for small iterations because we exit after
1385  // crossing our threshold, we use a small-size optimized SetVector.
1388  BBSetVector;
1389  BBSetVector BBWorklist;
1390  BBWorklist.insert(&F.getEntryBlock());
1391  // Note that we *must not* cache the size, this loop grows the worklist.
1392  for (unsigned Idx = 0; Idx != BBWorklist.size(); ++Idx) {
1393  // Bail out the moment we cross the threshold. This means we'll under-count
1394  // the cost, but only when undercounting doesn't matter.
1395  if (Cost > Threshold)
1396  break;
1397 
1398  BasicBlock *BB = BBWorklist[Idx];
1399  if (BB->empty())
1400  continue;
1401 
1402  // Disallow inlining a blockaddress. A blockaddress only has defined
1403  // behavior for an indirect branch in the same function, and we do not
1404  // currently support inlining indirect branches. But, the inliner may not
1405  // see an indirect branch that ends up being dead code at a particular call
1406  // site. If the blockaddress escapes the function, e.g., via a global
1407  // variable, inlining may lead to an invalid cross-function reference.
1408  if (BB->hasAddressTaken())
1409  return false;
1410 
1411  // Analyze the cost of this block. If we blow through the threshold, this
1412  // returns false, and we can bail on out.
1413  if (!analyzeBlock(BB, EphValues))
1414  return false;
1415 
1416  TerminatorInst *TI = BB->getTerminator();
1417 
1418  // Add in the live successors by first checking whether we have terminator
1419  // that may be simplified based on the values simplified by this call.
1420  if (BranchInst *BI = dyn_cast<BranchInst>(TI)) {
1421  if (BI->isConditional()) {
1422  Value *Cond = BI->getCondition();
1423  if (ConstantInt *SimpleCond =
1424  dyn_cast_or_null<ConstantInt>(SimplifiedValues.lookup(Cond))) {
1425  BBWorklist.insert(BI->getSuccessor(SimpleCond->isZero() ? 1 : 0));
1426  continue;
1427  }
1428  }
1429  } else if (SwitchInst *SI = dyn_cast<SwitchInst>(TI)) {
1430  Value *Cond = SI->getCondition();
1431  if (ConstantInt *SimpleCond =
1432  dyn_cast_or_null<ConstantInt>(SimplifiedValues.lookup(Cond))) {
1433  BBWorklist.insert(SI->findCaseValue(SimpleCond)->getCaseSuccessor());
1434  continue;
1435  }
1436  }
1437 
1438  // If we're unable to select a particular successor, just count all of
1439  // them.
1440  for (unsigned TIdx = 0, TSize = TI->getNumSuccessors(); TIdx != TSize;
1441  ++TIdx)
1442  BBWorklist.insert(TI->getSuccessor(TIdx));
1443 
1444  // If we had any successors at this point, than post-inlining is likely to
1445  // have them as well. Note that we assume any basic blocks which existed
1446  // due to branches or switches which folded above will also fold after
1447  // inlining.
1448  if (SingleBB && TI->getNumSuccessors() > 1) {
1449  // Take off the bonus we applied to the threshold.
1450  Threshold -= SingleBBBonus;
1451  SingleBB = false;
1452  }
1453  }
1454 
1455  // If this is a noduplicate call, we can still inline as long as
1456  // inlining this would cause the removal of the callee (so the instruction
1457  // is not actually duplicated, just moved).
1458  if (!OnlyOneCallAndLocalLinkage && ContainsNoDuplicateCall)
1459  return false;
1460 
1461  // We applied the maximum possible vector bonus at the beginning. Now,
1462  // subtract the excess bonus, if any, from the Threshold before
1463  // comparing against Cost.
1464  if (NumVectorInstructions <= NumInstructions / 10)
1465  Threshold -= FiftyPercentVectorBonus;
1466  else if (NumVectorInstructions <= NumInstructions / 2)
1467  Threshold -= (FiftyPercentVectorBonus - TenPercentVectorBonus);
1468 
1469  return Cost < std::max(1, Threshold);
1470 }
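// A worked example of the vector-bonus arithmetic above (illustrative only;
// FiftyPercentVectorBonus and TenPercentVectorBonus are set earlier in
// analyzeCall and their concrete magnitudes are assumed, not restated here):
//   - NumInstructions = 100, NumVectorInstructions = 8  (<= 10%):
//       Threshold -= FiftyPercentVectorBonus, so no vector bonus survives.
//   - NumInstructions = 100, NumVectorInstructions = 30 (<= 50%):
//       only the difference is subtracted, so TenPercentVectorBonus remains.
//   - NumInstructions = 100, NumVectorInstructions = 60 (>  50%):
//       the full FiftyPercentVectorBonus stays applied to Threshold.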
1471 
1472 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1473 /// \brief Dump stats about this call's analysis.
1474 LLVM_DUMP_METHOD void CallAnalyzer::dump() {
1475 #define DEBUG_PRINT_STAT(x) dbgs() << " " #x ": " << x << "\n"
1476  DEBUG_PRINT_STAT(NumConstantArgs);
1477  DEBUG_PRINT_STAT(NumConstantOffsetPtrArgs);
1478  DEBUG_PRINT_STAT(NumAllocaArgs);
1479  DEBUG_PRINT_STAT(NumConstantPtrCmps);
1480  DEBUG_PRINT_STAT(NumConstantPtrDiffs);
1481  DEBUG_PRINT_STAT(NumInstructionsSimplified);
1482  DEBUG_PRINT_STAT(NumInstructions);
1483  DEBUG_PRINT_STAT(SROACostSavings);
1484  DEBUG_PRINT_STAT(SROACostSavingsLost);
1485  DEBUG_PRINT_STAT(ContainsNoDuplicateCall);
1486  DEBUG_PRINT_STAT(Cost);
1487  DEBUG_PRINT_STAT(Threshold);
1488 #undef DEBUG_PRINT_STAT
1489 }
1490 #endif
1491 
1492 /// \brief Test that there are no attribute conflicts between Caller and Callee
1493 /// that prevent inlining.
1494 static bool functionsHaveCompatibleAttributes(Function *Caller,
1495                                               Function *Callee,
1496                                               TargetTransformInfo &TTI) {
1497  return TTI.areInlineCompatible(Caller, Callee) &&
1498  AttributeFuncs::areInlineCompatible(*Caller, *Callee);
1499 }
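// For instance (an illustrative, target-specific case rather than anything
// guaranteed by this file): X86's TTI.areInlineCompatible implementation
// rejects inlining a callee whose "target-features" (say "+avx2") are not a
// subset of the caller's, since the transplanted code could otherwise execute
// on hardware lacking those features.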
1500 
1501 int llvm::getCallsiteCost(CallSite CS, const DataLayout &DL) {
1502  int Cost = 0;
1503  for (unsigned I = 0, E = CS.arg_size(); I != E; ++I) {
1504  if (CS.isByValArgument(I)) {
1505  // We approximate the number of loads and stores needed by dividing the
1506  // size of the byval type by the target's pointer size.
1507  PointerType *PTy = cast<PointerType>(CS.getArgument(I)->getType());
1508  unsigned TypeSize = DL.getTypeSizeInBits(PTy->getElementType());
1509  unsigned PointerSize = DL.getPointerSizeInBits();
1510  // Ceiling division.
1511  unsigned NumStores = (TypeSize + PointerSize - 1) / PointerSize;
1512 
1513  // If it generates more than 8 stores, it is likely to be expanded as an
1514  // inline memcpy so we take that as an upper bound. Otherwise we assume
1515  // one load and one store per word copied.
1516  // FIXME: The maxStoresPerMemcpy setting from the target should be used
1517  // here instead of a magic number of 8, but it's not available via
1518  // DataLayout.
1519  NumStores = std::min(NumStores, 8U);
1520 
1521  Cost += 2 * NumStores * InlineConstants::InstrCost;
1522  } else {
1523  // For non-byval arguments subtract off one instruction per call
1524  // argument.
1525  Cost += InlineConstants::InstrCost;
1526  }
1527  }
1528  // The call instruction also disappears after inlining.
1529  Cost += InlineConstants::InstrCost + InlineConstants::CallPenalty;
1530  return Cost;
1531 }
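// A worked example of the accounting above (illustrative; assumes a 64-bit
// target and the InlineConstants values InstrCost == 5 and CallPenalty == 25):
//   - One byval argument of a 100-byte struct: ceil(800 / 64) = 13 word
//     copies, capped at 8, contributing 2 * 8 * InstrCost = 80.
//   - A call f(a, b) with two ordinary arguments contributes 2 * InstrCost
//     = 10, plus InstrCost + CallPenalty = 30 for the call instruction
//     itself, for a total of 40 -- which analyzeCall then subtracts from the
//     running Cost, since all of this setup disappears after inlining.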
1532 
1533 InlineCost llvm::getInlineCost(
1534     CallSite CS, const InlineParams &Params, TargetTransformInfo &CalleeTTI,
1535     std::function<AssumptionCache &(Function &)> &GetAssumptionCache,
1536     Optional<function_ref<BlockFrequencyInfo &(Function &)>> GetBFI,
1537     ProfileSummaryInfo *PSI) {
1538  return getInlineCost(CS, CS.getCalledFunction(), Params, CalleeTTI,
1539  GetAssumptionCache, GetBFI, PSI);
1540 }
1541 
1542 InlineCost llvm::getInlineCost(
1543     CallSite CS, Function *Callee, const InlineParams &Params,
1544     TargetTransformInfo &CalleeTTI,
1545     std::function<AssumptionCache &(Function &)> &GetAssumptionCache,
1546     Optional<function_ref<BlockFrequencyInfo &(Function &)>> GetBFI,
1547     ProfileSummaryInfo *PSI) {
1548 
1549  // Cannot inline indirect calls.
1550  if (!Callee)
1551  return llvm::InlineCost::getNever();
1552 
1553  // Calls to functions with always-inline attributes should be inlined
1554  // whenever possible.
1555  if (CS.hasFnAttr(Attribute::AlwaysInline)) {
1556  if (isInlineViable(*Callee))
1557  return llvm::InlineCost::getAlways();
1558  return llvm::InlineCost::getNever();
1559  }
1560 
1561  // Never inline functions with conflicting attributes (unless callee has
1562  // always-inline attribute).
1563  if (!functionsHaveCompatibleAttributes(CS.getCaller(), Callee, CalleeTTI))
1564  return llvm::InlineCost::getNever();
1565 
1566  // Don't inline this call if the caller has the optnone attribute.
1567  if (CS.getCaller()->hasFnAttribute(Attribute::OptimizeNone))
1568  return llvm::InlineCost::getNever();
1569 
1570  // Don't inline functions which can be interposed at link-time. Don't inline
1571  // functions marked noinline or call sites marked noinline.
1572  // Note: inlining non-exact non-interposable functions is fine, since we know
1573  // we have *a* correct implementation of the source level function.
1574  if (Callee->isInterposable() || Callee->hasFnAttribute(Attribute::NoInline) ||
1575  CS.isNoInline())
1576  return llvm::InlineCost::getNever();
1577 
1578  DEBUG(llvm::dbgs() << " Analyzing call of " << Callee->getName()
1579  << "...\n");
1580 
1581  CallAnalyzer CA(CalleeTTI, GetAssumptionCache, GetBFI, PSI, *Callee, CS,
1582  Params);
1583  bool ShouldInline = CA.analyzeCall(CS);
1584 
1585  DEBUG(CA.dump());
1586 
1587  // Check if there was a reason to force inlining or no inlining.
1588  if (!ShouldInline && CA.getCost() < CA.getThreshold())
1589  return InlineCost::getNever();
1590  if (ShouldInline && CA.getCost() >= CA.getThreshold())
1591  return InlineCost::getAlways();
1592 
1593  return llvm::InlineCost::get(CA.getCost(), CA.getThreshold());
1594 }
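// A minimal usage sketch (hypothetical caller code, not part of this file;
// the pass boilerplate providing CS, CalleeTTI, GetAssumptionCache and PSI
// is assumed):
//   InlineParams Params = getInlineParams();
//   InlineCost IC = getInlineCost(CS, Params, CalleeTTI, GetAssumptionCache,
//                                 /*GetBFI=*/None, PSI);
//   InlineFunctionInfo IFI;
//   if (IC)  // explicit operator bool: true when Cost < Threshold
//     InlineFunction(CS, IFI);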
1595 
1596 bool llvm::isInlineViable(Function &F) {
1597  bool ReturnsTwice = F.hasFnAttribute(Attribute::ReturnsTwice);
1598  for (Function::iterator BI = F.begin(), BE = F.end(); BI != BE; ++BI) {
1599  // Disallow inlining of functions which contain indirect branches or
1600  // blockaddresses.
1601  if (isa<IndirectBrInst>(BI->getTerminator()) || BI->hasAddressTaken())
1602  return false;
1603 
1604  for (auto &II : *BI) {
1605  CallSite CS(&II);
1606  if (!CS)
1607  continue;
1608 
1609  // Disallow recursive calls.
1610  if (&F == CS.getCalledFunction())
1611  return false;
1612 
1613  // Disallow calls which expose returns-twice to a function not previously
1614  // attributed as such.
1615  if (!ReturnsTwice && CS.isCall() &&
1616  cast<CallInst>(CS.getInstruction())->canReturnTwice())
1617  return false;
1618 
1619  // Disallow inlining functions that call @llvm.localescape. Doing this
1620  // correctly would require major changes to the inliner.
1621  if (CS.getCalledFunction() &&
1622      CS.getCalledFunction()->getIntrinsicID() ==
1623          llvm::Intrinsic::localescape)
1624  return false;
1625  }
1626  }
1627 
1628  return true;
1629 }
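// This predicate backs the always-inline fast path in getInlineCost() above;
// a hypothetical sketch of that use:
//   if (Callee->hasFnAttribute(Attribute::AlwaysInline) &&
//       isInlineViable(*Callee))
//     ; // safe to bypass the cost model entirely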
1630 
1631 // APIs to create InlineParams based on command line flags and/or other
1632 // parameters.
1633 
1634 InlineParams llvm::getInlineParams(int Threshold) {
1635  InlineParams Params;
1636 
1637  // This field is the threshold to use for a callee by default. This is
1638  // derived from one or more of:
1639  // * optimization or size-optimization levels,
1640  // * a value passed to createFunctionInliningPass function, or
1641  // * the -inline-threshold flag.
1642  // If the -inline-threshold flag is explicitly specified, that is used
1643  // irrespective of anything else.
1644  if (InlineThreshold.getNumOccurrences() > 0)
1645  Params.DefaultThreshold = InlineThreshold;
1646  else
1647  Params.DefaultThreshold = Threshold;
1648 
1649  // Set the HintThreshold knob from the -inlinehint-threshold.
1650  Params.HintThreshold = HintThreshold;
1651 
1652  // Set the HotCallSiteThreshold knob from the -hot-callsite-threshold.
1653  Params.HotCallSiteThreshold = HotCallSiteThreshold;
1654 
1655  // Set the ColdCallSiteThreshold knob from the -inline-cold-callsite-threshold.
1656  Params.ColdCallSiteThreshold = ColdCallSiteThreshold;
1657 
1658  // Set the OptMinSizeThreshold and OptSizeThreshold params only if the
1659  // -inline-threshold commandline option is not explicitly given. If that
1660  // option is present, then its value applies even for callees with size and
1661  // minsize attributes.
1662  // If the -inline-threshold is not specified, set the ColdThreshold from the
1663  // -inlinecold-threshold even if it is not explicitly passed. If
1664  // -inline-threshold is specified, then -inlinecold-threshold needs to be
1665  // explicitly specified to set the ColdThreshold knob.
1666  if (InlineThreshold.getNumOccurrences() == 0) {
1667  Params.OptMinSizeThreshold = InlineConstants::OptMinSizeThreshold;
1668  Params.OptSizeThreshold = InlineConstants::OptSizeThreshold;
1669  Params.ColdThreshold = ColdThreshold;
1670  } else if (ColdThreshold.getNumOccurrences() > 0) {
1671  Params.ColdThreshold = ColdThreshold;
1672  }
1673  return Params;
1674 }
1675 
1676 InlineParams llvm::getInlineParams() {
1677   return getInlineParams(InlineThreshold);
1678 }
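// Illustrative flag interplay for the two overloads above (default values
// taken from the cl::opt declarations at the top of this file):
//   opt -inline-threshold=500  -> DefaultThreshold = 500; ColdThreshold is
//                                 only set if -inlinecold-threshold is also
//                                 passed explicitly.
//   opt (no threshold flags)   -> DefaultThreshold = 225, and the
//                                 ColdThreshold knob defaults to 45.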
1679 
1680 // Compute the default threshold for inlining based on the opt level and the
1681 // size opt level.
1682 static int computeThresholdFromOptLevels(unsigned OptLevel,
1683  unsigned SizeOptLevel) {
1684   if (OptLevel > 2)
1685     return InlineConstants::OptAggressiveThreshold;
1686   if (SizeOptLevel == 1) // -Os
1687     return InlineConstants::OptSizeThreshold;
1688   if (SizeOptLevel == 2) // -Oz
1689     return InlineConstants::OptMinSizeThreshold;
1690   return InlineThreshold;
1691 }
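// Illustrative mapping for the helper above (the InlineConstants values are
// defined in InlineCost.h; 225 is the -inline-threshold default):
//   computeThresholdFromOptLevels(3, 0) -> OptAggressiveThreshold  (-O3)
//   computeThresholdFromOptLevels(2, 1) -> OptSizeThreshold        (-Os)
//   computeThresholdFromOptLevels(2, 2) -> OptMinSizeThreshold     (-Oz)
//   computeThresholdFromOptLevels(2, 0) -> 225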
1692 
1693 InlineParams llvm::getInlineParams(unsigned OptLevel, unsigned SizeOptLevel) {
1694  return getInlineParams(computeThresholdFromOptLevels(OptLevel, SizeOptLevel));
1695 }