//===- InlineCost.cpp - Cost analysis for inliner -------------------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file implements inline cost analysis.
//
//===----------------------------------------------------------------------===//

#include "llvm/Analysis/InlineCost.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Analysis/BlockFrequencyInfo.h"
#include "llvm/Analysis/CodeMetrics.h"
#include "llvm/Analysis/ConstantFolding.h"
#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/Analysis/ProfileSummaryInfo.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/IR/CallSite.h"
#include "llvm/IR/CallingConv.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/GetElementPtrTypeIterator.h"
#include "llvm/IR/GlobalAlias.h"
#include "llvm/IR/InstVisitor.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Operator.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"

using namespace llvm;

#define DEBUG_TYPE "inline-cost"

STATISTIC(NumCallsAnalyzed, "Number of call sites analyzed");

static cl::opt<int> InlineThreshold(
    "inline-threshold", cl::Hidden, cl::init(225), cl::ZeroOrMore,
    cl::desc("Control the amount of inlining to perform (default = 225)"));

static cl::opt<int> HintThreshold(
    "inlinehint-threshold", cl::Hidden, cl::init(325),
    cl::desc("Threshold for inlining functions with inline hint"));

static cl::opt<int>
    ColdCallSiteThreshold("inline-cold-callsite-threshold", cl::Hidden,
                          cl::init(45),
                          cl::desc("Threshold for inlining cold callsites"));

// We introduce this threshold to help performance of instrumentation based
// PGO before we actually hook up inliner with analysis passes such as BPI and
// BFI.
static cl::opt<int> ColdThreshold(
    "inlinecold-threshold", cl::Hidden, cl::init(45),
    cl::desc("Threshold for inlining functions with cold attribute"));

static cl::opt<int>
    HotCallSiteThreshold("hot-callsite-threshold", cl::Hidden, cl::init(3000),
                         cl::ZeroOrMore,
                         cl::desc("Threshold for hot callsites"));

static cl::opt<int> LocallyHotCallSiteThreshold(
    "locally-hot-callsite-threshold", cl::Hidden, cl::init(525), cl::ZeroOrMore,
    cl::desc("Threshold for locally hot callsites"));

static cl::opt<int> ColdCallSiteRelFreq(
    "cold-callsite-rel-freq", cl::Hidden, cl::init(2), cl::ZeroOrMore,
    cl::desc("Maximum block frequency, expressed as a percentage of caller's "
             "entry frequency, for a callsite to be cold in the absence of "
             "profile information."));

static cl::opt<int> HotCallSiteRelFreq(
    "hot-callsite-rel-freq", cl::Hidden, cl::init(60), cl::ZeroOrMore,
    cl::desc("Minimum block frequency, expressed as a multiple of caller's "
             "entry frequency, for a callsite to be hot in the absence of "
             "profile information."));
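
// For example, with the defaults above and a caller whose entry block has
// frequency 100: a callsite whose block frequency is at most 2 (2% of entry)
// is treated as cold, while one whose block frequency is at least 6000 (60x
// entry) qualifies for the locally hot threshold.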

static cl::opt<bool> OptComputeFullInlineCost(
    "inline-cost-full", cl::Hidden, cl::init(false),
    cl::desc("Compute the full inline cost of a call site even when the cost "
             "exceeds the threshold."));

namespace {

class CallAnalyzer : public InstVisitor<CallAnalyzer, bool> {
  typedef InstVisitor<CallAnalyzer, bool> Base;
  friend class InstVisitor<CallAnalyzer, bool>;

  /// The TargetTransformInfo available for this compilation.
  const TargetTransformInfo &TTI;

  /// Getter for the cache of @llvm.assume intrinsics.
  std::function<AssumptionCache &(Function &)> &GetAssumptionCache;

  /// Getter for BlockFrequencyInfo.
  Optional<function_ref<BlockFrequencyInfo &(Function &)>> &GetBFI;

  /// Profile summary information.
  ProfileSummaryInfo *PSI;

  /// The called function.
  Function &F;

  // Cache the DataLayout since we use it a lot.
  const DataLayout &DL;

  /// The OptimizationRemarkEmitter available for this compilation.
  OptimizationRemarkEmitter *ORE;

  /// The candidate callsite being analyzed. Please do not use this to do
  /// analysis in the caller function; we want the inline cost query to be
  /// easily cacheable. Instead, use the cover function paramHasAttr.
  CallSite CandidateCS;

  /// Tunable parameters that control the analysis.
  const InlineParams &Params;

  int Threshold;
  int Cost;
  bool ComputeFullInlineCost;

  bool IsCallerRecursive;
  bool IsRecursiveCall;
  bool ExposesReturnsTwice;
  bool HasDynamicAlloca;
  bool ContainsNoDuplicateCall;
  bool HasReturn;
  bool HasIndirectBr;
  bool HasFrameEscape;

  /// Number of bytes allocated statically by the callee.
  uint64_t AllocatedSize;
  unsigned NumInstructions, NumVectorInstructions;
  int VectorBonus, TenPercentVectorBonus;
  // Bonus to be applied when the callee has only one reachable basic block.
  int SingleBBBonus;

  /// While we walk the potentially-inlined instructions, we build up and
  /// maintain a mapping of simplified values specific to this callsite. The
  /// idea is to propagate any special information we have about arguments to
  /// this call through the inlinable section of the function, and account for
  /// likely simplifications post-inlining. The most important aspect we track
  /// is CFG-altering simplifications -- when we prove a basic block dead, that
  /// can cause dramatic shifts in the cost of inlining a function.
  DenseMap<Value *, Constant *> SimplifiedValues;

  /// Keep track of the values which map back (through function arguments) to
  /// allocas on the caller stack which could be simplified through SROA.
  DenseMap<Value *, Value *> SROAArgValues;

  /// The mapping of caller Alloca values to their accumulated cost savings. If
  /// we have to disable SROA for one of the allocas, this tells us how much
  /// cost must be added.
  DenseMap<Value *, int> SROAArgCosts;

  /// Keep track of values which map to a pointer base and constant offset.
  DenseMap<Value *, std::pair<Value *, APInt>> ConstantOffsetPtrs;

  // Custom simplification helper routines.
  bool isAllocaDerivedArg(Value *V);
  bool lookupSROAArgAndCost(Value *V, Value *&Arg,
                            DenseMap<Value *, int>::iterator &CostIt);
  void disableSROA(DenseMap<Value *, int>::iterator CostIt);
  void disableSROA(Value *V);
  void accumulateSROACost(DenseMap<Value *, int>::iterator CostIt,
                          int InstructionCost);
  bool isGEPFree(GetElementPtrInst &GEP);
  bool canFoldInboundsGEP(GetElementPtrInst &I);
  bool accumulateGEPOffset(GEPOperator &GEP, APInt &Offset);
  bool simplifyCallSite(Function *F, CallSite CS);
  template <typename Callable>
  bool simplifyInstruction(Instruction &I, Callable Evaluate);
  ConstantInt *stripAndComputeInBoundsConstantOffsets(Value *&V);

  /// Return true if the given argument to the function being considered for
  /// inlining has the given attribute set either at the call site or the
  /// function declaration. Primarily used to inspect call site specific
  /// attributes since these can be more precise than the ones on the callee
  /// itself.
  bool paramHasAttr(Argument *A, Attribute::AttrKind Attr);

  /// Return true if the given value is known non-null within the callee if
  /// inlined through this particular callsite.
  bool isKnownNonNullInCallee(Value *V);

  /// Update Threshold based on callsite properties such as callee
  /// attributes and callee hotness for PGO builds. The Callee is explicitly
  /// passed to support analyzing indirect calls whose target is inferred by
  /// analysis.
  void updateThreshold(CallSite CS, Function &Callee);

  /// Return true if size growth is allowed when inlining the callee at CS.
  bool allowSizeGrowth(CallSite CS);

  /// Return true if \p CS is a cold callsite.
  bool isColdCallSite(CallSite CS, BlockFrequencyInfo *CallerBFI);

  /// Return a higher threshold if \p CS is a hot callsite.
  Optional<int> getHotCallSiteThreshold(CallSite CS,
                                        BlockFrequencyInfo *CallerBFI);

  // Custom analysis routines.
  bool analyzeBlock(BasicBlock *BB, SmallPtrSetImpl<const Value *> &EphValues);

  // Disable several entry points to the visitor so we don't accidentally use
  // them by declaring but not defining them here.
  void visit(Module *);
  void visit(Module &);
  void visit(Function *);
  void visit(Function &);
  void visit(BasicBlock *);
  void visit(BasicBlock &);

  // Provide base case for our instruction visit.
  bool visitInstruction(Instruction &I);

  // Our visit overrides.
  bool visitAlloca(AllocaInst &I);
  bool visitPHI(PHINode &I);
  bool visitGetElementPtr(GetElementPtrInst &I);
  bool visitBitCast(BitCastInst &I);
  bool visitPtrToInt(PtrToIntInst &I);
  bool visitIntToPtr(IntToPtrInst &I);
  bool visitCastInst(CastInst &I);
  bool visitUnaryInstruction(UnaryInstruction &I);
  bool visitCmpInst(CmpInst &I);
  bool visitAnd(BinaryOperator &I);
  bool visitOr(BinaryOperator &I);
  bool visitSub(BinaryOperator &I);
  bool visitBinaryOperator(BinaryOperator &I);
  bool visitLoad(LoadInst &I);
  bool visitStore(StoreInst &I);
  bool visitExtractValue(ExtractValueInst &I);
  bool visitInsertValue(InsertValueInst &I);
  bool visitCallSite(CallSite CS);
  bool visitReturnInst(ReturnInst &RI);
  bool visitBranchInst(BranchInst &BI);
  bool visitSelectInst(SelectInst &SI);
  bool visitSwitchInst(SwitchInst &SI);
  bool visitIndirectBrInst(IndirectBrInst &IBI);
  bool visitResumeInst(ResumeInst &RI);
  bool visitCleanupReturnInst(CleanupReturnInst &RI);
  bool visitCatchReturnInst(CatchReturnInst &RI);
  bool visitUnreachableInst(UnreachableInst &I);

public:
  CallAnalyzer(const TargetTransformInfo &TTI,
               std::function<AssumptionCache &(Function &)> &GetAssumptionCache,
               Optional<function_ref<BlockFrequencyInfo &(Function &)>> &GetBFI,
               ProfileSummaryInfo *PSI, OptimizationRemarkEmitter *ORE,
               Function &Callee, CallSite CSArg, const InlineParams &Params)
      : TTI(TTI), GetAssumptionCache(GetAssumptionCache), GetBFI(GetBFI),
        PSI(PSI), F(Callee), DL(F.getParent()->getDataLayout()), ORE(ORE),
        CandidateCS(CSArg), Params(Params), Threshold(Params.DefaultThreshold),
        Cost(0), ComputeFullInlineCost(OptComputeFullInlineCost ||
                                       Params.ComputeFullInlineCost || ORE),
        IsCallerRecursive(false), IsRecursiveCall(false),
        ExposesReturnsTwice(false), HasDynamicAlloca(false),
        ContainsNoDuplicateCall(false), HasReturn(false), HasIndirectBr(false),
        HasFrameEscape(false), AllocatedSize(0), NumInstructions(0),
        NumVectorInstructions(0), VectorBonus(0), SingleBBBonus(0),
        NumConstantArgs(0), NumConstantOffsetPtrArgs(0), NumAllocaArgs(0),
        NumConstantPtrCmps(0), NumConstantPtrDiffs(0),
        NumInstructionsSimplified(0), SROACostSavings(0),
        SROACostSavingsLost(0) {}

  bool analyzeCall(CallSite CS);

  int getThreshold() { return Threshold; }
  int getCost() { return Cost; }

  // Keep a bunch of stats about the cost savings found so we can print them
  // out when debugging.
  unsigned NumConstantArgs;
  unsigned NumConstantOffsetPtrArgs;
  unsigned NumAllocaArgs;
  unsigned NumConstantPtrCmps;
  unsigned NumConstantPtrDiffs;
  unsigned NumInstructionsSimplified;
  unsigned SROACostSavings;
  unsigned SROACostSavingsLost;

  void dump();
};

} // namespace

/// \brief Test whether the given value is an Alloca-derived function argument.
bool CallAnalyzer::isAllocaDerivedArg(Value *V) {
  return SROAArgValues.count(V);
}

/// \brief Lookup the SROA-candidate argument and cost iterator which V maps to.
/// Returns false if V does not map to a SROA-candidate.
bool CallAnalyzer::lookupSROAArgAndCost(
    Value *V, Value *&Arg, DenseMap<Value *, int>::iterator &CostIt) {
  if (SROAArgValues.empty() || SROAArgCosts.empty())
    return false;

  DenseMap<Value *, Value *>::iterator ArgIt = SROAArgValues.find(V);
  if (ArgIt == SROAArgValues.end())
    return false;

  Arg = ArgIt->second;
  CostIt = SROAArgCosts.find(Arg);
  return CostIt != SROAArgCosts.end();
}

/// \brief Disable SROA for the candidate marked by this cost iterator.
///
/// This marks the candidate as no longer viable for SROA, and adds the cost
/// savings associated with it back into the inline cost measurement.
void CallAnalyzer::disableSROA(DenseMap<Value *, int>::iterator CostIt) {
  // If we're no longer able to perform SROA we need to undo its cost savings
  // and prevent subsequent analysis.
  Cost += CostIt->second;
  SROACostSavings -= CostIt->second;
  SROACostSavingsLost += CostIt->second;
  SROAArgCosts.erase(CostIt);
}

/// \brief If 'V' maps to a SROA candidate, disable SROA for it.
void CallAnalyzer::disableSROA(Value *V) {
  Value *SROAArg;
  DenseMap<Value *, int>::iterator CostIt;
  if (lookupSROAArgAndCost(V, SROAArg, CostIt))
    disableSROA(CostIt);
}

/// \brief Accumulate the given cost for a particular SROA candidate.
void CallAnalyzer::accumulateSROACost(DenseMap<Value *, int>::iterator CostIt,
                                      int InstructionCost) {
  CostIt->second += InstructionCost;
  SROACostSavings += InstructionCost;
}

/// \brief Accumulate a constant GEP offset into an APInt if possible.
///
/// Returns false if unable to compute the offset for any reason. Respects any
/// simplified values known during the analysis of this callsite.
bool CallAnalyzer::accumulateGEPOffset(GEPOperator &GEP, APInt &Offset) {
  unsigned IntPtrWidth = DL.getPointerSizeInBits();
  assert(IntPtrWidth == Offset.getBitWidth());

  for (gep_type_iterator GTI = gep_type_begin(GEP), GTE = gep_type_end(GEP);
       GTI != GTE; ++GTI) {
    ConstantInt *OpC = dyn_cast<ConstantInt>(GTI.getOperand());
    if (!OpC)
      if (Constant *SimpleOp = SimplifiedValues.lookup(GTI.getOperand()))
        OpC = dyn_cast<ConstantInt>(SimpleOp);
    if (!OpC)
      return false;
    if (OpC->isZero())
      continue;

    // Handle a struct index, which adds its field offset to the pointer.
    if (StructType *STy = GTI.getStructTypeOrNull()) {
      unsigned ElementIdx = OpC->getZExtValue();
      const StructLayout *SL = DL.getStructLayout(STy);
      Offset += APInt(IntPtrWidth, SL->getElementOffset(ElementIdx));
      continue;
    }

    APInt TypeSize(IntPtrWidth, DL.getTypeAllocSize(GTI.getIndexedType()));
    Offset += OpC->getValue().sextOrTrunc(IntPtrWidth) * TypeSize;
  }
  return true;
}
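
// For example, on a 64-bit target,
//   %p = getelementptr inbounds { i32, i64 }, { i32, i64 }* %b, i32 0, i32 1
// accumulates the field offset of member 1 (8 bytes, since the i64 member is
// 8-byte aligned), so Offset grows by 8.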

/// \brief Use TTI to check whether a GEP is free.
///
/// Respects any simplified values known during the analysis of this callsite.
bool CallAnalyzer::isGEPFree(GetElementPtrInst &GEP) {
  SmallVector<Value *, 4> Operands;
  Operands.push_back(GEP.getOperand(0));
  for (User::op_iterator I = GEP.idx_begin(), E = GEP.idx_end(); I != E; ++I)
    if (Constant *SimpleOp = SimplifiedValues.lookup(*I))
      Operands.push_back(SimpleOp);
    else
      Operands.push_back(*I);
  return TargetTransformInfo::TCC_Free == TTI.getUserCost(&GEP, Operands);
}

bool CallAnalyzer::visitAlloca(AllocaInst &I) {
  // Check whether inlining will turn a dynamic alloca into a static
  // alloca and handle that case.
  if (I.isArrayAllocation()) {
    Constant *Size = SimplifiedValues.lookup(I.getArraySize());
    if (auto *AllocSize = dyn_cast_or_null<ConstantInt>(Size)) {
      Type *Ty = I.getAllocatedType();
      AllocatedSize = SaturatingMultiplyAdd(
          AllocSize->getLimitedValue(), DL.getTypeAllocSize(Ty), AllocatedSize);
      return Base::visitAlloca(I);
    }
  }

  // Accumulate the allocated size.
  if (I.isStaticAlloca()) {
    Type *Ty = I.getAllocatedType();
    AllocatedSize = SaturatingAdd(DL.getTypeAllocSize(Ty), AllocatedSize);
  }

  // We will happily inline static alloca instructions.
  if (I.isStaticAlloca())
    return Base::visitAlloca(I);

  // FIXME: This is overly conservative. Dynamic allocas are inefficient for
  // a variety of reasons, and so we would like to not inline them into
  // functions which don't currently have a dynamic alloca. This simply
  // disables inlining altogether in the presence of a dynamic alloca.
  HasDynamicAlloca = true;
  return false;
}
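
// For example, `%buf = alloca i32, i32 %n` becomes effectively static after
// inlining when %n simplifies to a constant at this callsite; with %n == 16
// the accounting above adds 16 * 4 = 64 bytes to AllocatedSize.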

bool CallAnalyzer::visitPHI(PHINode &I) {
  // FIXME: We should potentially be tracking values through phi nodes,
  // especially when they collapse to a single value due to deleted CFG edges
  // during inlining.

  // FIXME: We need to propagate SROA *disabling* through phi nodes, even
  // though we don't want to propagate its bonuses. The idea is to disable
  // SROA if it *might* be used in an inappropriate manner.

  // Phi nodes are always zero-cost.
  return true;
}

/// \brief Check we can fold GEPs of constant-offset call site argument pointers.
/// This requires target data and inbounds GEPs.
///
/// \return true if the specified GEP can be folded.
bool CallAnalyzer::canFoldInboundsGEP(GetElementPtrInst &I) {
  // Check if we have a base + offset for the pointer.
  std::pair<Value *, APInt> BaseAndOffset =
      ConstantOffsetPtrs.lookup(I.getPointerOperand());
  if (!BaseAndOffset.first)
    return false;

  // Check if the offset of this GEP is constant, and if so accumulate it
  // into Offset.
  if (!accumulateGEPOffset(cast<GEPOperator>(I), BaseAndOffset.second))
    return false;

  // Add the result as a new mapping to Base + Offset.
  ConstantOffsetPtrs[&I] = BaseAndOffset;

  return true;
}

bool CallAnalyzer::visitGetElementPtr(GetElementPtrInst &I) {
  Value *SROAArg;
  DenseMap<Value *, int>::iterator CostIt;
  bool SROACandidate =
      lookupSROAArgAndCost(I.getPointerOperand(), SROAArg, CostIt);

  // Lambda to check whether a GEP's indices are all constant.
  auto IsGEPOffsetConstant = [&](GetElementPtrInst &GEP) {
    for (User::op_iterator I = GEP.idx_begin(), E = GEP.idx_end(); I != E; ++I)
      if (!isa<Constant>(*I) && !SimplifiedValues.lookup(*I))
        return false;
    return true;
  };

  if ((I.isInBounds() && canFoldInboundsGEP(I)) || IsGEPOffsetConstant(I)) {
    if (SROACandidate)
      SROAArgValues[&I] = SROAArg;

    // Constant GEPs are modeled as free.
    return true;
  }

  // Variable GEPs will require math and will disable SROA.
  if (SROACandidate)
    disableSROA(CostIt);
  return isGEPFree(I);
}

/// Simplify \p I if its operands are constants and update SimplifiedValues.
/// \p Evaluate is a callable specific to instruction type that evaluates the
/// instruction when all the operands are constants.
template <typename Callable>
bool CallAnalyzer::simplifyInstruction(Instruction &I, Callable Evaluate) {
  SmallVector<Constant *, 2> COps;
  for (Value *Op : I.operands()) {
    Constant *COp = dyn_cast<Constant>(Op);
    if (!COp)
      COp = SimplifiedValues.lookup(Op);
    if (!COp)
      return false;
    COps.push_back(COp);
  }
  auto *C = Evaluate(COps);
  if (!C)
    return false;
  SimplifiedValues[&I] = C;
  return true;
}

bool CallAnalyzer::visitBitCast(BitCastInst &I) {
  // Propagate constants through bitcasts.
  if (simplifyInstruction(I, [&](SmallVectorImpl<Constant *> &COps) {
        return ConstantExpr::getBitCast(COps[0], I.getType());
      }))
    return true;

  // Track base/offsets through casts.
  std::pair<Value *, APInt> BaseAndOffset =
      ConstantOffsetPtrs.lookup(I.getOperand(0));
  // Casts don't change the offset, just wrap it up.
  if (BaseAndOffset.first)
    ConstantOffsetPtrs[&I] = BaseAndOffset;

  // Also look for SROA candidates here.
  Value *SROAArg;
  DenseMap<Value *, int>::iterator CostIt;
  if (lookupSROAArgAndCost(I.getOperand(0), SROAArg, CostIt))
    SROAArgValues[&I] = SROAArg;

  // Bitcasts are always zero cost.
  return true;
}

bool CallAnalyzer::visitPtrToInt(PtrToIntInst &I) {
  // Propagate constants through ptrtoint.
  if (simplifyInstruction(I, [&](SmallVectorImpl<Constant *> &COps) {
        return ConstantExpr::getPtrToInt(COps[0], I.getType());
      }))
    return true;

  // Track base/offset pairs when converted to a plain integer provided the
  // integer is large enough to represent the pointer.
  unsigned IntegerSize = I.getType()->getScalarSizeInBits();
  if (IntegerSize >= DL.getPointerSizeInBits()) {
    std::pair<Value *, APInt> BaseAndOffset =
        ConstantOffsetPtrs.lookup(I.getOperand(0));
    if (BaseAndOffset.first)
      ConstantOffsetPtrs[&I] = BaseAndOffset;
  }

  // This is really weird. Technically, ptrtoint will disable SROA. However,
  // unless that ptrtoint is *used* somewhere in the live basic blocks after
  // inlining, it will be nuked, and SROA should proceed. All of the uses which
  // would block SROA would also block SROA if applied directly to a pointer,
  // and so we can just add the integer in here. The only places where SROA is
  // preserved either cannot fire on an integer, or won't in-and-of themselves
  // disable SROA (ext) w/o some later use that we would see and disable.
  Value *SROAArg;
  DenseMap<Value *, int>::iterator CostIt;
  if (lookupSROAArgAndCost(I.getOperand(0), SROAArg, CostIt))
    SROAArgValues[&I] = SROAArg;

  return TargetTransformInfo::TCC_Free == TTI.getUserCost(&I);
}

bool CallAnalyzer::visitIntToPtr(IntToPtrInst &I) {
  // Propagate constants through inttoptr.
  if (simplifyInstruction(I, [&](SmallVectorImpl<Constant *> &COps) {
        return ConstantExpr::getIntToPtr(COps[0], I.getType());
      }))
    return true;

  // Track base/offset pairs when round-tripped through a pointer without
  // modifications provided the integer is not too large.
  Value *Op = I.getOperand(0);
  unsigned IntegerSize = Op->getType()->getScalarSizeInBits();
  if (IntegerSize <= DL.getPointerSizeInBits()) {
    std::pair<Value *, APInt> BaseAndOffset = ConstantOffsetPtrs.lookup(Op);
    if (BaseAndOffset.first)
      ConstantOffsetPtrs[&I] = BaseAndOffset;
  }

  // "Propagate" SROA here in the same manner as we do for ptrtoint above.
  Value *SROAArg;
  DenseMap<Value *, int>::iterator CostIt;
  if (lookupSROAArgAndCost(Op, SROAArg, CostIt))
    SROAArgValues[&I] = SROAArg;

  return TargetTransformInfo::TCC_Free == TTI.getUserCost(&I);
}

bool CallAnalyzer::visitCastInst(CastInst &I) {
  // Propagate constants through casts.
  if (simplifyInstruction(I, [&](SmallVectorImpl<Constant *> &COps) {
        return ConstantExpr::getCast(I.getOpcode(), COps[0], I.getType());
      }))
    return true;

  // Disable SROA in the face of arbitrary casts we don't whitelist elsewhere.
  disableSROA(I.getOperand(0));

  return TargetTransformInfo::TCC_Free == TTI.getUserCost(&I);
}

bool CallAnalyzer::visitUnaryInstruction(UnaryInstruction &I) {
  Value *Operand = I.getOperand(0);
  if (simplifyInstruction(I, [&](SmallVectorImpl<Constant *> &COps) {
        return ConstantFoldInstOperands(&I, COps[0], DL);
      }))
    return true;

  // Disable any SROA on the argument to arbitrary unary operators.
  disableSROA(Operand);

  return false;
}

bool CallAnalyzer::paramHasAttr(Argument *A, Attribute::AttrKind Attr) {
  return CandidateCS.paramHasAttr(A->getArgNo(), Attr);
}

bool CallAnalyzer::isKnownNonNullInCallee(Value *V) {
  // Does the *call site* have the NonNull attribute set on an argument? We
  // use the attribute on the call site to memoize any analysis done in the
  // caller. This will also trip if the callee function has a non-null
  // parameter attribute, but that's a less interesting case because hopefully
  // the callee would already have been simplified based on that.
  if (Argument *A = dyn_cast<Argument>(V))
    if (paramHasAttr(A, Attribute::NonNull))
      return true;

  // Is this an alloca in the caller? This is distinct from the attribute case
  // above because attributes aren't updated within the inliner itself and we
  // always want to catch the alloca derived case.
  if (isAllocaDerivedArg(V))
    // We can actually predict the result of comparisons between an
    // alloca-derived value and null. Note that this fires regardless of
    // SROA firing.
    return true;

  return false;
}

bool CallAnalyzer::allowSizeGrowth(CallSite CS) {
  // If the normal destination of the invoke or the parent block of the call
  // site is unreachable-terminated, there is little point in inlining this
  // unless there is literally zero cost.
  // FIXME: Note that it is possible that an unreachable-terminated block has a
  // hot entry. For example, in the scenario below, inlining hot_call_X() may
  // be beneficial:
  // main() {
  //   hot_call_1();
  //   ...
  //   hot_call_N()
  //   exit(0);
  // }
  // For now, we are not handling this corner case here as it is rare in real
  // code. In the future, we should elaborate on this based on BPI and BFI in
  // more general threshold-adjusting heuristics in updateThreshold().
  Instruction *Instr = CS.getInstruction();
  if (InvokeInst *II = dyn_cast<InvokeInst>(Instr)) {
    if (isa<UnreachableInst>(II->getNormalDest()->getTerminator()))
      return false;
  } else if (isa<UnreachableInst>(Instr->getParent()->getTerminator()))
    return false;

  return true;
}

bool CallAnalyzer::isColdCallSite(CallSite CS, BlockFrequencyInfo *CallerBFI) {
  // If global profile summary is available, then callsite's coldness is
  // determined based on that.
  if (PSI && PSI->hasProfileSummary())
    return PSI->isColdCallSite(CS, CallerBFI);

  // Otherwise we need BFI to be available.
  if (!CallerBFI)
    return false;

  // Determine if the callsite is cold relative to caller's entry. We could
  // potentially cache the computation of scaled entry frequency, but the added
  // complexity is not worth it unless this scaling shows up high in the
  // profiles.
  const BranchProbability ColdProb(ColdCallSiteRelFreq, 100);
  auto CallSiteBB = CS.getInstruction()->getParent();
  auto CallSiteFreq = CallerBFI->getBlockFreq(CallSiteBB);
  auto CallerEntryFreq =
      CallerBFI->getBlockFreq(&(CS.getCaller()->getEntryBlock()));
  return CallSiteFreq < CallerEntryFreq * ColdProb;
}

Optional<int>
CallAnalyzer::getHotCallSiteThreshold(CallSite CS,
                                      BlockFrequencyInfo *CallerBFI) {

  // If global profile summary is available, then callsite's hotness is
  // determined based on that.
  if (PSI && PSI->hasProfileSummary() && PSI->isHotCallSite(CS, CallerBFI))
    return Params.HotCallSiteThreshold;

  // Otherwise we need BFI to be available and to have a locally hot callsite
  // threshold.
  if (!CallerBFI || !Params.LocallyHotCallSiteThreshold)
    return None;

  // Determine if the callsite is hot relative to caller's entry. We could
  // potentially cache the computation of scaled entry frequency, but the added
  // complexity is not worth it unless this scaling shows up high in the
  // profiles.
  auto CallSiteBB = CS.getInstruction()->getParent();
  auto CallSiteFreq = CallerBFI->getBlockFreq(CallSiteBB).getFrequency();
  auto CallerEntryFreq = CallerBFI->getEntryFreq();
  if (CallSiteFreq >= CallerEntryFreq * HotCallSiteRelFreq)
    return Params.LocallyHotCallSiteThreshold;

  // Otherwise treat it normally.
  return None;
}

void CallAnalyzer::updateThreshold(CallSite CS, Function &Callee) {
  // If no size growth is allowed for this inlining, set Threshold to 0.
  if (!allowSizeGrowth(CS)) {
    Threshold = 0;
    return;
  }

  Function *Caller = CS.getCaller();

  // return min(A, B) if B is valid.
  auto MinIfValid = [](int A, Optional<int> B) {
    return B ? std::min(A, B.getValue()) : A;
  };

  // return max(A, B) if B is valid.
  auto MaxIfValid = [](int A, Optional<int> B) {
    return B ? std::max(A, B.getValue()) : A;
  };

  // Various bonus percentages. These are multiplied by Threshold to get the
  // bonus values.
  // SingleBBBonus: This bonus is applied if the callee has a single reachable
  // basic block at the given callsite context. This is speculatively applied
  // and withdrawn if more than one basic block is seen.
  //
  // Vector bonuses: We want to more aggressively inline vector-dense kernels
  // and apply this bonus based on the percentage of vector instructions. A
  // bonus is applied if the vector instructions exceed 50% and half that
  // amount is applied if they exceed 10%. Note that these bonuses are somewhat
  // arbitrary and evolved over time by accident as much as because they are
  // principled bonuses.
  // FIXME: It would be nice to base the bonus values on something more
  // scientific.
  //
  // LastCallToStaticBonus: This large bonus is applied to ensure the inlining
  // of the last call to a static function, as inlining such functions is
  // guaranteed to reduce code size.
  //
  // These bonus percentages may be set to 0 based on properties of the caller
  // and the callsite.
  int SingleBBBonusPercent = 50;
  int VectorBonusPercent = 150;
  int LastCallToStaticBonus = InlineConstants::LastCallToStaticBonus;

  // Lambda to set all the above bonus and bonus percentages to 0.
  auto DisallowAllBonuses = [&]() {
    SingleBBBonusPercent = 0;
    VectorBonusPercent = 0;
    LastCallToStaticBonus = 0;
  };

  // Use the OptMinSizeThreshold or OptSizeThreshold knob if they are available
  // and reduce the threshold if the caller has the necessary attribute.
  if (Caller->optForMinSize()) {
    Threshold = MinIfValid(Threshold, Params.OptMinSizeThreshold);
    // For minsize, we want to disable the single BB bonus and the vector
    // bonuses, but not the last-call-to-static bonus. Inlining the last call
    // to a static function will, at the minimum, eliminate the parameter setup
    // and call/return instructions.
    SingleBBBonusPercent = 0;
    VectorBonusPercent = 0;
  } else if (Caller->optForSize())
    Threshold = MinIfValid(Threshold, Params.OptSizeThreshold);

  // Adjust the threshold based on inlinehint attribute and profile based
  // hotness information if the caller does not have MinSize attribute.
  if (!Caller->optForMinSize()) {
    if (Callee.hasFnAttribute(Attribute::InlineHint))
      Threshold = MaxIfValid(Threshold, Params.HintThreshold);

    // FIXME: After switching to the new passmanager, simplify the logic below
    // by checking only the callsite hotness/coldness as we will reliably
    // have local profile information.
    //
    // Callsite hotness and coldness can be determined if sample profile is
    // used (which adds hotness metadata to calls) or if caller's
    // BlockFrequencyInfo is available.
    BlockFrequencyInfo *CallerBFI = GetBFI ? &((*GetBFI)(*Caller)) : nullptr;
    auto HotCallSiteThreshold = getHotCallSiteThreshold(CS, CallerBFI);
    if (!Caller->optForSize() && HotCallSiteThreshold) {
      DEBUG(dbgs() << "Hot callsite.\n");
      // FIXME: This should update the threshold only if it exceeds the
      // current threshold, but AutoFDO + ThinLTO currently relies on this
      // behavior to prevent inlining of hot callsites during ThinLTO
      // compile phase.
      Threshold = HotCallSiteThreshold.getValue();
    } else if (isColdCallSite(CS, CallerBFI)) {
      DEBUG(dbgs() << "Cold callsite.\n");
      // Do not apply bonuses for a cold callsite, including the
      // LastCallToStatic bonus. While this bonus might result in code size
      // reduction, it can cause the size of a non-cold caller to increase,
      // preventing it from being inlined.
      DisallowAllBonuses();
      Threshold = MinIfValid(Threshold, Params.ColdCallSiteThreshold);
    } else if (PSI) {
      // Use callee's global profile information only if we have no way of
      // determining this via callsite information.
      if (PSI->isFunctionEntryHot(&Callee)) {
        DEBUG(dbgs() << "Hot callee.\n");
        // If callsite hotness can not be determined, we may still know
        // that the callee is hot and treat it as a weaker hint for threshold
        // increase.
        Threshold = MaxIfValid(Threshold, Params.HintThreshold);
      } else if (PSI->isFunctionEntryCold(&Callee)) {
        DEBUG(dbgs() << "Cold callee.\n");
        // Do not apply bonuses for a cold callee, including the
        // LastCallToStatic bonus. While this bonus might result in code size
        // reduction, it can cause the size of a non-cold caller to increase,
        // preventing it from being inlined.
        DisallowAllBonuses();
        Threshold = MinIfValid(Threshold, Params.ColdThreshold);
      }
    }
  }

  // Finally, take the target-specific inlining threshold multiplier into
  // account.
  Threshold *= TTI.getInliningThresholdMultiplier();

  SingleBBBonus = Threshold * SingleBBBonusPercent / 100;
  VectorBonus = Threshold * VectorBonusPercent / 100;

  bool OnlyOneCallAndLocalLinkage =
      F.hasLocalLinkage() && F.hasOneUse() && &F == CS.getCalledFunction();
  // If there is only one call of the function, and it has internal linkage,
  // the cost of inlining it drops dramatically. It may seem odd to update
  // Cost in updateThreshold, but the bonus depends on the logic in this
  // method.
  if (OnlyOneCallAndLocalLinkage)
    Cost -= LastCallToStaticBonus;
}
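
// Illustration of the bonus math above (a sketch, using the default
// parameters): with Threshold = 225, SingleBBBonusPercent = 50 and
// VectorBonusPercent = 150, the bonuses come to 112 and 337 respectively.
// analyzeCall() speculatively adds both to Threshold and later withdraws any
// bonus whose precondition (single reachable block, vector density) fails.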

bool CallAnalyzer::visitCmpInst(CmpInst &I) {
  Value *LHS = I.getOperand(0), *RHS = I.getOperand(1);
  // First try to handle simplified comparisons.
  if (simplifyInstruction(I, [&](SmallVectorImpl<Constant *> &COps) {
        return ConstantExpr::getCompare(I.getPredicate(), COps[0], COps[1]);
      }))
    return true;

  if (I.getOpcode() == Instruction::FCmp)
    return false;

  // Otherwise look for a comparison between constant offset pointers with
  // a common base.
  Value *LHSBase, *RHSBase;
  APInt LHSOffset, RHSOffset;
  std::tie(LHSBase, LHSOffset) = ConstantOffsetPtrs.lookup(LHS);
  if (LHSBase) {
    std::tie(RHSBase, RHSOffset) = ConstantOffsetPtrs.lookup(RHS);
    if (RHSBase && LHSBase == RHSBase) {
      // We have common bases, fold the icmp to a constant based on the
      // offsets.
      Constant *CLHS = ConstantInt::get(LHS->getContext(), LHSOffset);
      Constant *CRHS = ConstantInt::get(RHS->getContext(), RHSOffset);
      if (Constant *C = ConstantExpr::getICmp(I.getPredicate(), CLHS, CRHS)) {
        SimplifiedValues[&I] = C;
        ++NumConstantPtrCmps;
        return true;
      }
    }
  }

  // If the comparison is an equality comparison with null, we can simplify it
  // if we know the value (argument) can't be null.
  if (I.isEquality() && isa<ConstantPointerNull>(I.getOperand(1)) &&
      isKnownNonNullInCallee(I.getOperand(0))) {
    bool IsNotEqual = I.getPredicate() == CmpInst::ICMP_NE;
    SimplifiedValues[&I] = IsNotEqual ? ConstantInt::getTrue(I.getType())
                                      : ConstantInt::getFalse(I.getType());
    return true;
  }
  // Finally check for SROA candidates in comparisons.
  Value *SROAArg;
  DenseMap<Value *, int>::iterator CostIt;
  if (lookupSROAArgAndCost(I.getOperand(0), SROAArg, CostIt)) {
    if (isa<ConstantPointerNull>(I.getOperand(1))) {
      accumulateSROACost(CostIt, InlineConstants::InstrCost);
      return true;
    }

    disableSROA(CostIt);
  }

  return false;
}
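
// For example, if %p and %q are GEPs off the same call site argument with
// constant offsets 0 and 4, the common-base case above folds
// `icmp eq i8* %p, %q` to false with no runtime compare after inlining.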

bool CallAnalyzer::visitOr(BinaryOperator &I) {
  // This is necessary because the generic simplify instruction only works if
  // both operands are constants.
  for (unsigned i = 0; i < 2; ++i) {
    if (ConstantInt *C = dyn_cast_or_null<ConstantInt>(
            SimplifiedValues.lookup(I.getOperand(i))))
      if (C->isAllOnesValue()) {
        SimplifiedValues[&I] = C;
        return true;
      }
  }
  return Base::visitOr(I);
}

bool CallAnalyzer::visitAnd(BinaryOperator &I) {
  // This is necessary because the generic simplify instruction only works if
  // both operands are constants.
  for (unsigned i = 0; i < 2; ++i) {
    if (ConstantInt *C = dyn_cast_or_null<ConstantInt>(
            SimplifiedValues.lookup(I.getOperand(i))))
      if (C->isZero()) {
        SimplifiedValues[&I] = C;
        return true;
      }
  }
  return Base::visitAnd(I);
}

bool CallAnalyzer::visitSub(BinaryOperator &I) {
  // Try to handle a special case: we can fold computing the difference of two
  // constant-related pointers.
  Value *LHS = I.getOperand(0), *RHS = I.getOperand(1);
  Value *LHSBase, *RHSBase;
  APInt LHSOffset, RHSOffset;
  std::tie(LHSBase, LHSOffset) = ConstantOffsetPtrs.lookup(LHS);
  if (LHSBase) {
    std::tie(RHSBase, RHSOffset) = ConstantOffsetPtrs.lookup(RHS);
    if (RHSBase && LHSBase == RHSBase) {
      // We have common bases, fold the subtract to a constant based on the
      // offsets.
      Constant *CLHS = ConstantInt::get(LHS->getContext(), LHSOffset);
      Constant *CRHS = ConstantInt::get(RHS->getContext(), RHSOffset);
      if (Constant *C = ConstantExpr::getSub(CLHS, CRHS)) {
        SimplifiedValues[&I] = C;
        ++NumConstantPtrDiffs;
        return true;
      }
    }
  }

  // Otherwise, fall back to the generic logic for simplifying and handling
  // instructions.
  return Base::visitSub(I);
}

bool CallAnalyzer::visitBinaryOperator(BinaryOperator &I) {
  Value *LHS = I.getOperand(0), *RHS = I.getOperand(1);
  auto Evaluate = [&](SmallVectorImpl<Constant *> &COps) {
    Value *SimpleV = nullptr;
    if (auto FI = dyn_cast<FPMathOperator>(&I))
      SimpleV = SimplifyFPBinOp(I.getOpcode(), COps[0], COps[1],
                                FI->getFastMathFlags(), DL);
    else
      SimpleV = SimplifyBinOp(I.getOpcode(), COps[0], COps[1], DL);
    return dyn_cast_or_null<Constant>(SimpleV);
  };

  if (simplifyInstruction(I, Evaluate))
    return true;

  // Disable any SROA on arguments to arbitrary, unsimplified binary operators.
  disableSROA(LHS);
  disableSROA(RHS);

  return false;
}

bool CallAnalyzer::visitLoad(LoadInst &I) {
  Value *SROAArg;
  DenseMap<Value *, int>::iterator CostIt;
  if (lookupSROAArgAndCost(I.getPointerOperand(), SROAArg, CostIt)) {
    if (I.isSimple()) {
      accumulateSROACost(CostIt, InlineConstants::InstrCost);
      return true;
    }

    disableSROA(CostIt);
  }

  return false;
}

bool CallAnalyzer::visitStore(StoreInst &I) {
  Value *SROAArg;
  DenseMap<Value *, int>::iterator CostIt;
  if (lookupSROAArgAndCost(I.getPointerOperand(), SROAArg, CostIt)) {
    if (I.isSimple()) {
      accumulateSROACost(CostIt, InlineConstants::InstrCost);
      return true;
    }

    disableSROA(CostIt);
  }

  return false;
}

bool CallAnalyzer::visitExtractValue(ExtractValueInst &I) {
  // Constant folding for extract value is trivial.
  if (simplifyInstruction(I, [&](SmallVectorImpl<Constant *> &COps) {
        return ConstantExpr::getExtractValue(COps[0], I.getIndices());
      }))
    return true;

  // SROA can look through these but give them a cost.
  return false;
}

bool CallAnalyzer::visitInsertValue(InsertValueInst &I) {
  // Constant folding for insert value is trivial.
  if (simplifyInstruction(I, [&](SmallVectorImpl<Constant *> &COps) {
        return ConstantExpr::getInsertValue(/*AggregateOperand*/ COps[0],
                                            /*InsertedValueOperand*/ COps[1],
                                            I.getIndices());
      }))
    return true;

  // SROA can look through these but give them a cost.
  return false;
}

/// \brief Try to simplify a call site.
///
/// Takes a concrete function and callsite and tries to actually simplify it by
/// analyzing the arguments and call itself with instsimplify. Returns true if
/// it has simplified the callsite to some other entity (a constant), making it
/// free.
bool CallAnalyzer::simplifyCallSite(Function *F, CallSite CS) {
  // FIXME: Using the instsimplify logic directly for this is inefficient
  // because we have to continually rebuild the argument list even when no
  // simplifications can be performed. Until that is fixed with remapping
  // inside of instsimplify, directly constant fold calls here.
  if (!canConstantFoldCallTo(CS, F))
    return false;

  // Try to re-map the arguments to constants.
  SmallVector<Constant *, 4> ConstantArgs;
  ConstantArgs.reserve(CS.arg_size());
  for (CallSite::arg_iterator I = CS.arg_begin(), E = CS.arg_end(); I != E;
       ++I) {
    Constant *C = dyn_cast<Constant>(*I);
    if (!C)
      C = dyn_cast_or_null<Constant>(SimplifiedValues.lookup(*I));
    if (!C)
      return false; // This argument doesn't map to a constant.

    ConstantArgs.push_back(C);
  }
  if (Constant *C = ConstantFoldCall(CS, F, ConstantArgs)) {
    SimplifiedValues[CS.getInstruction()] = C;
    return true;
  }

  return false;
}

bool CallAnalyzer::visitCallSite(CallSite CS) {
  if (CS.hasFnAttr(Attribute::ReturnsTwice) &&
      !F.hasFnAttribute(Attribute::ReturnsTwice)) {
    // This aborts the entire analysis.
    ExposesReturnsTwice = true;
    return false;
  }
  if (CS.isCall() && cast<CallInst>(CS.getInstruction())->cannotDuplicate())
    ContainsNoDuplicateCall = true;

  if (Function *F = CS.getCalledFunction()) {
    // When we have a concrete function, first try to simplify it directly.
    if (simplifyCallSite(F, CS))
      return true;

    // Next check if it is an intrinsic we know about.
    // FIXME: Lift this into part of the InstVisitor.
    if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(CS.getInstruction())) {
      switch (II->getIntrinsicID()) {
      default:
        return Base::visitCallSite(CS);

      case Intrinsic::load_relative:
        // This is normally lowered to 4 LLVM instructions.
        Cost += 3 * InlineConstants::InstrCost;
        return false;

      case Intrinsic::memset:
      case Intrinsic::memcpy:
      case Intrinsic::memmove:
        // SROA can usually chew through these intrinsics, but they aren't free.
        return false;
      case Intrinsic::localescape:
        HasFrameEscape = true;
        return false;
      }
    }

    if (F == CS.getInstruction()->getParent()->getParent()) {
      // This flag will fully abort the analysis, so don't bother with anything
      // else.
      IsRecursiveCall = true;
      return false;
    }

    if (TTI.isLoweredToCall(F)) {
      // We account for the average 1 instruction per call argument setup
      // here.
      Cost += CS.arg_size() * InlineConstants::InstrCost;

      // Everything other than inline ASM will also have a significant cost
      // merely from making the call.
      if (!isa<InlineAsm>(CS.getCalledValue()))
        Cost += InlineConstants::CallPenalty;
    }

    return Base::visitCallSite(CS);
  }

  // Otherwise we're in a very special case -- an indirect function call. See
  // if we can be particularly clever about this.
  Value *Callee = CS.getCalledValue();

  // First, pay the price of the argument setup. We account for the average
  // 1 instruction per call argument setup here.
  Cost += CS.arg_size() * InlineConstants::InstrCost;

  // Next, check if this happens to be an indirect function call to a known
  // function in this inline context. If not, we've done all we can.
  Function *F = dyn_cast_or_null<Function>(SimplifiedValues.lookup(Callee));
  if (!F)
    return Base::visitCallSite(CS);

  // If we have a constant that we are calling as a function, we can peer
  // through it and see the function target. This happens not infrequently
  // during devirtualization and so we want to give it a hefty bonus for
  // inlining, but cap that bonus in the event that inlining wouldn't pan
  // out. Pretend to inline the function, with a custom threshold.
  auto IndirectCallParams = Params;
  IndirectCallParams.DefaultThreshold = InlineConstants::IndirectCallThreshold;
  CallAnalyzer CA(TTI, GetAssumptionCache, GetBFI, PSI, ORE, *F, CS,
                  IndirectCallParams);
  if (CA.analyzeCall(CS)) {
    // We were able to inline the indirect call! Subtract the cost from the
    // threshold to get the bonus we want to apply, but don't go below zero.
    Cost -= std::max(0, CA.getThreshold() - CA.getCost());
  }

  return Base::visitCallSite(CS);
}
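
// For example, if the nested analysis above finds the devirtualized callee
// inlinable with, say, CA.getCost() = 40 against CA.getThreshold() = 100,
// this call site earns a bonus of 60, i.e. Cost -= 60.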

bool CallAnalyzer::visitReturnInst(ReturnInst &RI) {
  // At least one return instruction will be free after inlining.
  bool Free = !HasReturn;
  HasReturn = true;
  return Free;
}

bool CallAnalyzer::visitBranchInst(BranchInst &BI) {
  // We model unconditional branches as essentially free -- they really
  // shouldn't exist at all, but handling them makes the behavior of the
  // inliner more regular and predictable. Interestingly, conditional branches
  // which will fold away are also free.
  return BI.isUnconditional() || isa<ConstantInt>(BI.getCondition()) ||
         dyn_cast_or_null<ConstantInt>(
             SimplifiedValues.lookup(BI.getCondition()));
}

bool CallAnalyzer::visitSelectInst(SelectInst &SI) {
  bool CheckSROA = SI.getType()->isPointerTy();
  Value *TrueVal = SI.getTrueValue();
  Value *FalseVal = SI.getFalseValue();

  Constant *TrueC = dyn_cast<Constant>(TrueVal);
  if (!TrueC)
    TrueC = SimplifiedValues.lookup(TrueVal);
  Constant *FalseC = dyn_cast<Constant>(FalseVal);
  if (!FalseC)
    FalseC = SimplifiedValues.lookup(FalseVal);
  Constant *CondC =
      dyn_cast_or_null<Constant>(SimplifiedValues.lookup(SI.getCondition()));

  if (!CondC) {
    // Select C, X, X => X
    if (TrueC == FalseC && TrueC) {
      SimplifiedValues[&SI] = TrueC;
      return true;
    }

    if (!CheckSROA)
      return Base::visitSelectInst(SI);

    std::pair<Value *, APInt> TrueBaseAndOffset =
        ConstantOffsetPtrs.lookup(TrueVal);
    std::pair<Value *, APInt> FalseBaseAndOffset =
        ConstantOffsetPtrs.lookup(FalseVal);
    if (TrueBaseAndOffset == FalseBaseAndOffset && TrueBaseAndOffset.first) {
      ConstantOffsetPtrs[&SI] = TrueBaseAndOffset;

      Value *SROAArg;
      DenseMap<Value *, int>::iterator CostIt;
      if (lookupSROAArgAndCost(TrueVal, SROAArg, CostIt))
        SROAArgValues[&SI] = SROAArg;
      return true;
    }

    return Base::visitSelectInst(SI);
  }

  // Select condition is a constant.
  Value *SelectedV = CondC->isAllOnesValue()
                         ? TrueVal
                         : (CondC->isNullValue()) ? FalseVal : nullptr;
  if (!SelectedV) {
    // Condition is a vector constant that is not all 1s or all 0s. If all
    // operands are constants, ConstantExpr::getSelect() can handle the cases
    // such as select vectors.
    if (TrueC && FalseC) {
      if (auto *C = ConstantExpr::getSelect(CondC, TrueC, FalseC)) {
        SimplifiedValues[&SI] = C;
        return true;
      }
    }
    return Base::visitSelectInst(SI);
  }

  // Condition is either all 1s or all 0s. SI can be simplified.
  if (Constant *SelectedC = dyn_cast<Constant>(SelectedV)) {
    SimplifiedValues[&SI] = SelectedC;
    return true;
  }

  if (!CheckSROA)
    return true;

  std::pair<Value *, APInt> BaseAndOffset =
      ConstantOffsetPtrs.lookup(SelectedV);
  if (BaseAndOffset.first) {
    ConstantOffsetPtrs[&SI] = BaseAndOffset;

    Value *SROAArg;
    DenseMap<Value *, int>::iterator CostIt;
    if (lookupSROAArgAndCost(SelectedV, SROAArg, CostIt))
      SROAArgValues[&SI] = SROAArg;
  }

  return true;
}

bool CallAnalyzer::visitSwitchInst(SwitchInst &SI) {
  // We model unconditional switches as free, see the comments on handling
  // branches.
  if (isa<ConstantInt>(SI.getCondition()))
    return true;
  if (Value *V = SimplifiedValues.lookup(SI.getCondition()))
    if (isa<ConstantInt>(V))
      return true;

  // Assume the most general case where the switch is lowered into
  // either a jump table, bit test, or a balanced binary tree consisting of
  // case clusters without merging adjacent clusters with the same
  // destination. We do not consider the switches that are lowered with a mix
  // of jump table/bit test/binary search tree. The cost of the switch is
  // proportional to the size of the tree or the size of jump table range.
  //
  // NB: We convert large switches which are just used to initialize large phi
  // nodes to lookup tables instead in simplify-cfg, so this shouldn't prevent
  // inlining those. It will prevent inlining in cases where the optimization
  // does not (yet) fire.

  // Maximum valid cost increase in this function.
  int CostUpperBound = INT_MAX - InlineConstants::InstrCost - 1;

  // Exit early for a large switch, assuming one case needs at least one
  // instruction.
  // FIXME: This is not true for a bit test, but ignore such case for now to
  // save compile-time.
  int64_t CostLowerBound =
      std::min((int64_t)CostUpperBound,
               (int64_t)SI.getNumCases() * InlineConstants::InstrCost + Cost);

  if (CostLowerBound > Threshold && !ComputeFullInlineCost) {
    Cost = CostLowerBound;
    return false;
  }

  unsigned JumpTableSize = 0;
  unsigned NumCaseCluster =
      TTI.getEstimatedNumberOfCaseClusters(SI, JumpTableSize);

  // If suitable for a jump table, consider the cost for the table size and
  // branch to destination.
  if (JumpTableSize) {
    int64_t JTCost = (int64_t)JumpTableSize * InlineConstants::InstrCost +
                     4 * InlineConstants::InstrCost;

    Cost = std::min((int64_t)CostUpperBound, JTCost + Cost);
    return false;
  }

  // When forming a binary search tree, the number of nodes in the tree equals
  // the number of comparisons performed when lowered. For a given number of
  // clusters, n, we can define a recursive function, f(n), to find the number
  // of nodes in the tree. The recursion is:
  // f(n) = 1 + f(n/2) + f(n - n/2), when n > 3,
  // and f(n) = n, when n <= 3.
  // This leads to a binary tree where each leaf is either f(2) or f(3) when
  // n > 3. So, the number of comparisons from leaves should be n, while the
  // number of non-leaf nodes should be:
  // 2^(log2(n) - 1) - 1
  // = 2^log2(n) * 2^-1 - 1
  // = n / 2 - 1.
  // Considering comparisons from leaf and non-leaf nodes, we can estimate the
  // number of comparisons in a simple closed form:
  // n + n / 2 - 1 = n * 3 / 2 - 1
  if (NumCaseCluster <= 3) {
    // Suppose a comparison includes one compare and one conditional branch.
    Cost += NumCaseCluster * 2 * InlineConstants::InstrCost;
    return false;
  }

  int64_t ExpectedNumberOfCompare = 3 * (int64_t)NumCaseCluster / 2 - 1;
  int64_t SwitchCost =
      ExpectedNumberOfCompare * 2 * InlineConstants::InstrCost;

  Cost = std::min((int64_t)CostUpperBound, SwitchCost + Cost);
  return false;
}
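
// For example, a switch lowered as a balanced tree over 8 case clusters is
// estimated at 3 * 8 / 2 - 1 = 11 comparisons, each modeled as one compare
// plus one conditional branch, adding 11 * 2 * InstrCost to Cost.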

bool CallAnalyzer::visitIndirectBrInst(IndirectBrInst &IBI) {
  // We never want to inline functions that contain an indirectbr. This is
  // incorrect because all the blockaddresses (in static global initializers
  // for example) would be referring to the original function, and this
  // indirect jump would jump from the inlined copy of the function into the
  // original function which is extremely undefined behavior.
  // FIXME: This logic isn't really right; we can safely inline functions with
  // indirectbr's as long as no other function or global references the
  // blockaddress of a block within the current function.
  HasIndirectBr = true;
  return false;
}

bool CallAnalyzer::visitResumeInst(ResumeInst &RI) {
  // FIXME: It's not clear that a single instruction is an accurate model for
  // the inline cost of a resume instruction.
  return false;
}

bool CallAnalyzer::visitCleanupReturnInst(CleanupReturnInst &CRI) {
  // FIXME: It's not clear that a single instruction is an accurate model for
  // the inline cost of a cleanupret instruction.
  return false;
}

bool CallAnalyzer::visitCatchReturnInst(CatchReturnInst &CRI) {
  // FIXME: It's not clear that a single instruction is an accurate model for
  // the inline cost of a catchret instruction.
  return false;
}

bool CallAnalyzer::visitUnreachableInst(UnreachableInst &I) {
  // FIXME: It might be reasonable to discount the cost of instructions leading
  // to unreachable as they have the lowest possible impact on both runtime and
  // code size.
  return true; // No actual code is needed for unreachable.
}

bool CallAnalyzer::visitInstruction(Instruction &I) {
  // Some instructions are free. All of the free intrinsics can also be
  // handled by SROA, etc.
  if (TargetTransformInfo::TCC_Free == TTI.getUserCost(&I))
    return true;

  // We found something we don't understand or can't handle. Mark any SROA-able
  // values in the operand list as no longer viable.
  for (User::op_iterator OI = I.op_begin(), OE = I.op_end(); OI != OE; ++OI)
    disableSROA(*OI);

  return false;
}

1389 /// \brief Analyze a basic block for its contribution to the inline cost.
1390 ///
1391 /// This method walks the analyzer over every instruction in the given basic
1392 /// block and accounts for their cost during inlining at this callsite. It
1393 /// aborts early if the threshold has been exceeded or an impossible to inline
1394 /// construct has been detected. It returns false if inlining is no longer
1395 /// viable, and true if inlining remains viable.
1396 bool CallAnalyzer::analyzeBlock(BasicBlock *BB,
1397  SmallPtrSetImpl<const Value *> &EphValues) {
1398  for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I) {
1399  // FIXME: Currently, the number of instructions in a function regardless of
1400  // our ability to simplify them during inline to constants or dead code,
1401  // are actually used by the vector bonus heuristic. As long as that's true,
1402  // we have to special case debug intrinsics here to prevent differences in
1403  // inlining due to debug symbols. Eventually, the number of unsimplified
1404  // instructions shouldn't factor into the cost computation, but until then,
1405  // hack around it here.
1406  if (isa<DbgInfoIntrinsic>(I))
1407  continue;
1408 
1409  // Skip ephemeral values.
1410  if (EphValues.count(&*I))
1411  continue;
1412 
1413  ++NumInstructions;
1414  if (isa<ExtractElementInst>(I) || I->getType()->isVectorTy())
1415  ++NumVectorInstructions;
1416 
1417  // If the instruction is floating point, and the target says this operation
1418  // is expensive or the function has the "use-soft-float" attribute, this may
1419  // eventually become a library call. Treat the cost as such.
1420  if (I->getType()->isFloatingPointTy()) {
1421  // If the function has the "use-soft-float" attribute, mark it as
1422  // expensive.
1423  if (TTI.getFPOpCost(I->getType()) == TargetTransformInfo::TCC_Expensive ||
1424  (F.getFnAttribute("use-soft-float").getValueAsString() == "true"))
1426  }
1427 
1428  // If the instruction simplified to a constant, there is no cost to this
1429  // instruction. Visit the instructions using our InstVisitor to account for
1430  // all of the per-instruction logic. The visit tree returns true if we
1431  // consumed the instruction in any way, and false if the instruction's base
1432  // cost should count against inlining.
1433  if (Base::visit(&*I))
1434  ++NumInstructionsSimplified;
1435  else
1437 
1438  using namespace ore;
1439  // If the visit this instruction detected an uninlinable pattern, abort.
1440  if (IsRecursiveCall || ExposesReturnsTwice || HasDynamicAlloca ||
1441  HasIndirectBr || HasFrameEscape) {
1442  if (ORE)
1443  ORE->emit([&]() {
1444  return OptimizationRemarkMissed(DEBUG_TYPE, "NeverInline",
1445  CandidateCS.getInstruction())
1446  << NV("Callee", &F)
1447  << " has uninlinable pattern and cost is not fully computed";
1448  });
1449  return false;
1450  }
1451 
1452  // If the caller is a recursive function then we don't want to inline
1453  // functions which allocate a lot of stack space because it would increase
1454  // the caller stack usage dramatically.
1455  if (IsCallerRecursive &&
1457  if (ORE)
1458  ORE->emit([&]() {
1459  return OptimizationRemarkMissed(DEBUG_TYPE, "NeverInline",
1460  CandidateCS.getInstruction())
1461  << NV("Callee", &F)
1462  << " is recursive and allocates too much stack space. Cost is "
1463  "not fully computed";
1464  });
1465  return false;
1466  }
1467 
1468  // Check if we've passed the maximum possible threshold so we don't spin in
1469  // huge basic blocks that will never inline.
1470  if (Cost >= Threshold && !ComputeFullInlineCost)
1471  return false;
1472  }
1473 
1474  return true;
1475 }
1476 
1477 /// \brief Compute the base pointer and cumulative constant offsets for V.
1478 ///
1479 /// This strips all constant offsets off of V, leaving it the base pointer, and
1480 /// accumulates the total constant offset applied in the returned constant. It
1481 /// returns null if V is not a pointer, and returns the constant '0' if there are
1482 /// no constant offsets applied.
1483 ConstantInt *CallAnalyzer::stripAndComputeInBoundsConstantOffsets(Value *&V) {
1484  if (!V->getType()->isPointerTy())
1485  return nullptr;
1486 
1487  unsigned IntPtrWidth = DL.getPointerSizeInBits();
1488  APInt Offset = APInt::getNullValue(IntPtrWidth);
1489 
1490  // Even though we don't look through PHI nodes, we could be called on an
1491  // instruction in an unreachable block, which may be on a cycle.
1492  SmallPtrSet<Value *, 4> Visited;
1493  Visited.insert(V);
1494  do {
1495  if (GEPOperator *GEP = dyn_cast<GEPOperator>(V)) {
1496  if (!GEP->isInBounds() || !accumulateGEPOffset(*GEP, Offset))
1497  return nullptr;
1498  V = GEP->getPointerOperand();
1499  } else if (Operator::getOpcode(V) == Instruction::BitCast) {
1500  V = cast<Operator>(V)->getOperand(0);
1501  } else if (GlobalAlias *GA = dyn_cast<GlobalAlias>(V)) {
1502  if (GA->isInterposable())
1503  break;
1504  V = GA->getAliasee();
1505  } else {
1506  break;
1507  }
1508  assert(V->getType()->isPointerTy() && "Unexpected operand type!");
1509  } while (Visited.insert(V).second);
1510 
1511  Type *IntPtrTy = DL.getIntPtrType(V->getContext());
1512  return cast<ConstantInt>(ConstantInt::get(IntPtrTy, Offset));
1513 }
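// Illustrative example (editorial addition, not part of the original source):
// given IR along the lines of
// \code
//   %buf = alloca [4 x i32]
//   %elt = getelementptr inbounds [4 x i32], [4 x i32]* %buf, i32 0, i32 2
// \endcode
// calling this routine on %elt rewrites V to %buf and returns the constant 8
// (two i32 elements at four bytes each) as an integer of pointer width.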
1514 
1515 /// \brief Analyze a call site for potential inlining.
1516 ///
1517 /// Returns true if inlining this call is viable, and false if it is not
1518 /// viable. It computes the cost and adjusts the threshold based on numerous
1519 /// factors and heuristics. If this method returns false but the computed cost
1520 /// is below the computed threshold, then inlining was forcibly disabled by
1521 /// some artifact of the routine.
1522 bool CallAnalyzer::analyzeCall(CallSite CS) {
1523  ++NumCallsAnalyzed;
1524 
1525  // Perform some tweaks to the cost and threshold based on the direct
1526  // callsite information.
1527 
1528  // We want to more aggressively inline vector-dense kernels, so up the
1529  // threshold, and we'll lower it if the % of vector instructions gets too
1530  // low. Note that these bonuses are somewhat arbitrary and evolved over time
1531  // by accident as much as because they are principled bonuses.
1532  //
1533  // FIXME: It would be nice to remove all such bonuses. At least it would be
1534  // nice to base the bonus values on something more scientific.
1535  assert(NumInstructions == 0);
1536  assert(NumVectorInstructions == 0);
1537 
1538  // Update the threshold based on callsite properties
1539  updateThreshold(CS, F);
1540 
1541  // Speculatively apply all possible bonuses to Threshold. If cost exceeds
1542  // this Threshold any time, and cost cannot decrease, we can stop processing
1543  // the rest of the function body.
1544  Threshold += (SingleBBBonus + VectorBonus);
1545 
1546  // Give out bonuses for the callsite, as the instructions setting them up
1547  // will be gone after inlining.
1548  Cost -= getCallsiteCost(CS, DL);
1549 
1550  // If this function uses the coldcc calling convention, prefer not to inline
1551  // it.
1552  if (F.getCallingConv() == CallingConv::Cold)
1553  Cost += InlineConstants::ColdccPenalty;
1554 
1555  // Check if we're done. This can happen due to bonuses and penalties.
1556  if (Cost >= Threshold && !ComputeFullInlineCost)
1557  return false;
1558 
1559  if (F.empty())
1560  return true;
1561 
1562  Function *Caller = CS.getInstruction()->getParent()->getParent();
1563  // Check if the caller function is recursive itself.
1564  for (User *U : Caller->users()) {
1565  CallSite Site(U);
1566  if (!Site)
1567  continue;
1568  Instruction *I = Site.getInstruction();
1569  if (I->getParent()->getParent() == Caller) {
1570  IsCallerRecursive = true;
1571  break;
1572  }
1573  }
1574 
1575  // Populate our simplified values by mapping from function arguments to call
1576  // arguments with known important simplifications.
1577  CallSite::arg_iterator CAI = CS.arg_begin();
1578  for (Function::arg_iterator FAI = F.arg_begin(), FAE = F.arg_end();
1579  FAI != FAE; ++FAI, ++CAI) {
1580  assert(CAI != CS.arg_end());
1581  if (Constant *C = dyn_cast<Constant>(CAI))
1582  SimplifiedValues[&*FAI] = C;
1583 
1584  Value *PtrArg = *CAI;
1585  if (ConstantInt *C = stripAndComputeInBoundsConstantOffsets(PtrArg)) {
1586  ConstantOffsetPtrs[&*FAI] = std::make_pair(PtrArg, C->getValue());
1587 
1588  // We can SROA any pointer arguments derived from alloca instructions.
1589  if (isa<AllocaInst>(PtrArg)) {
1590  SROAArgValues[&*FAI] = PtrArg;
1591  SROAArgCosts[PtrArg] = 0;
1592  }
1593  }
1594  }
1595  NumConstantArgs = SimplifiedValues.size();
1596  NumConstantOffsetPtrArgs = ConstantOffsetPtrs.size();
1597  NumAllocaArgs = SROAArgValues.size();
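// Illustrative example (editorial addition): for a call site such as
// \code
//   call void @f(i32 42, i32* %buf)   ; %buf is an alloca in the caller
// \endcode
// the constant 42 seeds SimplifiedValues for f's first formal argument, while
// %buf seeds ConstantOffsetPtrs and SROAArgValues for the second, making SROA
// cost savings available to the per-block analysis below.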
1598 
1599  // FIXME: If a caller has multiple calls to a callee, we end up recomputing
1600  // the ephemeral values multiple times (and they're completely determined by
1601  // the callee, so this is purely duplicate work).
1602  SmallPtrSet<const Value *, 32> EphValues;
1603  CodeMetrics::collectEphemeralValues(&F, &GetAssumptionCache(F), EphValues);
1604 
1605  // The worklist of live basic blocks in the callee *after* inlining. We avoid
1606  // adding basic blocks of the callee which can be proven to be dead for this
1607  // particular call site in order to get more accurate cost estimates. This
1608  // requires a somewhat heavyweight iteration pattern: we need to walk the
1609  // basic blocks in a breadth-first order as we insert live successors. To
1610  // accomplish this, and to keep iterations cheap since we exit early after
1611  // crossing our threshold, we use a small-size optimized SetVector.
1612  typedef SetVector<BasicBlock *, SmallVector<BasicBlock *, 16>,
1613  SmallPtrSet<BasicBlock *, 16>>
1614  BBSetVector;
1615  BBSetVector BBWorklist;
1616  BBWorklist.insert(&F.getEntryBlock());
1617  bool SingleBB = true;
1618  // Note that we *must not* cache the size: this loop grows the worklist.
1619  for (unsigned Idx = 0; Idx != BBWorklist.size(); ++Idx) {
1620  // Bail out the moment we cross the threshold. This means we'll under-count
1621  // the cost, but only when undercounting doesn't matter.
1622  if (Cost >= Threshold && !ComputeFullInlineCost)
1623  break;
1624 
1625  BasicBlock *BB = BBWorklist[Idx];
1626  if (BB->empty())
1627  continue;
1628 
1629  // Disallow inlining a blockaddress. A blockaddress only has defined
1630  // behavior for an indirect branch in the same function, and we do not
1631  // currently support inlining indirect branches. But, the inliner may not
1632  // see an indirect branch that ends up being dead code at a particular call
1633  // site. If the blockaddress escapes the function, e.g., via a global
1634  // variable, inlining may lead to an invalid cross-function reference.
1635  if (BB->hasAddressTaken())
1636  return false;
1637 
1638  // Analyze the cost of this block. If we blow through the threshold, this
1639  // returns false, and we can bail out.
1640  if (!analyzeBlock(BB, EphValues))
1641  return false;
1642 
1643  TerminatorInst *TI = BB->getTerminator();
1644 
1645  // Add in the live successors by first checking whether we have a terminator
1646  // that may be simplified based on the values simplified by this call.
1647  if (BranchInst *BI = dyn_cast<BranchInst>(TI)) {
1648  if (BI->isConditional()) {
1649  Value *Cond = BI->getCondition();
1650  if (ConstantInt *SimpleCond =
1651  dyn_cast_or_null<ConstantInt>(SimplifiedValues.lookup(Cond))) {
1652  BBWorklist.insert(BI->getSuccessor(SimpleCond->isZero() ? 1 : 0));
1653  continue;
1654  }
1655  }
1656  } else if (SwitchInst *SI = dyn_cast<SwitchInst>(TI)) {
1657  Value *Cond = SI->getCondition();
1658  if (ConstantInt *SimpleCond =
1659  dyn_cast_or_null<ConstantInt>(SimplifiedValues.lookup(Cond))) {
1660  BBWorklist.insert(SI->findCaseValue(SimpleCond)->getCaseSuccessor());
1661  continue;
1662  }
1663  }
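// Example (editorial addition): if a constant argument folded BI's condition
// to 'i1 true' above, only getSuccessor(0) is enqueued, so blocks reachable
// solely through the false edge never contribute to Cost.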
1664 
1665  // If we're unable to select a particular successor, just count all of
1666  // them.
1667  for (unsigned TIdx = 0, TSize = TI->getNumSuccessors(); TIdx != TSize;
1668  ++TIdx)
1669  BBWorklist.insert(TI->getSuccessor(TIdx));
1670 
1671  // If we had any successors at this point, then post-inlining is likely to
1672  // have them as well. Note that we assume any basic blocks which existed
1673  // due to branches or switches which folded above will also fold after
1674  // inlining.
1675  if (SingleBB && TI->getNumSuccessors() > 1) {
1676  // Take off the bonus we applied to the threshold.
1677  Threshold -= SingleBBBonus;
1678  SingleBB = false;
1679  }
1680  }
1681 
1682  bool OnlyOneCallAndLocalLinkage =
1683  F.hasLocalLinkage() && F.hasOneUse() && &F == CS.getCalledFunction();
1684  // If this is a noduplicate call, we can still inline as long as
1685  // inlining this would cause the removal of the caller (so the instruction
1686  // is not actually duplicated, just moved).
1687  if (!OnlyOneCallAndLocalLinkage && ContainsNoDuplicateCall)
1688  return false;
1689 
1690  // We applied the maximum possible vector bonus at the beginning. Now,
1691  // subtract the excess bonus, if any, from the Threshold before
1692  // comparing against Cost.
1693  if (NumVectorInstructions <= NumInstructions / 10)
1694  Threshold -= VectorBonus;
1695  else if (NumVectorInstructions <= NumInstructions / 2)
1696  Threshold -= VectorBonus/2;
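// Worked example (editorial addition): with NumInstructions == 100, a callee
// containing 5 vector instructions (<= 10%) loses the entire VectorBonus,
// while one containing 30 (<= 50%) loses only VectorBonus / 2.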
1697 
1698  return Cost < std::max(1, Threshold);
1699 }
1700 
1701 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1702 /// \brief Dump stats about this call's analysis.
1703 LLVM_DUMP_METHOD void CallAnalyzer::dump() {
1704 #define DEBUG_PRINT_STAT(x) dbgs() << " " #x ": " << x << "\n"
1705  DEBUG_PRINT_STAT(NumConstantArgs);
1706  DEBUG_PRINT_STAT(NumConstantOffsetPtrArgs);
1707  DEBUG_PRINT_STAT(NumAllocaArgs);
1708  DEBUG_PRINT_STAT(NumConstantPtrCmps);
1709  DEBUG_PRINT_STAT(NumConstantPtrDiffs);
1710  DEBUG_PRINT_STAT(NumInstructionsSimplified);
1711  DEBUG_PRINT_STAT(NumInstructions);
1712  DEBUG_PRINT_STAT(SROACostSavings);
1713  DEBUG_PRINT_STAT(SROACostSavingsLost);
1714  DEBUG_PRINT_STAT(ContainsNoDuplicateCall);
1715  DEBUG_PRINT_STAT(Cost);
1716  DEBUG_PRINT_STAT(Threshold);
1717 #undef DEBUG_PRINT_STAT
1718 }
1719 #endif
1720 
1721 /// \brief Test that there are no attribute conflicts between Caller and Callee
1722 /// that prevent inlining.
1723 static bool functionsHaveCompatibleAttributes(Function *Caller,
1724  Function *Callee,
1725  TargetTransformInfo &TTI) {
1726  return TTI.areInlineCompatible(Caller, Callee) &&
1727  AttributeFuncs::areInlineCompatible(*Caller, *Callee);
1728 }
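// Example (editorial addition, assuming typical target behavior): a callee
// compiled with target features its caller lacks, say an "+avx2" callee and a
// plain x86-64 caller, fails TTI.areInlineCompatible and is never inlined,
// since the inlined vector code could be unsafe to execute in the caller.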
1729 
1730 int llvm::getCallsiteCost(CallSite CS, const DataLayout &DL) {
1731  int Cost = 0;
1732  for (unsigned I = 0, E = CS.arg_size(); I != E; ++I) {
1733  if (CS.isByValArgument(I)) {
1734  // We approximate the number of loads and stores needed by dividing the
1735  // size of the byval type by the target's pointer size.
1736  PointerType *PTy = cast<PointerType>(CS.getArgument(I)->getType());
1737  unsigned TypeSize = DL.getTypeSizeInBits(PTy->getElementType());
1738  unsigned PointerSize = DL.getPointerSizeInBits();
1739  // Ceiling division.
1740  unsigned NumStores = (TypeSize + PointerSize - 1) / PointerSize;
1741 
1742  // If it generates more than 8 stores it is likely to be expanded as an
1743  // inline memcpy so we take that as an upper bound. Otherwise we assume
1744  // one load and one store per word copied.
1745  // FIXME: The maxStoresPerMemcpy setting from the target should be used
1746  // here instead of a magic number of 8, but it's not available via
1747  // DataLayout.
1748  NumStores = std::min(NumStores, 8U);
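// Worked example (editorial addition): a byval struct of 96 bytes on a
// 64-bit target gives TypeSize = 768 and PointerSize = 64, so NumStores =
// ceil(768 / 64) = 12, capped to 8 here; the charge below is then
// 2 * 8 * InlineConstants::InstrCost.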
1749 
1750  Cost += 2 * NumStores * InlineConstants::InstrCost;
1751  } else {
1752  // For non-byval arguments subtract off one instruction per call
1753  // argument.
1754  Cost += InlineConstants::InstrCost;
1755  }
1756  }
1757  // The call instruction also disappears after inlining.
1758  Cost += InlineConstants::InstrCost + InlineConstants::CallPenalty;
1759  return Cost;
1760 }
1761 
1762 InlineCost llvm::getInlineCost(
1763  CallSite CS, const InlineParams &Params, TargetTransformInfo &CalleeTTI,
1764  std::function<AssumptionCache &(Function &)> &GetAssumptionCache,
1765  Optional<function_ref<BlockFrequencyInfo &(Function &)>> GetBFI,
1766  ProfileSummaryInfo *PSI, OptimizationRemarkEmitter *ORE) {
1767  return getInlineCost(CS, CS.getCalledFunction(), Params, CalleeTTI,
1768  GetAssumptionCache, GetBFI, PSI, ORE);
1769 }
1770 
1771 InlineCost llvm::getInlineCost(
1772  CallSite CS, Function *Callee, const InlineParams &Params,
1773  TargetTransformInfo &CalleeTTI,
1774  std::function<AssumptionCache &(Function &)> &GetAssumptionCache,
1775  Optional<function_ref<BlockFrequencyInfo &(Function &)>> GetBFI,
1776  ProfileSummaryInfo *PSI, OptimizationRemarkEmitter *ORE) {
1777 
1778  // Cannot inline indirect calls.
1779  if (!Callee)
1780  return llvm::InlineCost::getNever();
1781 
1782  // Calls to functions with always-inline attributes should be inlined
1783  // whenever possible.
1784  if (CS.hasFnAttr(Attribute::AlwaysInline)) {
1785  if (isInlineViable(*Callee))
1786  return llvm::InlineCost::getAlways();
1787  return llvm::InlineCost::getNever();
1788  }
1789 
1790  // Never inline functions with conflicting attributes (unless callee has
1791  // always-inline attribute).
1792  Function *Caller = CS.getCaller();
1793  if (!functionsHaveCompatibleAttributes(Caller, Callee, CalleeTTI))
1794  return llvm::InlineCost::getNever();
1795 
1796  // Don't inline this call if the caller has the optnone attribute.
1797  if (Caller->hasFnAttribute(Attribute::OptimizeNone))
1798  return llvm::InlineCost::getNever();
1799 
1800  // Don't inline functions which can be interposed at link-time. Don't inline
1801  // functions marked noinline or call sites marked noinline.
1802  // Note: inlining non-exact non-interposable functions is fine, since we know
1803  // we have *a* correct implementation of the source level function.
1804  if (Callee->isInterposable() || Callee->hasFnAttribute(Attribute::NoInline) ||
1805  CS.isNoInline())
1806  return llvm::InlineCost::getNever();
1807 
1808  DEBUG(llvm::dbgs() << " Analyzing call of " << Callee->getName()
1809  << "... (caller:" << Caller->getName() << ")\n");
1810 
1811  CallAnalyzer CA(CalleeTTI, GetAssumptionCache, GetBFI, PSI, ORE, *Callee, CS,
1812  Params);
1813  bool ShouldInline = CA.analyzeCall(CS);
1814 
1815  DEBUG(CA.dump());
1816 
1817  // Check if there was a reason to force inlining or no inlining.
1818  if (!ShouldInline && CA.getCost() < CA.getThreshold())
1819  return InlineCost::getNever();
1820  if (ShouldInline && CA.getCost() >= CA.getThreshold())
1821  return InlineCost::getAlways();
1822 
1823  return llvm::InlineCost::get(CA.getCost(), CA.getThreshold());
1824 }
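// Client-side sketch (editorial addition; assumed usage, not code from this
// file) of how the returned tri-state is typically consumed:
// \code
//   InlineCost IC = getInlineCost(CS, Params, CalleeTTI, GetAssumptionCache,
//                                 GetBFI, PSI, &ORE);
//   if (IC.isAlways()) {       // forced inline (always-inline attribute)
//   } else if (IC.isNever()) { // forced non-inline (uninlinable construct)
//   } else if (IC) {           // cost < threshold: profitable to inline
//   }
// \endcode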
1825 
1826 bool llvm::isInlineViable(Function &F) {
1827  bool ReturnsTwice = F.hasFnAttribute(Attribute::ReturnsTwice);
1828  for (Function::iterator BI = F.begin(), BE = F.end(); BI != BE; ++BI) {
1829  // Disallow inlining of functions which contain indirect branches or
1830  // blockaddresses.
1831  if (isa<IndirectBrInst>(BI->getTerminator()) || BI->hasAddressTaken())
1832  return false;
1833 
1834  for (auto &II : *BI) {
1835  CallSite CS(&II);
1836  if (!CS)
1837  continue;
1838 
1839  // Disallow recursive calls.
1840  if (&F == CS.getCalledFunction())
1841  return false;
1842 
1843  // Disallow calls which expose returns-twice to a function not previously
1844  // attributed as such.
1845  if (!ReturnsTwice && CS.isCall() &&
1846  cast<CallInst>(CS.getInstruction())->canReturnTwice())
1847  return false;
1848 
1849  // Disallow inlining functions that call @llvm.localescape. Doing this
1850  // correctly would require major changes to the inliner.
1851  if (CS.getCalledFunction() &&
1852  CS.getCalledFunction()->getIntrinsicID() ==
1853  llvm::Intrinsic::localescape)
1854  return false;
1855  }
1856  }
1857 
1858  return true;
1859 }
1860 
1861 // APIs to create InlineParams based on command line flags and/or other
1862 // parameters.
1863 
1864 InlineParams llvm::getInlineParams(int Threshold) {
1865  InlineParams Params;
1866 
1867  // This field is the threshold to use for a callee by default. This is
1868  // derived from one or more of:
1869  // * optimization or size-optimization levels,
1870  // * a value passed to createFunctionInliningPass function, or
1871  // * the -inline-threshold flag.
1872  // If the -inline-threshold flag is explicitly specified, that is used
1873  // irrespective of anything else.
1874  if (InlineThreshold.getNumOccurrences() > 0)
1875  Params.DefaultThreshold = InlineThreshold;
1876  else
1877  Params.DefaultThreshold = Threshold;
1878 
1879  // Set the HintThreshold knob from the -inlinehint-threshold.
1880  Params.HintThreshold = HintThreshold;
1881 
1882  // Set the HotCallSiteThreshold knob from the -hot-callsite-threshold.
1883  Params.HotCallSiteThreshold = HotCallSiteThreshold;
1884 
1885  // If the -locally-hot-callsite-threshold is explicitly specified, use it to
1886  // populate LocallyHotCallSiteThreshold. Later, we populate
1887  // Params.LocallyHotCallSiteThreshold from -locally-hot-callsite-threshold if
1888  // we know that optimization level is O3 (in the getInlineParams variant that
1889  // takes the opt and size levels).
1890  // FIXME: Remove this check (and make the assignment unconditional) after
1891  // addressing size regression issues at O2.
1892  if (LocallyHotCallSiteThreshold.getNumOccurrences() > 0)
1893  Params.LocallyHotCallSiteThreshold = LocallyHotCallSiteThreshold;
1894 
1895  // Set the ColdCallSiteThreshold knob from the -inline-cold-callsite-threshold.
1896  Params.ColdCallSiteThreshold = ColdCallSiteThreshold;
1897 
1898  // Set the OptMinSizeThreshold and OptSizeThreshold params only if the
1899  // -inline-threshold commandline option is not explicitly given. If that
1900  // option is present, then its value applies even for callees with size and
1901  // minsize attributes.
1902  // If the -inline-threshold is not specified, set the ColdThreshold from the
1903  // -inlinecold-threshold even if it is not explicitly passed. If
1904  // -inline-threshold is specified, then -inlinecold-threshold needs to be
1905  // explicitly specified to set the ColdThreshold knob
1906  if (InlineThreshold.getNumOccurrences() == 0) {
1907  Params.OptMinSizeThreshold = InlineConstants::OptMinSizeThreshold;
1908  Params.OptSizeThreshold = InlineConstants::OptSizeThreshold;
1909  Params.ColdThreshold = ColdThreshold;
1910  } else if (ColdThreshold.getNumOccurrences() > 0) {
1911  Params.ColdThreshold = ColdThreshold;
1912  }
1913  return Params;
1914 }
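// Precedence example (editorial addition): running with an explicit
// -inline-threshold=500 makes getInlineParams(225) return a DefaultThreshold
// of 500, since a command-line occurrence overrides the Threshold argument.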
1915 
1916 InlineParams llvm::getInlineParams() {
1917  return getInlineParams(InlineThreshold);
1918 }
1919 
1920 // Compute the default threshold for inlining based on the opt level and the
1921 // size opt level.
1922 static int computeThresholdFromOptLevels(unsigned OptLevel,
1923  unsigned SizeOptLevel) {
1924  if (OptLevel > 2)
1925  return InlineConstants::OptAggressiveThreshold;
1926  if (SizeOptLevel == 1) // -Os
1927  return InlineConstants::OptSizeThreshold;
1928  if (SizeOptLevel == 2) // -Oz
1929  return InlineConstants::OptMinSizeThreshold;
1930  return InlineThreshold;
1931 }
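// Example (editorial addition): computeThresholdFromOptLevels(3, 0) selects
// InlineConstants::OptAggressiveThreshold, (2, 1) selects OptSizeThreshold,
// (2, 2) selects OptMinSizeThreshold, and anything else falls back to the
// -inline-threshold default.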
1932 
1933 InlineParams llvm::getInlineParams(unsigned OptLevel, unsigned SizeOptLevel) {
1934  auto Params =
1935  getInlineParams(computeThresholdFromOptLevels(OptLevel, SizeOptLevel));
1936  // At O3, use the value of -locally-hot-callsite-threshold option to populate
1937  // Params.LocallyHotCallSiteThreshold. Below O3, this flag has effect only
1938  // when it is specified explicitly.
1939  if (OptLevel > 2)
1940  Params.LocallyHotCallSiteThreshold = LocallyHotCallSiteThreshold;
1941  return Params;
1942 }