//===- InlineCost.cpp - Cost analysis for inliner -------------------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file implements inline cost analysis.
//
//===----------------------------------------------------------------------===//

#include "llvm/Analysis/InlineCost.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Analysis/BlockFrequencyInfo.h"
#include "llvm/Analysis/CodeMetrics.h"
#include "llvm/Analysis/ConstantFolding.h"
#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/Analysis/ProfileSummaryInfo.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/IR/CallSite.h"
#include "llvm/IR/CallingConv.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/GetElementPtrTypeIterator.h"
#include "llvm/IR/GlobalAlias.h"
#include "llvm/IR/InstVisitor.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Operator.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"

using namespace llvm;

#define DEBUG_TYPE "inline-cost"

STATISTIC(NumCallsAnalyzed, "Number of call sites analyzed");

static cl::opt<int> InlineThreshold(
    "inline-threshold", cl::Hidden, cl::init(225), cl::ZeroOrMore,
    cl::desc("Control the amount of inlining to perform (default = 225)"));

static cl::opt<int> HintThreshold(
    "inlinehint-threshold", cl::Hidden, cl::init(325),
    cl::desc("Threshold for inlining functions with inline hint"));

static cl::opt<int>
    ColdCallSiteThreshold("inline-cold-callsite-threshold", cl::Hidden,
                          cl::init(45),
                          cl::desc("Threshold for inlining cold callsites"));

// We introduce this threshold to help performance of instrumentation based
// PGO before we actually hook up inliner with analysis passes such as BPI and
// BFI.
static cl::opt<int> ColdThreshold(
    "inlinecold-threshold", cl::Hidden, cl::init(45),
    cl::desc("Threshold for inlining functions with cold attribute"));

static cl::opt<int>
    HotCallSiteThreshold("hot-callsite-threshold", cl::Hidden, cl::init(3000),
                         cl::ZeroOrMore,
                         cl::desc("Threshold for hot callsites "));

static cl::opt<int> LocallyHotCallSiteThreshold(
    "locally-hot-callsite-threshold", cl::Hidden, cl::init(525), cl::ZeroOrMore,
    cl::desc("Threshold for locally hot callsites "));

static cl::opt<int> ColdCallSiteRelFreq(
    "cold-callsite-rel-freq", cl::Hidden, cl::init(2), cl::ZeroOrMore,
    cl::desc("Maximum block frequency, expressed as a percentage of caller's "
             "entry frequency, for a callsite to be cold in the absence of "
             "profile information."));

static cl::opt<int> HotCallSiteRelFreq(
    "hot-callsite-rel-freq", cl::Hidden, cl::init(60), cl::ZeroOrMore,
    cl::desc("Minimum block frequency, expressed as a multiple of caller's "
             "entry frequency, for a callsite to be hot in the absence of "
             "profile information."));

static cl::opt<bool> OptComputeFullInlineCost(
    "inline-cost-full", cl::Hidden, cl::init(false),
    cl::desc("Compute the full inline cost of a call site even when the cost "
             "exceeds the threshold."));
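
// Illustrative usage (added commentary, not part of the original source):
// these cl::opt flags are read from the command line of any tool linking this
// code, so one can steer the analysis with something like
//
//   opt -inline -inline-threshold=500 -inline-cost-full test.ll -S
//
// The exact pass pipeline spelling depends on the pass manager in use, so
// treat this invocation as a sketch.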

namespace {

class CallAnalyzer : public InstVisitor<CallAnalyzer, bool> {
  typedef InstVisitor<CallAnalyzer, bool> Base;
  friend class InstVisitor<CallAnalyzer, bool>;

  /// The TargetTransformInfo available for this compilation.
  const TargetTransformInfo &TTI;

  /// Getter for the cache of @llvm.assume intrinsics.
  std::function<AssumptionCache &(Function &)> &GetAssumptionCache;

  /// Getter for BlockFrequencyInfo.
  Optional<function_ref<BlockFrequencyInfo &(Function &)>> &GetBFI;

  /// Profile summary information.
  ProfileSummaryInfo *PSI;

  /// The called function.
  Function &F;

  // Cache the DataLayout since we use it a lot.
  const DataLayout &DL;

  /// The OptimizationRemarkEmitter available for this compilation.
  OptimizationRemarkEmitter *ORE;

  /// The candidate callsite being analyzed. Please do not use this to do
  /// analysis in the caller function; we want the inline cost query to be
  /// easily cacheable. Instead, use the cover function paramHasAttr.
  CallSite CandidateCS;

  /// Tunable parameters that control the analysis.
  const InlineParams &Params;

  int Threshold;
  int Cost;
  bool ComputeFullInlineCost;

  bool IsCallerRecursive;
  bool IsRecursiveCall;
  bool ExposesReturnsTwice;
  bool HasDynamicAlloca;
  bool ContainsNoDuplicateCall;
  bool HasReturn;
  bool HasIndirectBr;
  bool HasFrameEscape;

  /// Number of bytes allocated statically by the callee.
  uint64_t AllocatedSize;
  unsigned NumInstructions, NumVectorInstructions;
  int VectorBonus, TenPercentVectorBonus;
  // Bonus to be applied when the callee has only one reachable basic block.
  int SingleBBBonus;

  /// While we walk the potentially-inlined instructions, we build up and
  /// maintain a mapping of simplified values specific to this callsite. The
  /// idea is to propagate any special information we have about arguments to
  /// this call through the inlinable section of the function, and account for
  /// likely simplifications post-inlining. The most important aspect we track
  /// is CFG altering simplifications -- when we prove a basic block dead, that
  /// can cause dramatic shifts in the cost of inlining a function.
  DenseMap<Value *, Constant *> SimplifiedValues;

  /// Keep track of the values which map back (through function arguments) to
  /// allocas on the caller stack which could be simplified through SROA.
  DenseMap<Value *, Value *> SROAArgValues;

  /// The mapping of caller Alloca values to their accumulated cost savings. If
  /// we have to disable SROA for one of the allocas, this tells us how much
  /// cost must be added.
  DenseMap<Value *, int> SROAArgCosts;
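  // Illustrative note (added commentary, not in the original source): a
  // simple load or store through an SROA-able pointer credits
  // InlineConstants::InstrCost of savings to its alloca via
  // accumulateSROACost(); if the pointer later escapes, disableSROA() adds
  // the accumulated savings back into Cost and drops the entry.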

  /// Keep track of values which map to a pointer base and constant offset.
  DenseMap<Value *, std::pair<Value *, APInt>> ConstantOffsetPtrs;

  // Custom simplification helper routines.
  bool isAllocaDerivedArg(Value *V);
  bool lookupSROAArgAndCost(Value *V, Value *&Arg,
                            DenseMap<Value *, int>::iterator &CostIt);
  void disableSROA(DenseMap<Value *, int>::iterator CostIt);
  void disableSROA(Value *V);
  void accumulateSROACost(DenseMap<Value *, int>::iterator CostIt,
                          int InstructionCost);
  bool isGEPFree(GetElementPtrInst &GEP);
  bool accumulateGEPOffset(GEPOperator &GEP, APInt &Offset);
  bool simplifyCallSite(Function *F, CallSite CS);
  template <typename Callable>
  bool simplifyInstruction(Instruction &I, Callable Evaluate);
  ConstantInt *stripAndComputeInBoundsConstantOffsets(Value *&V);

  /// Return true if the given argument to the function being considered for
  /// inlining has the given attribute set either at the call site or the
  /// function declaration. Primarily used to inspect call site specific
  /// attributes since these can be more precise than the ones on the callee
  /// itself.
  bool paramHasAttr(Argument *A, Attribute::AttrKind Attr);

  /// Return true if the given value is known non null within the callee if
  /// inlined through this particular callsite.
  bool isKnownNonNullInCallee(Value *V);

  /// Update Threshold based on callsite properties such as callee
  /// attributes and callee hotness for PGO builds. The Callee is explicitly
  /// passed to support analyzing indirect calls whose target is inferred by
  /// analysis.
  void updateThreshold(CallSite CS, Function &Callee);

  /// Return true if size growth is allowed when inlining the callee at CS.
  bool allowSizeGrowth(CallSite CS);

  /// Return true if \p CS is a cold callsite.
  bool isColdCallSite(CallSite CS, BlockFrequencyInfo *CallerBFI);

  /// Return a higher threshold if \p CS is a hot callsite.
  Optional<int> getHotCallSiteThreshold(CallSite CS,
                                        BlockFrequencyInfo *CallerBFI);

  // Custom analysis routines.
  bool analyzeBlock(BasicBlock *BB, SmallPtrSetImpl<const Value *> &EphValues);

  // Disable several entry points to the visitor so we don't accidentally use
  // them by declaring but not defining them here.
  void visit(Module *);
  void visit(Module &);
  void visit(Function *);
  void visit(Function &);
  void visit(BasicBlock *);
  void visit(BasicBlock &);

  // Provide base case for our instruction visit.
  bool visitInstruction(Instruction &I);

  // Our visit overrides.
  bool visitAlloca(AllocaInst &I);
  bool visitPHI(PHINode &I);
  bool visitGetElementPtr(GetElementPtrInst &I);
  bool visitBitCast(BitCastInst &I);
  bool visitPtrToInt(PtrToIntInst &I);
  bool visitIntToPtr(IntToPtrInst &I);
  bool visitCastInst(CastInst &I);
  bool visitUnaryInstruction(UnaryInstruction &I);
  bool visitCmpInst(CmpInst &I);
  bool visitAnd(BinaryOperator &I);
  bool visitOr(BinaryOperator &I);
  bool visitSub(BinaryOperator &I);
  bool visitBinaryOperator(BinaryOperator &I);
  bool visitLoad(LoadInst &I);
  bool visitStore(StoreInst &I);
  bool visitExtractValue(ExtractValueInst &I);
  bool visitInsertValue(InsertValueInst &I);
  bool visitCallSite(CallSite CS);
  bool visitReturnInst(ReturnInst &RI);
  bool visitBranchInst(BranchInst &BI);
  bool visitSwitchInst(SwitchInst &SI);
  bool visitIndirectBrInst(IndirectBrInst &IBI);
  bool visitResumeInst(ResumeInst &RI);
  bool visitCleanupReturnInst(CleanupReturnInst &RI);
  bool visitCatchReturnInst(CatchReturnInst &RI);
  bool visitUnreachableInst(UnreachableInst &I);

public:
  CallAnalyzer(const TargetTransformInfo &TTI,
               std::function<AssumptionCache &(Function &)> &GetAssumptionCache,
               Optional<function_ref<BlockFrequencyInfo &(Function &)>> &GetBFI,
               ProfileSummaryInfo *PSI, OptimizationRemarkEmitter *ORE,
               Function &Callee, CallSite CSArg, const InlineParams &Params)
      : TTI(TTI), GetAssumptionCache(GetAssumptionCache), GetBFI(GetBFI),
        PSI(PSI), F(Callee), DL(F.getParent()->getDataLayout()), ORE(ORE),
        CandidateCS(CSArg), Params(Params), Threshold(Params.DefaultThreshold),
        Cost(0), ComputeFullInlineCost(OptComputeFullInlineCost ||
                                       Params.ComputeFullInlineCost || ORE),
        IsCallerRecursive(false), IsRecursiveCall(false),
        ExposesReturnsTwice(false), HasDynamicAlloca(false),
        ContainsNoDuplicateCall(false), HasReturn(false), HasIndirectBr(false),
        HasFrameEscape(false), AllocatedSize(0), NumInstructions(0),
        NumVectorInstructions(0), VectorBonus(0), SingleBBBonus(0),
        NumConstantArgs(0), NumConstantOffsetPtrArgs(0), NumAllocaArgs(0),
        NumConstantPtrCmps(0), NumConstantPtrDiffs(0),
        NumInstructionsSimplified(0), SROACostSavings(0),
        SROACostSavingsLost(0) {}

  bool analyzeCall(CallSite CS);

  int getThreshold() { return Threshold; }
  int getCost() { return Cost; }

  // Keep a bunch of stats about the cost savings found so we can print them
  // out when debugging.
  unsigned NumConstantArgs;
  unsigned NumConstantOffsetPtrArgs;
  unsigned NumAllocaArgs;
  unsigned NumConstantPtrCmps;
  unsigned NumConstantPtrDiffs;
  unsigned NumInstructionsSimplified;
  unsigned SROACostSavings;
  unsigned SROACostSavingsLost;

  void dump();
};

} // namespace

/// \brief Test whether the given value is an Alloca-derived function argument.
bool CallAnalyzer::isAllocaDerivedArg(Value *V) {
  return SROAArgValues.count(V);
}

/// \brief Lookup the SROA-candidate argument and cost iterator which V maps to.
/// Returns false if V does not map to a SROA-candidate.
bool CallAnalyzer::lookupSROAArgAndCost(
    Value *V, Value *&Arg, DenseMap<Value *, int>::iterator &CostIt) {
  if (SROAArgValues.empty() || SROAArgCosts.empty())
    return false;

  DenseMap<Value *, Value *>::iterator ArgIt = SROAArgValues.find(V);
  if (ArgIt == SROAArgValues.end())
    return false;

  Arg = ArgIt->second;
  CostIt = SROAArgCosts.find(Arg);
  return CostIt != SROAArgCosts.end();
}

/// \brief Disable SROA for the candidate marked by this cost iterator.
///
/// This marks the candidate as no longer viable for SROA, and adds the cost
/// savings associated with it back into the inline cost measurement.
void CallAnalyzer::disableSROA(DenseMap<Value *, int>::iterator CostIt) {
  // If we're no longer able to perform SROA we need to undo its cost savings
  // and prevent subsequent analysis.
  Cost += CostIt->second;
  SROACostSavings -= CostIt->second;
  SROACostSavingsLost += CostIt->second;
  SROAArgCosts.erase(CostIt);
}

/// \brief If 'V' maps to a SROA candidate, disable SROA for it.
void CallAnalyzer::disableSROA(Value *V) {
  Value *SROAArg;
  DenseMap<Value *, int>::iterator CostIt;
  if (lookupSROAArgAndCost(V, SROAArg, CostIt))
    disableSROA(CostIt);
}

/// \brief Accumulate the given cost for a particular SROA candidate.
void CallAnalyzer::accumulateSROACost(DenseMap<Value *, int>::iterator CostIt,
                                      int InstructionCost) {
  CostIt->second += InstructionCost;
  SROACostSavings += InstructionCost;
}

/// \brief Accumulate a constant GEP offset into an APInt if possible.
///
/// Returns false if unable to compute the offset for any reason. Respects any
/// simplified values known during the analysis of this callsite.
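///
/// Illustrative example (added commentary, not in the original source): for
/// an inbounds GEP such as
///   %f = getelementptr inbounds {i32, i32}, {i32, i32}* %p, i32 0, i32 1
/// with 64-bit pointers, the struct index contributes the field offset and
/// Offset accumulates to 4.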
bool CallAnalyzer::accumulateGEPOffset(GEPOperator &GEP, APInt &Offset) {
  unsigned IntPtrWidth = DL.getPointerSizeInBits();
  assert(IntPtrWidth == Offset.getBitWidth());

  for (gep_type_iterator GTI = gep_type_begin(GEP), GTE = gep_type_end(GEP);
       GTI != GTE; ++GTI) {
    ConstantInt *OpC = dyn_cast<ConstantInt>(GTI.getOperand());
    if (!OpC)
      if (Constant *SimpleOp = SimplifiedValues.lookup(GTI.getOperand()))
        OpC = dyn_cast<ConstantInt>(SimpleOp);
    if (!OpC)
      return false;
    if (OpC->isZero())
      continue;

    // Handle a struct index, which adds its field offset to the pointer.
    if (StructType *STy = GTI.getStructTypeOrNull()) {
      unsigned ElementIdx = OpC->getZExtValue();
      const StructLayout *SL = DL.getStructLayout(STy);
      Offset += APInt(IntPtrWidth, SL->getElementOffset(ElementIdx));
      continue;
    }

    APInt TypeSize(IntPtrWidth, DL.getTypeAllocSize(GTI.getIndexedType()));
    Offset += OpC->getValue().sextOrTrunc(IntPtrWidth) * TypeSize;
  }
  return true;
}

/// \brief Use TTI to check whether a GEP is free.
///
/// Respects any simplified values known during the analysis of this callsite.
bool CallAnalyzer::isGEPFree(GetElementPtrInst &GEP) {
  SmallVector<Value *, 4> Operands;
  Operands.push_back(GEP.getOperand(0));
  for (User::op_iterator I = GEP.idx_begin(), E = GEP.idx_end(); I != E; ++I)
    if (Constant *SimpleOp = SimplifiedValues.lookup(*I))
      Operands.push_back(SimpleOp);
    else
      Operands.push_back(*I);
  return TargetTransformInfo::TCC_Free == TTI.getUserCost(&GEP, Operands);
}

bool CallAnalyzer::visitAlloca(AllocaInst &I) {
  // Check whether inlining will turn a dynamic alloca into a static
  // alloca and handle that case.
  if (I.isArrayAllocation()) {
    Constant *Size = SimplifiedValues.lookup(I.getArraySize());
    if (auto *AllocSize = dyn_cast_or_null<ConstantInt>(Size)) {
      Type *Ty = I.getAllocatedType();
      AllocatedSize = SaturatingMultiplyAdd(
          AllocSize->getLimitedValue(), DL.getTypeAllocSize(Ty), AllocatedSize);
      return Base::visitAlloca(I);
    }
  }

  // Accumulate the allocated size.
  if (I.isStaticAlloca()) {
    Type *Ty = I.getAllocatedType();
    AllocatedSize = SaturatingAdd(DL.getTypeAllocSize(Ty), AllocatedSize);
  }

  // We will happily inline static alloca instructions.
  if (I.isStaticAlloca())
    return Base::visitAlloca(I);

  // FIXME: This is overly conservative. Dynamic allocas are inefficient for
  // a variety of reasons, and so we would like to not inline them into
  // functions which don't currently have a dynamic alloca. This simply
  // disables inlining altogether in the presence of a dynamic alloca.
  HasDynamicAlloca = true;
  return false;
}

bool CallAnalyzer::visitPHI(PHINode &I) {
  // FIXME: We should potentially be tracking values through phi nodes,
  // especially when they collapse to a single value due to deleted CFG edges
  // during inlining.

  // FIXME: We need to propagate SROA *disabling* through phi nodes, even
  // though we don't want to propagate its bonuses. The idea is to disable
  // SROA if it *might* be used in an inappropriate manner.

  // Phi nodes are always zero-cost.
  return true;
}

bool CallAnalyzer::visitGetElementPtr(GetElementPtrInst &I) {
  Value *SROAArg;
  DenseMap<Value *, int>::iterator CostIt;
  bool SROACandidate =
      lookupSROAArgAndCost(I.getPointerOperand(), SROAArg, CostIt);

  // Try to fold GEPs of constant-offset call site argument pointers. This
  // requires target data and inbounds GEPs.
  if (I.isInBounds()) {
    // Check if we have a base + offset for the pointer.
    Value *Ptr = I.getPointerOperand();
    std::pair<Value *, APInt> BaseAndOffset = ConstantOffsetPtrs.lookup(Ptr);
    if (BaseAndOffset.first) {
      // Check if the offset of this GEP is constant, and if so accumulate it
      // into Offset.
      if (!accumulateGEPOffset(cast<GEPOperator>(I), BaseAndOffset.second)) {
        // Non-constant GEPs aren't folded, and disable SROA.
        if (SROACandidate)
          disableSROA(CostIt);
        return isGEPFree(I);
      }

      // Add the result as a new mapping to Base + Offset.
      ConstantOffsetPtrs[&I] = BaseAndOffset;

      // Also handle SROA candidates here, we already know that the GEP is
      // all-constant indexed.
      if (SROACandidate)
        SROAArgValues[&I] = SROAArg;

      return true;
    }
  }

  // Lambda to check whether a GEP's indices are all constant.
  auto IsGEPOffsetConstant = [&](GetElementPtrInst &GEP) {
    for (User::op_iterator I = GEP.idx_begin(), E = GEP.idx_end(); I != E; ++I)
      if (!isa<Constant>(*I) && !SimplifiedValues.lookup(*I))
        return false;
    return true;
  };

  if (IsGEPOffsetConstant(I)) {
    if (SROACandidate)
      SROAArgValues[&I] = SROAArg;

    // Constant GEPs are modeled as free.
    return true;
  }

  // Variable GEPs will require math and will disable SROA.
  if (SROACandidate)
    disableSROA(CostIt);
  return isGEPFree(I);
}

/// Simplify \p I if its operands are constants and update SimplifiedValues.
/// \p Evaluate is a callable specific to instruction type that evaluates the
/// instruction when all the operands are constants.
template <typename Callable>
bool CallAnalyzer::simplifyInstruction(Instruction &I, Callable Evaluate) {
  SmallVector<Constant *, 2> COps;
  for (Value *Op : I.operands()) {
    Constant *COp = dyn_cast<Constant>(Op);
    if (!COp)
      COp = SimplifiedValues.lookup(Op);
    if (!COp)
      return false;
    COps.push_back(COp);
  }
  auto *C = Evaluate(COps);
  if (!C)
    return false;
  SimplifiedValues[&I] = C;
  return true;
}

bool CallAnalyzer::visitBitCast(BitCastInst &I) {
  // Propagate constants through bitcasts.
  if (simplifyInstruction(I, [&](SmallVectorImpl<Constant *> &COps) {
        return ConstantExpr::getBitCast(COps[0], I.getType());
      }))
    return true;

  // Track base/offsets through casts.
  std::pair<Value *, APInt> BaseAndOffset =
      ConstantOffsetPtrs.lookup(I.getOperand(0));
  // Casts don't change the offset, just wrap it up.
  if (BaseAndOffset.first)
    ConstantOffsetPtrs[&I] = BaseAndOffset;

  // Also look for SROA candidates here.
  Value *SROAArg;
  DenseMap<Value *, int>::iterator CostIt;
  if (lookupSROAArgAndCost(I.getOperand(0), SROAArg, CostIt))
    SROAArgValues[&I] = SROAArg;

  // Bitcasts are always zero cost.
  return true;
}

bool CallAnalyzer::visitPtrToInt(PtrToIntInst &I) {
  // Propagate constants through ptrtoint.
  if (simplifyInstruction(I, [&](SmallVectorImpl<Constant *> &COps) {
        return ConstantExpr::getPtrToInt(COps[0], I.getType());
      }))
    return true;

  // Track base/offset pairs when converted to a plain integer provided the
  // integer is large enough to represent the pointer.
  unsigned IntegerSize = I.getType()->getScalarSizeInBits();
  if (IntegerSize >= DL.getPointerSizeInBits()) {
    std::pair<Value *, APInt> BaseAndOffset =
        ConstantOffsetPtrs.lookup(I.getOperand(0));
    if (BaseAndOffset.first)
      ConstantOffsetPtrs[&I] = BaseAndOffset;
  }

  // This is really weird. Technically, ptrtoint will disable SROA. However,
  // unless that ptrtoint is *used* somewhere in the live basic blocks after
  // inlining, it will be nuked, and SROA should proceed. All of the uses which
  // would block SROA would also block SROA if applied directly to a pointer,
  // and so we can just add the integer in here. The only places where SROA is
  // preserved either cannot fire on an integer, or won't in-and-of themselves
  // disable SROA (ext) w/o some later use that we would see and disable.
  Value *SROAArg;
  DenseMap<Value *, int>::iterator CostIt;
  if (lookupSROAArgAndCost(I.getOperand(0), SROAArg, CostIt))
    SROAArgValues[&I] = SROAArg;

  return TargetTransformInfo::TCC_Free == TTI.getUserCost(&I);
}

bool CallAnalyzer::visitIntToPtr(IntToPtrInst &I) {
  // Propagate constants through inttoptr.
  if (simplifyInstruction(I, [&](SmallVectorImpl<Constant *> &COps) {
        return ConstantExpr::getIntToPtr(COps[0], I.getType());
      }))
    return true;

  // Track base/offset pairs when round-tripped through a pointer without
  // modifications provided the integer is not too large.
  Value *Op = I.getOperand(0);
  unsigned IntegerSize = Op->getType()->getScalarSizeInBits();
  if (IntegerSize <= DL.getPointerSizeInBits()) {
    std::pair<Value *, APInt> BaseAndOffset = ConstantOffsetPtrs.lookup(Op);
    if (BaseAndOffset.first)
      ConstantOffsetPtrs[&I] = BaseAndOffset;
  }

  // "Propagate" SROA here in the same manner as we do for ptrtoint above.
  Value *SROAArg;
  DenseMap<Value *, int>::iterator CostIt;
  if (lookupSROAArgAndCost(Op, SROAArg, CostIt))
    SROAArgValues[&I] = SROAArg;

  return TargetTransformInfo::TCC_Free == TTI.getUserCost(&I);
}

bool CallAnalyzer::visitCastInst(CastInst &I) {
  // Propagate constants through casts.
  if (simplifyInstruction(I, [&](SmallVectorImpl<Constant *> &COps) {
        return ConstantExpr::getCast(I.getOpcode(), COps[0], I.getType());
      }))
    return true;

  // Disable SROA in the face of arbitrary casts we don't whitelist elsewhere.
  disableSROA(I.getOperand(0));

  return TargetTransformInfo::TCC_Free == TTI.getUserCost(&I);
}

bool CallAnalyzer::visitUnaryInstruction(UnaryInstruction &I) {
  Value *Operand = I.getOperand(0);
  if (simplifyInstruction(I, [&](SmallVectorImpl<Constant *> &COps) {
        return ConstantFoldInstOperands(&I, COps[0], DL);
      }))
    return true;

  // Disable any SROA on the argument to arbitrary unary operators.
  disableSROA(Operand);

  return false;
}

bool CallAnalyzer::paramHasAttr(Argument *A, Attribute::AttrKind Attr) {
  return CandidateCS.paramHasAttr(A->getArgNo(), Attr);
}

bool CallAnalyzer::isKnownNonNullInCallee(Value *V) {
  // Does the *call site* have the NonNull attribute set on an argument? We
  // use the attribute on the call site to memoize any analysis done in the
  // caller. This will also trip if the callee function has a non-null
  // parameter attribute, but that's a less interesting case because hopefully
  // the callee would already have been simplified based on that.
  if (Argument *A = dyn_cast<Argument>(V))
    if (paramHasAttr(A, Attribute::NonNull))
      return true;

  // Is this an alloca in the caller? This is distinct from the attribute case
  // above because attributes aren't updated within the inliner itself and we
  // always want to catch the alloca derived case.
  if (isAllocaDerivedArg(V))
    // We can actually predict the result of comparisons between an
    // alloca-derived value and null. Note that this fires regardless of
    // SROA firing.
    return true;

  return false;
}

bool CallAnalyzer::allowSizeGrowth(CallSite CS) {
  // If the normal destination of the invoke or the parent block of the call
  // site is unreachable-terminated, there is little point in inlining this
  // unless there is literally zero cost.
  // FIXME: Note that it is possible that an unreachable-terminated block has a
  // hot entry. For example, in the below scenario inlining hot_call_X() may be
  // beneficial:
  // main() {
  //   hot_call_1();
  //   ...
  //   hot_call_N()
  //   exit(0);
  // }
  // For now, we are not handling this corner case here as it is rare in real
  // code. In future, we should elaborate this based on BPI and BFI in more
  // general threshold adjusting heuristics in updateThreshold().
  Instruction *Instr = CS.getInstruction();
  if (InvokeInst *II = dyn_cast<InvokeInst>(Instr)) {
    if (isa<UnreachableInst>(II->getNormalDest()->getTerminator()))
      return false;
  } else if (isa<UnreachableInst>(Instr->getParent()->getTerminator()))
    return false;

  return true;
}

bool CallAnalyzer::isColdCallSite(CallSite CS, BlockFrequencyInfo *CallerBFI) {
  // If global profile summary is available, then callsite's coldness is
  // determined based on that.
  if (PSI && PSI->hasProfileSummary())
    return PSI->isColdCallSite(CS, CallerBFI);

  // Otherwise we need BFI to be available.
  if (!CallerBFI)
    return false;

  // Determine if the callsite is cold relative to caller's entry. We could
  // potentially cache the computation of scaled entry frequency, but the added
  // complexity is not worth it unless this scaling shows up high in the
  // profiles.
  const BranchProbability ColdProb(ColdCallSiteRelFreq, 100);
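  // Illustrative example (added commentary, not in the original source): with
  // the default ColdCallSiteRelFreq of 2, ColdProb is 2/100, so a callsite
  // executed less than 2% as often as the caller's entry block is treated as
  // cold.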
  auto CallSiteBB = CS.getInstruction()->getParent();
  auto CallSiteFreq = CallerBFI->getBlockFreq(CallSiteBB);
  auto CallerEntryFreq =
      CallerBFI->getBlockFreq(&(CS.getCaller()->getEntryBlock()));
  return CallSiteFreq < CallerEntryFreq * ColdProb;
}

Optional<int>
CallAnalyzer::getHotCallSiteThreshold(CallSite CS,
                                      BlockFrequencyInfo *CallerBFI) {

  // If global profile summary is available, then callsite's hotness is
  // determined based on that.
  if (PSI && PSI->hasProfileSummary() && PSI->isHotCallSite(CS, CallerBFI))
    return Params.HotCallSiteThreshold;

  // Otherwise we need BFI to be available and to have a locally hot callsite
  // threshold.
  if (!CallerBFI || !Params.LocallyHotCallSiteThreshold)
    return None;

  // Determine if the callsite is hot relative to caller's entry. We could
  // potentially cache the computation of scaled entry frequency, but the added
  // complexity is not worth it unless this scaling shows up high in the
  // profiles.
  auto CallSiteBB = CS.getInstruction()->getParent();
  auto CallSiteFreq = CallerBFI->getBlockFreq(CallSiteBB).getFrequency();
  auto CallerEntryFreq = CallerBFI->getEntryFreq();
  if (CallSiteFreq >= CallerEntryFreq * HotCallSiteRelFreq)
    return Params.LocallyHotCallSiteThreshold;

  // Otherwise treat it normally.
  return None;
}

void CallAnalyzer::updateThreshold(CallSite CS, Function &Callee) {
  // If no size growth is allowed for this inlining, set Threshold to 0.
  if (!allowSizeGrowth(CS)) {
    Threshold = 0;
    return;
  }

  Function *Caller = CS.getCaller();

  // return min(A, B) if B is valid.
  auto MinIfValid = [](int A, Optional<int> B) {
    return B ? std::min(A, B.getValue()) : A;
  };

  // return max(A, B) if B is valid.
  auto MaxIfValid = [](int A, Optional<int> B) {
    return B ? std::max(A, B.getValue()) : A;
  };

  // Various bonus percentages. These are multiplied by Threshold to get the
  // bonus values.
  // SingleBBBonus: This bonus is applied if the callee has a single reachable
  // basic block at the given callsite context. This is speculatively applied
  // and withdrawn if more than one basic block is seen.
  //
  // Vector bonuses: We want to more aggressively inline vector-dense kernels
  // and apply this bonus based on the percentage of vector instructions. A
  // bonus is applied if the vector instructions exceed 50% and half that
  // amount is applied if they exceed 10%. Note that these bonuses are somewhat
  // arbitrary and evolved over time by accident as much as because they are
  // principled bonuses.
  // FIXME: It would be nice to base the bonus values on something more
  // scientific.
  //
  // LastCallToStaticBonus: This large bonus is applied to ensure the inlining
  // of the last call to a static function as inlining such functions is
  // guaranteed to reduce code size.
  //
  // These bonus percentages may be set to 0 based on properties of the caller
  // and the callsite.
  int SingleBBBonusPercent = 50;
  int VectorBonusPercent = 150;
  int LastCallToStaticBonus = InlineConstants::LastCallToStaticBonus;

  // Lambda to set all the above bonus and bonus percentages to 0.
  auto DisallowAllBonuses = [&]() {
    SingleBBBonusPercent = 0;
    VectorBonusPercent = 0;
    LastCallToStaticBonus = 0;
  };

  // Use the OptMinSizeThreshold or OptSizeThreshold knob if they are available
  // and reduce the threshold if the caller has the necessary attribute.
  if (Caller->optForMinSize()) {
    Threshold = MinIfValid(Threshold, Params.OptMinSizeThreshold);
    // For minsize, we want to disable the single BB bonus and the vector
    // bonuses, but not the last-call-to-static bonus. Inlining the last call
    // to a static function will, at the minimum, eliminate the parameter
    // setup and call/return instructions.
    SingleBBBonusPercent = 0;
    VectorBonusPercent = 0;
  } else if (Caller->optForSize())
    Threshold = MinIfValid(Threshold, Params.OptSizeThreshold);

  // Adjust the threshold based on inlinehint attribute and profile based
  // hotness information if the caller does not have MinSize attribute.
  if (!Caller->optForMinSize()) {
    if (Callee.hasFnAttribute(Attribute::InlineHint))
      Threshold = MaxIfValid(Threshold, Params.HintThreshold);

    // FIXME: After switching to the new passmanager, simplify the logic below
    // by checking only the callsite hotness/coldness as we will reliably
    // have local profile information.
    //
    // Callsite hotness and coldness can be determined if sample profile is
    // used (which adds hotness metadata to calls) or if caller's
    // BlockFrequencyInfo is available.
    BlockFrequencyInfo *CallerBFI = GetBFI ? &((*GetBFI)(*Caller)) : nullptr;
    auto HotCallSiteThreshold = getHotCallSiteThreshold(CS, CallerBFI);
    if (!Caller->optForSize() && HotCallSiteThreshold) {
      DEBUG(dbgs() << "Hot callsite.\n");
      // FIXME: This should update the threshold only if it exceeds the
      // current threshold, but AutoFDO + ThinLTO currently relies on this
      // behavior to prevent inlining of hot callsites during ThinLTO
      // compile phase.
      Threshold = HotCallSiteThreshold.getValue();
    } else if (isColdCallSite(CS, CallerBFI)) {
      DEBUG(dbgs() << "Cold callsite.\n");
      // Do not apply bonuses for a cold callsite including the
      // LastCallToStatic bonus. While this bonus might result in code size
      // reduction, it can cause the size of a non-cold caller to increase
      // preventing it from being inlined.
      DisallowAllBonuses();
      Threshold = MinIfValid(Threshold, Params.ColdCallSiteThreshold);
    } else if (PSI) {
      // Use callee's global profile information only if we have no way of
      // determining this via callsite information.
      if (PSI->isFunctionEntryHot(&Callee)) {
        DEBUG(dbgs() << "Hot callee.\n");
        // If callsite hotness can not be determined, we may still know
        // that the callee is hot and treat it as a weaker hint for threshold
        // increase.
        Threshold = MaxIfValid(Threshold, Params.HintThreshold);
      } else if (PSI->isFunctionEntryCold(&Callee)) {
        DEBUG(dbgs() << "Cold callee.\n");
        // Do not apply bonuses for a cold callee including the
        // LastCallToStatic bonus. While this bonus might result in code size
        // reduction, it can cause the size of a non-cold caller to increase
        // preventing it from being inlined.
        DisallowAllBonuses();
        Threshold = MinIfValid(Threshold, Params.ColdThreshold);
      }
    }
  }

  // Finally, take the target-specific inlining threshold multiplier into
  // account.
  Threshold *= TTI.getInliningThresholdMultiplier();

  SingleBBBonus = Threshold * SingleBBBonusPercent / 100;
  VectorBonus = Threshold * VectorBonusPercent / 100;
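  // Worked example (added commentary, not in the original source): with the
  // default inline threshold of 225 and the percentages above, SingleBBBonus
  // would be 225 * 50 / 100 = 112 and VectorBonus 225 * 150 / 100 = 337.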

  bool OnlyOneCallAndLocalLinkage =
      F.hasLocalLinkage() && F.hasOneUse() && &F == CS.getCalledFunction();
  // If there is only one call of the function, and it has internal linkage,
  // the cost of inlining it drops dramatically. It may seem odd to update
  // Cost in updateThreshold, but the bonus depends on the logic in this
  // method.
  if (OnlyOneCallAndLocalLinkage)
    Cost -= LastCallToStaticBonus;
}

bool CallAnalyzer::visitCmpInst(CmpInst &I) {
  Value *LHS = I.getOperand(0), *RHS = I.getOperand(1);
  // First try to handle simplified comparisons.
  if (simplifyInstruction(I, [&](SmallVectorImpl<Constant *> &COps) {
        return ConstantExpr::getCompare(I.getPredicate(), COps[0], COps[1]);
      }))
    return true;

  if (I.getOpcode() == Instruction::FCmp)
    return false;

  // Otherwise look for a comparison between constant offset pointers with
  // a common base.
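  // Illustrative example (added commentary, not in the original source): if
  // LHS and RHS both track back to the same caller pointer with constant
  // offsets 4 and 8, the predicate is evaluated on the constants 4 and 8
  // below, so an equality compare folds to false.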
  Value *LHSBase, *RHSBase;
  APInt LHSOffset, RHSOffset;
  std::tie(LHSBase, LHSOffset) = ConstantOffsetPtrs.lookup(LHS);
  if (LHSBase) {
    std::tie(RHSBase, RHSOffset) = ConstantOffsetPtrs.lookup(RHS);
    if (RHSBase && LHSBase == RHSBase) {
      // We have common bases, fold the icmp to a constant based on the
      // offsets.
      Constant *CLHS = ConstantInt::get(LHS->getContext(), LHSOffset);
      Constant *CRHS = ConstantInt::get(RHS->getContext(), RHSOffset);
      if (Constant *C = ConstantExpr::getICmp(I.getPredicate(), CLHS, CRHS)) {
        SimplifiedValues[&I] = C;
        ++NumConstantPtrCmps;
        return true;
      }
    }
  }

  // If the comparison is an equality comparison with null, we can simplify it
  // if we know the value (argument) can't be null.
  if (I.isEquality() && isa<ConstantPointerNull>(I.getOperand(1)) &&
      isKnownNonNullInCallee(I.getOperand(0))) {
    bool IsNotEqual = I.getPredicate() == CmpInst::ICMP_NE;
    SimplifiedValues[&I] = IsNotEqual ? ConstantInt::getTrue(I.getType())
                                      : ConstantInt::getFalse(I.getType());
    return true;
  }
  // Finally check for SROA candidates in comparisons.
  Value *SROAArg;
  DenseMap<Value *, int>::iterator CostIt;
  if (lookupSROAArgAndCost(I.getOperand(0), SROAArg, CostIt)) {
    if (isa<ConstantPointerNull>(I.getOperand(1))) {
      accumulateSROACost(CostIt, InlineConstants::InstrCost);
      return true;
    }

    disableSROA(CostIt);
  }

  return false;
}

bool CallAnalyzer::visitOr(BinaryOperator &I) {
  // This is necessary because the generic simplify instruction only works if
  // both operands are constants.
  for (unsigned i = 0; i < 2; ++i) {
    if (ConstantInt *C = dyn_cast_or_null<ConstantInt>(
            SimplifiedValues.lookup(I.getOperand(i))))
      if (C->isAllOnesValue()) {
        SimplifiedValues[&I] = C;
        return true;
      }
  }
  return Base::visitOr(I);
}

bool CallAnalyzer::visitAnd(BinaryOperator &I) {
  // This is necessary because the generic simplify instruction only works if
  // both operands are constants.
  for (unsigned i = 0; i < 2; ++i) {
    if (ConstantInt *C = dyn_cast_or_null<ConstantInt>(
            SimplifiedValues.lookup(I.getOperand(i))))
      if (C->isZero()) {
        SimplifiedValues[&I] = C;
        return true;
      }
  }
  return Base::visitAnd(I);
}

bool CallAnalyzer::visitSub(BinaryOperator &I) {
  // Try to handle a special case: we can fold computing the difference of two
  // constant-related pointers.
  Value *LHS = I.getOperand(0), *RHS = I.getOperand(1);
  Value *LHSBase, *RHSBase;
  APInt LHSOffset, RHSOffset;
  std::tie(LHSBase, LHSOffset) = ConstantOffsetPtrs.lookup(LHS);
  if (LHSBase) {
    std::tie(RHSBase, RHSOffset) = ConstantOffsetPtrs.lookup(RHS);
    if (RHSBase && LHSBase == RHSBase) {
      // We have common bases, fold the subtract to a constant based on the
      // offsets.
      Constant *CLHS = ConstantInt::get(LHS->getContext(), LHSOffset);
      Constant *CRHS = ConstantInt::get(RHS->getContext(), RHSOffset);
      if (Constant *C = ConstantExpr::getSub(CLHS, CRHS)) {
        SimplifiedValues[&I] = C;
        ++NumConstantPtrDiffs;
        return true;
      }
    }
  }

  // Otherwise, fall back to the generic logic for simplifying and handling
  // instructions.
  return Base::visitSub(I);
}

bool CallAnalyzer::visitBinaryOperator(BinaryOperator &I) {
  Value *LHS = I.getOperand(0), *RHS = I.getOperand(1);
  auto Evaluate = [&](SmallVectorImpl<Constant *> &COps) {
    Value *SimpleV = nullptr;
    if (auto FI = dyn_cast<FPMathOperator>(&I))
      SimpleV = SimplifyFPBinOp(I.getOpcode(), COps[0], COps[1],
                                FI->getFastMathFlags(), DL);
    else
      SimpleV = SimplifyBinOp(I.getOpcode(), COps[0], COps[1], DL);
    return dyn_cast_or_null<Constant>(SimpleV);
  };

  if (simplifyInstruction(I, Evaluate))
    return true;

  // Disable any SROA on arguments to arbitrary, unsimplified binary operators.
  disableSROA(LHS);
  disableSROA(RHS);

  return false;
}

bool CallAnalyzer::visitLoad(LoadInst &I) {
  Value *SROAArg;
  DenseMap<Value *, int>::iterator CostIt;
  if (lookupSROAArgAndCost(I.getPointerOperand(), SROAArg, CostIt)) {
    if (I.isSimple()) {
      accumulateSROACost(CostIt, InlineConstants::InstrCost);
      return true;
    }

    disableSROA(CostIt);
  }

  return false;
}

bool CallAnalyzer::visitStore(StoreInst &I) {
  Value *SROAArg;
  DenseMap<Value *, int>::iterator CostIt;
  if (lookupSROAArgAndCost(I.getPointerOperand(), SROAArg, CostIt)) {
    if (I.isSimple()) {
      accumulateSROACost(CostIt, InlineConstants::InstrCost);
      return true;
    }

    disableSROA(CostIt);
  }

  return false;
}

bool CallAnalyzer::visitExtractValue(ExtractValueInst &I) {
  // Constant folding for extract value is trivial.
  if (simplifyInstruction(I, [&](SmallVectorImpl<Constant *> &COps) {
        return ConstantExpr::getExtractValue(COps[0], I.getIndices());
      }))
    return true;

  // SROA can look through these but give them a cost.
  return false;
}

bool CallAnalyzer::visitInsertValue(InsertValueInst &I) {
  // Constant folding for insert value is trivial.
  if (simplifyInstruction(I, [&](SmallVectorImpl<Constant *> &COps) {
        return ConstantExpr::getInsertValue(/*AggregateOperand*/ COps[0],
                                            /*InsertedValueOperand*/ COps[1],
                                            I.getIndices());
      }))
    return true;

  // SROA can look through these but give them a cost.
  return false;
}

/// \brief Try to simplify a call site.
///
/// Takes a concrete function and callsite and tries to actually simplify it by
/// analyzing the arguments and call itself with instsimplify. Returns true if
/// it has simplified the callsite to some other entity (a constant), making it
/// free.
bool CallAnalyzer::simplifyCallSite(Function *F, CallSite CS) {
  // FIXME: Using the instsimplify logic directly for this is inefficient
  // because we have to continually rebuild the argument list even when no
  // simplifications can be performed. Until that is fixed with remapping
  // inside of instsimplify, directly constant fold calls here.
  if (!canConstantFoldCallTo(CS, F))
    return false;

  // Try to re-map the arguments to constants.
  SmallVector<Constant *, 4> ConstantArgs;
  ConstantArgs.reserve(CS.arg_size());
  for (CallSite::arg_iterator I = CS.arg_begin(), E = CS.arg_end(); I != E;
       ++I) {
    Constant *C = dyn_cast<Constant>(*I);
    if (!C)
      C = dyn_cast_or_null<Constant>(SimplifiedValues.lookup(*I));
    if (!C)
      return false; // This argument doesn't map to a constant.

    ConstantArgs.push_back(C);
  }
  if (Constant *C = ConstantFoldCall(CS, F, ConstantArgs)) {
    SimplifiedValues[CS.getInstruction()] = C;
    return true;
  }

  return false;
}

bool CallAnalyzer::visitCallSite(CallSite CS) {
  if (CS.hasFnAttr(Attribute::ReturnsTwice) &&
      !F.hasFnAttribute(Attribute::ReturnsTwice)) {
    // This aborts the entire analysis.
    ExposesReturnsTwice = true;
    return false;
  }
  if (CS.isCall() && cast<CallInst>(CS.getInstruction())->cannotDuplicate())
    ContainsNoDuplicateCall = true;

  if (Function *F = CS.getCalledFunction()) {
    // When we have a concrete function, first try to simplify it directly.
    if (simplifyCallSite(F, CS))
      return true;

    // Next check if it is an intrinsic we know about.
    // FIXME: Lift this into part of the InstVisitor.
    if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(CS.getInstruction())) {
      switch (II->getIntrinsicID()) {
      default:
        return Base::visitCallSite(CS);

      case Intrinsic::load_relative:
        // This is normally lowered to 4 LLVM instructions.
        Cost += 3 * InlineConstants::InstrCost;
        return false;

      case Intrinsic::memset:
      case Intrinsic::memcpy:
      case Intrinsic::memmove:
        // SROA can usually chew through these intrinsics, but they aren't
        // free.
        return false;
      case Intrinsic::localescape:
        HasFrameEscape = true;
        return false;
      }
    }

    if (F == CS.getInstruction()->getParent()->getParent()) {
      // This flag will fully abort the analysis, so don't bother with anything
      // else.
      IsRecursiveCall = true;
      return false;
    }

    if (TTI.isLoweredToCall(F)) {
      // We account for the average 1 instruction per call argument setup
      // here.
      Cost += CS.arg_size() * InlineConstants::InstrCost;

      // Everything other than inline ASM will also have a significant cost
      // merely from making the call.
      if (!isa<InlineAsm>(CS.getCalledValue()))
        Cost += InlineConstants::CallPenalty;
    }

    return Base::visitCallSite(CS);
  }

  // Otherwise we're in a very special case -- an indirect function call. See
  // if we can be particularly clever about this.
  Value *Callee = CS.getCalledValue();

  // First, pay the price of the argument setup. We account for the average
  // 1 instruction per call argument setup here.
  Cost += CS.arg_size() * InlineConstants::InstrCost;

  // Next, check if this happens to be an indirect function call to a known
  // function in this inline context. If not, we've done all we can.
  Function *F = dyn_cast_or_null<Function>(SimplifiedValues.lookup(Callee));
  if (!F)
    return Base::visitCallSite(CS);

  // If we have a constant that we are calling as a function, we can peer
  // through it and see the function target. This happens not infrequently
  // during devirtualization and so we want to give it a hefty bonus for
  // inlining, but cap that bonus in the event that inlining wouldn't pan
  // out. Pretend to inline the function, with a custom threshold.
  auto IndirectCallParams = Params;
  IndirectCallParams.DefaultThreshold = InlineConstants::IndirectCallThreshold;
  CallAnalyzer CA(TTI, GetAssumptionCache, GetBFI, PSI, ORE, *F, CS,
                  IndirectCallParams);
  if (CA.analyzeCall(CS)) {
    // We were able to inline the indirect call! Subtract the cost from the
    // threshold to get the bonus we want to apply, but don't go below zero.
    Cost -= std::max(0, CA.getThreshold() - CA.getCost());
  }

  return Base::visitCallSite(CS);
}

bool CallAnalyzer::visitReturnInst(ReturnInst &RI) {
  // At least one return instruction will be free after inlining.
  bool Free = !HasReturn;
  HasReturn = true;
  return Free;
}

bool CallAnalyzer::visitBranchInst(BranchInst &BI) {
  // We model unconditional branches as essentially free -- they really
  // shouldn't exist at all, but handling them makes the behavior of the
  // inliner more regular and predictable. Interestingly, conditional branches
  // which will fold away are also free.
  return BI.isUnconditional() || isa<ConstantInt>(BI.getCondition()) ||
         dyn_cast_or_null<ConstantInt>(
             SimplifiedValues.lookup(BI.getCondition()));
}

bool CallAnalyzer::visitSwitchInst(SwitchInst &SI) {
  // We model unconditional switches as free, see the comments on handling
  // branches.
  if (isa<ConstantInt>(SI.getCondition()))
    return true;
  if (Value *V = SimplifiedValues.lookup(SI.getCondition()))
    if (isa<ConstantInt>(V))
      return true;

  // Assume the most general case where the switch is lowered into
  // either a jump table, bit test, or a balanced binary tree consisting of
  // case clusters without merging adjacent clusters with the same
  // destination. We do not consider the switches that are lowered with a mix
  // of jump table/bit test/binary search tree. The cost of the switch is
  // proportional to the size of the tree or the size of jump table range.
  //
  // NB: We convert large switches which are just used to initialize large phi
  // nodes to lookup tables instead in simplify-cfg, so this shouldn't prevent
  // inlining those. It will prevent inlining in cases where the optimization
  // does not (yet) fire.

  // Maximum valid cost increased in this function.
  int CostUpperBound = INT_MAX - InlineConstants::InstrCost - 1;

  // Exit early for a large switch, assuming one case needs at least one
  // instruction.
  // FIXME: This is not true for a bit test, but ignore such case for now to
  // save compile-time.
  int64_t CostLowerBound =
      std::min((int64_t)CostUpperBound,
               (int64_t)SI.getNumCases() * InlineConstants::InstrCost + Cost);

  if (CostLowerBound > Threshold && !ComputeFullInlineCost) {
    Cost = CostLowerBound;
    return false;
  }

  unsigned JumpTableSize = 0;
  unsigned NumCaseCluster =
      TTI.getEstimatedNumberOfCaseClusters(SI, JumpTableSize);

  // If suitable for a jump table, consider the cost for the table size and
  // branch to destination.
  if (JumpTableSize) {
    int64_t JTCost = (int64_t)JumpTableSize * InlineConstants::InstrCost +
                     4 * InlineConstants::InstrCost;

    Cost = std::min((int64_t)CostUpperBound, JTCost + Cost);
    return false;
  }

  // Considering forming a binary search, we should find the number of nodes
  // which is same as the number of comparisons when lowered. For a given
  // number of clusters, n, we can define a recursive function, f(n), to find
  // the number of nodes in the tree. The recursion is:
  //   f(n) = 1 + f(n/2) + f(n - n/2), when n > 3,
  //   and f(n) = n, when n <= 3.
  // This will lead to a binary tree where the leaf should be either f(2) or
  // f(3) when n > 3. So, the number of comparisons from leaves should be n,
  // while the number of non-leaf nodes should be:
  //   2^(log2(n) - 1) - 1
  //   = 2^log2(n) * 2^-1 - 1
  //   = n / 2 - 1.
  // Considering comparisons from leaf and non-leaf nodes, we can estimate the
  // number of comparisons in a simple closed form:
  //   n + n / 2 - 1 = n * 3 / 2 - 1
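  //
  // Worked example (added commentary, not in the original source): for
  // NumCaseCluster == 8, the estimate is 8 * 3 / 2 - 1 = 11 comparisons, and
  // each comparison is charged below as one compare plus one branch, i.e.
  // 22 * InlineConstants::InstrCost.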
  if (NumCaseCluster <= 3) {
    // Suppose a comparison includes one compare and one conditional branch.
    Cost += NumCaseCluster * 2 * InlineConstants::InstrCost;
    return false;
  }

  int64_t ExpectedNumberOfCompare = 3 * (int64_t)NumCaseCluster / 2 - 1;
  int64_t SwitchCost =
      ExpectedNumberOfCompare * 2 * InlineConstants::InstrCost;

  Cost = std::min((int64_t)CostUpperBound, SwitchCost + Cost);
  return false;
}

bool CallAnalyzer::visitIndirectBrInst(IndirectBrInst &IBI) {
  // We never want to inline functions that contain an indirectbr. This is
  // incorrect because all the blockaddress's (in static global initializers
  // for example) would be referring to the original function, and this
  // indirect jump would jump from the inlined copy of the function into the
  // original function which is extremely undefined behavior.
  // FIXME: This logic isn't really right; we can safely inline functions with
  // indirectbr's as long as no other function or global references the
  // blockaddress of a block within the current function.
  HasIndirectBr = true;
  return false;
}

bool CallAnalyzer::visitResumeInst(ResumeInst &RI) {
  // FIXME: It's not clear that a single instruction is an accurate model for
  // the inline cost of a resume instruction.
  return false;
}

bool CallAnalyzer::visitCleanupReturnInst(CleanupReturnInst &CRI) {
  // FIXME: It's not clear that a single instruction is an accurate model for
  // the inline cost of a cleanupret instruction.
  return false;
}

bool CallAnalyzer::visitCatchReturnInst(CatchReturnInst &CRI) {
  // FIXME: It's not clear that a single instruction is an accurate model for
  // the inline cost of a catchret instruction.
  return false;
}

bool CallAnalyzer::visitUnreachableInst(UnreachableInst &I) {
  // FIXME: It might be reasonable to discount the cost of instructions leading
  // to unreachable as they have the lowest possible impact on both runtime and
  // code size.
  return true; // No actual code is needed for unreachable.
}

bool CallAnalyzer::visitInstruction(Instruction &I) {
  // Some instructions are free. All of the free intrinsics can also be
  // handled by SROA, etc.
  if (TargetTransformInfo::TCC_Free == TTI.getUserCost(&I))
    return true;

  // We found something we don't understand or can't handle. Mark any SROA-able
  // values in the operand list as no longer viable.
  for (User::op_iterator OI = I.op_begin(), OE = I.op_end(); OI != OE; ++OI)
    disableSROA(*OI);

  return false;
}

/// \brief Analyze a basic block for its contribution to the inline cost.
///
/// This method walks the analyzer over every instruction in the given basic
/// block and accounts for their cost during inlining at this callsite. It
/// aborts early if the threshold has been exceeded or an impossible to inline
/// construct has been detected. It returns false if inlining is no longer
/// viable, and true if inlining remains viable.
bool CallAnalyzer::analyzeBlock(BasicBlock *BB,
                                SmallPtrSetImpl<const Value *> &EphValues) {
  for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I) {
    // FIXME: Currently, the number of instructions in a function, regardless
    // of our ability to simplify them during inlining to constants or dead
    // code, is actually used by the vector bonus heuristic. As long as that's
    // true, we have to special case debug intrinsics here to prevent
    // differences in inlining due to debug symbols. Eventually, the number of
    // unsimplified instructions shouldn't factor into the cost computation,
    // but until then, hack around it here.
    if (isa<DbgInfoIntrinsic>(I))
      continue;

    // Skip ephemeral values.
    if (EphValues.count(&*I))
      continue;

    ++NumInstructions;
    if (isa<ExtractElementInst>(I) || I->getType()->isVectorTy())
      ++NumVectorInstructions;

    // If the instruction is floating point, and the target says this operation
    // is expensive or the function has the "use-soft-float" attribute, this
    // may eventually become a library call. Treat the cost as such.
    if (I->getType()->isFloatingPointTy()) {
      // If the function has the "use-soft-float" attribute, mark it as
      // expensive.
      if (TTI.getFPOpCost(I->getType()) == TargetTransformInfo::TCC_Expensive ||
          (F.getFnAttribute("use-soft-float").getValueAsString() == "true"))
        Cost += InlineConstants::CallPenalty;
    }

    // If the instruction simplified to a constant, there is no cost to this
    // instruction. Visit the instructions using our InstVisitor to account for
    // all of the per-instruction logic. The visit tree returns true if we
    // consumed the instruction in any way, and false if the instruction's base
    // cost should count against inlining.
    if (Base::visit(&*I))
      ++NumInstructionsSimplified;
    else
      Cost += InlineConstants::InstrCost;

    using namespace ore;
    // If visiting this instruction detected an uninlinable pattern, abort.
    if (IsRecursiveCall || ExposesReturnsTwice || HasDynamicAlloca ||
        HasIndirectBr || HasFrameEscape) {
      if (ORE)
        ORE->emit(OptimizationRemarkMissed(DEBUG_TYPE, "NeverInline",
                                           CandidateCS.getInstruction())
                  << NV("Callee", &F)
                  << " has uninlinable pattern and cost is not fully computed");
      return false;
    }

    // If the caller is a recursive function then we don't want to inline
    // functions which allocate a lot of stack space because it would increase
    // the caller stack usage dramatically.
    if (IsCallerRecursive &&
        AllocatedSize > InlineConstants::TotalAllocaSizeRecursiveCaller) {
      if (ORE)
        ORE->emit(
            OptimizationRemarkMissed(DEBUG_TYPE, "NeverInline",
                                     CandidateCS.getInstruction())
            << NV("Callee", &F)
            << " is recursive and allocates too much stack space. Cost is "
               "not fully computed");
      return false;
    }

    // Check if we've passed the maximum possible threshold so we don't spin in
    // huge basic blocks that will never inline.
    if (Cost >= Threshold && !ComputeFullInlineCost)
      return false;
  }

  return true;
}

/// \brief Compute the base pointer and cumulative constant offsets for V.
///
/// This strips all constant offsets off of V, leaving it the base pointer, and
/// accumulates the total constant offset applied in the returned constant. It
/// returns 0 if V is not a pointer, and returns the constant '0' if there are
/// no constant offsets applied.
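///
/// Illustrative example (added commentary, not in the original source): for
///   %p = getelementptr inbounds i8, i8* %base, i64 4
/// passed as a call argument, V is rewritten to %base and the constant 4 is
/// returned (as an IntPtrTy ConstantInt).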
1403 ConstantInt *CallAnalyzer::stripAndComputeInBoundsConstantOffsets(Value *&V) {
1404  if (!V->getType()->isPointerTy())
1405  return nullptr;
1406 
1407  unsigned IntPtrWidth = DL.getPointerSizeInBits();
1408  APInt Offset = APInt::getNullValue(IntPtrWidth);
1409 
1410  // Even though we don't look through PHI nodes, we could be called on an
1411  // instruction in an unreachable block, which may be on a cycle.
1412  SmallPtrSet<Value *, 4> Visited;
1413  Visited.insert(V);
1414  do {
1415  if (GEPOperator *GEP = dyn_cast<GEPOperator>(V)) {
1416  if (!GEP->isInBounds() || !accumulateGEPOffset(*GEP, Offset))
1417  return nullptr;
1418  V = GEP->getPointerOperand();
1419  } else if (Operator::getOpcode(V) == Instruction::BitCast) {
1420  V = cast<Operator>(V)->getOperand(0);
1421  } else if (GlobalAlias *GA = dyn_cast<GlobalAlias>(V)) {
1422  if (GA->isInterposable())
1423  break;
1424  V = GA->getAliasee();
1425  } else {
1426  break;
1427  }
1428  assert(V->getType()->isPointerTy() && "Unexpected operand type!");
1429  } while (Visited.insert(V).second);
1430 
1431  Type *IntPtrTy = DL.getIntPtrType(V->getContext());
1432  return cast<ConstantInt>(ConstantInt::get(IntPtrTy, Offset));
1433 }
1434 
1435 /// \brief Analyze a call site for potential inlining.
1436 ///
1437 /// Returns true if inlining this call is viable, and false if it is not
1438 /// viable. It computes the cost and adjusts the threshold based on numerous
1439 /// factors and heuristics. If this method returns false but the computed cost
1440 /// is below the computed threshold, then inlining was forcibly disabled by
1441 /// some artifact of the routine.
1442 bool CallAnalyzer::analyzeCall(CallSite CS) {
1443  ++NumCallsAnalyzed;
1444 
1445  // Perform some tweaks to the cost and threshold based on the direct
1446  // callsite information.
1447 
1448  // We want to more aggressively inline vector-dense kernels, so up the
1449  // threshold, and we'll lower it if the % of vector instructions gets too
1450  // low. Note that these bonuses are somewhat arbitrary and evolved over time
1451  // by accident as much as because they are principled bonuses.
1452  //
1453  // FIXME: It would be nice to remove all such bonuses. At least it would be
1454  // nice to base the bonus values on something more scientific.
1455  assert(NumInstructions == 0);
1456  assert(NumVectorInstructions == 0);
1457 
1458  // Update the threshold based on callsite properties
1459  updateThreshold(CS, F);
1460 
1461  // Speculatively apply all possible bonuses to Threshold. Since Cost never
1462  // decreases, once it exceeds this boosted Threshold we can stop processing
1463  // the rest of the function body.
1464  Threshold += (SingleBBBonus + VectorBonus);
1465 
1466  // Give out bonuses for the callsite, as the instructions setting them up
1467  // will be gone after inlining.
1468  Cost -= getCallsiteCost(CS, DL);
1469 
1470  // If this function uses the coldcc calling convention, prefer not to inline
1471  // it.
1472  if (F.getCallingConv() == CallingConv::Cold)
1473  Cost += InlineConstants::ColdccPenalty;
1474 
1475  // Check if we're done. This can happen due to bonuses and penalties.
1476  if (Cost >= Threshold && !ComputeFullInlineCost)
1477  return false;
1478 
1479  if (F.empty())
1480  return true;
1481 
1482  Function *Caller = CS.getInstruction()->getParent()->getParent();
1483  // Check if the caller function is recursive itself.
1484  for (User *U : Caller->users()) {
1485  CallSite Site(U);
1486  if (!Site)
1487  continue;
1488  Instruction *I = Site.getInstruction();
1489  if (I->getParent()->getParent() == Caller) {
1490  IsCallerRecursive = true;
1491  break;
1492  }
1493  }
1494 
1495  // Populate our simplified values by mapping from function arguments to call
1496  // arguments with known important simplifications.
1497  CallSite::arg_iterator CAI = CS.arg_begin();
1498  for (Function::arg_iterator FAI = F.arg_begin(), FAE = F.arg_end();
1499  FAI != FAE; ++FAI, ++CAI) {
1500  assert(CAI != CS.arg_end());
1501  if (Constant *C = dyn_cast<Constant>(CAI))
1502  SimplifiedValues[&*FAI] = C;
1503 
1504  Value *PtrArg = *CAI;
1505  if (ConstantInt *C = stripAndComputeInBoundsConstantOffsets(PtrArg)) {
1506  ConstantOffsetPtrs[&*FAI] = std::make_pair(PtrArg, C->getValue());
1507 
1508  // We can SROA any pointer arguments derived from alloca instructions.
1509  if (isa<AllocaInst>(PtrArg)) {
1510  SROAArgValues[&*FAI] = PtrArg;
1511  SROAArgCosts[PtrArg] = 0;
1512  }
1513  }
1514  }
1515  NumConstantArgs = SimplifiedValues.size();
1516  NumConstantOffsetPtrArgs = ConstantOffsetPtrs.size();
1517  NumAllocaArgs = SROAArgValues.size();
1518 
1519  // FIXME: If a caller has multiple calls to a callee, we end up recomputing
1520  // the ephemeral values multiple times (and they're completely determined by
1521  // the callee, so this is purely duplicate work).
1522  SmallPtrSet<const Value *, 32> EphValues;
1523  CodeMetrics::collectEphemeralValues(&F, &GetAssumptionCache(F), EphValues);
1524 
1525  // The worklist of live basic blocks in the callee *after* inlining. We avoid
1526  // adding basic blocks of the callee which can be proven to be dead for this
1527  // particular call site in order to get more accurate cost estimates. This
1528  // requires a somewhat heavyweight iteration pattern: we need to walk the
1529  // basic blocks in a breadth-first order as we insert live successors. To
1530  // accomplish this, while keeping the worklist small (we exit once we
1531  // cross our threshold), we use a small-size optimized SetVector.
1532  typedef SetVector<BasicBlock *, SmallVector<BasicBlock *, 16>,
1533  SmallPtrSet<BasicBlock *, 16>>
1534  BBSetVector;
1535  BBSetVector BBWorklist;
1536  BBWorklist.insert(&F.getEntryBlock());
1537  bool SingleBB = true;
1538  // Note that we *must not* cache the size, this loop grows the worklist.
1539  for (unsigned Idx = 0; Idx != BBWorklist.size(); ++Idx) {
1540  // Bail out the moment we cross the threshold. This means we'll undercount
1541  // the cost, but only when undercounting doesn't matter.
1542  if (Cost >= Threshold && !ComputeFullInlineCost)
1543  break;
1544 
1545  BasicBlock *BB = BBWorklist[Idx];
1546  if (BB->empty())
1547  continue;
1548 
1549  // Disallow inlining a blockaddress. A blockaddress only has defined
1550  // behavior for an indirect branch in the same function, and we do not
1551  // currently support inlining indirect branches. But, the inliner may not
1552  // see an indirect branch that ends up being dead code at a particular call
1553  // site. If the blockaddress escapes the function, e.g., via a global
1554  // variable, inlining may lead to an invalid cross-function reference.
1555  if (BB->hasAddressTaken())
1556  return false;
1557 
1558  // Analyze the cost of this block. If we blow through the threshold, this
1559  // returns false, and we can bail out.
1560  if (!analyzeBlock(BB, EphValues))
1561  return false;
1562 
1563  TerminatorInst *TI = BB->getTerminator();
1564 
1565  // Add in the live successors by first checking whether we have a terminator
1566  // that may be simplified based on the values simplified by this call.
1567  if (BranchInst *BI = dyn_cast<BranchInst>(TI)) {
1568  if (BI->isConditional()) {
1569  Value *Cond = BI->getCondition();
1570  if (ConstantInt *SimpleCond =
1571  dyn_cast_or_null<ConstantInt>(SimplifiedValues.lookup(Cond))) {
1572  BBWorklist.insert(BI->getSuccessor(SimpleCond->isZero() ? 1 : 0));
1573  continue;
1574  }
1575  }
1576  } else if (SwitchInst *SI = dyn_cast<SwitchInst>(TI)) {
1577  Value *Cond = SI->getCondition();
1578  if (ConstantInt *SimpleCond =
1579  dyn_cast_or_null<ConstantInt>(SimplifiedValues.lookup(Cond))) {
1580  BBWorklist.insert(SI->findCaseValue(SimpleCond)->getCaseSuccessor());
1581  continue;
1582  }
1583  }
1584 
1585  // If we're unable to select a particular successor, just count all of
1586  // them.
1587  for (unsigned TIdx = 0, TSize = TI->getNumSuccessors(); TIdx != TSize;
1588  ++TIdx)
1589  BBWorklist.insert(TI->getSuccessor(TIdx));
1590 
1591  // If we had any successors at this point, then post-inlining is likely to
1592  // have them as well. Note that we assume any basic blocks which existed
1593  // due to branches or switches which folded above will also fold after
1594  // inlining.
1595  if (SingleBB && TI->getNumSuccessors() > 1) {
1596  // Take off the bonus we applied to the threshold.
1597  Threshold -= SingleBBBonus;
1598  SingleBB = false;
1599  }
1600  }
1601 
1602  bool OnlyOneCallAndLocalLinkage =
1603  F.hasLocalLinkage() && F.hasOneUse() && &F == CS.getCalledFunction();
1604  // If this is a noduplicate call, we can still inline as long as
1605  // inlining this would cause the removal of the caller (so the instruction
1606  // is not actually duplicated, just moved).
1607  if (!OnlyOneCallAndLocalLinkage && ContainsNoDuplicateCall)
1608  return false;
1609 
1610  // We applied the maximum possible vector bonus at the beginning. Now,
1611  // subtract the excess bonus, if any, from the Threshold before
1612  // comparing against Cost.
1613  if (NumVectorInstructions <= NumInstructions / 10)
1614  Threshold -= VectorBonus;
1615  else if (NumVectorInstructions <= NumInstructions / 2)
1616  Threshold -= VectorBonus/2;
1617 
1618  return Cost < std::max(1, Threshold);
1619 }
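// A sketch of the bonus arithmetic above, assuming the default
// -inline-threshold of 225 and the percentages updateThreshold uses in this
// file (50% single-basic-block bonus, 150% vector bonus):
//
//   Threshold = 225 + 112 + 337 = 674   // speculative upper bound
//
// A multi-block callee gives the 112 back as soon as a second block is seen;
// a callee with few vector instructions gives back some or all of the 337
// before the final 'Cost < std::max(1, Threshold)' comparison.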
1620 
1621 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1622 /// \brief Dump stats about this call's analysis.
1623 LLVM_DUMP_METHOD void CallAnalyzer::dump() {
1624 #define DEBUG_PRINT_STAT(x) dbgs() << " " #x ": " << x << "\n"
1625  DEBUG_PRINT_STAT(NumConstantArgs);
1626  DEBUG_PRINT_STAT(NumConstantOffsetPtrArgs);
1627  DEBUG_PRINT_STAT(NumAllocaArgs);
1628  DEBUG_PRINT_STAT(NumConstantPtrCmps);
1629  DEBUG_PRINT_STAT(NumConstantPtrDiffs);
1630  DEBUG_PRINT_STAT(NumInstructionsSimplified);
1631  DEBUG_PRINT_STAT(NumInstructions);
1632  DEBUG_PRINT_STAT(SROACostSavings);
1633  DEBUG_PRINT_STAT(SROACostSavingsLost);
1634  DEBUG_PRINT_STAT(ContainsNoDuplicateCall);
1635  DEBUG_PRINT_STAT(Cost);
1636  DEBUG_PRINT_STAT(Threshold);
1637 #undef DEBUG_PRINT_STAT
1638 }
1639 #endif
1640 
1641 /// \brief Test that there are no attribute conflicts between Caller and Callee
1642 /// that prevent inlining.
1643 static bool functionsHaveCompatibleAttributes(Function *Caller,
1644  Function *Callee,
1645  TargetTransformInfo &TTI) {
1646  return TTI.areInlineCompatible(Caller, Callee) &&
1647  AttributeFuncs::areInlineCompatible(*Caller, *Callee);
1648 }
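// For instance (an illustrative case, not an exhaustive list): on x86 the TTI
// hook typically refuses to inline a callee carrying
// "target-features"="+avx2" into a caller compiled without AVX2, since the
// inlined body could execute instructions the caller's feature set does not
// guarantee are available.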
1649 
1650 int llvm::getCallsiteCost(CallSite CS, const DataLayout &DL) {
1651  int Cost = 0;
1652  for (unsigned I = 0, E = CS.arg_size(); I != E; ++I) {
1653  if (CS.isByValArgument(I)) {
1654  // We approximate the number of loads and stores needed by dividing the
1655  // size of the byval type by the target's pointer size.
1656  PointerType *PTy = cast<PointerType>(CS.getArgument(I)->getType());
1657  unsigned TypeSize = DL.getTypeSizeInBits(PTy->getElementType());
1658  unsigned PointerSize = DL.getPointerSizeInBits();
1659  // Ceiling division.
1660  unsigned NumStores = (TypeSize + PointerSize - 1) / PointerSize;
1661 
1662  // If it generates more than 8 stores it is likely to be expanded as an
1663  // inline memcpy so we take that as an upper bound. Otherwise we assume
1664  // one load and one store per word copied.
1665  // FIXME: The maxStoresPerMemcpy setting from the target should be used
1666  // here instead of a magic number of 8, but it's not available via
1667  // DataLayout.
1668  NumStores = std::min(NumStores, 8U);
1669 
1670  Cost += 2 * NumStores * InlineConstants::InstrCost;
1671  } else {
1672  // For non-byval arguments subtract off one instruction per call
1673  // argument.
1674  Cost += InlineConstants::InstrCost;
1675  }
1676  }
1677  // The call instruction also disappears after inlining.
1678  Cost += InlineConstants::InstrCost + InlineConstants::CallPenalty;
1679  return Cost;
1680 }
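// A worked example of the byval accounting above (a sketch assuming a 64-bit
// target, with InlineConstants::InstrCost = 5 and CallPenalty = 25): passing a
// 256-bit struct byval gives
//
//   NumStores = ceil(256 / 64) = 4
//   Cost += 2 * 4 * 5 = 40        // ~4 loads plus 4 stores
//
// and every call site, byval or not, also gets InstrCost + CallPenalty = 30
// for the call instruction itself, which disappears after inlining.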
1681 
1682 InlineCost llvm::getInlineCost(
1683  CallSite CS, const InlineParams &Params, TargetTransformInfo &CalleeTTI,
1684  std::function<AssumptionCache &(Function &)> &GetAssumptionCache,
1685  Optional<function_ref<BlockFrequencyInfo &(Function &)>> GetBFI,
1686  ProfileSummaryInfo *PSI, OptimizationRemarkEmitter *ORE) {
1687  return getInlineCost(CS, CS.getCalledFunction(), Params, CalleeTTI,
1688  GetAssumptionCache, GetBFI, PSI, ORE);
1689 }
1690 
1691 InlineCost llvm::getInlineCost(
1692  CallSite CS, Function *Callee, const InlineParams &Params,
1693  TargetTransformInfo &CalleeTTI,
1694  std::function<AssumptionCache &(Function &)> &GetAssumptionCache,
1695  Optional<function_ref<BlockFrequencyInfo &(Function &)>> GetBFI,
1696  ProfileSummaryInfo *PSI, OptimizationRemarkEmitter *ORE) {
1697 
1698  // Cannot inline indirect calls.
1699  if (!Callee)
1700  return llvm::InlineCost::getNever();
1701 
1702  // Calls to functions with always-inline attributes should be inlined
1703  // whenever possible.
1704  if (CS.hasFnAttr(Attribute::AlwaysInline)) {
1705  if (isInlineViable(*Callee))
1706  return llvm::InlineCost::getAlways();
1707  return llvm::InlineCost::getNever();
1708  }
1709 
1710  // Never inline functions with conflicting attributes (unless callee has
1711  // always-inline attribute).
1712  Function *Caller = CS.getCaller();
1713  if (!functionsHaveCompatibleAttributes(Caller, Callee, CalleeTTI))
1714  return llvm::InlineCost::getNever();
1715 
1716  // Don't inline this call if the caller has the optnone attribute.
1717  if (Caller->hasFnAttribute(Attribute::OptimizeNone))
1718  return llvm::InlineCost::getNever();
1719 
1720  // Don't inline functions which can be interposed at link-time. Don't inline
1721  // functions marked noinline or call sites marked noinline.
1722  // Note: inlining non-exact non-interposable functions is fine, since we know
1723  // we have *a* correct implementation of the source level function.
1724  if (Callee->isInterposable() || Callee->hasFnAttribute(Attribute::NoInline) ||
1725  CS.isNoInline())
1726  return llvm::InlineCost::getNever();
1727 
1728  DEBUG(llvm::dbgs() << " Analyzing call of " << Callee->getName()
1729  << "... (caller:" << Caller->getName() << ")\n");
1730 
1731  CallAnalyzer CA(CalleeTTI, GetAssumptionCache, GetBFI, PSI, ORE, *Callee, CS,
1732  Params);
1733  bool ShouldInline = CA.analyzeCall(CS);
1734 
1735  DEBUG(CA.dump());
1736 
1737  // Check if there was a reason to force inlining or no inlining.
1738  if (!ShouldInline && CA.getCost() < CA.getThreshold())
1739  return InlineCost::getNever();
1740  if (ShouldInline && CA.getCost() >= CA.getThreshold())
1741  return InlineCost::getAlways();
1742 
1743  return llvm::InlineCost::get(CA.getCost(), CA.getThreshold());
1744 }
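// A minimal usage sketch (assuming the caller already holds the analyses; the
// names ACT, PSI, CalleeTTI, and attemptToInline are placeholders for whatever
// providers and inlining logic the client has):
//
//   std::function<AssumptionCache &(Function &)> GetAC =
//       [&](Function &F) -> AssumptionCache & {
//     return ACT->getAssumptionCache(F);
//   };
//   InlineCost IC =
//       getInlineCost(CS, getInlineParams(), CalleeTTI, GetAC, None, PSI);
//   if (IC) // operator bool: Cost < Threshold, and true for getAlways()
//     attemptToInline(CS);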
1745 
1746 bool llvm::isInlineViable(Function &F) {
1747  bool ReturnsTwice = F.hasFnAttribute(Attribute::ReturnsTwice);
1748  for (Function::iterator BI = F.begin(), BE = F.end(); BI != BE; ++BI) {
1749  // Disallow inlining of functions which contain indirect branches or
1750  // blockaddresses.
1751  if (isa<IndirectBrInst>(BI->getTerminator()) || BI->hasAddressTaken())
1752  return false;
1753 
1754  for (auto &II : *BI) {
1755  CallSite CS(&II);
1756  if (!CS)
1757  continue;
1758 
1759  // Disallow recursive calls.
1760  if (&F == CS.getCalledFunction())
1761  return false;
1762 
1763  // Disallow calls which expose returns-twice to a function not previously
1764  // attributed as such.
1765  if (!ReturnsTwice && CS.isCall() &&
1766  cast<CallInst>(CS.getInstruction())->canReturnTwice())
1767  return false;
1768 
1769  // Disallow inlining functions that call @llvm.localescape. Doing this
1770  // correctly would require major changes to the inliner.
1771  if (CS.getCalledFunction() &&
1772  CS.getCalledFunction()->getIntrinsicID() ==
1773  llvm::Intrinsic::localescape)
1774  return false;
1775  }
1776  }
1777 
1778  return true;
1779 }
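// For example (illustrative IR), a function like
//
//   define void @f() {
//   entry:
//     indirectbr i8* blockaddress(@f, %bb), [label %bb]
//   bb:
//     ret void
//   }
//
// is rejected here because of the indirect branch, even if it carries
// alwaysinline; getInlineCost then maps that failure to InlineCost::getNever().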
1780 
1781 // APIs to create InlineParams based on command line flags and/or other
1782 // parameters.
1783 
1784 InlineParams llvm::getInlineParams(int Threshold) {
1785  InlineParams Params;
1786 
1787  // This field is the threshold to use for a callee by default. This is
1788  // derived from one or more of:
1789  // * optimization or size-optimization levels,
1790  // * a value passed to createFunctionInliningPass function, or
1791  // * the -inline-threshold flag.
1792  // If the -inline-threshold flag is explicitly specified, that is used
1793  // irrespective of anything else.
1794  if (InlineThreshold.getNumOccurrences() > 0)
1795  Params.DefaultThreshold = InlineThreshold;
1796  else
1797  Params.DefaultThreshold = Threshold;
1798 
1799  // Set the HintThreshold knob from the -inlinehint-threshold.
1800  Params.HintThreshold = HintThreshold;
1801 
1802  // Set the HotCallSiteThreshold knob from the -hot-callsite-threshold.
1803  Params.HotCallSiteThreshold = HotCallSiteThreshold;
1804 
1805  // If the -locally-hot-callsite-threshold is explicitly specified, use it to
1806  // populate LocallyHotCallSiteThreshold. Later, we populate
1807  // Params.LocallyHotCallSiteThreshold from -locally-hot-callsite-threshold if
1808  // we know that optimization level is O3 (in the getInlineParams variant that
1809  // takes the opt and size levels).
1810  // FIXME: Remove this check (and make the assignment unconditional) after
1811  // addressing size regression issues at O2.
1812  if (LocallyHotCallSiteThreshold.getNumOccurrences() > 0)
1813  Params.LocallyHotCallSiteThreshold = LocallyHotCallSiteThreshold;
1814 
1815  // Set the ColdCallSiteThreshold knob from the -inline-cold-callsite-threshold.
1816  Params.ColdCallSiteThreshold = ColdCallSiteThreshold;
1817 
1818  // Set the OptMinSizeThreshold and OptSizeThreshold params only if the
1819  // -inline-threshold command-line option is not explicitly given. If that
1820  // option is present, then its value applies even for callees with size and
1821  // minsize attributes.
1822  // If the -inline-threshold is not specified, set the ColdThreshold from the
1823  // -inlinecold-threshold even if it is not explicitly passed. If
1824  // -inline-threshold is specified, then -inlinecold-threshold needs to be
1825  // explicitly specified to set the ColdThreshold knob.
1826  if (InlineThreshold.getNumOccurrences() == 0) {
1827  Params.OptMinSizeThreshold = InlineConstants::OptMinSizeThreshold;
1828  Params.OptSizeThreshold = InlineConstants::OptSizeThreshold;
1829  Params.ColdThreshold = ColdThreshold;
1830  } else if (ColdThreshold.getNumOccurrences() > 0) {
1831  Params.ColdThreshold = ColdThreshold;
1832  }
1833  return Params;
1834 }
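// A sketch of the precedence described above (flag defaults from this file:
// -inline-threshold=225, -inlinecold-threshold=45):
//
//   InlineParams P = getInlineParams(/*Threshold=*/275);
//   // No flags passed:        P.DefaultThreshold == 275, the size thresholds
//   //                         come from InlineConstants, P.ColdThreshold == 45.
//   // -inline-threshold=500:  P.DefaultThreshold == 500, and ColdThreshold is
//   //                         populated only if -inlinecold-threshold was also
//   //                         given explicitly.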
1835 
1836 InlineParams llvm::getInlineParams() {
1837  return getInlineParams(InlineThreshold);
1838 }
1839 
1840 // Compute the default threshold for inlining based on the opt level and the
1841 // size opt level.
1842 static int computeThresholdFromOptLevels(unsigned OptLevel,
1843  unsigned SizeOptLevel) {
1844  if (OptLevel > 2)
1845  return InlineConstants::OptAggressiveThreshold;
1846  if (SizeOptLevel == 1) // -Os
1847  return InlineConstants::OptSizeThreshold;
1848  if (SizeOptLevel == 2) // -Oz
1849  return InlineConstants::OptMinSizeThreshold;
1850  return InlineThreshold;
1851 }
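// The resulting mapping (InlineConstants values are defined in InlineCost.h):
//
//   computeThresholdFromOptLevels(3, 0); // OptAggressiveThreshold (-O3)
//   computeThresholdFromOptLevels(2, 1); // OptSizeThreshold (-Os)
//   computeThresholdFromOptLevels(2, 2); // OptMinSizeThreshold (-Oz)
//   computeThresholdFromOptLevels(2, 0); // InlineThreshold, 225 by default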
1852 
1853 InlineParams llvm::getInlineParams(unsigned OptLevel, unsigned SizeOptLevel) {
1854  auto Params =
1855  getInlineParams(computeThresholdFromOptLevels(OptLevel, SizeOptLevel));
1856  // At O3, use the value of -locally-hot-callsite-threshold option to populate
1857  // Params.LocallyHotCallSiteThreshold. Below O3, this flag has effect only
1858  // when it is specified explicitly.
1859  if (OptLevel > 2)
1860  Params.LocallyHotCallSiteThreshold = LocallyHotCallSiteThreshold;
1861  return Params;
1862 }
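// E.g. (a sketch of the O3-only behavior above): getInlineParams(3, 0) sets
// Params.LocallyHotCallSiteThreshold (525 by default), whereas
// getInlineParams(2, 0) leaves it unset unless the flag was passed explicitly.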