LLVM 8.0.0svn
InlineCost.cpp
1 //===- InlineCost.cpp - Cost analysis for inliner -------------------------===//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This file implements inline cost analysis.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #include "llvm/Analysis/InlineCost.h"
15 #include "llvm/ADT/STLExtras.h"
16 #include "llvm/ADT/SetVector.h"
17 #include "llvm/ADT/SmallPtrSet.h"
18 #include "llvm/ADT/SmallVector.h"
19 #include "llvm/ADT/Statistic.h"
24 #include "llvm/Analysis/CFG.h"
29 #include "llvm/Config/llvm-config.h"
30 #include "llvm/IR/CallSite.h"
31 #include "llvm/IR/CallingConv.h"
32 #include "llvm/IR/DataLayout.h"
34 #include "llvm/IR/GlobalAlias.h"
35 #include "llvm/IR/InstVisitor.h"
36 #include "llvm/IR/IntrinsicInst.h"
37 #include "llvm/IR/Operator.h"
38 #include "llvm/Support/Debug.h"
40 
41 using namespace llvm;
42 
43 #define DEBUG_TYPE "inline-cost"
44 
45 STATISTIC(NumCallsAnalyzed, "Number of call sites analyzed");
46 
47 static cl::opt<int> InlineThreshold(
48  "inline-threshold", cl::Hidden, cl::init(225), cl::ZeroOrMore,
49  cl::desc("Control the amount of inlining to perform (default = 225)"));
50 
51 static cl::opt<int> HintThreshold(
52  "inlinehint-threshold", cl::Hidden, cl::init(325),
53  cl::desc("Threshold for inlining functions with inline hint"));
54 
55 static cl::opt<int>
56  ColdCallSiteThreshold("inline-cold-callsite-threshold", cl::Hidden,
57  cl::init(45),
58  cl::desc("Threshold for inlining cold callsites"));
59 
60 // We introduce this threshold to help the performance of instrumentation-based
61 // PGO before we actually hook up the inliner with analysis passes such as BPI
62 // and BFI.
63 static cl::opt<int> ColdThreshold(
64  "inlinecold-threshold", cl::Hidden, cl::init(45),
65  cl::desc("Threshold for inlining functions with cold attribute"));
66 
67 static cl::opt<int>
68  HotCallSiteThreshold("hot-callsite-threshold", cl::Hidden, cl::init(3000),
69  cl::ZeroOrMore,
70  cl::desc("Threshold for hot callsites "));
71 
72 static cl::opt<int> LocallyHotCallSiteThreshold(
73  "locally-hot-callsite-threshold", cl::Hidden, cl::init(525), cl::ZeroOrMore,
74  cl::desc("Threshold for locally hot callsites "));
75 
76 static cl::opt<int> ColdCallSiteRelFreq(
77  "cold-callsite-rel-freq", cl::Hidden, cl::init(2), cl::ZeroOrMore,
78  cl::desc("Maximum block frequency, expressed as a percentage of caller's "
79  "entry frequency, for a callsite to be cold in the absence of "
80  "profile information."));
81 
82 static cl::opt<int> HotCallSiteRelFreq(
83  "hot-callsite-rel-freq", cl::Hidden, cl::init(60), cl::ZeroOrMore,
84  cl::desc("Minimum block frequency, expressed as a multiple of caller's "
85  "entry frequency, for a callsite to be hot in the absence of "
86  "profile information."));
87 
88 static cl::opt<bool> OptComputeFullInlineCost(
89  "inline-cost-full", cl::Hidden, cl::init(false),
90  cl::desc("Compute the full inline cost of a call site even when the cost "
91  "exceeds the threshold."));
92 
93 namespace {
94 
95 class CallAnalyzer : public InstVisitor<CallAnalyzer, bool> {
96  typedef InstVisitor<CallAnalyzer, bool> Base;
97  friend class InstVisitor<CallAnalyzer, bool>;
98 
99  /// The TargetTransformInfo available for this compilation.
100  const TargetTransformInfo &TTI;
101 
102  /// Getter for the cache of @llvm.assume intrinsics.
103  std::function<AssumptionCache &(Function &)> &GetAssumptionCache;
104 
105  /// Getter for BlockFrequencyInfo
106  Optional<function_ref<BlockFrequencyInfo &(Function &)>> &GetBFI;
107 
108  /// Profile summary information.
109  ProfileSummaryInfo *PSI;
110 
111  /// The called function.
112  Function &F;
113 
114  // Cache the DataLayout since we use it a lot.
115  const DataLayout &DL;
116 
117  /// The OptimizationRemarkEmitter available for this compilation.
118  OptimizationRemarkEmitter *ORE;
119 
120  /// The candidate callsite being analyzed. Please do not use this to do
121  /// analysis in the caller function; we want the inline cost query to be
122  /// easily cacheable. Instead, use the cover function paramHasAttr.
123  CallSite CandidateCS;
124 
125  /// Tunable parameters that control the analysis.
126  const InlineParams &Params;
127 
128  int Threshold;
129  int Cost;
130  bool ComputeFullInlineCost;
131 
132  bool IsCallerRecursive;
133  bool IsRecursiveCall;
134  bool ExposesReturnsTwice;
135  bool HasDynamicAlloca;
136  bool ContainsNoDuplicateCall;
137  bool HasReturn;
138  bool HasIndirectBr;
139  bool HasUninlineableIntrinsic;
140  bool InitsVargArgs;
141 
142  /// Number of bytes allocated statically by the callee.
143  uint64_t AllocatedSize;
144  unsigned NumInstructions, NumVectorInstructions;
145  int VectorBonus, TenPercentVectorBonus;
146  // Bonus to be applied when the callee has only one reachable basic block.
147  int SingleBBBonus;
148 
149  /// While we walk the potentially-inlined instructions, we build up and
150  /// maintain a mapping of simplified values specific to this callsite. The
151  /// idea is to propagate any special information we have about arguments to
152  /// this call through the inlinable section of the function, and account for
153  /// likely simplifications post-inlining. The most important aspect we track
154  /// is CFG altering simplifications -- when we prove a basic block dead, that
155  /// can cause dramatic shifts in the cost of inlining a function.
156  DenseMap<Value *, Constant *> SimplifiedValues;
157 
158  /// Keep track of the values which map back (through function arguments) to
159  /// allocas on the caller stack which could be simplified through SROA.
160  DenseMap<Value *, Value *> SROAArgValues;
161 
162  /// The mapping of caller Alloca values to their accumulated cost savings. If
163  /// we have to disable SROA for one of the allocas, this tells us how much
164  /// cost must be added.
165  DenseMap<Value *, int> SROAArgCosts;
166 
167  /// Keep track of values which map to a pointer base and constant offset.
168  DenseMap<Value *, std::pair<Value *, APInt>> ConstantOffsetPtrs;
169 
170  /// Keep track of dead blocks due to the constant arguments.
171  SetVector<BasicBlock *> DeadBlocks;
172 
173  /// The mapping of the blocks to their known unique successors due to the
174  /// constant arguments.
175  DenseMap<BasicBlock *, BasicBlock *> KnownSuccessors;
176 
177  /// Model the elimination of repeated loads that is expected to happen
178  /// whenever we simplify away the stores that would otherwise clobber them and
179  /// force the loads to be repeated.
180  bool EnableLoadElimination;
181  SmallPtrSet<Value *, 16> LoadAddrSet;
182  int LoadEliminationCost;
183 
184  // Custom simplification helper routines.
185  bool isAllocaDerivedArg(Value *V);
186  bool lookupSROAArgAndCost(Value *V, Value *&Arg,
187  DenseMap<Value *, int>::iterator &CostIt);
188  void disableSROA(DenseMap<Value *, int>::iterator CostIt);
189  void disableSROA(Value *V);
190  void findDeadBlocks(BasicBlock *CurrBB, BasicBlock *NextBB);
191  void accumulateSROACost(DenseMap<Value *, int>::iterator CostIt,
192  int InstructionCost);
193  void disableLoadElimination();
194  bool isGEPFree(GetElementPtrInst &GEP);
195  bool canFoldInboundsGEP(GetElementPtrInst &I);
196  bool accumulateGEPOffset(GEPOperator &GEP, APInt &Offset);
197  bool simplifyCallSite(Function *F, CallSite CS);
198  template <typename Callable>
199  bool simplifyInstruction(Instruction &I, Callable Evaluate);
200  ConstantInt *stripAndComputeInBoundsConstantOffsets(Value *&V);
201 
202  /// Return true if the given argument to the function being considered for
203  /// inlining has the given attribute set either at the call site or the
204  /// function declaration. Primarily used to inspect call site specific
205  /// attributes since these can be more precise than the ones on the callee
206  /// itself.
207  bool paramHasAttr(Argument *A, Attribute::AttrKind Attr);
208 
209  /// Return true if the given value is known non null within the callee if
210  /// inlined through this particular callsite.
211  bool isKnownNonNullInCallee(Value *V);
212 
213  /// Update Threshold based on callsite properties such as callee
214  /// attributes and callee hotness for PGO builds. The Callee is explicitly
215  /// passed to support analyzing indirect calls whose target is inferred by
216  /// analysis.
217  void updateThreshold(CallSite CS, Function &Callee);
218 
219  /// Return true if size growth is allowed when inlining the callee at CS.
220  bool allowSizeGrowth(CallSite CS);
221 
222  /// Return true if \p CS is a cold callsite.
223  bool isColdCallSite(CallSite CS, BlockFrequencyInfo *CallerBFI);
224 
225  /// Return a higher threshold if \p CS is a hot callsite.
226  Optional<int> getHotCallSiteThreshold(CallSite CS,
227  BlockFrequencyInfo *CallerBFI);
228 
229  // Custom analysis routines.
230  InlineResult analyzeBlock(BasicBlock *BB,
231  SmallPtrSetImpl<const Value *> &EphValues);
232 
233  // Disable several entry points to the visitor so we don't accidentally use
234  // them by declaring but not defining them here.
235  void visit(Module *);
236  void visit(Module &);
237  void visit(Function *);
238  void visit(Function &);
239  void visit(BasicBlock *);
240  void visit(BasicBlock &);
241 
242  // Provide base case for our instruction visit.
243  bool visitInstruction(Instruction &I);
244 
245  // Our visit overrides.
246  bool visitAlloca(AllocaInst &I);
247  bool visitPHI(PHINode &I);
248  bool visitGetElementPtr(GetElementPtrInst &I);
249  bool visitBitCast(BitCastInst &I);
250  bool visitPtrToInt(PtrToIntInst &I);
251  bool visitIntToPtr(IntToPtrInst &I);
252  bool visitCastInst(CastInst &I);
253  bool visitUnaryInstruction(UnaryInstruction &I);
254  bool visitCmpInst(CmpInst &I);
255  bool visitSub(BinaryOperator &I);
256  bool visitBinaryOperator(BinaryOperator &I);
257  bool visitLoad(LoadInst &I);
258  bool visitStore(StoreInst &I);
259  bool visitExtractValue(ExtractValueInst &I);
260  bool visitInsertValue(InsertValueInst &I);
261  bool visitCallSite(CallSite CS);
262  bool visitReturnInst(ReturnInst &RI);
263  bool visitBranchInst(BranchInst &BI);
264  bool visitSelectInst(SelectInst &SI);
265  bool visitSwitchInst(SwitchInst &SI);
266  bool visitIndirectBrInst(IndirectBrInst &IBI);
267  bool visitResumeInst(ResumeInst &RI);
268  bool visitCleanupReturnInst(CleanupReturnInst &RI);
269  bool visitCatchReturnInst(CatchReturnInst &RI);
270  bool visitUnreachableInst(UnreachableInst &I);
271 
272 public:
273  CallAnalyzer(const TargetTransformInfo &TTI,
274  std::function<AssumptionCache &(Function &)> &GetAssumptionCache,
275  Optional<function_ref<BlockFrequencyInfo &(Function &)>> &GetBFI,
276  ProfileSummaryInfo *PSI, OptimizationRemarkEmitter *ORE,
277  Function &Callee, CallSite CSArg, const InlineParams &Params)
278  : TTI(TTI), GetAssumptionCache(GetAssumptionCache), GetBFI(GetBFI),
279  PSI(PSI), F(Callee), DL(F.getParent()->getDataLayout()), ORE(ORE),
280  CandidateCS(CSArg), Params(Params), Threshold(Params.DefaultThreshold),
281  Cost(0), ComputeFullInlineCost(OptComputeFullInlineCost ||
282  Params.ComputeFullInlineCost || ORE),
283  IsCallerRecursive(false), IsRecursiveCall(false),
284  ExposesReturnsTwice(false), HasDynamicAlloca(false),
285  ContainsNoDuplicateCall(false), HasReturn(false), HasIndirectBr(false),
286  HasUninlineableIntrinsic(false), InitsVargArgs(false), AllocatedSize(0),
287  NumInstructions(0), NumVectorInstructions(0), VectorBonus(0),
288  SingleBBBonus(0), EnableLoadElimination(true), LoadEliminationCost(0),
289  NumConstantArgs(0), NumConstantOffsetPtrArgs(0), NumAllocaArgs(0),
290  NumConstantPtrCmps(0), NumConstantPtrDiffs(0),
291  NumInstructionsSimplified(0), SROACostSavings(0),
292  SROACostSavingsLost(0) {}
293 
294  InlineResult analyzeCall(CallSite CS);
295 
296  int getThreshold() { return Threshold; }
297  int getCost() { return Cost; }
298 
299  // Keep a bunch of stats about the cost savings found so we can print them
300  // out when debugging.
301  unsigned NumConstantArgs;
302  unsigned NumConstantOffsetPtrArgs;
303  unsigned NumAllocaArgs;
304  unsigned NumConstantPtrCmps;
305  unsigned NumConstantPtrDiffs;
306  unsigned NumInstructionsSimplified;
307  unsigned SROACostSavings;
308  unsigned SROACostSavingsLost;
309 
310  void dump();
311 };
312 
313 } // namespace
314 
315 /// Test whether the given value is an Alloca-derived function argument.
316 bool CallAnalyzer::isAllocaDerivedArg(Value *V) {
317  return SROAArgValues.count(V);
318 }
319 
320 /// Lookup the SROA-candidate argument and cost iterator which V maps to.
321 /// Returns false if V does not map to a SROA-candidate.
322 bool CallAnalyzer::lookupSROAArgAndCost(
323  Value *V, Value *&Arg, DenseMap<Value *, int>::iterator &CostIt) {
324  if (SROAArgValues.empty() || SROAArgCosts.empty())
325  return false;
326 
327  DenseMap<Value *, Value *>::iterator ArgIt = SROAArgValues.find(V);
328  if (ArgIt == SROAArgValues.end())
329  return false;
330 
331  Arg = ArgIt->second;
332  CostIt = SROAArgCosts.find(Arg);
333  return CostIt != SROAArgCosts.end();
334 }
335 
336 /// Disable SROA for the candidate marked by this cost iterator.
337 ///
338 /// This marks the candidate as no longer viable for SROA, and adds the cost
339 /// savings associated with it back into the inline cost measurement.
340 void CallAnalyzer::disableSROA(DenseMap<Value *, int>::iterator CostIt) {
341  // If we're no longer able to perform SROA we need to undo its cost savings
342  // and prevent subsequent analysis.
343  Cost += CostIt->second;
344  SROACostSavings -= CostIt->second;
345  SROACostSavingsLost += CostIt->second;
346  SROAArgCosts.erase(CostIt);
347  disableLoadElimination();
348 }
349 
350 /// If 'V' maps to a SROA candidate, disable SROA for it.
351 void CallAnalyzer::disableSROA(Value *V) {
352  Value *SROAArg;
353  DenseMap<Value *, int>::iterator CostIt;
354  if (lookupSROAArgAndCost(V, SROAArg, CostIt))
355  disableSROA(CostIt);
356 }
357 
358 /// Accumulate the given cost for a particular SROA candidate.
359 void CallAnalyzer::accumulateSROACost(DenseMap<Value *, int>::iterator CostIt,
360  int InstructionCost) {
361  CostIt->second += InstructionCost;
362  SROACostSavings += InstructionCost;
363 }
364 
365 void CallAnalyzer::disableLoadElimination() {
366  if (EnableLoadElimination) {
367  Cost += LoadEliminationCost;
368  LoadEliminationCost = 0;
369  EnableLoadElimination = false;
370  }
371 }
372 
373 /// Accumulate a constant GEP offset into an APInt if possible.
374 ///
375 /// Returns false if unable to compute the offset for any reason. Respects any
376 /// simplified values known during the analysis of this callsite.
377 bool CallAnalyzer::accumulateGEPOffset(GEPOperator &GEP, APInt &Offset) {
378  unsigned IntPtrWidth = DL.getIndexTypeSizeInBits(GEP.getType());
379  assert(IntPtrWidth == Offset.getBitWidth());
380 
381  for (gep_type_iterator GTI = gep_type_begin(GEP), GTE = gep_type_end(GEP);
382  GTI != GTE; ++GTI) {
383  ConstantInt *OpC = dyn_cast<ConstantInt>(GTI.getOperand());
384  if (!OpC)
385  if (Constant *SimpleOp = SimplifiedValues.lookup(GTI.getOperand()))
386  OpC = dyn_cast<ConstantInt>(SimpleOp);
387  if (!OpC)
388  return false;
389  if (OpC->isZero())
390  continue;
391 
392  // Handle a struct index, which adds its field offset to the pointer.
393  if (StructType *STy = GTI.getStructTypeOrNull()) {
394  unsigned ElementIdx = OpC->getZExtValue();
395  const StructLayout *SL = DL.getStructLayout(STy);
396  Offset += APInt(IntPtrWidth, SL->getElementOffset(ElementIdx));
397  continue;
398  }
399 
400  APInt TypeSize(IntPtrWidth, DL.getTypeAllocSize(GTI.getIndexedType()));
401  Offset += OpC->getValue().sextOrTrunc(IntPtrWidth) * TypeSize;
402  }
403  return true;
404 }
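// Worked example (illustrative only; offsets assume a typical 64-bit data
// layout where the struct below is laid out as {offset 0: i32, offset 8: i64}):
//   %S = type { i32, i64 }                           ; alloc size 16
//   %p = getelementptr inbounds %S, %S* %base, i64 1, i32 1
// accumulateGEPOffset would fold this to Offset = 1 * 16 + 8 = 24 bytes,
// provided both indices are constants or simplify to constants here.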
405 
406 /// Use TTI to check whether a GEP is free.
407 ///
408 /// Respects any simplified values known during the analysis of this callsite.
409 bool CallAnalyzer::isGEPFree(GetElementPtrInst &GEP) {
410  SmallVector<Value *, 4> Operands;
411  Operands.push_back(GEP.getOperand(0));
412  for (User::op_iterator I = GEP.idx_begin(), E = GEP.idx_end(); I != E; ++I)
413  if (Constant *SimpleOp = SimplifiedValues.lookup(*I))
414  Operands.push_back(SimpleOp);
415  else
416  Operands.push_back(*I);
417  return TargetTransformInfo::TCC_Free == TTI.getUserCost(&GEP, Operands);
418 }
419 
420 bool CallAnalyzer::visitAlloca(AllocaInst &I) {
421  // Check whether inlining will turn a dynamic alloca into a static
422  // alloca and handle that case.
423  if (I.isArrayAllocation()) {
424  Constant *Size = SimplifiedValues.lookup(I.getArraySize());
425  if (auto *AllocSize = dyn_cast_or_null<ConstantInt>(Size)) {
426  Type *Ty = I.getAllocatedType();
427  AllocatedSize = SaturatingMultiplyAdd(
428  AllocSize->getLimitedValue(), DL.getTypeAllocSize(Ty), AllocatedSize);
429  return Base::visitAlloca(I);
430  }
431  }
432 
433  // Accumulate the allocated size.
434  if (I.isStaticAlloca()) {
435  Type *Ty = I.getAllocatedType();
436  AllocatedSize = SaturatingAdd(DL.getTypeAllocSize(Ty), AllocatedSize);
437  }
438 
439  // We will happily inline static alloca instructions.
440  if (I.isStaticAlloca())
441  return Base::visitAlloca(I);
442 
443  // FIXME: This is overly conservative. Dynamic allocas are inefficient for
444  // a variety of reasons, and so we would like to not inline them into
445  // functions which don't currently have a dynamic alloca. This simply
446  // disables inlining altogether in the presence of a dynamic alloca.
447  HasDynamicAlloca = true;
448  return false;
449 }
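// Illustrative example (hypothetical IR, not from this file): for
//   %buf = alloca i32, i32 %n
// where %n has been simplified to the constant 16 at this call site, the code
// above accounts for 16 * 4 = 64 statically allocated bytes (assuming i32 has a
// 4-byte alloc size) and keeps the alloca inlinable; if %n stays unknown,
// HasDynamicAlloca is set and inlining is rejected.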
450 
451 bool CallAnalyzer::visitPHI(PHINode &I) {
452  // FIXME: We need to propagate SROA *disabling* through phi nodes, even
453 // though we don't want to propagate its bonuses. The idea is to disable
454  // SROA if it *might* be used in an inappropriate manner.
455 
456  // Phi nodes are always zero-cost.
457  // FIXME: Pointer sizes may differ between different address spaces, so do we
458  // need to use correct address space in the call to getPointerSizeInBits here?
459  // Or could we skip the getPointerSizeInBits call completely? As far as I can
460  // see the ZeroOffset is used as a dummy value, so we can probably use any
461  // bit width for the ZeroOffset?
462  APInt ZeroOffset = APInt::getNullValue(DL.getPointerSizeInBits(0));
463  bool CheckSROA = I.getType()->isPointerTy();
464 
465  // Track the constant or pointer with constant offset we've seen so far.
466  Constant *FirstC = nullptr;
467  std::pair<Value *, APInt> FirstBaseAndOffset = {nullptr, ZeroOffset};
468  Value *FirstV = nullptr;
469 
470  for (unsigned i = 0, e = I.getNumIncomingValues(); i != e; ++i) {
471  BasicBlock *Pred = I.getIncomingBlock(i);
472  // If the incoming block is dead, skip the incoming block.
473  if (DeadBlocks.count(Pred))
474  continue;
475  // If the parent block of phi is not the known successor of the incoming
476  // block, skip the incoming block.
477  BasicBlock *KnownSuccessor = KnownSuccessors[Pred];
478  if (KnownSuccessor && KnownSuccessor != I.getParent())
479  continue;
480 
481  Value *V = I.getIncomingValue(i);
482  // If the incoming value is this phi itself, skip the incoming value.
483  if (&I == V)
484  continue;
485 
486  Constant *C = dyn_cast<Constant>(V);
487  if (!C)
488  C = SimplifiedValues.lookup(V);
489 
490  std::pair<Value *, APInt> BaseAndOffset = {nullptr, ZeroOffset};
491  if (!C && CheckSROA)
492  BaseAndOffset = ConstantOffsetPtrs.lookup(V);
493 
494  if (!C && !BaseAndOffset.first)
495  // The incoming value is neither a constant nor a pointer with constant
496  // offset, exit early.
497  return true;
498 
499  if (FirstC) {
500  if (FirstC == C)
501  // If we've seen a constant incoming value before and it is the same
502  // constant we see this time, continue checking the next incoming value.
503  continue;
504  // Otherwise early exit because we either see a different constant or saw
505  // a constant before but we have a pointer with constant offset this time.
506  return true;
507  }
508 
509  if (FirstV) {
510  // The same logic as above, but check pointer with constant offset here.
511  if (FirstBaseAndOffset == BaseAndOffset)
512  continue;
513  return true;
514  }
515 
516  if (C) {
517  // This is the 1st time we've seen a constant, record it.
518  FirstC = C;
519  continue;
520  }
521 
522  // The remaining case is that this is the 1st time we've seen a pointer with
523  // constant offset, record it.
524  FirstV = V;
525  FirstBaseAndOffset = BaseAndOffset;
526  }
527 
528  // Check if we can map phi to a constant.
529  if (FirstC) {
530  SimplifiedValues[&I] = FirstC;
531  return true;
532  }
533 
534  // Check if we can map phi to a pointer with constant offset.
535  if (FirstBaseAndOffset.first) {
536  ConstantOffsetPtrs[&I] = FirstBaseAndOffset;
537 
538  Value *SROAArg;
539  DenseMap<Value *, int>::iterator CostIt;
540  if (lookupSROAArgAndCost(FirstV, SROAArg, CostIt))
541  SROAArgValues[&I] = SROAArg;
542  }
543 
544  return true;
545 }
546 
547 /// Check we can fold GEPs of constant-offset call site argument pointers.
548 /// This requires target data and inbounds GEPs.
549 ///
550 /// \return true if the specified GEP can be folded.
551 bool CallAnalyzer::canFoldInboundsGEP(GetElementPtrInst &I) {
552  // Check if we have a base + offset for the pointer.
553  std::pair<Value *, APInt> BaseAndOffset =
554  ConstantOffsetPtrs.lookup(I.getPointerOperand());
555  if (!BaseAndOffset.first)
556  return false;
557 
558  // Check if the offset of this GEP is constant, and if so accumulate it
559  // into Offset.
560  if (!accumulateGEPOffset(cast<GEPOperator>(I), BaseAndOffset.second))
561  return false;
562 
563  // Add the result as a new mapping to Base + Offset.
564  ConstantOffsetPtrs[&I] = BaseAndOffset;
565 
566  return true;
567 }
568 
569 bool CallAnalyzer::visitGetElementPtr(GetElementPtrInst &I) {
570  Value *SROAArg;
571  DenseMap<Value *, int>::iterator CostIt;
572  bool SROACandidate =
573  lookupSROAArgAndCost(I.getPointerOperand(), SROAArg, CostIt);
574 
575  // Lambda to check whether a GEP's indices are all constant.
576  auto IsGEPOffsetConstant = [&](GetElementPtrInst &GEP) {
577  for (User::op_iterator I = GEP.idx_begin(), E = GEP.idx_end(); I != E; ++I)
578  if (!isa<Constant>(*I) && !SimplifiedValues.lookup(*I))
579  return false;
580  return true;
581  };
582 
583  if ((I.isInBounds() && canFoldInboundsGEP(I)) || IsGEPOffsetConstant(I)) {
584  if (SROACandidate)
585  SROAArgValues[&I] = SROAArg;
586 
587  // Constant GEPs are modeled as free.
588  return true;
589  }
590 
591  // Variable GEPs will require math and will disable SROA.
592  if (SROACandidate)
593  disableSROA(CostIt);
594  return isGEPFree(I);
595 }
596 
597 /// Simplify \p I if its operands are constants and update SimplifiedValues.
598 /// \p Evaluate is a callable specific to instruction type that evaluates the
599 /// instruction when all the operands are constants.
600 template <typename Callable>
601 bool CallAnalyzer::simplifyInstruction(Instruction &I, Callable Evaluate) {
602  SmallVector<Constant *, 2> COps;
603  for (Value *Op : I.operands()) {
604  Constant *COp = dyn_cast<Constant>(Op);
605  if (!COp)
606  COp = SimplifiedValues.lookup(Op);
607  if (!COp)
608  return false;
609  COps.push_back(COp);
610  }
611  auto *C = Evaluate(COps);
612  if (!C)
613  return false;
614  SimplifiedValues[&I] = C;
615  return true;
616 }
617 
618 bool CallAnalyzer::visitBitCast(BitCastInst &I) {
619  // Propagate constants through bitcasts.
620  if (simplifyInstruction(I, [&](SmallVectorImpl<Constant *> &COps) {
621  return ConstantExpr::getBitCast(COps[0], I.getType());
622  }))
623  return true;
624 
625  // Track base/offsets through casts
626  std::pair<Value *, APInt> BaseAndOffset =
627  ConstantOffsetPtrs.lookup(I.getOperand(0));
628  // Casts don't change the offset, just wrap it up.
629  if (BaseAndOffset.first)
630  ConstantOffsetPtrs[&I] = BaseAndOffset;
631 
632  // Also look for SROA candidates here.
633  Value *SROAArg;
634  DenseMap<Value *, int>::iterator CostIt;
635  if (lookupSROAArgAndCost(I.getOperand(0), SROAArg, CostIt))
636  SROAArgValues[&I] = SROAArg;
637 
638  // Bitcasts are always zero cost.
639  return true;
640 }
641 
642 bool CallAnalyzer::visitPtrToInt(PtrToIntInst &I) {
643  // Propagate constants through ptrtoint.
644  if (simplifyInstruction(I, [&](SmallVectorImpl<Constant *> &COps) {
645  return ConstantExpr::getPtrToInt(COps[0], I.getType());
646  }))
647  return true;
648 
649  // Track base/offset pairs when converted to a plain integer provided the
650  // integer is large enough to represent the pointer.
651  unsigned IntegerSize = I.getType()->getScalarSizeInBits();
652  unsigned AS = I.getOperand(0)->getType()->getPointerAddressSpace();
653  if (IntegerSize >= DL.getPointerSizeInBits(AS)) {
654  std::pair<Value *, APInt> BaseAndOffset =
655  ConstantOffsetPtrs.lookup(I.getOperand(0));
656  if (BaseAndOffset.first)
657  ConstantOffsetPtrs[&I] = BaseAndOffset;
658  }
659 
660  // This is really weird. Technically, ptrtoint will disable SROA. However,
661  // unless that ptrtoint is *used* somewhere in the live basic blocks after
662  // inlining, it will be nuked, and SROA should proceed. All of the uses which
663  // would block SROA would also block SROA if applied directly to a pointer,
664  // and so we can just add the integer in here. The only places where SROA is
665  // preserved either cannot fire on an integer, or won't in-and-of themselves
666  // disable SROA (ext) w/o some later use that we would see and disable.
667  Value *SROAArg;
668  DenseMap<Value *, int>::iterator CostIt;
669  if (lookupSROAArgAndCost(I.getOperand(0), SROAArg, CostIt))
670  SROAArgValues[&I] = SROAArg;
671 
672  return TargetTransformInfo::TCC_Free == TTI.getUserCost(&I);
673 }
674 
675 bool CallAnalyzer::visitIntToPtr(IntToPtrInst &I) {
676  // Propagate constants through ptrtoint.
677  if (simplifyInstruction(I, [&](SmallVectorImpl<Constant *> &COps) {
678  return ConstantExpr::getIntToPtr(COps[0], I.getType());
679  }))
680  return true;
681 
682  // Track base/offset pairs when round-tripped through a pointer without
683  // modifications provided the integer is not too large.
684  Value *Op = I.getOperand(0);
685  unsigned IntegerSize = Op->getType()->getScalarSizeInBits();
686  if (IntegerSize <= DL.getPointerTypeSizeInBits(I.getType())) {
687  std::pair<Value *, APInt> BaseAndOffset = ConstantOffsetPtrs.lookup(Op);
688  if (BaseAndOffset.first)
689  ConstantOffsetPtrs[&I] = BaseAndOffset;
690  }
691 
692  // "Propagate" SROA here in the same manner as we do for ptrtoint above.
693  Value *SROAArg;
694  DenseMap<Value *, int>::iterator CostIt;
695  if (lookupSROAArgAndCost(Op, SROAArg, CostIt))
696  SROAArgValues[&I] = SROAArg;
697 
698  return TargetTransformInfo::TCC_Free == TTI.getUserCost(&I);
699 }
700 
701 bool CallAnalyzer::visitCastInst(CastInst &I) {
702  // Propagate constants through ptrtoint.
703  if (simplifyInstruction(I, [&](SmallVectorImpl<Constant *> &COps) {
704  return ConstantExpr::getCast(I.getOpcode(), COps[0], I.getType());
705  }))
706  return true;
707 
708  // Disable SROA in the face of arbitrary casts we don't whitelist elsewhere.
709  disableSROA(I.getOperand(0));
710 
711  // If this is a floating-point cast, and the target says this operation
712  // is expensive, this may eventually become a library call. Treat the cost
713  // as such.
714  switch (I.getOpcode()) {
715  case Instruction::FPTrunc:
716  case Instruction::FPExt:
717  case Instruction::UIToFP:
718  case Instruction::SIToFP:
719  case Instruction::FPToUI:
720  case Instruction::FPToSI:
721  if (TTI.getFPOpCost(I.getType()) == TargetTransformInfo::TCC_Expensive)
722  Cost += InlineConstants::CallPenalty;
723  default:
724  break;
725  }
726 
727  return TargetTransformInfo::TCC_Free == TTI.getUserCost(&I);
728 }
729 
730 bool CallAnalyzer::visitUnaryInstruction(UnaryInstruction &I) {
731  Value *Operand = I.getOperand(0);
732  if (simplifyInstruction(I, [&](SmallVectorImpl<Constant *> &COps) {
733  return ConstantFoldInstOperands(&I, COps[0], DL);
734  }))
735  return true;
736 
737  // Disable any SROA on the argument to arbitrary unary operators.
738  disableSROA(Operand);
739 
740  return false;
741 }
742 
743 bool CallAnalyzer::paramHasAttr(Argument *A, Attribute::AttrKind Attr) {
744  return CandidateCS.paramHasAttr(A->getArgNo(), Attr);
745 }
746 
747 bool CallAnalyzer::isKnownNonNullInCallee(Value *V) {
748  // Does the *call site* have the NonNull attribute set on an argument? We
749  // use the attribute on the call site to memoize any analysis done in the
750  // caller. This will also trip if the callee function has a non-null
751  // parameter attribute, but that's a less interesting case because hopefully
752  // the callee would already have been simplified based on that.
753  if (Argument *A = dyn_cast<Argument>(V))
754  if (paramHasAttr(A, Attribute::NonNull))
755  return true;
756 
757  // Is this an alloca in the caller? This is distinct from the attribute case
758  // above because attributes aren't updated within the inliner itself and we
759  // always want to catch the alloca derived case.
760  if (isAllocaDerivedArg(V))
761  // We can actually predict the result of comparisons between an
762  // alloca-derived value and null. Note that this fires regardless of
763  // SROA firing.
764  return true;
765 
766  return false;
767 }
768 
769 bool CallAnalyzer::allowSizeGrowth(CallSite CS) {
770  // If the normal destination of the invoke or the parent block of the call
771  // site is unreachable-terminated, there is little point in inlining this
772  // unless there is literally zero cost.
773  // FIXME: Note that it is possible that an unreachable-terminated block has a
774  // hot entry. For example, in the scenario below, inlining hot_call_X() may be
775  // beneficial:
776  // main() {
777  // hot_call_1();
778  // ...
779  // hot_call_N()
780  // exit(0);
781  // }
782  // For now, we are not handling this corner case here as it is rare in real
783  // code. In future, we should elaborate this based on BPI and BFI in more
784  // general threshold adjusting heuristics in updateThreshold().
785  Instruction *Instr = CS.getInstruction();
786  if (InvokeInst *II = dyn_cast<InvokeInst>(Instr)) {
787  if (isa<UnreachableInst>(II->getNormalDest()->getTerminator()))
788  return false;
789  } else if (isa<UnreachableInst>(Instr->getParent()->getTerminator()))
790  return false;
791 
792  return true;
793 }
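// Illustrative example (hypothetical caller, not from this file): a call whose
// parent block ends in 'unreachable', e.g.
//   call void @callee()
//   call void @exit(i32 0)   ; noreturn
//   unreachable
// is denied size growth, so updateThreshold() below clamps Threshold to 0.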
794 
795 bool CallAnalyzer::isColdCallSite(CallSite CS, BlockFrequencyInfo *CallerBFI) {
796  // If global profile summary is available, then callsite's coldness is
797  // determined based on that.
798  if (PSI && PSI->hasProfileSummary())
799  return PSI->isColdCallSite(CS, CallerBFI);
800 
801  // Otherwise we need BFI to be available.
802  if (!CallerBFI)
803  return false;
804 
805  // Determine if the callsite is cold relative to caller's entry. We could
806  // potentially cache the computation of scaled entry frequency, but the added
807  // complexity is not worth it unless this scaling shows up high in the
808  // profiles.
809  const BranchProbability ColdProb(ColdCallSiteRelFreq, 100);
810  auto CallSiteBB = CS.getInstruction()->getParent();
811  auto CallSiteFreq = CallerBFI->getBlockFreq(CallSiteBB);
812  auto CallerEntryFreq =
813  CallerBFI->getBlockFreq(&(CS.getCaller()->getEntryBlock()));
814  return CallSiteFreq < CallerEntryFreq * ColdProb;
815 }
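// Worked example (numbers are illustrative): with the default
// cold-callsite-rel-freq of 2, a call site whose block frequency is below 2% of
// the caller's entry frequency (say 15 against an entry frequency of 1000) is
// treated as cold when no profile summary is available.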
816 
817 Optional<int>
818 CallAnalyzer::getHotCallSiteThreshold(CallSite CS,
819  BlockFrequencyInfo *CallerBFI) {
820 
821  // If global profile summary is available, then callsite's hotness is
822  // determined based on that.
823  if (PSI && PSI->hasProfileSummary() && PSI->isHotCallSite(CS, CallerBFI))
824  return Params.HotCallSiteThreshold;
825 
826  // Otherwise we need BFI to be available and to have a locally hot callsite
827  // threshold.
828  if (!CallerBFI || !Params.LocallyHotCallSiteThreshold)
829  return None;
830 
831  // Determine if the callsite is hot relative to caller's entry. We could
832  // potentially cache the computation of scaled entry frequency, but the added
833  // complexity is not worth it unless this scaling shows up high in the
834  // profiles.
835  auto CallSiteBB = CS.getInstruction()->getParent();
836  auto CallSiteFreq = CallerBFI->getBlockFreq(CallSiteBB).getFrequency();
837  auto CallerEntryFreq = CallerBFI->getEntryFreq();
838  if (CallSiteFreq >= CallerEntryFreq * HotCallSiteRelFreq)
839  return Params.LocallyHotCallSiteThreshold;
840 
841  // Otherwise treat it normally.
842  return None;
843 }
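// Worked example (numbers are illustrative): without a profile summary but with
// BFI available, a call site executed at least 60x as often as the caller's
// entry block (the default hot-callsite-rel-freq) is given the locally hot
// threshold (525 by default via the flag above) rather than the global
// hot-callsite threshold of 3000.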
844 
845 void CallAnalyzer::updateThreshold(CallSite CS, Function &Callee) {
846  // If no size growth is allowed for this inlining, set Threshold to 0.
847  if (!allowSizeGrowth(CS)) {
848  Threshold = 0;
849  return;
850  }
851 
852  Function *Caller = CS.getCaller();
853 
854  // return min(A, B) if B is valid.
855  auto MinIfValid = [](int A, Optional<int> B) {
856  return B ? std::min(A, B.getValue()) : A;
857  };
858 
859  // return max(A, B) if B is valid.
860  auto MaxIfValid = [](int A, Optional<int> B) {
861  return B ? std::max(A, B.getValue()) : A;
862  };
863 
864  // Various bonus percentages. These are multiplied by Threshold to get the
865  // bonus values.
866  // SingleBBBonus: This bonus is applied if the callee has a single reachable
867  // basic block at the given callsite context. This is speculatively applied
868  // and withdrawn if more than one basic block is seen.
869  //
870  // Vector bonuses: We want to more aggressively inline vector-dense kernels
871  // and apply this bonus based on the percentage of vector instructions. A
872  // bonus is applied if the vector instructions exceed 50% and half that amount
873  // is applied if it exceeds 10%. Note that these bonuses are somewhat
874  // arbitrary and evolved over time by accident as much as because they are
875  // principled bonuses.
876  // FIXME: It would be nice to base the bonus values on something more
877  // scientific.
878  //
879  // LastCallToStaticBonus: This large bonus is applied to ensure the inlining
880  // of the last call to a static function as inlining such functions is
881  // guaranteed to reduce code size.
882  //
883  // These bonus percentages may be set to 0 based on properties of the caller
884  // and the callsite.
885  int SingleBBBonusPercent = 50;
886  int VectorBonusPercent = 150;
887  int LastCallToStaticBonus = InlineConstants::LastCallToStaticBonus;
888 
889  // Lambda to set all the above bonus and bonus percentages to 0.
890  auto DisallowAllBonuses = [&]() {
891  SingleBBBonusPercent = 0;
892  VectorBonusPercent = 0;
893  LastCallToStaticBonus = 0;
894  };
895 
896  // Use the OptMinSizeThreshold or OptSizeThreshold knob if they are available
897  // and reduce the threshold if the caller has the necessary attribute.
898  if (Caller->optForMinSize()) {
899  Threshold = MinIfValid(Threshold, Params.OptMinSizeThreshold);
900  // For minsize, we want to disable the single BB bonus and the vector
901  // bonuses, but not the last-call-to-static bonus. Inlining the last call to
902  // a static function will, at the minimum, eliminate the parameter setup and
903  // call/return instructions.
904  SingleBBBonusPercent = 0;
905  VectorBonusPercent = 0;
906  } else if (Caller->optForSize())
907  Threshold = MinIfValid(Threshold, Params.OptSizeThreshold);
908 
909  // Adjust the threshold based on inlinehint attribute and profile based
910  // hotness information if the caller does not have MinSize attribute.
911  if (!Caller->optForMinSize()) {
912  if (Callee.hasFnAttribute(Attribute::InlineHint))
913  Threshold = MaxIfValid(Threshold, Params.HintThreshold);
914 
915  // FIXME: After switching to the new pass manager, simplify the logic below
916  // by checking only the callsite hotness/coldness as we will reliably
917  // have local profile information.
918  //
919  // Callsite hotness and coldness can be determined if sample profile is
920  // used (which adds hotness metadata to calls) or if caller's
921  // BlockFrequencyInfo is available.
922  BlockFrequencyInfo *CallerBFI = GetBFI ? &((*GetBFI)(*Caller)) : nullptr;
923  auto HotCallSiteThreshold = getHotCallSiteThreshold(CS, CallerBFI);
924  if (!Caller->optForSize() && HotCallSiteThreshold) {
925  LLVM_DEBUG(dbgs() << "Hot callsite.\n");
926  // FIXME: This should update the threshold only if it exceeds the
927  // current threshold, but AutoFDO + ThinLTO currently relies on this
928  // behavior to prevent inlining of hot callsites during ThinLTO
929  // compile phase.
930  Threshold = HotCallSiteThreshold.getValue();
931  } else if (isColdCallSite(CS, CallerBFI)) {
932  LLVM_DEBUG(dbgs() << "Cold callsite.\n");
933  // Do not apply bonuses for a cold callsite including the
934  // LastCallToStatic bonus. While this bonus might result in code size
935  // reduction, it can cause the size of a non-cold caller to increase
936  // preventing it from being inlined.
937  DisallowAllBonuses();
938  Threshold = MinIfValid(Threshold, Params.ColdCallSiteThreshold);
939  } else if (PSI) {
940  // Use callee's global profile information only if we have no way of
941  // determining this via callsite information.
942  if (PSI->isFunctionEntryHot(&Callee)) {
943  LLVM_DEBUG(dbgs() << "Hot callee.\n");
944  // If callsite hotness can not be determined, we may still know
945  // that the callee is hot and treat it as a weaker hint for threshold
946  // increase.
947  Threshold = MaxIfValid(Threshold, Params.HintThreshold);
948  } else if (PSI->isFunctionEntryCold(&Callee)) {
949  LLVM_DEBUG(dbgs() << "Cold callee.\n");
950  // Do not apply bonuses for a cold callee including the
951  // LastCallToStatic bonus. While this bonus might result in code size
952  // reduction, it can cause the size of a non-cold caller to increase
953  // preventing it from being inlined.
954  DisallowAllBonuses();
955  Threshold = MinIfValid(Threshold, Params.ColdThreshold);
956  }
957  }
958  }
959 
960  // Finally, take the target-specific inlining threshold multiplier into
961  // account.
962  Threshold *= TTI.getInliningThresholdMultiplier();
963 
964  SingleBBBonus = Threshold * SingleBBBonusPercent / 100;
965  VectorBonus = Threshold * VectorBonusPercent / 100;
966 
967  bool OnlyOneCallAndLocalLinkage =
968  F.hasLocalLinkage() && F.hasOneUse() && &F == CS.getCalledFunction();
969  // If there is only one call of the function, and it has internal linkage,
970  // the cost of inlining it drops dramatically. It may seem odd to update
971  // Cost in updateThreshold, but the bonus depends on the logic in this method.
972  if (OnlyOneCallAndLocalLinkage)
973  Cost -= LastCallToStaticBonus;
974 }
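// Worked example (illustrative, using the default parameter values defined
// above and assuming a target inlining-threshold multiplier of 1): a callee
// carrying the inlinehint attribute raises Threshold from 225 to 325, giving
// SingleBBBonus = 325 * 50 / 100 = 162 and VectorBonus = 325 * 150 / 100 = 487,
// both applied speculatively and withdrawn later if the callee turns out to
// have more than one reachable block or too few vector instructions.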
975 
976 bool CallAnalyzer::visitCmpInst(CmpInst &I) {
977  Value *LHS = I.getOperand(0), *RHS = I.getOperand(1);
978  // First try to handle simplified comparisons.
979  if (simplifyInstruction(I, [&](SmallVectorImpl<Constant *> &COps) {
980  return ConstantExpr::getCompare(I.getPredicate(), COps[0], COps[1]);
981  }))
982  return true;
983 
984  if (I.getOpcode() == Instruction::FCmp)
985  return false;
986 
987  // Otherwise look for a comparison between constant offset pointers with
988  // a common base.
989  Value *LHSBase, *RHSBase;
990  APInt LHSOffset, RHSOffset;
991  std::tie(LHSBase, LHSOffset) = ConstantOffsetPtrs.lookup(LHS);
992  if (LHSBase) {
993  std::tie(RHSBase, RHSOffset) = ConstantOffsetPtrs.lookup(RHS);
994  if (RHSBase && LHSBase == RHSBase) {
995  // We have common bases, fold the icmp to a constant based on the
996  // offsets.
997  Constant *CLHS = ConstantInt::get(LHS->getContext(), LHSOffset);
998  Constant *CRHS = ConstantInt::get(RHS->getContext(), RHSOffset);
999  if (Constant *C = ConstantExpr::getICmp(I.getPredicate(), CLHS, CRHS)) {
1000  SimplifiedValues[&I] = C;
1001  ++NumConstantPtrCmps;
1002  return true;
1003  }
1004  }
1005  }
1006 
1007  // If the comparison is an equality comparison with null, we can simplify it
1008  // if we know the value (argument) can't be null
1009  if (I.isEquality() && isa<ConstantPointerNull>(I.getOperand(1)) &&
1010  isKnownNonNullInCallee(I.getOperand(0))) {
1011  bool IsNotEqual = I.getPredicate() == CmpInst::ICMP_NE;
1012  SimplifiedValues[&I] = IsNotEqual ? ConstantInt::getTrue(I.getType())
1013  : ConstantInt::getFalse(I.getType());
1014  return true;
1015  }
1016  // Finally check for SROA candidates in comparisons.
1017  Value *SROAArg;
1018  DenseMap<Value *, int>::iterator CostIt;
1019  if (lookupSROAArgAndCost(I.getOperand(0), SROAArg, CostIt)) {
1020  if (isa<ConstantPointerNull>(I.getOperand(1))) {
1021  accumulateSROACost(CostIt, InlineConstants::InstrCost);
1022  return true;
1023  }
1024 
1025  disableSROA(CostIt);
1026  }
1027 
1028  return false;
1029 }
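// Illustrative example (hypothetical IR): if %a and %b are known to this
// analysis as the same base pointer plus constant offsets 4 and 8, then
//   %c = icmp eq i32* %a, %b
// folds to 'false' here and is counted in NumConstantPtrCmps.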
1030 
1031 bool CallAnalyzer::visitSub(BinaryOperator &I) {
1032  // Try to handle a special case: we can fold computing the difference of two
1033  // constant-related pointers.
1034  Value *LHS = I.getOperand(0), *RHS = I.getOperand(1);
1035  Value *LHSBase, *RHSBase;
1036  APInt LHSOffset, RHSOffset;
1037  std::tie(LHSBase, LHSOffset) = ConstantOffsetPtrs.lookup(LHS);
1038  if (LHSBase) {
1039  std::tie(RHSBase, RHSOffset) = ConstantOffsetPtrs.lookup(RHS);
1040  if (RHSBase && LHSBase == RHSBase) {
1041  // We have common bases, fold the subtract to a constant based on the
1042  // offsets.
1043  Constant *CLHS = ConstantInt::get(LHS->getContext(), LHSOffset);
1044  Constant *CRHS = ConstantInt::get(RHS->getContext(), RHSOffset);
1045  if (Constant *C = ConstantExpr::getSub(CLHS, CRHS)) {
1046  SimplifiedValues[&I] = C;
1047  ++NumConstantPtrDiffs;
1048  return true;
1049  }
1050  }
1051  }
1052 
1053  // Otherwise, fall back to the generic logic for simplifying and handling
1054  // instructions.
1055  return Base::visitSub(I);
1056 }
1057 
1058 bool CallAnalyzer::visitBinaryOperator(BinaryOperator &I) {
1059  Value *LHS = I.getOperand(0), *RHS = I.getOperand(1);
1060  Constant *CLHS = dyn_cast<Constant>(LHS);
1061  if (!CLHS)
1062  CLHS = SimplifiedValues.lookup(LHS);
1063  Constant *CRHS = dyn_cast<Constant>(RHS);
1064  if (!CRHS)
1065  CRHS = SimplifiedValues.lookup(RHS);
1066 
1067  Value *SimpleV = nullptr;
1068  if (auto FI = dyn_cast<FPMathOperator>(&I))
1069  SimpleV = SimplifyFPBinOp(I.getOpcode(), CLHS ? CLHS : LHS,
1070  CRHS ? CRHS : RHS, FI->getFastMathFlags(), DL);
1071  else
1072  SimpleV =
1073  SimplifyBinOp(I.getOpcode(), CLHS ? CLHS : LHS, CRHS ? CRHS : RHS, DL);
1074 
1075  if (Constant *C = dyn_cast_or_null<Constant>(SimpleV))
1076  SimplifiedValues[&I] = C;
1077 
1078  if (SimpleV)
1079  return true;
1080 
1081  // Disable any SROA on arguments to arbitrary, unsimplified binary operators.
1082  disableSROA(LHS);
1083  disableSROA(RHS);
1084 
1085  // If the instruction is floating point, and the target says this operation
1086  // is expensive, this may eventually become a library call. Treat the cost
1087  // as such.
1088  if (I.getType()->isFloatingPointTy() &&
1089  TTI.getFPOpCost(I.getType()) == TargetTransformInfo::TCC_Expensive)
1090  Cost += InlineConstants::CallPenalty;
1091 
1092  return false;
1093 }
1094 
1095 bool CallAnalyzer::visitLoad(LoadInst &I) {
1096  Value *SROAArg;
1097  DenseMap<Value *, int>::iterator CostIt;
1098  if (lookupSROAArgAndCost(I.getPointerOperand(), SROAArg, CostIt)) {
1099  if (I.isSimple()) {
1100  accumulateSROACost(CostIt, InlineConstants::InstrCost);
1101  return true;
1102  }
1103 
1104  disableSROA(CostIt);
1105  }
1106 
1107  // If the data is already loaded from this address and hasn't been clobbered
1108  // by any stores or calls, this load is likely to be redundant and can be
1109  // eliminated.
1110  if (EnableLoadElimination &&
1111  !LoadAddrSet.insert(I.getPointerOperand()).second && I.isUnordered()) {
1112  LoadEliminationCost += InlineConstants::InstrCost;
1113  return true;
1114  }
1115 
1116  return false;
1117 }
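// Illustrative example (hypothetical IR): two unordered loads from the same
// pointer with no intervening store or opaque call,
//   %x = load i32, i32* %p
//   %y = load i32, i32* %p
// are modeled as one load: the second is treated as free and InstrCost is added
// to LoadEliminationCost, a saving that is charged back to Cost only if a
// clobbering store or call is seen later.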
1118 
1119 bool CallAnalyzer::visitStore(StoreInst &I) {
1120  Value *SROAArg;
1121  DenseMap<Value *, int>::iterator CostIt;
1122  if (lookupSROAArgAndCost(I.getPointerOperand(), SROAArg, CostIt)) {
1123  if (I.isSimple()) {
1124  accumulateSROACost(CostIt, InlineConstants::InstrCost);
1125  return true;
1126  }
1127 
1128  disableSROA(CostIt);
1129  }
1130 
1131  // The store can potentially clobber loads and prevent repeated loads from
1132  // being eliminated.
1133  // FIXME:
1134  // 1. We can probably keep an initial set of eliminable loads subtracted
1135  // from the cost even when we finally see a store. We just need to disable
1136  // *further* accumulation of elimination savings.
1137  // 2. We should probably at some point thread MemorySSA for the callee into
1138  // this and then use that to actually compute *really* precise savings.
1139  disableLoadElimination();
1140  return false;
1141 }
1142 
1143 bool CallAnalyzer::visitExtractValue(ExtractValueInst &I) {
1144  // Constant folding for extract value is trivial.
1145  if (simplifyInstruction(I, [&](SmallVectorImpl<Constant *> &COps) {
1146  return ConstantExpr::getExtractValue(COps[0], I.getIndices());
1147  }))
1148  return true;
1149 
1150  // SROA can look through these but give them a cost.
1151  return false;
1152 }
1153 
1154 bool CallAnalyzer::visitInsertValue(InsertValueInst &I) {
1155  // Constant folding for insert value is trivial.
1156  if (simplifyInstruction(I, [&](SmallVectorImpl<Constant *> &COps) {
1157  return ConstantExpr::getInsertValue(/*AggregateOperand*/ COps[0],
1158  /*InsertedValueOperand*/ COps[1],
1159  I.getIndices());
1160  }))
1161  return true;
1162 
1163  // SROA can look through these but give them a cost.
1164  return false;
1165 }
1166 
1167 /// Try to simplify a call site.
1168 ///
1169 /// Takes a concrete function and callsite and tries to actually simplify it by
1170 /// analyzing the arguments and call itself with instsimplify. Returns true if
1171 /// it has simplified the callsite to some other entity (a constant), making it
1172 /// free.
1173 bool CallAnalyzer::simplifyCallSite(Function *F, CallSite CS) {
1174  // FIXME: Using the instsimplify logic directly for this is inefficient
1175  // because we have to continually rebuild the argument list even when no
1176  // simplifications can be performed. Until that is fixed with remapping
1177  // inside of instsimplify, directly constant fold calls here.
1178  if (!canConstantFoldCallTo(CS, F))
1179  return false;
1180 
1181  // Try to re-map the arguments to constants.
1182  SmallVector<Constant *, 4> ConstantArgs;
1183  ConstantArgs.reserve(CS.arg_size());
1184  for (CallSite::arg_iterator I = CS.arg_begin(), E = CS.arg_end(); I != E;
1185  ++I) {
1186  Constant *C = dyn_cast<Constant>(*I);
1187  if (!C)
1188  C = dyn_cast_or_null<Constant>(SimplifiedValues.lookup(*I));
1189  if (!C)
1190  return false; // This argument doesn't map to a constant.
1191 
1192  ConstantArgs.push_back(C);
1193  }
1194  if (Constant *C = ConstantFoldCall(CS, F, ConstantArgs)) {
1195  SimplifiedValues[CS.getInstruction()] = C;
1196  return true;
1197  }
1198 
1199  return false;
1200 }
1201 
1202 bool CallAnalyzer::visitCallSite(CallSite CS) {
1203  if (CS.hasFnAttr(Attribute::ReturnsTwice) &&
1204  !F.hasFnAttribute(Attribute::ReturnsTwice)) {
1205  // This aborts the entire analysis.
1206  ExposesReturnsTwice = true;
1207  return false;
1208  }
1209  if (CS.isCall() && cast<CallInst>(CS.getInstruction())->cannotDuplicate())
1210  ContainsNoDuplicateCall = true;
1211 
1212  if (Function *F = CS.getCalledFunction()) {
1213  // When we have a concrete function, first try to simplify it directly.
1214  if (simplifyCallSite(F, CS))
1215  return true;
1216 
1217  // Next check if it is an intrinsic we know about.
1218  // FIXME: Lift this into part of the InstVisitor.
1219  if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(CS.getInstruction())) {
1220  switch (II->getIntrinsicID()) {
1221  default:
1222  if (!CS.onlyReadsMemory() && !isAssumeLikeIntrinsic(II))
1223  disableLoadElimination();
1224  return Base::visitCallSite(CS);
1225 
1226  case Intrinsic::load_relative:
1227  // This is normally lowered to 4 LLVM instructions.
1228  Cost += 3 * InlineConstants::InstrCost;
1229  return false;
1230 
1231  case Intrinsic::memset:
1232  case Intrinsic::memcpy:
1233  case Intrinsic::memmove:
1234  disableLoadElimination();
1235  // SROA can usually chew through these intrinsics, but they aren't free.
1236  return false;
1237  case Intrinsic::icall_branch_funnel:
1238  case Intrinsic::localescape:
1239  HasUninlineableIntrinsic = true;
1240  return false;
1241  case Intrinsic::vastart:
1242  InitsVargArgs = true;
1243  return false;
1244  }
1245  }
1246 
1247  if (F == CS.getInstruction()->getFunction()) {
1248  // This flag will fully abort the analysis, so don't bother with anything
1249  // else.
1250  IsRecursiveCall = true;
1251  return false;
1252  }
1253 
1254  if (TTI.isLoweredToCall(F)) {
1255  // We account for the average 1 instruction per call argument setup
1256  // here.
1257  Cost += CS.arg_size() * InlineConstants::InstrCost;
1258 
1259  // Everything other than inline ASM will also have a significant cost
1260  // merely from making the call.
1261  if (!isa<InlineAsm>(CS.getCalledValue()))
1262  Cost += InlineConstants::CallPenalty;
1263  }
1264 
1265  if (!CS.onlyReadsMemory())
1266  disableLoadElimination();
1267  return Base::visitCallSite(CS);
1268  }
1269 
1270  // Otherwise we're in a very special case -- an indirect function call. See
1271  // if we can be particularly clever about this.
1272  Value *Callee = CS.getCalledValue();
1273 
1274  // First, pay the price of the argument setup. We account for the average
1275  // 1 instruction per call argument setup here.
1276  Cost += CS.arg_size() * InlineConstants::InstrCost;
1277 
1278  // Next, check if this happens to be an indirect function call to a known
1279  // function in this inline context. If not, we've done all we can.
1280  Function *F = dyn_cast_or_null<Function>(SimplifiedValues.lookup(Callee));
1281  if (!F) {
1282  if (!CS.onlyReadsMemory())
1283  disableLoadElimination();
1284  return Base::visitCallSite(CS);
1285  }
1286 
1287  // If we have a constant that we are calling as a function, we can peer
1288  // through it and see the function target. This happens not infrequently
1289  // during devirtualization and so we want to give it a hefty bonus for
1290  // inlining, but cap that bonus in the event that inlining wouldn't pan
1291  // out. Pretend to inline the function, with a custom threshold.
1292  auto IndirectCallParams = Params;
1293  IndirectCallParams.DefaultThreshold = InlineConstants::IndirectCallThreshold;
1294  CallAnalyzer CA(TTI, GetAssumptionCache, GetBFI, PSI, ORE, *F, CS,
1295  IndirectCallParams);
1296  if (CA.analyzeCall(CS)) {
1297  // We were able to inline the indirect call! Subtract the cost from the
1298  // threshold to get the bonus we want to apply, but don't go below zero.
1299  Cost -= std::max(0, CA.getThreshold() - CA.getCost());
1300  }
1301 
1302  if (!F->onlyReadsMemory())
1303  disableLoadElimination();
1304  return Base::visitCallSite(CS);
1305 }
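// Illustrative example (numbers are hypothetical): for an indirect call whose
// target simplifies to a known function, a nested CallAnalyzer is run with the
// indirect-call threshold; if that pretend-inline finishes with cost 40 against
// a threshold of 100, this call site receives a 60-point bonus (Cost -= 60),
// reflecting the expected win from later devirtualization.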
1306 
1307 bool CallAnalyzer::visitReturnInst(ReturnInst &RI) {
1308  // At least one return instruction will be free after inlining.
1309  bool Free = !HasReturn;
1310  HasReturn = true;
1311  return Free;
1312 }
1313 
1314 bool CallAnalyzer::visitBranchInst(BranchInst &BI) {
1315  // We model unconditional branches as essentially free -- they really
1316  // shouldn't exist at all, but handling them makes the behavior of the
1317  // inliner more regular and predictable. Interestingly, conditional branches
1318  // which will fold away are also free.
1319  return BI.isUnconditional() || isa<ConstantInt>(BI.getCondition()) ||
1320  dyn_cast_or_null<ConstantInt>(
1321  SimplifiedValues.lookup(BI.getCondition()));
1322 }
1323 
1324 bool CallAnalyzer::visitSelectInst(SelectInst &SI) {
1325  bool CheckSROA = SI.getType()->isPointerTy();
1326  Value *TrueVal = SI.getTrueValue();
1327  Value *FalseVal = SI.getFalseValue();
1328 
1329  Constant *TrueC = dyn_cast<Constant>(TrueVal);
1330  if (!TrueC)
1331  TrueC = SimplifiedValues.lookup(TrueVal);
1332  Constant *FalseC = dyn_cast<Constant>(FalseVal);
1333  if (!FalseC)
1334  FalseC = SimplifiedValues.lookup(FalseVal);
1335  Constant *CondC =
1336  dyn_cast_or_null<Constant>(SimplifiedValues.lookup(SI.getCondition()));
1337 
1338  if (!CondC) {
1339  // Select C, X, X => X
1340  if (TrueC == FalseC && TrueC) {
1341  SimplifiedValues[&SI] = TrueC;
1342  return true;
1343  }
1344 
1345  if (!CheckSROA)
1346  return Base::visitSelectInst(SI);
1347 
1348  std::pair<Value *, APInt> TrueBaseAndOffset =
1349  ConstantOffsetPtrs.lookup(TrueVal);
1350  std::pair<Value *, APInt> FalseBaseAndOffset =
1351  ConstantOffsetPtrs.lookup(FalseVal);
1352  if (TrueBaseAndOffset == FalseBaseAndOffset && TrueBaseAndOffset.first) {
1353  ConstantOffsetPtrs[&SI] = TrueBaseAndOffset;
1354 
1355  Value *SROAArg;
1356  DenseMap<Value *, int>::iterator CostIt;
1357  if (lookupSROAArgAndCost(TrueVal, SROAArg, CostIt))
1358  SROAArgValues[&SI] = SROAArg;
1359  return true;
1360  }
1361 
1362  return Base::visitSelectInst(SI);
1363  }
1364 
1365  // Select condition is a constant.
1366  Value *SelectedV = CondC->isAllOnesValue()
1367  ? TrueVal
1368  : (CondC->isNullValue()) ? FalseVal : nullptr;
1369  if (!SelectedV) {
1370  // Condition is a vector constant that is not all 1s or all 0s. If all
1371  // operands are constants, ConstantExpr::getSelect() can handle the cases
1372  // such as select vectors.
1373  if (TrueC && FalseC) {
1374  if (auto *C = ConstantExpr::getSelect(CondC, TrueC, FalseC)) {
1375  SimplifiedValues[&SI] = C;
1376  return true;
1377  }
1378  }
1379  return Base::visitSelectInst(SI);
1380  }
1381 
1382  // Condition is either all 1s or all 0s. SI can be simplified.
1383  if (Constant *SelectedC = dyn_cast<Constant>(SelectedV)) {
1384  SimplifiedValues[&SI] = SelectedC;
1385  return true;
1386  }
1387 
1388  if (!CheckSROA)
1389  return true;
1390 
1391  std::pair<Value *, APInt> BaseAndOffset =
1392  ConstantOffsetPtrs.lookup(SelectedV);
1393  if (BaseAndOffset.first) {
1394  ConstantOffsetPtrs[&SI] = BaseAndOffset;
1395 
1396  Value *SROAArg;
1397  DenseMap<Value *, int>::iterator CostIt;
1398  if (lookupSROAArgAndCost(SelectedV, SROAArg, CostIt))
1399  SROAArgValues[&SI] = SROAArg;
1400  }
1401 
1402  return true;
1403 }
1404 
1405 bool CallAnalyzer::visitSwitchInst(SwitchInst &SI) {
1406  // We model unconditional switches as free, see the comments on handling
1407  // branches.
1408  if (isa<ConstantInt>(SI.getCondition()))
1409  return true;
1410  if (Value *V = SimplifiedValues.lookup(SI.getCondition()))
1411  if (isa<ConstantInt>(V))
1412  return true;
1413 
1414  // Assume the most general case where the switch is lowered into
1415  // either a jump table, bit test, or a balanced binary tree consisting of
1416  // case clusters without merging adjacent clusters with the same
1417  // destination. We do not consider the switches that are lowered with a mix
1418  // of jump table/bit test/binary search tree. The cost of the switch is
1419  // proportional to the size of the tree or the size of the jump table range.
1420  //
1421  // NB: We convert large switches which are just used to initialize large phi
1422  // nodes to lookup tables instead in simplify-cfg, so this shouldn't prevent
1423  // inlining those. It will prevent inlining in cases where the optimization
1424  // does not (yet) fire.
1425 
1426  // Maximum valid cost increased in this function.
1427  int CostUpperBound = INT_MAX - InlineConstants::InstrCost - 1;
1428 
1429  // Exit early for a large switch, assuming one case needs at least one
1430  // instruction.
1431  // FIXME: This is not true for a bit test, but ignore such case for now to
1432  // save compile-time.
1433  int64_t CostLowerBound =
1434  std::min((int64_t)CostUpperBound,
1435  (int64_t)SI.getNumCases() * InlineConstants::InstrCost + Cost);
1436 
1437  if (CostLowerBound > Threshold && !ComputeFullInlineCost) {
1438  Cost = CostLowerBound;
1439  return false;
1440  }
1441 
1442  unsigned JumpTableSize = 0;
1443  unsigned NumCaseCluster =
1444  TTI.getEstimatedNumberOfCaseClusters(SI, JumpTableSize);
1445 
1446  // If suitable for a jump table, consider the cost for the table size and
1447  // branch to destination.
1448  if (JumpTableSize) {
1449  int64_t JTCost = (int64_t)JumpTableSize * InlineConstants::InstrCost +
1450  4 * InlineConstants::InstrCost;
1451 
1452  Cost = std::min((int64_t)CostUpperBound, JTCost + Cost);
1453  return false;
1454  }
1455 
1456  // Considering forming a binary search, we should find the number of nodes
1457  // which is same as the number of comparisons when lowered. For a given
1458  // number of clusters, n, we can define a recursive function, f(n), to find
1459  // the number of nodes in the tree. The recursion is :
1460  // f(n) = 1 + f(n/2) + f (n - n/2), when n > 3,
1461  // and f(n) = n, when n <= 3.
1462  // This will lead to a binary tree where the leaves are either f(2) or f(3)
1463  // when n > 3. So, the number of comparisons from leaves should be n, while
1464  // the number of non-leaf nodes should be:
1465  // 2^(log2(n) - 1) - 1
1466  // = 2^log2(n) * 2^-1 - 1
1467  // = n / 2 - 1.
1468  // Considering comparisons from leaf and non-leaf nodes, we can estimate the
1469  // number of comparisons in a simple closed form :
1470  // n + n / 2 - 1 = n * 3 / 2 - 1
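 // For example (editorial illustration): 8 case clusters give an expected
 // 8 * 3 / 2 - 1 = 11 comparisons, each modeled below as a compare plus a
 // conditional branch, i.e. roughly 22 * InstrCost added to the switch cost.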
1471  if (NumCaseCluster <= 3) {
1472  // Suppose a comparison includes one compare and one conditional branch.
1473  Cost += NumCaseCluster * 2 * InlineConstants::InstrCost;
1474  return false;
1475  }
1476 
1477  int64_t ExpectedNumberOfCompare = 3 * (int64_t)NumCaseCluster / 2 - 1;
1478  int64_t SwitchCost =
1479  ExpectedNumberOfCompare * 2 * InlineConstants::InstrCost;
1480 
1481  Cost = std::min((int64_t)CostUpperBound, SwitchCost + Cost);
1482  return false;
1483 }
1484 
1485 bool CallAnalyzer::visitIndirectBrInst(IndirectBrInst &IBI) {
1486  // We never want to inline functions that contain an indirectbr. Inlining
1487  // one would be incorrect because all the blockaddresses (in static global
1488  // initializers, for example) would still refer to the original function, and
1489  // the indirect jump would jump from the inlined copy of the function into
1490  // the original function, which is undefined behavior.
1491  // FIXME: This logic isn't really right; we can safely inline functions with
1492  // indirectbr's as long as no other function or global references the
1493  // blockaddress of a block within the current function.
1494  HasIndirectBr = true;
1495  return false;
1496 }
1497 
1498 bool CallAnalyzer::visitResumeInst(ResumeInst &RI) {
1499  // FIXME: It's not clear that a single instruction is an accurate model for
1500  // the inline cost of a resume instruction.
1501  return false;
1502 }
1503 
1504 bool CallAnalyzer::visitCleanupReturnInst(CleanupReturnInst &CRI) {
1505  // FIXME: It's not clear that a single instruction is an accurate model for
1506  // the inline cost of a cleanupret instruction.
1507  return false;
1508 }
1509 
1510 bool CallAnalyzer::visitCatchReturnInst(CatchReturnInst &CRI) {
1511  // FIXME: It's not clear that a single instruction is an accurate model for
1512  // the inline cost of a catchret instruction.
1513  return false;
1514 }
1515 
1516 bool CallAnalyzer::visitUnreachableInst(UnreachableInst &I) {
1517  // FIXME: It might be reasonable to discount the cost of instructions leading
1518  // to unreachable as they have the lowest possible impact on both runtime and
1519  // code size.
1520  return true; // No actual code is needed for unreachable.
1521 }
1522 
1523 bool CallAnalyzer::visitInstruction(Instruction &I) {
1524  // Some instructions are free. All of the free intrinsics can also be
1525  // handled by SROA, etc.
1526  if (TargetTransformInfo::TCC_Free == TTI.getUserCost(&I))
1527  return true;
1528 
1529  // We found something we don't understand or can't handle. Mark any SROA-able
1530  // values in the operand list as no longer viable.
1531  for (User::op_iterator OI = I.op_begin(), OE = I.op_end(); OI != OE; ++OI)
1532  disableSROA(*OI);
1533 
1534  return false;
1535 }
1536 
1537 /// Analyze a basic block for its contribution to the inline cost.
1538 ///
1539 /// This method walks the analyzer over every instruction in the given basic
1540 /// block and accounts for their cost during inlining at this callsite. It
1541 /// aborts early if the threshold has been exceeded or an impossible to inline
1542 /// construct has been detected. It returns false if inlining is no longer
1543 /// viable, and true if inlining remains viable.
1544 InlineResult
1545 CallAnalyzer::analyzeBlock(BasicBlock *BB,
1546  SmallPtrSetImpl<const Value *> &EphValues) {
1547  for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I) {
1548  // FIXME: Currently, the number of instructions in a function, regardless of
1549  // our ability to simplify them during inlining to constants or dead code,
1550  // is used by the vector bonus heuristic. As long as that's true,
1551  // we have to special case debug intrinsics here to prevent differences in
1552  // inlining due to debug symbols. Eventually, the number of unsimplified
1553  // instructions shouldn't factor into the cost computation, but until then,
1554  // hack around it here.
1555  if (isa<DbgInfoIntrinsic>(I))
1556  continue;
1557 
1558  // Skip ephemeral values.
1559  if (EphValues.count(&*I))
1560  continue;
1561 
1562  ++NumInstructions;
1563  if (isa<ExtractElementInst>(I) || I->getType()->isVectorTy())
1564  ++NumVectorInstructions;
1565 
1566  // If the instruction simplified to a constant, there is no cost to this
1567  // instruction. Visit the instructions using our InstVisitor to account for
1568  // all of the per-instruction logic. The visit tree returns true if we
1569  // consumed the instruction in any way, and false if the instruction's base
1570  // cost should count against inlining.
1571  if (Base::visit(&*I))
1572  ++NumInstructionsSimplified;
1573  else
1574  Cost += InlineConstants::InstrCost;
1575 
1576  using namespace ore;
1577  // If visiting this instruction detected an uninlinable pattern, abort.
1578  InlineResult IR;
1579  if (IsRecursiveCall)
1580  IR = "recursive";
1581  else if (ExposesReturnsTwice)
1582  IR = "exposes returns twice";
1583  else if (HasDynamicAlloca)
1584  IR = "dynamic alloca";
1585  else if (HasIndirectBr)
1586  IR = "indirect branch";
1587  else if (HasUninlineableIntrinsic)
1588  IR = "uninlinable intrinsic";
1589  else if (InitsVargArgs)
1590  IR = "varargs";
1591  if (!IR) {
1592  if (ORE)
1593  ORE->emit([&]() {
1594  return OptimizationRemarkMissed(DEBUG_TYPE, "NeverInline",
1595  CandidateCS.getInstruction())
1596  << NV("Callee", &F) << " has uninlinable pattern ("
1597  << NV("InlineResult", IR.message)
1598  << ") and cost is not fully computed";
1599  });
1600  return IR;
1601  }
1602 
1603  // If the caller is a recursive function then we don't want to inline
1604  // functions which allocate a lot of stack space because it would increase
1605  // the caller stack usage dramatically.
1606  if (IsCallerRecursive &&
1607  AllocatedSize > InlineConstants::TotalAllocaSizeRecursiveCaller) {
1608  InlineResult IR = "recursive and allocates too much stack space";
1609  if (ORE)
1610  ORE->emit([&]() {
1611  return OptimizationRemarkMissed(DEBUG_TYPE, "NeverInline",
1612  CandidateCS.getInstruction())
1613  << NV("Callee", &F) << " is " << NV("InlineResult", IR.message)
1614  << ". Cost is not fully computed";
1615  });
1616  return IR;
1617  }
1618 
1619  // Check if we've passed the maximum possible threshold so we don't spin in
1620  // huge basic blocks that will never inline.
1621  if (Cost >= Threshold && !ComputeFullInlineCost)
1622  return false;
1623  }
1624 
1625  return true;
1626 }
1627 
1628 /// Compute the base pointer and cumulative constant offsets for V.
1629 ///
1630 /// This strips all constant offsets off of V, leaving it the base pointer, and
1631 /// accumulates the total constant offset applied in the returned constant. It
1632 /// returns null if V is not a pointer, and returns the constant '0' if there
1633 /// are no constant offsets applied.
1634 ConstantInt *CallAnalyzer::stripAndComputeInBoundsConstantOffsets(Value *&V) {
1635  if (!V->getType()->isPointerTy())
1636  return nullptr;
1637 
1638  unsigned AS = V->getType()->getPointerAddressSpace();
1639  unsigned IntPtrWidth = DL.getIndexSizeInBits(AS);
1640  APInt Offset = APInt::getNullValue(IntPtrWidth);
1641 
1642  // Even though we don't look through PHI nodes, we could be called on an
1643  // instruction in an unreachable block, which may be on a cycle.
1644  SmallPtrSet<Value *, 4> Visited;
1645  Visited.insert(V);
1646  do {
1647  if (GEPOperator *GEP = dyn_cast<GEPOperator>(V)) {
1648  if (!GEP->isInBounds() || !accumulateGEPOffset(*GEP, Offset))
1649  return nullptr;
1650  V = GEP->getPointerOperand();
1651  } else if (Operator::getOpcode(V) == Instruction::BitCast) {
1652  V = cast<Operator>(V)->getOperand(0);
1653  } else if (GlobalAlias *GA = dyn_cast<GlobalAlias>(V)) {
1654  if (GA->isInterposable())
1655  break;
1656  V = GA->getAliasee();
1657  } else {
1658  break;
1659  }
1660  assert(V->getType()->isPointerTy() && "Unexpected operand type!");
1661  } while (Visited.insert(V).second);
1662 
1663  Type *IntPtrTy = DL.getIntPtrType(V->getContext(), AS);
1664  return cast<ConstantInt>(ConstantInt::get(IntPtrTy, Offset));
1665 }
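 // For illustration (hypothetical IR, not from this file): given
 //   %base = alloca [4 x i32]
 //   %p = getelementptr inbounds [4 x i32], [4 x i32]* %base, i64 0, i64 2
 // calling this on %p rewrites V to %base and returns the constant 8, the
 // byte offset of element 2. A pointer reached only through non-constant or
 // non-inbounds offsets (or a non-pointer value) yields null instead.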
1666 
1667 /// Find dead blocks due to deleted CFG edges during inlining.
1668 ///
1669 /// If we know the successor of the current block, \p CurrBB, has to be \p
1670 /// NextBB, the other successors of \p CurrBB are dead if these successors have
1671 /// no live incoming CFG edges. If one block is found to be dead, we can
1672 /// continue growing the dead block list by checking the successors of the dead
1673 /// blocks to see if all their incoming edges are dead or not.
1674 void CallAnalyzer::findDeadBlocks(BasicBlock *CurrBB, BasicBlock *NextBB) {
1675  auto IsEdgeDead = [&](BasicBlock *Pred, BasicBlock *Succ) {
1676  // A CFG edge is dead if the predecessor is dead or the predecessor has a
1677  // known successor which is not the one under examination.
1678  return (DeadBlocks.count(Pred) ||
1679  (KnownSuccessors[Pred] && KnownSuccessors[Pred] != Succ));
1680  };
1681 
1682  auto IsNewlyDead = [&](BasicBlock *BB) {
1683  // If all the edges to a block are dead, the block is also dead.
1684  return (!DeadBlocks.count(BB) &&
1685  llvm::all_of(llvm::predecessors(BB),
1686  [&](BasicBlock *P) { return IsEdgeDead(P, BB); }));
1687  };
1688 
1689  for (BasicBlock *Succ : successors(CurrBB)) {
1690  if (Succ == NextBB || !IsNewlyDead(Succ))
1691  continue;
1692  SmallVector<BasicBlock *, 4> NewDead;
1693  NewDead.push_back(Succ);
1694  while (!NewDead.empty()) {
1695  BasicBlock *Dead = NewDead.pop_back_val();
1696  if (DeadBlocks.insert(Dead))
1697  // Continue growing the dead block lists.
1698  for (BasicBlock *S : successors(Dead))
1699  if (IsNewlyDead(S))
1700  NewDead.push_back(S);
1701  }
1702  }
1703 }
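 // Small example (hypothetical CFG): suppose CurrBB conditionally branches to
 // NextBB or B1, and the condition was simplified so NextBB is the known
 // successor. If B1's only predecessor is CurrBB, B1 is newly dead; and if
 // B1's successor B2 is reachable only through B1, the worklist above marks
 // B2 dead as well.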
1704 
1705 /// Analyze a call site for potential inlining.
1706 ///
1707 /// Returns true if inlining this call is viable, and false if it is not
1708 /// viable. It computes the cost and adjusts the threshold based on numerous
1709 /// factors and heuristics. If this method returns false but the computed cost
1710 /// is below the computed threshold, then inlining was forcibly disabled by
1711 /// some artifact of the routine.
1712 InlineResult CallAnalyzer::analyzeCall(CallSite CS) {
1713  ++NumCallsAnalyzed;
1714 
1715  // Perform some tweaks to the cost and threshold based on the direct
1716  // callsite information.
1717 
1718  // We want to more aggressively inline vector-dense kernels, so up the
1719  // threshold, and we'll lower it if the % of vector instructions gets too
1720  // low. Note that these bonuses are somewhat arbitrary and evolved over time
1721  // by accident as much as because they are principled bonuses.
1722  //
1723  // FIXME: It would be nice to remove all such bonuses. At least it would be
1724  // nice to base the bonus values on something more scientific.
1725  assert(NumInstructions == 0);
1726  assert(NumVectorInstructions == 0);
1727 
1728  // Update the threshold based on callsite properties
1729  updateThreshold(CS, F);
1730 
1731  // Speculatively apply all possible bonuses to Threshold. If cost exceeds
1732  // this Threshold at any time, and cost cannot decrease, we can stop processing
1733  // the rest of the function body.
1734  Threshold += (SingleBBBonus + VectorBonus);
1735 
1736  // Give out bonuses for the callsite, as the instructions setting them up
1737  // will be gone after inlining.
1738  Cost -= getCallsiteCost(CS, DL);
1739 
1740  // If this function uses the coldcc calling convention, prefer not to inline
1741  // it.
1742  if (F.getCallingConv() == CallingConv::Cold)
1743  Cost += InlineConstants::ColdccPenalty;
1744 
1745  // Check if we're done. This can happen due to bonuses and penalties.
1746  if (Cost >= Threshold && !ComputeFullInlineCost)
1747  return "high cost";
1748 
1749  if (F.empty())
1750  return true;
1751 
1752  Function *Caller = CS.getInstruction()->getFunction();
1753  // Check if the caller function is recursive itself.
1754  for (User *U : Caller->users()) {
1755  CallSite Site(U);
1756  if (!Site)
1757  continue;
1758  Instruction *I = Site.getInstruction();
1759  if (I->getFunction() == Caller) {
1760  IsCallerRecursive = true;
1761  break;
1762  }
1763  }
1764 
1765  // Populate our simplified values by mapping from function arguments to call
1766  // arguments with known important simplifications.
1767  CallSite::arg_iterator CAI = CS.arg_begin();
1768  for (Function::arg_iterator FAI = F.arg_begin(), FAE = F.arg_end();
1769  FAI != FAE; ++FAI, ++CAI) {
1770  assert(CAI != CS.arg_end());
1771  if (Constant *C = dyn_cast<Constant>(CAI))
1772  SimplifiedValues[&*FAI] = C;
1773 
1774  Value *PtrArg = *CAI;
1775  if (ConstantInt *C = stripAndComputeInBoundsConstantOffsets(PtrArg)) {
1776  ConstantOffsetPtrs[&*FAI] = std::make_pair(PtrArg, C->getValue());
1777 
1778  // We can SROA any pointer arguments derived from alloca instructions.
1779  if (isa<AllocaInst>(PtrArg)) {
1780  SROAArgValues[&*FAI] = PtrArg;
1781  SROAArgCosts[PtrArg] = 0;
1782  }
1783  }
1784  }
1785  NumConstantArgs = SimplifiedValues.size();
1786  NumConstantOffsetPtrArgs = ConstantOffsetPtrs.size();
1787  NumAllocaArgs = SROAArgValues.size();
1788 
1789  // FIXME: If a caller has multiple calls to a callee, we end up recomputing
1790  // the ephemeral values multiple times (and they're completely determined by
1791  // the callee, so this is purely duplicate work).
1792  SmallPtrSet<const Value *, 32> EphValues;
1793  CodeMetrics::collectEphemeralValues(&F, &GetAssumptionCache(F), EphValues);
1794 
1795  // The worklist of live basic blocks in the callee *after* inlining. We avoid
1796  // adding basic blocks of the callee which can be proven to be dead for this
1797  // particular call site in order to get more accurate cost estimates. This
1798  // requires a somewhat heavyweight iteration pattern: we need to walk the
1799  // basic blocks in a breadth-first order as we insert live successors. To
1800  // accomplish this, and to keep iterations small since we exit after
1801  // crossing our threshold, we use a small-size optimized SetVector.
1802  typedef SetVector<BasicBlock *, SmallVector<BasicBlock *, 16>,
1803  SmallPtrSet<BasicBlock *, 16>>
1804  BBSetVector;
1805  BBSetVector BBWorklist;
1806  BBWorklist.insert(&F.getEntryBlock());
1807  bool SingleBB = true;
1808  // Note that we *must not* cache the size, this loop grows the worklist.
1809  for (unsigned Idx = 0; Idx != BBWorklist.size(); ++Idx) {
1810  // Bail out the moment we cross the threshold. This means we'll under-count
1811  // the cost, but only when undercounting doesn't matter.
1812  if (Cost >= Threshold && !ComputeFullInlineCost)
1813  break;
1814 
1815  BasicBlock *BB = BBWorklist[Idx];
1816  if (BB->empty())
1817  continue;
1818 
1819  // Disallow inlining a blockaddress. A blockaddress only has defined
1820  // behavior for an indirect branch in the same function, and we do not
1821  // currently support inlining indirect branches. But, the inliner may not
1822  // see an indirect branch that ends up being dead code at a particular call
1823  // site. If the blockaddress escapes the function, e.g., via a global
1824  // variable, inlining may lead to an invalid cross-function reference.
1825  if (BB->hasAddressTaken())
1826  return "blockaddress";
1827 
1828  // Analyze the cost of this block. If we blow through the threshold, this
1829  // returns false, and we can bail out.
1830  InlineResult IR = analyzeBlock(BB, EphValues);
1831  if (!IR)
1832  return IR;
1833 
1834  TerminatorInst *TI = BB->getTerminator();
1835 
1836  // Add in the live successors by first checking whether we have a terminator
1837  // that may be simplified based on the values simplified by this call.
1838  if (BranchInst *BI = dyn_cast<BranchInst>(TI)) {
1839  if (BI->isConditional()) {
1840  Value *Cond = BI->getCondition();
1841  if (ConstantInt *SimpleCond =
1842  dyn_cast_or_null<ConstantInt>(SimplifiedValues.lookup(Cond))) {
1843  BasicBlock *NextBB = BI->getSuccessor(SimpleCond->isZero() ? 1 : 0);
1844  BBWorklist.insert(NextBB);
1845  KnownSuccessors[BB] = NextBB;
1846  findDeadBlocks(BB, NextBB);
1847  continue;
1848  }
1849  }
1850  } else if (SwitchInst *SI = dyn_cast<SwitchInst>(TI)) {
1851  Value *Cond = SI->getCondition();
1852  if (ConstantInt *SimpleCond =
1853  dyn_cast_or_null<ConstantInt>(SimplifiedValues.lookup(Cond))) {
1854  BasicBlock *NextBB = SI->findCaseValue(SimpleCond)->getCaseSuccessor();
1855  BBWorklist.insert(NextBB);
1856  KnownSuccessors[BB] = NextBB;
1857  findDeadBlocks(BB, NextBB);
1858  continue;
1859  }
1860  }
1861 
1862  // If we're unable to select a particular successor, just count all of
1863  // them.
1864  for (unsigned TIdx = 0, TSize = TI->getNumSuccessors(); TIdx != TSize;
1865  ++TIdx)
1866  BBWorklist.insert(TI->getSuccessor(TIdx));
1867 
1868  // If we had any successors at this point, then post-inlining is likely to
1869  // have them as well. Note that we assume any basic blocks which existed
1870  // due to branches or switches which folded above will also fold after
1871  // inlining.
1872  if (SingleBB && TI->getNumSuccessors() > 1) {
1873  // Take off the bonus we applied to the threshold.
1874  Threshold -= SingleBBBonus;
1875  SingleBB = false;
1876  }
1877  }
1878 
1879  bool OnlyOneCallAndLocalLinkage =
1880  F.hasLocalLinkage() && F.hasOneUse() && &F == CS.getCalledFunction();
1881  // If this is a noduplicate call, we can still inline as long as
1882  // inlining this would cause the removal of the caller (so the instruction
1883  // is not actually duplicated, just moved).
1884  if (!OnlyOneCallAndLocalLinkage && ContainsNoDuplicateCall)
1885  return "noduplicate";
1886 
1887  // We applied the maximum possible vector bonus at the beginning. Now,
1888  // subtract the excess bonus, if any, from the Threshold before
1889  // comparing against Cost.
1890  if (NumVectorInstructions <= NumInstructions / 10)
1891  Threshold -= VectorBonus;
1892  else if (NumVectorInstructions <= NumInstructions / 2)
1893  Threshold -= VectorBonus/2;
1894 
1895  return Cost < std::max(1, Threshold);
1896 }
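 // Illustrative counts for the bonus adjustment above: with
 // NumInstructions = 100, a callee with NumVectorInstructions = 5 (<= 10%)
 // loses the entire VectorBonus, one with 30 vector instructions (<= 50%)
 // loses half of it, and a denser vector kernel keeps the full bonus before
 // the final Cost < max(1, Threshold) comparison.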
1897 
1898 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1899 /// Dump stats about this call's analysis.
1900 LLVM_DUMP_METHOD void CallAnalyzer::dump() {
1901 #define DEBUG_PRINT_STAT(x) dbgs() << " " #x ": " << x << "\n"
1902  DEBUG_PRINT_STAT(NumConstantArgs);
1903  DEBUG_PRINT_STAT(NumConstantOffsetPtrArgs);
1904  DEBUG_PRINT_STAT(NumAllocaArgs);
1905  DEBUG_PRINT_STAT(NumConstantPtrCmps);
1906  DEBUG_PRINT_STAT(NumConstantPtrDiffs);
1907  DEBUG_PRINT_STAT(NumInstructionsSimplified);
1908  DEBUG_PRINT_STAT(NumInstructions);
1909  DEBUG_PRINT_STAT(SROACostSavings);
1910  DEBUG_PRINT_STAT(SROACostSavingsLost);
1911  DEBUG_PRINT_STAT(LoadEliminationCost);
1912  DEBUG_PRINT_STAT(ContainsNoDuplicateCall);
1913  DEBUG_PRINT_STAT(Cost);
1914  DEBUG_PRINT_STAT(Threshold);
1915 #undef DEBUG_PRINT_STAT
1916 }
1917 #endif
1918 
1919 /// Test that there are no attribute conflicts between Caller and Callee
1920 /// that prevent inlining.
1921 static bool functionsHaveCompatibleAttributes(Function *Caller,
1922  Function *Callee,
1923  TargetTransformInfo &TTI) {
1924  return TTI.areInlineCompatible(Caller, Callee) &&
1925  AttributeFuncs::areInlineCompatible(*Caller, *Callee);
1926 }
1927 
1928 int llvm::getCallsiteCost(CallSite CS, const DataLayout &DL) {
1929  int Cost = 0;
1930  for (unsigned I = 0, E = CS.arg_size(); I != E; ++I) {
1931  if (CS.isByValArgument(I)) {
1932  // We approximate the number of loads and stores needed by dividing the
1933  // size of the byval type by the target's pointer size.
1934  PointerType *PTy = cast<PointerType>(CS.getArgument(I)->getType());
1935  unsigned TypeSize = DL.getTypeSizeInBits(PTy->getElementType());
1936  unsigned AS = PTy->getAddressSpace();
1937  unsigned PointerSize = DL.getPointerSizeInBits(AS);
1938  // Ceiling division.
1939  unsigned NumStores = (TypeSize + PointerSize - 1) / PointerSize;
1940 
1941  // If it generates more than 8 stores it is likely to be expanded as an
1942  // inline memcpy so we take that as an upper bound. Otherwise we assume
1943  // one load and one store per word copied.
1944  // FIXME: The maxStoresPerMemcpy setting from the target should be used
1945  // here instead of a magic number of 8, but it's not available via
1946  // DataLayout.
1947  NumStores = std::min(NumStores, 8U);
1948 
1949  Cost += 2 * NumStores * InlineConstants::InstrCost;
1950  } else {
1951  // For non-byval arguments subtract off one instruction per call
1952  // argument.
1953  Cost += InlineConstants::InstrCost;
1954  }
1955  }
1956  // The call instruction also disappears after inlining.
1957  Cost += InlineConstants::InstrCost + InlineConstants::CallPenalty;
1958  return Cost;
1959 }
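 // Worked example (hypothetical argument): passing a 64-byte struct byval on
 // a target with 64-bit pointers gives TypeSize = 512, PointerSize = 64, and
 // NumStores = (512 + 63) / 64 = 8, so the argument contributes
 // 2 * 8 * InstrCost. Each non-byval argument contributes a single InstrCost,
 // and the call itself adds InstrCost + CallPenalty.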
1960 
1961 InlineCost llvm::getInlineCost(
1962  CallSite CS, const InlineParams &Params, TargetTransformInfo &CalleeTTI,
1963  std::function<AssumptionCache &(Function &)> &GetAssumptionCache,
1964  Optional<function_ref<BlockFrequencyInfo &(Function &)>> GetBFI,
1965  ProfileSummaryInfo *PSI, OptimizationRemarkEmitter *ORE) {
1966  return getInlineCost(CS, CS.getCalledFunction(), Params, CalleeTTI,
1967  GetAssumptionCache, GetBFI, PSI, ORE);
1968 }
1969 
1970 InlineCost llvm::getInlineCost(
1971  CallSite CS, Function *Callee, const InlineParams &Params,
1972  TargetTransformInfo &CalleeTTI,
1973  std::function<AssumptionCache &(Function &)> &GetAssumptionCache,
1974  Optional<function_ref<BlockFrequencyInfo &(Function &)>> GetBFI,
1975  ProfileSummaryInfo *PSI, OptimizationRemarkEmitter *ORE) {
1976 
1977  // Cannot inline indirect calls.
1978  if (!Callee)
1979  return llvm::InlineCost::getNever("indirect call");
1980 
1981  // Never inline calls with byval arguments that do not have the alloca
1982  // address space. Since byval arguments can be replaced with a copy to an
1983  // alloca, the inlined code would need to be adjusted to handle the fact that
1984  // the argument is now in the alloca address space (so it is a little bit
1985  // complicated to solve).
1986  unsigned AllocaAS = Callee->getParent()->getDataLayout().getAllocaAddrSpace();
1987  for (unsigned I = 0, E = CS.arg_size(); I != E; ++I)
1988  if (CS.isByValArgument(I)) {
1989  PointerType *PTy = cast<PointerType>(CS.getArgument(I)->getType());
1990  if (PTy->getAddressSpace() != AllocaAS)
1991  return llvm::InlineCost::getNever("byval arguments without alloca"
1992  " address space");
1993  }
1994 
1995  // Calls to functions with always-inline attributes should be inlined
1996  // whenever possible.
1997  if (CS.hasFnAttr(Attribute::AlwaysInline)) {
1998  if (isInlineViable(*Callee))
1999  return llvm::InlineCost::getAlways("always inline attribute");
2000  return llvm::InlineCost::getNever("inapplicable always inline attribute");
2001  }
2002 
2003  // Never inline functions with conflicting attributes (unless callee has
2004  // always-inline attribute).
2005  Function *Caller = CS.getCaller();
2006  if (!functionsHaveCompatibleAttributes(Caller, Callee, CalleeTTI))
2007  return llvm::InlineCost::getNever("conflicting attributes");
2008 
2009  // Don't inline this call if the caller has the optnone attribute.
2010  if (Caller->hasFnAttribute(Attribute::OptimizeNone))
2011  return llvm::InlineCost::getNever("optnone attribute");
2012 
2013  // Don't inline a function that treats null pointer as valid into a caller
2014  // that does not have this attribute.
2015  if (!Caller->nullPointerIsDefined() && Callee->nullPointerIsDefined())
2016  return llvm::InlineCost::getNever("nullptr definitions incompatible");
2017 
2018  // Don't inline functions which can be interposed at link-time.
2019  if (Callee->isInterposable())
2020  return llvm::InlineCost::getNever("interposable");
2021 
2022  // Don't inline functions marked noinline.
2023  if (Callee->hasFnAttribute(Attribute::NoInline))
2024  return llvm::InlineCost::getNever("noinline function attribute");
2025 
2026  // Don't inline call sites marked noinline.
2027  if (CS.isNoInline())
2028  return llvm::InlineCost::getNever("noinline call site attribute");
2029 
2030  LLVM_DEBUG(llvm::dbgs() << " Analyzing call of " << Callee->getName()
2031  << "... (caller:" << Caller->getName() << ")\n");
2032 
2033  CallAnalyzer CA(CalleeTTI, GetAssumptionCache, GetBFI, PSI, ORE, *Callee, CS,
2034  Params);
2035  InlineResult ShouldInline = CA.analyzeCall(CS);
2036 
2037  LLVM_DEBUG(CA.dump());
2038 
2039  // Check if there was a reason to force inlining or no inlining.
2040  if (!ShouldInline && CA.getCost() < CA.getThreshold())
2041  return InlineCost::getNever(ShouldInline.message);
2042  if (ShouldInline && CA.getCost() >= CA.getThreshold())
2043  return InlineCost::getAlways("empty function");
2044 
2045  return llvm::InlineCost::get(CA.getCost(), CA.getThreshold());
2046 }
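 // A minimal usage sketch (the caller-side names here are illustrative, not
 // part of this file): an inliner pass typically builds InlineParams once and
 // then queries the cost per call site, e.g.
 //   InlineParams Params = getInlineParams(OptLevel, SizeOptLevel);
 //   InlineCost IC = getInlineCost(CS, Params, CalleeTTI, GetAssumptionCache,
 //                                 GetBFI, PSI, &ORE);
 //   InlineFunctionInfo IFI;
 //   if (IC)
 //     InlineFunction(CS, IFI);
 // where the boolean conversion of InlineCost is true when the analysis
 // recommends inlining.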
2047 
2049  bool ReturnsTwice = F.hasFnAttribute(Attribute::ReturnsTwice);
2050  for (Function::iterator BI = F.begin(), BE = F.end(); BI != BE; ++BI) {
2051  // Disallow inlining of functions which contain indirect branches or
2052  // blockaddresses.
2053  if (isa<IndirectBrInst>(BI->getTerminator()) || BI->hasAddressTaken())
2054  return false;
2055 
2056  for (auto &II : *BI) {
2057  CallSite CS(&II);
2058  if (!CS)
2059  continue;
2060 
2061  // Disallow recursive calls.
2062  if (&F == CS.getCalledFunction())
2063  return false;
2064 
2065  // Disallow calls which expose returns-twice to a function not previously
2066  // attributed as such.
2067  if (!ReturnsTwice && CS.isCall() &&
2068  cast<CallInst>(CS.getInstruction())->canReturnTwice())
2069  return false;
2070 
2071  if (CS.getCalledFunction())
2072  switch (CS.getCalledFunction()->getIntrinsicID()) {
2073  default:
2074  break;
2075  // Disallow inlining of @llvm.icall.branch.funnel because the current
2076  // backend can't separate call targets from call arguments.
2077  case llvm::Intrinsic::icall_branch_funnel:
2078  // Disallow inlining functions that call @llvm.localescape. Doing this
2079  // correctly would require major changes to the inliner.
2080  case llvm::Intrinsic::localescape:
2081  // Disallow inlining of functions that initialize VarArgs with va_start.
2082  case llvm::Intrinsic::vastart:
2083  return false;
2084  }
2085  }
2086  }
2087 
2088  return true;
2089 }
2090 
2091 // APIs to create InlineParams based on command line flags and/or other
2092 // parameters.
2093 
2094 InlineParams llvm::getInlineParams(int Threshold) {
2095  InlineParams Params;
2096 
2097  // This field is the threshold to use for a callee by default. This is
2098  // derived from one or more of:
2099  // * optimization or size-optimization levels,
2100  // * a value passed to createFunctionInliningPass function, or
2101  // * the -inline-threshold flag.
2102  // If the -inline-threshold flag is explicitly specified, that is used
2103  // irrespective of anything else.
2104  if (InlineThreshold.getNumOccurrences() > 0)
2105  Params.DefaultThreshold = InlineThreshold;
2106  else
2107  Params.DefaultThreshold = Threshold;
2108 
2109  // Set the HintThreshold knob from the -inlinehint-threshold.
2110  Params.HintThreshold = HintThreshold;
2111 
2112  // Set the HotCallSiteThreshold knob from the -hot-callsite-threshold.
2113  Params.HotCallSiteThreshold = HotCallSiteThreshold;
2114 
2115  // If the -locally-hot-callsite-threshold is explicitly specified, use it to
2116  // populate LocallyHotCallSiteThreshold. Later, we populate
2117  // Params.LocallyHotCallSiteThreshold from -locally-hot-callsite-threshold if
2118  // we know that optimization level is O3 (in the getInlineParams variant that
2119  // takes the opt and size levels).
2120  // FIXME: Remove this check (and make the assignment unconditional) after
2121  // addressing size regression issues at O2.
2122  if (LocallyHotCallSiteThreshold.getNumOccurrences() > 0)
2123  Params.LocallyHotCallSiteThreshold = LocallyHotCallSiteThreshold;
2124 
2125  // Set the ColdCallSiteThreshold knob from the -inline-cold-callsite-threshold.
2126  Params.ColdCallSiteThreshold = ColdCallSiteThreshold;
2127 
2128  // Set the OptMinSizeThreshold and OptSizeThreshold params only if the
2129  // -inline-threshold commandline option is not explicitly given. If that
2130  // option is present, then its value applies even for callees with size and
2131  // minsize attributes.
2132  // If the -inline-threshold is not specified, set the ColdThreshold from the
2133  // -inlinecold-threshold even if it is not explicitly passed. If
2134  // -inline-threshold is specified, then -inlinecold-threshold needs to be
2135  // explicitly specified to set the ColdThreshold knob
2136  if (InlineThreshold.getNumOccurrences() == 0) {
2137  Params.OptMinSizeThreshold = InlineConstants::OptMinSizeThreshold;
2138  Params.OptSizeThreshold = InlineConstants::OptSizeThreshold;
2139  Params.ColdThreshold = ColdThreshold;
2140  } else if (ColdThreshold.getNumOccurrences() > 0) {
2141  Params.ColdThreshold = ColdThreshold;
2142  }
2143  return Params;
2144 }
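 // For example (hypothetical invocation): running opt with
 // -inline-threshold=500 -inlinecold-threshold=100 yields
 // Params.DefaultThreshold = 500 and Params.ColdThreshold = 100, while
 // passing only -inline-threshold=500 leaves ColdThreshold unset because an
 // explicit -inline-threshold suppresses the implicit cold-threshold default.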
2145 
2146 InlineParams llvm::getInlineParams() {
2147  return getInlineParams(InlineThreshold);
2148 }
2149 
2150 // Compute the default threshold for inlining based on the opt level and the
2151 // size opt level.
2152 static int computeThresholdFromOptLevels(unsigned OptLevel,
2153  unsigned SizeOptLevel) {
2154  if (OptLevel > 2)
2155  return InlineConstants::OptAggressiveThreshold;
2156  if (SizeOptLevel == 1) // -Os
2157  return InlineConstants::OptSizeThreshold;
2158  if (SizeOptLevel == 2) // -Oz
2159  return InlineConstants::OptMinSizeThreshold;
2160  return InlineThreshold;
2161 }
2162 
2163 InlineParams llvm::getInlineParams(unsigned OptLevel, unsigned SizeOptLevel) {
2164  auto Params =
2165  getInlineParams(computeThresholdFromOptLevels(OptLevel, SizeOptLevel));
2166  // At O3, use the value of -locally-hot-callsite-threshold option to populate
2167  // Params.LocallyHotCallSiteThreshold. Below O3, this flag has effect only
2168  // when it is specified explicitly.
2169  if (OptLevel > 2)
2170  Params.LocallyHotCallSiteThreshold = LocallyHotCallSiteThreshold;
2171  return Params;
2172 }