LLVM 7.0.0svn
InlineCost.cpp
1 //===- InlineCost.cpp - Cost analysis for inliner -------------------------===//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This file implements inline cost analysis.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #include "llvm/Analysis/InlineCost.h"
15 #include "llvm/ADT/STLExtras.h"
16 #include "llvm/ADT/SetVector.h"
17 #include "llvm/ADT/SmallPtrSet.h"
18 #include "llvm/ADT/SmallVector.h"
19 #include "llvm/ADT/Statistic.h"
20 #include "llvm/Analysis/AssumptionCache.h"
21 #include "llvm/Analysis/BlockFrequencyInfo.h"
22 #include "llvm/Analysis/CodeMetrics.h"
23 #include "llvm/Analysis/ConstantFolding.h"
24 #include "llvm/Analysis/CFG.h"
25 #include "llvm/Analysis/InstructionSimplify.h"
26 #include "llvm/Analysis/ProfileSummaryInfo.h"
27 #include "llvm/Analysis/TargetTransformInfo.h"
28 #include "llvm/Analysis/ValueTracking.h"
29 #include "llvm/Config/llvm-config.h"
30 #include "llvm/IR/CallSite.h"
31 #include "llvm/IR/CallingConv.h"
32 #include "llvm/IR/DataLayout.h"
33 #include "llvm/IR/GetElementPtrTypeIterator.h"
34 #include "llvm/IR/GlobalAlias.h"
35 #include "llvm/IR/InstVisitor.h"
36 #include "llvm/IR/IntrinsicInst.h"
37 #include "llvm/IR/Operator.h"
38 #include "llvm/Support/Debug.h"
39 #include "llvm/Support/raw_ostream.h"
40 
41 using namespace llvm;
42 
43 #define DEBUG_TYPE "inline-cost"
44 
45 STATISTIC(NumCallsAnalyzed, "Number of call sites analyzed");
46 
47 static cl::opt<int> InlineThreshold(
48  "inline-threshold", cl::Hidden, cl::init(225), cl::ZeroOrMore,
49  cl::desc("Control the amount of inlining to perform (default = 225)"));
50 
51 static cl::opt<int> HintThreshold(
52  "inlinehint-threshold", cl::Hidden, cl::init(325),
53  cl::desc("Threshold for inlining functions with inline hint"));
54 
55 static cl::opt<int>
56  ColdCallSiteThreshold("inline-cold-callsite-threshold", cl::Hidden,
57  cl::init(45),
58  cl::desc("Threshold for inlining cold callsites"));
59 
60 // We introduce this threshold to help performance of instrumentation based
61 // PGO before we actually hook up inliner with analysis passes such as BPI and
62 // BFI.
63 static cl::opt<int> ColdThreshold(
64  "inlinecold-threshold", cl::Hidden, cl::init(45),
65  cl::desc("Threshold for inlining functions with cold attribute"));
66 
67 static cl::opt<int>
68  HotCallSiteThreshold("hot-callsite-threshold", cl::Hidden, cl::init(3000),
69  cl::ZeroOrMore,
70  cl::desc("Threshold for hot callsites "));
71 
72 static cl::opt<int> LocallyHotCallSiteThreshold(
73  "locally-hot-callsite-threshold", cl::Hidden, cl::init(525), cl::ZeroOrMore,
74  cl::desc("Threshold for locally hot callsites "));
75 
76 static cl::opt<int> ColdCallSiteRelFreq(
77  "cold-callsite-rel-freq", cl::Hidden, cl::init(2), cl::ZeroOrMore,
78  cl::desc("Maximum block frequency, expressed as a percentage of caller's "
79  "entry frequency, for a callsite to be cold in the absence of "
80  "profile information."));
81 
82 static cl::opt<int> HotCallSiteRelFreq(
83  "hot-callsite-rel-freq", cl::Hidden, cl::init(60), cl::ZeroOrMore,
84  cl::desc("Minimum block frequency, expressed as a multiple of caller's "
85  "entry frequency, for a callsite to be hot in the absence of "
86  "profile information."));
87 
88 static cl::opt<bool> OptComputeFullInlineCost(
89  "inline-cost-full", cl::Hidden, cl::init(false),
90  cl::desc("Compute the full inline cost of a call site even when the cost "
91  "exceeds the threshold."));
92 
93 namespace {
94 
95 class CallAnalyzer : public InstVisitor<CallAnalyzer, bool> {
96  typedef InstVisitor<CallAnalyzer, bool> Base;
97  friend class InstVisitor<CallAnalyzer, bool>;
98 
99  /// The TargetTransformInfo available for this compilation.
100  const TargetTransformInfo &TTI;
101 
102  /// Getter for the cache of @llvm.assume intrinsics.
103  std::function<AssumptionCache &(Function &)> &GetAssumptionCache;
104 
105  /// Getter for BlockFrequencyInfo
106  Optional<function_ref<BlockFrequencyInfo &(Function &)>> &GetBFI;
107 
108  /// Profile summary information.
109  ProfileSummaryInfo *PSI;
110 
111  /// The called function.
112  Function &F;
113 
114  // Cache the DataLayout since we use it a lot.
115  const DataLayout &DL;
116 
117  /// The OptimizationRemarkEmitter available for this compilation.
118  OptimizationRemarkEmitter *ORE;
119 
120  /// The candidate callsite being analyzed. Please do not use this to do
121  /// analysis in the caller function; we want the inline cost query to be
122  /// easily cacheable. Instead, use the cover function paramHasAttr.
123  CallSite CandidateCS;
124 
125  /// Tunable parameters that control the analysis.
126  const InlineParams &Params;
127 
128  int Threshold;
129  int Cost;
130  bool ComputeFullInlineCost;
131 
132  bool IsCallerRecursive;
133  bool IsRecursiveCall;
134  bool ExposesReturnsTwice;
135  bool HasDynamicAlloca;
136  bool ContainsNoDuplicateCall;
137  bool HasReturn;
138  bool HasIndirectBr;
139  bool HasUninlineableIntrinsic;
140  bool UsesVarArgs;
141 
142  /// Number of bytes allocated statically by the callee.
143  uint64_t AllocatedSize;
144  unsigned NumInstructions, NumVectorInstructions;
145  int VectorBonus, TenPercentVectorBonus;
146  // Bonus to be applied when the callee has only one reachable basic block.
147  int SingleBBBonus;
148 
149  /// While we walk the potentially-inlined instructions, we build up and
150  /// maintain a mapping of simplified values specific to this callsite. The
151  /// idea is to propagate any special information we have about arguments to
152  /// this call through the inlinable section of the function, and account for
153  /// likely simplifications post-inlining. The most important aspect we track
154  /// is CFG altering simplifications -- when we prove a basic block dead, that
155  /// can cause dramatic shifts in the cost of inlining a function.
156  DenseMap<Value *, Constant *> SimplifiedValues;
157 
158  /// Keep track of the values which map back (through function arguments) to
159  /// allocas on the caller stack which could be simplified through SROA.
160  DenseMap<Value *, Value *> SROAArgValues;
161 
162  /// The mapping of caller Alloca values to their accumulated cost savings. If
163  /// we have to disable SROA for one of the allocas, this tells us how much
164  /// cost must be added.
165  DenseMap<Value *, int> SROAArgCosts;
166 
167  /// Keep track of values which map to a pointer base and constant offset.
168  DenseMap<Value *, std::pair<Value *, APInt>> ConstantOffsetPtrs;
169 
170  /// Keep track of dead blocks due to the constant arguments.
171  SetVector<BasicBlock *> DeadBlocks;
172 
173  /// The mapping of the blocks to their known unique successors due to the
174  /// constant arguments.
175  DenseMap<BasicBlock *, BasicBlock *> KnownSuccessors;
176 
177  /// Model the elimination of repeated loads that is expected to happen
178  /// whenever we simplify away the stores that would otherwise cause them to be
179  /// loads.
180  bool EnableLoadElimination;
181  SmallPtrSet<Value *, 16> LoadAddrSet;
182  int LoadEliminationCost;
183 
184  // Custom simplification helper routines.
185  bool isAllocaDerivedArg(Value *V);
186  bool lookupSROAArgAndCost(Value *V, Value *&Arg,
187  DenseMap<Value *, int>::iterator &CostIt);
188  void disableSROA(DenseMap<Value *, int>::iterator CostIt);
189  void disableSROA(Value *V);
190  void findDeadBlocks(BasicBlock *CurrBB, BasicBlock *NextBB);
191  void accumulateSROACost(DenseMap<Value *, int>::iterator CostIt,
192  int InstructionCost);
193  void disableLoadElimination();
194  bool isGEPFree(GetElementPtrInst &GEP);
195  bool canFoldInboundsGEP(GetElementPtrInst &I);
196  bool accumulateGEPOffset(GEPOperator &GEP, APInt &Offset);
197  bool simplifyCallSite(Function *F, CallSite CS);
198  template <typename Callable>
199  bool simplifyInstruction(Instruction &I, Callable Evaluate);
200  ConstantInt *stripAndComputeInBoundsConstantOffsets(Value *&V);
201 
202  /// Return true if the given argument to the function being considered for
203  /// inlining has the given attribute set either at the call site or the
204  /// function declaration. Primarily used to inspect call site specific
205  /// attributes since these can be more precise than the ones on the callee
206  /// itself.
207  bool paramHasAttr(Argument *A, Attribute::AttrKind Attr);
208 
209  /// Return true if the given value is known non null within the callee if
210  /// inlined through this particular callsite.
211  bool isKnownNonNullInCallee(Value *V);
212 
213  /// Update Threshold based on callsite properties such as callee
214  /// attributes and callee hotness for PGO builds. The Callee is explicitly
215  /// passed to support analyzing indirect calls whose target is inferred by
216  /// analysis.
217  void updateThreshold(CallSite CS, Function &Callee);
218 
219  /// Return true if size growth is allowed when inlining the callee at CS.
220  bool allowSizeGrowth(CallSite CS);
221 
222  /// Return true if \p CS is a cold callsite.
223  bool isColdCallSite(CallSite CS, BlockFrequencyInfo *CallerBFI);
224 
225  /// Return a higher threshold if \p CS is a hot callsite.
226  Optional<int> getHotCallSiteThreshold(CallSite CS,
227  BlockFrequencyInfo *CallerBFI);
228 
229  // Custom analysis routines.
230  bool analyzeBlock(BasicBlock *BB, SmallPtrSetImpl<const Value *> &EphValues);
231 
232  // Disable several entry points to the visitor so we don't accidentally use
233  // them by declaring but not defining them here.
234  void visit(Module *);
235  void visit(Module &);
236  void visit(Function *);
237  void visit(Function &);
238  void visit(BasicBlock *);
239  void visit(BasicBlock &);
240 
241  // Provide base case for our instruction visit.
242  bool visitInstruction(Instruction &I);
243 
244  // Our visit overrides.
245  bool visitAlloca(AllocaInst &I);
246  bool visitPHI(PHINode &I);
247  bool visitGetElementPtr(GetElementPtrInst &I);
248  bool visitBitCast(BitCastInst &I);
249  bool visitPtrToInt(PtrToIntInst &I);
250  bool visitIntToPtr(IntToPtrInst &I);
251  bool visitCastInst(CastInst &I);
252  bool visitUnaryInstruction(UnaryInstruction &I);
253  bool visitCmpInst(CmpInst &I);
254  bool visitSub(BinaryOperator &I);
255  bool visitBinaryOperator(BinaryOperator &I);
256  bool visitLoad(LoadInst &I);
257  bool visitStore(StoreInst &I);
258  bool visitExtractValue(ExtractValueInst &I);
259  bool visitInsertValue(InsertValueInst &I);
260  bool visitCallSite(CallSite CS);
261  bool visitReturnInst(ReturnInst &RI);
262  bool visitBranchInst(BranchInst &BI);
263  bool visitSelectInst(SelectInst &SI);
264  bool visitSwitchInst(SwitchInst &SI);
265  bool visitIndirectBrInst(IndirectBrInst &IBI);
266  bool visitResumeInst(ResumeInst &RI);
267  bool visitCleanupReturnInst(CleanupReturnInst &RI);
268  bool visitCatchReturnInst(CatchReturnInst &RI);
269  bool visitUnreachableInst(UnreachableInst &I);
270 
271 public:
272  CallAnalyzer(const TargetTransformInfo &TTI,
273  std::function<AssumptionCache &(Function &)> &GetAssumptionCache,
274  Optional<function_ref<BlockFrequencyInfo &(Function &)>> &GetBFI,
275  ProfileSummaryInfo *PSI, OptimizationRemarkEmitter *ORE,
276  Function &Callee, CallSite CSArg, const InlineParams &Params)
277  : TTI(TTI), GetAssumptionCache(GetAssumptionCache), GetBFI(GetBFI),
278  PSI(PSI), F(Callee), DL(F.getParent()->getDataLayout()), ORE(ORE),
279  CandidateCS(CSArg), Params(Params), Threshold(Params.DefaultThreshold),
280  Cost(0), ComputeFullInlineCost(OptComputeFullInlineCost ||
281  Params.ComputeFullInlineCost || ORE),
282  IsCallerRecursive(false), IsRecursiveCall(false),
283  ExposesReturnsTwice(false), HasDynamicAlloca(false),
284  ContainsNoDuplicateCall(false), HasReturn(false), HasIndirectBr(false),
285  HasUninlineableIntrinsic(false), UsesVarArgs(false), AllocatedSize(0),
286  NumInstructions(0), NumVectorInstructions(0), VectorBonus(0),
287  SingleBBBonus(0), EnableLoadElimination(true), LoadEliminationCost(0),
288  NumConstantArgs(0), NumConstantOffsetPtrArgs(0), NumAllocaArgs(0),
289  NumConstantPtrCmps(0), NumConstantPtrDiffs(0),
290  NumInstructionsSimplified(0), SROACostSavings(0),
291  SROACostSavingsLost(0) {}
292 
293  bool analyzeCall(CallSite CS);
294 
295  int getThreshold() { return Threshold; }
296  int getCost() { return Cost; }
297 
298  // Keep a bunch of stats about the cost savings found so we can print them
299  // out when debugging.
300  unsigned NumConstantArgs;
301  unsigned NumConstantOffsetPtrArgs;
302  unsigned NumAllocaArgs;
303  unsigned NumConstantPtrCmps;
304  unsigned NumConstantPtrDiffs;
305  unsigned NumInstructionsSimplified;
306  unsigned SROACostSavings;
307  unsigned SROACostSavingsLost;
308 
309  void dump();
310 };
311 
312 } // namespace
313 
314 /// Test whether the given value is an Alloca-derived function argument.
315 bool CallAnalyzer::isAllocaDerivedArg(Value *V) {
316  return SROAArgValues.count(V);
317 }
318 
319 /// Lookup the SROA-candidate argument and cost iterator which V maps to.
320 /// Returns false if V does not map to a SROA-candidate.
321 bool CallAnalyzer::lookupSROAArgAndCost(
322  Value *V, Value *&Arg, DenseMap<Value *, int>::iterator &CostIt) {
323  if (SROAArgValues.empty() || SROAArgCosts.empty())
324  return false;
325 
326  DenseMap<Value *, Value *>::iterator ArgIt = SROAArgValues.find(V);
327  if (ArgIt == SROAArgValues.end())
328  return false;
329 
330  Arg = ArgIt->second;
331  CostIt = SROAArgCosts.find(Arg);
332  return CostIt != SROAArgCosts.end();
333 }
334 
335 /// Disable SROA for the candidate marked by this cost iterator.
336 ///
337 /// This marks the candidate as no longer viable for SROA, and adds the cost
338 /// savings associated with it back into the inline cost measurement.
339 void CallAnalyzer::disableSROA(DenseMap<Value *, int>::iterator CostIt) {
340  // If we're no longer able to perform SROA we need to undo its cost savings
341  // and prevent subsequent analysis.
342  Cost += CostIt->second;
343  SROACostSavings -= CostIt->second;
344  SROACostSavingsLost += CostIt->second;
345  SROAArgCosts.erase(CostIt);
346  disableLoadElimination();
347 }
348 
349 /// If 'V' maps to a SROA candidate, disable SROA for it.
350 void CallAnalyzer::disableSROA(Value *V) {
351  Value *SROAArg;
352  DenseMap<Value *, int>::iterator CostIt;
353  if (lookupSROAArgAndCost(V, SROAArg, CostIt))
354  disableSROA(CostIt);
355 }
356 
357 /// Accumulate the given cost for a particular SROA candidate.
358 void CallAnalyzer::accumulateSROACost(DenseMap<Value *, int>::iterator CostIt,
359  int InstructionCost) {
360  CostIt->second += InstructionCost;
361  SROACostSavings += InstructionCost;
362 }
363 
364 void CallAnalyzer::disableLoadElimination() {
365  if (EnableLoadElimination) {
366  Cost += LoadEliminationCost;
367  LoadEliminationCost = 0;
368  EnableLoadElimination = false;
369  }
370 }
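// Note on the accounting above: visitLoad credits InlineConstants::InstrCost
// to LoadEliminationCost for each repeated load of an address already seen in
// LoadAddrSet. The first store, or any callee that may write memory, funnels
// into disableLoadElimination(), which adds the accumulated savings back into
// Cost and stops any further load-elimination credit for this callsite.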
371 
372 /// Accumulate a constant GEP offset into an APInt if possible.
373 ///
374 /// Returns false if unable to compute the offset for any reason. Respects any
375 /// simplified values known during the analysis of this callsite.
376 bool CallAnalyzer::accumulateGEPOffset(GEPOperator &GEP, APInt &Offset) {
377  unsigned IntPtrWidth = DL.getIndexTypeSizeInBits(GEP.getType());
378  assert(IntPtrWidth == Offset.getBitWidth());
379 
380  for (gep_type_iterator GTI = gep_type_begin(GEP), GTE = gep_type_end(GEP);
381  GTI != GTE; ++GTI) {
382  ConstantInt *OpC = dyn_cast<ConstantInt>(GTI.getOperand());
383  if (!OpC)
384  if (Constant *SimpleOp = SimplifiedValues.lookup(GTI.getOperand()))
385  OpC = dyn_cast<ConstantInt>(SimpleOp);
386  if (!OpC)
387  return false;
388  if (OpC->isZero())
389  continue;
390 
391  // Handle a struct index, which adds its field offset to the pointer.
392  if (StructType *STy = GTI.getStructTypeOrNull()) {
393  unsigned ElementIdx = OpC->getZExtValue();
394  const StructLayout *SL = DL.getStructLayout(STy);
395  Offset += APInt(IntPtrWidth, SL->getElementOffset(ElementIdx));
396  continue;
397  }
398 
399  APInt TypeSize(IntPtrWidth, DL.getTypeAllocSize(GTI.getIndexedType()));
400  Offset += OpC->getValue().sextOrTrunc(IntPtrWidth) * TypeSize;
401  }
402  return true;
403 }
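// Illustrative example (assuming a typical 64-bit DataLayout): for
//   %p = getelementptr inbounds {i32, i64}, {i32, i64}* %b, i32 0, i32 1
// the struct index adds the field offset of element 1, so Offset accumulates
// to 8 (the i64 field starts after 4 bytes of i32 plus 4 bytes of padding).
// If any index is neither a constant nor in SimplifiedValues, the walk above
// gives up and returns false.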
404 
405 /// Use TTI to check whether a GEP is free.
406 ///
407 /// Respects any simplified values known during the analysis of this callsite.
408 bool CallAnalyzer::isGEPFree(GetElementPtrInst &GEP) {
409  SmallVector<Value *, 4> Operands;
410  Operands.push_back(GEP.getOperand(0));
411  for (User::op_iterator I = GEP.idx_begin(), E = GEP.idx_end(); I != E; ++I)
412  if (Constant *SimpleOp = SimplifiedValues.lookup(*I))
413  Operands.push_back(SimpleOp);
414  else
415  Operands.push_back(*I);
416  return TargetTransformInfo::TCC_Free == TTI.getUserCost(&GEP, Operands);
417 }
418 
419 bool CallAnalyzer::visitAlloca(AllocaInst &I) {
420  // Check whether inlining will turn a dynamic alloca into a static
421  // alloca and handle that case.
422  if (I.isArrayAllocation()) {
423  Constant *Size = SimplifiedValues.lookup(I.getArraySize());
424  if (auto *AllocSize = dyn_cast_or_null<ConstantInt>(Size)) {
425  Type *Ty = I.getAllocatedType();
426  AllocatedSize = SaturatingMultiplyAdd(
427  AllocSize->getLimitedValue(), DL.getTypeAllocSize(Ty), AllocatedSize);
428  return Base::visitAlloca(I);
429  }
430  }
431 
432  // Accumulate the allocated size.
433  if (I.isStaticAlloca()) {
434  Type *Ty = I.getAllocatedType();
435  AllocatedSize = SaturatingAdd(DL.getTypeAllocSize(Ty), AllocatedSize);
436  }
437 
438  // We will happily inline static alloca instructions.
439  if (I.isStaticAlloca())
440  return Base::visitAlloca(I);
441 
442  // FIXME: This is overly conservative. Dynamic allocas are inefficient for
443  // a variety of reasons, and so we would like to not inline them into
444  // functions which don't currently have a dynamic alloca. This simply
445  // disables inlining altogether in the presence of a dynamic alloca.
446  HasDynamicAlloca = true;
447  return false;
448 }
449 
450 bool CallAnalyzer::visitPHI(PHINode &I) {
451  // FIXME: We need to propagate SROA *disabling* through phi nodes, even
452  // though we don't want to propagate its bonuses. The idea is to disable
453  // SROA if it *might* be used in an inappropriate manner.
454 
455  // Phi nodes are always zero-cost.
456  // FIXME: Pointer sizes may differ between different address spaces, so do we
457  // need to use correct address space in the call to getPointerSizeInBits here?
458  // Or could we skip the getPointerSizeInBits call completely? As far as I can
459  // see the ZeroOffset is used as a dummy value, so we can probably use any
460  // bit width for the ZeroOffset?
461  APInt ZeroOffset = APInt::getNullValue(DL.getPointerSizeInBits(0));
462  bool CheckSROA = I.getType()->isPointerTy();
463 
464  // Track the constant or pointer with constant offset we've seen so far.
465  Constant *FirstC = nullptr;
466  std::pair<Value *, APInt> FirstBaseAndOffset = {nullptr, ZeroOffset};
467  Value *FirstV = nullptr;
468 
469  for (unsigned i = 0, e = I.getNumIncomingValues(); i != e; ++i) {
470  BasicBlock *Pred = I.getIncomingBlock(i);
471  // If the incoming block is dead, skip the incoming block.
472  if (DeadBlocks.count(Pred))
473  continue;
474  // If the parent block of phi is not the known successor of the incoming
475  // block, skip the incoming block.
476  BasicBlock *KnownSuccessor = KnownSuccessors[Pred];
477  if (KnownSuccessor && KnownSuccessor != I.getParent())
478  continue;
479 
480  Value *V = I.getIncomingValue(i);
481  // If the incoming value is this phi itself, skip the incoming value.
482  if (&I == V)
483  continue;
484 
485  Constant *C = dyn_cast<Constant>(V);
486  if (!C)
487  C = SimplifiedValues.lookup(V);
488 
489  std::pair<Value *, APInt> BaseAndOffset = {nullptr, ZeroOffset};
490  if (!C && CheckSROA)
491  BaseAndOffset = ConstantOffsetPtrs.lookup(V);
492 
493  if (!C && !BaseAndOffset.first)
494  // The incoming value is neither a constant nor a pointer with constant
495  // offset, exit early.
496  return true;
497 
498  if (FirstC) {
499  if (FirstC == C)
500  // If we've seen a constant incoming value before and it is the same
501  // constant we see this time, continue checking the next incoming value.
502  continue;
503  // Otherwise early exit because we either see a different constant or saw
504  // a constant before but we have a pointer with constant offset this time.
505  return true;
506  }
507 
508  if (FirstV) {
509  // The same logic as above, but check pointer with constant offset here.
510  if (FirstBaseAndOffset == BaseAndOffset)
511  continue;
512  return true;
513  }
514 
515  if (C) {
516  // This is the 1st time we've seen a constant, record it.
517  FirstC = C;
518  continue;
519  }
520 
521  // The remaining case is that this is the 1st time we've seen a pointer with
522  // constant offset, record it.
523  FirstV = V;
524  FirstBaseAndOffset = BaseAndOffset;
525  }
526 
527  // Check if we can map phi to a constant.
528  if (FirstC) {
529  SimplifiedValues[&I] = FirstC;
530  return true;
531  }
532 
533  // Check if we can map phi to a pointer with constant offset.
534  if (FirstBaseAndOffset.first) {
535  ConstantOffsetPtrs[&I] = FirstBaseAndOffset;
536 
537  Value *SROAArg;
538  DenseMap<Value *, int>::iterator CostIt;
539  if (lookupSROAArgAndCost(FirstV, SROAArg, CostIt))
540  SROAArgValues[&I] = SROAArg;
541  }
542 
543  return true;
544 }
545 
546 /// Check we can fold GEPs of constant-offset call site argument pointers.
547 /// This requires target data and inbounds GEPs.
548 ///
549 /// \return true if the specified GEP can be folded.
550 bool CallAnalyzer::canFoldInboundsGEP(GetElementPtrInst &I) {
551  // Check if we have a base + offset for the pointer.
552  std::pair<Value *, APInt> BaseAndOffset =
553  ConstantOffsetPtrs.lookup(I.getPointerOperand());
554  if (!BaseAndOffset.first)
555  return false;
556 
557  // Check if the offset of this GEP is constant, and if so accumulate it
558  // into Offset.
559  if (!accumulateGEPOffset(cast<GEPOperator>(I), BaseAndOffset.second))
560  return false;
561 
562  // Add the result as a new mapping to Base + Offset.
563  ConstantOffsetPtrs[&I] = BaseAndOffset;
564 
565  return true;
566 }
567 
568 bool CallAnalyzer::visitGetElementPtr(GetElementPtrInst &I) {
569  Value *SROAArg;
570  DenseMap<Value *, int>::iterator CostIt;
571  bool SROACandidate =
572  lookupSROAArgAndCost(I.getPointerOperand(), SROAArg, CostIt);
573 
574  // Lambda to check whether a GEP's indices are all constant.
575  auto IsGEPOffsetConstant = [&](GetElementPtrInst &GEP) {
576  for (User::op_iterator I = GEP.idx_begin(), E = GEP.idx_end(); I != E; ++I)
577  if (!isa<Constant>(*I) && !SimplifiedValues.lookup(*I))
578  return false;
579  return true;
580  };
581 
582  if ((I.isInBounds() && canFoldInboundsGEP(I)) || IsGEPOffsetConstant(I)) {
583  if (SROACandidate)
584  SROAArgValues[&I] = SROAArg;
585 
586  // Constant GEPs are modeled as free.
587  return true;
588  }
589 
590  // Variable GEPs will require math and will disable SROA.
591  if (SROACandidate)
592  disableSROA(CostIt);
593  return isGEPFree(I);
594 }
595 
596 /// Simplify \p I if its operands are constants and update SimplifiedValues.
597 /// \p Evaluate is a callable specific to instruction type that evaluates the
598 /// instruction when all the operands are constants.
599 template <typename Callable>
600 bool CallAnalyzer::simplifyInstruction(Instruction &I, Callable Evaluate) {
601  SmallVector<Constant *, 2> COps;
602  for (Value *Op : I.operands()) {
603  Constant *COp = dyn_cast<Constant>(Op);
604  if (!COp)
605  COp = SimplifiedValues.lookup(Op);
606  if (!COp)
607  return false;
608  COps.push_back(COp);
609  }
610  auto *C = Evaluate(COps);
611  if (!C)
612  return false;
613  SimplifiedValues[&I] = C;
614  return true;
615 }
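// The visitors below use this helper by passing a lambda over the collected
// constant operands; visitBitCast, for example, hands COps[0] to
// ConstantExpr::getBitCast. A successful fold lands in SimplifiedValues, so
// later instructions can fold through this one as well.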
616 
617 bool CallAnalyzer::visitBitCast(BitCastInst &I) {
618  // Propagate constants through bitcasts.
619  if (simplifyInstruction(I, [&](SmallVectorImpl<Constant *> &COps) {
620  return ConstantExpr::getBitCast(COps[0], I.getType());
621  }))
622  return true;
623 
624  // Track base/offsets through casts
625  std::pair<Value *, APInt> BaseAndOffset =
626  ConstantOffsetPtrs.lookup(I.getOperand(0));
627  // Casts don't change the offset, just wrap it up.
628  if (BaseAndOffset.first)
629  ConstantOffsetPtrs[&I] = BaseAndOffset;
630 
631  // Also look for SROA candidates here.
632  Value *SROAArg;
633  DenseMap<Value *, int>::iterator CostIt;
634  if (lookupSROAArgAndCost(I.getOperand(0), SROAArg, CostIt))
635  SROAArgValues[&I] = SROAArg;
636 
637  // Bitcasts are always zero cost.
638  return true;
639 }
640 
641 bool CallAnalyzer::visitPtrToInt(PtrToIntInst &I) {
642  // Propagate constants through ptrtoint.
643  if (simplifyInstruction(I, [&](SmallVectorImpl<Constant *> &COps) {
644  return ConstantExpr::getPtrToInt(COps[0], I.getType());
645  }))
646  return true;
647 
648  // Track base/offset pairs when converted to a plain integer provided the
649  // integer is large enough to represent the pointer.
650  unsigned IntegerSize = I.getType()->getScalarSizeInBits();
651  unsigned AS = I.getOperand(0)->getType()->getPointerAddressSpace();
652  if (IntegerSize >= DL.getPointerSizeInBits(AS)) {
653  std::pair<Value *, APInt> BaseAndOffset =
654  ConstantOffsetPtrs.lookup(I.getOperand(0));
655  if (BaseAndOffset.first)
656  ConstantOffsetPtrs[&I] = BaseAndOffset;
657  }
658 
659  // This is really weird. Technically, ptrtoint will disable SROA. However,
660  // unless that ptrtoint is *used* somewhere in the live basic blocks after
661  // inlining, it will be nuked, and SROA should proceed. All of the uses which
662  // would block SROA would also block SROA if applied directly to a pointer,
663  // and so we can just add the integer in here. The only places where SROA is
664  // preserved either cannot fire on an integer, or won't in-and-of themselves
665  // disable SROA (ext) w/o some later use that we would see and disable.
666  Value *SROAArg;
667  DenseMap<Value *, int>::iterator CostIt;
668  if (lookupSROAArgAndCost(I.getOperand(0), SROAArg, CostIt))
669  SROAArgValues[&I] = SROAArg;
670 
671  return TargetTransformInfo::TCC_Free == TTI.getUserCost(&I);
672 }
673 
674 bool CallAnalyzer::visitIntToPtr(IntToPtrInst &I) {
675  // Propagate constants through inttoptr.
676  if (simplifyInstruction(I, [&](SmallVectorImpl<Constant *> &COps) {
677  return ConstantExpr::getIntToPtr(COps[0], I.getType());
678  }))
679  return true;
680 
681  // Track base/offset pairs when round-tripped through a pointer without
682  // modifications provided the integer is not too large.
683  Value *Op = I.getOperand(0);
684  unsigned IntegerSize = Op->getType()->getScalarSizeInBits();
685  if (IntegerSize <= DL.getPointerTypeSizeInBits(I.getType())) {
686  std::pair<Value *, APInt> BaseAndOffset = ConstantOffsetPtrs.lookup(Op);
687  if (BaseAndOffset.first)
688  ConstantOffsetPtrs[&I] = BaseAndOffset;
689  }
690 
691  // "Propagate" SROA here in the same manner as we do for ptrtoint above.
692  Value *SROAArg;
693  DenseMap<Value *, int>::iterator CostIt;
694  if (lookupSROAArgAndCost(Op, SROAArg, CostIt))
695  SROAArgValues[&I] = SROAArg;
696 
697  return TargetTransformInfo::TCC_Free == TTI.getUserCost(&I);
698 }
699 
700 bool CallAnalyzer::visitCastInst(CastInst &I) {
701  // Propagate constants through casts.
702  if (simplifyInstruction(I, [&](SmallVectorImpl<Constant *> &COps) {
703  return ConstantExpr::getCast(I.getOpcode(), COps[0], I.getType());
704  }))
705  return true;
706 
707  // Disable SROA in the face of arbitrary casts we don't whitelist elsewhere.
708  disableSROA(I.getOperand(0));
709 
710  // If this is a floating-point cast, and the target says this operation
711  // is expensive, this may eventually become a library call. Treat the cost
712  // as such.
713  switch (I.getOpcode()) {
714  case Instruction::FPTrunc:
715  case Instruction::FPExt:
716  case Instruction::UIToFP:
717  case Instruction::SIToFP:
718  case Instruction::FPToUI:
719  case Instruction::FPToSI:
720  if (TTI.getFPOpCost(I.getType()) == TargetTransformInfo::TCC_Expensive)
721  Cost += InlineConstants::CallPenalty;
722  default:
723  break;
724  }
725 
726  return TargetTransformInfo::TCC_Free == TTI.getUserCost(&I);
727 }
728 
729 bool CallAnalyzer::visitUnaryInstruction(UnaryInstruction &I) {
730  Value *Operand = I.getOperand(0);
731  if (simplifyInstruction(I, [&](SmallVectorImpl<Constant *> &COps) {
732  return ConstantFoldInstOperands(&I, COps[0], DL);
733  }))
734  return true;
735 
736  // Disable any SROA on the argument to arbitrary unary operators.
737  disableSROA(Operand);
738 
739  return false;
740 }
741 
742 bool CallAnalyzer::paramHasAttr(Argument *A, Attribute::AttrKind Attr) {
743  return CandidateCS.paramHasAttr(A->getArgNo(), Attr);
744 }
745 
746 bool CallAnalyzer::isKnownNonNullInCallee(Value *V) {
747  // Does the *call site* have the NonNull attribute set on an argument? We
748  // use the attribute on the call site to memoize any analysis done in the
749  // caller. This will also trip if the callee function has a non-null
750  // parameter attribute, but that's a less interesting case because hopefully
751  // the callee would already have been simplified based on that.
752  if (Argument *A = dyn_cast<Argument>(V))
753  if (paramHasAttr(A, Attribute::NonNull))
754  return true;
755 
756  // Is this an alloca in the caller? This is distinct from the attribute case
757  // above because attributes aren't updated within the inliner itself and we
758  // always want to catch the alloca derived case.
759  if (isAllocaDerivedArg(V))
760  // We can actually predict the result of comparisons between an
761  // alloca-derived value and null. Note that this fires regardless of
762  // SROA firing.
763  return true;
764 
765  return false;
766 }
767 
768 bool CallAnalyzer::allowSizeGrowth(CallSite CS) {
769  // If the normal destination of the invoke or the parent block of the call
770  // site is unreachable-terminated, there is little point in inlining this
771  // unless there is literally zero cost.
772  // FIXME: Note that it is possible that an unreachable-terminated block has a
773  // hot entry. For example, in below scenario inlining hot_call_X() may be
774  // beneficial :
775  // main() {
776  // hot_call_1();
777  // ...
778  // hot_call_N()
779  // exit(0);
780  // }
781  // For now, we are not handling this corner case here as it is rare in real
782  // code. In future, we should elaborate this based on BPI and BFI in more
783  // general threshold adjusting heuristics in updateThreshold().
784  Instruction *Instr = CS.getInstruction();
785  if (InvokeInst *II = dyn_cast<InvokeInst>(Instr)) {
786  if (isa<UnreachableInst>(II->getNormalDest()->getTerminator()))
787  return false;
788  } else if (isa<UnreachableInst>(Instr->getParent()->getTerminator()))
789  return false;
790 
791  return true;
792 }
793 
794 bool CallAnalyzer::isColdCallSite(CallSite CS, BlockFrequencyInfo *CallerBFI) {
795  // If global profile summary is available, then callsite's coldness is
796  // determined based on that.
797  if (PSI && PSI->hasProfileSummary())
798  return PSI->isColdCallSite(CS, CallerBFI);
799 
800  // Otherwise we need BFI to be available.
801  if (!CallerBFI)
802  return false;
803 
804  // Determine if the callsite is cold relative to caller's entry. We could
805  // potentially cache the computation of scaled entry frequency, but the added
806  // complexity is not worth it unless this scaling shows up high in the
807  // profiles.
808  const BranchProbability ColdProb(ColdCallSiteRelFreq, 100);
809  auto CallSiteBB = CS.getInstruction()->getParent();
810  auto CallSiteFreq = CallerBFI->getBlockFreq(CallSiteBB);
811  auto CallerEntryFreq =
812  CallerBFI->getBlockFreq(&(CS.getCaller()->getEntryBlock()));
813  return CallSiteFreq < CallerEntryFreq * ColdProb;
814 }
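// With the default cold-callsite-rel-freq of 2 and no profile summary, a
// callsite is treated as cold when its block frequency is below 2% of the
// caller's entry block frequency.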
815 
816 Optional<int>
817 CallAnalyzer::getHotCallSiteThreshold(CallSite CS,
818  BlockFrequencyInfo *CallerBFI) {
819 
820  // If global profile summary is available, then callsite's hotness is
821  // determined based on that.
822  if (PSI && PSI->hasProfileSummary() && PSI->isHotCallSite(CS, CallerBFI))
823  return Params.HotCallSiteThreshold;
824 
825  // Otherwise we need BFI to be available and to have a locally hot callsite
826  // threshold.
827  if (!CallerBFI || !Params.LocallyHotCallSiteThreshold)
828  return None;
829 
830  // Determine if the callsite is hot relative to caller's entry. We could
831  // potentially cache the computation of scaled entry frequency, but the added
832  // complexity is not worth it unless this scaling shows up high in the
833  // profiles.
834  auto CallSiteBB = CS.getInstruction()->getParent();
835  auto CallSiteFreq = CallerBFI->getBlockFreq(CallSiteBB).getFrequency();
836  auto CallerEntryFreq = CallerBFI->getEntryFreq();
837  if (CallSiteFreq >= CallerEntryFreq * HotCallSiteRelFreq)
838  return Params.LocallyHotCallSiteThreshold;
839 
840  // Otherwise treat it normally.
841  return None;
842 }
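// With the flag defaults above: a callsite marked hot by the profile summary
// gets Params.HotCallSiteThreshold (populated from the hot-callsite-threshold
// flag, default 3000). Without a summary, a callsite whose block frequency is
// at least 60x the caller's entry frequency (hot-callsite-rel-freq) qualifies
// for Params.LocallyHotCallSiteThreshold when that parameter is set; the
// locally-hot-callsite-threshold flag defaults to 525.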
843 
844 void CallAnalyzer::updateThreshold(CallSite CS, Function &Callee) {
845  // If no size growth is allowed for this inlining, set Threshold to 0.
846  if (!allowSizeGrowth(CS)) {
847  Threshold = 0;
848  return;
849  }
850 
851  Function *Caller = CS.getCaller();
852 
853  // return min(A, B) if B is valid.
854  auto MinIfValid = [](int A, Optional<int> B) {
855  return B ? std::min(A, B.getValue()) : A;
856  };
857 
858  // return max(A, B) if B is valid.
859  auto MaxIfValid = [](int A, Optional<int> B) {
860  return B ? std::max(A, B.getValue()) : A;
861  };
862 
863  // Various bonus percentages. These are multiplied by Threshold to get the
864  // bonus values.
865  // SingleBBBonus: This bonus is applied if the callee has a single reachable
866  // basic block at the given callsite context. This is speculatively applied
867  // and withdrawn if more than one basic block is seen.
868  //
869  // Vector bonuses: We want to more aggressively inline vector-dense kernels
870  // and apply this bonus based on the percentage of vector instructions. A
871  // bonus is applied if the vector instructions exceed 50% and half that amount
872  // is applied if it exceeds 10%. Note that these bonuses are somewhat
873  // arbitrary and evolved over time by accident as much as because they are
874  // principled bonuses.
875  // FIXME: It would be nice to base the bonus values on something more
876  // scientific.
877  //
878  // LastCallToStaticBonus: This large bonus is applied to ensure the inlining
879  // of the last call to a static function as inlining such functions is
880  // guaranteed to reduce code size.
881  //
882  // These bonus percentages may be set to 0 based on properties of the caller
883  // and the callsite.
884  int SingleBBBonusPercent = 50;
885  int VectorBonusPercent = 150;
886  int LastCallToStaticBonus = InlineConstants::LastCallToStaticBonus;
887 
888  // Lambda to set all the above bonus and bonus percentages to 0.
889  auto DisallowAllBonuses = [&]() {
890  SingleBBBonusPercent = 0;
891  VectorBonusPercent = 0;
892  LastCallToStaticBonus = 0;
893  };
894 
895  // Use the OptMinSizeThreshold or OptSizeThreshold knob if they are available
896  // and reduce the threshold if the caller has the necessary attribute.
897  if (Caller->optForMinSize()) {
898  Threshold = MinIfValid(Threshold, Params.OptMinSizeThreshold);
899  // For minsize, we want to disable the single BB bonus and the vector
900  // bonuses, but not the last-call-to-static bonus. Inlining the last call to
901  // a static function will, at the minimum, eliminate the parameter setup and
902  // call/return instructions.
903  SingleBBBonusPercent = 0;
904  VectorBonusPercent = 0;
905  } else if (Caller->optForSize())
906  Threshold = MinIfValid(Threshold, Params.OptSizeThreshold);
907 
908  // Adjust the threshold based on inlinehint attribute and profile based
909  // hotness information if the caller does not have MinSize attribute.
910  if (!Caller->optForMinSize()) {
911  if (Callee.hasFnAttribute(Attribute::InlineHint))
912  Threshold = MaxIfValid(Threshold, Params.HintThreshold);
913 
914  // FIXME: After switching to the new passmanager, simplify the logic below
915  // by checking only the callsite hotness/coldness as we will reliably
916  // have local profile information.
917  //
918  // Callsite hotness and coldness can be determined if sample profile is
919  // used (which adds hotness metadata to calls) or if caller's
920  // BlockFrequencyInfo is available.
921  BlockFrequencyInfo *CallerBFI = GetBFI ? &((*GetBFI)(*Caller)) : nullptr;
922  auto HotCallSiteThreshold = getHotCallSiteThreshold(CS, CallerBFI);
923  if (!Caller->optForSize() && HotCallSiteThreshold) {
924  LLVM_DEBUG(dbgs() << "Hot callsite.\n");
925  // FIXME: This should update the threshold only if it exceeds the
926  // current threshold, but AutoFDO + ThinLTO currently relies on this
927  // behavior to prevent inlining of hot callsites during ThinLTO
928  // compile phase.
929  Threshold = HotCallSiteThreshold.getValue();
930  } else if (isColdCallSite(CS, CallerBFI)) {
931  LLVM_DEBUG(dbgs() << "Cold callsite.\n");
932  // Do not apply bonuses for a cold callsite including the
933  // LastCallToStatic bonus. While this bonus might result in code size
934  // reduction, it can cause the size of a non-cold caller to increase
935  // preventing it from being inlined.
936  DisallowAllBonuses();
937  Threshold = MinIfValid(Threshold, Params.ColdCallSiteThreshold);
938  } else if (PSI) {
939  // Use callee's global profile information only if we have no way of
940  // determining this via callsite information.
941  if (PSI->isFunctionEntryHot(&Callee)) {
942  LLVM_DEBUG(dbgs() << "Hot callee.\n");
943  // If callsite hotness can not be determined, we may still know
944  // that the callee is hot and treat it as a weaker hint for threshold
945  // increase.
946  Threshold = MaxIfValid(Threshold, Params.HintThreshold);
947  } else if (PSI->isFunctionEntryCold(&Callee)) {
948  LLVM_DEBUG(dbgs() << "Cold callee.\n");
949  // Do not apply bonuses for a cold callee including the
950  // LastCallToStatic bonus. While this bonus might result in code size
951  // reduction, it can cause the size of a non-cold caller to increase
952  // preventing it from being inlined.
953  DisallowAllBonuses();
954  Threshold = MinIfValid(Threshold, Params.ColdThreshold);
955  }
956  }
957  }
958 
959  // Finally, take the target-specific inlining threshold multiplier into
960  // account.
961  Threshold *= TTI.getInliningThresholdMultiplier();
962 
963  SingleBBBonus = Threshold * SingleBBBonusPercent / 100;
964  VectorBonus = Threshold * VectorBonusPercent / 100;
965 
966  bool OnlyOneCallAndLocalLinkage =
967  F.hasLocalLinkage() && F.hasOneUse() && &F == CS.getCalledFunction();
968  // If there is only one call of the function, and it has internal linkage,
969  // the cost of inlining it drops dramatically. It may seem odd to update
970  // Cost in updateThreshold, but the bonus depends on the logic in this method.
971  if (OnlyOneCallAndLocalLinkage)
972  Cost -= LastCallToStaticBonus;
973 }
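// Worked example with the flag defaults above and a target threshold
// multiplier of 1: an inlinehint callee at an ordinary callsite (caller not
// minsize/optsize, site neither provably hot nor cold) ends up with
// Threshold = max(225, 325) = 325, SingleBBBonus = 325 * 50 / 100 = 162 and
// VectorBonus = 325 * 150 / 100 = 487, with the bonuses applied speculatively
// as described in the comments above.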
974 
975 bool CallAnalyzer::visitCmpInst(CmpInst &I) {
976  Value *LHS = I.getOperand(0), *RHS = I.getOperand(1);
977  // First try to handle simplified comparisons.
978  if (simplifyInstruction(I, [&](SmallVectorImpl<Constant *> &COps) {
979  return ConstantExpr::getCompare(I.getPredicate(), COps[0], COps[1]);
980  }))
981  return true;
982 
983  if (I.getOpcode() == Instruction::FCmp)
984  return false;
985 
986  // Otherwise look for a comparison between constant offset pointers with
987  // a common base.
988  Value *LHSBase, *RHSBase;
989  APInt LHSOffset, RHSOffset;
990  std::tie(LHSBase, LHSOffset) = ConstantOffsetPtrs.lookup(LHS);
991  if (LHSBase) {
992  std::tie(RHSBase, RHSOffset) = ConstantOffsetPtrs.lookup(RHS);
993  if (RHSBase && LHSBase == RHSBase) {
994  // We have common bases, fold the icmp to a constant based on the
995  // offsets.
996  Constant *CLHS = ConstantInt::get(LHS->getContext(), LHSOffset);
997  Constant *CRHS = ConstantInt::get(RHS->getContext(), RHSOffset);
998  if (Constant *C = ConstantExpr::getICmp(I.getPredicate(), CLHS, CRHS)) {
999  SimplifiedValues[&I] = C;
1000  ++NumConstantPtrCmps;
1001  return true;
1002  }
1003  }
1004  }
1005 
1006  // If the comparison is an equality comparison with null, we can simplify it
1007  // if we know the value (argument) can't be null
1008  if (I.isEquality() && isa<ConstantPointerNull>(I.getOperand(1)) &&
1009  isKnownNonNullInCallee(I.getOperand(0))) {
1010  bool IsNotEqual = I.getPredicate() == CmpInst::ICMP_NE;
1011  SimplifiedValues[&I] = IsNotEqual ? ConstantInt::getTrue(I.getType())
1012  : ConstantInt::getFalse(I.getType());
1013  return true;
1014  }
1015  // Finally check for SROA candidates in comparisons.
1016  Value *SROAArg;
1017  DenseMap<Value *, int>::iterator CostIt;
1018  if (lookupSROAArgAndCost(I.getOperand(0), SROAArg, CostIt)) {
1019  if (isa<ConstantPointerNull>(I.getOperand(1))) {
1020  accumulateSROACost(CostIt, InlineConstants::InstrCost);
1021  return true;
1022  }
1023 
1024  disableSROA(CostIt);
1025  }
1026 
1027  return false;
1028 }
1029 
1030 bool CallAnalyzer::visitSub(BinaryOperator &I) {
1031  // Try to handle a special case: we can fold computing the difference of two
1032  // constant-related pointers.
1033  Value *LHS = I.getOperand(0), *RHS = I.getOperand(1);
1034  Value *LHSBase, *RHSBase;
1035  APInt LHSOffset, RHSOffset;
1036  std::tie(LHSBase, LHSOffset) = ConstantOffsetPtrs.lookup(LHS);
1037  if (LHSBase) {
1038  std::tie(RHSBase, RHSOffset) = ConstantOffsetPtrs.lookup(RHS);
1039  if (RHSBase && LHSBase == RHSBase) {
1040  // We have common bases, fold the subtract to a constant based on the
1041  // offsets.
1042  Constant *CLHS = ConstantInt::get(LHS->getContext(), LHSOffset);
1043  Constant *CRHS = ConstantInt::get(RHS->getContext(), RHSOffset);
1044  if (Constant *C = ConstantExpr::getSub(CLHS, CRHS)) {
1045  SimplifiedValues[&I] = C;
1046  ++NumConstantPtrDiffs;
1047  return true;
1048  }
1049  }
1050  }
1051 
1052  // Otherwise, fall back to the generic logic for simplifying and handling
1053  // instructions.
1054  return Base::visitSub(I);
1055 }
1056 
1057 bool CallAnalyzer::visitBinaryOperator(BinaryOperator &I) {
1058  Value *LHS = I.getOperand(0), *RHS = I.getOperand(1);
1059  Constant *CLHS = dyn_cast<Constant>(LHS);
1060  if (!CLHS)
1061  CLHS = SimplifiedValues.lookup(LHS);
1062  Constant *CRHS = dyn_cast<Constant>(RHS);
1063  if (!CRHS)
1064  CRHS = SimplifiedValues.lookup(RHS);
1065 
1066  Value *SimpleV = nullptr;
1067  if (auto FI = dyn_cast<FPMathOperator>(&I))
1068  SimpleV = SimplifyFPBinOp(I.getOpcode(), CLHS ? CLHS : LHS,
1069  CRHS ? CRHS : RHS, FI->getFastMathFlags(), DL);
1070  else
1071  SimpleV =
1072  SimplifyBinOp(I.getOpcode(), CLHS ? CLHS : LHS, CRHS ? CRHS : RHS, DL);
1073 
1074  if (Constant *C = dyn_cast_or_null<Constant>(SimpleV))
1075  SimplifiedValues[&I] = C;
1076 
1077  if (SimpleV)
1078  return true;
1079 
1080  // Disable any SROA on arguments to arbitrary, unsimplified binary operators.
1081  disableSROA(LHS);
1082  disableSROA(RHS);
1083 
1084  // If the instruction is floating point, and the target says this operation
1085  // is expensive, this may eventually become a library call. Treat the cost
1086  // as such.
1087  if (I.getType()->isFloatingPointTy() &&
1088  TTI.getFPOpCost(I.getType()) == TargetTransformInfo::TCC_Expensive)
1089  Cost += InlineConstants::CallPenalty;
1090 
1091  return false;
1092 }
1093 
1094 bool CallAnalyzer::visitLoad(LoadInst &I) {
1095  Value *SROAArg;
1096  DenseMap<Value *, int>::iterator CostIt;
1097  if (lookupSROAArgAndCost(I.getPointerOperand(), SROAArg, CostIt)) {
1098  if (I.isSimple()) {
1099  accumulateSROACost(CostIt, InlineConstants::InstrCost);
1100  return true;
1101  }
1102 
1103  disableSROA(CostIt);
1104  }
1105 
1106  // If the data is already loaded from this address and hasn't been clobbered
1107  // by any stores or calls, this load is likely to be redundant and can be
1108  // eliminated.
1109  if (EnableLoadElimination &&
1110  !LoadAddrSet.insert(I.getPointerOperand()).second && I.isUnordered()) {
1111  LoadEliminationCost += InlineConstants::InstrCost;
1112  return true;
1113  }
1114 
1115  return false;
1116 }
1117 
1118 bool CallAnalyzer::visitStore(StoreInst &I) {
1119  Value *SROAArg;
1120  DenseMap<Value *, int>::iterator CostIt;
1121  if (lookupSROAArgAndCost(I.getPointerOperand(), SROAArg, CostIt)) {
1122  if (I.isSimple()) {
1123  accumulateSROACost(CostIt, InlineConstants::InstrCost);
1124  return true;
1125  }
1126 
1127  disableSROA(CostIt);
1128  }
1129 
1130  // The store can potentially clobber loads and prevent repeated loads from
1131  // being eliminated.
1132  // FIXME:
1133  // 1. We can probably keep an initial set of eliminatable loads subtracted
1134  // from the cost even when we finally see a store. We just need to disable
1135  // *further* accumulation of elimination savings.
1136  // 2. We should probably at some point thread MemorySSA for the callee into
1137  // this and then use that to actually compute *really* precise savings.
1138  disableLoadElimination();
1139  return false;
1140 }
1141 
1142 bool CallAnalyzer::visitExtractValue(ExtractValueInst &I) {
1143  // Constant folding for extract value is trivial.
1144  if (simplifyInstruction(I, [&](SmallVectorImpl<Constant *> &COps) {
1145  return ConstantExpr::getExtractValue(COps[0], I.getIndices());
1146  }))
1147  return true;
1148 
1149  // SROA can look through these but give them a cost.
1150  return false;
1151 }
1152 
1153 bool CallAnalyzer::visitInsertValue(InsertValueInst &I) {
1154  // Constant folding for insert value is trivial.
1155  if (simplifyInstruction(I, [&](SmallVectorImpl<Constant *> &COps) {
1156  return ConstantExpr::getInsertValue(/*AggregateOperand*/ COps[0],
1157  /*InsertedValueOperand*/ COps[1],
1158  I.getIndices());
1159  }))
1160  return true;
1161 
1162  // SROA can look through these but give them a cost.
1163  return false;
1164 }
1165 
1166 /// Try to simplify a call site.
1167 ///
1168 /// Takes a concrete function and callsite and tries to actually simplify it by
1169 /// analyzing the arguments and call itself with instsimplify. Returns true if
1170 /// it has simplified the callsite to some other entity (a constant), making it
1171 /// free.
1172 bool CallAnalyzer::simplifyCallSite(Function *F, CallSite CS) {
1173  // FIXME: Using the instsimplify logic directly for this is inefficient
1174  // because we have to continually rebuild the argument list even when no
1175  // simplifications can be performed. Until that is fixed with remapping
1176  // inside of instsimplify, directly constant fold calls here.
1177  if (!canConstantFoldCallTo(CS, F))
1178  return false;
1179 
1180  // Try to re-map the arguments to constants.
1181  SmallVector<Constant *, 4> ConstantArgs;
1182  ConstantArgs.reserve(CS.arg_size());
1183  for (CallSite::arg_iterator I = CS.arg_begin(), E = CS.arg_end(); I != E;
1184  ++I) {
1185  Constant *C = dyn_cast<Constant>(*I);
1186  if (!C)
1187  C = dyn_cast_or_null<Constant>(SimplifiedValues.lookup(*I));
1188  if (!C)
1189  return false; // This argument doesn't map to a constant.
1190 
1191  ConstantArgs.push_back(C);
1192  }
1193  if (Constant *C = ConstantFoldCall(CS, F, ConstantArgs)) {
1194  SimplifiedValues[CS.getInstruction()] = C;
1195  return true;
1196  }
1197 
1198  return false;
1199 }
1200 
1201 bool CallAnalyzer::visitCallSite(CallSite CS) {
1202  if (CS.hasFnAttr(Attribute::ReturnsTwice) &&
1203  !F.hasFnAttribute(Attribute::ReturnsTwice)) {
1204  // This aborts the entire analysis.
1205  ExposesReturnsTwice = true;
1206  return false;
1207  }
1208  if (CS.isCall() && cast<CallInst>(CS.getInstruction())->cannotDuplicate())
1209  ContainsNoDuplicateCall = true;
1210 
1211  if (Function *F = CS.getCalledFunction()) {
1212  // When we have a concrete function, first try to simplify it directly.
1213  if (simplifyCallSite(F, CS))
1214  return true;
1215 
1216  // Next check if it is an intrinsic we know about.
1217  // FIXME: Lift this into part of the InstVisitor.
1218  if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(CS.getInstruction())) {
1219  switch (II->getIntrinsicID()) {
1220  default:
1221  if (!CS.onlyReadsMemory() && !isAssumeLikeIntrinsic(II))
1222  disableLoadElimination();
1223  return Base::visitCallSite(CS);
1224 
1225  case Intrinsic::load_relative:
1226  // This is normally lowered to 4 LLVM instructions.
1227  Cost += 3 * InlineConstants::InstrCost;
1228  return false;
1229 
1230  case Intrinsic::memset:
1231  case Intrinsic::memcpy:
1232  case Intrinsic::memmove:
1233  disableLoadElimination();
1234  // SROA can usually chew through these intrinsics, but they aren't free.
1235  return false;
1236  case Intrinsic::icall_branch_funnel:
1237  case Intrinsic::localescape:
1238  HasUninlineableIntrinsic = true;
1239  return false;
1240  case Intrinsic::vastart:
1241  case Intrinsic::vaend:
1242  UsesVarArgs = true;
1243  return false;
1244  }
1245  }
1246 
1247  if (F == CS.getInstruction()->getFunction()) {
1248  // This flag will fully abort the analysis, so don't bother with anything
1249  // else.
1250  IsRecursiveCall = true;
1251  return false;
1252  }
1253 
1254  if (TTI.isLoweredToCall(F)) {
1255  // We account for the average 1 instruction per call argument setup
1256  // here.
1257  Cost += CS.arg_size() * InlineConstants::InstrCost;
1258 
1259  // Everything other than inline ASM will also have a significant cost
1260  // merely from making the call.
1261  if (!isa<InlineAsm>(CS.getCalledValue()))
1262  Cost += InlineConstants::CallPenalty;
1263  }
1264 
1265  if (!CS.onlyReadsMemory())
1266  disableLoadElimination();
1267  return Base::visitCallSite(CS);
1268  }
1269 
1270  // Otherwise we're in a very special case -- an indirect function call. See
1271  // if we can be particularly clever about this.
1272  Value *Callee = CS.getCalledValue();
1273 
1274  // First, pay the price of the argument setup. We account for the average
1275  // 1 instruction per call argument setup here.
1276  Cost += CS.arg_size() * InlineConstants::InstrCost;
1277 
1278  // Next, check if this happens to be an indirect function call to a known
1279  // function in this inline context. If not, we've done all we can.
1280  Function *F = dyn_cast_or_null<Function>(SimplifiedValues.lookup(Callee));
1281  if (!F) {
1282  if (!CS.onlyReadsMemory())
1283  disableLoadElimination();
1284  return Base::visitCallSite(CS);
1285  }
1286 
1287  // If we have a constant that we are calling as a function, we can peer
1288  // through it and see the function target. This happens not infrequently
1289  // during devirtualization and so we want to give it a hefty bonus for
1290  // inlining, but cap that bonus in the event that inlining wouldn't pan
1291  // out. Pretend to inline the function, with a custom threshold.
1292  auto IndirectCallParams = Params;
1293  IndirectCallParams.DefaultThreshold = InlineConstants::IndirectCallThreshold;
1294  CallAnalyzer CA(TTI, GetAssumptionCache, GetBFI, PSI, ORE, *F, CS,
1295  IndirectCallParams);
1296  if (CA.analyzeCall(CS)) {
1297  // We were able to inline the indirect call! Subtract the cost from the
1298  // threshold to get the bonus we want to apply, but don't go below zero.
1299  Cost -= std::max(0, CA.getThreshold() - CA.getCost());
1300  }
1301 
1302  if (!F->onlyReadsMemory())
1303  disableLoadElimination();
1304  return Base::visitCallSite(CS);
1305 }
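// For the devirtualized indirect-call case above, the nested CallAnalyzer's
// surplus (its Threshold minus its Cost, floored at zero) is subtracted from
// this call's Cost, so a target that looks profitable to inline makes the
// devirtualizable callsite correspondingly cheaper, with the bonus capped by
// the custom threshold in IndirectCallParams.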
1306 
1307 bool CallAnalyzer::visitReturnInst(ReturnInst &RI) {
1308  // At least one return instruction will be free after inlining.
1309  bool Free = !HasReturn;
1310  HasReturn = true;
1311  return Free;
1312 }
1313 
1314 bool CallAnalyzer::visitBranchInst(BranchInst &BI) {
1315  // We model unconditional branches as essentially free -- they really
1316  // shouldn't exist at all, but handling them makes the behavior of the
1317  // inliner more regular and predictable. Interestingly, conditional branches
1318  // which will fold away are also free.
1319  return BI.isUnconditional() || isa<ConstantInt>(BI.getCondition()) ||
1320  dyn_cast_or_null<ConstantInt>(
1321  SimplifiedValues.lookup(BI.getCondition()));
1322 }
1323 
1324 bool CallAnalyzer::visitSelectInst(SelectInst &SI) {
1325  bool CheckSROA = SI.getType()->isPointerTy();
1326  Value *TrueVal = SI.getTrueValue();
1327  Value *FalseVal = SI.getFalseValue();
1328 
1329  Constant *TrueC = dyn_cast<Constant>(TrueVal);
1330  if (!TrueC)
1331  TrueC = SimplifiedValues.lookup(TrueVal);
1332  Constant *FalseC = dyn_cast<Constant>(FalseVal);
1333  if (!FalseC)
1334  FalseC = SimplifiedValues.lookup(FalseVal);
1335  Constant *CondC =
1336  dyn_cast_or_null<Constant>(SimplifiedValues.lookup(SI.getCondition()));
1337 
1338  if (!CondC) {
1339  // Select C, X, X => X
1340  if (TrueC == FalseC && TrueC) {
1341  SimplifiedValues[&SI] = TrueC;
1342  return true;
1343  }
1344 
1345  if (!CheckSROA)
1346  return Base::visitSelectInst(SI);
1347 
1348  std::pair<Value *, APInt> TrueBaseAndOffset =
1349  ConstantOffsetPtrs.lookup(TrueVal);
1350  std::pair<Value *, APInt> FalseBaseAndOffset =
1351  ConstantOffsetPtrs.lookup(FalseVal);
1352  if (TrueBaseAndOffset == FalseBaseAndOffset && TrueBaseAndOffset.first) {
1353  ConstantOffsetPtrs[&SI] = TrueBaseAndOffset;
1354 
1355  Value *SROAArg;
1356  DenseMap<Value *, int>::iterator CostIt;
1357  if (lookupSROAArgAndCost(TrueVal, SROAArg, CostIt))
1358  SROAArgValues[&SI] = SROAArg;
1359  return true;
1360  }
1361 
1362  return Base::visitSelectInst(SI);
1363  }
1364 
1365  // Select condition is a constant.
1366  Value *SelectedV = CondC->isAllOnesValue()
1367  ? TrueVal
1368  : (CondC->isNullValue()) ? FalseVal : nullptr;
1369  if (!SelectedV) {
1370  // Condition is a vector constant that is not all 1s or all 0s. If all
1371  // operands are constants, ConstantExpr::getSelect() can handle the cases
1372  // such as select vectors.
1373  if (TrueC && FalseC) {
1374  if (auto *C = ConstantExpr::getSelect(CondC, TrueC, FalseC)) {
1375  SimplifiedValues[&SI] = C;
1376  return true;
1377  }
1378  }
1379  return Base::visitSelectInst(SI);
1380  }
1381 
1382  // Condition is either all 1s or all 0s. SI can be simplified.
1383  if (Constant *SelectedC = dyn_cast<Constant>(SelectedV)) {
1384  SimplifiedValues[&SI] = SelectedC;
1385  return true;
1386  }
1387 
1388  if (!CheckSROA)
1389  return true;
1390 
1391  std::pair<Value *, APInt> BaseAndOffset =
1392  ConstantOffsetPtrs.lookup(SelectedV);
1393  if (BaseAndOffset.first) {
1394  ConstantOffsetPtrs[&SI] = BaseAndOffset;
1395 
1396  Value *SROAArg;
1397  DenseMap<Value *, int>::iterator CostIt;
1398  if (lookupSROAArgAndCost(SelectedV, SROAArg, CostIt))
1399  SROAArgValues[&SI] = SROAArg;
1400  }
1401 
1402  return true;
1403 }
1404 
1405 bool CallAnalyzer::visitSwitchInst(SwitchInst &SI) {
1406  // We model unconditional switches as free, see the comments on handling
1407  // branches.
1408  if (isa<ConstantInt>(SI.getCondition()))
1409  return true;
1410  if (Value *V = SimplifiedValues.lookup(SI.getCondition()))
1411  if (isa<ConstantInt>(V))
1412  return true;
1413 
1414  // Assume the most general case where the switch is lowered into
1415  // either a jump table, bit test, or a balanced binary tree consisting of
1416  // case clusters without merging adjacent clusters with the same
1417  // destination. We do not consider the switches that are lowered with a mix
1418  // of jump table/bit test/binary search tree. The cost of the switch is
1419  // proportional to the size of the tree or the size of jump table range.
1420  //
1421  // NB: We convert large switches which are just used to initialize large phi
1422  // nodes to lookup tables instead in simplify-cfg, so this shouldn't prevent
1423  // inlining those. It will prevent inlining in cases where the optimization
1424  // does not (yet) fire.
1425 
1426  // Maximum valid cost increase in this function.
1427  int CostUpperBound = INT_MAX - InlineConstants::InstrCost - 1;
1428 
1429  // Exit early for a large switch, assuming one case needs at least one
1430  // instruction.
1431  // FIXME: This is not true for a bit test, but ignore such case for now to
1432  // save compile-time.
1433  int64_t CostLowerBound =
1434  std::min((int64_t)CostUpperBound,
1435  (int64_t)SI.getNumCases() * InlineConstants::InstrCost + Cost);
1436 
1437  if (CostLowerBound > Threshold && !ComputeFullInlineCost) {
1438  Cost = CostLowerBound;
1439  return false;
1440  }
1441 
1442  unsigned JumpTableSize = 0;
1443  unsigned NumCaseCluster =
1444  TTI.getEstimatedNumberOfCaseClusters(SI, JumpTableSize);
1445 
1446  // If suitable for a jump table, consider the cost for the table size and
1447  // branch to destination.
1448  if (JumpTableSize) {
1449  int64_t JTCost = (int64_t)JumpTableSize * InlineConstants::InstrCost +
1450  4 * InlineConstants::InstrCost;
1451 
1452  Cost = std::min((int64_t)CostUpperBound, JTCost + Cost);
1453  return false;
1454  }
1455 
1456  // Considering forming a binary search, we should find the number of nodes
1457  // which is same as the number of comparisons when lowered. For a given
1458  // number of clusters, n, we can define a recursive function, f(n), to find
1459  // the number of nodes in the tree. The recursion is :
1460  // f(n) = 1 + f(n/2) + f (n - n/2), when n > 3,
1461  // and f(n) = n, when n <= 3.
1462  // This will lead a binary tree where the leaf should be either f(2) or f(3)
1463  // when n > 3. So, the number of comparisons from leaves should be n, while
1464  // the number of non-leaf should be :
1465  // 2^(log2(n) - 1) - 1
1466  // = 2^log2(n) * 2^-1 - 1
1467  // = n / 2 - 1.
1468  // Considering comparisons from leaf and non-leaf nodes, we can estimate the
1469  // number of comparisons in a simple closed form :
1470  // n + n / 2 - 1 = n * 3 / 2 - 1
1471  if (NumCaseCluster <= 3) {
1472  // Suppose a comparison includes one compare and one conditional branch.
1473  Cost += NumCaseCluster * 2 * InlineConstants::InstrCost;
1474  return false;
1475  }
1476 
1477  int64_t ExpectedNumberOfCompare = 3 * (int64_t)NumCaseCluster / 2 - 1;
1478  int64_t SwitchCost =
1479  ExpectedNumberOfCompare * 2 * InlineConstants::InstrCost;
1480 
1481  Cost = std::min((int64_t)CostUpperBound, SwitchCost + Cost);
1482  return false;
1483 }
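// Rough magnitudes implied by the model above, in units of
// InlineConstants::InstrCost: a switch lowered to a 64-entry jump table adds
// 64 + 4 = 68 units, while 10 case clusters lowered as a balanced tree add
// (3 * 10 / 2 - 1) * 2 = 28 units. Both paths clamp the result to
// CostUpperBound.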
1484 
1485 bool CallAnalyzer::visitIndirectBrInst(IndirectBrInst &IBI) {
1486  // We never want to inline functions that contain an indirectbr. Inlining
1487  // one would be incorrect because all the blockaddresses (in static global
1488  // initializers, for example) would still refer to the original function,
1489  // so the indirect jump would jump from the inlined copy of the function
1490  // into the original function, which is undefined behavior.
1491  // FIXME: This logic isn't really right; we can safely inline functions with
1492  // indirectbrs as long as no other function or global references the
1493  // blockaddress of a block within the current function.
1494  HasIndirectBr = true;
1495  return false;
1496 }
1497 
1498 bool CallAnalyzer::visitResumeInst(ResumeInst &RI) {
1499  // FIXME: It's not clear that a single instruction is an accurate model for
1500  // the inline cost of a resume instruction.
1501  return false;
1502 }
1503 
1504 bool CallAnalyzer::visitCleanupReturnInst(CleanupReturnInst &CRI) {
1505  // FIXME: It's not clear that a single instruction is an accurate model for
1506  // the inline cost of a cleanupret instruction.
1507  return false;
1508 }
1509 
1510 bool CallAnalyzer::visitCatchReturnInst(CatchReturnInst &CRI) {
1511  // FIXME: It's not clear that a single instruction is an accurate model for
1512  // the inline cost of a catchret instruction.
1513  return false;
1514 }
1515 
1516 bool CallAnalyzer::visitUnreachableInst(UnreachableInst &I) {
1517  // FIXME: It might be reasonable to discount the cost of instructions leading
1518  // to unreachable as they have the lowest possible impact on both runtime and
1519  // code size.
1520  return true; // No actual code is needed for unreachable.
1521 }
1522 
1523 bool CallAnalyzer::visitInstruction(Instruction &I) {
1524  // Some instructions are free. All of the free intrinsics can also be
1525  // handled by SROA, etc.
1526  if (TargetTransformInfo::TCC_Free == TTI.getUserCost(&I))
1527  return true;
1528 
1529  // We found something we don't understand or can't handle. Mark any SROA-able
1530  // values in the operand list as no longer viable.
1531  for (User::op_iterator OI = I.op_begin(), OE = I.op_end(); OI != OE; ++OI)
1532  disableSROA(*OI);
1533 
1534  return false;
1535 }
1536 
1537 /// Analyze a basic block for its contribution to the inline cost.
1538 ///
1539 /// This method walks the analyzer over every instruction in the given basic
1540 /// block and accounts for their cost during inlining at this callsite. It
1541 /// aborts early if the threshold has been exceeded or an impossible to inline
1542 /// construct has been detected. It returns false if inlining is no longer
1543 /// viable, and true if inlining remains viable.
1544 bool CallAnalyzer::analyzeBlock(BasicBlock *BB,
1545  SmallPtrSetImpl<const Value *> &EphValues) {
1546  for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I) {
1547  // FIXME: Currently, the number of instructions in a function, regardless of
1548  // our ability to simplify them to constants or dead code during inlining,
1549  // is actually used by the vector bonus heuristic. As long as that's true,
1550  // we have to special-case debug intrinsics here to prevent differences in
1551  // inlining due to debug symbols. Eventually, the number of unsimplified
1552  // instructions shouldn't factor into the cost computation, but until then,
1553  // hack around it here.
1554  if (isa<DbgInfoIntrinsic>(I))
1555  continue;
1556 
1557  // Skip ephemeral values.
1558  if (EphValues.count(&*I))
1559  continue;
1560 
1561  ++NumInstructions;
1562  if (isa<ExtractElementInst>(I) || I->getType()->isVectorTy())
1563  ++NumVectorInstructions;
1564 
1565  // If the instruction simplified to a constant, there is no cost to this
1566  // instruction. Visit the instructions using our InstVisitor to account for
1567  // all of the per-instruction logic. The visit tree returns true if we
1568  // consumed the instruction in any way, and false if the instruction's base
1569  // cost should count against inlining.
1570  if (Base::visit(&*I))
1571  ++NumInstructionsSimplified;
1572  else
1573  Cost += InlineConstants::InstrCost;
1574 
1575  using namespace ore;
1576  // If visiting this instruction detected an uninlinable pattern, abort.
1577  if (IsRecursiveCall || ExposesReturnsTwice || HasDynamicAlloca ||
1578  HasIndirectBr || HasUninlineableIntrinsic || UsesVarArgs) {
1579  if (ORE)
1580  ORE->emit([&]() {
1581  return OptimizationRemarkMissed(DEBUG_TYPE, "NeverInline",
1582  CandidateCS.getInstruction())
1583  << NV("Callee", &F)
1584  << " has uninlinable pattern and cost is not fully computed";
1585  });
1586  return false;
1587  }
1588 
1589  // If the caller is a recursive function then we don't want to inline
1590  // functions which allocate a lot of stack space because it would increase
1591  // the caller stack usage dramatically.
1592  if (IsCallerRecursive &&
1593  AllocatedSize > InlineConstants::TotalAllocaSizeRecursiveCaller) {
1594  if (ORE)
1595  ORE->emit([&]() {
1596  return OptimizationRemarkMissed(DEBUG_TYPE, "NeverInline",
1597  CandidateCS.getInstruction())
1598  << NV("Callee", &F)
1599  << " is recursive and allocates too much stack space. Cost is "
1600  "not fully computed";
1601  });
1602  return false;
1603  }
1604 
1605  // Check if we've passed the maximum possible threshold so we don't spin in
1606  // huge basic blocks that will never inline.
1607  if (Cost >= Threshold && !ComputeFullInlineCost)
1608  return false;
1609  }
1610 
1611  return true;
1612 }
1613 
1614 /// Compute the base pointer and cumulative constant offsets for V.
1615 ///
1616 /// This strips all constant offsets off of V, leaving it the base pointer, and
1617 /// accumulates the total constant offset applied in the returned constant. It
1618 /// returns 0 if V is not a pointer, and returns the constant '0' if there are
1619 /// no constant offsets applied.
1620 ConstantInt *CallAnalyzer::stripAndComputeInBoundsConstantOffsets(Value *&V) {
1621  if (!V->getType()->isPointerTy())
1622  return nullptr;
1623 
1624  unsigned AS = V->getType()->getPointerAddressSpace();
1625  unsigned IntPtrWidth = DL.getIndexSizeInBits(AS);
1626  APInt Offset = APInt::getNullValue(IntPtrWidth);
1627 
1628  // Even though we don't look through PHI nodes, we could be called on an
1629  // instruction in an unreachable block, which may be on a cycle.
1630  SmallPtrSet<Value *, 4> Visited;
1631  Visited.insert(V);
1632  do {
1633  if (GEPOperator *GEP = dyn_cast<GEPOperator>(V)) {
1634  if (!GEP->isInBounds() || !accumulateGEPOffset(*GEP, Offset))
1635  return nullptr;
1636  V = GEP->getPointerOperand();
1637  } else if (Operator::getOpcode(V) == Instruction::BitCast) {
1638  V = cast<Operator>(V)->getOperand(0);
1639  } else if (GlobalAlias *GA = dyn_cast<GlobalAlias>(V)) {
1640  if (GA->isInterposable())
1641  break;
1642  V = GA->getAliasee();
1643  } else {
1644  break;
1645  }
1646  assert(V->getType()->isPointerTy() && "Unexpected operand type!");
1647  } while (Visited.insert(V).second);
1648 
1649  Type *IntPtrTy = DL.getIntPtrType(V->getContext(), AS);
1650  return cast<ConstantInt>(ConstantInt::get(IntPtrTy, Offset));
1651 }
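// Illustrative example of the stripping above (hypothetical IR, not from the
// original source): given
//   %p = getelementptr inbounds [4 x i32], [4 x i32]* %a, i64 0, i64 2
//   %q = bitcast i32* %p to i8*
// calling stripAndComputeInBoundsConstantOffsets on %q walks through the
// bitcast and the inbounds GEP, rewrites the operand to %a, and returns the
// pointer-sized constant 8 (two i32 elements of 4 bytes each). A GEP that is
// not inbounds, or one with a non-constant index, makes the function return
// nullptr instead.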
1652 
1653 /// Find dead blocks due to deleted CFG edges during inlining.
1654 ///
1655 /// If we know the successor of the current block, \p CurrBB, has to be \p
1656 /// NextBB, the other successors of \p CurrBB are dead if these successors have
1657 /// no live incoming CFG edges. If one block is found to be dead, we can
1658 /// continue growing the dead block list by checking the successors of the dead
1659 /// blocks to see if all their incoming edges are dead or not.
1660 void CallAnalyzer::findDeadBlocks(BasicBlock *CurrBB, BasicBlock *NextBB) {
1661  auto IsEdgeDead = [&](BasicBlock *Pred, BasicBlock *Succ) {
1662  // A CFG edge is dead if the predecessor is dead or the predecessor has a
1663  // known successor which is not the one under examination.
1664  return (DeadBlocks.count(Pred) ||
1665  (KnownSuccessors[Pred] && KnownSuccessors[Pred] != Succ));
1666  };
1667 
1668  auto IsNewlyDead = [&](BasicBlock *BB) {
1669  // If all the edges to a block are dead, the block is also dead.
1670  return (!DeadBlocks.count(BB) &&
1671  llvm::all_of(predecessors(BB),
1672  [&](BasicBlock *P) { return IsEdgeDead(P, BB); }));
1673  };
1674 
1675  for (BasicBlock *Succ : successors(CurrBB)) {
1676  if (Succ == NextBB || !IsNewlyDead(Succ))
1677  continue;
1678  SmallVector<BasicBlock *, 4> NewDead;
1679  NewDead.push_back(Succ);
1680  while (!NewDead.empty()) {
1681  BasicBlock *Dead = NewDead.pop_back_val();
1682  if (DeadBlocks.insert(Dead))
1683  // Continue growing the dead block lists.
1684  for (BasicBlock *S : successors(Dead))
1685  if (IsNewlyDead(S))
1686  NewDead.push_back(S);
1687  }
1688  }
1689 }
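// Sketch of the propagation above on a hypothetical CFG (not from the
// original source): if CurrBB ends in a conditional branch whose condition
// simplified so that NextBB is the known successor, a sibling successor S is
// marked dead only when every other predecessor of S is already in DeadBlocks
// or likewise has a known successor other than S; the NewDead worklist then
// repeats the same test on S's successors.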
1690 
1691 /// Analyze a call site for potential inlining.
1692 ///
1693 /// Returns true if inlining this call is viable, and false if it is not
1694 /// viable. It computes the cost and adjusts the threshold based on numerous
1695 /// factors and heuristics. If this method returns false but the computed cost
1696 /// is below the computed threshold, then inlining was forcibly disabled by
1697 /// some artifact of the routine.
1698 bool CallAnalyzer::analyzeCall(CallSite CS) {
1699  ++NumCallsAnalyzed;
1700 
1701  // Perform some tweaks to the cost and threshold based on the direct
1702  // callsite information.
1703 
1704  // We want to more aggressively inline vector-dense kernels, so up the
1705  // threshold, and we'll lower it if the % of vector instructions gets too
1706  // low. Note that these bonuses are somewhat arbitrary and evolved over time
1707  // by accident as much as because they are principled bonuses.
1708  //
1709  // FIXME: It would be nice to remove all such bonuses. At least it would be
1710  // nice to base the bonus values on something more scientific.
1711  assert(NumInstructions == 0);
1712  assert(NumVectorInstructions == 0);
1713 
1714  // Update the threshold based on callsite properties
1715  updateThreshold(CS, F);
1716 
1717  // Speculatively apply all possible bonuses to Threshold. If Cost ever
1718  // exceeds this Threshold, then, since Cost never decreases, we can stop
1719  // processing the rest of the function body.
1720  Threshold += (SingleBBBonus + VectorBonus);
1721 
1722  // Give out bonuses for the callsite, as the instructions setting them up
1723  // will be gone after inlining.
1724  Cost -= getCallsiteCost(CS, DL);
1725 
1726  // If this function uses the coldcc calling convention, prefer not to inline
1727  // it.
1728  if (F.getCallingConv() == CallingConv::Cold)
1729  Cost += InlineConstants::ColdccPenalty;
1730 
1731  // Check if we're done. This can happen due to bonuses and penalties.
1732  if (Cost >= Threshold && !ComputeFullInlineCost)
1733  return false;
1734 
1735  if (F.empty())
1736  return true;
1737 
1738  Function *Caller = CS.getInstruction()->getFunction();
1739  // Check if the caller function is recursive itself.
1740  for (User *U : Caller->users()) {
1741  CallSite Site(U);
1742  if (!Site)
1743  continue;
1744  Instruction *I = Site.getInstruction();
1745  if (I->getFunction() == Caller) {
1746  IsCallerRecursive = true;
1747  break;
1748  }
1749  }
1750 
1751  // Populate our simplified values by mapping from function arguments to call
1752  // arguments with known important simplifications.
1753  CallSite::arg_iterator CAI = CS.arg_begin();
1754  for (Function::arg_iterator FAI = F.arg_begin(), FAE = F.arg_end();
1755  FAI != FAE; ++FAI, ++CAI) {
1756  assert(CAI != CS.arg_end());
1757  if (Constant *C = dyn_cast<Constant>(CAI))
1758  SimplifiedValues[&*FAI] = C;
1759 
1760  Value *PtrArg = *CAI;
1761  if (ConstantInt *C = stripAndComputeInBoundsConstantOffsets(PtrArg)) {
1762  ConstantOffsetPtrs[&*FAI] = std::make_pair(PtrArg, C->getValue());
1763 
1764  // We can SROA any pointer arguments derived from alloca instructions.
1765  if (isa<AllocaInst>(PtrArg)) {
1766  SROAArgValues[&*FAI] = PtrArg;
1767  SROAArgCosts[PtrArg] = 0;
1768  }
1769  }
1770  }
1771  NumConstantArgs = SimplifiedValues.size();
1772  NumConstantOffsetPtrArgs = ConstantOffsetPtrs.size();
1773  NumAllocaArgs = SROAArgValues.size();
1774 
1775  // FIXME: If a caller has multiple calls to a callee, we end up recomputing
1776  // the ephemeral values multiple times (and they're completely determined by
1777  // the callee, so this is purely duplicate work).
1778  SmallPtrSet<const Value *, 32> EphValues;
1779  CodeMetrics::collectEphemeralValues(&F, &GetAssumptionCache(F), EphValues);
1780 
1781  // The worklist of live basic blocks in the callee *after* inlining. We avoid
1782  // adding basic blocks of the callee which can be proven to be dead for this
1783  // particular call site in order to get more accurate cost estimates. This
1784  // requires a somewhat heavyweight iteration pattern: we need to walk the
1785  // basic blocks in a breadth-first order as we insert live successors. To
1786  // accomplish this, and because we typically exit early after crossing our
1787  // threshold, we use a small-size optimized SetVector.
1788  typedef SetVector<BasicBlock *, SmallVector<BasicBlock *, 16>,
1789  SmallPtrSet<BasicBlock *, 16>>
1790  BBSetVector;
1791  BBSetVector BBWorklist;
1792  BBWorklist.insert(&F.getEntryBlock());
1793  bool SingleBB = true;
1794  // Note that we *must not* cache the size; this loop grows the worklist.
1795  for (unsigned Idx = 0; Idx != BBWorklist.size(); ++Idx) {
1796  // Bail out the moment we cross the threshold. This means we'll under-count
1797  // the cost, but only when undercounting doesn't matter.
1798  if (Cost >= Threshold && !ComputeFullInlineCost)
1799  break;
1800 
1801  BasicBlock *BB = BBWorklist[Idx];
1802  if (BB->empty())
1803  continue;
1804 
1805  // Disallow inlining a blockaddress. A blockaddress only has defined
1806  // behavior for an indirect branch in the same function, and we do not
1807  // currently support inlining indirect branches. But, the inliner may not
1808  // see an indirect branch that ends up being dead code at a particular call
1809  // site. If the blockaddress escapes the function, e.g., via a global
1810  // variable, inlining may lead to an invalid cross-function reference.
1811  if (BB->hasAddressTaken())
1812  return false;
1813 
1814  // Analyze the cost of this block. If we blow through the threshold, this
1815  // returns false, and we can bail out.
1816  if (!analyzeBlock(BB, EphValues))
1817  return false;
1818 
1819  TerminatorInst *TI = BB->getTerminator();
1820 
1821  // Add in the live successors by first checking whether we have a terminator
1822  // that may be simplified based on the values simplified by this call.
1823  if (BranchInst *BI = dyn_cast<BranchInst>(TI)) {
1824  if (BI->isConditional()) {
1825  Value *Cond = BI->getCondition();
1826  if (ConstantInt *SimpleCond =
1827  dyn_cast_or_null<ConstantInt>(SimplifiedValues.lookup(Cond))) {
1828  BasicBlock *NextBB = BI->getSuccessor(SimpleCond->isZero() ? 1 : 0);
1829  BBWorklist.insert(NextBB);
1830  KnownSuccessors[BB] = NextBB;
1831  findDeadBlocks(BB, NextBB);
1832  continue;
1833  }
1834  }
1835  } else if (SwitchInst *SI = dyn_cast<SwitchInst>(TI)) {
1836  Value *Cond = SI->getCondition();
1837  if (ConstantInt *SimpleCond =
1838  dyn_cast_or_null<ConstantInt>(SimplifiedValues.lookup(Cond))) {
1839  BasicBlock *NextBB = SI->findCaseValue(SimpleCond)->getCaseSuccessor();
1840  BBWorklist.insert(NextBB);
1841  KnownSuccessors[BB] = NextBB;
1842  findDeadBlocks(BB, NextBB);
1843  continue;
1844  }
1845  }
1846 
1847  // If we're unable to select a particular successor, just count all of
1848  // them.
1849  for (unsigned TIdx = 0, TSize = TI->getNumSuccessors(); TIdx != TSize;
1850  ++TIdx)
1851  BBWorklist.insert(TI->getSuccessor(TIdx));
1852 
1853  // If we had any successors at this point, then post-inlining is likely to
1854  // have them as well. Note that we assume any basic blocks which existed
1855  // due to branches or switches which folded above will also fold after
1856  // inlining.
1857  if (SingleBB && TI->getNumSuccessors() > 1) {
1858  // Take off the bonus we applied to the threshold.
1859  Threshold -= SingleBBBonus;
1860  SingleBB = false;
1861  }
1862  }
1863 
1864  bool OnlyOneCallAndLocalLinkage =
1865  F.hasLocalLinkage() && F.hasOneUse() && &F == CS.getCalledFunction();
1866  // If this is a noduplicate call, we can still inline as long as
1867  // inlining this would cause the removal of the callee (so the instruction
1868  // is not actually duplicated, just moved).
1869  if (!OnlyOneCallAndLocalLinkage && ContainsNoDuplicateCall)
1870  return false;
1871 
1872  // We applied the maximum possible vector bonus at the beginning. Now,
1873  // subtract the excess bonus, if any, from the Threshold before
1874  // comparing against Cost.
1875  if (NumVectorInstructions <= NumInstructions / 10)
1876  Threshold -= VectorBonus;
1877  else if (NumVectorInstructions <= NumInstructions / 2)
1878  Threshold -= VectorBonus/2;
1879 
1880  return Cost < std::max(1, Threshold);
1881 }
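// Numerical sketch of the bonus bookkeeping in analyzeCall (hypothetical
// values, not from the original source; the real bonuses are computed in
// updateThreshold): suppose Threshold is 225 with SingleBBBonus = 100 and
// VectorBonus = 300. The speculative Threshold becomes 625; a callee that
// keeps more than one live basic block drops back to 525; and if fewer than
// 10% of its instructions are vector instructions the VectorBonus is also
// removed, so the final test is Cost < max(1, 225).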
1882 
1883 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1884 /// Dump stats about this call's analysis.
1885 LLVM_DUMP_METHOD void CallAnalyzer::dump() {
1886 #define DEBUG_PRINT_STAT(x) dbgs() << " " #x ": " << x << "\n"
1887  DEBUG_PRINT_STAT(NumConstantArgs);
1888  DEBUG_PRINT_STAT(NumConstantOffsetPtrArgs);
1889  DEBUG_PRINT_STAT(NumAllocaArgs);
1890  DEBUG_PRINT_STAT(NumConstantPtrCmps);
1891  DEBUG_PRINT_STAT(NumConstantPtrDiffs);
1892  DEBUG_PRINT_STAT(NumInstructionsSimplified);
1893  DEBUG_PRINT_STAT(NumInstructions);
1894  DEBUG_PRINT_STAT(SROACostSavings);
1895  DEBUG_PRINT_STAT(SROACostSavingsLost);
1896  DEBUG_PRINT_STAT(LoadEliminationCost);
1897  DEBUG_PRINT_STAT(ContainsNoDuplicateCall);
1898  DEBUG_PRINT_STAT(Cost);
1899  DEBUG_PRINT_STAT(Threshold);
1900 #undef DEBUG_PRINT_STAT
1901 }
1902 #endif
1903 
1904 /// Test that there are no attribute conflicts between Caller and Callee
1905 /// that prevent inlining.
1906 static bool functionsHaveCompatibleAttributes(Function *Caller,
1907  Function *Callee,
1908  TargetTransformInfo &TTI) {
1909  return TTI.areInlineCompatible(Caller, Callee) &&
1910  AttributeFuncs::areInlineCompatible(*Caller, *Callee);
1911 }
1912 
1913 int llvm::getCallsiteCost(CallSite CS, const DataLayout &DL) {
1914  int Cost = 0;
1915  for (unsigned I = 0, E = CS.arg_size(); I != E; ++I) {
1916  if (CS.isByValArgument(I)) {
1917  // We approximate the number of loads and stores needed by dividing the
1918  // size of the byval type by the target's pointer size.
1919  PointerType *PTy = cast<PointerType>(CS.getArgument(I)->getType());
1920  unsigned TypeSize = DL.getTypeSizeInBits(PTy->getElementType());
1921  unsigned AS = PTy->getAddressSpace();
1922  unsigned PointerSize = DL.getPointerSizeInBits(AS);
1923  // Ceiling division.
1924  unsigned NumStores = (TypeSize + PointerSize - 1) / PointerSize;
1925 
1926  // If it generates more than 8 stores it is likely to be expanded as an
1927  // inline memcpy so we take that as an upper bound. Otherwise we assume
1928  // one load and one store per word copied.
1929  // FIXME: The maxStoresPerMemcpy setting from the target should be used
1930  // here instead of a magic number of 8, but it's not available via
1931  // DataLayout.
1932  NumStores = std::min(NumStores, 8U);
1933 
1934  Cost += 2 * NumStores * InlineConstants::InstrCost;
1935  } else {
1936  // For non-byval arguments subtract off one instruction per call
1937  // argument.
1938  Cost += InlineConstants::InstrCost;
1939  }
1940  }
1941  // The call instruction also disappears after inlining.
1942  Cost += InlineConstants::InstrCost + InlineConstants::CallPenalty;
1943  return Cost;
1944 }
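// Worked example for getCallsiteCost (illustrative sketch, not part of the
// original source; it assumes a 64-bit pointer size and the InlineConstants
// values InstrCost == 5 and CallPenalty == 25 from this version's
// InlineCost.h): a byval argument of a 256-byte struct gives TypeSize = 2048
// and PointerSize = 64, so NumStores = 32, clamped to 8, contributing
// 2 * 8 * 5 = 80; every other argument contributes 5; and the call itself
// contributes 5 + 25 = 30. analyzeCall subtracts this total from Cost because
// these instructions disappear once the call is inlined.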
1945 
1946 InlineCost llvm::getInlineCost(
1947  CallSite CS, const InlineParams &Params, TargetTransformInfo &CalleeTTI,
1948  std::function<AssumptionCache &(Function &)> &GetAssumptionCache,
1949  Optional<function_ref<BlockFrequencyInfo &(Function &)>> GetBFI,
1950  ProfileSummaryInfo *PSI, OptimizationRemarkEmitter *ORE) {
1951  return getInlineCost(CS, CS.getCalledFunction(), Params, CalleeTTI,
1952  GetAssumptionCache, GetBFI, PSI, ORE);
1953 }
1954 
1955 InlineCost llvm::getInlineCost(
1956  CallSite CS, Function *Callee, const InlineParams &Params,
1957  TargetTransformInfo &CalleeTTI,
1958  std::function<AssumptionCache &(Function &)> &GetAssumptionCache,
1959  Optional<function_ref<BlockFrequencyInfo &(Function &)>> GetBFI,
1960  ProfileSummaryInfo *PSI, OptimizationRemarkEmitter *ORE) {
1961 
1962  // Cannot inline indirect calls.
1963  if (!Callee)
1964  return llvm::InlineCost::getNever();
1965 
1966  // Never inline calls with byval arguments that do not have the alloca
1967  // address space. Since byval arguments can be replaced with a copy to an
1968  // alloca, the inlined code would need to be adjusted to handle that the
1969  // argument is in the alloca address space (so it is a little bit complicated
1970  // to solve).
1971  unsigned AllocaAS = Callee->getParent()->getDataLayout().getAllocaAddrSpace();
1972  for (unsigned I = 0, E = CS.arg_size(); I != E; ++I)
1973  if (CS.isByValArgument(I)) {
1974  PointerType *PTy = cast<PointerType>(CS.getArgument(I)->getType());
1975  if (PTy->getAddressSpace() != AllocaAS)
1976  return llvm::InlineCost::getNever();
1977  }
1978 
1979  // Calls to functions with always-inline attributes should be inlined
1980  // whenever possible.
1981  if (CS.hasFnAttr(Attribute::AlwaysInline)) {
1982  if (isInlineViable(*Callee))
1983  return llvm::InlineCost::getAlways();
1984  return llvm::InlineCost::getNever();
1985  }
1986 
1987  // Never inline functions with conflicting attributes (unless callee has
1988  // always-inline attribute).
1989  Function *Caller = CS.getCaller();
1990  if (!functionsHaveCompatibleAttributes(Caller, Callee, CalleeTTI))
1991  return llvm::InlineCost::getNever();
1992 
1993  // Don't inline this call if the caller has the optnone attribute.
1994  if (Caller->hasFnAttribute(Attribute::OptimizeNone))
1995  return llvm::InlineCost::getNever();
1996 
1997  // Don't inline functions which can be interposed at link-time. Don't inline
1998  // functions marked noinline or call sites marked noinline.
1999  // Note: inlining non-exact non-interposable functions is fine, since we know
2000  // we have *a* correct implementation of the source level function.
2001  if (Callee->isInterposable() || Callee->hasFnAttribute(Attribute::NoInline) ||
2002  CS.isNoInline())
2003  return llvm::InlineCost::getNever();
2004 
2005  LLVM_DEBUG(llvm::dbgs() << " Analyzing call of " << Callee->getName()
2006  << "... (caller:" << Caller->getName() << ")\n");
2007 
2008  CallAnalyzer CA(CalleeTTI, GetAssumptionCache, GetBFI, PSI, ORE, *Callee, CS,
2009  Params);
2010  bool ShouldInline = CA.analyzeCall(CS);
2011 
2012  LLVM_DEBUG(CA.dump());
2013 
2014  // Check if there was a reason to force inlining or no inlining.
2015  if (!ShouldInline && CA.getCost() < CA.getThreshold())
2016  return InlineCost::getNever();
2017  if (ShouldInline && CA.getCost() >= CA.getThreshold())
2018  return InlineCost::getAlways();
2019 
2020  return llvm::InlineCost::get(CA.getCost(), CA.getThreshold());
2021 }
2022 
2023 bool llvm::isInlineViable(Function &F) {
2024  bool ReturnsTwice = F.hasFnAttribute(Attribute::ReturnsTwice);
2025  for (Function::iterator BI = F.begin(), BE = F.end(); BI != BE; ++BI) {
2026  // Disallow inlining of functions which contain indirect branches or
2027  // blockaddresses.
2028  if (isa<IndirectBrInst>(BI->getTerminator()) || BI->hasAddressTaken())
2029  return false;
2030 
2031  for (auto &II : *BI) {
2032  CallSite CS(&II);
2033  if (!CS)
2034  continue;
2035 
2036  // Disallow recursive calls.
2037  if (&F == CS.getCalledFunction())
2038  return false;
2039 
2040  // Disallow calls which expose returns-twice to a function not previously
2041  // attributed as such.
2042  if (!ReturnsTwice && CS.isCall() &&
2043  cast<CallInst>(CS.getInstruction())->canReturnTwice())
2044  return false;
2045 
2046  if (CS.getCalledFunction())
2047  switch (CS.getCalledFunction()->getIntrinsicID()) {
2048  default:
2049  break;
2050  // Disallow inlining of @llvm.icall.branch.funnel because the current
2051  // backend can't separate call targets from call arguments.
2052  case llvm::Intrinsic::icall_branch_funnel:
2053  // Disallow inlining functions that call @llvm.localescape. Doing this
2054  // correctly would require major changes to the inliner.
2055  case llvm::Intrinsic::localescape:
2056  // Disallow inlining of functions that access VarArgs.
2057  case llvm::Intrinsic::vastart:
2058  case llvm::Intrinsic::vaend:
2059  return false;
2060  }
2061  }
2062  }
2063 
2064  return true;
2065 }
2066 
2067 // APIs to create InlineParams based on command line flags and/or other
2068 // parameters.
2069 
2070 InlineParams llvm::getInlineParams(int Threshold) {
2071  InlineParams Params;
2072 
2073  // This field is the threshold to use for a callee by default. This is
2074  // derived from one or more of:
2075  // * optimization or size-optimization levels,
2076  // * a value passed to createFunctionInliningPass function, or
2077  // * the -inline-threshold flag.
2078  // If the -inline-threshold flag is explicitly specified, that is used
2079  // irrespective of anything else.
2080  if (InlineThreshold.getNumOccurrences() > 0)
2081  Params.DefaultThreshold = InlineThreshold;
2082  else
2083  Params.DefaultThreshold = Threshold;
2084 
2085  // Set the HintThreshold knob from the -inlinehint-threshold.
2086  Params.HintThreshold = HintThreshold;
2087 
2088  // Set the HotCallSiteThreshold knob from the -hot-callsite-threshold.
2089  Params.HotCallSiteThreshold = HotCallSiteThreshold;
2090 
2091  // If the -locally-hot-callsite-threshold is explicitly specified, use it to
2092  // populate LocallyHotCallSiteThreshold. Later, we populate
2093  // Params.LocallyHotCallSiteThreshold from -locally-hot-callsite-threshold if
2094  // we know that optimization level is O3 (in the getInlineParams variant that
2095  // takes the opt and size levels).
2096  // FIXME: Remove this check (and make the assignment unconditional) after
2097  // addressing size regression issues at O2.
2098  if (LocallyHotCallSiteThreshold.getNumOccurrences() > 0)
2099  Params.LocallyHotCallSiteThreshold = LocallyHotCallSiteThreshold;
2100 
2101  // Set the ColdCallSiteThreshold knob from the -inline-cold-callsite-threshold.
2102  Params.ColdCallSiteThreshold = ColdCallSiteThreshold;
2103 
2104  // Set the OptMinSizeThreshold and OptSizeThreshold params only if the
2105  // -inline-threshold command-line option is not explicitly given. If that
2106  // option is present, then its value applies even for callees with size and
2107  // minsize attributes.
2108  // If the -inline-threshold is not specified, set the ColdThreshold from the
2109  // -inlinecold-threshold even if it is not explicitly passed. If
2110  // -inline-threshold is specified, then -inlinecold-threshold needs to be
2111  // explicitly specified to set the ColdThreshold knob
2112  if (InlineThreshold.getNumOccurrences() == 0) {
2113  Params.OptMinSizeThreshold = InlineConstants::OptMinSizeThreshold;
2114  Params.OptSizeThreshold = InlineConstants::OptSizeThreshold;
2115  Params.ColdThreshold = ColdThreshold;
2116  } else if (ColdThreshold.getNumOccurrences() > 0) {
2117  Params.ColdThreshold = ColdThreshold;
2118  }
2119  return Params;
2120 }
2121 
2122 InlineParams llvm::getInlineParams() {
2123  return getInlineParams(InlineThreshold);
2124 }
2125 
2126 // Compute the default threshold for inlining based on the opt level and the
2127 // size opt level.
2128 static int computeThresholdFromOptLevels(unsigned OptLevel,
2129  unsigned SizeOptLevel) {
2130  if (OptLevel > 2)
2131  return InlineConstants::OptAggressiveThreshold;
2132  if (SizeOptLevel == 1) // -Os
2133  return InlineConstants::OptSizeThreshold;
2134  if (SizeOptLevel == 2) // -Oz
2135  return InlineConstants::OptMinSizeThreshold;
2136  return InlineThreshold;
2137 }
2138 
2139 InlineParams llvm::getInlineParams(unsigned OptLevel, unsigned SizeOptLevel) {
2140  auto Params =
2141  getInlineParams(computeThresholdFromOptLevels(OptLevel, SizeOptLevel));
2142  // At O3, use the value of -locally-hot-callsite-threshold option to populate
2143  // Params.LocallyHotCallSiteThreshold. Below O3, this flag has effect only
2144  // when it is specified explicitly.
2145  if (OptLevel > 2)
2146  Params.LocallyHotCallSiteThreshold = LocallyHotCallSiteThreshold;
2147  return Params;
2148 }
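// Putting the two routines above together (illustrative, not part of the
// original source): getInlineParams(/*OptLevel=*/3, /*SizeOptLevel=*/0) seeds
// DefaultThreshold with InlineConstants::OptAggressiveThreshold (unless
// -inline-threshold is given on the command line) and, because OptLevel > 2,
// also honors -locally-hot-callsite-threshold, while
// getInlineParams(/*OptLevel=*/2, /*SizeOptLevel=*/2) seeds it with
// InlineConstants::OptMinSizeThreshold and leaves LocallyHotCallSiteThreshold
// unset unless that flag was passed explicitly.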