//===- InlineCost.cpp - Cost analysis for inliner -------------------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file implements inline cost analysis.
//
//===----------------------------------------------------------------------===//

#include "llvm/Analysis/InlineCost.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Analysis/BlockFrequencyInfo.h"
#include "llvm/Analysis/CodeMetrics.h"
#include "llvm/Analysis/ConstantFolding.h"
#include "llvm/Analysis/CFG.h"
#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/Analysis/ProfileSummaryInfo.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/CallSite.h"
#include "llvm/IR/CallingConv.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/GetElementPtrTypeIterator.h"
#include "llvm/IR/GlobalAlias.h"
#include "llvm/IR/InstVisitor.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Operator.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"

using namespace llvm;

#define DEBUG_TYPE "inline-cost"

STATISTIC(NumCallsAnalyzed, "Number of call sites analyzed");

static cl::opt<int> InlineThreshold(
    "inline-threshold", cl::Hidden, cl::init(225), cl::ZeroOrMore,
    cl::desc("Control the amount of inlining to perform (default = 225)"));

static cl::opt<int> HintThreshold(
    "inlinehint-threshold", cl::Hidden, cl::init(325),
    cl::desc("Threshold for inlining functions with inline hint"));

static cl::opt<int>
    ColdCallSiteThreshold("inline-cold-callsite-threshold", cl::Hidden,
                          cl::init(45),
                          cl::desc("Threshold for inlining cold callsites"));

// We introduce this threshold to help performance of instrumentation-based
// PGO before we actually hook up the inliner with analysis passes such as
// BPI and BFI.
static cl::opt<int> ColdThreshold(
    "inlinecold-threshold", cl::Hidden, cl::init(45),
    cl::desc("Threshold for inlining functions with cold attribute"));

static cl::opt<int>
    HotCallSiteThreshold("hot-callsite-threshold", cl::Hidden, cl::init(3000),
                         cl::ZeroOrMore,
                         cl::desc("Threshold for hot callsites"));

static cl::opt<int> LocallyHotCallSiteThreshold(
    "locally-hot-callsite-threshold", cl::Hidden, cl::init(525), cl::ZeroOrMore,
    cl::desc("Threshold for locally hot callsites"));

static cl::opt<int> ColdCallSiteRelFreq(
    "cold-callsite-rel-freq", cl::Hidden, cl::init(2), cl::ZeroOrMore,
    cl::desc("Maximum block frequency, expressed as a percentage of caller's "
             "entry frequency, for a callsite to be cold in the absence of "
             "profile information."));

static cl::opt<int> HotCallSiteRelFreq(
    "hot-callsite-rel-freq", cl::Hidden, cl::init(60), cl::ZeroOrMore,
    cl::desc("Minimum block frequency, expressed as a multiple of caller's "
             "entry frequency, for a callsite to be hot in the absence of "
             "profile information."));

static cl::opt<bool> OptComputeFullInlineCost(
    "inline-cost-full", cl::Hidden, cl::init(false),
    cl::desc("Compute the full inline cost of a call site even when the cost "
             "exceeds the threshold."));

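// As an illustration (not a prescribed workflow), these knobs can be
// exercised from the command line, e.g.:
//   opt -inline -inline-threshold=500 -S input.ll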
namespace {

class CallAnalyzer : public InstVisitor<CallAnalyzer, bool> {
  typedef InstVisitor<CallAnalyzer, bool> Base;
  friend class InstVisitor<CallAnalyzer, bool>;

  /// The TargetTransformInfo available for this compilation.
  const TargetTransformInfo &TTI;

  /// Getter for the cache of @llvm.assume intrinsics.
  std::function<AssumptionCache &(Function &)> &GetAssumptionCache;

  /// Getter for BlockFrequencyInfo.
  Optional<function_ref<BlockFrequencyInfo &(Function &)>> &GetBFI;

  /// Profile summary information.
  ProfileSummaryInfo *PSI;

  /// The called function.
  Function &F;

  // Cache the DataLayout since we use it a lot.
  const DataLayout &DL;

  /// The OptimizationRemarkEmitter available for this compilation.
  OptimizationRemarkEmitter *ORE;

  /// The candidate callsite being analyzed. Please do not use this to do
  /// analysis in the caller function; we want the inline cost query to be
  /// easily cacheable. Instead, use the cover function paramHasAttr.
  CallSite CandidateCS;

  /// Tunable parameters that control the analysis.
  const InlineParams &Params;

  int Threshold;
  int Cost;
  bool ComputeFullInlineCost;

  bool IsCallerRecursive;
  bool IsRecursiveCall;
  bool ExposesReturnsTwice;
  bool HasDynamicAlloca;
  bool ContainsNoDuplicateCall;
  bool HasReturn;
  bool HasIndirectBr;
  bool HasFrameEscape;
  bool UsesVarArgs;

  /// Number of bytes allocated statically by the callee.
  uint64_t AllocatedSize;
  unsigned NumInstructions, NumVectorInstructions;
  int VectorBonus, TenPercentVectorBonus;
  // Bonus to be applied when the callee has only one reachable basic block.
  int SingleBBBonus;

  /// While we walk the potentially-inlined instructions, we build up and
  /// maintain a mapping of simplified values specific to this callsite. The
  /// idea is to propagate any special information we have about arguments to
  /// this call through the inlinable section of the function, and account for
  /// likely simplifications post-inlining. The most important aspect we track
  /// is CFG altering simplifications -- when we prove a basic block dead, that
  /// can cause dramatic shifts in the cost of inlining a function.
  DenseMap<Value *, Constant *> SimplifiedValues;
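  // For example (an illustrative scenario, not tied to any particular
  // caller): if a callsite passes the constant 'i1 true' for an argument,
  // a conditional branch on that argument inside the callee folds, and the
  // untaken successor can be proven dead so its instructions stop counting
  // toward the inline cost.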

  /// Keep track of the values which map back (through function arguments) to
  /// allocas on the caller stack which could be simplified through SROA.
  DenseMap<Value *, Value *> SROAArgValues;

  /// The mapping of caller Alloca values to their accumulated cost savings. If
  /// we have to disable SROA for one of the allocas, this tells us how much
  /// cost must be added.
  DenseMap<Value *, int> SROAArgCosts;

  /// Keep track of values which map to a pointer base and constant offset.
  DenseMap<Value *, std::pair<Value *, APInt>> ConstantOffsetPtrs;

  /// Keep track of dead blocks due to the constant arguments.
  SetVector<BasicBlock *> DeadBlocks;

  /// The mapping of the blocks to their known unique successors due to the
  /// constant arguments.
  DenseMap<BasicBlock *, BasicBlock *> KnownSuccessors;

  /// Model the elimination of repeated loads that is expected to happen
  /// whenever we simplify away the stores that would otherwise cause them to
  /// be reloaded.
  bool EnableLoadElimination;
  SmallPtrSet<Value *, 16> LoadAddrSet;
  int LoadEliminationCost;
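  // For example, two simple loads of the same address with no intervening
  // clobber are modeled as one load: the second load's cost is parked in
  // LoadEliminationCost and added back by disableLoadElimination() if a
  // clobbering store or call is seen later.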

  // Custom simplification helper routines.
  bool isAllocaDerivedArg(Value *V);
  bool lookupSROAArgAndCost(Value *V, Value *&Arg,
                            DenseMap<Value *, int>::iterator &CostIt);
  void disableSROA(DenseMap<Value *, int>::iterator CostIt);
  void disableSROA(Value *V);
  void findDeadBlocks(BasicBlock *CurrBB, BasicBlock *NextBB);
  void accumulateSROACost(DenseMap<Value *, int>::iterator CostIt,
                          int InstructionCost);
  void disableLoadElimination();
  bool isGEPFree(GetElementPtrInst &GEP);
  bool canFoldInboundsGEP(GetElementPtrInst &I);
  bool accumulateGEPOffset(GEPOperator &GEP, APInt &Offset);
  bool simplifyCallSite(Function *F, CallSite CS);
  template <typename Callable>
  bool simplifyInstruction(Instruction &I, Callable Evaluate);
  ConstantInt *stripAndComputeInBoundsConstantOffsets(Value *&V);

  /// Return true if the given argument to the function being considered for
  /// inlining has the given attribute set either at the call site or the
  /// function declaration. Primarily used to inspect call site specific
  /// attributes since these can be more precise than the ones on the callee
  /// itself.
  bool paramHasAttr(Argument *A, Attribute::AttrKind Attr);

  /// Return true if the given value is known non-null within the callee if
  /// inlined through this particular callsite.
  bool isKnownNonNullInCallee(Value *V);

  /// Update Threshold based on callsite properties such as callee
  /// attributes and callee hotness for PGO builds. The Callee is explicitly
  /// passed to support analyzing indirect calls whose target is inferred by
  /// analysis.
  void updateThreshold(CallSite CS, Function &Callee);

  /// Return true if size growth is allowed when inlining the callee at CS.
  bool allowSizeGrowth(CallSite CS);

  /// Return true if \p CS is a cold callsite.
  bool isColdCallSite(CallSite CS, BlockFrequencyInfo *CallerBFI);

  /// Return a higher threshold if \p CS is a hot callsite.
  Optional<int> getHotCallSiteThreshold(CallSite CS,
                                        BlockFrequencyInfo *CallerBFI);

  // Custom analysis routines.
  bool analyzeBlock(BasicBlock *BB, SmallPtrSetImpl<const Value *> &EphValues);

  // Disable several entry points to the visitor so we don't accidentally use
  // them by declaring but not defining them here.
  void visit(Module *);
  void visit(Module &);
  void visit(Function *);
  void visit(Function &);
  void visit(BasicBlock *);
  void visit(BasicBlock &);

  // Provide base case for our instruction visit.
  bool visitInstruction(Instruction &I);

  // Our visit overrides.
  bool visitAlloca(AllocaInst &I);
  bool visitPHI(PHINode &I);
  bool visitGetElementPtr(GetElementPtrInst &I);
  bool visitBitCast(BitCastInst &I);
  bool visitPtrToInt(PtrToIntInst &I);
  bool visitIntToPtr(IntToPtrInst &I);
  bool visitCastInst(CastInst &I);
  bool visitUnaryInstruction(UnaryInstruction &I);
  bool visitCmpInst(CmpInst &I);
  bool visitSub(BinaryOperator &I);
  bool visitBinaryOperator(BinaryOperator &I);
  bool visitLoad(LoadInst &I);
  bool visitStore(StoreInst &I);
  bool visitExtractValue(ExtractValueInst &I);
  bool visitInsertValue(InsertValueInst &I);
  bool visitCallSite(CallSite CS);
  bool visitReturnInst(ReturnInst &RI);
  bool visitBranchInst(BranchInst &BI);
  bool visitSelectInst(SelectInst &SI);
  bool visitSwitchInst(SwitchInst &SI);
  bool visitIndirectBrInst(IndirectBrInst &IBI);
  bool visitResumeInst(ResumeInst &RI);
  bool visitCleanupReturnInst(CleanupReturnInst &RI);
  bool visitCatchReturnInst(CatchReturnInst &RI);
  bool visitUnreachableInst(UnreachableInst &I);

public:
  CallAnalyzer(const TargetTransformInfo &TTI,
               std::function<AssumptionCache &(Function &)> &GetAssumptionCache,
               Optional<function_ref<BlockFrequencyInfo &(Function &)>> &GetBFI,
               ProfileSummaryInfo *PSI, OptimizationRemarkEmitter *ORE,
               Function &Callee, CallSite CSArg, const InlineParams &Params)
      : TTI(TTI), GetAssumptionCache(GetAssumptionCache), GetBFI(GetBFI),
        PSI(PSI), F(Callee), DL(F.getParent()->getDataLayout()), ORE(ORE),
        CandidateCS(CSArg), Params(Params), Threshold(Params.DefaultThreshold),
        Cost(0), ComputeFullInlineCost(OptComputeFullInlineCost ||
                                       Params.ComputeFullInlineCost || ORE),
        IsCallerRecursive(false), IsRecursiveCall(false),
        ExposesReturnsTwice(false), HasDynamicAlloca(false),
        ContainsNoDuplicateCall(false), HasReturn(false), HasIndirectBr(false),
        HasFrameEscape(false), UsesVarArgs(false), AllocatedSize(0),
        NumInstructions(0), NumVectorInstructions(0), VectorBonus(0),
        SingleBBBonus(0), EnableLoadElimination(true), LoadEliminationCost(0),
        NumConstantArgs(0), NumConstantOffsetPtrArgs(0), NumAllocaArgs(0),
        NumConstantPtrCmps(0), NumConstantPtrDiffs(0),
        NumInstructionsSimplified(0), SROACostSavings(0),
        SROACostSavingsLost(0) {}

  bool analyzeCall(CallSite CS);

  int getThreshold() { return Threshold; }
  int getCost() { return Cost; }

  // Keep a bunch of stats about the cost savings found so we can print them
  // out when debugging.
  unsigned NumConstantArgs;
  unsigned NumConstantOffsetPtrArgs;
  unsigned NumAllocaArgs;
  unsigned NumConstantPtrCmps;
  unsigned NumConstantPtrDiffs;
  unsigned NumInstructionsSimplified;
  unsigned SROACostSavings;
  unsigned SROACostSavingsLost;

  void dump();
};

} // namespace

/// \brief Test whether the given value is an Alloca-derived function argument.
bool CallAnalyzer::isAllocaDerivedArg(Value *V) {
  return SROAArgValues.count(V);
}

/// \brief Lookup the SROA-candidate argument and cost iterator which V maps to.
/// Returns false if V does not map to a SROA-candidate.
bool CallAnalyzer::lookupSROAArgAndCost(
    Value *V, Value *&Arg, DenseMap<Value *, int>::iterator &CostIt) {
  if (SROAArgValues.empty() || SROAArgCosts.empty())
    return false;

  DenseMap<Value *, Value *>::iterator ArgIt = SROAArgValues.find(V);
  if (ArgIt == SROAArgValues.end())
    return false;

  Arg = ArgIt->second;
  CostIt = SROAArgCosts.find(Arg);
  return CostIt != SROAArgCosts.end();
}

/// \brief Disable SROA for the candidate marked by this cost iterator.
///
/// This marks the candidate as no longer viable for SROA, and adds the cost
/// savings associated with it back into the inline cost measurement.
void CallAnalyzer::disableSROA(DenseMap<Value *, int>::iterator CostIt) {
  // If we're no longer able to perform SROA we need to undo its cost savings
  // and prevent subsequent analysis.
  Cost += CostIt->second;
  SROACostSavings -= CostIt->second;
  SROACostSavingsLost += CostIt->second;
  SROAArgCosts.erase(CostIt);
  disableLoadElimination();
}

/// \brief If 'V' maps to a SROA candidate, disable SROA for it.
void CallAnalyzer::disableSROA(Value *V) {
  Value *SROAArg;
  DenseMap<Value *, int>::iterator CostIt;
  if (lookupSROAArgAndCost(V, SROAArg, CostIt))
    disableSROA(CostIt);
}

/// \brief Accumulate the given cost for a particular SROA candidate.
void CallAnalyzer::accumulateSROACost(DenseMap<Value *, int>::iterator CostIt,
                                      int InstructionCost) {
  CostIt->second += InstructionCost;
  SROACostSavings += InstructionCost;
}

void CallAnalyzer::disableLoadElimination() {
  if (EnableLoadElimination) {
    Cost += LoadEliminationCost;
    LoadEliminationCost = 0;
    EnableLoadElimination = false;
  }
}

/// \brief Accumulate a constant GEP offset into an APInt if possible.
///
/// Returns false if unable to compute the offset for any reason. Respects any
/// simplified values known during the analysis of this callsite.
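///
/// For example (illustrative): for a GEP of '{ i32, [4 x i32] }*' with
/// constant indices 0, 1, 2 and a typical data layout, the accumulated
/// offset is 4 + 2 * 4 = 12 bytes.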
bool CallAnalyzer::accumulateGEPOffset(GEPOperator &GEP, APInt &Offset) {
  unsigned IntPtrWidth = DL.getIndexTypeSizeInBits(GEP.getType());
  assert(IntPtrWidth == Offset.getBitWidth());

  for (gep_type_iterator GTI = gep_type_begin(GEP), GTE = gep_type_end(GEP);
       GTI != GTE; ++GTI) {
    ConstantInt *OpC = dyn_cast<ConstantInt>(GTI.getOperand());
    if (!OpC)
      if (Constant *SimpleOp = SimplifiedValues.lookup(GTI.getOperand()))
        OpC = dyn_cast<ConstantInt>(SimpleOp);
    if (!OpC)
      return false;
    if (OpC->isZero())
      continue;

    // Handle a struct index, which adds its field offset to the pointer.
    if (StructType *STy = GTI.getStructTypeOrNull()) {
      unsigned ElementIdx = OpC->getZExtValue();
      const StructLayout *SL = DL.getStructLayout(STy);
      Offset += APInt(IntPtrWidth, SL->getElementOffset(ElementIdx));
      continue;
    }

    APInt TypeSize(IntPtrWidth, DL.getTypeAllocSize(GTI.getIndexedType()));
    Offset += OpC->getValue().sextOrTrunc(IntPtrWidth) * TypeSize;
  }
  return true;
}

/// \brief Use TTI to check whether a GEP is free.
///
/// Respects any simplified values known during the analysis of this callsite.
bool CallAnalyzer::isGEPFree(GetElementPtrInst &GEP) {
  SmallVector<Value *, 4> Operands;
  Operands.push_back(GEP.getOperand(0));
  for (User::op_iterator I = GEP.idx_begin(), E = GEP.idx_end(); I != E; ++I)
    if (Constant *SimpleOp = SimplifiedValues.lookup(*I))
      Operands.push_back(SimpleOp);
    else
      Operands.push_back(*I);
  return TargetTransformInfo::TCC_Free == TTI.getUserCost(&GEP, Operands);
}

bool CallAnalyzer::visitAlloca(AllocaInst &I) {
  // Check whether inlining will turn a dynamic alloca into a static
  // alloca and handle that case.
  if (I.isArrayAllocation()) {
    Constant *Size = SimplifiedValues.lookup(I.getArraySize());
    if (auto *AllocSize = dyn_cast_or_null<ConstantInt>(Size)) {
      Type *Ty = I.getAllocatedType();
      AllocatedSize = SaturatingMultiplyAdd(
          AllocSize->getLimitedValue(), DL.getTypeAllocSize(Ty), AllocatedSize);
      return Base::visitAlloca(I);
    }
  }

  // Accumulate the allocated size.
  if (I.isStaticAlloca()) {
    Type *Ty = I.getAllocatedType();
    AllocatedSize = SaturatingAdd(DL.getTypeAllocSize(Ty), AllocatedSize);
  }

  // We will happily inline static alloca instructions.
  if (I.isStaticAlloca())
    return Base::visitAlloca(I);

  // FIXME: This is overly conservative. Dynamic allocas are inefficient for
  // a variety of reasons, and so we would like to not inline them into
  // functions which don't currently have a dynamic alloca. This simply
  // disables inlining altogether in the presence of a dynamic alloca.
  HasDynamicAlloca = true;
  return false;
}

bool CallAnalyzer::visitPHI(PHINode &I) {
  // FIXME: We need to propagate SROA *disabling* through phi nodes, even
  // though we don't want to propagate its bonuses. The idea is to disable
  // SROA if it *might* be used in an inappropriate manner.

  // Phi nodes are always zero-cost.
  // FIXME: Pointer sizes may differ between different address spaces, so do we
  // need to use the correct address space in the call to getPointerSizeInBits
  // here? Or could we skip the getPointerSizeInBits call completely? As far as
  // I can see the ZeroOffset is used as a dummy value, so we can probably use
  // any bit width for the ZeroOffset?
  APInt ZeroOffset = APInt::getNullValue(DL.getPointerSizeInBits(0));
  bool CheckSROA = I.getType()->isPointerTy();

  // Track the constant or pointer with constant offset we've seen so far.
  Constant *FirstC = nullptr;
  std::pair<Value *, APInt> FirstBaseAndOffset = {nullptr, ZeroOffset};
  Value *FirstV = nullptr;

  for (unsigned i = 0, e = I.getNumIncomingValues(); i != e; ++i) {
    BasicBlock *Pred = I.getIncomingBlock(i);
    // If the incoming block is dead, skip the incoming block.
    if (DeadBlocks.count(Pred))
      continue;
    // If the parent block of phi is not the known successor of the incoming
    // block, skip the incoming block.
    BasicBlock *KnownSuccessor = KnownSuccessors[Pred];
    if (KnownSuccessor && KnownSuccessor != I.getParent())
      continue;

    Value *V = I.getIncomingValue(i);
    // If the incoming value is this phi itself, skip the incoming value.
    if (&I == V)
      continue;

    Constant *C = dyn_cast<Constant>(V);
    if (!C)
      C = SimplifiedValues.lookup(V);

    std::pair<Value *, APInt> BaseAndOffset = {nullptr, ZeroOffset};
    if (!C && CheckSROA)
      BaseAndOffset = ConstantOffsetPtrs.lookup(V);

    if (!C && !BaseAndOffset.first)
      // The incoming value is neither a constant nor a pointer with constant
      // offset, exit early.
      return true;

    if (FirstC) {
      if (FirstC == C)
        // If we've seen a constant incoming value before and it is the same
        // constant we see this time, continue checking the next incoming
        // value.
        continue;
      // Otherwise early exit because we either see a different constant or saw
      // a constant before but we have a pointer with constant offset this
      // time.
      return true;
    }

    if (FirstV) {
      // The same logic as above, but check pointer with constant offset here.
      if (FirstBaseAndOffset == BaseAndOffset)
        continue;
      return true;
    }

    if (C) {
      // This is the 1st time we've seen a constant, record it.
      FirstC = C;
      continue;
    }

    // The remaining case is that this is the 1st time we've seen a pointer
    // with constant offset, record it.
    FirstV = V;
    FirstBaseAndOffset = BaseAndOffset;
  }

  // Check if we can map phi to a constant.
  if (FirstC) {
    SimplifiedValues[&I] = FirstC;
    return true;
  }

  // Check if we can map phi to a pointer with constant offset.
  if (FirstBaseAndOffset.first) {
    ConstantOffsetPtrs[&I] = FirstBaseAndOffset;

    Value *SROAArg;
    DenseMap<Value *, int>::iterator CostIt;
    if (lookupSROAArgAndCost(FirstV, SROAArg, CostIt))
      SROAArgValues[&I] = SROAArg;
  }

  return true;
}

/// \brief Check we can fold GEPs of constant-offset call site argument
/// pointers. This requires target data and inbounds GEPs.
///
/// \return true if the specified GEP can be folded.
bool CallAnalyzer::canFoldInboundsGEP(GetElementPtrInst &I) {
  // Check if we have a base + offset for the pointer.
  std::pair<Value *, APInt> BaseAndOffset =
      ConstantOffsetPtrs.lookup(I.getPointerOperand());
  if (!BaseAndOffset.first)
    return false;

  // Check if the offset of this GEP is constant, and if so accumulate it
  // into Offset.
  if (!accumulateGEPOffset(cast<GEPOperator>(I), BaseAndOffset.second))
    return false;

  // Add the result as a new mapping to Base + Offset.
  ConstantOffsetPtrs[&I] = BaseAndOffset;

  return true;
}

bool CallAnalyzer::visitGetElementPtr(GetElementPtrInst &I) {
  Value *SROAArg;
  DenseMap<Value *, int>::iterator CostIt;
  bool SROACandidate =
      lookupSROAArgAndCost(I.getPointerOperand(), SROAArg, CostIt);

  // Lambda to check whether a GEP's indices are all constant.
  auto IsGEPOffsetConstant = [&](GetElementPtrInst &GEP) {
    for (User::op_iterator I = GEP.idx_begin(), E = GEP.idx_end(); I != E; ++I)
      if (!isa<Constant>(*I) && !SimplifiedValues.lookup(*I))
        return false;
    return true;
  };

  if ((I.isInBounds() && canFoldInboundsGEP(I)) || IsGEPOffsetConstant(I)) {
    if (SROACandidate)
      SROAArgValues[&I] = SROAArg;

    // Constant GEPs are modeled as free.
    return true;
  }

  // Variable GEPs will require math and will disable SROA.
  if (SROACandidate)
    disableSROA(CostIt);
  return isGEPFree(I);
}

/// Simplify \p I if its operands are constants and update SimplifiedValues.
/// \p Evaluate is a callable specific to instruction type that evaluates the
/// instruction when all the operands are constants.
template <typename Callable>
bool CallAnalyzer::simplifyInstruction(Instruction &I, Callable Evaluate) {
  SmallVector<Constant *, 2> COps;
  for (Value *Op : I.operands()) {
    Constant *COp = dyn_cast<Constant>(Op);
    if (!COp)
      COp = SimplifiedValues.lookup(Op);
    if (!COp)
      return false;
    COps.push_back(COp);
  }
  auto *C = Evaluate(COps);
  if (!C)
    return false;
  SimplifiedValues[&I] = C;
  return true;
}

bool CallAnalyzer::visitBitCast(BitCastInst &I) {
  // Propagate constants through bitcasts.
  if (simplifyInstruction(I, [&](SmallVectorImpl<Constant *> &COps) {
        return ConstantExpr::getBitCast(COps[0], I.getType());
      }))
    return true;

  // Track base/offsets through casts.
  std::pair<Value *, APInt> BaseAndOffset =
      ConstantOffsetPtrs.lookup(I.getOperand(0));
  // Casts don't change the offset, just wrap it up.
  if (BaseAndOffset.first)
    ConstantOffsetPtrs[&I] = BaseAndOffset;

  // Also look for SROA candidates here.
  Value *SROAArg;
  DenseMap<Value *, int>::iterator CostIt;
  if (lookupSROAArgAndCost(I.getOperand(0), SROAArg, CostIt))
    SROAArgValues[&I] = SROAArg;

  // Bitcasts are always zero cost.
  return true;
}

bool CallAnalyzer::visitPtrToInt(PtrToIntInst &I) {
  // Propagate constants through ptrtoint.
  if (simplifyInstruction(I, [&](SmallVectorImpl<Constant *> &COps) {
        return ConstantExpr::getPtrToInt(COps[0], I.getType());
      }))
    return true;

  // Track base/offset pairs when converted to a plain integer provided the
  // integer is large enough to represent the pointer.
  unsigned IntegerSize = I.getType()->getScalarSizeInBits();
  unsigned AS = I.getOperand(0)->getType()->getPointerAddressSpace();
  if (IntegerSize >= DL.getPointerSizeInBits(AS)) {
    std::pair<Value *, APInt> BaseAndOffset =
        ConstantOffsetPtrs.lookup(I.getOperand(0));
    if (BaseAndOffset.first)
      ConstantOffsetPtrs[&I] = BaseAndOffset;
  }

  // This is really weird. Technically, ptrtoint will disable SROA. However,
  // unless that ptrtoint is *used* somewhere in the live basic blocks after
  // inlining, it will be nuked, and SROA should proceed. All of the uses which
  // would block SROA would also block SROA if applied directly to a pointer,
  // and so we can just add the integer in here. The only places where SROA is
  // preserved either cannot fire on an integer, or won't in-and-of themselves
  // disable SROA (ext) w/o some later use that we would see and disable.
  Value *SROAArg;
  DenseMap<Value *, int>::iterator CostIt;
  if (lookupSROAArgAndCost(I.getOperand(0), SROAArg, CostIt))
    SROAArgValues[&I] = SROAArg;

  return TargetTransformInfo::TCC_Free == TTI.getUserCost(&I);
}

bool CallAnalyzer::visitIntToPtr(IntToPtrInst &I) {
  // Propagate constants through inttoptr.
  if (simplifyInstruction(I, [&](SmallVectorImpl<Constant *> &COps) {
        return ConstantExpr::getIntToPtr(COps[0], I.getType());
      }))
    return true;

  // Track base/offset pairs when round-tripped through a pointer without
  // modifications provided the integer is not too large.
  Value *Op = I.getOperand(0);
  unsigned IntegerSize = Op->getType()->getScalarSizeInBits();
  if (IntegerSize <= DL.getPointerTypeSizeInBits(I.getType())) {
    std::pair<Value *, APInt> BaseAndOffset = ConstantOffsetPtrs.lookup(Op);
    if (BaseAndOffset.first)
      ConstantOffsetPtrs[&I] = BaseAndOffset;
  }

  // "Propagate" SROA here in the same manner as we do for ptrtoint above.
  Value *SROAArg;
  DenseMap<Value *, int>::iterator CostIt;
  if (lookupSROAArgAndCost(Op, SROAArg, CostIt))
    SROAArgValues[&I] = SROAArg;

  return TargetTransformInfo::TCC_Free == TTI.getUserCost(&I);
}

bool CallAnalyzer::visitCastInst(CastInst &I) {
  // Propagate constants through casts.
  if (simplifyInstruction(I, [&](SmallVectorImpl<Constant *> &COps) {
        return ConstantExpr::getCast(I.getOpcode(), COps[0], I.getType());
      }))
    return true;

  // Disable SROA in the face of arbitrary casts we don't whitelist elsewhere.
  disableSROA(I.getOperand(0));

  // If this is a floating-point cast, and the target says this operation
  // is expensive, this may eventually become a library call. Treat the cost
  // as such.
  switch (I.getOpcode()) {
  case Instruction::FPTrunc:
  case Instruction::FPExt:
  case Instruction::UIToFP:
  case Instruction::SIToFP:
  case Instruction::FPToUI:
  case Instruction::FPToSI:
    if (TTI.getFPOpCost(I.getType()) == TargetTransformInfo::TCC_Expensive)
      Cost += InlineConstants::CallPenalty;
    break;
  default:
    break;
  }

  return TargetTransformInfo::TCC_Free == TTI.getUserCost(&I);
}

bool CallAnalyzer::visitUnaryInstruction(UnaryInstruction &I) {
  Value *Operand = I.getOperand(0);
  if (simplifyInstruction(I, [&](SmallVectorImpl<Constant *> &COps) {
        return ConstantFoldInstOperands(&I, COps[0], DL);
      }))
    return true;

  // Disable any SROA on the argument to arbitrary unary operators.
  disableSROA(Operand);

  return false;
}

bool CallAnalyzer::paramHasAttr(Argument *A, Attribute::AttrKind Attr) {
  return CandidateCS.paramHasAttr(A->getArgNo(), Attr);
}

bool CallAnalyzer::isKnownNonNullInCallee(Value *V) {
  // Does the *call site* have the NonNull attribute set on an argument? We
  // use the attribute on the call site to memoize any analysis done in the
  // caller. This will also trip if the callee function has a non-null
  // parameter attribute, but that's a less interesting case because hopefully
  // the callee would already have been simplified based on that.
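  // For example (illustrative): given a callsite 'call void @f(i8* nonnull
  // %p)', an 'icmp eq i8* %arg, null' inside the callee folds to false once
  // inlined at this callsite.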
  if (Argument *A = dyn_cast<Argument>(V))
    if (paramHasAttr(A, Attribute::NonNull))
      return true;

  // Is this an alloca in the caller? This is distinct from the attribute case
  // above because attributes aren't updated within the inliner itself and we
  // always want to catch the alloca derived case.
  if (isAllocaDerivedArg(V))
    // We can actually predict the result of comparisons between an
    // alloca-derived value and null. Note that this fires regardless of
    // SROA firing.
    return true;

  return false;
}

bool CallAnalyzer::allowSizeGrowth(CallSite CS) {
  // If the normal destination of the invoke or the parent block of the call
  // site is unreachable-terminated, there is little point in inlining this
  // unless there is literally zero cost.
  // FIXME: Note that it is possible that an unreachable-terminated block has a
  // hot entry. For example, in the scenario below inlining hot_call_X() may be
  // beneficial:
  // main() {
  //   hot_call_1();
  //   ...
  //   hot_call_N()
  //   exit(0);
  // }
  // For now, we are not handling this corner case here as it is rare in real
  // code. In future, we should elaborate this based on BPI and BFI in more
  // general threshold adjusting heuristics in updateThreshold().
  Instruction *Instr = CS.getInstruction();
  if (InvokeInst *II = dyn_cast<InvokeInst>(Instr)) {
    if (isa<UnreachableInst>(II->getNormalDest()->getTerminator()))
      return false;
  } else if (isa<UnreachableInst>(Instr->getParent()->getTerminator()))
    return false;

  return true;
}

bool CallAnalyzer::isColdCallSite(CallSite CS, BlockFrequencyInfo *CallerBFI) {
  // If a global profile summary is available, then the callsite's coldness is
  // determined based on that.
  if (PSI && PSI->hasProfileSummary())
    return PSI->isColdCallSite(CS, CallerBFI);

  // Otherwise we need BFI to be available.
  if (!CallerBFI)
    return false;

  // Determine if the callsite is cold relative to caller's entry. We could
  // potentially cache the computation of scaled entry frequency, but the added
  // complexity is not worth it unless this scaling shows up high in the
  // profiles.
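  // With the default cold-callsite-rel-freq of 2, for instance, a callsite
  // block executed at most 2% as often as the caller's entry block is
  // treated as cold.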
  const BranchProbability ColdProb(ColdCallSiteRelFreq, 100);
  auto CallSiteBB = CS.getInstruction()->getParent();
  auto CallSiteFreq = CallerBFI->getBlockFreq(CallSiteBB);
  auto CallerEntryFreq =
      CallerBFI->getBlockFreq(&(CS.getCaller()->getEntryBlock()));
  return CallSiteFreq < CallerEntryFreq * ColdProb;
}

Optional<int>
CallAnalyzer::getHotCallSiteThreshold(CallSite CS,
                                      BlockFrequencyInfo *CallerBFI) {

  // If a global profile summary is available, then the callsite's hotness is
  // determined based on that.
  if (PSI && PSI->hasProfileSummary() && PSI->isHotCallSite(CS, CallerBFI))
    return Params.HotCallSiteThreshold;

  // Otherwise we need BFI to be available and to have a locally hot callsite
  // threshold.
  if (!CallerBFI || !Params.LocallyHotCallSiteThreshold)
    return None;

  // Determine if the callsite is hot relative to caller's entry. We could
  // potentially cache the computation of scaled entry frequency, but the added
  // complexity is not worth it unless this scaling shows up high in the
  // profiles.
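  // With the default hot-callsite-rel-freq of 60, for instance, a callsite
  // block executed at least 60x as often as the caller's entry block
  // qualifies as locally hot.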
  auto CallSiteBB = CS.getInstruction()->getParent();
  auto CallSiteFreq = CallerBFI->getBlockFreq(CallSiteBB).getFrequency();
  auto CallerEntryFreq = CallerBFI->getEntryFreq();
  if (CallSiteFreq >= CallerEntryFreq * HotCallSiteRelFreq)
    return Params.LocallyHotCallSiteThreshold;

  // Otherwise treat it normally.
  return None;
}

void CallAnalyzer::updateThreshold(CallSite CS, Function &Callee) {
  // If no size growth is allowed for this inlining, set Threshold to 0.
  if (!allowSizeGrowth(CS)) {
    Threshold = 0;
    return;
  }

  Function *Caller = CS.getCaller();

  // return min(A, B) if B is valid.
  auto MinIfValid = [](int A, Optional<int> B) {
    return B ? std::min(A, B.getValue()) : A;
  };

  // return max(A, B) if B is valid.
  auto MaxIfValid = [](int A, Optional<int> B) {
    return B ? std::max(A, B.getValue()) : A;
  };

  // Various bonus percentages. These are multiplied by Threshold to get the
  // bonus values.
  // SingleBBBonus: This bonus is applied if the callee has a single reachable
  // basic block at the given callsite context. This is speculatively applied
  // and withdrawn if more than one basic block is seen.
  //
  // Vector bonuses: We want to more aggressively inline vector-dense kernels
  // and apply this bonus based on the percentage of vector instructions. A
  // bonus is applied if the vector instructions exceed 50% and half that
  // amount is applied if they exceed 10%. Note that these bonuses are somewhat
  // arbitrary and evolved over time by accident as much as because they are
  // principled bonuses.
  // FIXME: It would be nice to base the bonus values on something more
  // scientific.
  //
  // LastCallToStaticBonus: This large bonus is applied to ensure the inlining
  // of the last call to a static function as inlining such functions is
  // guaranteed to reduce code size.
  //
  // These bonus percentages may be set to 0 based on properties of the caller
  // and the callsite.
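  //
  // For example, with the default Threshold of 225, the speculative
  // single-basic-block bonus computed below is 225 * 50 / 100 = 112 and the
  // full vector bonus is 225 * 150 / 100 = 337.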
  int SingleBBBonusPercent = 50;
  int VectorBonusPercent = 150;
  int LastCallToStaticBonus = InlineConstants::LastCallToStaticBonus;

  // Lambda to set all the above bonus and bonus percentages to 0.
  auto DisallowAllBonuses = [&]() {
    SingleBBBonusPercent = 0;
    VectorBonusPercent = 0;
    LastCallToStaticBonus = 0;
  };

  // Use the OptMinSizeThreshold or OptSizeThreshold knob if they are available
  // and reduce the threshold if the caller has the necessary attribute.
  if (Caller->optForMinSize()) {
    Threshold = MinIfValid(Threshold, Params.OptMinSizeThreshold);
    // For minsize, we want to disable the single BB bonus and the vector
    // bonuses, but not the last-call-to-static bonus. Inlining the last call
    // to a static function will, at the minimum, eliminate the parameter setup
    // and call/return instructions.
    SingleBBBonusPercent = 0;
    VectorBonusPercent = 0;
  } else if (Caller->optForSize())
    Threshold = MinIfValid(Threshold, Params.OptSizeThreshold);

  // Adjust the threshold based on the inlinehint attribute and profile-based
  // hotness information if the caller does not have the MinSize attribute.
  if (!Caller->optForMinSize()) {
    if (Callee.hasFnAttribute(Attribute::InlineHint))
      Threshold = MaxIfValid(Threshold, Params.HintThreshold);

    // FIXME: After switching to the new passmanager, simplify the logic below
    // by checking only the callsite hotness/coldness as we will reliably
    // have local profile information.
    //
    // Callsite hotness and coldness can be determined if sample profile is
    // used (which adds hotness metadata to calls) or if caller's
    // BlockFrequencyInfo is available.
    BlockFrequencyInfo *CallerBFI = GetBFI ? &((*GetBFI)(*Caller)) : nullptr;
    auto HotCallSiteThreshold = getHotCallSiteThreshold(CS, CallerBFI);
    if (!Caller->optForSize() && HotCallSiteThreshold) {
      DEBUG(dbgs() << "Hot callsite.\n");
      // FIXME: This should update the threshold only if it exceeds the
      // current threshold, but AutoFDO + ThinLTO currently relies on this
      // behavior to prevent inlining of hot callsites during ThinLTO
      // compile phase.
      Threshold = HotCallSiteThreshold.getValue();
    } else if (isColdCallSite(CS, CallerBFI)) {
      DEBUG(dbgs() << "Cold callsite.\n");
      // Do not apply bonuses for a cold callsite, including the
      // LastCallToStatic bonus. While this bonus might result in code size
      // reduction, it can cause the size of a non-cold caller to increase,
      // preventing it from being inlined.
      DisallowAllBonuses();
      Threshold = MinIfValid(Threshold, Params.ColdCallSiteThreshold);
    } else if (PSI) {
      // Use callee's global profile information only if we have no way of
      // determining this via callsite information.
      if (PSI->isFunctionEntryHot(&Callee)) {
        DEBUG(dbgs() << "Hot callee.\n");
        // If callsite hotness can not be determined, we may still know
        // that the callee is hot and treat it as a weaker hint for threshold
        // increase.
        Threshold = MaxIfValid(Threshold, Params.HintThreshold);
      } else if (PSI->isFunctionEntryCold(&Callee)) {
        DEBUG(dbgs() << "Cold callee.\n");
        // Do not apply bonuses for a cold callee, including the
        // LastCallToStatic bonus. While this bonus might result in code size
        // reduction, it can cause the size of a non-cold caller to increase,
        // preventing it from being inlined.
        DisallowAllBonuses();
        Threshold = MinIfValid(Threshold, Params.ColdThreshold);
      }
    }
  }

  // Finally, take the target-specific inlining threshold multiplier into
  // account.
  Threshold *= TTI.getInliningThresholdMultiplier();

  SingleBBBonus = Threshold * SingleBBBonusPercent / 100;
  VectorBonus = Threshold * VectorBonusPercent / 100;

  bool OnlyOneCallAndLocalLinkage =
      F.hasLocalLinkage() && F.hasOneUse() && &F == CS.getCalledFunction();
  // If there is only one call of the function, and it has internal linkage,
  // the cost of inlining it drops dramatically. It may seem odd to update
  // Cost in updateThreshold, but the bonus depends on the logic in this
  // method.
  if (OnlyOneCallAndLocalLinkage)
    Cost -= LastCallToStaticBonus;
}

bool CallAnalyzer::visitCmpInst(CmpInst &I) {
  Value *LHS = I.getOperand(0), *RHS = I.getOperand(1);
  // First try to handle simplified comparisons.
  if (simplifyInstruction(I, [&](SmallVectorImpl<Constant *> &COps) {
        return ConstantExpr::getCompare(I.getPredicate(), COps[0], COps[1]);
      }))
    return true;

  if (I.getOpcode() == Instruction::FCmp)
    return false;

  // Otherwise look for a comparison between constant offset pointers with
  // a common base.
  Value *LHSBase, *RHSBase;
  APInt LHSOffset, RHSOffset;
  std::tie(LHSBase, LHSOffset) = ConstantOffsetPtrs.lookup(LHS);
  if (LHSBase) {
    std::tie(RHSBase, RHSOffset) = ConstantOffsetPtrs.lookup(RHS);
    if (RHSBase && LHSBase == RHSBase) {
      // We have common bases, fold the icmp to a constant based on the
      // offsets.
      Constant *CLHS = ConstantInt::get(LHS->getContext(), LHSOffset);
      Constant *CRHS = ConstantInt::get(RHS->getContext(), RHSOffset);
      if (Constant *C = ConstantExpr::getICmp(I.getPredicate(), CLHS, CRHS)) {
        SimplifiedValues[&I] = C;
        ++NumConstantPtrCmps;
        return true;
      }
    }
  }

  // If the comparison is an equality comparison with null, we can simplify it
  // if we know the value (argument) can't be null.
  if (I.isEquality() && isa<ConstantPointerNull>(I.getOperand(1)) &&
      isKnownNonNullInCallee(I.getOperand(0))) {
    bool IsNotEqual = I.getPredicate() == CmpInst::ICMP_NE;
    SimplifiedValues[&I] = IsNotEqual ? ConstantInt::getTrue(I.getType())
                                      : ConstantInt::getFalse(I.getType());
    return true;
  }
  // Finally check for SROA candidates in comparisons.
  Value *SROAArg;
  DenseMap<Value *, int>::iterator CostIt;
  if (lookupSROAArgAndCost(I.getOperand(0), SROAArg, CostIt)) {
    if (isa<ConstantPointerNull>(I.getOperand(1))) {
      accumulateSROACost(CostIt, InlineConstants::InstrCost);
      return true;
    }

    disableSROA(CostIt);
  }

  return false;
}

bool CallAnalyzer::visitSub(BinaryOperator &I) {
  // Try to handle a special case: we can fold computing the difference of two
  // constant-related pointers.
  Value *LHS = I.getOperand(0), *RHS = I.getOperand(1);
  Value *LHSBase, *RHSBase;
  APInt LHSOffset, RHSOffset;
  std::tie(LHSBase, LHSOffset) = ConstantOffsetPtrs.lookup(LHS);
  if (LHSBase) {
    std::tie(RHSBase, RHSOffset) = ConstantOffsetPtrs.lookup(RHS);
    if (RHSBase && LHSBase == RHSBase) {
      // We have common bases, fold the subtract to a constant based on the
      // offsets.
      Constant *CLHS = ConstantInt::get(LHS->getContext(), LHSOffset);
      Constant *CRHS = ConstantInt::get(RHS->getContext(), RHSOffset);
      if (Constant *C = ConstantExpr::getSub(CLHS, CRHS)) {
        SimplifiedValues[&I] = C;
        ++NumConstantPtrDiffs;
        return true;
      }
    }
  }

  // Otherwise, fall back to the generic logic for simplifying and handling
  // instructions.
  return Base::visitSub(I);
}

bool CallAnalyzer::visitBinaryOperator(BinaryOperator &I) {
  Value *LHS = I.getOperand(0), *RHS = I.getOperand(1);
  Constant *CLHS = dyn_cast<Constant>(LHS);
  if (!CLHS)
    CLHS = SimplifiedValues.lookup(LHS);
  Constant *CRHS = dyn_cast<Constant>(RHS);
  if (!CRHS)
    CRHS = SimplifiedValues.lookup(RHS);

  Value *SimpleV = nullptr;
  if (auto FI = dyn_cast<FPMathOperator>(&I))
    SimpleV = SimplifyFPBinOp(I.getOpcode(), CLHS ? CLHS : LHS,
                              CRHS ? CRHS : RHS, FI->getFastMathFlags(), DL);
  else
    SimpleV =
        SimplifyBinOp(I.getOpcode(), CLHS ? CLHS : LHS, CRHS ? CRHS : RHS, DL);

  if (Constant *C = dyn_cast_or_null<Constant>(SimpleV))
    SimplifiedValues[&I] = C;

  if (SimpleV)
    return true;

  // Disable any SROA on arguments to arbitrary, unsimplified binary operators.
  disableSROA(LHS);
  disableSROA(RHS);

  // If the instruction is floating point, and the target says this operation
  // is expensive, this may eventually become a library call. Treat the cost
  // as such.
  if (I.getType()->isFloatingPointTy() &&
      TTI.getFPOpCost(I.getType()) == TargetTransformInfo::TCC_Expensive)
    Cost += InlineConstants::CallPenalty;

  return false;
}

bool CallAnalyzer::visitLoad(LoadInst &I) {
  Value *SROAArg;
  DenseMap<Value *, int>::iterator CostIt;
  if (lookupSROAArgAndCost(I.getPointerOperand(), SROAArg, CostIt)) {
    if (I.isSimple()) {
      accumulateSROACost(CostIt, InlineConstants::InstrCost);
      return true;
    }

    disableSROA(CostIt);
  }

  // If the data is already loaded from this address and hasn't been clobbered
  // by any stores or calls, this load is likely to be redundant and can be
  // eliminated.
  if (EnableLoadElimination &&
      !LoadAddrSet.insert(I.getPointerOperand()).second && I.isUnordered()) {
    LoadEliminationCost += InlineConstants::InstrCost;
    return true;
  }

  return false;
}

bool CallAnalyzer::visitStore(StoreInst &I) {
  Value *SROAArg;
  DenseMap<Value *, int>::iterator CostIt;
  if (lookupSROAArgAndCost(I.getPointerOperand(), SROAArg, CostIt)) {
    if (I.isSimple()) {
      accumulateSROACost(CostIt, InlineConstants::InstrCost);
      return true;
    }

    disableSROA(CostIt);
  }

  // The store can potentially clobber loads and prevent repeated loads from
  // being eliminated.
  // FIXME:
  // 1. We can probably keep an initial set of eliminatable loads subtracted
  // from the cost even when we finally see a store. We just need to disable
  // *further* accumulation of elimination savings.
  // 2. We should probably at some point thread MemorySSA for the callee into
  // this and then use that to actually compute *really* precise savings.
  disableLoadElimination();
  return false;
}

bool CallAnalyzer::visitExtractValue(ExtractValueInst &I) {
  // Constant folding for extract value is trivial.
  if (simplifyInstruction(I, [&](SmallVectorImpl<Constant *> &COps) {
        return ConstantExpr::getExtractValue(COps[0], I.getIndices());
      }))
    return true;

  // SROA can look through these but give them a cost.
  return false;
}

bool CallAnalyzer::visitInsertValue(InsertValueInst &I) {
  // Constant folding for insert value is trivial.
  if (simplifyInstruction(I, [&](SmallVectorImpl<Constant *> &COps) {
        return ConstantExpr::getInsertValue(/*AggregateOperand*/ COps[0],
                                            /*InsertedValueOperand*/ COps[1],
                                            I.getIndices());
      }))
    return true;

  // SROA can look through these but give them a cost.
  return false;
}

/// \brief Try to simplify a call site.
///
/// Takes a concrete function and callsite and tries to actually simplify it by
/// analyzing the arguments and call itself with instsimplify. Returns true if
/// it has simplified the callsite to some other entity (a constant), making it
/// free.
bool CallAnalyzer::simplifyCallSite(Function *F, CallSite CS) {
  // FIXME: Using the instsimplify logic directly for this is inefficient
  // because we have to continually rebuild the argument list even when no
  // simplifications can be performed. Until that is fixed with remapping
  // inside of instsimplify, directly constant fold calls here.
  if (!canConstantFoldCallTo(CS, F))
    return false;

  // Try to re-map the arguments to constants.
  SmallVector<Constant *, 4> ConstantArgs;
  ConstantArgs.reserve(CS.arg_size());
  for (CallSite::arg_iterator I = CS.arg_begin(), E = CS.arg_end(); I != E;
       ++I) {
    Constant *C = dyn_cast<Constant>(*I);
    if (!C)
      C = dyn_cast_or_null<Constant>(SimplifiedValues.lookup(*I));
    if (!C)
      return false; // This argument doesn't map to a constant.

    ConstantArgs.push_back(C);
  }
  if (Constant *C = ConstantFoldCall(CS, F, ConstantArgs)) {
    SimplifiedValues[CS.getInstruction()] = C;
    return true;
  }

  return false;
}

bool CallAnalyzer::visitCallSite(CallSite CS) {
  if (CS.hasFnAttr(Attribute::ReturnsTwice) &&
      !F.hasFnAttribute(Attribute::ReturnsTwice)) {
    // This aborts the entire analysis.
    ExposesReturnsTwice = true;
    return false;
  }
  if (CS.isCall() && cast<CallInst>(CS.getInstruction())->cannotDuplicate())
    ContainsNoDuplicateCall = true;

  if (Function *F = CS.getCalledFunction()) {
    // When we have a concrete function, first try to simplify it directly.
    if (simplifyCallSite(F, CS))
      return true;

    // Next check if it is an intrinsic we know about.
    // FIXME: Lift this into part of the InstVisitor.
    if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(CS.getInstruction())) {
      switch (II->getIntrinsicID()) {
      default:
        if (!CS.onlyReadsMemory() && !isAssumeLikeIntrinsic(II))
          disableLoadElimination();
        return Base::visitCallSite(CS);

      case Intrinsic::load_relative:
        // This is normally lowered to 4 LLVM instructions.
        Cost += 3 * InlineConstants::InstrCost;
        return false;

      case Intrinsic::memset:
      case Intrinsic::memcpy:
      case Intrinsic::memmove:
        disableLoadElimination();
        // SROA can usually chew through these intrinsics, but they aren't free.
        return false;
      case Intrinsic::localescape:
        HasFrameEscape = true;
        return false;
      case Intrinsic::vastart:
      case Intrinsic::vaend:
        UsesVarArgs = true;
        return false;
      }
    }

    if (F == CS.getInstruction()->getFunction()) {
      // This flag will fully abort the analysis, so don't bother with anything
      // else.
      IsRecursiveCall = true;
      return false;
    }

    if (TTI.isLoweredToCall(F)) {
      // We account for the average 1 instruction per call argument setup
      // here.
      Cost += CS.arg_size() * InlineConstants::InstrCost;

      // Everything other than inline ASM will also have a significant cost
      // merely from making the call.
      if (!isa<InlineAsm>(CS.getCalledValue()))
        Cost += InlineConstants::CallPenalty;
    }

    if (!CS.onlyReadsMemory())
      disableLoadElimination();
    return Base::visitCallSite(CS);
  }

  // Otherwise we're in a very special case -- an indirect function call. See
  // if we can be particularly clever about this.
  Value *Callee = CS.getCalledValue();

  // First, pay the price of the argument setup. We account for the average
  // 1 instruction per call argument setup here.
  Cost += CS.arg_size() * InlineConstants::InstrCost;

  // Next, check if this happens to be an indirect function call to a known
  // function in this inline context. If not, we've done all we can.
  Function *F = dyn_cast_or_null<Function>(SimplifiedValues.lookup(Callee));
  if (!F) {
    if (!CS.onlyReadsMemory())
      disableLoadElimination();
    return Base::visitCallSite(CS);
  }

  // If we have a constant that we are calling as a function, we can peer
  // through it and see the function target. This happens not infrequently
  // during devirtualization and so we want to give it a hefty bonus for
  // inlining, but cap that bonus in the event that inlining wouldn't pan
  // out. Pretend to inline the function, with a custom threshold.
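  // For example (illustrative numbers): if the nested analysis concludes the
  // devirtualized callee would cost 50 against a threshold of 150, this
  // callsite's cost is credited by 100 below.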
  auto IndirectCallParams = Params;
  IndirectCallParams.DefaultThreshold = InlineConstants::IndirectCallThreshold;
  CallAnalyzer CA(TTI, GetAssumptionCache, GetBFI, PSI, ORE, *F, CS,
                  IndirectCallParams);
  if (CA.analyzeCall(CS)) {
    // We were able to inline the indirect call! Subtract the cost from the
    // threshold to get the bonus we want to apply, but don't go below zero.
    Cost -= std::max(0, CA.getThreshold() - CA.getCost());
  }

  if (!F->onlyReadsMemory())
    disableLoadElimination();
  return Base::visitCallSite(CS);
}

bool CallAnalyzer::visitReturnInst(ReturnInst &RI) {
  // At least one return instruction will be free after inlining.
  bool Free = !HasReturn;
  HasReturn = true;
  return Free;
}

bool CallAnalyzer::visitBranchInst(BranchInst &BI) {
  // We model unconditional branches as essentially free -- they really
  // shouldn't exist at all, but handling them makes the behavior of the
  // inliner more regular and predictable. Interestingly, conditional branches
  // which will fold away are also free.
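  // For example, 'br i1 %c, ...' where %c has been simplified to a constant
  // for this callsite behaves like an unconditional branch and costs nothing.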
  return BI.isUnconditional() || isa<ConstantInt>(BI.getCondition()) ||
         dyn_cast_or_null<ConstantInt>(
             SimplifiedValues.lookup(BI.getCondition()));
}

bool CallAnalyzer::visitSelectInst(SelectInst &SI) {
  bool CheckSROA = SI.getType()->isPointerTy();
  Value *TrueVal = SI.getTrueValue();
  Value *FalseVal = SI.getFalseValue();

  Constant *TrueC = dyn_cast<Constant>(TrueVal);
  if (!TrueC)
    TrueC = SimplifiedValues.lookup(TrueVal);
  Constant *FalseC = dyn_cast<Constant>(FalseVal);
  if (!FalseC)
    FalseC = SimplifiedValues.lookup(FalseVal);
  Constant *CondC =
      dyn_cast_or_null<Constant>(SimplifiedValues.lookup(SI.getCondition()));

  if (!CondC) {
    // Select C, X, X => X
    if (TrueC == FalseC && TrueC) {
      SimplifiedValues[&SI] = TrueC;
      return true;
    }

    if (!CheckSROA)
      return Base::visitSelectInst(SI);

    std::pair<Value *, APInt> TrueBaseAndOffset =
        ConstantOffsetPtrs.lookup(TrueVal);
    std::pair<Value *, APInt> FalseBaseAndOffset =
        ConstantOffsetPtrs.lookup(FalseVal);
    if (TrueBaseAndOffset == FalseBaseAndOffset && TrueBaseAndOffset.first) {
      ConstantOffsetPtrs[&SI] = TrueBaseAndOffset;

      Value *SROAArg;
      DenseMap<Value *, int>::iterator CostIt;
      if (lookupSROAArgAndCost(TrueVal, SROAArg, CostIt))
        SROAArgValues[&SI] = SROAArg;
      return true;
    }

    return Base::visitSelectInst(SI);
  }

  // Select condition is a constant.
  Value *SelectedV = CondC->isAllOnesValue()
                         ? TrueVal
                         : (CondC->isNullValue()) ? FalseVal : nullptr;
  if (!SelectedV) {
    // Condition is a vector constant that is not all 1s or all 0s. If all
    // operands are constants, ConstantExpr::getSelect() can handle the cases
    // such as select vectors.
    if (TrueC && FalseC) {
      if (auto *C = ConstantExpr::getSelect(CondC, TrueC, FalseC)) {
        SimplifiedValues[&SI] = C;
        return true;
      }
    }
    return Base::visitSelectInst(SI);
  }

  // Condition is either all 1s or all 0s. SI can be simplified.
  if (Constant *SelectedC = dyn_cast<Constant>(SelectedV)) {
    SimplifiedValues[&SI] = SelectedC;
    return true;
  }

  if (!CheckSROA)
    return true;

  std::pair<Value *, APInt> BaseAndOffset =
      ConstantOffsetPtrs.lookup(SelectedV);
  if (BaseAndOffset.first) {
    ConstantOffsetPtrs[&SI] = BaseAndOffset;

    Value *SROAArg;
    DenseMap<Value *, int>::iterator CostIt;
    if (lookupSROAArgAndCost(SelectedV, SROAArg, CostIt))
      SROAArgValues[&SI] = SROAArg;
  }

  return true;
}

bool CallAnalyzer::visitSwitchInst(SwitchInst &SI) {
  // We model unconditional switches as free, see the comments on handling
  // branches.
  if (isa<ConstantInt>(SI.getCondition()))
    return true;
  if (Value *V = SimplifiedValues.lookup(SI.getCondition()))
    if (isa<ConstantInt>(V))
      return true;

  // Assume the most general case where the switch is lowered into
  // either a jump table, bit test, or a balanced binary tree consisting of
  // case clusters without merging adjacent clusters with the same
  // destination. We do not consider the switches that are lowered with a mix
  // of jump table/bit test/binary search tree. The cost of the switch is
  // proportional to the size of the tree or the size of jump table range.
  //
  // NB: We convert large switches which are just used to initialize large phi
  // nodes to lookup tables instead in simplify-cfg, so this shouldn't prevent
  // inlining those. It will prevent inlining in cases where the optimization
  // does not (yet) fire.

  // Maximum valid cost increased in this function.
  int CostUpperBound = INT_MAX - InlineConstants::InstrCost - 1;

  // Exit early for a large switch, assuming one case needs at least one
  // instruction.
  // FIXME: This is not true for a bit test, but ignore such case for now to
  // save compile-time.
  int64_t CostLowerBound =
      std::min((int64_t)CostUpperBound,
               (int64_t)SI.getNumCases() * InlineConstants::InstrCost + Cost);

  if (CostLowerBound > Threshold && !ComputeFullInlineCost) {
    Cost = CostLowerBound;
    return false;
  }

  unsigned JumpTableSize = 0;
  unsigned NumCaseCluster =
      TTI.getEstimatedNumberOfCaseClusters(SI, JumpTableSize);

  // If suitable for a jump table, consider the cost for the table size and
  // branch to destination.
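  // For example, a jump table with 16 entries is charged
  // 16 * InstrCost for the table plus a fixed 4 * InstrCost for the
  // surrounding dispatch sequence.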
  if (JumpTableSize) {
    int64_t JTCost = (int64_t)JumpTableSize * InlineConstants::InstrCost +
                     4 * InlineConstants::InstrCost;

    Cost = std::min((int64_t)CostUpperBound, JTCost + Cost);
    return false;
  }

  // Considering forming a binary search, we should find the number of nodes
  // which is the same as the number of comparisons when lowered. For a given
  // number of clusters, n, we can define a recursive function, f(n), to find
  // the number of nodes in the tree. The recursion is:
  // f(n) = 1 + f(n/2) + f(n - n/2), when n > 3,
  // and f(n) = n, when n <= 3.
  // This will lead to a binary tree where the leaf should be either f(2) or
  // f(3) when n > 3. So, the number of comparisons from leaves should be n,
  // while the number of non-leaf nodes should be:
  // 2^(log2(n) - 1) - 1
  // = 2^log2(n) * 2^-1 - 1
  // = n / 2 - 1.
  // Considering comparisons from leaf and non-leaf nodes, we can estimate the
  // number of comparisons in a simple closed form:
  // n + n / 2 - 1 = n * 3 / 2 - 1
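  // For example, with n = 8 clusters the closed form gives
  // 8 * 3 / 2 - 1 = 11 expected comparisons, so the switch is charged
  // 11 * 2 * InstrCost below (one compare plus one branch per comparison).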
  if (NumCaseCluster <= 3) {
    // Suppose a comparison includes one compare and one conditional branch.
    Cost += NumCaseCluster * 2 * InlineConstants::InstrCost;
    return false;
  }

  int64_t ExpectedNumberOfCompare = 3 * (int64_t)NumCaseCluster / 2 - 1;
  int64_t SwitchCost =
      ExpectedNumberOfCompare * 2 * InlineConstants::InstrCost;

  Cost = std::min((int64_t)CostUpperBound, SwitchCost + Cost);
  return false;
}

bool CallAnalyzer::visitIndirectBrInst(IndirectBrInst &IBI) {
  // We never want to inline functions that contain an indirectbr. This is
  // incorrect because all the blockaddresses (in static global initializers
  // for example) would be referring to the original function, and this
  // indirect jump would jump from the inlined copy of the function into the
  // original function which is extremely undefined behavior.
  // FIXME: This logic isn't really right; we can safely inline functions with
  // indirectbr's as long as no other function or global references the
  // blockaddress of a block within the current function.
  HasIndirectBr = true;
  return false;
}

1495 bool CallAnalyzer::visitResumeInst(ResumeInst &RI) {
1496  // FIXME: It's not clear that a single instruction is an accurate model for
1497  // the inline cost of a resume instruction.
1498  return false;
1499 }
1500 
1501 bool CallAnalyzer::visitCleanupReturnInst(CleanupReturnInst &CRI) {
1502  // FIXME: It's not clear that a single instruction is an accurate model for
1503  // the inline cost of a cleanupret instruction.
1504  return false;
1505 }
1506 
1507 bool CallAnalyzer::visitCatchReturnInst(CatchReturnInst &CRI) {
1508  // FIXME: It's not clear that a single instruction is an accurate model for
1509  // the inline cost of a catchret instruction.
1510  return false;
1511 }
1512 
1513 bool CallAnalyzer::visitUnreachableInst(UnreachableInst &I) {
1514  // FIXME: It might be reasonable to discount the cost of instructions leading
1515  // to unreachable as they have the lowest possible impact on both runtime and
1516  // code size.
1517  return true; // No actual code is needed for unreachable.
1518 }
1519 
1520 bool CallAnalyzer::visitInstruction(Instruction &I) {
1521  // Some instructions are free. All of the free intrinsics can also be
1522  // handled by SROA, etc.
1523  if (TargetTransformInfo::TCC_Free == TTI.getUserCost(&I))
1524  return true;
1525 
1526  // We found something we don't understand or can't handle. Mark any SROA-able
1527  // values in the operand list as no longer viable.
1528  for (User::op_iterator OI = I.op_begin(), OE = I.op_end(); OI != OE; ++OI)
1529  disableSROA(*OI);
1530 
1531  return false;
1532 }
1533 
1534 /// \brief Analyze a basic block for its contribution to the inline cost.
1535 ///
1536 /// This method walks the analyzer over every instruction in the given basic
1537 /// block and accounts for their cost during inlining at this callsite. It
1538  /// aborts early if the threshold has been exceeded or an impossible-to-inline
1539 /// construct has been detected. It returns false if inlining is no longer
1540 /// viable, and true if inlining remains viable.
1541 bool CallAnalyzer::analyzeBlock(BasicBlock *BB,
1542  SmallPtrSetImpl<const Value *> &EphValues) {
1543  for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I) {
1544  // FIXME: Currently, the number of instructions in a function, regardless of
1545  // our ability to simplify them to constants or dead code during inlining,
1546  // is actually used by the vector bonus heuristic. As long as that's true,
1547  // we have to special-case debug intrinsics here to prevent differences in
1548  // inlining due to debug symbols. Eventually, the number of unsimplified
1549  // instructions shouldn't factor into the cost computation, but until then,
1550  // hack around it here.
1551  if (isa<DbgInfoIntrinsic>(I))
1552  continue;
1553 
1554  // Skip ephemeral values.
1555  if (EphValues.count(&*I))
1556  continue;
1557 
1558  ++NumInstructions;
1559  if (isa<ExtractElementInst>(I) || I->getType()->isVectorTy())
1560  ++NumVectorInstructions;
1561 
1562  // If the instruction simplified to a constant, there is no cost to this
1563  // instruction. Visit the instructions using our InstVisitor to account for
1564  // all of the per-instruction logic. The visit tree returns true if we
1565  // consumed the instruction in any way, and false if the instruction's base
1566  // cost should count against inlining.
1567  if (Base::visit(&*I))
1568  ++NumInstructionsSimplified;
1569  else
1570  Cost += InlineConstants::InstrCost;
1571 
1572  using namespace ore;
1573  // If visiting this instruction detected an uninlinable pattern, abort.
1574  if (IsRecursiveCall || ExposesReturnsTwice || HasDynamicAlloca ||
1575  HasIndirectBr || HasFrameEscape || UsesVarArgs) {
1576  if (ORE)
1577  ORE->emit([&]() {
1578  return OptimizationRemarkMissed(DEBUG_TYPE, "NeverInline",
1579  CandidateCS.getInstruction())
1580  << NV("Callee", &F)
1581  << " has uninlinable pattern and cost is not fully computed";
1582  });
1583  return false;
1584  }
1585 
1586  // If the caller is a recursive function then we don't want to inline
1587  // functions which allocate a lot of stack space because it would increase
1588  // the caller stack usage dramatically.
1589  if (IsCallerRecursive &&
1590  AllocatedSize > InlineConstants::TotalAllocaSizeRecursiveCaller) {
1591  if (ORE)
1592  ORE->emit([&]() {
1593  return OptimizationRemarkMissed(DEBUG_TYPE, "NeverInline",
1594  CandidateCS.getInstruction())
1595  << NV("Callee", &F)
1596  << " is recursive and allocates too much stack space. Cost is "
1597  "not fully computed";
1598  });
1599  return false;
1600  }
1601 
1602  // Check if we've passed the maximum possible threshold so we don't spin in
1603  // huge basic blocks that will never inline.
1604  if (Cost >= Threshold && !ComputeFullInlineCost)
1605  return false;
1606  }
1607 
1608  return true;
1609 }
1610 
1611 /// \brief Compute the base pointer and cumulative constant offsets for V.
1612 ///
1613 /// This strips all constant offsets off of V, leaving it the base pointer, and
1614 /// accumulates the total constant offset applied in the returned constant. It
1615  /// returns nullptr if V is not a pointer, and returns the constant '0' if
1616  /// there are no constant offsets applied.
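/// For example (an illustrative sketch): given
///   %a = alloca [4 x i32]
///   %p = getelementptr inbounds [4 x i32], [4 x i32]* %a, i32 0, i32 2
/// calling this with V = %p rewrites V to %a and returns the constant 8
/// (two i32 elements at 4 bytes each).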
1617 ConstantInt *CallAnalyzer::stripAndComputeInBoundsConstantOffsets(Value *&V) {
1618  if (!V->getType()->isPointerTy())
1619  return nullptr;
1620 
1621  unsigned AS = V->getType()->getPointerAddressSpace();
1622  unsigned IntPtrWidth = DL.getIndexSizeInBits(AS);
1623  APInt Offset = APInt::getNullValue(IntPtrWidth);
1624 
1625  // Even though we don't look through PHI nodes, we could be called on an
1626  // instruction in an unreachable block, which may be on a cycle.
1627  SmallPtrSet<Value *, 4> Visited;
1628  Visited.insert(V);
1629  do {
1630  if (GEPOperator *GEP = dyn_cast<GEPOperator>(V)) {
1631  if (!GEP->isInBounds() || !accumulateGEPOffset(*GEP, Offset))
1632  return nullptr;
1633  V = GEP->getPointerOperand();
1634  } else if (Operator::getOpcode(V) == Instruction::BitCast) {
1635  V = cast<Operator>(V)->getOperand(0);
1636  } else if (GlobalAlias *GA = dyn_cast<GlobalAlias>(V)) {
1637  if (GA->isInterposable())
1638  break;
1639  V = GA->getAliasee();
1640  } else {
1641  break;
1642  }
1643  assert(V->getType()->isPointerTy() && "Unexpected operand type!");
1644  } while (Visited.insert(V).second);
1645 
1646  Type *IntPtrTy = DL.getIntPtrType(V->getContext(), AS);
1647  return cast<ConstantInt>(ConstantInt::get(IntPtrTy, Offset));
1648 }
1649 
1650 /// \brief Find dead blocks due to deleted CFG edges during inlining.
1651 ///
1652 /// If we know the successor of the current block, \p CurrBB, has to be \p
1653 /// NextBB, the other successors of \p CurrBB are dead if these successors have
1654 /// no live incoming CFG edges. If one block is found to be dead, we can
1655 /// continue growing the dead block list by checking the successors of the dead
1656 /// blocks to see if all their incoming edges are dead or not.
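/// A small sketch: if CurrBB ends in 'br i1 %c, label %T, label %F' and %c
/// has simplified to true, then NextBB is %T, and %F is newly dead provided
/// every other edge into %F also comes from a dead block or from a block
/// whose known successor bypasses %F.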
1657 void CallAnalyzer::findDeadBlocks(BasicBlock *CurrBB, BasicBlock *NextBB) {
1658  auto IsEdgeDead = [&](BasicBlock *Pred, BasicBlock *Succ) {
1659  // A CFG edge is dead if the predecessor is dead or the predecessor has a
1660  // known successor which is not the one under examination.
1661  return (DeadBlocks.count(Pred) ||
1662  (KnownSuccessors[Pred] && KnownSuccessors[Pred] != Succ));
1663  };
1664 
1665  auto IsNewlyDead = [&](BasicBlock *BB) {
1666  // If all the edges to a block are dead, the block is also dead.
1667  return (!DeadBlocks.count(BB) &&
1668  llvm::all_of(predecessors(BB),
1669  [&](BasicBlock *P) { return IsEdgeDead(P, BB); }));
1670  };
1671 
1672  for (BasicBlock *Succ : successors(CurrBB)) {
1673  if (Succ == NextBB || !IsNewlyDead(Succ))
1674  continue;
1675  SmallVector<BasicBlock *, 4> NewDead;
1676  NewDead.push_back(Succ);
1677  while (!NewDead.empty()) {
1678  BasicBlock *Dead = NewDead.pop_back_val();
1679  if (DeadBlocks.insert(Dead))
1680  // Continue growing the dead block lists.
1681  for (BasicBlock *S : successors(Dead))
1682  if (IsNewlyDead(S))
1683  NewDead.push_back(S);
1684  }
1685  }
1686 }
1687 
1688 /// \brief Analyze a call site for potential inlining.
1689 ///
1690 /// Returns true if inlining this call is viable, and false if it is not
1691 /// viable. It computes the cost and adjusts the threshold based on numerous
1692 /// factors and heuristics. If this method returns false but the computed cost
1693 /// is below the computed threshold, then inlining was forcibly disabled by
1694 /// some artifact of the routine.
1695 bool CallAnalyzer::analyzeCall(CallSite CS) {
1696  ++NumCallsAnalyzed;
1697 
1698  // Perform some tweaks to the cost and threshold based on the direct
1699  // callsite information.
1700 
1701  // We want to more aggressively inline vector-dense kernels, so up the
1702  // threshold, and we'll lower it if the % of vector instructions gets too
1703  // low. Note that these bonuses are somewhat arbitrary and evolved over time
1704  // by accident as much as because they are principled bonuses.
1705  //
1706  // FIXME: It would be nice to remove all such bonuses. At least it would be
1707  // nice to base the bonus values on something more scientific.
1708  assert(NumInstructions == 0);
1709  assert(NumVectorInstructions == 0);
1710 
1711  // Update the threshold based on callsite properties
1712  updateThreshold(CS, F);
1713 
1714  // Speculatively apply all possible bonuses to Threshold. Since Cost can
1715  // never decrease, once it exceeds this Threshold at any point we can stop
1716  // processing the rest of the function body.
1717  Threshold += (SingleBBBonus + VectorBonus);
1718 
1719  // Give out bonuses for the callsite, as the instructions setting them up
1720  // will be gone after inlining.
1721  Cost -= getCallsiteCost(CS, DL);
1722 
1723  // If this function uses the coldcc calling convention, prefer not to inline
1724  // it.
1725  if (F.getCallingConv() == CallingConv::Cold)
1726  Cost += InlineConstants::ColdccPenalty;
1727 
1728  // Check if we're done. This can happen due to bonuses and penalties.
1729  if (Cost >= Threshold && !ComputeFullInlineCost)
1730  return false;
1731 
1732  if (F.empty())
1733  return true;
1734 
1735  Function *Caller = CS.getInstruction()->getFunction();
1736  // Check if the caller function is recursive itself.
1737  for (User *U : Caller->users()) {
1738  CallSite Site(U);
1739  if (!Site)
1740  continue;
1741  Instruction *I = Site.getInstruction();
1742  if (I->getFunction() == Caller) {
1743  IsCallerRecursive = true;
1744  break;
1745  }
1746  }
1747 
1748  // Populate our simplified values by mapping from function arguments to call
1749  // arguments with known important simplifications.
1750  CallSite::arg_iterator CAI = CS.arg_begin();
1751  for (Function::arg_iterator FAI = F.arg_begin(), FAE = F.arg_end();
1752  FAI != FAE; ++FAI, ++CAI) {
1753  assert(CAI != CS.arg_end());
1754  if (Constant *C = dyn_cast<Constant>(CAI))
1755  SimplifiedValues[&*FAI] = C;
1756 
1757  Value *PtrArg = *CAI;
1758  if (ConstantInt *C = stripAndComputeInBoundsConstantOffsets(PtrArg)) {
1759  ConstantOffsetPtrs[&*FAI] = std::make_pair(PtrArg, C->getValue());
1760 
1761  // We can SROA any pointer arguments derived from alloca instructions.
1762  if (isa<AllocaInst>(PtrArg)) {
1763  SROAArgValues[&*FAI] = PtrArg;
1764  SROAArgCosts[PtrArg] = 0;
1765  }
1766  }
1767  }
1768  NumConstantArgs = SimplifiedValues.size();
1769  NumConstantOffsetPtrArgs = ConstantOffsetPtrs.size();
1770  NumAllocaArgs = SROAArgValues.size();
1771 
1772  // FIXME: If a caller has multiple calls to a callee, we end up recomputing
1773  // the ephemeral values multiple times (and they're completely determined by
1774  // the callee, so this is purely duplicate work).
1775  SmallPtrSet<const Value *, 32> EphValues;
1776  CodeMetrics::collectEphemeralValues(&F, &GetAssumptionCache(F), EphValues);
1777 
1778  // The worklist of live basic blocks in the callee *after* inlining. We avoid
1779  // adding basic blocks of the callee which can be proven to be dead for this
1780  // particular call site in order to get more accurate cost estimates. This
1781  // requires a somewhat heavyweight iteration pattern: we need to walk the
1782  // basic blocks in a breadth-first order as we insert live successors. To
1783  // accomplish this, and because we exit early after crossing our threshold,
1784  // we use a small-size-optimized SetVector.
1785  typedef SetVector<BasicBlock *, SmallVector<BasicBlock *, 16>,
1786  SmallPtrSet<BasicBlock *, 16>>
1787  BBSetVector;
1788  BBSetVector BBWorklist;
1789  BBWorklist.insert(&F.getEntryBlock());
1790  bool SingleBB = true;
1791  // Note that we *must not* cache the size, this loop grows the worklist.
1792  for (unsigned Idx = 0; Idx != BBWorklist.size(); ++Idx) {
1793  // Bail out the moment we cross the threshold. This means we'll under-count
1794  // the cost, but only when undercounting doesn't matter.
1795  if (Cost >= Threshold && !ComputeFullInlineCost)
1796  break;
1797 
1798  BasicBlock *BB = BBWorklist[Idx];
1799  if (BB->empty())
1800  continue;
1801 
1802  // Disallow inlining a blockaddress. A blockaddress only has defined
1803  // behavior for an indirect branch in the same function, and we do not
1804  // currently support inlining indirect branches. But, the inliner may not
1805  // see an indirect branch that ends up being dead code at a particular call
1806  // site. If the blockaddress escapes the function, e.g., via a global
1807  // variable, inlining may lead to an invalid cross-function reference.
1808  if (BB->hasAddressTaken())
1809  return false;
1810 
1811  // Analyze the cost of this block. If we blow through the threshold, this
1812  // returns false, and we can bail out.
1813  if (!analyzeBlock(BB, EphValues))
1814  return false;
1815 
1816  TerminatorInst *TI = BB->getTerminator();
1817 
1818  // Add in the live successors by first checking whether we have a terminator
1819  // that may be simplified based on the values simplified by this call.
1820  if (BranchInst *BI = dyn_cast<BranchInst>(TI)) {
1821  if (BI->isConditional()) {
1822  Value *Cond = BI->getCondition();
1823  if (ConstantInt *SimpleCond =
1824  dyn_cast_or_null<ConstantInt>(SimplifiedValues.lookup(Cond))) {
1825  BasicBlock *NextBB = BI->getSuccessor(SimpleCond->isZero() ? 1 : 0);
1826  BBWorklist.insert(NextBB);
1827  KnownSuccessors[BB] = NextBB;
1828  findDeadBlocks(BB, NextBB);
1829  continue;
1830  }
1831  }
1832  } else if (SwitchInst *SI = dyn_cast<SwitchInst>(TI)) {
1833  Value *Cond = SI->getCondition();
1834  if (ConstantInt *SimpleCond =
1835  dyn_cast_or_null<ConstantInt>(SimplifiedValues.lookup(Cond))) {
1836  BasicBlock *NextBB = SI->findCaseValue(SimpleCond)->getCaseSuccessor();
1837  BBWorklist.insert(NextBB);
1838  KnownSuccessors[BB] = NextBB;
1839  findDeadBlocks(BB, NextBB);
1840  continue;
1841  }
1842  }
1843 
1844  // If we're unable to select a particular successor, just count all of
1845  // them.
1846  for (unsigned TIdx = 0, TSize = TI->getNumSuccessors(); TIdx != TSize;
1847  ++TIdx)
1848  BBWorklist.insert(TI->getSuccessor(TIdx));
1849 
1850  // If we had any successors at this point, then post-inlining is likely to
1851  // have them as well. Note that we assume any basic blocks which existed
1852  // due to branches or switches which folded above will also fold after
1853  // inlining.
1854  if (SingleBB && TI->getNumSuccessors() > 1) {
1855  // Take off the bonus we applied to the threshold.
1856  Threshold -= SingleBBBonus;
1857  SingleBB = false;
1858  }
1859  }
1860 
1861  bool OnlyOneCallAndLocalLinkage =
1862  F.hasLocalLinkage() && F.hasOneUse() && &F == CS.getCalledFunction();
1863  // If this is a noduplicate call, we can still inline as long as
1864  // inlining this would cause the removal of the caller (so the instruction
1865  // is not actually duplicated, just moved).
1866  if (!OnlyOneCallAndLocalLinkage && ContainsNoDuplicateCall)
1867  return false;
1868 
1869  // We applied the maximum possible vector bonus at the beginning. Now,
1870  // subtract the excess bonus, if any, from the Threshold before
1871  // comparing against Cost.
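  // For example, if only 3 of 40 instructions are vector (no more than a
  // tenth), the entire speculative VectorBonus is taken back; between a tenth
  // and a half, only half of it is.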
1872  if (NumVectorInstructions <= NumInstructions / 10)
1873  Threshold -= VectorBonus;
1874  else if (NumVectorInstructions <= NumInstructions / 2)
1875  Threshold -= VectorBonus/2;
1876 
1877  return Cost < std::max(1, Threshold);
1878 }
1879 
1880 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1881 /// \brief Dump stats about this call's analysis.
1882  LLVM_DUMP_METHOD void CallAnalyzer::dump() {
1883 #define DEBUG_PRINT_STAT(x) dbgs() << " " #x ": " << x << "\n"
1884  DEBUG_PRINT_STAT(NumConstantArgs);
1885  DEBUG_PRINT_STAT(NumConstantOffsetPtrArgs);
1886  DEBUG_PRINT_STAT(NumAllocaArgs);
1887  DEBUG_PRINT_STAT(NumConstantPtrCmps);
1888  DEBUG_PRINT_STAT(NumConstantPtrDiffs);
1889  DEBUG_PRINT_STAT(NumInstructionsSimplified);
1890  DEBUG_PRINT_STAT(NumInstructions);
1891  DEBUG_PRINT_STAT(SROACostSavings);
1892  DEBUG_PRINT_STAT(SROACostSavingsLost);
1893  DEBUG_PRINT_STAT(LoadEliminationCost);
1894  DEBUG_PRINT_STAT(ContainsNoDuplicateCall);
1895  DEBUG_PRINT_STAT(Cost);
1896  DEBUG_PRINT_STAT(Threshold);
1897 #undef DEBUG_PRINT_STAT
1898 }
1899 #endif
1900 
1901 /// \brief Test that there are no attribute conflicts between Caller and Callee
1902 /// that prevent inlining.
1903  static bool functionsHaveCompatibleAttributes(Function *Caller,
1904  Function *Callee,
1905  TargetTransformInfo &TTI) {
1906  return TTI.areInlineCompatible(Caller, Callee) &&
1907  AttributeFuncs::areInlineCompatible(*Caller, *Callee);
1908 }
1909 
1910  int llvm::getCallsiteCost(CallSite CS, const DataLayout &DL) {
1911  int Cost = 0;
1912  for (unsigned I = 0, E = CS.arg_size(); I != E; ++I) {
1913  if (CS.isByValArgument(I)) {
1914  // We approximate the number of loads and stores needed by dividing the
1915  // size of the byval type by the target's pointer size.
1916  PointerType *PTy = cast<PointerType>(CS.getArgument(I)->getType());
1917  unsigned TypeSize = DL.getTypeSizeInBits(PTy->getElementType());
1918  unsigned AS = PTy->getAddressSpace();
1919  unsigned PointerSize = DL.getPointerSizeInBits(AS);
1920  // Ceiling division.
1921  unsigned NumStores = (TypeSize + PointerSize - 1) / PointerSize;
1922 
1923  // If it generates more than 8 stores it is likely to be expanded as an
1924  // inline memcpy so we take that as an upper bound. Otherwise we assume
1925  // one load and one store per word copied.
1926  // FIXME: The maxStoresPerMemcpy setting from the target should be used
1927  // here instead of a magic number of 8, but it's not available via
1928  // DataLayout.
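      // A worked sketch (assuming an InstrCost of 5): a 32-byte byval struct
      // on a target with 64-bit pointers copies ceil(256 / 64) = 4 words, so
      // we charge 2 * 4 * 5 = 40; a 128-byte struct hits the 8-store cap and
      // is charged 2 * 8 * 5 = 80.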
1929  NumStores = std::min(NumStores, 8U);
1930 
1931  Cost += 2 * NumStores * InlineConstants::InstrCost;
1932  } else {
1933  // For non-byval arguments subtract off one instruction per call
1934  // argument.
1935  Cost += InlineConstants::InstrCost;
1936  }
1937  }
1938  // The call instruction also disappears after inlining.
1939  Cost += InlineConstants::InstrCost + InlineConstants::CallPenalty;
1940  return Cost;
1941 }
1942 
1943  InlineCost llvm::getInlineCost(
1944  CallSite CS, const InlineParams &Params, TargetTransformInfo &CalleeTTI,
1945  std::function<AssumptionCache &(Function &)> &GetAssumptionCache,
1946  Optional<function_ref<BlockFrequencyInfo &(Function &)>> GetBFI,
1947  ProfileSummaryInfo *PSI, OptimizationRemarkEmitter *ORE) {
1948  return getInlineCost(CS, CS.getCalledFunction(), Params, CalleeTTI,
1949  GetAssumptionCache, GetBFI, PSI, ORE);
1950 }
1951 
1952  InlineCost llvm::getInlineCost(
1953  CallSite CS, Function *Callee, const InlineParams &Params,
1954  TargetTransformInfo &CalleeTTI,
1955  std::function<AssumptionCache &(Function &)> &GetAssumptionCache,
1956  Optional<function_ref<BlockFrequencyInfo &(Function &)>> GetBFI,
1957  ProfileSummaryInfo *PSI, OptimizationRemarkEmitter *ORE) {
1958 
1959  // Cannot inline indirect calls.
1960  if (!Callee)
1961  return llvm::InlineCost::getNever();
1962 
1963  // Never inline calls with byval arguments that do not have the alloca
1964  // address space. Since byval arguments can be replaced with a copy to an
1965  // alloca, the inlined code would need to be adjusted to handle an argument
1966  // that lives in the alloca address space, which is somewhat complicated to
1967  // solve.
1968  unsigned AllocaAS = Callee->getParent()->getDataLayout().getAllocaAddrSpace();
1969  for (unsigned I = 0, E = CS.arg_size(); I != E; ++I)
1970  if (CS.isByValArgument(I)) {
1971  PointerType *PTy = cast<PointerType>(CS.getArgument(I)->getType());
1972  if (PTy->getAddressSpace() != AllocaAS)
1973  return llvm::InlineCost::getNever();
1974  }
1975 
1976  // Calls to functions with always-inline attributes should be inlined
1977  // whenever possible.
1978  if (CS.hasFnAttr(Attribute::AlwaysInline)) {
1979  if (isInlineViable(*Callee))
1980  return llvm::InlineCost::getAlways();
1981  return llvm::InlineCost::getNever();
1982  }
1983 
1984  // Never inline functions with conflicting attributes (unless callee has
1985  // always-inline attribute).
1986  Function *Caller = CS.getCaller();
1987  if (!functionsHaveCompatibleAttributes(Caller, Callee, CalleeTTI))
1988  return llvm::InlineCost::getNever();
1989 
1990  // Don't inline this call if the caller has the optnone attribute.
1991  if (Caller->hasFnAttribute(Attribute::OptimizeNone))
1992  return llvm::InlineCost::getNever();
1993 
1994  // Don't inline functions which can be interposed at link-time. Don't inline
1995  // functions marked noinline or call sites marked noinline.
1996  // Note: inlining non-exact non-interposable functions is fine, since we know
1997  // we have *a* correct implementation of the source level function.
1998  if (Callee->isInterposable() || Callee->hasFnAttribute(Attribute::NoInline) ||
1999  CS.isNoInline())
2000  return llvm::InlineCost::getNever();
2001 
2002  DEBUG(llvm::dbgs() << " Analyzing call of " << Callee->getName()
2003  << "... (caller:" << Caller->getName() << ")\n");
2004 
2005  CallAnalyzer CA(CalleeTTI, GetAssumptionCache, GetBFI, PSI, ORE, *Callee, CS,
2006  Params);
2007  bool ShouldInline = CA.analyzeCall(CS);
2008 
2009  DEBUG(CA.dump());
2010 
2011  // Check if there was a reason to force inlining or no inlining.
2012  if (!ShouldInline && CA.getCost() < CA.getThreshold())
2013  return InlineCost::getNever();
2014  if (ShouldInline && CA.getCost() >= CA.getThreshold())
2015  return InlineCost::getAlways();
2016 
2017  return llvm::InlineCost::get(CA.getCost(), CA.getThreshold());
2018 }
2019 
2020  bool llvm::isInlineViable(Function &F) {
2021  bool ReturnsTwice = F.hasFnAttribute(Attribute::ReturnsTwice);
2022  for (Function::iterator BI = F.begin(), BE = F.end(); BI != BE; ++BI) {
2023  // Disallow inlining of functions which contain indirect branches or
2024  // blockaddresses.
2025  if (isa<IndirectBrInst>(BI->getTerminator()) || BI->hasAddressTaken())
2026  return false;
2027 
2028  for (auto &II : *BI) {
2029  CallSite CS(&II);
2030  if (!CS)
2031  continue;
2032 
2033  // Disallow recursive calls.
2034  if (&F == CS.getCalledFunction())
2035  return false;
2036 
2037  // Disallow calls which expose returns-twice to a function not previously
2038  // attributed as such.
2039  if (!ReturnsTwice && CS.isCall() &&
2040  cast<CallInst>(CS.getInstruction())->canReturnTwice())
2041  return false;
2042 
2043  if (CS.getCalledFunction())
2044  switch (CS.getCalledFunction()->getIntrinsicID()) {
2045  default:
2046  break;
2047  // Disallow inlining functions that call @llvm.localescape. Doing this
2048  // correctly would require major changes to the inliner.
2049  case llvm::Intrinsic::localescape:
2050  // Disallow inlining of functions that access VarArgs.
2051  case llvm::Intrinsic::vastart:
2052  case llvm::Intrinsic::vaend:
2053  return false;
2054  }
2055  }
2056  }
2057 
2058  return true;
2059 }
2060 
2061 // APIs to create InlineParams based on command line flags and/or other
2062 // parameters.
2063 
2064  InlineParams llvm::getInlineParams(int Threshold) {
2065  InlineParams Params;
2066 
2067  // This field is the threshold to use for a callee by default. This is
2068  // derived from one or more of:
2069  // * optimization or size-optimization levels,
2070  // * a value passed to createFunctionInliningPass function, or
2071  // * the -inline-threshold flag.
2072  // If the -inline-threshold flag is explicitly specified, that is used
2073  // irrespective of anything else.
2074  if (InlineThreshold.getNumOccurrences() > 0)
2075  Params.DefaultThreshold = InlineThreshold;
2076  else
2077  Params.DefaultThreshold = Threshold;
2078 
2079  // Set the HintThreshold knob from the -inlinehint-threshold.
2080  Params.HintThreshold = HintThreshold;
2081 
2082  // Set the HotCallSiteThreshold knob from the -hot-callsite-threshold.
2083  Params.HotCallSiteThreshold = HotCallSiteThreshold;
2084 
2085  // If the -locally-hot-callsite-threshold is explicitly specified, use it to
2086  // populate LocallyHotCallSiteThreshold. Later, we populate
2087  // Params.LocallyHotCallSiteThreshold from -locally-hot-callsite-threshold if
2088  // we know that optimization level is O3 (in the getInlineParams variant that
2089  // takes the opt and size levels).
2090  // FIXME: Remove this check (and make the assignment unconditional) after
2091  // addressing size regression issues at O2.
2092  if (LocallyHotCallSiteThreshold.getNumOccurrences() > 0)
2093  Params.LocallyHotCallSiteThreshold = LocallyHotCallSiteThreshold;
2094 
2095  // Set the ColdCallSiteThreshold knob from the -inline-cold-callsite-threshold.
2096  Params.ColdCallSiteThreshold = ColdCallSiteThreshold;
2097 
2098  // Set the OptMinSizeThreshold and OptSizeThreshold params only if the
2099  // -inline-threshold command-line option is not explicitly given. If that
2100  // option is present, then its value applies even for callees with size and
2101  // minsize attributes.
2102  // If the -inline-threshold is not specified, set the ColdThreshold from the
2103  // -inlinecold-threshold even if it is not explicitly passed. If
2104  // -inline-threshold is specified, then -inlinecold-threshold needs to be
2105  // explicitly specified to set the ColdThreshold knob.
2106  if (InlineThreshold.getNumOccurrences() == 0) {
2107  Params.OptMinSizeThreshold = InlineConstants::OptMinSizeThreshold;
2108  Params.OptSizeThreshold = InlineConstants::OptSizeThreshold;
2109  Params.ColdThreshold = ColdThreshold;
2110  } else if (ColdThreshold.getNumOccurrences() > 0) {
2111  Params.ColdThreshold = ColdThreshold;
2112  }
2113  return Params;
2114 }
2115 
2116  InlineParams llvm::getInlineParams() {
2117  return getInlineParams(InlineThreshold);
2118 }
2119 
2120 // Compute the default threshold for inlining based on the opt level and the
2121 // size opt level.
2122 static int computeThresholdFromOptLevels(unsigned OptLevel,
2123  unsigned SizeOptLevel) {
2124  if (OptLevel > 2)
2125  return InlineConstants::OptAggressiveThreshold;
2126  if (SizeOptLevel == 1) // -Os
2127  return InlineConstants::OptSizeThreshold;
2128  if (SizeOptLevel == 2) // -Oz
2129  return InlineConstants::OptMinSizeThreshold;
2130  return InlineThreshold;
2131 }
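// A usage sketch: getInlineParams(3, 0) selects the aggressive -O3 threshold,
// getInlineParams(2, 1) the -Os threshold, and getInlineParams(2, 2) the -Oz
// threshold; any other combination falls back to the -inline-threshold value.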
2132 
2133 InlineParams llvm::getInlineParams(unsigned OptLevel, unsigned SizeOptLevel) {
2134  auto Params =
2135  getInlineParams(computeThresholdFromOptLevels(OptLevel, SizeOptLevel));
2136  // At O3, use the value of -locally-hot-callsite-threshold option to populate
2137  // Params.LocallyHotCallSiteThreshold. Below O3, this flag has an effect only
2138  // when it is specified explicitly.
2139  if (OptLevel > 2)
2140  Params.LocallyHotCallSiteThreshold = LocallyHotCallSiteThreshold;
2141  return Params;
2142 }