//===- InlineCost.cpp - Cost analysis for inliner -------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file implements inline cost analysis.
//
//===----------------------------------------------------------------------===//

#include "llvm/Analysis/InlineCost.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Analysis/BlockFrequencyInfo.h"
#include "llvm/Analysis/CodeMetrics.h"
#include "llvm/Analysis/ConstantFolding.h"
#include "llvm/Analysis/CFG.h"
#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/ProfileSummaryInfo.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/Config/llvm-config.h"
#include "llvm/IR/CallSite.h"
#include "llvm/IR/CallingConv.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/GetElementPtrTypeIterator.h"
#include "llvm/IR/GlobalAlias.h"
#include "llvm/IR/InstVisitor.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Operator.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"

using namespace llvm;

#define DEBUG_TYPE "inline-cost"

STATISTIC(NumCallsAnalyzed, "Number of call sites analyzed");

static cl::opt<int> InlineThreshold(
    "inline-threshold", cl::Hidden, cl::init(225), cl::ZeroOrMore,
    cl::desc("Control the amount of inlining to perform (default = 225)"));

static cl::opt<int> HintThreshold(
    "inlinehint-threshold", cl::Hidden, cl::init(325), cl::ZeroOrMore,
    cl::desc("Threshold for inlining functions with inline hint"));

static cl::opt<int>
    ColdCallSiteThreshold("inline-cold-callsite-threshold", cl::Hidden,
                          cl::init(45), cl::ZeroOrMore,
                          cl::desc("Threshold for inlining cold callsites"));

// We introduce this threshold to help performance of instrumentation-based
// PGO before we actually hook up the inliner with analysis passes such as
// BPI and BFI.
static cl::opt<int> ColdThreshold(
    "inlinecold-threshold", cl::Hidden, cl::init(45), cl::ZeroOrMore,
    cl::desc("Threshold for inlining functions with cold attribute"));

static cl::opt<int>
    HotCallSiteThreshold("hot-callsite-threshold", cl::Hidden, cl::init(3000),
                         cl::ZeroOrMore,
                         cl::desc("Threshold for hot callsites"));

static cl::opt<int> LocallyHotCallSiteThreshold(
    "locally-hot-callsite-threshold", cl::Hidden, cl::init(525), cl::ZeroOrMore,
    cl::desc("Threshold for locally hot callsites"));

static cl::opt<int> ColdCallSiteRelFreq(
    "cold-callsite-rel-freq", cl::Hidden, cl::init(2), cl::ZeroOrMore,
    cl::desc("Maximum block frequency, expressed as a percentage of caller's "
             "entry frequency, for a callsite to be cold in the absence of "
             "profile information."));

static cl::opt<int> HotCallSiteRelFreq(
    "hot-callsite-rel-freq", cl::Hidden, cl::init(60), cl::ZeroOrMore,
    cl::desc("Minimum block frequency, expressed as a multiple of caller's "
             "entry frequency, for a callsite to be hot in the absence of "
             "profile information."));

static cl::opt<bool> OptComputeFullInlineCost(
    "inline-cost-full", cl::Hidden, cl::init(false), cl::ZeroOrMore,
    cl::desc("Compute the full inline cost of a call site even when the cost "
             "exceeds the threshold."));

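// Illustrative use of the knobs above (an example invocation, not part of
// this file's logic): with the legacy pass manager, something like
//   opt -inline -inline-threshold=500 -S input.ll
// raises the base inlining threshold from the default 225 to 500.
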
namespace {

class CallAnalyzer : public InstVisitor<CallAnalyzer, bool> {
  typedef InstVisitor<CallAnalyzer, bool> Base;
  friend class InstVisitor<CallAnalyzer, bool>;

  /// The TargetTransformInfo available for this compilation.
  const TargetTransformInfo &TTI;

  /// Getter for the cache of @llvm.assume intrinsics.
  std::function<AssumptionCache &(Function &)> &GetAssumptionCache;

  /// Getter for BlockFrequencyInfo.
  Optional<function_ref<BlockFrequencyInfo &(Function &)>> &GetBFI;

  /// Profile summary information.
  ProfileSummaryInfo *PSI;

  /// The called function.
  Function &F;

  // Cache the DataLayout since we use it a lot.
  const DataLayout &DL;

  /// The OptimizationRemarkEmitter available for this compilation.
  OptimizationRemarkEmitter *ORE;

  /// The candidate callsite being analyzed. Please do not use this to do
  /// analysis in the caller function; we want the inline cost query to be
  /// easily cacheable. Instead, use the cover function paramHasAttr.
  CallSite CandidateCS;

  /// Tunable parameters that control the analysis.
  const InlineParams &Params;

  int Threshold;
  int Cost;
  bool ComputeFullInlineCost;

  bool IsCallerRecursive;
  bool IsRecursiveCall;
  bool ExposesReturnsTwice;
  bool HasDynamicAlloca;
  bool ContainsNoDuplicateCall;
  bool HasReturn;
  bool HasIndirectBr;
  bool HasUninlineableIntrinsic;
  bool InitsVargArgs;

  /// Number of bytes allocated statically by the callee.
  uint64_t AllocatedSize;
  unsigned NumInstructions, NumVectorInstructions;
  int VectorBonus, TenPercentVectorBonus;
  // Bonus to be applied when the callee has only one reachable basic block.
  int SingleBBBonus;

  /// While we walk the potentially-inlined instructions, we build up and
  /// maintain a mapping of simplified values specific to this callsite. The
  /// idea is to propagate any special information we have about arguments to
  /// this call through the inlinable section of the function, and account for
  /// likely simplifications post-inlining. The most important aspect we track
  /// is CFG altering simplifications -- when we prove a basic block dead, that
  /// can cause dramatic shifts in the cost of inlining a function.
  DenseMap<Value *, Constant *> SimplifiedValues;

  /// Keep track of the values which map back (through function arguments) to
  /// allocas on the caller stack which could be simplified through SROA.
  DenseMap<Value *, Value *> SROAArgValues;

  /// The mapping of caller Alloca values to their accumulated cost savings. If
  /// we have to disable SROA for one of the allocas, this tells us how much
  /// cost must be added.
  DenseMap<Value *, int> SROAArgCosts;

  /// Keep track of values which map to a pointer base and constant offset.
  DenseMap<Value *, std::pair<Value *, APInt>> ConstantOffsetPtrs;

  /// Keep track of dead blocks due to the constant arguments.
  SetVector<BasicBlock *> DeadBlocks;

  /// The mapping of the blocks to their known unique successors due to the
  /// constant arguments.
  DenseMap<BasicBlock *, BasicBlock *> KnownSuccessors;

  /// Model the elimination of repeated loads that is expected to happen
  /// whenever we simplify away the stores that would otherwise cause them to
  /// be loads.
  bool EnableLoadElimination;
  SmallPtrSet<Value *, 16> LoadAddrSet;
  int LoadEliminationCost;

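  // A small illustration of the load-elimination bookkeeping: for two simple
  // loads from the same address with no intervening clobber,
  //   %a = load i32, i32* %p
  //   %b = load i32, i32* %p
  // the second load's cost is banked in LoadEliminationCost and is added back
  // to Cost only if a store or opaque call later disables the optimization.
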
  // Custom simplification helper routines.
  bool isAllocaDerivedArg(Value *V);
  bool lookupSROAArgAndCost(Value *V, Value *&Arg,
                            DenseMap<Value *, int>::iterator &CostIt);
  void disableSROA(DenseMap<Value *, int>::iterator CostIt);
  void disableSROA(Value *V);
  void findDeadBlocks(BasicBlock *CurrBB, BasicBlock *NextBB);
  void accumulateSROACost(DenseMap<Value *, int>::iterator CostIt,
                          int InstructionCost);
  void disableLoadElimination();
  bool isGEPFree(GetElementPtrInst &GEP);
  bool canFoldInboundsGEP(GetElementPtrInst &I);
  bool accumulateGEPOffset(GEPOperator &GEP, APInt &Offset);
  bool simplifyCallSite(Function *F, CallSite CS);
  template <typename Callable>
  bool simplifyInstruction(Instruction &I, Callable Evaluate);
  ConstantInt *stripAndComputeInBoundsConstantOffsets(Value *&V);

  /// Return true if the given argument to the function being considered for
  /// inlining has the given attribute set either at the call site or the
  /// function declaration. Primarily used to inspect call site specific
  /// attributes since these can be more precise than the ones on the callee
  /// itself.
  bool paramHasAttr(Argument *A, Attribute::AttrKind Attr);

  /// Return true if the given value is known non-null within the callee if
  /// inlined through this particular callsite.
  bool isKnownNonNullInCallee(Value *V);

  /// Update Threshold based on callsite properties such as callee
  /// attributes and callee hotness for PGO builds. The Callee is explicitly
  /// passed to support analyzing indirect calls whose target is inferred by
  /// analysis.
  void updateThreshold(CallSite CS, Function &Callee);

  /// Return true if size growth is allowed when inlining the callee at CS.
  bool allowSizeGrowth(CallSite CS);

  /// Return true if \p CS is a cold callsite.
  bool isColdCallSite(CallSite CS, BlockFrequencyInfo *CallerBFI);

  /// Return a higher threshold if \p CS is a hot callsite.
  Optional<int> getHotCallSiteThreshold(CallSite CS,
                                        BlockFrequencyInfo *CallerBFI);

  // Custom analysis routines.
  InlineResult analyzeBlock(BasicBlock *BB,
                            SmallPtrSetImpl<const Value *> &EphValues);

  // Disable several entry points to the visitor so we don't accidentally use
  // them by declaring but not defining them here.
  void visit(Module *);
  void visit(Module &);
  void visit(Function *);
  void visit(Function &);
  void visit(BasicBlock *);
  void visit(BasicBlock &);

  // Provide base case for our instruction visit.
  bool visitInstruction(Instruction &I);

  // Our visit overrides.
  bool visitAlloca(AllocaInst &I);
  bool visitPHI(PHINode &I);
  bool visitGetElementPtr(GetElementPtrInst &I);
  bool visitBitCast(BitCastInst &I);
  bool visitPtrToInt(PtrToIntInst &I);
  bool visitIntToPtr(IntToPtrInst &I);
  bool visitCastInst(CastInst &I);
  bool visitUnaryInstruction(UnaryInstruction &I);
  bool visitCmpInst(CmpInst &I);
  bool visitSub(BinaryOperator &I);
  bool visitBinaryOperator(BinaryOperator &I);
  bool visitLoad(LoadInst &I);
  bool visitStore(StoreInst &I);
  bool visitExtractValue(ExtractValueInst &I);
  bool visitInsertValue(InsertValueInst &I);
  bool visitCallSite(CallSite CS);
  bool visitReturnInst(ReturnInst &RI);
  bool visitBranchInst(BranchInst &BI);
  bool visitSelectInst(SelectInst &SI);
  bool visitSwitchInst(SwitchInst &SI);
  bool visitIndirectBrInst(IndirectBrInst &IBI);
  bool visitResumeInst(ResumeInst &RI);
  bool visitCleanupReturnInst(CleanupReturnInst &RI);
  bool visitCatchReturnInst(CatchReturnInst &RI);
  bool visitUnreachableInst(UnreachableInst &I);

public:
  CallAnalyzer(const TargetTransformInfo &TTI,
               std::function<AssumptionCache &(Function &)> &GetAssumptionCache,
               Optional<function_ref<BlockFrequencyInfo &(Function &)>> &GetBFI,
               ProfileSummaryInfo *PSI, OptimizationRemarkEmitter *ORE,
               Function &Callee, CallSite CSArg, const InlineParams &Params)
      : TTI(TTI), GetAssumptionCache(GetAssumptionCache), GetBFI(GetBFI),
        PSI(PSI), F(Callee), DL(F.getParent()->getDataLayout()), ORE(ORE),
        CandidateCS(CSArg), Params(Params), Threshold(Params.DefaultThreshold),
        Cost(0), ComputeFullInlineCost(OptComputeFullInlineCost ||
                                       Params.ComputeFullInlineCost || ORE),
        IsCallerRecursive(false), IsRecursiveCall(false),
        ExposesReturnsTwice(false), HasDynamicAlloca(false),
        ContainsNoDuplicateCall(false), HasReturn(false), HasIndirectBr(false),
        HasUninlineableIntrinsic(false), InitsVargArgs(false), AllocatedSize(0),
        NumInstructions(0), NumVectorInstructions(0), VectorBonus(0),
        SingleBBBonus(0), EnableLoadElimination(true), LoadEliminationCost(0),
        NumConstantArgs(0), NumConstantOffsetPtrArgs(0), NumAllocaArgs(0),
        NumConstantPtrCmps(0), NumConstantPtrDiffs(0),
        NumInstructionsSimplified(0), SROACostSavings(0),
        SROACostSavingsLost(0) {}

  InlineResult analyzeCall(CallSite CS);

  int getThreshold() { return Threshold; }
  int getCost() { return Cost; }

  // Keep a bunch of stats about the cost savings found so we can print them
  // out when debugging.
  unsigned NumConstantArgs;
  unsigned NumConstantOffsetPtrArgs;
  unsigned NumAllocaArgs;
  unsigned NumConstantPtrCmps;
  unsigned NumConstantPtrDiffs;
  unsigned NumInstructionsSimplified;
  unsigned SROACostSavings;
  unsigned SROACostSavingsLost;

  void dump();
};

} // namespace

/// Test whether the given value is an Alloca-derived function argument.
bool CallAnalyzer::isAllocaDerivedArg(Value *V) {
  return SROAArgValues.count(V);
}

/// Lookup the SROA-candidate argument and cost iterator which V maps to.
/// Returns false if V does not map to a SROA-candidate.
bool CallAnalyzer::lookupSROAArgAndCost(
    Value *V, Value *&Arg, DenseMap<Value *, int>::iterator &CostIt) {
  if (SROAArgValues.empty() || SROAArgCosts.empty())
    return false;

  DenseMap<Value *, Value *>::iterator ArgIt = SROAArgValues.find(V);
  if (ArgIt == SROAArgValues.end())
    return false;

  Arg = ArgIt->second;
  CostIt = SROAArgCosts.find(Arg);
  return CostIt != SROAArgCosts.end();
}

/// Disable SROA for the candidate marked by this cost iterator.
///
/// This marks the candidate as no longer viable for SROA, and adds the cost
/// savings associated with it back into the inline cost measurement.
void CallAnalyzer::disableSROA(DenseMap<Value *, int>::iterator CostIt) {
  // If we're no longer able to perform SROA we need to undo its cost savings
  // and prevent subsequent analysis.
  Cost += CostIt->second;
  SROACostSavings -= CostIt->second;
  SROACostSavingsLost += CostIt->second;
  SROAArgCosts.erase(CostIt);
  disableLoadElimination();
}

/// If 'V' maps to a SROA candidate, disable SROA for it.
void CallAnalyzer::disableSROA(Value *V) {
  Value *SROAArg;
  DenseMap<Value *, int>::iterator CostIt;
  if (lookupSROAArgAndCost(V, SROAArg, CostIt))
    disableSROA(CostIt);
}

/// Accumulate the given cost for a particular SROA candidate.
void CallAnalyzer::accumulateSROACost(DenseMap<Value *, int>::iterator CostIt,
                                      int InstructionCost) {
  CostIt->second += InstructionCost;
  SROACostSavings += InstructionCost;
}

void CallAnalyzer::disableLoadElimination() {
  if (EnableLoadElimination) {
    Cost += LoadEliminationCost;
    LoadEliminationCost = 0;
    EnableLoadElimination = false;
  }
}

/// Accumulate a constant GEP offset into an APInt if possible.
///
/// Returns false if unable to compute the offset for any reason. Respects any
/// simplified values known during the analysis of this callsite.
bool CallAnalyzer::accumulateGEPOffset(GEPOperator &GEP, APInt &Offset) {
  unsigned IntPtrWidth = DL.getIndexTypeSizeInBits(GEP.getType());
  assert(IntPtrWidth == Offset.getBitWidth());

  for (gep_type_iterator GTI = gep_type_begin(GEP), GTE = gep_type_end(GEP);
       GTI != GTE; ++GTI) {
    ConstantInt *OpC = dyn_cast<ConstantInt>(GTI.getOperand());
    if (!OpC)
      if (Constant *SimpleOp = SimplifiedValues.lookup(GTI.getOperand()))
        OpC = dyn_cast<ConstantInt>(SimpleOp);
    if (!OpC)
      return false;
    if (OpC->isZero())
      continue;

    // Handle a struct index, which adds its field offset to the pointer.
    if (StructType *STy = GTI.getStructTypeOrNull()) {
      unsigned ElementIdx = OpC->getZExtValue();
      const StructLayout *SL = DL.getStructLayout(STy);
      Offset += APInt(IntPtrWidth, SL->getElementOffset(ElementIdx));
      continue;
    }

    APInt TypeSize(IntPtrWidth, DL.getTypeAllocSize(GTI.getIndexedType()));
    Offset += OpC->getValue().sextOrTrunc(IntPtrWidth) * TypeSize;
  }
  return true;
}
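
// Worked example (illustrative IR): with %pair = type { i32, [4 x i32] },
//   getelementptr inbounds %pair, %pair* %p, i32 0, i32 1, i32 2
// contributes getElementOffset(1) == 4 bytes for the struct index plus
// 2 * 4 == 8 bytes for the array index, accumulating Offset to 12.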

/// Use TTI to check whether a GEP is free.
///
/// Respects any simplified values known during the analysis of this callsite.
bool CallAnalyzer::isGEPFree(GetElementPtrInst &GEP) {
  SmallVector<Value *, 4> Operands;
  Operands.push_back(GEP.getOperand(0));
  for (User::op_iterator I = GEP.idx_begin(), E = GEP.idx_end(); I != E; ++I)
    if (Constant *SimpleOp = SimplifiedValues.lookup(*I))
      Operands.push_back(SimpleOp);
    else
      Operands.push_back(*I);
  return TargetTransformInfo::TCC_Free == TTI.getUserCost(&GEP, Operands);
}

bool CallAnalyzer::visitAlloca(AllocaInst &I) {
  // Check whether inlining will turn a dynamic alloca into a static
  // alloca and handle that case.
  if (I.isArrayAllocation()) {
    Constant *Size = SimplifiedValues.lookup(I.getArraySize());
    if (auto *AllocSize = dyn_cast_or_null<ConstantInt>(Size)) {
      Type *Ty = I.getAllocatedType();
      AllocatedSize = SaturatingMultiplyAdd(
          AllocSize->getLimitedValue(), DL.getTypeAllocSize(Ty), AllocatedSize);
      return Base::visitAlloca(I);
    }
  }

  // Accumulate the allocated size.
  if (I.isStaticAlloca()) {
    Type *Ty = I.getAllocatedType();
    AllocatedSize = SaturatingAdd(DL.getTypeAllocSize(Ty), AllocatedSize);
  }

  // We will happily inline static alloca instructions.
  if (I.isStaticAlloca())
    return Base::visitAlloca(I);

  // FIXME: This is overly conservative. Dynamic allocas are inefficient for
  // a variety of reasons, and so we would like to not inline them into
  // functions which don't currently have a dynamic alloca. This simply
  // disables inlining altogether in the presence of a dynamic alloca.
  HasDynamicAlloca = true;
  return false;
}
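
// For example, `%buf = alloca [64 x i8]` adds 64 bytes to AllocatedSize,
// while `%v = alloca i32, i32 %n` only counts as static if %n simplified to
// a constant at this call site; otherwise it sets HasDynamicAlloca and
// blocks inlining.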

bool CallAnalyzer::visitPHI(PHINode &I) {
  // FIXME: We need to propagate SROA *disabling* through phi nodes, even
  // though we don't want to propagate its bonuses. The idea is to disable
  // SROA if it *might* be used in an inappropriate manner.

  // Phi nodes are always zero-cost.
  // FIXME: Pointer sizes may differ between different address spaces, so do
  // we need to use the correct address space in the call to
  // getPointerSizeInBits here? Or could we skip the getPointerSizeInBits call
  // completely? As far as I can see the ZeroOffset is used as a dummy value,
  // so we can probably use any bit width for the ZeroOffset?
  APInt ZeroOffset = APInt::getNullValue(DL.getPointerSizeInBits(0));
  bool CheckSROA = I.getType()->isPointerTy();

  // Track the constant or pointer with constant offset we've seen so far.
  Constant *FirstC = nullptr;
  std::pair<Value *, APInt> FirstBaseAndOffset = {nullptr, ZeroOffset};
  Value *FirstV = nullptr;

  for (unsigned i = 0, e = I.getNumIncomingValues(); i != e; ++i) {
    BasicBlock *Pred = I.getIncomingBlock(i);
    // If the incoming block is dead, skip the incoming block.
    if (DeadBlocks.count(Pred))
      continue;
    // If the parent block of phi is not the known successor of the incoming
    // block, skip the incoming block.
    BasicBlock *KnownSuccessor = KnownSuccessors[Pred];
    if (KnownSuccessor && KnownSuccessor != I.getParent())
      continue;

    Value *V = I.getIncomingValue(i);
    // If the incoming value is this phi itself, skip the incoming value.
    if (&I == V)
      continue;

    Constant *C = dyn_cast<Constant>(V);
    if (!C)
      C = SimplifiedValues.lookup(V);

    std::pair<Value *, APInt> BaseAndOffset = {nullptr, ZeroOffset};
    if (!C && CheckSROA)
      BaseAndOffset = ConstantOffsetPtrs.lookup(V);

    if (!C && !BaseAndOffset.first)
      // The incoming value is neither a constant nor a pointer with constant
      // offset, exit early.
      return true;

    if (FirstC) {
      if (FirstC == C)
        // If we've seen a constant incoming value before and it is the same
        // constant we see this time, continue checking the next incoming
        // value.
        continue;
      // Otherwise early exit because we either see a different constant or
      // saw a constant before but we have a pointer with constant offset this
      // time.
      return true;
    }

    if (FirstV) {
      // The same logic as above, but check pointer with constant offset here.
      if (FirstBaseAndOffset == BaseAndOffset)
        continue;
      return true;
    }

    if (C) {
      // This is the first time we've seen a constant, record it.
      FirstC = C;
      continue;
    }

    // The remaining case is that this is the first time we've seen a pointer
    // with constant offset, record it.
    FirstV = V;
    FirstBaseAndOffset = BaseAndOffset;
  }

  // Check if we can map phi to a constant.
  if (FirstC) {
    SimplifiedValues[&I] = FirstC;
    return true;
  }

  // Check if we can map phi to a pointer with constant offset.
  if (FirstBaseAndOffset.first) {
    ConstantOffsetPtrs[&I] = FirstBaseAndOffset;

    Value *SROAArg;
    DenseMap<Value *, int>::iterator CostIt;
    if (lookupSROAArgAndCost(FirstV, SROAArg, CostIt))
      SROAArgValues[&I] = SROAArg;
  }

  return true;
}
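
// Illustrative case: once dead incoming blocks are skipped, a phi such as
//   %x = phi i32 [ 4, %then ], [ 4, %else ]
// is recorded in SimplifiedValues as the constant 4, letting later users of
// %x fold as well.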

/// Check we can fold GEPs of constant-offset call site argument pointers.
/// This requires target data and inbounds GEPs.
///
/// \return true if the specified GEP can be folded.
bool CallAnalyzer::canFoldInboundsGEP(GetElementPtrInst &I) {
  // Check if we have a base + offset for the pointer.
  std::pair<Value *, APInt> BaseAndOffset =
      ConstantOffsetPtrs.lookup(I.getPointerOperand());
  if (!BaseAndOffset.first)
    return false;

  // Check if the offset of this GEP is constant, and if so accumulate it
  // into Offset.
  if (!accumulateGEPOffset(cast<GEPOperator>(I), BaseAndOffset.second))
    return false;

  // Add the result as a new mapping to Base + Offset.
  ConstantOffsetPtrs[&I] = BaseAndOffset;

  return true;
}

bool CallAnalyzer::visitGetElementPtr(GetElementPtrInst &I) {
  Value *SROAArg;
  DenseMap<Value *, int>::iterator CostIt;
  bool SROACandidate =
      lookupSROAArgAndCost(I.getPointerOperand(), SROAArg, CostIt);

  // Lambda to check whether a GEP's indices are all constant.
  auto IsGEPOffsetConstant = [&](GetElementPtrInst &GEP) {
    for (User::op_iterator I = GEP.idx_begin(), E = GEP.idx_end(); I != E; ++I)
      if (!isa<Constant>(*I) && !SimplifiedValues.lookup(*I))
        return false;
    return true;
  };

  if ((I.isInBounds() && canFoldInboundsGEP(I)) || IsGEPOffsetConstant(I)) {
    if (SROACandidate)
      SROAArgValues[&I] = SROAArg;

    // Constant GEPs are modeled as free.
    return true;
  }

  // Variable GEPs will require math and will disable SROA.
  if (SROACandidate)
    disableSROA(CostIt);
  return isGEPFree(I);
}

/// Simplify \p I if its operands are constants and update SimplifiedValues.
/// \p Evaluate is a callable specific to instruction type that evaluates the
/// instruction when all the operands are constants.
template <typename Callable>
bool CallAnalyzer::simplifyInstruction(Instruction &I, Callable Evaluate) {
  SmallVector<Constant *, 2> COps;
  for (Value *Op : I.operands()) {
    Constant *COp = dyn_cast<Constant>(Op);
    if (!COp)
      COp = SimplifiedValues.lookup(Op);
    if (!COp)
      return false;
    COps.push_back(COp);
  }
  auto *C = Evaluate(COps);
  if (!C)
    return false;
  SimplifiedValues[&I] = C;
  return true;
}

bool CallAnalyzer::visitBitCast(BitCastInst &I) {
  // Propagate constants through bitcasts.
  if (simplifyInstruction(I, [&](SmallVectorImpl<Constant *> &COps) {
        return ConstantExpr::getBitCast(COps[0], I.getType());
      }))
    return true;

  // Track base/offsets through casts.
  std::pair<Value *, APInt> BaseAndOffset =
      ConstantOffsetPtrs.lookup(I.getOperand(0));
  // Casts don't change the offset, just wrap it up.
  if (BaseAndOffset.first)
    ConstantOffsetPtrs[&I] = BaseAndOffset;

  // Also look for SROA candidates here.
  Value *SROAArg;
  DenseMap<Value *, int>::iterator CostIt;
  if (lookupSROAArgAndCost(I.getOperand(0), SROAArg, CostIt))
    SROAArgValues[&I] = SROAArg;

  // Bitcasts are always zero cost.
  return true;
}

bool CallAnalyzer::visitPtrToInt(PtrToIntInst &I) {
  // Propagate constants through ptrtoint.
  if (simplifyInstruction(I, [&](SmallVectorImpl<Constant *> &COps) {
        return ConstantExpr::getPtrToInt(COps[0], I.getType());
      }))
    return true;

  // Track base/offset pairs when converted to a plain integer provided the
  // integer is large enough to represent the pointer.
  unsigned IntegerSize = I.getType()->getScalarSizeInBits();
  unsigned AS = I.getOperand(0)->getType()->getPointerAddressSpace();
  if (IntegerSize >= DL.getPointerSizeInBits(AS)) {
    std::pair<Value *, APInt> BaseAndOffset =
        ConstantOffsetPtrs.lookup(I.getOperand(0));
    if (BaseAndOffset.first)
      ConstantOffsetPtrs[&I] = BaseAndOffset;
  }

  // This is really weird. Technically, ptrtoint will disable SROA. However,
  // unless that ptrtoint is *used* somewhere in the live basic blocks after
  // inlining, it will be nuked, and SROA should proceed. All of the uses which
  // would block SROA would also block SROA if applied directly to a pointer,
  // and so we can just add the integer in here. The only places where SROA is
  // preserved either cannot fire on an integer, or won't in-and-of themselves
  // disable SROA (ext) w/o some later use that we would see and disable.
  Value *SROAArg;
  DenseMap<Value *, int>::iterator CostIt;
  if (lookupSROAArgAndCost(I.getOperand(0), SROAArg, CostIt))
    SROAArgValues[&I] = SROAArg;

  return TargetTransformInfo::TCC_Free == TTI.getUserCost(&I);
}

bool CallAnalyzer::visitIntToPtr(IntToPtrInst &I) {
  // Propagate constants through inttoptr.
  if (simplifyInstruction(I, [&](SmallVectorImpl<Constant *> &COps) {
        return ConstantExpr::getIntToPtr(COps[0], I.getType());
      }))
    return true;

  // Track base/offset pairs when round-tripped through a pointer without
  // modifications provided the integer is not too large.
  Value *Op = I.getOperand(0);
  unsigned IntegerSize = Op->getType()->getScalarSizeInBits();
  if (IntegerSize <= DL.getPointerTypeSizeInBits(I.getType())) {
    std::pair<Value *, APInt> BaseAndOffset = ConstantOffsetPtrs.lookup(Op);
    if (BaseAndOffset.first)
      ConstantOffsetPtrs[&I] = BaseAndOffset;
  }

  // "Propagate" SROA here in the same manner as we do for ptrtoint above.
  Value *SROAArg;
  DenseMap<Value *, int>::iterator CostIt;
  if (lookupSROAArgAndCost(Op, SROAArg, CostIt))
    SROAArgValues[&I] = SROAArg;

  return TargetTransformInfo::TCC_Free == TTI.getUserCost(&I);
}

bool CallAnalyzer::visitCastInst(CastInst &I) {
  // Propagate constants through casts.
  if (simplifyInstruction(I, [&](SmallVectorImpl<Constant *> &COps) {
        return ConstantExpr::getCast(I.getOpcode(), COps[0], I.getType());
      }))
    return true;

  // Disable SROA in the face of arbitrary casts we don't whitelist elsewhere.
  disableSROA(I.getOperand(0));

  // If this is a floating-point cast, and the target says this operation
  // is expensive, this may eventually become a library call. Treat the cost
  // as such.
  switch (I.getOpcode()) {
  case Instruction::FPTrunc:
  case Instruction::FPExt:
  case Instruction::UIToFP:
  case Instruction::SIToFP:
  case Instruction::FPToUI:
  case Instruction::FPToSI:
    if (TTI.getFPOpCost(I.getType()) == TargetTransformInfo::TCC_Expensive)
      Cost += InlineConstants::CallPenalty;
    break;
  default:
    break;
  }

  return TargetTransformInfo::TCC_Free == TTI.getUserCost(&I);
}

bool CallAnalyzer::visitUnaryInstruction(UnaryInstruction &I) {
  Value *Operand = I.getOperand(0);
  if (simplifyInstruction(I, [&](SmallVectorImpl<Constant *> &COps) {
        return ConstantFoldInstOperands(&I, COps[0], DL);
      }))
    return true;

  // Disable any SROA on the argument to arbitrary unary operators.
  disableSROA(Operand);

  return false;
}

bool CallAnalyzer::paramHasAttr(Argument *A, Attribute::AttrKind Attr) {
  return CandidateCS.paramHasAttr(A->getArgNo(), Attr);
}

bool CallAnalyzer::isKnownNonNullInCallee(Value *V) {
  // Does the *call site* have the NonNull attribute set on an argument? We
  // use the attribute on the call site to memoize any analysis done in the
  // caller. This will also trip if the callee function has a non-null
  // parameter attribute, but that's a less interesting case because hopefully
  // the callee would already have been simplified based on that.
  if (Argument *A = dyn_cast<Argument>(V))
    if (paramHasAttr(A, Attribute::NonNull))
      return true;

  // Is this an alloca in the caller? This is distinct from the attribute case
  // above because attributes aren't updated within the inliner itself and we
  // always want to catch the alloca derived case.
  if (isAllocaDerivedArg(V))
    // We can actually predict the result of comparisons between an
    // alloca-derived value and null. Note that this fires regardless of
    // SROA firing.
    return true;

  return false;
}
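
// Example: for a call site annotated `call void @f(i8* nonnull %q)`, a
// comparison of the corresponding callee parameter against null folds to a
// constant once inlined (see the equality case in visitCmpInst below).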

bool CallAnalyzer::allowSizeGrowth(CallSite CS) {
  // If the normal destination of the invoke or the parent block of the call
  // site is unreachable-terminated, there is little point in inlining this
  // unless there is literally zero cost.
  // FIXME: Note that it is possible that an unreachable-terminated block has a
  // hot entry. For example, in the below scenario inlining hot_call_X() may be
  // beneficial:
  // main() {
  //   hot_call_1();
  //   ...
  //   hot_call_N()
  //   exit(0);
  // }
  // For now, we are not handling this corner case here as it is rare in real
  // code. In the future, we should elaborate on this based on BPI and BFI in
  // more general threshold adjusting heuristics in updateThreshold().
  Instruction *Instr = CS.getInstruction();
  if (InvokeInst *II = dyn_cast<InvokeInst>(Instr)) {
    if (isa<UnreachableInst>(II->getNormalDest()->getTerminator()))
      return false;
  } else if (isa<UnreachableInst>(Instr->getParent()->getTerminator()))
    return false;

  return true;
}

bool CallAnalyzer::isColdCallSite(CallSite CS, BlockFrequencyInfo *CallerBFI) {
  // If global profile summary is available, then callsite's coldness is
  // determined based on that.
  if (PSI && PSI->hasProfileSummary())
    return PSI->isColdCallSite(CS, CallerBFI);

  // Otherwise we need BFI to be available.
  if (!CallerBFI)
    return false;

  // Determine if the callsite is cold relative to caller's entry. We could
  // potentially cache the computation of scaled entry frequency, but the added
  // complexity is not worth it unless this scaling shows up high in the
  // profiles.
  const BranchProbability ColdProb(ColdCallSiteRelFreq, 100);
  auto CallSiteBB = CS.getInstruction()->getParent();
  auto CallSiteFreq = CallerBFI->getBlockFreq(CallSiteBB);
  auto CallerEntryFreq =
      CallerBFI->getBlockFreq(&(CS.getCaller()->getEntryBlock()));
  return CallSiteFreq < CallerEntryFreq * ColdProb;
}
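
// Worked example (default cold-callsite-rel-freq=2): if the caller's entry
// block has frequency 1000, a call site whose block frequency is below
// 1000 * 2% = 20 is classified as cold when no profile summary is available.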

Optional<int>
CallAnalyzer::getHotCallSiteThreshold(CallSite CS,
                                      BlockFrequencyInfo *CallerBFI) {

  // If global profile summary is available, then callsite's hotness is
  // determined based on that.
  if (PSI && PSI->hasProfileSummary() && PSI->isHotCallSite(CS, CallerBFI))
    return Params.HotCallSiteThreshold;

  // Otherwise we need BFI to be available and to have a locally hot callsite
  // threshold.
  if (!CallerBFI || !Params.LocallyHotCallSiteThreshold)
    return None;

  // Determine if the callsite is hot relative to caller's entry. We could
  // potentially cache the computation of scaled entry frequency, but the added
  // complexity is not worth it unless this scaling shows up high in the
  // profiles.
  auto CallSiteBB = CS.getInstruction()->getParent();
  auto CallSiteFreq = CallerBFI->getBlockFreq(CallSiteBB).getFrequency();
  auto CallerEntryFreq = CallerBFI->getEntryFreq();
  if (CallSiteFreq >= CallerEntryFreq * HotCallSiteRelFreq)
    return Params.LocallyHotCallSiteThreshold;

  // Otherwise treat it normally.
  return None;
}
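
// Worked example (default hot-callsite-rel-freq=60): if the caller's entry
// frequency is 16, a call site with block frequency >= 16 * 60 = 960 (say,
// inside a hot loop) receives LocallyHotCallSiteThreshold (default 525)
// instead of None.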

void CallAnalyzer::updateThreshold(CallSite CS, Function &Callee) {
  // If no size growth is allowed for this inlining, set Threshold to 0.
  if (!allowSizeGrowth(CS)) {
    Threshold = 0;
    return;
  }

  Function *Caller = CS.getCaller();

  // return min(A, B) if B is valid.
  auto MinIfValid = [](int A, Optional<int> B) {
    return B ? std::min(A, B.getValue()) : A;
  };

  // return max(A, B) if B is valid.
  auto MaxIfValid = [](int A, Optional<int> B) {
    return B ? std::max(A, B.getValue()) : A;
  };

  // Various bonus percentages. These are multiplied by Threshold to get the
  // bonus values.
  // SingleBBBonus: This bonus is applied if the callee has a single reachable
  // basic block at the given callsite context. This is speculatively applied
  // and withdrawn if more than one basic block is seen.
  //
  // Vector bonuses: We want to more aggressively inline vector-dense kernels
  // and apply this bonus based on the percentage of vector instructions. A
  // bonus is applied if the vector instructions exceed 50% and half that
  // amount is applied if they exceed 10%. Note that these bonuses are somewhat
  // arbitrary and evolved over time by accident as much as because they are
  // principled bonuses.
  // FIXME: It would be nice to base the bonus values on something more
  // scientific.
  //
  // LastCallToStaticBonus: This large bonus is applied to ensure the inlining
  // of the last call to a static function as inlining such functions is
  // guaranteed to reduce code size.
  //
  // These bonus percentages may be set to 0 based on properties of the caller
  // and the callsite.
  int SingleBBBonusPercent = 50;
  int VectorBonusPercent = 150;
  int LastCallToStaticBonus = InlineConstants::LastCallToStaticBonus;

  // Lambda to set all the above bonus and bonus percentages to 0.
  auto DisallowAllBonuses = [&]() {
    SingleBBBonusPercent = 0;
    VectorBonusPercent = 0;
    LastCallToStaticBonus = 0;
  };

  // Use the OptMinSizeThreshold or OptSizeThreshold knob if they are available
  // and reduce the threshold if the caller has the necessary attribute.
  if (Caller->hasMinSize()) {
    Threshold = MinIfValid(Threshold, Params.OptMinSizeThreshold);
    // For minsize, we want to disable the single BB bonus and the vector
    // bonuses, but not the last-call-to-static bonus. Inlining the last call
    // to a static function will, at the minimum, eliminate the parameter
    // setup and call/return instructions.
    SingleBBBonusPercent = 0;
    VectorBonusPercent = 0;
  } else if (Caller->hasOptSize())
    Threshold = MinIfValid(Threshold, Params.OptSizeThreshold);

  // Adjust the threshold based on inlinehint attribute and profile based
  // hotness information if the caller does not have MinSize attribute.
  if (!Caller->hasMinSize()) {
    if (Callee.hasFnAttribute(Attribute::InlineHint))
      Threshold = MaxIfValid(Threshold, Params.HintThreshold);

    // FIXME: After switching to the new passmanager, simplify the logic below
    // by checking only the callsite hotness/coldness as we will reliably
    // have local profile information.
    //
    // Callsite hotness and coldness can be determined if sample profile is
    // used (which adds hotness metadata to calls) or if caller's
    // BlockFrequencyInfo is available.
    BlockFrequencyInfo *CallerBFI = GetBFI ? &((*GetBFI)(*Caller)) : nullptr;
    auto HotCallSiteThreshold = getHotCallSiteThreshold(CS, CallerBFI);
    if (!Caller->hasOptSize() && HotCallSiteThreshold) {
      LLVM_DEBUG(dbgs() << "Hot callsite.\n");
      // FIXME: This should update the threshold only if it exceeds the
      // current threshold, but AutoFDO + ThinLTO currently relies on this
      // behavior to prevent inlining of hot callsites during ThinLTO
      // compile phase.
      Threshold = HotCallSiteThreshold.getValue();
    } else if (isColdCallSite(CS, CallerBFI)) {
      LLVM_DEBUG(dbgs() << "Cold callsite.\n");
      // Do not apply bonuses for a cold callsite including the
      // LastCallToStatic bonus. While this bonus might result in code size
      // reduction, it can cause the size of a non-cold caller to increase
      // preventing it from being inlined.
      DisallowAllBonuses();
      Threshold = MinIfValid(Threshold, Params.ColdCallSiteThreshold);
    } else if (PSI) {
      // Use callee's global profile information only if we have no way of
      // determining this via callsite information.
      if (PSI->isFunctionEntryHot(&Callee)) {
        LLVM_DEBUG(dbgs() << "Hot callee.\n");
        // If callsite hotness can not be determined, we may still know
        // that the callee is hot and treat it as a weaker hint for threshold
        // increase.
        Threshold = MaxIfValid(Threshold, Params.HintThreshold);
      } else if (PSI->isFunctionEntryCold(&Callee)) {
        LLVM_DEBUG(dbgs() << "Cold callee.\n");
        // Do not apply bonuses for a cold callee including the
        // LastCallToStatic bonus. While this bonus might result in code size
        // reduction, it can cause the size of a non-cold caller to increase
        // preventing it from being inlined.
        DisallowAllBonuses();
        Threshold = MinIfValid(Threshold, Params.ColdThreshold);
      }
    }
  }

  // Finally, take the target-specific inlining threshold multiplier into
  // account.
  Threshold *= TTI.getInliningThresholdMultiplier();

  SingleBBBonus = Threshold * SingleBBBonusPercent / 100;
  VectorBonus = Threshold * VectorBonusPercent / 100;

  bool OnlyOneCallAndLocalLinkage =
      F.hasLocalLinkage() && F.hasOneUse() && &F == CS.getCalledFunction();
  // If there is only one call of the function, and it has internal linkage,
  // the cost of inlining it drops dramatically. It may seem odd to update
  // Cost in updateThreshold, but the bonus depends on the logic in this
  // method.
  if (OnlyOneCallAndLocalLinkage)
    Cost -= LastCallToStaticBonus;
}
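
// Worked example: with the default Threshold of 225 and a multiplier of 1,
// SingleBBBonus = 225 * 50 / 100 = 112 and VectorBonus = 225 * 150 / 100 =
// 337. Both are granted speculatively and withdrawn during analyzeCall if the
// callee turns out not to earn them.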

bool CallAnalyzer::visitCmpInst(CmpInst &I) {
  Value *LHS = I.getOperand(0), *RHS = I.getOperand(1);
  // First try to handle simplified comparisons.
  if (simplifyInstruction(I, [&](SmallVectorImpl<Constant *> &COps) {
        return ConstantExpr::getCompare(I.getPredicate(), COps[0], COps[1]);
      }))
    return true;

  if (I.getOpcode() == Instruction::FCmp)
    return false;

  // Otherwise look for a comparison between constant offset pointers with
  // a common base.
  Value *LHSBase, *RHSBase;
  APInt LHSOffset, RHSOffset;
  std::tie(LHSBase, LHSOffset) = ConstantOffsetPtrs.lookup(LHS);
  if (LHSBase) {
    std::tie(RHSBase, RHSOffset) = ConstantOffsetPtrs.lookup(RHS);
    if (RHSBase && LHSBase == RHSBase) {
      // We have common bases, fold the icmp to a constant based on the
      // offsets.
      Constant *CLHS = ConstantInt::get(LHS->getContext(), LHSOffset);
      Constant *CRHS = ConstantInt::get(RHS->getContext(), RHSOffset);
      if (Constant *C = ConstantExpr::getICmp(I.getPredicate(), CLHS, CRHS)) {
        SimplifiedValues[&I] = C;
        ++NumConstantPtrCmps;
        return true;
      }
    }
  }

  // If the comparison is an equality comparison with null, we can simplify it
  // if we know the value (argument) can't be null.
  if (I.isEquality() && isa<ConstantPointerNull>(I.getOperand(1)) &&
      isKnownNonNullInCallee(I.getOperand(0))) {
    bool IsNotEqual = I.getPredicate() == CmpInst::ICMP_NE;
    SimplifiedValues[&I] = IsNotEqual ? ConstantInt::getTrue(I.getType())
                                      : ConstantInt::getFalse(I.getType());
    return true;
  }
  // Finally check for SROA candidates in comparisons.
  Value *SROAArg;
  DenseMap<Value *, int>::iterator CostIt;
  if (lookupSROAArgAndCost(I.getOperand(0), SROAArg, CostIt)) {
    if (isa<ConstantPointerNull>(I.getOperand(1))) {
      accumulateSROACost(CostIt, InlineConstants::InstrCost);
      return true;
    }

    disableSROA(CostIt);
  }

  return false;
}
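
// Illustrative fold: if %a and %b are tracked in ConstantOffsetPtrs with a
// common base and offsets 8 and 4, `icmp ugt i8* %a, %b` reduces to
// `icmp ugt 8, 4` and is recorded as the constant true.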

bool CallAnalyzer::visitSub(BinaryOperator &I) {
  // Try to handle a special case: we can fold computing the difference of two
  // constant-related pointers.
  Value *LHS = I.getOperand(0), *RHS = I.getOperand(1);
  Value *LHSBase, *RHSBase;
  APInt LHSOffset, RHSOffset;
  std::tie(LHSBase, LHSOffset) = ConstantOffsetPtrs.lookup(LHS);
  if (LHSBase) {
    std::tie(RHSBase, RHSOffset) = ConstantOffsetPtrs.lookup(RHS);
    if (RHSBase && LHSBase == RHSBase) {
      // We have common bases, fold the subtract to a constant based on the
      // offsets.
      Constant *CLHS = ConstantInt::get(LHS->getContext(), LHSOffset);
      Constant *CRHS = ConstantInt::get(RHS->getContext(), RHSOffset);
      if (Constant *C = ConstantExpr::getSub(CLHS, CRHS)) {
        SimplifiedValues[&I] = C;
        ++NumConstantPtrDiffs;
        return true;
      }
    }
  }

  // Otherwise, fall back to the generic logic for simplifying and handling
  // instructions.
  return Base::visitSub(I);
}

bool CallAnalyzer::visitBinaryOperator(BinaryOperator &I) {
  Value *LHS = I.getOperand(0), *RHS = I.getOperand(1);
  Constant *CLHS = dyn_cast<Constant>(LHS);
  if (!CLHS)
    CLHS = SimplifiedValues.lookup(LHS);
  Constant *CRHS = dyn_cast<Constant>(RHS);
  if (!CRHS)
    CRHS = SimplifiedValues.lookup(RHS);

  Value *SimpleV = nullptr;
  if (auto FI = dyn_cast<FPMathOperator>(&I))
    SimpleV = SimplifyFPBinOp(I.getOpcode(), CLHS ? CLHS : LHS,
                              CRHS ? CRHS : RHS, FI->getFastMathFlags(), DL);
  else
    SimpleV =
        SimplifyBinOp(I.getOpcode(), CLHS ? CLHS : LHS, CRHS ? CRHS : RHS, DL);

  if (Constant *C = dyn_cast_or_null<Constant>(SimpleV))
    SimplifiedValues[&I] = C;

  if (SimpleV)
    return true;

  // Disable any SROA on arguments to arbitrary, unsimplified binary operators.
  disableSROA(LHS);
  disableSROA(RHS);

  // If the instruction is floating point, and the target says this operation
  // is expensive, this may eventually become a library call. Treat the cost
  // as such.
  if (I.getType()->isFloatingPointTy() &&
      TTI.getFPOpCost(I.getType()) == TargetTransformInfo::TCC_Expensive)
    Cost += InlineConstants::CallPenalty;

  return false;
}

bool CallAnalyzer::visitLoad(LoadInst &I) {
  Value *SROAArg;
  DenseMap<Value *, int>::iterator CostIt;
  if (lookupSROAArgAndCost(I.getPointerOperand(), SROAArg, CostIt)) {
    if (I.isSimple()) {
      accumulateSROACost(CostIt, InlineConstants::InstrCost);
      return true;
    }

    disableSROA(CostIt);
  }

  // If the data is already loaded from this address and hasn't been clobbered
  // by any stores or calls, this load is likely to be redundant and can be
  // eliminated.
  if (EnableLoadElimination &&
      !LoadAddrSet.insert(I.getPointerOperand()).second && I.isUnordered()) {
    LoadEliminationCost += InlineConstants::InstrCost;
    return true;
  }

  return false;
}

bool CallAnalyzer::visitStore(StoreInst &I) {
  Value *SROAArg;
  DenseMap<Value *, int>::iterator CostIt;
  if (lookupSROAArgAndCost(I.getPointerOperand(), SROAArg, CostIt)) {
    if (I.isSimple()) {
      accumulateSROACost(CostIt, InlineConstants::InstrCost);
      return true;
    }

    disableSROA(CostIt);
  }

  // The store can potentially clobber loads and prevent repeated loads from
  // being eliminated.
  // FIXME:
  // 1. We can probably keep an initial set of eliminatable loads subtracted
  // from the cost even when we finally see a store. We just need to disable
  // *further* accumulation of elimination savings.
  // 2. We should probably at some point thread MemorySSA for the callee into
  // this and then use that to actually compute *really* precise savings.
  disableLoadElimination();
  return false;
}

bool CallAnalyzer::visitExtractValue(ExtractValueInst &I) {
  // Constant folding for extract value is trivial.
  if (simplifyInstruction(I, [&](SmallVectorImpl<Constant *> &COps) {
        return ConstantExpr::getExtractValue(COps[0], I.getIndices());
      }))
    return true;

  // SROA can look through these but give them a cost.
  return false;
}

bool CallAnalyzer::visitInsertValue(InsertValueInst &I) {
  // Constant folding for insert value is trivial.
  if (simplifyInstruction(I, [&](SmallVectorImpl<Constant *> &COps) {
        return ConstantExpr::getInsertValue(/*AggregateOperand*/ COps[0],
                                            /*InsertedValueOperand*/ COps[1],
                                            I.getIndices());
      }))
    return true;

  // SROA can look through these but give them a cost.
  return false;
}

/// Try to simplify a call site.
///
/// Takes a concrete function and callsite and tries to actually simplify it by
/// analyzing the arguments and call itself with instsimplify. Returns true if
/// it has simplified the callsite to some other entity (a constant), making it
/// free.
bool CallAnalyzer::simplifyCallSite(Function *F, CallSite CS) {
  // FIXME: Using the instsimplify logic directly for this is inefficient
  // because we have to continually rebuild the argument list even when no
  // simplifications can be performed. Until that is fixed with remapping
  // inside of instsimplify, directly constant fold calls here.
  if (!canConstantFoldCallTo(cast<CallBase>(CS.getInstruction()), F))
    return false;

  // Try to re-map the arguments to constants.
  SmallVector<Constant *, 4> ConstantArgs;
  ConstantArgs.reserve(CS.arg_size());
  for (CallSite::arg_iterator I = CS.arg_begin(), E = CS.arg_end(); I != E;
       ++I) {
    Constant *C = dyn_cast<Constant>(*I);
    if (!C)
      C = dyn_cast_or_null<Constant>(SimplifiedValues.lookup(*I));
    if (!C)
      return false; // This argument doesn't map to a constant.

    ConstantArgs.push_back(C);
  }
  if (Constant *C = ConstantFoldCall(cast<CallBase>(CS.getInstruction()), F,
                                     ConstantArgs)) {
    SimplifiedValues[CS.getInstruction()] = C;
    return true;
  }

  return false;
}
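
// For example, `%r = call double @llvm.sqrt.f64(double 4.0)` -- or a call
// whose argument simplifies to a constant at this call site -- folds to 2.0
// and becomes free.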

bool CallAnalyzer::visitCallSite(CallSite CS) {
  if (CS.hasFnAttr(Attribute::ReturnsTwice) &&
      !F.hasFnAttribute(Attribute::ReturnsTwice)) {
    // This aborts the entire analysis.
    ExposesReturnsTwice = true;
    return false;
  }
  if (CS.isCall() && cast<CallInst>(CS.getInstruction())->cannotDuplicate())
    ContainsNoDuplicateCall = true;

  if (Function *F = CS.getCalledFunction()) {
    // When we have a concrete function, first try to simplify it directly.
    if (simplifyCallSite(F, CS))
      return true;

    // Next check if it is an intrinsic we know about.
    // FIXME: Lift this into part of the InstVisitor.
    if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(CS.getInstruction())) {
      switch (II->getIntrinsicID()) {
      default:
        if (!CS.onlyReadsMemory() && !isAssumeLikeIntrinsic(II))
          disableLoadElimination();
        return Base::visitCallSite(CS);

      case Intrinsic::load_relative:
        // This is normally lowered to 4 LLVM instructions.
        Cost += 3 * InlineConstants::InstrCost;
        return false;

      case Intrinsic::memset:
      case Intrinsic::memcpy:
      case Intrinsic::memmove:
        disableLoadElimination();
        // SROA can usually chew through these intrinsics, but they aren't free.
        return false;
      case Intrinsic::icall_branch_funnel:
      case Intrinsic::localescape:
        HasUninlineableIntrinsic = true;
        return false;
      case Intrinsic::vastart:
        InitsVargArgs = true;
        return false;
      }
    }

    if (F == CS.getInstruction()->getFunction()) {
      // This flag will fully abort the analysis, so don't bother with anything
      // else.
      IsRecursiveCall = true;
      return false;
    }

    if (TTI.isLoweredToCall(F)) {
      // We account for the average 1 instruction per call argument setup
      // here.
      Cost += CS.arg_size() * InlineConstants::InstrCost;

      // Everything other than inline ASM will also have a significant cost
      // merely from making the call.
      if (!isa<InlineAsm>(CS.getCalledValue()))
        Cost += InlineConstants::CallPenalty;
    }

    if (!CS.onlyReadsMemory())
      disableLoadElimination();
    return Base::visitCallSite(CS);
  }

  // Otherwise we're in a very special case -- an indirect function call. See
  // if we can be particularly clever about this.
  Value *Callee = CS.getCalledValue();

  // First, pay the price of the argument setup. We account for the average
  // 1 instruction per call argument setup here.
  Cost += CS.arg_size() * InlineConstants::InstrCost;

  // Next, check if this happens to be an indirect function call to a known
  // function in this inline context. If not, we've done all we can.
  Function *F = dyn_cast_or_null<Function>(SimplifiedValues.lookup(Callee));
  if (!F) {
    if (!CS.onlyReadsMemory())
      disableLoadElimination();
    return Base::visitCallSite(CS);
  }

  // If we have a constant that we are calling as a function, we can peer
  // through it and see the function target. This happens not infrequently
  // during devirtualization and so we want to give it a hefty bonus for
  // inlining, but cap that bonus in the event that inlining wouldn't pan
  // out. Pretend to inline the function, with a custom threshold.
  auto IndirectCallParams = Params;
  IndirectCallParams.DefaultThreshold = InlineConstants::IndirectCallThreshold;
  CallAnalyzer CA(TTI, GetAssumptionCache, GetBFI, PSI, ORE, *F, CS,
                  IndirectCallParams);
  if (CA.analyzeCall(CS)) {
    // We were able to inline the indirect call! Subtract the cost from the
    // threshold to get the bonus we want to apply, but don't go below zero.
    Cost -= std::max(0, CA.getThreshold() - CA.getCost());
  }

  if (!F->onlyReadsMemory())
    disableLoadElimination();
  return Base::visitCallSite(CS);
}

bool CallAnalyzer::visitReturnInst(ReturnInst &RI) {
  // At least one return instruction will be free after inlining.
  bool Free = !HasReturn;
  HasReturn = true;
  return Free;
}

bool CallAnalyzer::visitBranchInst(BranchInst &BI) {
  // We model unconditional branches as essentially free -- they really
  // shouldn't exist at all, but handling them makes the behavior of the
  // inliner more regular and predictable. Interestingly, conditional branches
  // which will fold away are also free.
  return BI.isUnconditional() || isa<ConstantInt>(BI.getCondition()) ||
         dyn_cast_or_null<ConstantInt>(
             SimplifiedValues.lookup(BI.getCondition()));
}

bool CallAnalyzer::visitSelectInst(SelectInst &SI) {
  bool CheckSROA = SI.getType()->isPointerTy();
  Value *TrueVal = SI.getTrueValue();
  Value *FalseVal = SI.getFalseValue();

  Constant *TrueC = dyn_cast<Constant>(TrueVal);
  if (!TrueC)
    TrueC = SimplifiedValues.lookup(TrueVal);
  Constant *FalseC = dyn_cast<Constant>(FalseVal);
  if (!FalseC)
    FalseC = SimplifiedValues.lookup(FalseVal);
  Constant *CondC =
      dyn_cast_or_null<Constant>(SimplifiedValues.lookup(SI.getCondition()));

  if (!CondC) {
    // Select C, X, X => X
    if (TrueC == FalseC && TrueC) {
      SimplifiedValues[&SI] = TrueC;
      return true;
    }

    if (!CheckSROA)
      return Base::visitSelectInst(SI);

    std::pair<Value *, APInt> TrueBaseAndOffset =
        ConstantOffsetPtrs.lookup(TrueVal);
    std::pair<Value *, APInt> FalseBaseAndOffset =
        ConstantOffsetPtrs.lookup(FalseVal);
    if (TrueBaseAndOffset == FalseBaseAndOffset && TrueBaseAndOffset.first) {
      ConstantOffsetPtrs[&SI] = TrueBaseAndOffset;

      Value *SROAArg;
      DenseMap<Value *, int>::iterator CostIt;
      if (lookupSROAArgAndCost(TrueVal, SROAArg, CostIt))
        SROAArgValues[&SI] = SROAArg;
      return true;
    }

    return Base::visitSelectInst(SI);
  }

  // Select condition is a constant.
  Value *SelectedV = CondC->isAllOnesValue()
                         ? TrueVal
                         : (CondC->isNullValue()) ? FalseVal : nullptr;
  if (!SelectedV) {
    // Condition is a vector constant that is not all 1s or all 0s. If all
    // operands are constants, ConstantExpr::getSelect() can handle the cases
    // such as select vectors.
    if (TrueC && FalseC) {
      if (auto *C = ConstantExpr::getSelect(CondC, TrueC, FalseC)) {
        SimplifiedValues[&SI] = C;
        return true;
      }
    }
    return Base::visitSelectInst(SI);
  }

  // Condition is either all 1s or all 0s. SI can be simplified.
  if (Constant *SelectedC = dyn_cast<Constant>(SelectedV)) {
    SimplifiedValues[&SI] = SelectedC;
    return true;
  }

  if (!CheckSROA)
    return true;

  std::pair<Value *, APInt> BaseAndOffset =
      ConstantOffsetPtrs.lookup(SelectedV);
  if (BaseAndOffset.first) {
    ConstantOffsetPtrs[&SI] = BaseAndOffset;

    Value *SROAArg;
    DenseMap<Value *, int>::iterator CostIt;
    if (lookupSROAArgAndCost(SelectedV, SROAArg, CostIt))
      SROAArgValues[&SI] = SROAArg;
  }

  return true;
}
1407 
1408 bool CallAnalyzer::visitSwitchInst(SwitchInst &SI) {
1409  // We model unconditional switches as free, see the comments on handling
1410  // branches.
1411  if (isa<ConstantInt>(SI.getCondition()))
1412  return true;
1413  if (Value *V = SimplifiedValues.lookup(SI.getCondition()))
1414  if (isa<ConstantInt>(V))
1415  return true;
1416 
1417  // Assume the most general case where the switch is lowered into
1418  // either a jump table, bit test, or a balanced binary tree consisting of
1419  // case clusters without merging adjacent clusters with the same
1420  // destination. We do not consider the switches that are lowered with a mix
1421  // of jump table/bit test/binary search tree. The cost of the switch is
1422  // proportional to the size of the tree or the size of jump table range.
1423  //
1424  // NB: We convert large switches which are just used to initialize large phi
1425  // nodes to lookup tables instead in simplify-cfg, so this shouldn't prevent
1426  // inlining those. It will prevent inlining in cases where the optimization
1427  // does not (yet) fire.
1428 
1429  // Maximum valid cost increased in this function.
1430  int CostUpperBound = INT_MAX - InlineConstants::InstrCost - 1;
1431 
1432  // Exit early for a large switch, assuming one case needs at least one
1433  // instruction.
1434  // FIXME: This is not true for a bit test, but ignore such case for now to
1435  // save compile-time.
1436  int64_t CostLowerBound =
1437  std::min((int64_t)CostUpperBound,
1438  (int64_t)SI.getNumCases() * InlineConstants::InstrCost + Cost);
1439 
1440  if (CostLowerBound > Threshold && !ComputeFullInlineCost) {
1441  Cost = CostLowerBound;
1442  return false;
1443  }
1444 
1445  unsigned JumpTableSize = 0;
1446  unsigned NumCaseCluster =
1447  TTI.getEstimatedNumberOfCaseClusters(SI, JumpTableSize);
1448 
1449  // If suitable for a jump table, consider the cost for the table size and
1450  // branch to destination.
1451  if (JumpTableSize) {
1452  int64_t JTCost = (int64_t)JumpTableSize * InlineConstants::InstrCost +
1453  4 * InlineConstants::InstrCost;
1454 
1455  Cost = std::min((int64_t)CostUpperBound, JTCost + Cost);
1456  return false;
1457  }
1458 
1459  // Considering forming a binary search, we should find the number of nodes
1460  // which is same as the number of comparisons when lowered. For a given
1461  // number of clusters, n, we can define a recursive function, f(n), to find
1462  // the number of nodes in the tree. The recursion is :
1463  // f(n) = 1 + f(n/2) + f (n - n/2), when n > 3,
1464  // and f(n) = n, when n <= 3.
1465  // This will lead a binary tree where the leaf should be either f(2) or f(3)
1466  // when n > 3. So, the number of comparisons from leaves should be n, while
1467  // the number of non-leaf should be :
1468  // 2^(log2(n) - 1) - 1
1469  // = 2^log2(n) * 2^-1 - 1
1470  // = n / 2 - 1.
1471  // Considering comparisons from leaf and non-leaf nodes, we can estimate the
1472  // number of comparisons in a simple closed form :
1473  // n + n / 2 - 1 = n * 3 / 2 - 1
1474  if (NumCaseCluster <= 3) {
1475  // Suppose a comparison includes one compare and one conditional branch.
1476  Cost += NumCaseCluster * 2 * InlineConstants::InstrCost;
1477  return false;
1478  }
1479 
1480  int64_t ExpectedNumberOfCompare = 3 * (int64_t)NumCaseCluster / 2 - 1;
1481  int64_t SwitchCost =
1482  ExpectedNumberOfCompare * 2 * InlineConstants::InstrCost;
1483 
1484  Cost = std::min((int64_t)CostUpperBound, SwitchCost + Cost);
1485  return false;
1486 }
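To make the model above concrete, here is a minimal standalone sketch of the three cost paths (a sketch, not part of this file; it assumes InlineConstants::InstrCost is 5, its value in this release, and the function name is ours):

// Standalone sketch of the switch cost model above; InstrCost mirrors
// InlineConstants::InstrCost (assumed to be 5 in this release).
#include <cstdint>
static int64_t switchCostSketch(unsigned NumCaseCluster,
                                unsigned JumpTableSize) {
  const int InstrCost = 5;
  if (JumpTableSize) // Jump table: one entry per slot plus branch overhead.
    return (int64_t)JumpTableSize * InstrCost + 4 * InstrCost;
  if (NumCaseCluster <= 3) // Tiny switch: one compare + branch per cluster.
    return (int64_t)NumCaseCluster * 2 * InstrCost;
  // Balanced tree: n * 3 / 2 - 1 comparisons, each a compare plus a branch.
  int64_t ExpectedNumberOfCompare = 3 * (int64_t)NumCaseCluster / 2 - 1;
  return ExpectedNumberOfCompare * 2 * InstrCost;
}

For example, switchCostSketch(8, 0) yields 11 comparisons for a cost of 110, while a 16-entry jump table, switchCostSketch(0, 16), costs 100.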
1487 
1488 bool CallAnalyzer::visitIndirectBrInst(IndirectBrInst &IBI) {
1489  // We never want to inline functions that contain an indirectbr. This is
1490  // incorrect because all the blockaddresses (in static global initializers,
1491  // for example) would still refer to the original function, so the
1492  // indirect jump would jump from the inlined copy of the function into the
1493  // original function, which is undefined behavior.
1494  // FIXME: This logic isn't really right; we can safely inline functions with
1495  // indirectbr's as long as no other function or global references the
1496  // blockaddress of a block within the current function.
1497  HasIndirectBr = true;
1498  return false;
1499 }
1500 
1501 bool CallAnalyzer::visitResumeInst(ResumeInst &RI) {
1502  // FIXME: It's not clear that a single instruction is an accurate model for
1503  // the inline cost of a resume instruction.
1504  return false;
1505 }
1506 
1507 bool CallAnalyzer::visitCleanupReturnInst(CleanupReturnInst &CRI) {
1508  // FIXME: It's not clear that a single instruction is an accurate model for
1509  // the inline cost of a cleanupret instruction.
1510  return false;
1511 }
1512 
1513 bool CallAnalyzer::visitCatchReturnInst(CatchReturnInst &CRI) {
1514  // FIXME: It's not clear that a single instruction is an accurate model for
1515  // the inline cost of a catchret instruction.
1516  return false;
1517 }
1518 
1519 bool CallAnalyzer::visitUnreachableInst(UnreachableInst &I) {
1520  // FIXME: It might be reasonable to discount the cost of instructions leading
1521  // to unreachable as they have the lowest possible impact on both runtime and
1522  // code size.
1523  return true; // No actual code is needed for unreachable.
1524 }
1525 
1526 bool CallAnalyzer::visitInstruction(Instruction &I) {
1527  // Some instructions are free. All of the free intrinsics can also be
1528  // handled by SROA, etc.
1529  if (TargetTransformInfo::TCC_Free == TTI.getUserCost(&I))
1530  return true;
1531 
1532  // We found something we don't understand or can't handle. Mark any SROA-able
1533  // values in the operand list as no longer viable.
1534  for (User::op_iterator OI = I.op_begin(), OE = I.op_end(); OI != OE; ++OI)
1535  disableSROA(*OI);
1536 
1537  return false;
1538 }
1539 
1540 /// Analyze a basic block for its contribution to the inline cost.
1541 ///
1542 /// This method walks the analyzer over every instruction in the given basic
1543 /// block and accounts for their cost during inlining at this callsite. It
1544 /// aborts early if the threshold has been exceeded or an impossible-to-inline
1545 /// construct has been detected. It returns false if inlining is no longer
1546 /// viable, and true if inlining remains viable.
1547 InlineResult
1548 CallAnalyzer::analyzeBlock(BasicBlock *BB,
1549  SmallPtrSetImpl<const Value *> &EphValues) {
1550  for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I) {
1551  // FIXME: Currently, the number of instructions in a function, regardless of
1552  // our ability to simplify them to constants or dead code during inlining,
1553  // is used by the vector bonus heuristic. As long as that's true, we have to
1554  // special case debug intrinsics here to prevent differences in inlining due
1555  // to debug symbols. Eventually, the number of unsimplified instructions
1556  // shouldn't factor into the cost computation, but until then, hack around
1557  // it here.
1558  if (isa<DbgInfoIntrinsic>(I))
1559  continue;
1560 
1561  // Skip ephemeral values.
1562  if (EphValues.count(&*I))
1563  continue;
1564 
1565  ++NumInstructions;
1566  if (isa<ExtractElementInst>(I) || I->getType()->isVectorTy())
1567  ++NumVectorInstructions;
1568 
1569  // If the instruction simplified to a constant, there is no cost to this
1570  // instruction. Visit the instructions using our InstVisitor to account for
1571  // all of the per-instruction logic. The visit tree returns true if we
1572  // consumed the instruction in any way, and false if the instruction's base
1573  // cost should count against inlining.
1574  if (Base::visit(&*I))
1575  ++NumInstructionsSimplified;
1576  else
1577  Cost += InlineConstants::InstrCost;
1578 
1579  using namespace ore;
1580  // If visiting this instruction detected an uninlinable pattern, abort.
1581  InlineResult IR;
1582  if (IsRecursiveCall)
1583  IR = "recursive";
1584  else if (ExposesReturnsTwice)
1585  IR = "exposes returns twice";
1586  else if (HasDynamicAlloca)
1587  IR = "dynamic alloca";
1588  else if (HasIndirectBr)
1589  IR = "indirect branch";
1590  else if (HasUninlineableIntrinsic)
1591  IR = "uninlinable intrinsic";
1592  else if (InitsVargArgs)
1593  IR = "varargs";
1594  if (!IR) {
1595  if (ORE)
1596  ORE->emit([&]() {
1597  return OptimizationRemarkMissed(DEBUG_TYPE, "NeverInline",
1598  CandidateCS.getInstruction())
1599  << NV("Callee", &F) << " has uninlinable pattern ("
1600  << NV("InlineResult", IR.message)
1601  << ") and cost is not fully computed";
1602  });
1603  return IR;
1604  }
1605 
1606  // If the caller is a recursive function then we don't want to inline
1607  // functions which allocate a lot of stack space because it would increase
1608  // the caller stack usage dramatically.
1609  if (IsCallerRecursive &&
1610  AllocatedSize > InlineConstants::TotalAllocaSizeRecursiveCaller) {
1611  InlineResult IR = "recursive and allocates too much stack space";
1612  if (ORE)
1613  ORE->emit([&]() {
1614  return OptimizationRemarkMissed(DEBUG_TYPE, "NeverInline",
1615  CandidateCS.getInstruction())
1616  << NV("Callee", &F) << " is " << NV("InlineResult", IR.message)
1617  << ". Cost is not fully computed";
1618  });
1619  return IR;
1620  }
1621 
1622  // Check if we've passed the maximum possible threshold so we don't spin in
1623  // huge basic blocks that will never inline.
1624  if (Cost >= Threshold && !ComputeFullInlineCost)
1625  return false;
1626  }
1627 
1628  return true;
1629 }
1630 
1631 /// Compute the base pointer and cumulative constant offsets for V.
1632 ///
1633 /// This strips all constant offsets off of V, leaving it the base pointer, and
1634 /// accumulates the total constant offset applied in the returned constant. It
1635 /// returns nullptr if V is not a pointer, and returns the constant '0' if there
1636 /// no constant offsets applied.
1637 ConstantInt *CallAnalyzer::stripAndComputeInBoundsConstantOffsets(Value *&V) {
1638  if (!V->getType()->isPointerTy())
1639  return nullptr;
1640 
1641  unsigned AS = V->getType()->getPointerAddressSpace();
1642  unsigned IntPtrWidth = DL.getIndexSizeInBits(AS);
1643  APInt Offset = APInt::getNullValue(IntPtrWidth);
1644 
1645  // Even though we don't look through PHI nodes, we could be called on an
1646  // instruction in an unreachable block, which may be on a cycle.
1647  SmallPtrSet<Value *, 4> Visited;
1648  Visited.insert(V);
1649  do {
1650  if (GEPOperator *GEP = dyn_cast<GEPOperator>(V)) {
1651  if (!GEP->isInBounds() || !accumulateGEPOffset(*GEP, Offset))
1652  return nullptr;
1653  V = GEP->getPointerOperand();
1654  } else if (Operator::getOpcode(V) == Instruction::BitCast) {
1655  V = cast<Operator>(V)->getOperand(0);
1656  } else if (GlobalAlias *GA = dyn_cast<GlobalAlias>(V)) {
1657  if (GA->isInterposable())
1658  break;
1659  V = GA->getAliasee();
1660  } else {
1661  break;
1662  }
1663  assert(V->getType()->isPointerTy() && "Unexpected operand type!");
1664  } while (Visited.insert(V).second);
1665 
1666  Type *IntPtrTy = DL.getIntPtrType(V->getContext(), AS);
1667  return cast<ConstantInt>(ConstantInt::get(IntPtrTy, Offset));
1668 }
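For illustration, here is a hedged walk-through of what this routine computes on a small, hypothetical IR fragment (the IR below is ours, not from this file):

// Hypothetical input:
//   %base = alloca [8 x i32]
//   %p = getelementptr inbounds [8 x i32], [8 x i32]* %base, i64 0, i64 2
//   %q = bitcast i32* %p to i8*
// Calling stripAndComputeInBoundsConstantOffsets on %q walks the bitcast and
// the inbounds GEP, leaves V pointing at %base, and returns a ConstantInt of
// 8 (two i32 elements at 4 bytes each) in the pointer-index type.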
1669 
1670 /// Find dead blocks due to deleted CFG edges during inlining.
1671 ///
1672 /// If we know the successor of the current block, \p CurrBB, has to be \p
1673 /// NextBB, the other successors of \p CurrBB are dead if these successors have
1674 /// no live incoming CFG edges. If one block is found to be dead, we can
1675 /// continue growing the dead block list by checking the successors of the dead
1676 /// blocks to see if all their incoming edges are dead or not.
1677 void CallAnalyzer::findDeadBlocks(BasicBlock *CurrBB, BasicBlock *NextBB) {
1678  auto IsEdgeDead = [&](BasicBlock *Pred, BasicBlock *Succ) {
1679  // A CFG edge is dead if the predecessor is dead or the predecessor has a
1680  // known successor which is not the one under examination.
1681  return (DeadBlocks.count(Pred) ||
1682  (KnownSuccessors[Pred] && KnownSuccessors[Pred] != Succ));
1683  };
1684 
1685  auto IsNewlyDead = [&](BasicBlock *BB) {
1686  // If all the edges to a block are dead, the block is also dead.
1687  return (!DeadBlocks.count(BB) &&
1688  llvm::all_of(predecessors(BB),
1689  [&](BasicBlock *P) { return IsEdgeDead(P, BB); }));
1690  };
1691 
1692  for (BasicBlock *Succ : successors(CurrBB)) {
1693  if (Succ == NextBB || !IsNewlyDead(Succ))
1694  continue;
1695  SmallVector<BasicBlock *, 4> NewDead;
1696  NewDead.push_back(Succ);
1697  while (!NewDead.empty()) {
1698  BasicBlock *Dead = NewDead.pop_back_val();
1699  if (DeadBlocks.insert(Dead))
1700  // Continue growing the dead block lists.
1701  for (BasicBlock *S : successors(Dead))
1702  if (IsNewlyDead(S))
1703  NewDead.push_back(S);
1704  }
1705  }
1706 }
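A small, hypothetical CFG makes the propagation concrete (an illustrative sketch, not from this file):

// Hypothetical CFG:
//         entry
//         /   \
//       bb1   bb2
//         \   /
//         merge
// If entry's conditional branch folded toward bb1, KnownSuccessors[entry] is
// bb1 and the entry->bb2 edge is dead. Since bb2 has no other predecessors,
// IsNewlyDead(bb2) holds and bb2 is added to DeadBlocks; merge stays live
// because its incoming edge from bb1 is still live.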
1707 
1708 /// Analyze a call site for potential inlining.
1709 ///
1710 /// Returns true if inlining this call is viable, and false if it is not
1711 /// viable. It computes the cost and adjusts the threshold based on numerous
1712 /// factors and heuristics. If this method returns false but the computed cost
1713 /// is below the computed threshold, then inlining was forcibly disabled by
1714 /// some artifact of the routine.
1715 InlineResult CallAnalyzer::analyzeCall(CallSite CS) {
1716  ++NumCallsAnalyzed;
1717 
1718  // Perform some tweaks to the cost and threshold based on the direct
1719  // callsite information.
1720 
1721  // We want to more aggressively inline vector-dense kernels, so up the
1722  // threshold, and we'll lower it if the % of vector instructions gets too
1723  // low. Note that these bonuses are somewhat arbitrary and evolved over time
1724  // by accident as much as because they are principled bonuses.
1725  //
1726  // FIXME: It would be nice to remove all such bonuses. At least it would be
1727  // nice to base the bonus values on something more scientific.
1728  assert(NumInstructions == 0);
1729  assert(NumVectorInstructions == 0);
1730 
1731  // Update the threshold based on callsite properties
1732  updateThreshold(CS, F);
1733 
1734  // While Threshold depends on commandline options that can take negative
1735  // values, we want to enforce the invariant that the computed threshold and
1736  // bonuses are non-negative.
1737  assert(Threshold >= 0);
1738  assert(SingleBBBonus >= 0);
1739  assert(VectorBonus >= 0);
1740 
1741  // Speculatively apply all possible bonuses to Threshold. If cost exceeds
1742  // this Threshold at any point (cost never decreases), we can stop processing
1743  // the rest of the function body.
1744  Threshold += (SingleBBBonus + VectorBonus);
1745 
1746  // Give out bonuses for the callsite, as the instructions setting them up
1747  // will be gone after inlining.
1748  Cost -= getCallsiteCost(CS, DL);
1749 
1750  // If this function uses the coldcc calling convention, prefer not to inline
1751  // it.
1752  if (F.getCallingConv() == CallingConv::Cold)
1753  Cost += InlineConstants::ColdccPenalty;
1754 
1755  // Check if we're done. This can happen due to bonuses and penalties.
1756  if (Cost >= Threshold && !ComputeFullInlineCost)
1757  return "high cost";
1758 
1759  if (F.empty())
1760  return true;
1761 
1762  Function *Caller = CS.getInstruction()->getFunction();
1763  // Check if the caller function is recursive itself.
1764  for (User *U : Caller->users()) {
1765  CallSite Site(U);
1766  if (!Site)
1767  continue;
1768  Instruction *I = Site.getInstruction();
1769  if (I->getFunction() == Caller) {
1770  IsCallerRecursive = true;
1771  break;
1772  }
1773  }
1774 
1775  // Populate our simplified values by mapping from function arguments to call
1776  // arguments with known important simplifications.
1777  CallSite::arg_iterator CAI = CS.arg_begin();
1778  for (Function::arg_iterator FAI = F.arg_begin(), FAE = F.arg_end();
1779  FAI != FAE; ++FAI, ++CAI) {
1780  assert(CAI != CS.arg_end());
1781  if (Constant *C = dyn_cast<Constant>(CAI))
1782  SimplifiedValues[&*FAI] = C;
1783 
1784  Value *PtrArg = *CAI;
1785  if (ConstantInt *C = stripAndComputeInBoundsConstantOffsets(PtrArg)) {
1786  ConstantOffsetPtrs[&*FAI] = std::make_pair(PtrArg, C->getValue());
1787 
1788  // We can SROA any pointer arguments derived from alloca instructions.
1789  if (isa<AllocaInst>(PtrArg)) {
1790  SROAArgValues[&*FAI] = PtrArg;
1791  SROAArgCosts[PtrArg] = 0;
1792  }
1793  }
1794  }
1795  NumConstantArgs = SimplifiedValues.size();
1796  NumConstantOffsetPtrArgs = ConstantOffsetPtrs.size();
1797  NumAllocaArgs = SROAArgValues.size();
1798 
1799  // FIXME: If a caller has multiple calls to a callee, we end up recomputing
1800  // the ephemeral values multiple times (and they're completely determined by
1801  // the callee, so this is purely duplicate work).
1802  SmallPtrSet<const Value *, 32> EphValues;
1803  CodeMetrics::collectEphemeralValues(&F, &GetAssumptionCache(F), EphValues);
1804 
1805  // The worklist of live basic blocks in the callee *after* inlining. We avoid
1806  // adding basic blocks of the callee which can be proven to be dead for this
1807  // particular call site in order to get more accurate cost estimates. This
1808  // requires a somewhat heavyweight iteration pattern: we need to walk the
1809  // basic blocks in a breadth-first order as we insert live successors. To
1810  // accomplish this, and because we expect to exit early after crossing our
1811  // threshold, we use a SetVector optimized for small sizes.
1812  typedef SetVector<BasicBlock *, SmallVector<BasicBlock *, 16>,
1813  SmallPtrSet<BasicBlock *, 16>>
1814  BBSetVector;
1815  BBSetVector BBWorklist;
1816  BBWorklist.insert(&F.getEntryBlock());
1817  bool SingleBB = true;
1818  // Note that we *must not* cache the size, this loop grows the worklist.
1819  for (unsigned Idx = 0; Idx != BBWorklist.size(); ++Idx) {
1820  // Bail out the moment we cross the threshold. This means we'll under-count
1821  // the cost, but only when undercounting doesn't matter.
1822  if (Cost >= Threshold && !ComputeFullInlineCost)
1823  break;
1824 
1825  BasicBlock *BB = BBWorklist[Idx];
1826  if (BB->empty())
1827  continue;
1828 
1829  // Disallow inlining a blockaddress. A blockaddress only has defined
1830  // behavior for an indirect branch in the same function, and we do not
1831  // currently support inlining indirect branches. But, the inliner may not
1832  // see an indirect branch that ends up being dead code at a particular call
1833  // site. If the blockaddress escapes the function, e.g., via a global
1834  // variable, inlining may lead to an invalid cross-function reference.
1835  if (BB->hasAddressTaken())
1836  return "blockaddress";
1837 
1838  // Analyze the cost of this block. If we blow through the threshold, this
1839  // returns false, and we can bail out.
1840  InlineResult IR = analyzeBlock(BB, EphValues);
1841  if (!IR)
1842  return IR;
1843 
1844  Instruction *TI = BB->getTerminator();
1845 
1846  // Add in the live successors by first checking whether we have a terminator
1847  // that may be simplified based on the values simplified by this call.
1848  if (BranchInst *BI = dyn_cast<BranchInst>(TI)) {
1849  if (BI->isConditional()) {
1850  Value *Cond = BI->getCondition();
1851  if (ConstantInt *SimpleCond =
1852  dyn_cast_or_null<ConstantInt>(SimplifiedValues.lookup(Cond))) {
1853  BasicBlock *NextBB = BI->getSuccessor(SimpleCond->isZero() ? 1 : 0);
1854  BBWorklist.insert(NextBB);
1855  KnownSuccessors[BB] = NextBB;
1856  findDeadBlocks(BB, NextBB);
1857  continue;
1858  }
1859  }
1860  } else if (SwitchInst *SI = dyn_cast<SwitchInst>(TI)) {
1861  Value *Cond = SI->getCondition();
1862  if (ConstantInt *SimpleCond =
1863  dyn_cast_or_null<ConstantInt>(SimplifiedValues.lookup(Cond))) {
1864  BasicBlock *NextBB = SI->findCaseValue(SimpleCond)->getCaseSuccessor();
1865  BBWorklist.insert(NextBB);
1866  KnownSuccessors[BB] = NextBB;
1867  findDeadBlocks(BB, NextBB);
1868  continue;
1869  }
1870  }
1871 
1872  // If we're unable to select a particular successor, just count all of
1873  // them.
1874  for (unsigned TIdx = 0, TSize = TI->getNumSuccessors(); TIdx != TSize;
1875  ++TIdx)
1876  BBWorklist.insert(TI->getSuccessor(TIdx));
1877 
1878  // If we had any successors at this point, then post-inlining is likely to
1879  // have them as well. Note that we assume any basic blocks which existed
1880  // due to branches or switches which folded above will also fold after
1881  // inlining.
1882  if (SingleBB && TI->getNumSuccessors() > 1) {
1883  // Take off the bonus we applied to the threshold.
1884  Threshold -= SingleBBBonus;
1885  SingleBB = false;
1886  }
1887  }
1888 
1889  bool OnlyOneCallAndLocalLinkage =
1890  F.hasLocalLinkage() && F.hasOneUse() && &F == CS.getCalledFunction();
1891  // If this is a noduplicate call, we can still inline as long as
1892  // inlining this would cause the removal of the caller (so the instruction
1893  // is not actually duplicated, just moved).
1894  if (!OnlyOneCallAndLocalLinkage && ContainsNoDuplicateCall)
1895  return "noduplicate";
1896 
1897  // Loops generally act a lot like calls in that they act like barriers to
1898  // movement, require a certain amount of setup, etc. So when optimizing for
1899  // size, we penalize any call sites that perform loops. We do this after all
1900  // other costs here, so we will likely only be dealing with relatively small
1901  // functions (and hence DT and LI will hopefully be cheap).
1902  if (Caller->hasMinSize()) {
1903  DominatorTree DT(F);
1904  LoopInfo LI(DT);
1905  int NumLoops = 0;
1906  for (Loop *L : LI) {
1907  // Ignore loops that will not be executed
1908  if (DeadBlocks.count(L->getHeader()))
1909  continue;
1910  NumLoops++;
1911  }
1912  Cost += NumLoops * InlineConstants::CallPenalty;
1913  }
1914 
1915  // We applied the maximum possible vector bonus at the beginning. Now,
1916  // subtract the excess bonus, if any, from the Threshold before
1917  // comparing against Cost.
1918  if (NumVectorInstructions <= NumInstructions / 10)
1919  Threshold -= VectorBonus;
1920  else if (NumVectorInstructions <= NumInstructions / 2)
1921  Threshold -= VectorBonus/2;
1922 
1923  return Cost < std::max(1, Threshold);
1924 }
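To make the bonus bookkeeping concrete, here is a sketch under stated assumptions: the 50% single-basic-block and 150% vector bonus rates, and the base threshold of 225 from the -inline-threshold default, are assumptions pinned to this release rather than values visible in this excerpt.

// Sketch of the speculative threshold applied at the top of analyzeCall,
// assuming 50%/150% bonus rates (an assumption pinned to this release).
static int speculativeThreshold(int Base) {
  int SingleBBBonus = Base * 50 / 100;       // 112 when Base == 225
  int VectorBonus = Base * 150 / 100;        // 337 when Base == 225
  return Base + SingleBBBonus + VectorBonus; // 674 when Base == 225
}

If the callee turns out to have multiple blocks, SingleBBBonus is taken back, and if under a tenth of its instructions are vector operations, VectorBonus is taken back too, so an ordinary callee is ultimately judged against the plain base threshold.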
1925 
1926 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1927 /// Dump stats about this call's analysis.
1928 LLVM_DUMP_METHOD void CallAnalyzer::dump() {
1929 #define DEBUG_PRINT_STAT(x) dbgs() << " " #x ": " << x << "\n"
1930  DEBUG_PRINT_STAT(NumConstantArgs);
1931  DEBUG_PRINT_STAT(NumConstantOffsetPtrArgs);
1932  DEBUG_PRINT_STAT(NumAllocaArgs);
1933  DEBUG_PRINT_STAT(NumConstantPtrCmps);
1934  DEBUG_PRINT_STAT(NumConstantPtrDiffs);
1935  DEBUG_PRINT_STAT(NumInstructionsSimplified);
1936  DEBUG_PRINT_STAT(NumInstructions);
1937  DEBUG_PRINT_STAT(SROACostSavings);
1938  DEBUG_PRINT_STAT(SROACostSavingsLost);
1939  DEBUG_PRINT_STAT(LoadEliminationCost);
1940  DEBUG_PRINT_STAT(ContainsNoDuplicateCall);
1941  DEBUG_PRINT_STAT(Cost);
1942  DEBUG_PRINT_STAT(Threshold);
1943 #undef DEBUG_PRINT_STAT
1944 }
1945 #endif
1946 
1947 /// Test that there are no attribute conflicts between Caller and Callee
1948 /// that prevent inlining.
1949 static bool functionsHaveCompatibleAttributes(Function *Caller,
1950  Function *Callee,
1951  TargetTransformInfo &TTI) {
1952  return TTI.areInlineCompatible(Caller, Callee) &&
1953  AttributeFuncs::areInlineCompatible(*Caller, *Callee);
1954 }
1955 
1956 int llvm::getCallsiteCost(CallSite CS, const DataLayout &DL) {
1957  int Cost = 0;
1958  for (unsigned I = 0, E = CS.arg_size(); I != E; ++I) {
1959  if (CS.isByValArgument(I)) {
1960  // We approximate the number of loads and stores needed by dividing the
1961  // size of the byval type by the target's pointer size.
1962  PointerType *PTy = cast<PointerType>(CS.getArgument(I)->getType());
1963  unsigned TypeSize = DL.getTypeSizeInBits(PTy->getElementType());
1964  unsigned AS = PTy->getAddressSpace();
1965  unsigned PointerSize = DL.getPointerSizeInBits(AS);
1966  // Ceiling division.
1967  unsigned NumStores = (TypeSize + PointerSize - 1) / PointerSize;
1968 
1969  // If it generates more than 8 stores it is likely to be expanded as an
1970  // inline memcpy so we take that as an upper bound. Otherwise we assume
1971  // one load and one store per word copied.
1972  // FIXME: The maxStoresPerMemcpy setting from the target should be used
1973  // here instead of a magic number of 8, but it's not available via
1974  // DataLayout.
1975  NumStores = std::min(NumStores, 8U);
1976 
1977  Cost += 2 * NumStores * InlineConstants::InstrCost;
1978  } else {
1979  // For non-byval arguments subtract off one instruction per call
1980  // argument.
1981  Cost += InlineConstants::InstrCost;
1982  }
1983  }
1984  // The call instruction also disappears after inlining.
1985  Cost += InlineConstants::InstrCost + InlineConstants::CallPenalty;
1986  return Cost;
1987 }
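As a worked example of this arithmetic, here is a standalone sketch; the function name is ours, and it assumes InlineConstants::InstrCost is 5 and InlineConstants::CallPenalty is 25, their values in this release:

#include <algorithm>
// Mirrors the cost model above for one byval argument plus ordinary scalar
// arguments (a sketch, not part of this file).
static int callsiteCostSketch(unsigned ByValBits, unsigned PtrBits,
                              unsigned NumOtherArgs) {
  const int InstrCost = 5, CallPenalty = 25; // assumed release values
  unsigned NumStores = (ByValBits + PtrBits - 1) / PtrBits; // ceiling division
  NumStores = std::min(NumStores, 8U);  // beyond 8 words, expect a memcpy
  int Cost = 2 * NumStores * InstrCost; // one load + one store per word
  Cost += NumOtherArgs * InstrCost;     // one instruction per scalar argument
  Cost += InstrCost + CallPenalty;      // the call itself disappears
  return Cost;
}

For a 64-byte byval struct and one scalar argument on a 64-bit target, callsiteCostSketch(512, 64, 1) returns 80 + 5 + 30 = 115.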
1988 
1989 InlineCost llvm::getInlineCost(
1990  CallSite CS, const InlineParams &Params, TargetTransformInfo &CalleeTTI,
1991  std::function<AssumptionCache &(Function &)> &GetAssumptionCache,
1992  Optional<function_ref<BlockFrequencyInfo &(Function &)>> GetBFI,
1993  ProfileSummaryInfo *PSI, OptimizationRemarkEmitter *ORE) {
1994  return getInlineCost(CS, CS.getCalledFunction(), Params, CalleeTTI,
1995  GetAssumptionCache, GetBFI, PSI, ORE);
1996 }
1997 
1998 InlineCost llvm::getInlineCost(
1999  CallSite CS, Function *Callee, const InlineParams &Params,
2000  TargetTransformInfo &CalleeTTI,
2001  std::function<AssumptionCache &(Function &)> &GetAssumptionCache,
2002  Optional<function_ref<BlockFrequencyInfo &(Function &)>> GetBFI,
2003  ProfileSummaryInfo *PSI, OptimizationRemarkEmitter *ORE) {
2004 
2005  // Cannot inline indirect calls.
2006  if (!Callee)
2007  return llvm::InlineCost::getNever("indirect call");
2008 
2009  // Never inline calls with byval arguments that do not have the alloca
2010  // address space. Since byval arguments can be replaced with a copy to an
2011  // alloca, the inlined code would need to be adjusted to handle that the
2012  // argument is in the alloca address space (so it is a little bit complicated
2013  // to solve).
2014  unsigned AllocaAS = Callee->getParent()->getDataLayout().getAllocaAddrSpace();
2015  for (unsigned I = 0, E = CS.arg_size(); I != E; ++I)
2016  if (CS.isByValArgument(I)) {
2017  PointerType *PTy = cast<PointerType>(CS.getArgument(I)->getType());
2018  if (PTy->getAddressSpace() != AllocaAS)
2019  return llvm::InlineCost::getNever("byval arguments without alloca"
2020  " address space");
2021  }
2022 
2023  // Calls to functions with always-inline attributes should be inlined
2024  // whenever possible.
2025  if (CS.hasFnAttr(Attribute::AlwaysInline)) {
2026  auto IsViable = isInlineViable(*Callee);
2027  if (IsViable)
2028  return llvm::InlineCost::getAlways("always inline attribute");
2029  return llvm::InlineCost::getNever(IsViable.message);
2030  }
2031 
2032  // Never inline functions with conflicting attributes (unless callee has
2033  // always-inline attribute).
2034  Function *Caller = CS.getCaller();
2035  if (!functionsHaveCompatibleAttributes(Caller, Callee, CalleeTTI))
2036  return llvm::InlineCost::getNever("conflicting attributes");
2037 
2038  // Don't inline this call if the caller has the optnone attribute.
2039  if (Caller->hasOptNone())
2040  return llvm::InlineCost::getNever("optnone attribute");
2041 
2042  // Don't inline a function that treats null pointer as valid into a caller
2043  // that does not have this attribute.
2044  if (!Caller->nullPointerIsDefined() && Callee->nullPointerIsDefined())
2045  return llvm::InlineCost::getNever("nullptr definitions incompatible");
2046 
2047  // Don't inline functions which can be interposed at link-time.
2048  if (Callee->isInterposable())
2049  return llvm::InlineCost::getNever("interposable");
2050 
2051  // Don't inline functions marked noinline.
2052  if (Callee->hasFnAttribute(Attribute::NoInline))
2053  return llvm::InlineCost::getNever("noinline function attribute");
2054 
2055  // Don't inline call sites marked noinline.
2056  if (CS.isNoInline())
2057  return llvm::InlineCost::getNever("noinline call site attribute");
2058 
2059  LLVM_DEBUG(llvm::dbgs() << " Analyzing call of " << Callee->getName()
2060  << "... (caller:" << Caller->getName() << ")\n");
2061 
2062  CallAnalyzer CA(CalleeTTI, GetAssumptionCache, GetBFI, PSI, ORE, *Callee, CS,
2063  Params);
2064  InlineResult ShouldInline = CA.analyzeCall(CS);
2065 
2066  LLVM_DEBUG(CA.dump());
2067 
2068  // Check if there was a reason to force inlining or no inlining.
2069  if (!ShouldInline && CA.getCost() < CA.getThreshold())
2070  return InlineCost::getNever(ShouldInline.message);
2071  if (ShouldInline && CA.getCost() >= CA.getThreshold())
2072  return InlineCost::getAlways("empty function");
2073 
2074  return llvm::InlineCost::get(CA.getCost(), CA.getThreshold());
2075 }
2076 
2077 InlineResult llvm::isInlineViable(Function &F) {
2078  bool ReturnsTwice = F.hasFnAttribute(Attribute::ReturnsTwice);
2079  for (Function::iterator BI = F.begin(), BE = F.end(); BI != BE; ++BI) {
2080  // Disallow inlining of functions which contain indirect branches or
2081  // blockaddresses.
2082  if (isa<IndirectBrInst>(BI->getTerminator()))
2083  return "contains indirect branches";
2084 
2085  if (BI->hasAddressTaken())
2086  return "uses block address";
2087 
2088  for (auto &II : *BI) {
2089  CallSite CS(&II);
2090  if (!CS)
2091  continue;
2092 
2093  // Disallow recursive calls.
2094  if (&F == CS.getCalledFunction())
2095  return "recursive call";
2096 
2097  // Disallow calls which expose returns-twice to a function not previously
2098  // attributed as such.
2099  if (!ReturnsTwice && CS.isCall() &&
2100  cast<CallInst>(CS.getInstruction())->canReturnTwice())
2101  return "exposes returns-twice attribute";
2102 
2103  if (CS.getCalledFunction())
2104  switch (CS.getCalledFunction()->getIntrinsicID()) {
2105  default:
2106  break;
2107  // Disallow inlining of @llvm.icall.branch.funnel because the current
2108  // backend can't separate call targets from call arguments.
2109  case llvm::Intrinsic::icall_branch_funnel:
2110  return "disallowed inlining of @llvm.icall.branch.funnel";
2111  // Disallow inlining functions that call @llvm.localescape. Doing this
2112  // correctly would require major changes to the inliner.
2113  case llvm::Intrinsic::localescape:
2114  return "disallowed inlining of @llvm.localescape";
2115  // Disallow inlining of functions that initialize VarArgs with va_start.
2116  case llvm::Intrinsic::vastart:
2117  return "contains VarArgs initialized with va_start";
2118  }
2119  }
2120  }
2121 
2122  return true;
2123 }
2124 
2125 // APIs to create InlineParams based on command line flags and/or other
2126 // parameters.
2127 
2128 InlineParams llvm::getInlineParams(int Threshold) {
2129  InlineParams Params;
2130 
2131  // This field is the threshold to use for a callee by default. This is
2132  // derived from one or more of:
2133  // * optimization or size-optimization levels,
2134  // * a value passed to createFunctionInliningPass function, or
2135  // * the -inline-threshold flag.
2136  // If the -inline-threshold flag is explicitly specified, that is used
2137  // irrespective of anything else.
2138  if (InlineThreshold.getNumOccurrences() > 0)
2139  Params.DefaultThreshold = InlineThreshold;
2140  else
2141  Params.DefaultThreshold = Threshold;
2142 
2143  // Set the HintThreshold knob from the -inlinehint-threshold.
2144  Params.HintThreshold = HintThreshold;
2145 
2146  // Set the HotCallSiteThreshold knob from the -hot-callsite-threshold.
2147  Params.HotCallSiteThreshold = HotCallSiteThreshold;
2148 
2149  // If the -locally-hot-callsite-threshold is explicitly specified, use it to
2150  // populate LocallyHotCallSiteThreshold. Later, we populate
2151  // Params.LocallyHotCallSiteThreshold from -locally-hot-callsite-threshold if
2152  // we know that optimization level is O3 (in the getInlineParams variant that
2153  // takes the opt and size levels).
2154  // FIXME: Remove this check (and make the assignment unconditional) after
2155  // addressing size regression issues at O2.
2156  if (LocallyHotCallSiteThreshold.getNumOccurrences() > 0)
2157  Params.LocallyHotCallSiteThreshold = LocallyHotCallSiteThreshold;
2158 
2159  // Set the ColdCallSiteThreshold knob from the -inline-cold-callsite-threshold.
2160  Params.ColdCallSiteThreshold = ColdCallSiteThreshold;
2161 
2162  // Set the OptMinSizeThreshold and OptSizeThreshold params only if the
2163  // -inlinehint-threshold commandline option is not explicitly given. If that
2164  // option is present, then its value applies even for callees with size and
2165  // minsize attributes.
2166  // If the -inline-threshold is not specified, set the ColdThreshold from the
2167  // -inlinecold-threshold even if it is not explicitly passed. If
2168  // -inline-threshold is specified, then -inlinecold-threshold needs to be
2169  // explicitly specified to set the ColdThreshold knob.
2170  if (InlineThreshold.getNumOccurrences() == 0) {
2171  Params.OptMinSizeThreshold = InlineConstants::OptMinSizeThreshold;
2172  Params.OptSizeThreshold = InlineConstants::OptSizeThreshold;
2173  Params.ColdThreshold = ColdThreshold;
2174  } else if (ColdThreshold.getNumOccurrences() > 0) {
2175  Params.ColdThreshold = ColdThreshold;
2176  }
2177  return Params;
2178 }
2179 
2180 InlineParams llvm::getInlineParams() {
2181  return getInlineParams(InlineThreshold);
2182 }
2183 
2184 // Compute the default threshold for inlining based on the opt level and the
2185 // size opt level.
2186 static int computeThresholdFromOptLevels(unsigned OptLevel,
2187  unsigned SizeOptLevel) {
2188  if (OptLevel > 2)
2189  return InlineConstants::OptAggressiveThreshold;
2190  if (SizeOptLevel == 1) // -Os
2191  return InlineConstants::OptSizeThreshold;
2192  if (SizeOptLevel == 2) // -Oz
2193  return InlineConstants::OptMinSizeThreshold;
2194  return InlineThreshold;
2195 }
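In tabular form, the effective defaults are below; the numeric values come from InlineCost.h in this release and should be treated as assumptions on other versions:

//   OptLevel > 2 (-O3)  -> InlineConstants::OptAggressiveThreshold (275)
//   SizeOptLevel == 1   -> InlineConstants::OptSizeThreshold (50), for -Os
//   SizeOptLevel == 2   -> InlineConstants::OptMinSizeThreshold (5), for -Oz
//   otherwise           -> the -inline-threshold flag (default 225)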
2196 
2197 InlineParams llvm::getInlineParams(unsigned OptLevel, unsigned SizeOptLevel) {
2198  auto Params =
2199  getInlineParams(computeThresholdFromOptLevels(OptLevel, SizeOptLevel));
2200  // At O3, use the value of -locally-hot-callsite-threshold option to populate
2201  // Params.LocallyHotCallSiteThreshold. Below O3, this flag has effect only
2202  // when it is specified explicitly.
2203  if (OptLevel > 2)
2204  Params.LocallyHotCallSiteThreshold = LocallyHotCallSiteThreshold;
2205  return Params;
2206 }
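Finally, a hedged usage sketch of this entry point; the function below is ours, and the pass plumbing that would normally surround it is elided:

#include "llvm/Analysis/InlineCost.h"
using namespace llvm;

// Build tuning parameters the way an -O3 pipeline would (a sketch).
void configureInlinerForO3() {
  InlineParams Params = getInlineParams(/*OptLevel=*/3, /*SizeOptLevel=*/0);
  // Assuming no explicit -inline-threshold flag and this release's constants,
  // Params.DefaultThreshold is 275, and Params.LocallyHotCallSiteThreshold is
  // populated because OptLevel > 2.
  (void)Params;
}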