InlineCost.cpp
00001 //===- InlineCost.cpp - Cost analysis for inliner -------------------------===//
00002 //
00003 //                     The LLVM Compiler Infrastructure
00004 //
00005 // This file is distributed under the University of Illinois Open Source
00006 // License. See LICENSE.TXT for details.
00007 //
00008 //===----------------------------------------------------------------------===//
00009 //
00010 // This file implements inline cost analysis.
00011 //
00012 //===----------------------------------------------------------------------===//
00013 
00014 #include "llvm/Analysis/InlineCost.h"
00015 #include "llvm/ADT/STLExtras.h"
00016 #include "llvm/ADT/SetVector.h"
00017 #include "llvm/ADT/SmallPtrSet.h"
00018 #include "llvm/ADT/SmallVector.h"
00019 #include "llvm/ADT/Statistic.h"
00020 #include "llvm/Analysis/AssumptionCache.h"
00021 #include "llvm/Analysis/CodeMetrics.h"
00022 #include "llvm/Analysis/ConstantFolding.h"
00023 #include "llvm/Analysis/InstructionSimplify.h"
00024 #include "llvm/Analysis/TargetTransformInfo.h"
00025 #include "llvm/IR/CallSite.h"
00026 #include "llvm/IR/CallingConv.h"
00027 #include "llvm/IR/DataLayout.h"
00028 #include "llvm/IR/GetElementPtrTypeIterator.h"
00029 #include "llvm/IR/GlobalAlias.h"
00030 #include "llvm/IR/InstVisitor.h"
00031 #include "llvm/IR/IntrinsicInst.h"
00032 #include "llvm/IR/Operator.h"
00033 #include "llvm/Support/Debug.h"
00034 #include "llvm/Support/raw_ostream.h"
00035 
00036 using namespace llvm;
00037 
00038 #define DEBUG_TYPE "inline-cost"
00039 
00040 STATISTIC(NumCallsAnalyzed, "Number of call sites analyzed");
00041 
00042 // Threshold to use when optsize is specified (and there is no
00043 // -inline-threshold).
00044 const int OptSizeThreshold = 75;
00045 
00046 // Threshold to use when -Oz is specified (and there is no -inline-threshold).
00047 const int OptMinSizeThreshold = 25;
00048 
00049 // Threshold to use when -O[34] is specified (and there is no
00050 // -inline-threshold).
00051 const int OptAggressiveThreshold = 275;
00052 
00053 static cl::opt<int> DefaultInlineThreshold(
00054     "inline-threshold", cl::Hidden, cl::init(225), cl::ZeroOrMore,
00055     cl::desc("Control the amount of inlining to perform (default = 225)"));
00056 
00057 static cl::opt<int> HintThreshold(
00058     "inlinehint-threshold", cl::Hidden, cl::init(325),
00059     cl::desc("Threshold for inlining functions with inline hint"));
00060 
00061 // We introduce this threshold to help performance of instrumentation-based
00062 // PGO before we actually hook up the inliner with analysis passes such as
00063 // BPI and BFI.
00064 static cl::opt<int> ColdThreshold(
00065     "inlinecold-threshold", cl::Hidden, cl::init(225),
00066     cl::desc("Threshold for inlining functions with cold attribute"));
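// Usage note (illustrative, not from the original source): these options are
// ordinary cl::opt flags, so they can be set on the opt command line, e.g.
// "opt -inline -inline-threshold=500 foo.ll". updateThreshold below uses
// getNumOccurrences() to tell an explicitly supplied threshold apart from the
// built-in default.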
00067 
00068 namespace {
00069 
00070 class CallAnalyzer : public InstVisitor<CallAnalyzer, bool> {
00071   typedef InstVisitor<CallAnalyzer, bool> Base;
00072   friend class InstVisitor<CallAnalyzer, bool>;
00073 
00074   /// The TargetTransformInfo available for this compilation.
00075   const TargetTransformInfo &TTI;
00076 
00077   /// The cache of @llvm.assume intrinsics.
00078   AssumptionCacheTracker *ACT;
00079 
00080   // The called function.
00081   Function &F;
00082 
00083   // The candidate callsite being analyzed. Please do not use this to do
00084   // analysis in the caller function; we want the inline cost query to be
00085   // easily cacheable. Instead, use the cover function paramHasAttr.
00086   CallSite CandidateCS;
00087 
00088   int Threshold;
00089   int Cost;
00090 
00091   bool IsCallerRecursive;
00092   bool IsRecursiveCall;
00093   bool ExposesReturnsTwice;
00094   bool HasDynamicAlloca;
00095   bool ContainsNoDuplicateCall;
00096   bool HasReturn;
00097   bool HasIndirectBr;
00098   bool HasFrameEscape;
00099 
00100   /// Number of bytes allocated statically by the callee.
00101   uint64_t AllocatedSize;
00102   unsigned NumInstructions, NumVectorInstructions;
00103   int FiftyPercentVectorBonus, TenPercentVectorBonus;
00104   int VectorBonus;
00105 
00106   // While we walk the potentially-inlined instructions, we build up and
00107   // maintain a mapping of simplified values specific to this callsite. The
00108   // idea is to propagate any special information we have about arguments to
00109   // this call through the inlinable section of the function, and account for
00110   // likely simplifications post-inlining. The most important aspect we track
00111   // is CFG altering simplifications -- when we prove a basic block dead, that
00112   // can cause dramatic shifts in the cost of inlining a function.
00113   DenseMap<Value *, Constant *> SimplifiedValues;
00114 
00115   // Keep track of the values which map back (through function arguments) to
00116   // allocas on the caller stack which could be simplified through SROA.
00117   DenseMap<Value *, Value *> SROAArgValues;
00118 
00119   // The mapping of caller Alloca values to their accumulated cost savings. If
00120   // we have to disable SROA for one of the allocas, this tells us how much
00121   // cost must be added.
00122   DenseMap<Value *, int> SROAArgCosts;
00123 
00124   // Keep track of values which map to a pointer base and constant offset.
00125   DenseMap<Value *, std::pair<Value *, APInt> > ConstantOffsetPtrs;
00126 
00127   // Custom simplification helper routines.
00128   bool isAllocaDerivedArg(Value *V);
00129   bool lookupSROAArgAndCost(Value *V, Value *&Arg,
00130                             DenseMap<Value *, int>::iterator &CostIt);
00131   void disableSROA(DenseMap<Value *, int>::iterator CostIt);
00132   void disableSROA(Value *V);
00133   void accumulateSROACost(DenseMap<Value *, int>::iterator CostIt,
00134                           int InstructionCost);
00135   bool isGEPOffsetConstant(GetElementPtrInst &GEP);
00136   bool accumulateGEPOffset(GEPOperator &GEP, APInt &Offset);
00137   bool simplifyCallSite(Function *F, CallSite CS);
00138   ConstantInt *stripAndComputeInBoundsConstantOffsets(Value *&V);
00139 
00140   /// Return true if the given argument to the function being considered for
00141   /// inlining has the given attribute set either at the call site or the
00142   /// function declaration.  Primarily used to inspect call site specific
00143   /// attributes since these can be more precise than the ones on the callee
00144   /// itself.
00145   bool paramHasAttr(Argument *A, Attribute::AttrKind Attr);
00146   
00147   /// Return true if the given value is known non null within the callee if
00148   /// inlined through this particular callsite.
00149   bool isKnownNonNullInCallee(Value *V);
00150 
00151   /// Update Threshold based on callsite properties such as callee
00152   /// attributes and callee hotness for PGO builds. The Callee is explicitly
00153   /// passed to support analyzing indirect calls whose target is inferred by
00154   /// analysis.
00155   void updateThreshold(CallSite CS, Function &Callee);
00156 
00157   // Custom analysis routines.
00158   bool analyzeBlock(BasicBlock *BB, SmallPtrSetImpl<const Value *> &EphValues);
00159 
00160   // Disable several entry points to the visitor so we don't accidentally use
00161   // them by declaring but not defining them here.
00162   void visit(Module *);     void visit(Module &);
00163   void visit(Function *);   void visit(Function &);
00164   void visit(BasicBlock *); void visit(BasicBlock &);
00165 
00166   // Provide base case for our instruction visit.
00167   bool visitInstruction(Instruction &I);
00168 
00169   // Our visit overrides.
00170   bool visitAlloca(AllocaInst &I);
00171   bool visitPHI(PHINode &I);
00172   bool visitGetElementPtr(GetElementPtrInst &I);
00173   bool visitBitCast(BitCastInst &I);
00174   bool visitPtrToInt(PtrToIntInst &I);
00175   bool visitIntToPtr(IntToPtrInst &I);
00176   bool visitCastInst(CastInst &I);
00177   bool visitUnaryInstruction(UnaryInstruction &I);
00178   bool visitCmpInst(CmpInst &I);
00179   bool visitSub(BinaryOperator &I);
00180   bool visitBinaryOperator(BinaryOperator &I);
00181   bool visitLoad(LoadInst &I);
00182   bool visitStore(StoreInst &I);
00183   bool visitExtractValue(ExtractValueInst &I);
00184   bool visitInsertValue(InsertValueInst &I);
00185   bool visitCallSite(CallSite CS);
00186   bool visitReturnInst(ReturnInst &RI);
00187   bool visitBranchInst(BranchInst &BI);
00188   bool visitSwitchInst(SwitchInst &SI);
00189   bool visitIndirectBrInst(IndirectBrInst &IBI);
00190   bool visitResumeInst(ResumeInst &RI);
00191   bool visitCleanupReturnInst(CleanupReturnInst &RI);
00192   bool visitCatchReturnInst(CatchReturnInst &RI);
00193   bool visitUnreachableInst(UnreachableInst &I);
00194 
00195 public:
00196   CallAnalyzer(const TargetTransformInfo &TTI, AssumptionCacheTracker *ACT,
00197                Function &Callee, int Threshold, CallSite CSArg)
00198     : TTI(TTI), ACT(ACT), F(Callee), CandidateCS(CSArg), Threshold(Threshold),
00199         Cost(0), IsCallerRecursive(false), IsRecursiveCall(false),
00200         ExposesReturnsTwice(false), HasDynamicAlloca(false),
00201         ContainsNoDuplicateCall(false), HasReturn(false), HasIndirectBr(false),
00202         HasFrameEscape(false), AllocatedSize(0), NumInstructions(0),
00203         NumVectorInstructions(0), FiftyPercentVectorBonus(0),
00204         TenPercentVectorBonus(0), VectorBonus(0), NumConstantArgs(0),
00205         NumConstantOffsetPtrArgs(0), NumAllocaArgs(0), NumConstantPtrCmps(0),
00206         NumConstantPtrDiffs(0), NumInstructionsSimplified(0),
00207         SROACostSavings(0), SROACostSavingsLost(0) {}
00208 
00209   bool analyzeCall(CallSite CS);
00210 
00211   int getThreshold() { return Threshold; }
00212   int getCost() { return Cost; }
00213 
00214   // Keep a bunch of stats about the cost savings found so we can print them
00215   // out when debugging.
00216   unsigned NumConstantArgs;
00217   unsigned NumConstantOffsetPtrArgs;
00218   unsigned NumAllocaArgs;
00219   unsigned NumConstantPtrCmps;
00220   unsigned NumConstantPtrDiffs;
00221   unsigned NumInstructionsSimplified;
00222   unsigned SROACostSavings;
00223   unsigned SROACostSavingsLost;
00224 
00225   void dump();
00226 };
00227 
00228 } // namespace
00229 
00230 /// \brief Test whether the given value is an Alloca-derived function argument.
00231 bool CallAnalyzer::isAllocaDerivedArg(Value *V) {
00232   return SROAArgValues.count(V);
00233 }
00234 
00235 /// \brief Look up the SROA-candidate argument and cost iterator which V maps to.
00236 /// Returns false if V does not map to an SROA candidate.
00237 bool CallAnalyzer::lookupSROAArgAndCost(
00238     Value *V, Value *&Arg, DenseMap<Value *, int>::iterator &CostIt) {
00239   if (SROAArgValues.empty() || SROAArgCosts.empty())
00240     return false;
00241 
00242   DenseMap<Value *, Value *>::iterator ArgIt = SROAArgValues.find(V);
00243   if (ArgIt == SROAArgValues.end())
00244     return false;
00245 
00246   Arg = ArgIt->second;
00247   CostIt = SROAArgCosts.find(Arg);
00248   return CostIt != SROAArgCosts.end();
00249 }
00250 
00251 /// \brief Disable SROA for the candidate marked by this cost iterator.
00252 ///
00253 /// This marks the candidate as no longer viable for SROA, and adds the cost
00254 /// savings associated with it back into the inline cost measurement.
00255 void CallAnalyzer::disableSROA(DenseMap<Value *, int>::iterator CostIt) {
00256   // If we're no longer able to perform SROA we need to undo its cost savings
00257   // and prevent subsequent analysis.
00258   Cost += CostIt->second;
00259   SROACostSavings -= CostIt->second;
00260   SROACostSavingsLost += CostIt->second;
00261   SROAArgCosts.erase(CostIt);
00262 }
00263 
00264 /// \brief If 'V' maps to a SROA candidate, disable SROA for it.
00265 void CallAnalyzer::disableSROA(Value *V) {
00266   Value *SROAArg;
00267   DenseMap<Value *, int>::iterator CostIt;
00268   if (lookupSROAArgAndCost(V, SROAArg, CostIt))
00269     disableSROA(CostIt);
00270 }
00271 
00272 /// \brief Accumulate the given cost for a particular SROA candidate.
00273 void CallAnalyzer::accumulateSROACost(DenseMap<Value *, int>::iterator CostIt,
00274                                       int InstructionCost) {
00275   CostIt->second += InstructionCost;
00276   SROACostSavings += InstructionCost;
00277 }
00278 
00279 /// \brief Check whether a GEP's indices are all constant.
00280 ///
00281 /// Respects any simplified values known during the analysis of this callsite.
00282 bool CallAnalyzer::isGEPOffsetConstant(GetElementPtrInst &GEP) {
00283   for (User::op_iterator I = GEP.idx_begin(), E = GEP.idx_end(); I != E; ++I)
00284     if (!isa<Constant>(*I) && !SimplifiedValues.lookup(*I))
00285       return false;
00286 
00287   return true;
00288 }
00289 
00290 /// \brief Accumulate a constant GEP offset into an APInt if possible.
00291 ///
00292 /// Returns false if unable to compute the offset for any reason. Respects any
00293 /// simplified values known during the analysis of this callsite.
00294 bool CallAnalyzer::accumulateGEPOffset(GEPOperator &GEP, APInt &Offset) {
00295   const DataLayout &DL = F.getParent()->getDataLayout();
00296   unsigned IntPtrWidth = DL.getPointerSizeInBits();
00297   assert(IntPtrWidth == Offset.getBitWidth());
00298 
00299   for (gep_type_iterator GTI = gep_type_begin(GEP), GTE = gep_type_end(GEP);
00300        GTI != GTE; ++GTI) {
00301     ConstantInt *OpC = dyn_cast<ConstantInt>(GTI.getOperand());
00302     if (!OpC)
00303       if (Constant *SimpleOp = SimplifiedValues.lookup(GTI.getOperand()))
00304         OpC = dyn_cast<ConstantInt>(SimpleOp);
00305     if (!OpC)
00306       return false;
00307     if (OpC->isZero()) continue;
00308 
00309     // Handle a struct index, which adds its field offset to the pointer.
00310     if (StructType *STy = dyn_cast<StructType>(*GTI)) {
00311       unsigned ElementIdx = OpC->getZExtValue();
00312       const StructLayout *SL = DL.getStructLayout(STy);
00313       Offset += APInt(IntPtrWidth, SL->getElementOffset(ElementIdx));
00314       continue;
00315     }
00316 
00317     APInt TypeSize(IntPtrWidth, DL.getTypeAllocSize(GTI.getIndexedType()));
00318     Offset += OpC->getValue().sextOrTrunc(IntPtrWidth) * TypeSize;
00319   }
00320   return true;
00321 }
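// Illustrative sketch (assumed types and layout, not from the original
// source): for a GEP such as
//   %p = getelementptr inbounds %struct.S, %struct.S* %base, i32 0, i32 2
// where %struct.S = type { i32, i32, i64 }, the struct index 2 contributes the
// field offset reported by the StructLayout (8 bytes with natural alignment),
// so the accumulated Offset becomes base + 8. An array-style index instead
// multiplies the index value by getTypeAllocSize of the indexed type. The
// exact byte offsets depend on the module's DataLayout.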
00322 
00323 bool CallAnalyzer::visitAlloca(AllocaInst &I) {
00324   // Check whether inlining will turn a dynamic alloca into a static
00325   // alloca, and handle that case.
00326   if (I.isArrayAllocation()) {
00327     if (Constant *Size = SimplifiedValues.lookup(I.getArraySize())) {
00328       ConstantInt *AllocSize = dyn_cast<ConstantInt>(Size);
00329       assert(AllocSize && "Allocation size not a constant int?");
00330       Type *Ty = I.getAllocatedType();
00331       AllocatedSize += Ty->getPrimitiveSizeInBits() * AllocSize->getZExtValue();
00332       return Base::visitAlloca(I);
00333     }
00334   }
00335 
00336   // Accumulate the allocated size.
00337   if (I.isStaticAlloca()) {
00338     const DataLayout &DL = F.getParent()->getDataLayout();
00339     Type *Ty = I.getAllocatedType();
00340     AllocatedSize += DL.getTypeAllocSize(Ty);
00341   }
00342 
00343   // We will happily inline static alloca instructions.
00344   if (I.isStaticAlloca())
00345     return Base::visitAlloca(I);
00346 
00347   // FIXME: This is overly conservative. Dynamic allocas are inefficient for
00348   // a variety of reasons, and so we would like to not inline them into
00349   // functions which don't currently have a dynamic alloca. This simply
00350   // disables inlining altogether in the presence of a dynamic alloca.
00351   HasDynamicAlloca = true;
00352   return false;
00353 }
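// Worked example (illustrative, assuming a typical 64-bit DataLayout): a
// callee containing "%buf = alloca [16 x i32]" is a static alloca, so
// getTypeAllocSize adds 64 bytes to AllocatedSize and the instruction is then
// handled by the generic visitor via Base::visitAlloca. A dynamic alloca whose
// size cannot be simplified to a constant instead sets HasDynamicAlloca and
// blocks inlining.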
00354 
00355 bool CallAnalyzer::visitPHI(PHINode &I) {
00356   // FIXME: We should potentially be tracking values through phi nodes,
00357   // especially when they collapse to a single value due to deleted CFG edges
00358   // during inlining.
00359 
00360   // FIXME: We need to propagate SROA *disabling* through phi nodes, even
00361   // though we don't want to propagate its bonuses. The idea is to disable
00362   // SROA if it *might* be used in an inappropriate manner.
00363 
00364   // Phi nodes are always zero-cost.
00365   return true;
00366 }
00367 
00368 bool CallAnalyzer::visitGetElementPtr(GetElementPtrInst &I) {
00369   Value *SROAArg;
00370   DenseMap<Value *, int>::iterator CostIt;
00371   bool SROACandidate = lookupSROAArgAndCost(I.getPointerOperand(),
00372                                             SROAArg, CostIt);
00373 
00374   // Try to fold GEPs of constant-offset call site argument pointers. This
00375   // requires target data and inbounds GEPs.
00376   if (I.isInBounds()) {
00377     // Check if we have a base + offset for the pointer.
00378     Value *Ptr = I.getPointerOperand();
00379     std::pair<Value *, APInt> BaseAndOffset = ConstantOffsetPtrs.lookup(Ptr);
00380     if (BaseAndOffset.first) {
00381       // Check if the offset of this GEP is constant, and if so accumulate it
00382       // into Offset.
00383       if (!accumulateGEPOffset(cast<GEPOperator>(I), BaseAndOffset.second)) {
00384         // Non-constant GEPs aren't folded, and they disable SROA.
00385         if (SROACandidate)
00386           disableSROA(CostIt);
00387         return false;
00388       }
00389 
00390       // Add the result as a new mapping to Base + Offset.
00391       ConstantOffsetPtrs[&I] = BaseAndOffset;
00392 
00393       // Also handle SROA candidates here; we already know that the GEP has
00394       // all-constant indices.
00395       if (SROACandidate)
00396         SROAArgValues[&I] = SROAArg;
00397 
00398       return true;
00399     }
00400   }
00401 
00402   if (isGEPOffsetConstant(I)) {
00403     if (SROACandidate)
00404       SROAArgValues[&I] = SROAArg;
00405 
00406     // Constant GEPs are modeled as free.
00407     return true;
00408   }
00409 
00410   // Variable GEPs will require math and will disable SROA.
00411   if (SROACandidate)
00412     disableSROA(CostIt);
00413   return false;
00414 }
00415 
00416 bool CallAnalyzer::visitBitCast(BitCastInst &I) {
00417   // Propagate constants through bitcasts.
00418   Constant *COp = dyn_cast<Constant>(I.getOperand(0));
00419   if (!COp)
00420     COp = SimplifiedValues.lookup(I.getOperand(0));
00421   if (COp)
00422     if (Constant *C = ConstantExpr::getBitCast(COp, I.getType())) {
00423       SimplifiedValues[&I] = C;
00424       return true;
00425     }
00426 
00427   // Track base/offsets through casts
00428   std::pair<Value *, APInt> BaseAndOffset
00429     = ConstantOffsetPtrs.lookup(I.getOperand(0));
00430   // Casts don't change the offset, just wrap it up.
00431   if (BaseAndOffset.first)
00432     ConstantOffsetPtrs[&I] = BaseAndOffset;
00433 
00434   // Also look for SROA candidates here.
00435   Value *SROAArg;
00436   DenseMap<Value *, int>::iterator CostIt;
00437   if (lookupSROAArgAndCost(I.getOperand(0), SROAArg, CostIt))
00438     SROAArgValues[&I] = SROAArg;
00439 
00440   // Bitcasts are always zero cost.
00441   return true;
00442 }
00443 
00444 bool CallAnalyzer::visitPtrToInt(PtrToIntInst &I) {
00445   // Propagate constants through ptrtoint.
00446   Constant *COp = dyn_cast<Constant>(I.getOperand(0));
00447   if (!COp)
00448     COp = SimplifiedValues.lookup(I.getOperand(0));
00449   if (COp)
00450     if (Constant *C = ConstantExpr::getPtrToInt(COp, I.getType())) {
00451       SimplifiedValues[&I] = C;
00452       return true;
00453     }
00454 
00455   // Track base/offset pairs when converted to a plain integer provided the
00456   // integer is large enough to represent the pointer.
00457   unsigned IntegerSize = I.getType()->getScalarSizeInBits();
00458   const DataLayout &DL = F.getParent()->getDataLayout();
00459   if (IntegerSize >= DL.getPointerSizeInBits()) {
00460     std::pair<Value *, APInt> BaseAndOffset
00461       = ConstantOffsetPtrs.lookup(I.getOperand(0));
00462     if (BaseAndOffset.first)
00463       ConstantOffsetPtrs[&I] = BaseAndOffset;
00464   }
00465 
00466   // This is really weird. Technically, ptrtoint will disable SROA. However,
00467   // unless that ptrtoint is *used* somewhere in the live basic blocks after
00468   // inlining, it will be nuked, and SROA should proceed. All of the uses which
00469   // would block SROA would also block SROA if applied directly to a pointer,
00470   // and so we can just add the integer in here. The only places where SROA is
00471   // preserved either cannot fire on an integer, or won't in-and-of themselves
00472   // disable SROA (ext) w/o some later use that we would see and disable.
00473   Value *SROAArg;
00474   DenseMap<Value *, int>::iterator CostIt;
00475   if (lookupSROAArgAndCost(I.getOperand(0), SROAArg, CostIt))
00476     SROAArgValues[&I] = SROAArg;
00477 
00478   return TargetTransformInfo::TCC_Free == TTI.getUserCost(&I);
00479 }
00480 
00481 bool CallAnalyzer::visitIntToPtr(IntToPtrInst &I) {
00482   // Propagate constants through inttoptr.
00483   Constant *COp = dyn_cast<Constant>(I.getOperand(0));
00484   if (!COp)
00485     COp = SimplifiedValues.lookup(I.getOperand(0));
00486   if (COp)
00487     if (Constant *C = ConstantExpr::getIntToPtr(COp, I.getType())) {
00488       SimplifiedValues[&I] = C;
00489       return true;
00490     }
00491 
00492   // Track base/offset pairs when round-tripped through a pointer without
00493   // modifications, provided the integer is not too large.
00494   Value *Op = I.getOperand(0);
00495   unsigned IntegerSize = Op->getType()->getScalarSizeInBits();
00496   const DataLayout &DL = F.getParent()->getDataLayout();
00497   if (IntegerSize <= DL.getPointerSizeInBits()) {
00498     std::pair<Value *, APInt> BaseAndOffset = ConstantOffsetPtrs.lookup(Op);
00499     if (BaseAndOffset.first)
00500       ConstantOffsetPtrs[&I] = BaseAndOffset;
00501   }
00502 
00503   // "Propagate" SROA here in the same manner as we do for ptrtoint above.
00504   Value *SROAArg;
00505   DenseMap<Value *, int>::iterator CostIt;
00506   if (lookupSROAArgAndCost(Op, SROAArg, CostIt))
00507     SROAArgValues[&I] = SROAArg;
00508 
00509   return TargetTransformInfo::TCC_Free == TTI.getUserCost(&I);
00510 }
00511 
00512 bool CallAnalyzer::visitCastInst(CastInst &I) {
00513   // Propagate constants through casts.
00514   Constant *COp = dyn_cast<Constant>(I.getOperand(0));
00515   if (!COp)
00516     COp = SimplifiedValues.lookup(I.getOperand(0));
00517   if (COp)
00518     if (Constant *C = ConstantExpr::getCast(I.getOpcode(), COp, I.getType())) {
00519       SimplifiedValues[&I] = C;
00520       return true;
00521     }
00522 
00523   // Disable SROA in the face of arbitrary casts we don't whitelist elsewhere.
00524   disableSROA(I.getOperand(0));
00525 
00526   return TargetTransformInfo::TCC_Free == TTI.getUserCost(&I);
00527 }
00528 
00529 bool CallAnalyzer::visitUnaryInstruction(UnaryInstruction &I) {
00530   Value *Operand = I.getOperand(0);
00531   Constant *COp = dyn_cast<Constant>(Operand);
00532   if (!COp)
00533     COp = SimplifiedValues.lookup(Operand);
00534   if (COp) {
00535     const DataLayout &DL = F.getParent()->getDataLayout();
00536     if (Constant *C = ConstantFoldInstOperands(&I, COp, DL)) {
00537       SimplifiedValues[&I] = C;
00538       return true;
00539     }
00540   }
00541 
00542   // Disable any SROA on the argument to arbitrary unary operators.
00543   disableSROA(Operand);
00544 
00545   return false;
00546 }
00547 
00548 bool CallAnalyzer::paramHasAttr(Argument *A, Attribute::AttrKind Attr) {
00549   unsigned ArgNo = A->getArgNo();
00550   return CandidateCS.paramHasAttr(ArgNo+1, Attr);
00551 }
00552 
00553 bool CallAnalyzer::isKnownNonNullInCallee(Value *V) {
00554   // Does the *call site* have the NonNull attribute set on an argument?  We
00555   // use the attribute on the call site to memoize any analysis done in the
00556   // caller. This will also trip if the callee function has a non-null
00557   // parameter attribute, but that's a less interesting case because hopefully
00558   // the callee would already have been simplified based on that.
00559   if (Argument *A = dyn_cast<Argument>(V))
00560     if (paramHasAttr(A, Attribute::NonNull))
00561       return true;
00562   
00563   // Is this an alloca in the caller?  This is distinct from the attribute case
00564   // above because attributes aren't updated within the inliner itself and we
00565   // always want to catch the alloca derived case.
00566   if (isAllocaDerivedArg(V))
00567     // We can actually predict the result of comparisons between an
00568     // alloca-derived value and null. Note that this fires regardless of
00569     // SROA firing.
00570     return true;
00571   
00572   return false;
00573 }
00574 
00575 void CallAnalyzer::updateThreshold(CallSite CS, Function &Callee) {
00576   // If -inline-threshold is not given, listen to the optsize attribute when it
00577   // would decrease the threshold.
00578   Function *Caller = CS.getCaller();
00579 
00580   // FIXME: Use Function::optForSize()
00581   bool OptSize = Caller->hasFnAttribute(Attribute::OptimizeForSize);
00582 
00583   if (!(DefaultInlineThreshold.getNumOccurrences() > 0) && OptSize &&
00584       OptSizeThreshold < Threshold)
00585     Threshold = OptSizeThreshold;
00586 
00587   // If profile information is available, use that to adjust threshold of hot
00588   // and cold functions.
00589   // FIXME: The heuristics used below for determining hotness and coldness are
00590   // based on preliminary SPEC tuning and may not be optimal. Replace this with
00591   // a well-tuned heuristic based on *callsite* hotness and not callee hotness.
00592   uint64_t FunctionCount = 0, MaxFunctionCount = 0;
00593   bool HasPGOCounts = false;
00594   if (Callee.getEntryCount() && Callee.getParent()->getMaximumFunctionCount()) {
00595     HasPGOCounts = true;
00596     FunctionCount = Callee.getEntryCount().getValue();
00597     MaxFunctionCount = Callee.getParent()->getMaximumFunctionCount().getValue();
00598   }
00599 
00600   // Listen to the inlinehint attribute or profile based hotness information
00601   // when it would increase the threshold and the caller does not need to
00602   // minimize its size.
00603   bool InlineHint =
00604       Callee.hasFnAttribute(Attribute::InlineHint) ||
00605       (HasPGOCounts &&
00606        FunctionCount >= (uint64_t)(0.3 * (double)MaxFunctionCount));
00607   if (InlineHint && HintThreshold > Threshold && !Caller->optForMinSize())
00608     Threshold = HintThreshold;
00609 
00610   // Listen to the cold attribute or profile based coldness information
00611   // when it would decrease the threshold.
00612   bool ColdCallee =
00613       Callee.hasFnAttribute(Attribute::Cold) ||
00614       (HasPGOCounts &&
00615        FunctionCount <= (uint64_t)(0.01 * (double)MaxFunctionCount));
00616   // A command-line -inline-threshold overrides the default ColdThreshold: if we
00617   // have -inline-threshold but no -inlinecold-threshold, do not use the default
00618   // cold threshold even if it is smaller.
00619   if ((DefaultInlineThreshold.getNumOccurrences() == 0 ||
00620        ColdThreshold.getNumOccurrences() > 0) &&
00621       ColdCallee && ColdThreshold < Threshold)
00622     Threshold = ColdThreshold;
00623 }
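// Worked example (illustrative, restating the defaults declared above): with
// the default -inline-threshold of 225 and no explicit flags, a callee
// carrying the inlinehint attribute, or a PGO entry count of at least 30% of
// the module maximum, raises Threshold to the inlinehint-threshold of 325,
// unless the caller is optimizing for minimum size. Conversely, an optsize
// caller lowers Threshold to 75, and a cold callee (cold attribute, or an
// entry count at or below 1% of the maximum) lowers it to the
// inlinecold-threshold when that is smaller.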
00624 
00625 bool CallAnalyzer::visitCmpInst(CmpInst &I) {
00626   Value *LHS = I.getOperand(0), *RHS = I.getOperand(1);
00627   // First try to handle simplified comparisons.
00628   if (!isa<Constant>(LHS))
00629     if (Constant *SimpleLHS = SimplifiedValues.lookup(LHS))
00630       LHS = SimpleLHS;
00631   if (!isa<Constant>(RHS))
00632     if (Constant *SimpleRHS = SimplifiedValues.lookup(RHS))
00633       RHS = SimpleRHS;
00634   if (Constant *CLHS = dyn_cast<Constant>(LHS)) {
00635     if (Constant *CRHS = dyn_cast<Constant>(RHS))
00636       if (Constant *C = ConstantExpr::getCompare(I.getPredicate(), CLHS, CRHS)) {
00637         SimplifiedValues[&I] = C;
00638         return true;
00639       }
00640   }
00641 
00642   if (I.getOpcode() == Instruction::FCmp)
00643     return false;
00644 
00645   // Otherwise look for a comparison between constant offset pointers with
00646   // a common base.
00647   Value *LHSBase, *RHSBase;
00648   APInt LHSOffset, RHSOffset;
00649   std::tie(LHSBase, LHSOffset) = ConstantOffsetPtrs.lookup(LHS);
00650   if (LHSBase) {
00651     std::tie(RHSBase, RHSOffset) = ConstantOffsetPtrs.lookup(RHS);
00652     if (RHSBase && LHSBase == RHSBase) {
00653       // We have common bases, fold the icmp to a constant based on the
00654       // offsets.
00655       Constant *CLHS = ConstantInt::get(LHS->getContext(), LHSOffset);
00656       Constant *CRHS = ConstantInt::get(RHS->getContext(), RHSOffset);
00657       if (Constant *C = ConstantExpr::getICmp(I.getPredicate(), CLHS, CRHS)) {
00658         SimplifiedValues[&I] = C;
00659         ++NumConstantPtrCmps;
00660         return true;
00661       }
00662     }
00663   }
00664 
00665   // If the comparison is an equality comparison with null, we can simplify it
00666   // if we know the value (argument) can't be null.
00667   if (I.isEquality() && isa<ConstantPointerNull>(I.getOperand(1)) &&
00668       isKnownNonNullInCallee(I.getOperand(0))) {
00669     bool IsNotEqual = I.getPredicate() == CmpInst::ICMP_NE;
00670     SimplifiedValues[&I] = IsNotEqual ? ConstantInt::getTrue(I.getType())
00671                                       : ConstantInt::getFalse(I.getType());
00672     return true;
00673   }
00674   // Finally check for SROA candidates in comparisons.
00675   Value *SROAArg;
00676   DenseMap<Value *, int>::iterator CostIt;
00677   if (lookupSROAArgAndCost(I.getOperand(0), SROAArg, CostIt)) {
00678     if (isa<ConstantPointerNull>(I.getOperand(1))) {
00679       accumulateSROACost(CostIt, InlineConstants::InstrCost);
00680       return true;
00681     }
00682 
00683     disableSROA(CostIt);
00684   }
00685 
00686   return false;
00687 }
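// Illustrative example: if both operands of an icmp are constant-offset
// pointers off the same base (say base+4 and base+12, as tracked in
// ConstantOffsetPtrs), the comparison folds to a constant computed from the
// offsets and is free. Likewise "icmp eq %arg, null" folds to false when the
// argument is marked nonnull at this call site or is derived from a caller
// alloca.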
00688 
00689 bool CallAnalyzer::visitSub(BinaryOperator &I) {
00690   // Try to handle a special case: we can fold computing the difference of two
00691   // constant-related pointers.
00692   Value *LHS = I.getOperand(0), *RHS = I.getOperand(1);
00693   Value *LHSBase, *RHSBase;
00694   APInt LHSOffset, RHSOffset;
00695   std::tie(LHSBase, LHSOffset) = ConstantOffsetPtrs.lookup(LHS);
00696   if (LHSBase) {
00697     std::tie(RHSBase, RHSOffset) = ConstantOffsetPtrs.lookup(RHS);
00698     if (RHSBase && LHSBase == RHSBase) {
00699       // We have common bases, fold the subtract to a constant based on the
00700       // offsets.
00701       Constant *CLHS = ConstantInt::get(LHS->getContext(), LHSOffset);
00702       Constant *CRHS = ConstantInt::get(RHS->getContext(), RHSOffset);
00703       if (Constant *C = ConstantExpr::getSub(CLHS, CRHS)) {
00704         SimplifiedValues[&I] = C;
00705         ++NumConstantPtrDiffs;
00706         return true;
00707       }
00708     }
00709   }
00710 
00711   // Otherwise, fall back to the generic logic for simplifying and handling
00712   // instructions.
00713   return Base::visitSub(I);
00714 }
00715 
00716 bool CallAnalyzer::visitBinaryOperator(BinaryOperator &I) {
00717   Value *LHS = I.getOperand(0), *RHS = I.getOperand(1);
00718   const DataLayout &DL = F.getParent()->getDataLayout();
00719   if (!isa<Constant>(LHS))
00720     if (Constant *SimpleLHS = SimplifiedValues.lookup(LHS))
00721       LHS = SimpleLHS;
00722   if (!isa<Constant>(RHS))
00723     if (Constant *SimpleRHS = SimplifiedValues.lookup(RHS))
00724       RHS = SimpleRHS;
00725   Value *SimpleV = nullptr;
00726   if (auto FI = dyn_cast<FPMathOperator>(&I))
00727     SimpleV =
00728         SimplifyFPBinOp(I.getOpcode(), LHS, RHS, FI->getFastMathFlags(), DL);
00729   else
00730     SimpleV = SimplifyBinOp(I.getOpcode(), LHS, RHS, DL);
00731 
00732   if (Constant *C = dyn_cast_or_null<Constant>(SimpleV)) {
00733     SimplifiedValues[&I] = C;
00734     return true;
00735   }
00736 
00737   // Disable any SROA on arguments to arbitrary, unsimplified binary operators.
00738   disableSROA(LHS);
00739   disableSROA(RHS);
00740 
00741   return false;
00742 }
00743 
00744 bool CallAnalyzer::visitLoad(LoadInst &I) {
00745   Value *SROAArg;
00746   DenseMap<Value *, int>::iterator CostIt;
00747   if (lookupSROAArgAndCost(I.getPointerOperand(), SROAArg, CostIt)) {
00748     if (I.isSimple()) {
00749       accumulateSROACost(CostIt, InlineConstants::InstrCost);
00750       return true;
00751     }
00752 
00753     disableSROA(CostIt);
00754   }
00755 
00756   return false;
00757 }
00758 
00759 bool CallAnalyzer::visitStore(StoreInst &I) {
00760   Value *SROAArg;
00761   DenseMap<Value *, int>::iterator CostIt;
00762   if (lookupSROAArgAndCost(I.getPointerOperand(), SROAArg, CostIt)) {
00763     if (I.isSimple()) {
00764       accumulateSROACost(CostIt, InlineConstants::InstrCost);
00765       return true;
00766     }
00767 
00768     disableSROA(CostIt);
00769   }
00770 
00771   return false;
00772 }
00773 
00774 bool CallAnalyzer::visitExtractValue(ExtractValueInst &I) {
00775   // Constant folding for extract value is trivial.
00776   Constant *C = dyn_cast<Constant>(I.getAggregateOperand());
00777   if (!C)
00778     C = SimplifiedValues.lookup(I.getAggregateOperand());
00779   if (C) {
00780     SimplifiedValues[&I] = ConstantExpr::getExtractValue(C, I.getIndices());
00781     return true;
00782   }
00783 
00784   // SROA can look through these but give them a cost.
00785   return false;
00786 }
00787 
00788 bool CallAnalyzer::visitInsertValue(InsertValueInst &I) {
00789   // Constant folding for insert value is trivial.
00790   Constant *AggC = dyn_cast<Constant>(I.getAggregateOperand());
00791   if (!AggC)
00792     AggC = SimplifiedValues.lookup(I.getAggregateOperand());
00793   Constant *InsertedC = dyn_cast<Constant>(I.getInsertedValueOperand());
00794   if (!InsertedC)
00795     InsertedC = SimplifiedValues.lookup(I.getInsertedValueOperand());
00796   if (AggC && InsertedC) {
00797     SimplifiedValues[&I] = ConstantExpr::getInsertValue(AggC, InsertedC,
00798                                                         I.getIndices());
00799     return true;
00800   }
00801 
00802   // SROA can look through these but give them a cost.
00803   return false;
00804 }
00805 
00806 /// \brief Try to simplify a call site.
00807 ///
00808 /// Takes a concrete function and callsite and tries to actually simplify it by
00809 /// analyzing the arguments and call itself with instsimplify. Returns true if
00810 /// it has simplified the callsite to some other entity (a constant), making it
00811 /// free.
00812 bool CallAnalyzer::simplifyCallSite(Function *F, CallSite CS) {
00813   // FIXME: Using the instsimplify logic directly for this is inefficient
00814   // because we have to continually rebuild the argument list even when no
00815   // simplifications can be performed. Until that is fixed with remapping
00816   // inside of instsimplify, directly constant fold calls here.
00817   if (!canConstantFoldCallTo(F))
00818     return false;
00819 
00820   // Try to re-map the arguments to constants.
00821   SmallVector<Constant *, 4> ConstantArgs;
00822   ConstantArgs.reserve(CS.arg_size());
00823   for (CallSite::arg_iterator I = CS.arg_begin(), E = CS.arg_end();
00824        I != E; ++I) {
00825     Constant *C = dyn_cast<Constant>(*I);
00826     if (!C)
00827       C = dyn_cast_or_null<Constant>(SimplifiedValues.lookup(*I));
00828     if (!C)
00829       return false; // This argument doesn't map to a constant.
00830 
00831     ConstantArgs.push_back(C);
00832   }
00833   if (Constant *C = ConstantFoldCall(F, ConstantArgs)) {
00834     SimplifiedValues[CS.getInstruction()] = C;
00835     return true;
00836   }
00837 
00838   return false;
00839 }
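// Illustrative sketch (hypothetical call, assuming the intrinsic remains
// constant-foldable): a call such as
//   %r = call float @llvm.fabs.f32(float %x)
// becomes free when %x maps to a constant through SimplifiedValues, because
// canConstantFoldCallTo accepts the callee and ConstantFoldCall produces the
// folded result, which is then recorded for the call instruction itself.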
00840 
00841 bool CallAnalyzer::visitCallSite(CallSite CS) {
00842   if (CS.hasFnAttr(Attribute::ReturnsTwice) &&
00843       !F.hasFnAttribute(Attribute::ReturnsTwice)) {
00844     // This aborts the entire analysis.
00845     ExposesReturnsTwice = true;
00846     return false;
00847   }
00848   if (CS.isCall() &&
00849       cast<CallInst>(CS.getInstruction())->cannotDuplicate())
00850     ContainsNoDuplicateCall = true;
00851 
00852   if (Function *F = CS.getCalledFunction()) {
00853     // When we have a concrete function, first try to simplify it directly.
00854     if (simplifyCallSite(F, CS))
00855       return true;
00856 
00857     // Next check if it is an intrinsic we know about.
00858     // FIXME: Lift this into part of the InstVisitor.
00859     if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(CS.getInstruction())) {
00860       switch (II->getIntrinsicID()) {
00861       default:
00862         return Base::visitCallSite(CS);
00863 
00864       case Intrinsic::memset:
00865       case Intrinsic::memcpy:
00866       case Intrinsic::memmove:
00867         // SROA can usually chew through these intrinsics, but they aren't free.
00868         return false;
00869       case Intrinsic::localescape:
00870         HasFrameEscape = true;
00871         return false;
00872       }
00873     }
00874 
00875     if (F == CS.getInstruction()->getParent()->getParent()) {
00876       // This flag will fully abort the analysis, so don't bother with anything
00877       // else.
00878       IsRecursiveCall = true;
00879       return false;
00880     }
00881 
00882     if (TTI.isLoweredToCall(F)) {
00883       // We account for an average of one setup instruction per call argument
00884       // here.
00885       Cost += CS.arg_size() * InlineConstants::InstrCost;
00886 
00887       // Everything other than inline ASM will also have a significant cost
00888       // merely from making the call.
00889       if (!isa<InlineAsm>(CS.getCalledValue()))
00890         Cost += InlineConstants::CallPenalty;
00891     }
00892 
00893     return Base::visitCallSite(CS);
00894   }
00895 
00896   // Otherwise we're in a very special case -- an indirect function call. See
00897   // if we can be particularly clever about this.
00898   Value *Callee = CS.getCalledValue();
00899 
00900   // First, pay the price of the argument setup. We account for an average of
00901   // one setup instruction per call argument here.
00902   Cost += CS.arg_size() * InlineConstants::InstrCost;
00903 
00904   // Next, check if this happens to be an indirect function call to a known
00905   // function in this inline context. If not, we've done all we can.
00906   Function *F = dyn_cast_or_null<Function>(SimplifiedValues.lookup(Callee));
00907   if (!F)
00908     return Base::visitCallSite(CS);
00909 
00910   // If we have a constant that we are calling as a function, we can peer
00911   // through it and see the function target. This happens not infrequently
00912   // during devirtualization and so we want to give it a hefty bonus for
00913   // inlining, but cap that bonus in the event that inlining wouldn't pan
00914   // out. Pretend to inline the function, with a custom threshold.
00915   CallAnalyzer CA(TTI, ACT, *F, InlineConstants::IndirectCallThreshold, CS);
00916   if (CA.analyzeCall(CS)) {
00917     // We were able to inline the indirect call! Subtract the cost from the
00918     // threshold to get the bonus we want to apply, but don't go below zero.
00919     Cost -= std::max(0, CA.getThreshold() - CA.getCost());
00920   }
00921 
00922   return Base::visitCallSite(CS);
00923 }
00924 
00925 bool CallAnalyzer::visitReturnInst(ReturnInst &RI) {
00926   // At least one return instruction will be free after inlining.
00927   bool Free = !HasReturn;
00928   HasReturn = true;
00929   return Free;
00930 }
00931 
00932 bool CallAnalyzer::visitBranchInst(BranchInst &BI) {
00933   // We model unconditional branches as essentially free -- they really
00934   // shouldn't exist at all, but handling them makes the behavior of the
00935   // inliner more regular and predictable. Interestingly, conditional branches
00936   // which will fold away are also free.
00937   return BI.isUnconditional() || isa<ConstantInt>(BI.getCondition()) ||
00938          dyn_cast_or_null<ConstantInt>(
00939              SimplifiedValues.lookup(BI.getCondition()));
00940 }
00941 
00942 bool CallAnalyzer::visitSwitchInst(SwitchInst &SI) {
00943   // We model unconditional switches as free, see the comments on handling
00944   // branches.
00945   if (isa<ConstantInt>(SI.getCondition()))
00946     return true;
00947   if (Value *V = SimplifiedValues.lookup(SI.getCondition()))
00948     if (isa<ConstantInt>(V))
00949       return true;
00950 
00951   // Otherwise, we need to accumulate a cost proportional to the number of
00952   // distinct successor blocks. This fan-out in the CFG cannot be represented
00953   // for free even if we can represent the core switch as a jumptable that
00954   // takes a single instruction.
00955   //
00956   // NB: We convert large switches which are just used to initialize large phi
00957   // nodes to lookup tables instead in simplify-cfg, so this shouldn't prevent
00958   // inlining those. It will prevent inlining in cases where the optimization
00959   // does not (yet) fire.
00960   SmallPtrSet<BasicBlock *, 8> SuccessorBlocks;
00961   SuccessorBlocks.insert(SI.getDefaultDest());
00962   for (auto I = SI.case_begin(), E = SI.case_end(); I != E; ++I)
00963     SuccessorBlocks.insert(I.getCaseSuccessor());
00964   // Add a cost corresponding to the number of distinct destinations. We model
00965   // the first as free because of fallthrough.
00966   Cost += (SuccessorBlocks.size() - 1) * InlineConstants::InstrCost;
00967   return false;
00968 }
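// Worked example (illustrative, assuming InlineConstants::InstrCost is 5 as
// declared in InlineCost.h): a switch whose condition does not simplify and
// whose default plus cases reach 4 distinct successor blocks adds
// (4 - 1) * 5 = 15 to Cost, while a switch on a constant condition (directly
// or via SimplifiedValues) is modeled as free.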
00969 
00970 bool CallAnalyzer::visitIndirectBrInst(IndirectBrInst &IBI) {
00971   // We never want to inline functions that contain an indirectbr.  This is
00972   // incorrect because all the blockaddresses (in static global initializers
00973   // for example) would be referring to the original function, and this
00974   // indirect jump would jump from the inlined copy of the function into the
00975   // original function which is extremely undefined behavior.
00976   // FIXME: This logic isn't really right; we can safely inline functions with
00977   // indirectbr's as long as no other function or global references the
00978   // blockaddress of a block within the current function.
00979   HasIndirectBr = true;
00980   return false;
00981 }
00982 
00983 bool CallAnalyzer::visitResumeInst(ResumeInst &RI) {
00984   // FIXME: It's not clear that a single instruction is an accurate model for
00985   // the inline cost of a resume instruction.
00986   return false;
00987 }
00988 
00989 bool CallAnalyzer::visitCleanupReturnInst(CleanupReturnInst &CRI) {
00990   // FIXME: It's not clear that a single instruction is an accurate model for
00991   // the inline cost of a cleanupret instruction.
00992   return false;
00993 }
00994 
00995 bool CallAnalyzer::visitCatchReturnInst(CatchReturnInst &CRI) {
00996   // FIXME: It's not clear that a single instruction is an accurate model for
00997   // the inline cost of a catchret instruction.
00998   return false;
00999 }
01000 
01001 bool CallAnalyzer::visitUnreachableInst(UnreachableInst &I) {
01002   // FIXME: It might be reasonable to discount the cost of instructions leading
01003   // to unreachable as they have the lowest possible impact on both runtime and
01004   // code size.
01005   return true; // No actual code is needed for unreachable.
01006 }
01007 
01008 bool CallAnalyzer::visitInstruction(Instruction &I) {
01009   // Some instructions are free. All of the free intrinsics can also be
01010   // handled by SROA, etc.
01011   if (TargetTransformInfo::TCC_Free == TTI.getUserCost(&I))
01012     return true;
01013 
01014   // We found something we don't understand or can't handle. Mark any SROA-able
01015   // values in the operand list as no longer viable.
01016   for (User::op_iterator OI = I.op_begin(), OE = I.op_end(); OI != OE; ++OI)
01017     disableSROA(*OI);
01018 
01019   return false;
01020 }
01021 
01022 
01023 /// \brief Analyze a basic block for its contribution to the inline cost.
01024 ///
01025 /// This method walks the analyzer over every instruction in the given basic
01026 /// block and accounts for their cost during inlining at this callsite. It
01027 /// aborts early if the threshold has been exceeded or an impossible to inline
01028 /// construct has been detected. It returns false if inlining is no longer
01029 /// viable, and true if inlining remains viable.
01030 bool CallAnalyzer::analyzeBlock(BasicBlock *BB,
01031                                 SmallPtrSetImpl<const Value *> &EphValues) {
01032   for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I) {
01033     // FIXME: Currently, the number of instructions in a function, regardless of
01034     // our ability to simplify them during inlining to constants or dead code,
01035     // is actually used by the vector bonus heuristic. As long as that's true,
01036     // we have to special-case debug intrinsics here to prevent differences in
01037     // inlining due to debug symbols. Eventually, the number of unsimplified
01038     // instructions shouldn't factor into the cost computation, but until then,
01039     // hack around it here.
01040     if (isa<DbgInfoIntrinsic>(I))
01041       continue;
01042 
01043     // Skip ephemeral values.
01044     if (EphValues.count(&*I))
01045       continue;
01046 
01047     ++NumInstructions;
01048     if (isa<ExtractElementInst>(I) || I->getType()->isVectorTy())
01049       ++NumVectorInstructions;
01050 
01051     // If the instruction is floating point, and the target says this operation
01052     // is expensive or the function has the "use-soft-float" attribute, this may
01053     // eventually become a library call. Treat the cost as such.
01054     if (I->getType()->isFloatingPointTy()) {
01055       bool hasSoftFloatAttr = false;
01056 
01057       // If the function has the "use-soft-float" attribute, mark it as
01058       // expensive.
01059       if (F.hasFnAttribute("use-soft-float")) {
01060         Attribute Attr = F.getFnAttribute("use-soft-float");
01061         StringRef Val = Attr.getValueAsString();
01062         if (Val == "true")
01063           hasSoftFloatAttr = true;
01064       }
01065 
01066       if (TTI.getFPOpCost(I->getType()) == TargetTransformInfo::TCC_Expensive ||
01067           hasSoftFloatAttr)
01068         Cost += InlineConstants::CallPenalty;
01069     }
01070 
01071     // If the instruction simplified to a constant, there is no cost to this
01072     // instruction. Visit the instructions using our InstVisitor to account for
01073     // all of the per-instruction logic. The visit tree returns true if we
01074     // consumed the instruction in any way, and false if the instruction's base
01075     // cost should count against inlining.
01076     if (Base::visit(&*I))
01077       ++NumInstructionsSimplified;
01078     else
01079       Cost += InlineConstants::InstrCost;
01080 
01081     // If visiting this instruction detected an uninlinable pattern, abort.
01082     if (IsRecursiveCall || ExposesReturnsTwice || HasDynamicAlloca ||
01083         HasIndirectBr || HasFrameEscape)
01084       return false;
01085 
01086     // If the caller is a recursive function then we don't want to inline
01087     // functions which allocate a lot of stack space because it would increase
01088     // the caller stack usage dramatically.
01089     if (IsCallerRecursive &&
01090         AllocatedSize > InlineConstants::TotalAllocaSizeRecursiveCaller)
01091       return false;
01092 
01093     // Check if we've passed the maximum possible threshold so we don't spin in
01094     // huge basic blocks that will never inline.
01095     if (Cost > Threshold)
01096       return false;
01097   }
01098 
01099   return true;
01100 }
01101 
01102 /// \brief Compute the base pointer and cumulative constant offsets for V.
01103 ///
01104 /// This strips all constant offsets off of V, leaving it the base pointer, and
01105 /// accumulates the total constant offset applied in the returned constant. It
01106 /// returns null if V is not a pointer, and returns the constant '0' if there are
01107 /// no constant offsets applied.
01108 ConstantInt *CallAnalyzer::stripAndComputeInBoundsConstantOffsets(Value *&V) {
01109   if (!V->getType()->isPointerTy())
01110     return nullptr;
01111 
01112   const DataLayout &DL = F.getParent()->getDataLayout();
01113   unsigned IntPtrWidth = DL.getPointerSizeInBits();
01114   APInt Offset = APInt::getNullValue(IntPtrWidth);
01115 
01116   // Even though we don't look through PHI nodes, we could be called on an
01117   // instruction in an unreachable block, which may be on a cycle.
01118   SmallPtrSet<Value *, 4> Visited;
01119   Visited.insert(V);
01120   do {
01121     if (GEPOperator *GEP = dyn_cast<GEPOperator>(V)) {
01122       if (!GEP->isInBounds() || !accumulateGEPOffset(*GEP, Offset))
01123         return nullptr;
01124       V = GEP->getPointerOperand();
01125     } else if (Operator::getOpcode(V) == Instruction::BitCast) {
01126       V = cast<Operator>(V)->getOperand(0);
01127     } else if (GlobalAlias *GA = dyn_cast<GlobalAlias>(V)) {
01128       if (GA->mayBeOverridden())
01129         break;
01130       V = GA->getAliasee();
01131     } else {
01132       break;
01133     }
01134     assert(V->getType()->isPointerTy() && "Unexpected operand type!");
01135   } while (Visited.insert(V).second);
01136 
01137   Type *IntPtrTy = DL.getIntPtrType(V->getContext());
01138   return cast<ConstantInt>(ConstantInt::get(IntPtrTy, Offset));
01139 }
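// Illustrative example (assuming a DataLayout where i32 has a 4-byte alloc
// size): given a call argument
//   getelementptr inbounds i32, i32* %a, i64 3
// this strips the GEP, rewrites V to the base pointer %a, and returns the
// constant 12 (3 * 4 bytes) as an IntPtrTy integer. Bitcasts and
// non-overridable aliases are looked through without affecting the offset.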
01140 
01141 /// \brief Analyze a call site for potential inlining.
01142 ///
01143 /// Returns true if inlining this call is viable, and false if it is not
01144 /// viable. It computes the cost and adjusts the threshold based on numerous
01145 /// factors and heuristics. If this method returns false but the computed cost
01146 /// is below the computed threshold, then inlining was forcibly disabled by
01147 /// some artifact of the routine.
01148 bool CallAnalyzer::analyzeCall(CallSite CS) {
01149   ++NumCallsAnalyzed;
01150 
01151   // Perform some tweaks to the cost and threshold based on the direct
01152   // callsite information.
01153 
01154   // We want to more aggressively inline vector-dense kernels, so up the
01155   // threshold, and we'll lower it if the % of vector instructions gets too
01156   // low. Note that these bonuses are somewhat arbitrary and evolved over time
01157   // by accident as much as because they are principled bonuses.
01158   //
01159   // FIXME: It would be nice to remove all such bonuses. At least it would be
01160   // nice to base the bonus values on something more scientific.
01161   assert(NumInstructions == 0);
01162   assert(NumVectorInstructions == 0);
01163 
01164   // Update the threshold based on callsite properties
01165   updateThreshold(CS, F);
01166 
01167   FiftyPercentVectorBonus = 3 * Threshold / 2;
01168   TenPercentVectorBonus = 3 * Threshold / 4;
01169   const DataLayout &DL = F.getParent()->getDataLayout();
01170 
01171   // Track whether the post-inlining function would have more than one basic
01172   // block. A single basic block is often intended for inlining. Balloon the
01173   // threshold by 50% until we pass the single-BB phase.
01174   bool SingleBB = true;
01175   int SingleBBBonus = Threshold / 2;
01176 
01177   // Speculatively apply all possible bonuses to Threshold. If cost exceeds
01178   // this Threshold any time, and cost cannot decrease, we can stop processing
01179   // the rest of the function body.
01180   Threshold += (SingleBBBonus + FiftyPercentVectorBonus);
01181 
01182   // Give out bonuses per argument, as the instructions setting them up will
01183   // be gone after inlining.
01184   for (unsigned I = 0, E = CS.arg_size(); I != E; ++I) {
01185     if (CS.isByValArgument(I)) {
01186       // We approximate the number of loads and stores needed by dividing the
01187       // size of the byval type by the target's pointer size.
01188       PointerType *PTy = cast<PointerType>(CS.getArgument(I)->getType());
01189       unsigned TypeSize = DL.getTypeSizeInBits(PTy->getElementType());
01190       unsigned PointerSize = DL.getPointerSizeInBits();
01191       // Ceiling division.
01192       unsigned NumStores = (TypeSize + PointerSize - 1) / PointerSize;
01193 
01194       // If it generates more than 8 stores it is likely to be expanded as an
01195       // inline memcpy so we take that as an upper bound. Otherwise we assume
01196       // one load and one store per word copied.
01197       // FIXME: The maxStoresPerMemcpy setting from the target should be used
01198       // here instead of a magic number of 8, but it's not available via
01199       // DataLayout.
01200       NumStores = std::min(NumStores, 8U);
01201 
01202       Cost -= 2 * NumStores * InlineConstants::InstrCost;
01203     } else {
01204       // For non-byval arguments subtract off one instruction per call
01205       // argument.
01206       Cost -= InlineConstants::InstrCost;
01207     }
01208   }
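  // Worked example (illustrative, assuming InstrCost is 5 and 64-bit
  // pointers): a byval argument of a 64-byte struct gives
  // NumStores = ceil(512 bits / 64 bits) = 8, within the cap of 8, so Cost is
  // reduced by 2 * 8 * 5 = 80; each ordinary argument instead earns a 5-point
  // reduction.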
01209 
01210   // If there is only one call of the function, and it has internal linkage,
01211   // the cost of inlining it drops dramatically.
01212   bool OnlyOneCallAndLocalLinkage = F.hasLocalLinkage() && F.hasOneUse() &&
01213     &F == CS.getCalledFunction();
01214   if (OnlyOneCallAndLocalLinkage)
01215     Cost += InlineConstants::LastCallToStaticBonus;
01216 
01217   // If the instruction after the call, or the normal destination of the
01218   // invoke, is an unreachable instruction, the function is noreturn. As such,
01219   // there is little point in inlining this unless there is literally zero
01220   // cost.
01221   Instruction *Instr = CS.getInstruction();
01222   if (InvokeInst *II = dyn_cast<InvokeInst>(Instr)) {
01223     if (isa<UnreachableInst>(II->getNormalDest()->begin()))
01224       Threshold = 0;
01225   } else if (isa<UnreachableInst>(++BasicBlock::iterator(Instr)))
01226     Threshold = 0;
01227 
01228   // If this function uses the coldcc calling convention, prefer not to inline
01229   // it.
01230   if (F.getCallingConv() == CallingConv::Cold)
01231     Cost += InlineConstants::ColdccPenalty;
01232 
01233   // Check if we're done. This can happen due to bonuses and penalties.
01234   if (Cost > Threshold)
01235     return false;
01236 
01237   if (F.empty())
01238     return true;
01239 
01240   Function *Caller = CS.getInstruction()->getParent()->getParent();
01241   // Check if the caller function is recursive itself.
01242   for (User *U : Caller->users()) {
01243     CallSite Site(U);
01244     if (!Site)
01245       continue;
01246     Instruction *I = Site.getInstruction();
01247     if (I->getParent()->getParent() == Caller) {
01248       IsCallerRecursive = true;
01249       break;
01250     }
01251   }
01252 
01253   // Populate our simplified values by mapping from function arguments to call
01254   // arguments with known important simplifications.
01255   CallSite::arg_iterator CAI = CS.arg_begin();
01256   for (Function::arg_iterator FAI = F.arg_begin(), FAE = F.arg_end();
01257        FAI != FAE; ++FAI, ++CAI) {
01258     assert(CAI != CS.arg_end());
01259     if (Constant *C = dyn_cast<Constant>(CAI))
01260       SimplifiedValues[&*FAI] = C;
01261 
01262     Value *PtrArg = *CAI;
01263     if (ConstantInt *C = stripAndComputeInBoundsConstantOffsets(PtrArg)) {
01264       ConstantOffsetPtrs[&*FAI] = std::make_pair(PtrArg, C->getValue());
01265 
01266       // We can SROA any pointer arguments derived from alloca instructions.
01267       if (isa<AllocaInst>(PtrArg)) {
01268         SROAArgValues[&*FAI] = PtrArg;
01269         SROAArgCosts[PtrArg] = 0;
01270       }
01271     }
01272   }
01273   NumConstantArgs = SimplifiedValues.size();
01274   NumConstantOffsetPtrArgs = ConstantOffsetPtrs.size();
01275   NumAllocaArgs = SROAArgValues.size();
01276 
01277   // FIXME: If a caller has multiple calls to a callee, we end up recomputing
01278   // the ephemeral values multiple times (and they're completely determined by
01279   // the callee, so this is purely duplicate work).
01280   SmallPtrSet<const Value *, 32> EphValues;
01281   CodeMetrics::collectEphemeralValues(&F, &ACT->getAssumptionCache(F), EphValues);
01282 
01283   // The worklist of live basic blocks in the callee *after* inlining. We avoid
01284   // adding basic blocks of the callee which can be proven to be dead for this
01285   // particular call site in order to get more accurate cost estimates. This
01286   // requires a somewhat heavyweight iteration pattern: we need to walk the
01287   // basic blocks in breadth-first order as we insert live successors. Because
01288   // we exit as soon as the cost crosses the threshold, the iteration is usually
01289   // short, so we use a small-size-optimized SetVector.
01290   typedef SetVector<BasicBlock *, SmallVector<BasicBlock *, 16>,
01291                                   SmallPtrSet<BasicBlock *, 16> > BBSetVector;
01292   BBSetVector BBWorklist;
01293   BBWorklist.insert(&F.getEntryBlock());
01294   // Note that we *must not* cache the size; this loop grows the worklist.
01295   for (unsigned Idx = 0; Idx != BBWorklist.size(); ++Idx) {
01296     // Bail out the moment we cross the threshold. This means we'll under-count
01297     // the cost, but only when undercounting doesn't matter.
01298     if (Cost > Threshold)
01299       break;
01300 
01301     BasicBlock *BB = BBWorklist[Idx];
01302     if (BB->empty())
01303       continue;
01304 
01305     // Disallow inlining a blockaddress. A blockaddress only has defined
01306     // behavior for an indirect branch in the same function, and we do not
01307     // currently support inlining indirect branches. But, the inliner may not
01308     // see an indirect branch that ends up being dead code at a particular call
01309     // site. If the blockaddress escapes the function, e.g., via a global
01310     // variable, inlining may lead to an invalid cross-function reference.
01311     if (BB->hasAddressTaken())
01312       return false;
01313 
01314     // Analyze the cost of this block. If we blow through the threshold, this
01315     // returns false, and we can bail out.
01316     if (!analyzeBlock(BB, EphValues)) {
01317       if (IsRecursiveCall || ExposesReturnsTwice || HasDynamicAlloca ||
01318           HasIndirectBr || HasFrameEscape)
01319         return false;
01320 
01321       // If the caller is a recursive function, then we don't want to inline
01322       // functions which allocate a lot of stack space, because that would
01323       // increase the caller's stack usage dramatically.
01324       if (IsCallerRecursive &&
01325           AllocatedSize > InlineConstants::TotalAllocaSizeRecursiveCaller)
01326         return false;
01327 
01328       break;
01329     }
01330 
01331     TerminatorInst *TI = BB->getTerminator();
01332 
01333     // Add in the live successors by first checking whether we have a terminator
01334     // that may be simplified based on the values simplified by this call.
01335     if (BranchInst *BI = dyn_cast<BranchInst>(TI)) {
01336       if (BI->isConditional()) {
01337         Value *Cond = BI->getCondition();
01338         if (ConstantInt *SimpleCond
01339               = dyn_cast_or_null<ConstantInt>(SimplifiedValues.lookup(Cond))) {
01340           BBWorklist.insert(BI->getSuccessor(SimpleCond->isZero() ? 1 : 0));
01341           continue;
01342         }
01343       }
01344     } else if (SwitchInst *SI = dyn_cast<SwitchInst>(TI)) {
01345       Value *Cond = SI->getCondition();
01346       if (ConstantInt *SimpleCond
01347             = dyn_cast_or_null<ConstantInt>(SimplifiedValues.lookup(Cond))) {
01348         BBWorklist.insert(SI->findCaseValue(SimpleCond).getCaseSuccessor());
01349         continue;
01350       }
01351     }
01352 
01353     // If we're unable to select a particular successor, just count all of
01354     // them.
01355     for (unsigned TIdx = 0, TSize = TI->getNumSuccessors(); TIdx != TSize;
01356          ++TIdx)
01357       BBWorklist.insert(TI->getSuccessor(TIdx));
01358 
01359     // If we had any successors at this point, then post-inlining is likely to
01360     // have them as well. Note that we assume any basic blocks which existed
01361     // due to branches or switches which folded above will also fold after
01362     // inlining.
01363     if (SingleBB && TI->getNumSuccessors() > 1) {
01364       // Take off the bonus we applied to the threshold.
01365       Threshold -= SingleBBBonus;
01366       SingleBB = false;
01367     }
01368   }
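  // Illustrative sketch of the pruning above (hypothetical callee): given
  //
  //   define internal i32 @callee(i1 %flag) {
  //   entry:
  //     br i1 %flag, label %fast, label %slow
  //     ...
  //
  // a call site that passes a constant, e.g. "call i32 @callee(i1 true)",
  // records %flag in SimplifiedValues, so only %fast is added to BBWorklist
  // and the instructions in %slow never contribute to Cost.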
01369 
01370   // If this is a noduplicate call, we can still inline as long as
01371   // inlining this would cause the removal of the callee (so the noduplicate
01372   // call is not actually duplicated, just moved).
01373   if (!OnlyOneCallAndLocalLinkage && ContainsNoDuplicateCall)
01374     return false;
01375 
01376   // We applied the maximum possible vector bonus at the beginning. Now,
01377   // subtract the excess bonus, if any, from the Threshold before
01378   // comparing against Cost.
01379   if (NumVectorInstructions <= NumInstructions / 10)
01380     Threshold -= FiftyPercentVectorBonus;
01381   else if (NumVectorInstructions <= NumInstructions / 2)
01382     Threshold -= (FiftyPercentVectorBonus - TenPercentVectorBonus);
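  // Worked example of the adjustment above (illustrative numbers): with
  // NumInstructions == 100, a callee with more than 50 vector instructions
  // keeps the full FiftyPercentVectorBonus; one with 11-50 keeps only
  // TenPercentVectorBonus; one with 10 or fewer loses the bonus entirely.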
01383 
01384   return Cost <= std::max(0, Threshold);
01385 }
01386 
01387 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
01388 /// \brief Dump stats about this call's analysis.
01389 void CallAnalyzer::dump() {
01390 #define DEBUG_PRINT_STAT(x) dbgs() << "      " #x ": " << x << "\n"
01391   DEBUG_PRINT_STAT(NumConstantArgs);
01392   DEBUG_PRINT_STAT(NumConstantOffsetPtrArgs);
01393   DEBUG_PRINT_STAT(NumAllocaArgs);
01394   DEBUG_PRINT_STAT(NumConstantPtrCmps);
01395   DEBUG_PRINT_STAT(NumConstantPtrDiffs);
01396   DEBUG_PRINT_STAT(NumInstructionsSimplified);
01397   DEBUG_PRINT_STAT(NumInstructions);
01398   DEBUG_PRINT_STAT(SROACostSavings);
01399   DEBUG_PRINT_STAT(SROACostSavingsLost);
01400   DEBUG_PRINT_STAT(ContainsNoDuplicateCall);
01401   DEBUG_PRINT_STAT(Cost);
01402   DEBUG_PRINT_STAT(Threshold);
01403 #undef DEBUG_PRINT_STAT
01404 }
01405 #endif
01406 
01407 /// \brief Test that two functions agree on the given attribute: either both
01408 ///        have it (with the same value) or both lack it.
01409 template<typename AttrKind>
01410 static bool attributeMatches(Function *F1, Function *F2, AttrKind Attr) {
01411   return F1->getFnAttribute(Attr) == F2->getFnAttribute(Attr);
01412 }
01413 
01414 /// \brief Test that there are no attribute conflicts between Caller and Callee
01415 ///        that prevent inlining.
01416 static bool functionsHaveCompatibleAttributes(Function *Caller,
01417                                               Function *Callee,
01418                                               TargetTransformInfo &TTI) {
01419   return TTI.areInlineCompatible(Caller, Callee) &&
01420          AttributeFuncs::areInlineCompatible(*Caller, *Callee);
01421 }
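// Illustrative note: a common incompatibility caught here is a target-feature
// mismatch, e.g. a callee built to use vector extensions that its caller does
// not enable; whether such a pair may still be inlined is delegated to the
// target through TTI.areInlineCompatible, while
// AttributeFuncs::areInlineCompatible handles target-independent attributes.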
01422 
01423 InlineCost llvm::getInlineCost(CallSite CS, int DefaultThreshold,
01424                                TargetTransformInfo &CalleeTTI,
01425                                AssumptionCacheTracker *ACT) {
01426   return getInlineCost(CS, CS.getCalledFunction(), DefaultThreshold, CalleeTTI,
01427                        ACT);
01428 }
01429 
01430 int llvm::computeThresholdFromOptLevels(unsigned OptLevel,
01431                                         unsigned SizeOptLevel) {
01432   if (OptLevel > 2)
01433     return OptAggressiveThreshold;
01434   if (SizeOptLevel == 1) // -Os
01435     return OptSizeThreshold;
01436   if (SizeOptLevel == 2) // -Oz
01437     return OptMinSizeThreshold;
01438   return DefaultInlineThreshold;
01439 }
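// Usage sketch (hypothetical arguments): computeThresholdFromOptLevels(3, 0)
// yields OptAggressiveThreshold, (2, 1) yields OptSizeThreshold for -Os,
// (2, 2) yields OptMinSizeThreshold for -Oz, and any other combination falls
// back to the -inline-threshold default.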
01440 
01441 int llvm::getDefaultInlineThreshold() { return DefaultInlineThreshold; }
01442 
01443 InlineCost llvm::getInlineCost(CallSite CS, Function *Callee,
01444                                int DefaultThreshold,
01445                                TargetTransformInfo &CalleeTTI,
01446                                AssumptionCacheTracker *ACT) {
01447 
01448   // Cannot inline indirect calls.
01449   if (!Callee)
01450     return llvm::InlineCost::getNever();
01451 
01452   // Calls to functions with always-inline attributes should be inlined
01453   // whenever possible.
01454   if (CS.hasFnAttr(Attribute::AlwaysInline)) {
01455     if (isInlineViable(*Callee))
01456       return llvm::InlineCost::getAlways();
01457     return llvm::InlineCost::getNever();
01458   }
01459 
01460   // Never inline functions with conflicting attributes (unless the callee has
01461   // the always-inline attribute).
01462   if (!functionsHaveCompatibleAttributes(CS.getCaller(), Callee, CalleeTTI))
01463     return llvm::InlineCost::getNever();
01464 
01465   // Don't inline this call if the caller has the optnone attribute.
01466   if (CS.getCaller()->hasFnAttribute(Attribute::OptimizeNone))
01467     return llvm::InlineCost::getNever();
01468 
01469   // Don't inline functions which can be redefined at link-time to mean
01470   // something else.  Don't inline functions marked noinline or call sites
01471   // marked noinline.
01472   if (Callee->mayBeOverridden() ||
01473       Callee->hasFnAttribute(Attribute::NoInline) || CS.isNoInline())
01474     return llvm::InlineCost::getNever();
01475 
01476   DEBUG(llvm::dbgs() << "      Analyzing call of " << Callee->getName()
01477         << "...\n");
01478 
01479   CallAnalyzer CA(CalleeTTI, ACT, *Callee, DefaultThreshold, CS);
01480   bool ShouldInline = CA.analyzeCall(CS);
01481 
01482   DEBUG(CA.dump());
01483 
01484   // Check if there was a reason to force inlining or to forbid it.
01485   if (!ShouldInline && CA.getCost() < CA.getThreshold())
01486     return InlineCost::getNever();
01487   if (ShouldInline && CA.getCost() >= CA.getThreshold())
01488     return InlineCost::getAlways();
01489 
01490   return llvm::InlineCost::get(CA.getCost(), CA.getThreshold());
01491 }
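// Illustrative caller-side sketch (hypothetical snippet, not part of this
// file): a client of this interface typically branches on the three kinds of
// result:
//
//   InlineCost IC = getInlineCost(CS, Threshold, CalleeTTI, ACT);
//   if (IC.isAlways())       { /* forced inlining, e.g. alwaysinline */ }
//   else if (IC.isNever())   { /* inlining is forbidden */ }
//   else                     { /* variable: weigh IC.getCost() against the
//                                 threshold the inliner has chosen */ }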
01492 
01493 bool llvm::isInlineViable(Function &F) {
01494   bool ReturnsTwice = F.hasFnAttribute(Attribute::ReturnsTwice);
01495   for (Function::iterator BI = F.begin(), BE = F.end(); BI != BE; ++BI) {
01496     // Disallow inlining of functions which contain indirect branches or
01497     // blockaddresses.
01498     if (isa<IndirectBrInst>(BI->getTerminator()) || BI->hasAddressTaken())
01499       return false;
01500 
01501     for (auto &II : *BI) {
01502       CallSite CS(&II);
01503       if (!CS)
01504         continue;
01505 
01506       // Disallow recursive calls.
01507       if (&F == CS.getCalledFunction())
01508         return false;
01509 
01510       // Disallow calls which expose returns-twice to a function not previously
01511       // attributed as such.
01512       if (!ReturnsTwice && CS.isCall() &&
01513           cast<CallInst>(CS.getInstruction())->canReturnTwice())
01514         return false;
01515 
01516       // Disallow inlining functions that call @llvm.localescape. Doing this
01517       // correctly would require major changes to the inliner.
01518       if (CS.getCalledFunction() &&
01519           CS.getCalledFunction()->getIntrinsicID() ==
01520               llvm::Intrinsic::localescape)
01521         return false;
01522     }
01523   }
01524 
01525   return true;
01526 }
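// Illustrative example (hypothetical callee): a function containing an
// indirect branch, such as one produced by a computed goto,
//
//   define void @callee(i8* %dest) {
//     indirectbr i8* %dest, [label %a, label %b]
//     ...
//
// is rejected by isInlineViable, as are directly recursive callees, callees
// that expose returns_twice calls to callers not marked returns_twice, and
// callees that call @llvm.localescape.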