00001 //===- InlineCost.cpp - Cost analysis for inliner -------------------------===//
00002 //
00003 //                     The LLVM Compiler Infrastructure
00004 //
00005 // This file is distributed under the University of Illinois Open Source
00006 // License. See LICENSE.TXT for details.
00007 //
00008 //===----------------------------------------------------------------------===//
00009 //
00010 // This file implements inline cost analysis.
00011 //
00012 //===----------------------------------------------------------------------===//
00013 
00014 #include "llvm/Analysis/InlineCost.h"
00015 #include "llvm/ADT/STLExtras.h"
00016 #include "llvm/ADT/SetVector.h"
00017 #include "llvm/ADT/SmallPtrSet.h"
00018 #include "llvm/ADT/SmallVector.h"
00019 #include "llvm/ADT/Statistic.h"
00020 #include "llvm/Analysis/AssumptionCache.h"
00021 #include "llvm/Analysis/CodeMetrics.h"
00022 #include "llvm/Analysis/ConstantFolding.h"
00023 #include "llvm/Analysis/InstructionSimplify.h"
00024 #include "llvm/Analysis/TargetTransformInfo.h"
00025 #include "llvm/IR/CallSite.h"
00026 #include "llvm/IR/CallingConv.h"
00027 #include "llvm/IR/DataLayout.h"
00028 #include "llvm/IR/GetElementPtrTypeIterator.h"
00029 #include "llvm/IR/GlobalAlias.h"
00030 #include "llvm/IR/InstVisitor.h"
00031 #include "llvm/IR/IntrinsicInst.h"
00032 #include "llvm/IR/Operator.h"
00033 #include "llvm/Support/Debug.h"
00034 #include "llvm/Support/raw_ostream.h"
00035 
00036 using namespace llvm;
00037 
00038 #define DEBUG_TYPE "inline-cost"
00039 
00040 STATISTIC(NumCallsAnalyzed, "Number of call sites analyzed");
00041 
00042 namespace {
00043 
00044 class CallAnalyzer : public InstVisitor<CallAnalyzer, bool> {
00045   typedef InstVisitor<CallAnalyzer, bool> Base;
00046   friend class InstVisitor<CallAnalyzer, bool>;
00047 
00048   /// The TargetTransformInfo available for this compilation.
00049   const TargetTransformInfo &TTI;
00050 
00051   /// The cache of @llvm.assume intrinsics.
00052   AssumptionCacheTracker *ACT;
00053 
00054   /// The called function.
00055   Function &F;
00056 
00057   int Threshold;
00058   int Cost;
00059 
00060   bool IsCallerRecursive;
00061   bool IsRecursiveCall;
00062   bool ExposesReturnsTwice;
00063   bool HasDynamicAlloca;
00064   bool ContainsNoDuplicateCall;
00065   bool HasReturn;
00066   bool HasIndirectBr;
00067   bool HasFrameEscape;
00068 
00069   /// Number of bytes allocated statically by the callee.
00070   uint64_t AllocatedSize;
00071   unsigned NumInstructions, NumVectorInstructions;
00072   int FiftyPercentVectorBonus, TenPercentVectorBonus;
00073   int VectorBonus;
00074 
00075   // While we walk the potentially-inlined instructions, we build up and
00076   // maintain a mapping of simplified values specific to this callsite. The
00077   // idea is to propagate any special information we have about arguments to
00078   // this call through the inlinable section of the function, and account for
00079   // likely simplifications post-inlining. The most important aspect we track
00080   // is CFG altering simplifications -- when we prove a basic block dead, that
00081   // can cause dramatic shifts in the cost of inlining a function.
00082   DenseMap<Value *, Constant *> SimplifiedValues;
00083 
00084   // Keep track of the values which map back (through function arguments) to
00085   // allocas on the caller stack which could be simplified through SROA.
00086   DenseMap<Value *, Value *> SROAArgValues;
00087 
00088   // The mapping of caller Alloca values to their accumulated cost savings. If
00089   // we have to disable SROA for one of the allocas, this tells us how much
00090   // cost must be added.
00091   DenseMap<Value *, int> SROAArgCosts;
00092 
00093   // Keep track of values which map to a pointer base and constant offset.
00094   DenseMap<Value *, std::pair<Value *, APInt> > ConstantOffsetPtrs;
00095 
00096   // Custom simplification helper routines.
00097   bool isAllocaDerivedArg(Value *V);
00098   bool lookupSROAArgAndCost(Value *V, Value *&Arg,
00099                             DenseMap<Value *, int>::iterator &CostIt);
00100   void disableSROA(DenseMap<Value *, int>::iterator CostIt);
00101   void disableSROA(Value *V);
00102   void accumulateSROACost(DenseMap<Value *, int>::iterator CostIt,
00103                           int InstructionCost);
00104   bool isGEPOffsetConstant(GetElementPtrInst &GEP);
00105   bool accumulateGEPOffset(GEPOperator &GEP, APInt &Offset);
00106   bool simplifyCallSite(Function *F, CallSite CS);
00107   ConstantInt *stripAndComputeInBoundsConstantOffsets(Value *&V);
00108 
00109   // Custom analysis routines.
00110   bool analyzeBlock(BasicBlock *BB, SmallPtrSetImpl<const Value *> &EphValues);
00111 
00112   // Disable several entry points to the visitor so we don't accidentally use
00113   // them by declaring but not defining them here.
00114   void visit(Module *);     void visit(Module &);
00115   void visit(Function *);   void visit(Function &);
00116   void visit(BasicBlock *); void visit(BasicBlock &);
00117 
00118   // Provide base case for our instruction visit.
00119   bool visitInstruction(Instruction &I);
00120 
00121   // Our visit overrides.
00122   bool visitAlloca(AllocaInst &I);
00123   bool visitPHI(PHINode &I);
00124   bool visitGetElementPtr(GetElementPtrInst &I);
00125   bool visitBitCast(BitCastInst &I);
00126   bool visitPtrToInt(PtrToIntInst &I);
00127   bool visitIntToPtr(IntToPtrInst &I);
00128   bool visitCastInst(CastInst &I);
00129   bool visitUnaryInstruction(UnaryInstruction &I);
00130   bool visitCmpInst(CmpInst &I);
00131   bool visitSub(BinaryOperator &I);
00132   bool visitBinaryOperator(BinaryOperator &I);
00133   bool visitLoad(LoadInst &I);
00134   bool visitStore(StoreInst &I);
00135   bool visitExtractValue(ExtractValueInst &I);
00136   bool visitInsertValue(InsertValueInst &I);
00137   bool visitCallSite(CallSite CS);
00138   bool visitReturnInst(ReturnInst &RI);
00139   bool visitBranchInst(BranchInst &BI);
00140   bool visitSwitchInst(SwitchInst &SI);
00141   bool visitIndirectBrInst(IndirectBrInst &IBI);
00142   bool visitResumeInst(ResumeInst &RI);
00143   bool visitUnreachableInst(UnreachableInst &I);
00144 
00145 public:
00146   CallAnalyzer(const TargetTransformInfo &TTI, AssumptionCacheTracker *ACT,
00147                Function &Callee, int Threshold)
00148       : TTI(TTI), ACT(ACT), F(Callee), Threshold(Threshold), Cost(0),
00149         IsCallerRecursive(false), IsRecursiveCall(false),
00150         ExposesReturnsTwice(false), HasDynamicAlloca(false),
00151         ContainsNoDuplicateCall(false), HasReturn(false), HasIndirectBr(false),
00152         HasFrameEscape(false), AllocatedSize(0), NumInstructions(0),
00153         NumVectorInstructions(0), FiftyPercentVectorBonus(0),
00154         TenPercentVectorBonus(0), VectorBonus(0), NumConstantArgs(0),
00155         NumConstantOffsetPtrArgs(0), NumAllocaArgs(0), NumConstantPtrCmps(0),
00156         NumConstantPtrDiffs(0), NumInstructionsSimplified(0),
00157         SROACostSavings(0), SROACostSavingsLost(0) {}
00158 
00159   bool analyzeCall(CallSite CS);
00160 
00161   int getThreshold() { return Threshold; }
00162   int getCost() { return Cost; }
00163 
00164   // Keep a bunch of stats about the cost savings found so we can print them
00165   // out when debugging.
00166   unsigned NumConstantArgs;
00167   unsigned NumConstantOffsetPtrArgs;
00168   unsigned NumAllocaArgs;
00169   unsigned NumConstantPtrCmps;
00170   unsigned NumConstantPtrDiffs;
00171   unsigned NumInstructionsSimplified;
00172   unsigned SROACostSavings;
00173   unsigned SROACostSavingsLost;
00174 
00175   void dump();
00176 };
00177 
00178 } // namespace
00179 
00180 /// \brief Test whether the given value is an Alloca-derived function argument.
00181 bool CallAnalyzer::isAllocaDerivedArg(Value *V) {
00182   return SROAArgValues.count(V);
00183 }
00184 
00185 /// \brief Lookup the SROA-candidate argument and cost iterator which V maps to.
00186 /// Returns false if V does not map to a SROA-candidate.
00187 bool CallAnalyzer::lookupSROAArgAndCost(
00188     Value *V, Value *&Arg, DenseMap<Value *, int>::iterator &CostIt) {
00189   if (SROAArgValues.empty() || SROAArgCosts.empty())
00190     return false;
00191 
00192   DenseMap<Value *, Value *>::iterator ArgIt = SROAArgValues.find(V);
00193   if (ArgIt == SROAArgValues.end())
00194     return false;
00195 
00196   Arg = ArgIt->second;
00197   CostIt = SROAArgCosts.find(Arg);
00198   return CostIt != SROAArgCosts.end();
00199 }
00200 
00201 /// \brief Disable SROA for the candidate marked by this cost iterator.
00202 ///
00203 /// This marks the candidate as no longer viable for SROA, and adds the cost
00204 /// savings associated with it back into the inline cost measurement.
00205 void CallAnalyzer::disableSROA(DenseMap<Value *, int>::iterator CostIt) {
00206   // If we're no longer able to perform SROA we need to undo its cost savings
00207   // and prevent subsequent analysis.
00208   Cost += CostIt->second;
00209   SROACostSavings -= CostIt->second;
00210   SROACostSavingsLost += CostIt->second;
00211   SROAArgCosts.erase(CostIt);
00212 }
00213 
00214 /// \brief If 'V' maps to a SROA candidate, disable SROA for it.
00215 void CallAnalyzer::disableSROA(Value *V) {
00216   Value *SROAArg;
00217   DenseMap<Value *, int>::iterator CostIt;
00218   if (lookupSROAArgAndCost(V, SROAArg, CostIt))
00219     disableSROA(CostIt);
00220 }
00221 
00222 /// \brief Accumulate the given cost for a particular SROA candidate.
00223 void CallAnalyzer::accumulateSROACost(DenseMap<Value *, int>::iterator CostIt,
00224                                       int InstructionCost) {
00225   CostIt->second += InstructionCost;
00226   SROACostSavings += InstructionCost;
00227 }
00228 
00229 /// \brief Check whether a GEP's indices are all constant.
00230 ///
00231 /// Respects any simplified values known during the analysis of this callsite.
00232 bool CallAnalyzer::isGEPOffsetConstant(GetElementPtrInst &GEP) {
00233   for (User::op_iterator I = GEP.idx_begin(), E = GEP.idx_end(); I != E; ++I)
00234     if (!isa<Constant>(*I) && !SimplifiedValues.lookup(*I))
00235       return false;
00236 
00237   return true;
00238 }
00239 
00240 /// \brief Accumulate a constant GEP offset into an APInt if possible.
00241 ///
00242 /// Returns false if unable to compute the offset for any reason. Respects any
00243 /// simplified values known during the analysis of this callsite.
00244 bool CallAnalyzer::accumulateGEPOffset(GEPOperator &GEP, APInt &Offset) {
00245   const DataLayout &DL = F.getParent()->getDataLayout();
00246   unsigned IntPtrWidth = DL.getPointerSizeInBits();
00247   assert(IntPtrWidth == Offset.getBitWidth());
00248 
00249   for (gep_type_iterator GTI = gep_type_begin(GEP), GTE = gep_type_end(GEP);
00250        GTI != GTE; ++GTI) {
00251     ConstantInt *OpC = dyn_cast<ConstantInt>(GTI.getOperand());
00252     if (!OpC)
00253       if (Constant *SimpleOp = SimplifiedValues.lookup(GTI.getOperand()))
00254         OpC = dyn_cast<ConstantInt>(SimpleOp);
00255     if (!OpC)
00256       return false;
00257     if (OpC->isZero()) continue;
00258 
00259     // Handle a struct index, which adds its field offset to the pointer.
00260     if (StructType *STy = dyn_cast<StructType>(*GTI)) {
00261       unsigned ElementIdx = OpC->getZExtValue();
00262       const StructLayout *SL = DL.getStructLayout(STy);
00263       Offset += APInt(IntPtrWidth, SL->getElementOffset(ElementIdx));
00264       continue;
00265     }
00266 
00267     APInt TypeSize(IntPtrWidth, DL.getTypeAllocSize(GTI.getIndexedType()));
00268     Offset += OpC->getValue().sextOrTrunc(IntPtrWidth) * TypeSize;
00269   }
00270   return true;
00271 }
00272 
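/// \brief Model the cost of an alloca instruction.
///
/// Static allocas (including array allocations whose size simplifies to a
/// constant) simply accumulate into AllocatedSize; a truly dynamic alloca
/// sets HasDynamicAlloca, which currently disables inlining altogether.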
00273 bool CallAnalyzer::visitAlloca(AllocaInst &I) {
00274   // Check whether inlining will turn a dynamic alloca into a static
00275   // alloca, and handle that case.
00276   if (I.isArrayAllocation()) {
00277     if (Constant *Size = SimplifiedValues.lookup(I.getArraySize())) {
00278       ConstantInt *AllocSize = dyn_cast<ConstantInt>(Size);
00279       assert(AllocSize && "Allocation size not a constant int?");
00280       Type *Ty = I.getAllocatedType();
00281       AllocatedSize += Ty->getPrimitiveSizeInBits() * AllocSize->getZExtValue();
00282       return Base::visitAlloca(I);
00283     }
00284   }
00285 
00286   // Accumulate the allocated size.
00287   if (I.isStaticAlloca()) {
00288     const DataLayout &DL = F.getParent()->getDataLayout();
00289     Type *Ty = I.getAllocatedType();
00290     AllocatedSize += DL.getTypeAllocSize(Ty);
00291   }
00292 
00293   // We will happily inline static alloca instructions.
00294   if (I.isStaticAlloca())
00295     return Base::visitAlloca(I);
00296 
00297   // FIXME: This is overly conservative. Dynamic allocas are inefficient for
00298   // a variety of reasons, and so we would like to not inline them into
00299   // functions which don't currently have a dynamic alloca. This simply
00300   // disables inlining altogether in the presence of a dynamic alloca.
00301   HasDynamicAlloca = true;
00302   return false;
00303 }
00304 
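/// \brief Model PHI nodes.
///
/// PHIs are currently treated as free and no values are propagated through
/// them; see the FIXMEs in the body.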
00305 bool CallAnalyzer::visitPHI(PHINode &I) {
00306   // FIXME: We should potentially be tracking values through phi nodes,
00307   // especially when they collapse to a single value due to deleted CFG edges
00308   // during inlining.
00309 
00310   // FIXME: We need to propagate SROA *disabling* through phi nodes, even
00311   // though we don't want to propagate its bonuses. The idea is to disable
00312   // SROA if it *might* be used in an inappropriate manner.
00313 
00314   // Phi nodes are always zero-cost.
00315   return true;
00316 }
00317 
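/// \brief Model a GEP instruction.
///
/// Inbounds GEPs of pointers with a known constant offset fold into the
/// ConstantOffsetPtrs map and are free, as are GEPs with all-constant
/// indices. Variable GEPs require real instructions and disable SROA for the
/// underlying argument.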
00318 bool CallAnalyzer::visitGetElementPtr(GetElementPtrInst &I) {
00319   Value *SROAArg;
00320   DenseMap<Value *, int>::iterator CostIt;
00321   bool SROACandidate = lookupSROAArgAndCost(I.getPointerOperand(),
00322                                             SROAArg, CostIt);
00323 
00324   // Try to fold GEPs of constant-offset call site argument pointers. This
00325   // requires target data and inbounds GEPs.
00326   if (I.isInBounds()) {
00327     // Check if we have a base + offset for the pointer.
00328     Value *Ptr = I.getPointerOperand();
00329     std::pair<Value *, APInt> BaseAndOffset = ConstantOffsetPtrs.lookup(Ptr);
00330     if (BaseAndOffset.first) {
00331       // Check if the offset of this GEP is constant, and if so accumulate it
00332       // into Offset.
00333       if (!accumulateGEPOffset(cast<GEPOperator>(I), BaseAndOffset.second)) {
00334         // Non-constant GEPs aren't folded, and disable SROA.
00335         if (SROACandidate)
00336           disableSROA(CostIt);
00337         return false;
00338       }
00339 
00340       // Add the result as a new mapping to Base + Offset.
00341       ConstantOffsetPtrs[&I] = BaseAndOffset;
00342 
00343       // Also handle SROA candidates here, we already know that the GEP is
00344       // all-constant indexed.
00345       if (SROACandidate)
00346         SROAArgValues[&I] = SROAArg;
00347 
00348       return true;
00349     }
00350   }
00351 
00352   if (isGEPOffsetConstant(I)) {
00353     if (SROACandidate)
00354       SROAArgValues[&I] = SROAArg;
00355 
00356     // Constant GEPs are modeled as free.
00357     return true;
00358   }
00359 
00360   // Variable GEPs will require math and will disable SROA.
00361   if (SROACandidate)
00362     disableSROA(CostIt);
00363   return false;
00364 }
00365 
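/// \brief Model bitcasts as free.
///
/// Constants, constant base-and-offset pairs, and SROA candidacy are all
/// propagated through the cast.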
00366 bool CallAnalyzer::visitBitCast(BitCastInst &I) {
00367   // Propagate constants through bitcasts.
00368   Constant *COp = dyn_cast<Constant>(I.getOperand(0));
00369   if (!COp)
00370     COp = SimplifiedValues.lookup(I.getOperand(0));
00371   if (COp)
00372     if (Constant *C = ConstantExpr::getBitCast(COp, I.getType())) {
00373       SimplifiedValues[&I] = C;
00374       return true;
00375     }
00376 
00377   // Track base/offsets through casts
00378   std::pair<Value *, APInt> BaseAndOffset
00379     = ConstantOffsetPtrs.lookup(I.getOperand(0));
00380   // Casts don't change the offset, just wrap it up.
00381   if (BaseAndOffset.first)
00382     ConstantOffsetPtrs[&I] = BaseAndOffset;
00383 
00384   // Also look for SROA candidates here.
00385   Value *SROAArg;
00386   DenseMap<Value *, int>::iterator CostIt;
00387   if (lookupSROAArgAndCost(I.getOperand(0), SROAArg, CostIt))
00388     SROAArgValues[&I] = SROAArg;
00389 
00390   // Bitcasts are always zero cost.
00391   return true;
00392 }
00393 
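/// \brief Model a ptrtoint cast.
///
/// Constants are folded, and base-and-offset info is propagated when the
/// integer is wide enough to hold the pointer. SROA candidacy is preserved;
/// the cast is free only when TTI reports its user cost as free.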
00394 bool CallAnalyzer::visitPtrToInt(PtrToIntInst &I) {
00395   // Propagate constants through ptrtoint.
00396   Constant *COp = dyn_cast<Constant>(I.getOperand(0));
00397   if (!COp)
00398     COp = SimplifiedValues.lookup(I.getOperand(0));
00399   if (COp)
00400     if (Constant *C = ConstantExpr::getPtrToInt(COp, I.getType())) {
00401       SimplifiedValues[&I] = C;
00402       return true;
00403     }
00404 
00405   // Track base/offset pairs when converted to a plain integer provided the
00406   // integer is large enough to represent the pointer.
00407   unsigned IntegerSize = I.getType()->getScalarSizeInBits();
00408   const DataLayout &DL = F.getParent()->getDataLayout();
00409   if (IntegerSize >= DL.getPointerSizeInBits()) {
00410     std::pair<Value *, APInt> BaseAndOffset
00411       = ConstantOffsetPtrs.lookup(I.getOperand(0));
00412     if (BaseAndOffset.first)
00413       ConstantOffsetPtrs[&I] = BaseAndOffset;
00414   }
00415 
00416   // This is really weird. Technically, ptrtoint will disable SROA. However,
00417   // unless that ptrtoint is *used* somewhere in the live basic blocks after
00418   // inlining, it will be nuked, and SROA should proceed. All of the uses which
00419   // would block SROA would also block SROA if applied directly to a pointer,
00420   // and so we can just add the integer in here. The only places where SROA is
00421   // preserved either cannot fire on an integer, or won't in-and-of themselves
00422   // disable SROA (ext) w/o some later use that we would see and disable.
00423   Value *SROAArg;
00424   DenseMap<Value *, int>::iterator CostIt;
00425   if (lookupSROAArgAndCost(I.getOperand(0), SROAArg, CostIt))
00426     SROAArgValues[&I] = SROAArg;
00427 
00428   return TargetTransformInfo::TCC_Free == TTI.getUserCost(&I);
00429 }
00430 
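/// \brief Model an inttoptr cast.
///
/// The mirror image of visitPtrToInt: constants are folded, base-and-offset
/// info survives a round trip through an integer no wider than a pointer, and
/// SROA candidacy is preserved. Free only when TTI reports the cast as free.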
00431 bool CallAnalyzer::visitIntToPtr(IntToPtrInst &I) {
00432   // Propagate constants through inttoptr.
00433   Constant *COp = dyn_cast<Constant>(I.getOperand(0));
00434   if (!COp)
00435     COp = SimplifiedValues.lookup(I.getOperand(0));
00436   if (COp)
00437     if (Constant *C = ConstantExpr::getIntToPtr(COp, I.getType())) {
00438       SimplifiedValues[&I] = C;
00439       return true;
00440     }
00441 
00442   // Track base/offset pairs when round-tripped through a pointer without
00443   // modifications, provided the integer is not too large.
00444   Value *Op = I.getOperand(0);
00445   unsigned IntegerSize = Op->getType()->getScalarSizeInBits();
00446   const DataLayout &DL = F.getParent()->getDataLayout();
00447   if (IntegerSize <= DL.getPointerSizeInBits()) {
00448     std::pair<Value *, APInt> BaseAndOffset = ConstantOffsetPtrs.lookup(Op);
00449     if (BaseAndOffset.first)
00450       ConstantOffsetPtrs[&I] = BaseAndOffset;
00451   }
00452 
00453   // "Propagate" SROA here in the same manner as we do for ptrtoint above.
00454   Value *SROAArg;
00455   DenseMap<Value *, int>::iterator CostIt;
00456   if (lookupSROAArgAndCost(Op, SROAArg, CostIt))
00457     SROAArgValues[&I] = SROAArg;
00458 
00459   return TargetTransformInfo::TCC_Free == TTI.getUserCost(&I);
00460 }
00461 
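/// \brief Fallback handling for casts not modeled above.
///
/// Constant operands are folded; otherwise SROA is conservatively disabled on
/// the operand. Free only when TTI reports the cast as free.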
00462 bool CallAnalyzer::visitCastInst(CastInst &I) {
00463   // Propagate constants through casts.
00464   Constant *COp = dyn_cast<Constant>(I.getOperand(0));
00465   if (!COp)
00466     COp = SimplifiedValues.lookup(I.getOperand(0));
00467   if (COp)
00468     if (Constant *C = ConstantExpr::getCast(I.getOpcode(), COp, I.getType())) {
00469       SimplifiedValues[&I] = C;
00470       return true;
00471     }
00472 
00473   // Disable SROA in the face of arbitrary casts we don't whitelist elsewhere.
00474   disableSROA(I.getOperand(0));
00475 
00476   return TargetTransformInfo::TCC_Free == TTI.getUserCost(&I);
00477 }
00478 
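/// \brief Constant-fold unary instructions where the operand is known.
///
/// If folding fails, SROA is disabled on the operand and the instruction is
/// charged its normal cost.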
00479 bool CallAnalyzer::visitUnaryInstruction(UnaryInstruction &I) {
00480   Value *Operand = I.getOperand(0);
00481   Constant *COp = dyn_cast<Constant>(Operand);
00482   if (!COp)
00483     COp = SimplifiedValues.lookup(Operand);
00484   if (COp) {
00485     const DataLayout &DL = F.getParent()->getDataLayout();
00486     if (Constant *C = ConstantFoldInstOperands(I.getOpcode(), I.getType(),
00487                                                COp, DL)) {
00488       SimplifiedValues[&I] = C;
00489       return true;
00490     }
00491   }
00492 
00493   // Disable any SROA on the argument to arbitrary unary operators.
00494   disableSROA(Operand);
00495 
00496   return false;
00497 }
00498 
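/// \brief Try to fold comparisons.
///
/// Handles comparisons of simplified constant operands, of constant-offset
/// pointers sharing a common base, and equality comparisons of alloca-derived
/// values against null. Null comparisons of SROA candidates are credited to
/// the SROA cost savings instead.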
00499 bool CallAnalyzer::visitCmpInst(CmpInst &I) {
00500   Value *LHS = I.getOperand(0), *RHS = I.getOperand(1);
00501   // First try to handle simplified comparisons.
00502   if (!isa<Constant>(LHS))
00503     if (Constant *SimpleLHS = SimplifiedValues.lookup(LHS))
00504       LHS = SimpleLHS;
00505   if (!isa<Constant>(RHS))
00506     if (Constant *SimpleRHS = SimplifiedValues.lookup(RHS))
00507       RHS = SimpleRHS;
00508   if (Constant *CLHS = dyn_cast<Constant>(LHS)) {
00509     if (Constant *CRHS = dyn_cast<Constant>(RHS))
00510       if (Constant *C = ConstantExpr::getCompare(I.getPredicate(), CLHS, CRHS)) {
00511         SimplifiedValues[&I] = C;
00512         return true;
00513       }
00514   }
00515 
00516   if (I.getOpcode() == Instruction::FCmp)
00517     return false;
00518 
00519   // Otherwise look for a comparison between constant offset pointers with
00520   // a common base.
00521   Value *LHSBase, *RHSBase;
00522   APInt LHSOffset, RHSOffset;
00523   std::tie(LHSBase, LHSOffset) = ConstantOffsetPtrs.lookup(LHS);
00524   if (LHSBase) {
00525     std::tie(RHSBase, RHSOffset) = ConstantOffsetPtrs.lookup(RHS);
00526     if (RHSBase && LHSBase == RHSBase) {
00527       // We have common bases, fold the icmp to a constant based on the
00528       // offsets.
00529       Constant *CLHS = ConstantInt::get(LHS->getContext(), LHSOffset);
00530       Constant *CRHS = ConstantInt::get(RHS->getContext(), RHSOffset);
00531       if (Constant *C = ConstantExpr::getICmp(I.getPredicate(), CLHS, CRHS)) {
00532         SimplifiedValues[&I] = C;
00533         ++NumConstantPtrCmps;
00534         return true;
00535       }
00536     }
00537   }
00538 
00539   // If the comparison is an equality comparison with null, we can simplify it
00540   // for any alloca-derived argument.
00541   if (I.isEquality() && isa<ConstantPointerNull>(I.getOperand(1)))
00542     if (isAllocaDerivedArg(I.getOperand(0))) {
00543       // We can actually predict the result of comparisons between an
00544       // alloca-derived value and null. Note that this fires regardless of
00545       // SROA firing.
00546       bool IsNotEqual = I.getPredicate() == CmpInst::ICMP_NE;
00547       SimplifiedValues[&I] = IsNotEqual ? ConstantInt::getTrue(I.getType())
00548                                         : ConstantInt::getFalse(I.getType());
00549       return true;
00550     }
00551 
00552   // Finally check for SROA candidates in comparisons.
00553   Value *SROAArg;
00554   DenseMap<Value *, int>::iterator CostIt;
00555   if (lookupSROAArgAndCost(I.getOperand(0), SROAArg, CostIt)) {
00556     if (isa<ConstantPointerNull>(I.getOperand(1))) {
00557       accumulateSROACost(CostIt, InlineConstants::InstrCost);
00558       return true;
00559     }
00560 
00561     disableSROA(CostIt);
00562   }
00563 
00564   return false;
00565 }
00566 
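/// \brief Fold pointer differences.
///
/// A subtract of two constant-offset pointers with a common base folds to a
/// constant; anything else falls back to the generic binary-operator logic.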
00567 bool CallAnalyzer::visitSub(BinaryOperator &I) {
00568   // Try to handle a special case: we can fold computing the difference of two
00569   // constant-related pointers.
00570   Value *LHS = I.getOperand(0), *RHS = I.getOperand(1);
00571   Value *LHSBase, *RHSBase;
00572   APInt LHSOffset, RHSOffset;
00573   std::tie(LHSBase, LHSOffset) = ConstantOffsetPtrs.lookup(LHS);
00574   if (LHSBase) {
00575     std::tie(RHSBase, RHSOffset) = ConstantOffsetPtrs.lookup(RHS);
00576     if (RHSBase && LHSBase == RHSBase) {
00577       // We have common bases, fold the subtract to a constant based on the
00578       // offsets.
00579       Constant *CLHS = ConstantInt::get(LHS->getContext(), LHSOffset);
00580       Constant *CRHS = ConstantInt::get(RHS->getContext(), RHSOffset);
00581       if (Constant *C = ConstantExpr::getSub(CLHS, CRHS)) {
00582         SimplifiedValues[&I] = C;
00583         ++NumConstantPtrDiffs;
00584         return true;
00585       }
00586     }
00587   }
00588 
00589   // Otherwise, fall back to the generic logic for simplifying and handling
00590   // instructions.
00591   return Base::visitSub(I);
00592 }
00593 
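/// \brief Try to simplify a binary operator using known constant operands.
///
/// If instsimplify cannot reduce it to a constant, SROA is disabled on both
/// operands and the instruction is charged its normal cost.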
00594 bool CallAnalyzer::visitBinaryOperator(BinaryOperator &I) {
00595   Value *LHS = I.getOperand(0), *RHS = I.getOperand(1);
00596   const DataLayout &DL = F.getParent()->getDataLayout();
00597   if (!isa<Constant>(LHS))
00598     if (Constant *SimpleLHS = SimplifiedValues.lookup(LHS))
00599       LHS = SimpleLHS;
00600   if (!isa<Constant>(RHS))
00601     if (Constant *SimpleRHS = SimplifiedValues.lookup(RHS))
00602       RHS = SimpleRHS;
00603   Value *SimpleV = nullptr;
00604   if (auto FI = dyn_cast<FPMathOperator>(&I))
00605     SimpleV =
00606         SimplifyFPBinOp(I.getOpcode(), LHS, RHS, FI->getFastMathFlags(), DL);
00607   else
00608     SimpleV = SimplifyBinOp(I.getOpcode(), LHS, RHS, DL);
00609 
00610   if (Constant *C = dyn_cast_or_null<Constant>(SimpleV)) {
00611     SimplifiedValues[&I] = C;
00612     return true;
00613   }
00614 
00615   // Disable any SROA on arguments to arbitrary, unsimplified binary operators.
00616   disableSROA(LHS);
00617   disableSROA(RHS);
00618 
00619   return false;
00620 }
00621 
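/// \brief Model loads.
///
/// Simple loads from SROA candidate pointers are credited to the candidate's
/// cost savings; volatile or atomic loads disable SROA instead.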
00622 bool CallAnalyzer::visitLoad(LoadInst &I) {
00623   Value *SROAArg;
00624   DenseMap<Value *, int>::iterator CostIt;
00625   if (lookupSROAArgAndCost(I.getPointerOperand(), SROAArg, CostIt)) {
00626     if (I.isSimple()) {
00627       accumulateSROACost(CostIt, InlineConstants::InstrCost);
00628       return true;
00629     }
00630 
00631     disableSROA(CostIt);
00632   }
00633 
00634   return false;
00635 }
00636 
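/// \brief Model stores, mirroring the handling of loads above.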
00637 bool CallAnalyzer::visitStore(StoreInst &I) {
00638   Value *SROAArg;
00639   DenseMap<Value *, int>::iterator CostIt;
00640   if (lookupSROAArgAndCost(I.getPointerOperand(), SROAArg, CostIt)) {
00641     if (I.isSimple()) {
00642       accumulateSROACost(CostIt, InlineConstants::InstrCost);
00643       return true;
00644     }
00645 
00646     disableSROA(CostIt);
00647   }
00648 
00649   return false;
00650 }
00651 
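/// \brief Constant-fold extractvalue when the aggregate is a known constant.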
00652 bool CallAnalyzer::visitExtractValue(ExtractValueInst &I) {
00653   // Constant folding for extract value is trivial.
00654   Constant *C = dyn_cast<Constant>(I.getAggregateOperand());
00655   if (!C)
00656     C = SimplifiedValues.lookup(I.getAggregateOperand());
00657   if (C) {
00658     SimplifiedValues[&I] = ConstantExpr::getExtractValue(C, I.getIndices());
00659     return true;
00660   }
00661 
00662   // SROA can look through these but give them a cost.
00663   return false;
00664 }
00665 
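/// \brief Constant-fold insertvalue when both the aggregate and the inserted
/// value are known constants.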
00666 bool CallAnalyzer::visitInsertValue(InsertValueInst &I) {
00667   // Constant folding for insert value is trivial.
00668   Constant *AggC = dyn_cast<Constant>(I.getAggregateOperand());
00669   if (!AggC)
00670     AggC = SimplifiedValues.lookup(I.getAggregateOperand());
00671   Constant *InsertedC = dyn_cast<Constant>(I.getInsertedValueOperand());
00672   if (!InsertedC)
00673     InsertedC = SimplifiedValues.lookup(I.getInsertedValueOperand());
00674   if (AggC && InsertedC) {
00675     SimplifiedValues[&I] = ConstantExpr::getInsertValue(AggC, InsertedC,
00676                                                         I.getIndices());
00677     return true;
00678   }
00679 
00680   // SROA can look through these but give them a cost.
00681   return false;
00682 }
00683 
00684 /// \brief Try to simplify a call site.
00685 ///
00686 /// Takes a concrete function and callsite and tries to actually simplify it by
00687 /// analyzing the arguments and call itself with instsimplify. Returns true if
00688 /// it has simplified the callsite to some other entity (a constant), making it
00689 /// free.
00690 bool CallAnalyzer::simplifyCallSite(Function *F, CallSite CS) {
00691   // FIXME: Using the instsimplify logic directly for this is inefficient
00692   // because we have to continually rebuild the argument list even when no
00693   // simplifications can be performed. Until that is fixed with remapping
00694   // inside of instsimplify, directly constant fold calls here.
00695   if (!canConstantFoldCallTo(F))
00696     return false;
00697 
00698   // Try to re-map the arguments to constants.
00699   SmallVector<Constant *, 4> ConstantArgs;
00700   ConstantArgs.reserve(CS.arg_size());
00701   for (CallSite::arg_iterator I = CS.arg_begin(), E = CS.arg_end();
00702        I != E; ++I) {
00703     Constant *C = dyn_cast<Constant>(*I);
00704     if (!C)
00705       C = dyn_cast_or_null<Constant>(SimplifiedValues.lookup(*I));
00706     if (!C)
00707       return false; // This argument doesn't map to a constant.
00708 
00709     ConstantArgs.push_back(C);
00710   }
00711   if (Constant *C = ConstantFoldCall(F, ConstantArgs)) {
00712     SimplifiedValues[CS.getInstruction()] = C;
00713     return true;
00714   }
00715 
00716   return false;
00717 }
00718 
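/// \brief Model the cost of a call site.
///
/// Tries to simplify the call outright, special-cases a few intrinsics,
/// detects recursion and returns-twice exposure, and charges argument setup
/// plus a call penalty for calls that are actually lowered to calls. Indirect
/// calls whose target is known through simplification are speculatively
/// analyzed to award a bounded devirtualization bonus.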
00719 bool CallAnalyzer::visitCallSite(CallSite CS) {
00720   if (CS.hasFnAttr(Attribute::ReturnsTwice) &&
00721       !F.hasFnAttribute(Attribute::ReturnsTwice)) {
00722     // This aborts the entire analysis.
00723     ExposesReturnsTwice = true;
00724     return false;
00725   }
00726   if (CS.isCall() &&
00727       cast<CallInst>(CS.getInstruction())->cannotDuplicate())
00728     ContainsNoDuplicateCall = true;
00729 
00730   if (Function *F = CS.getCalledFunction()) {
00731     // When we have a concrete function, first try to simplify it directly.
00732     if (simplifyCallSite(F, CS))
00733       return true;
00734 
00735     // Next check if it is an intrinsic we know about.
00736     // FIXME: Lift this into part of the InstVisitor.
00737     if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(CS.getInstruction())) {
00738       switch (II->getIntrinsicID()) {
00739       default:
00740         return Base::visitCallSite(CS);
00741 
00742       case Intrinsic::memset:
00743       case Intrinsic::memcpy:
00744       case Intrinsic::memmove:
00745         // SROA can usually chew through these intrinsics, but they aren't free.
00746         return false;
00747       case Intrinsic::frameescape:
00748         HasFrameEscape = true;
00749         return false;
00750       }
00751     }
00752 
00753     if (F == CS.getInstruction()->getParent()->getParent()) {
00754       // This flag will fully abort the analysis, so don't bother with anything
00755       // else.
00756       IsRecursiveCall = true;
00757       return false;
00758     }
00759 
00760     if (TTI.isLoweredToCall(F)) {
00761       // We account for the average 1 instruction per call argument setup
00762       // here.
00763       Cost += CS.arg_size() * InlineConstants::InstrCost;
00764 
00765       // Everything other than inline ASM will also have a significant cost
00766       // merely from making the call.
00767       if (!isa<InlineAsm>(CS.getCalledValue()))
00768         Cost += InlineConstants::CallPenalty;
00769     }
00770 
00771     return Base::visitCallSite(CS);
00772   }
00773 
00774   // Otherwise we're in a very special case -- an indirect function call. See
00775   // if we can be particularly clever about this.
00776   Value *Callee = CS.getCalledValue();
00777 
00778   // First, pay the price of the argument setup. We account for the average
00779   // 1 instruction per call argument setup here.
00780   Cost += CS.arg_size() * InlineConstants::InstrCost;
00781 
00782   // Next, check if this happens to be an indirect function call to a known
00783   // function in this inline context. If not, we've done all we can.
00784   Function *F = dyn_cast_or_null<Function>(SimplifiedValues.lookup(Callee));
00785   if (!F)
00786     return Base::visitCallSite(CS);
00787 
00788   // If we have a constant that we are calling as a function, we can peer
00789   // through it and see the function target. This happens not infrequently
00790   // during devirtualization and so we want to give it a hefty bonus for
00791   // inlining, but cap that bonus in the event that inlining wouldn't pan
00792   // out. Pretend to inline the function, with a custom threshold.
00793   CallAnalyzer CA(TTI, ACT, *F, InlineConstants::IndirectCallThreshold);
00794   if (CA.analyzeCall(CS)) {
00795     // We were able to inline the indirect call! Subtract the cost from the
00796     // bonus we want to apply, but don't go below zero.
00797     Cost -= std::max(0, InlineConstants::IndirectCallThreshold - CA.getCost());
00798   }
00799 
00800   return Base::visitCallSite(CS);
00801 }
00802 
00803 bool CallAnalyzer::visitReturnInst(ReturnInst &RI) {
00804   // At least one return instruction will be free after inlining.
00805   bool Free = !HasReturn;
00806   HasReturn = true;
00807   return Free;
00808 }
00809 
00810 bool CallAnalyzer::visitBranchInst(BranchInst &BI) {
00811   // We model unconditional branches as essentially free -- they really
00812   // shouldn't exist at all, but handling them makes the behavior of the
00813   // inliner more regular and predictable. Interestingly, conditional branches
00814   // which will fold away are also free.
00815   return BI.isUnconditional() || isa<ConstantInt>(BI.getCondition()) ||
00816          dyn_cast_or_null<ConstantInt>(
00817              SimplifiedValues.lookup(BI.getCondition()));
00818 }
00819 
00820 bool CallAnalyzer::visitSwitchInst(SwitchInst &SI) {
00821   // We model unconditional switches as free, see the comments on handling
00822   // branches.
00823   if (isa<ConstantInt>(SI.getCondition()))
00824     return true;
00825   if (Value *V = SimplifiedValues.lookup(SI.getCondition()))
00826     if (isa<ConstantInt>(V))
00827       return true;
00828 
00829   // Otherwise, we need to accumulate a cost proportional to the number of
00830   // distinct successor blocks. This fan-out in the CFG cannot be represented
00831   // for free even if we can represent the core switch as a jumptable that
00832   // takes a single instruction.
00833   //
00834   // NB: We convert large switches which are just used to initialize large phi
00835   // nodes to lookup tables instead in simplify-cfg, so this shouldn't prevent
00836   // inlining those. It will prevent inlining in cases where the optimization
00837   // does not (yet) fire.
00838   SmallPtrSet<BasicBlock *, 8> SuccessorBlocks;
00839   SuccessorBlocks.insert(SI.getDefaultDest());
00840   for (auto I = SI.case_begin(), E = SI.case_end(); I != E; ++I)
00841     SuccessorBlocks.insert(I.getCaseSuccessor());
00842   // Add cost corresponding to the number of distinct destinations. The first
00843   // we model as free because of fallthrough.
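00843   // For example, a switch whose cases and default reach four distinct blocks
00843   // adds 3 * InstrCost here.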
00844   Cost += (SuccessorBlocks.size() - 1) * InlineConstants::InstrCost;
00845   return false;
00846 }
00847 
00848 bool CallAnalyzer::visitIndirectBrInst(IndirectBrInst &IBI) {
00849   // We never want to inline functions that contain an indirectbr.  This is
00850   // incorrect because all the blockaddresses (in static global initializers
00851   // for example) would be referring to the original function, and this
00852   // indirect jump would jump from the inlined copy of the function into the
00853   // original function which is extremely undefined behavior.
00854   // FIXME: This logic isn't really right; we can safely inline functions with
00855   // indirectbr's as long as no other function or global references the
00856   // blockaddress of a block within the current function.
00857   HasIndirectBr = true;
00858   return false;
00859 }
00860 
00861 bool CallAnalyzer::visitResumeInst(ResumeInst &RI) {
00862   // FIXME: It's not clear that a single instruction is an accurate model for
00863   // the inline cost of a resume instruction.
00864   return false;
00865 }
00866 
00867 bool CallAnalyzer::visitUnreachableInst(UnreachableInst &I) {
00868   // FIXME: It might be reasonable to discount the cost of instructions leading
00869   // to unreachable as they have the lowest possible impact on both runtime and
00870   // code size.
00871   return true; // No actual code is needed for unreachable.
00872 }
00873 
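/// \brief Base case for instructions with no dedicated visitor.
///
/// Instructions TTI considers free stay free; anything else disables SROA on
/// all of its operands and is charged the normal instruction cost.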
00874 bool CallAnalyzer::visitInstruction(Instruction &I) {
00875   // Some instructions are free. All of the free intrinsics can also be
00876   // handled by SROA, etc.
00877   if (TargetTransformInfo::TCC_Free == TTI.getUserCost(&I))
00878     return true;
00879 
00880   // We found something we don't understand or can't handle. Mark any SROA-able
00881   // values in the operand list as no longer viable.
00882   for (User::op_iterator OI = I.op_begin(), OE = I.op_end(); OI != OE; ++OI)
00883     disableSROA(*OI);
00884 
00885   return false;
00886 }
00887 
00888 
00889 /// \brief Analyze a basic block for its contribution to the inline cost.
00890 ///
00891 /// This method walks the analyzer over every instruction in the given basic
00892 /// block and accounts for their cost during inlining at this callsite. It
00893 /// aborts early if the threshold has been exceeded or an impossible to inline
00894 /// construct has been detected. It returns false if inlining is no longer
00895 /// viable, and true if inlining remains viable.
00896 bool CallAnalyzer::analyzeBlock(BasicBlock *BB,
00897                                 SmallPtrSetImpl<const Value *> &EphValues) {
00898   for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I) {
00899     // FIXME: Currently, the number of instructions in a function, regardless of
00900     // our ability to simplify them during inlining to constants or dead code, is
00901     // actually used by the vector bonus heuristic. As long as that's true,
00902     // we have to special case debug intrinsics here to prevent differences in
00903     // inlining due to debug symbols. Eventually, the number of unsimplified
00904     // instructions shouldn't factor into the cost computation, but until then,
00905     // hack around it here.
00906     if (isa<DbgInfoIntrinsic>(I))
00907       continue;
00908 
00909     // Skip ephemeral values.
00910     if (EphValues.count(I))
00911       continue;
00912 
00913     ++NumInstructions;
00914     if (isa<ExtractElementInst>(I) || I->getType()->isVectorTy())
00915       ++NumVectorInstructions;
00916 
00917     // If the instruction is floating point, and the target says this operation is
00918     // expensive or the function has the "use-soft-float" attribute, this may
00919     // eventually become a library call.  Treat the cost as such.
00920     if (I->getType()->isFloatingPointTy()) {
00921       bool hasSoftFloatAttr = false;
00922 
00923       // If the function has the "use-soft-float" attribute, mark it as expensive.
00924       if (F.hasFnAttribute("use-soft-float")) {
00925         Attribute Attr = F.getFnAttribute("use-soft-float");
00926         StringRef Val = Attr.getValueAsString();
00927         if (Val == "true")
00928           hasSoftFloatAttr = true;
00929       }
00930 
00931       if (TTI.getFPOpCost(I->getType()) == TargetTransformInfo::TCC_Expensive ||
00932           hasSoftFloatAttr)
00933         Cost += InlineConstants::CallPenalty;
00934     }
00935 
00936     // If the instruction simplified to a constant, there is no cost to this
00937     // instruction. Visit the instructions using our InstVisitor to account for
00938     // all of the per-instruction logic. The visit tree returns true if we
00939     // consumed the instruction in any way, and false if the instruction's base
00940     // cost should count against inlining.
00941     if (Base::visit(I))
00942       ++NumInstructionsSimplified;
00943     else
00944       Cost += InlineConstants::InstrCost;
00945 
00946     // If visiting this instruction detected an uninlinable pattern, abort.
00947     if (IsRecursiveCall || ExposesReturnsTwice || HasDynamicAlloca ||
00948         HasIndirectBr || HasFrameEscape)
00949       return false;
00950 
00951     // If the caller is a recursive function then we don't want to inline
00952     // functions which allocate a lot of stack space because it would increase
00953     // the caller stack usage dramatically.
00954     if (IsCallerRecursive &&
00955         AllocatedSize > InlineConstants::TotalAllocaSizeRecursiveCaller)
00956       return false;
00957 
00958     if (NumVectorInstructions > NumInstructions/2)
00959       VectorBonus = FiftyPercentVectorBonus;
00960     else if (NumVectorInstructions > NumInstructions/10)
00961       VectorBonus = TenPercentVectorBonus;
00962     else
00963       VectorBonus = 0;
00964 
00965     // Check if we've passed the threshold so we don't spin in huge basic
00966     // blocks that will never inline.
00967     if (Cost > (Threshold + VectorBonus))
00968       return false;
00969   }
00970 
00971   return true;
00972 }
00973 
00974 /// \brief Compute the base pointer and cumulative constant offsets for V.
00975 ///
00976 /// This strips all constant offsets off of V, leaving it the base pointer, and
00977 /// accumulates the total constant offset applied in the returned constant. It
00978 /// returns 0 if V is not a pointer, and returns the constant '0' if there are
00979 /// no constant offsets applied.
00980 ConstantInt *CallAnalyzer::stripAndComputeInBoundsConstantOffsets(Value *&V) {
00981   if (!V->getType()->isPointerTy())
00982     return nullptr;
00983 
00984   const DataLayout &DL = F.getParent()->getDataLayout();
00985   unsigned IntPtrWidth = DL.getPointerSizeInBits();
00986   APInt Offset = APInt::getNullValue(IntPtrWidth);
00987 
00988   // Even though we don't look through PHI nodes, we could be called on an
00989   // instruction in an unreachable block, which may be on a cycle.
00990   SmallPtrSet<Value *, 4> Visited;
00991   Visited.insert(V);
00992   do {
00993     if (GEPOperator *GEP = dyn_cast<GEPOperator>(V)) {
00994       if (!GEP->isInBounds() || !accumulateGEPOffset(*GEP, Offset))
00995         return nullptr;
00996       V = GEP->getPointerOperand();
00997     } else if (Operator::getOpcode(V) == Instruction::BitCast) {
00998       V = cast<Operator>(V)->getOperand(0);
00999     } else if (GlobalAlias *GA = dyn_cast<GlobalAlias>(V)) {
01000       if (GA->mayBeOverridden())
01001         break;
01002       V = GA->getAliasee();
01003     } else {
01004       break;
01005     }
01006     assert(V->getType()->isPointerTy() && "Unexpected operand type!");
01007   } while (Visited.insert(V).second);
01008 
01009   Type *IntPtrTy = DL.getIntPtrType(V->getContext());
01010   return cast<ConstantInt>(ConstantInt::get(IntPtrTy, Offset));
01011 }
01012 
01013 /// \brief Analyze a call site for potential inlining.
01014 ///
01015 /// Returns true if inlining this call is viable, and false if it is not
01016 /// viable. It computes the cost and adjusts the threshold based on numerous
01017 /// factors and heuristics. If this method returns false but the computed cost
01018 /// is below the computed threshold, then inlining was forcibly disabled by
01019 /// some artifact of the routine.
01020 bool CallAnalyzer::analyzeCall(CallSite CS) {
01021   ++NumCallsAnalyzed;
01022 
01023   // Track whether the post-inlining function would have more than one basic
01024   // block. A single basic block is often intended for inlining. Balloon the
01025   // threshold by 50% until we pass the single-BB phase.
01026   bool SingleBB = true;
01027   int SingleBBBonus = Threshold / 2;
01028   Threshold += SingleBBBonus;
01029 
01030   // Perform some tweaks to the cost and threshold based on the direct
01031   // callsite information.
01032 
01033   // We want to more aggressively inline vector-dense kernels, so up the
01034   // threshold, and we'll lower it if the % of vector instructions gets too
01035   // low.
01036   assert(NumInstructions == 0);
01037   assert(NumVectorInstructions == 0);
01038   FiftyPercentVectorBonus = Threshold;
01039   TenPercentVectorBonus = Threshold / 2;
01040   const DataLayout &DL = F.getParent()->getDataLayout();
01041 
01042   // Give out bonuses per argument, as the instructions setting them up will
01043   // be gone after inlining.
01044   for (unsigned I = 0, E = CS.arg_size(); I != E; ++I) {
01045     if (CS.isByValArgument(I)) {
01046       // We approximate the number of loads and stores needed by dividing the
01047       // size of the byval type by the target's pointer size.
01048       PointerType *PTy = cast<PointerType>(CS.getArgument(I)->getType());
01049       unsigned TypeSize = DL.getTypeSizeInBits(PTy->getElementType());
01050       unsigned PointerSize = DL.getPointerSizeInBits();
01051       // Ceiling division.
01052       unsigned NumStores = (TypeSize + PointerSize - 1) / PointerSize;
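      // For example, a 256-bit byval struct on a target with 64-bit pointers
      // yields (256 + 63) / 64 = 4 words, i.e. roughly four loads and stores.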
01053 
01054       // If it generates more than 8 stores it is likely to be expanded as an
01055       // inline memcpy so we take that as an upper bound. Otherwise we assume
01056       // one load and one store per word copied.
01057       // FIXME: The maxStoresPerMemcpy setting from the target should be used
01058       // here instead of a magic number of 8, but it's not available via
01059       // DataLayout.
01060       NumStores = std::min(NumStores, 8U);
01061 
01062       Cost -= 2 * NumStores * InlineConstants::InstrCost;
01063     } else {
01064       // For non-byval arguments subtract off one instruction per call
01065       // argument.
01066       Cost -= InlineConstants::InstrCost;
01067     }
01068   }
01069 
01070   // If there is only one call of the function, and it has internal linkage,
01071   // the cost of inlining it drops dramatically.
01072   bool OnlyOneCallAndLocalLinkage = F.hasLocalLinkage() && F.hasOneUse() &&
01073     &F == CS.getCalledFunction();
01074   if (OnlyOneCallAndLocalLinkage)
01075     Cost += InlineConstants::LastCallToStaticBonus;
01076 
01077   // If the instruction after the call, or if the normal destination of the
01078   // invoke is an unreachable instruction, the function is noreturn. As such,
01079   // there is little point in inlining this unless there is literally zero
01080   // cost.
01081   Instruction *Instr = CS.getInstruction();
01082   if (InvokeInst *II = dyn_cast<InvokeInst>(Instr)) {
01083     if (isa<UnreachableInst>(II->getNormalDest()->begin()))
01084       Threshold = 1;
01085   } else if (isa<UnreachableInst>(++BasicBlock::iterator(Instr)))
01086     Threshold = 1;
01087 
01088   // If this function uses the coldcc calling convention, prefer not to inline
01089   // it.
01090   if (F.getCallingConv() == CallingConv::Cold)
01091     Cost += InlineConstants::ColdccPenalty;
01092 
01093   // Check if we're done. This can happen due to bonuses and penalties.
01094   if (Cost > Threshold)
01095     return false;
01096 
01097   if (F.empty())
01098     return true;
01099 
01100   Function *Caller = CS.getInstruction()->getParent()->getParent();
01101   // Check if the caller function is recursive itself.
01102   for (User *U : Caller->users()) {
01103     CallSite Site(U);
01104     if (!Site)
01105       continue;
01106     Instruction *I = Site.getInstruction();
01107     if (I->getParent()->getParent() == Caller) {
01108       IsCallerRecursive = true;
01109       break;
01110     }
01111   }
01112 
01113   // Populate our simplified values by mapping from function arguments to call
01114   // arguments with known important simplifications.
01115   CallSite::arg_iterator CAI = CS.arg_begin();
01116   for (Function::arg_iterator FAI = F.arg_begin(), FAE = F.arg_end();
01117        FAI != FAE; ++FAI, ++CAI) {
01118     assert(CAI != CS.arg_end());
01119     if (Constant *C = dyn_cast<Constant>(CAI))
01120       SimplifiedValues[FAI] = C;
01121 
01122     Value *PtrArg = *CAI;
01123     if (ConstantInt *C = stripAndComputeInBoundsConstantOffsets(PtrArg)) {
01124       ConstantOffsetPtrs[FAI] = std::make_pair(PtrArg, C->getValue());
01125 
01126       // We can SROA any pointer arguments derived from alloca instructions.
01127       if (isa<AllocaInst>(PtrArg)) {
01128         SROAArgValues[FAI] = PtrArg;
01129         SROAArgCosts[PtrArg] = 0;
01130       }
01131     }
01132   }
01133   NumConstantArgs = SimplifiedValues.size();
01134   NumConstantOffsetPtrArgs = ConstantOffsetPtrs.size();
01135   NumAllocaArgs = SROAArgValues.size();
01136 
01137   // FIXME: If a caller has multiple calls to a callee, we end up recomputing
01138   // the ephemeral values multiple times (and they're completely determined by
01139   // the callee, so this is purely duplicate work).
01140   SmallPtrSet<const Value *, 32> EphValues;
01141   CodeMetrics::collectEphemeralValues(&F, &ACT->getAssumptionCache(F), EphValues);
01142 
01143   // The worklist of live basic blocks in the callee *after* inlining. We avoid
01144   // adding basic blocks of the callee which can be proven to be dead for this
01145   // particular call site in order to get more accurate cost estimates. This
01146   // requires a somewhat heavyweight iteration pattern: we need to walk the
01147   // basic blocks in a breadth-first order as we insert live successors. To
01148   // accomplish this, prioritizing for small iterations because we exit after
01149   // crossing our threshold, we use a small-size optimized SetVector.
01150   typedef SetVector<BasicBlock *, SmallVector<BasicBlock *, 16>,
01151                                   SmallPtrSet<BasicBlock *, 16> > BBSetVector;
01152   BBSetVector BBWorklist;
01153   BBWorklist.insert(&F.getEntryBlock());
01154   // Note that we *must not* cache the size, this loop grows the worklist.
01155   for (unsigned Idx = 0; Idx != BBWorklist.size(); ++Idx) {
01156     // Bail out the moment we cross the threshold. This means we'll under-count
01157     // the cost, but only when undercounting doesn't matter.
01158     if (Cost > (Threshold + VectorBonus))
01159       break;
01160 
01161     BasicBlock *BB = BBWorklist[Idx];
01162     if (BB->empty())
01163       continue;
01164 
01165     // Disallow inlining a blockaddress. A blockaddress only has defined
01166     // behavior for an indirect branch in the same function, and we do not
01167     // currently support inlining indirect branches. But, the inliner may not
01168     // see an indirect branch that ends up being dead code at a particular call
01169     // site. If the blockaddress escapes the function, e.g., via a global
01170     // variable, inlining may lead to an invalid cross-function reference.
01171     if (BB->hasAddressTaken())
01172       return false;
01173 
01174     // Analyze the cost of this block. If we blow through the threshold, this
01175     // returns false, and we can bail out.
01176     if (!analyzeBlock(BB, EphValues)) {
01177       if (IsRecursiveCall || ExposesReturnsTwice || HasDynamicAlloca ||
01178           HasIndirectBr || HasFrameEscape)
01179         return false;
01180 
01181       // If the caller is a recursive function then we don't want to inline
01182       // functions which allocate a lot of stack space because it would increase
01183       // the caller stack usage dramatically.
01184       if (IsCallerRecursive &&
01185           AllocatedSize > InlineConstants::TotalAllocaSizeRecursiveCaller)
01186         return false;
01187 
01188       break;
01189     }
01190 
01191     TerminatorInst *TI = BB->getTerminator();
01192 
01193     // Add in the live successors by first checking whether we have terminator
01194     // that may be simplified based on the values simplified by this call.
01195     if (BranchInst *BI = dyn_cast<BranchInst>(TI)) {
01196       if (BI->isConditional()) {
01197         Value *Cond = BI->getCondition();
01198         if (ConstantInt *SimpleCond
01199               = dyn_cast_or_null<ConstantInt>(SimplifiedValues.lookup(Cond))) {
01200           BBWorklist.insert(BI->getSuccessor(SimpleCond->isZero() ? 1 : 0));
01201           continue;
01202         }
01203       }
01204     } else if (SwitchInst *SI = dyn_cast<SwitchInst>(TI)) {
01205       Value *Cond = SI->getCondition();
01206       if (ConstantInt *SimpleCond
01207             = dyn_cast_or_null<ConstantInt>(SimplifiedValues.lookup(Cond))) {
01208         BBWorklist.insert(SI->findCaseValue(SimpleCond).getCaseSuccessor());
01209         continue;
01210       }
01211     }
01212 
01213     // If we're unable to select a particular successor, just count all of
01214     // them.
01215     for (unsigned TIdx = 0, TSize = TI->getNumSuccessors(); TIdx != TSize;
01216          ++TIdx)
01217       BBWorklist.insert(TI->getSuccessor(TIdx));
01218 
01219     // If we had any successors at this point, then post-inlining is likely to
01220     // have them as well. Note that we assume any basic blocks which existed
01221     // due to branches or switches which folded above will also fold after
01222     // inlining.
01223     if (SingleBB && TI->getNumSuccessors() > 1) {
01224       // Take off the bonus we applied to the threshold.
01225       Threshold -= SingleBBBonus;
01226       SingleBB = false;
01227     }
01228   }
01229 
01230   // If this is a noduplicate call, we can still inline as long as
01231   // inlining this would cause the removal of the caller (so the instruction
01232   // is not actually duplicated, just moved).
01233   if (!OnlyOneCallAndLocalLinkage && ContainsNoDuplicateCall)
01234     return false;
01235 
01236   Threshold += VectorBonus;
01237 
01238   return Cost < Threshold;
01239 }
01240 
01241 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
01242 /// \brief Dump stats about this call's analysis.
01243 void CallAnalyzer::dump() {
01244 #define DEBUG_PRINT_STAT(x) dbgs() << "      " #x ": " << x << "\n"
01245   DEBUG_PRINT_STAT(NumConstantArgs);
01246   DEBUG_PRINT_STAT(NumConstantOffsetPtrArgs);
01247   DEBUG_PRINT_STAT(NumAllocaArgs);
01248   DEBUG_PRINT_STAT(NumConstantPtrCmps);
01249   DEBUG_PRINT_STAT(NumConstantPtrDiffs);
01250   DEBUG_PRINT_STAT(NumInstructionsSimplified);
01251   DEBUG_PRINT_STAT(SROACostSavings);
01252   DEBUG_PRINT_STAT(SROACostSavingsLost);
01253   DEBUG_PRINT_STAT(ContainsNoDuplicateCall);
01254   DEBUG_PRINT_STAT(Cost);
01255   DEBUG_PRINT_STAT(Threshold);
01256   DEBUG_PRINT_STAT(VectorBonus);
01257 #undef DEBUG_PRINT_STAT
01258 }
01259 #endif
01260 
01261 INITIALIZE_PASS_BEGIN(InlineCostAnalysis, "inline-cost", "Inline Cost Analysis",
01262                       true, true)
01263 INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)
01264 INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
01265 INITIALIZE_PASS_END(InlineCostAnalysis, "inline-cost", "Inline Cost Analysis",
01266                     true, true)
01267 
01268 char InlineCostAnalysis::ID = 0;
01269 
01270 InlineCostAnalysis::InlineCostAnalysis() : CallGraphSCCPass(ID) {}
01271 
01272 InlineCostAnalysis::~InlineCostAnalysis() {}
01273 
01274 void InlineCostAnalysis::getAnalysisUsage(AnalysisUsage &AU) const {
01275   AU.setPreservesAll();
01276   AU.addRequired<AssumptionCacheTracker>();
01277   AU.addRequired<TargetTransformInfoWrapperPass>();
01278   CallGraphSCCPass::getAnalysisUsage(AU);
01279 }
01280 
01281 bool InlineCostAnalysis::runOnSCC(CallGraphSCC &SCC) {
01282   TTIWP = &getAnalysis<TargetTransformInfoWrapperPass>();
01283   ACT = &getAnalysis<AssumptionCacheTracker>();
01284   return false;
01285 }
01286 
01287 InlineCost InlineCostAnalysis::getInlineCost(CallSite CS, int Threshold) {
01288   return getInlineCost(CS, CS.getCalledFunction(), Threshold);
01289 }
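
// A minimal sketch of how a client (e.g. an inliner pass holding an
// InlineCostAnalysis instance named ICA) consumes the result. Illustrative
// only, assuming the accessors declared in InlineCost.h; a real inliner does
// additional threshold adjustment and bookkeeping around this call:
//
//   InlineCost IC = ICA.getInlineCost(CS, Threshold);
//   if (IC.isAlways())      { /* always profitable; inline it            */ }
//   else if (IC.isNever())  { /* inlining is forbidden or not viable     */ }
//   else if (IC)            { /* Cost < Threshold: profitable to inline  */ }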
01290 
01291 /// \brief Test whether two functions either both have or both lack the given
01292 ///        attribute.
01293 template<typename AttrKind>
01294 static bool attributeMatches(Function *F1, Function *F2, AttrKind Attr) {
01295   return F1->getFnAttribute(Attr) == F2->getFnAttribute(Attr);
01296 }
01297 
01298 /// \brief Test that there are no attribute conflicts between Caller and Callee
01299 ///        that prevent inlining.
01300 static bool functionsHaveCompatibleAttributes(Function *Caller,
01301                                               Function *Callee) {
01302   return attributeMatches(Caller, Callee, "target-cpu") &&
01303          attributeMatches(Caller, Callee, "target-features") &&
01304          attributeMatches(Caller, Callee, Attribute::SanitizeAddress) &&
01305          attributeMatches(Caller, Callee, Attribute::SanitizeMemory) &&
01306          attributeMatches(Caller, Callee, Attribute::SanitizeThread);
01307 }
01308 
01309 InlineCost InlineCostAnalysis::getInlineCost(CallSite CS, Function *Callee,
01310                                              int Threshold) {
01311   // Cannot inline indirect calls.
01312   if (!Callee)
01313     return llvm::InlineCost::getNever();
01314 
01315   // Calls to functions with always-inline attributes should be inlined
01316   // whenever possible.
01317   if (CS.hasFnAttr(Attribute::AlwaysInline)) {
01318     if (isInlineViable(*Callee))
01319       return llvm::InlineCost::getAlways();
01320     return llvm::InlineCost::getNever();
01321   }
01322 
01323   // Never inline functions with conflicting attributes (unless callee has
01324   // always-inline attribute).
01325   if (!functionsHaveCompatibleAttributes(CS.getCaller(), Callee))
01326     return llvm::InlineCost::getNever();
01327 
01328   // Don't inline this call if the caller has the optnone attribute.
01329   if (CS.getCaller()->hasFnAttribute(Attribute::OptimizeNone))
01330     return llvm::InlineCost::getNever();
01331 
01332   // Don't inline functions which can be redefined at link-time to mean
01333   // something else.  Don't inline functions marked noinline or call sites
01334   // marked noinline.
01335   if (Callee->mayBeOverridden() ||
01336       Callee->hasFnAttribute(Attribute::NoInline) || CS.isNoInline())
01337     return llvm::InlineCost::getNever();
01338 
01339   DEBUG(llvm::dbgs() << "      Analyzing call of " << Callee->getName()
01340         << "...\n");
01341 
01342   CallAnalyzer CA(TTIWP->getTTI(*Callee), ACT, *Callee, Threshold);
01343   bool ShouldInline = CA.analyzeCall(CS);
01344 
01345   DEBUG(CA.dump());
01346 
01347   // Check if there was a reason to force inlining or no inlining.
01348   if (!ShouldInline && CA.getCost() < CA.getThreshold())
01349     return InlineCost::getNever();
01350   if (ShouldInline && CA.getCost() >= CA.getThreshold())
01351     return InlineCost::getAlways();
01352 
01353   return llvm::InlineCost::get(CA.getCost(), CA.getThreshold());
01354 }
01355 
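/// \brief Test whether a function can be inlined at all.
///
/// Rejects functions containing indirect branches or address-taken blocks,
/// recursive calls, calls that would newly expose returns_twice, and calls to
/// llvm.frameescape.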
01356 bool InlineCostAnalysis::isInlineViable(Function &F) {
01357   bool ReturnsTwice = F.hasFnAttribute(Attribute::ReturnsTwice);
01358   for (Function::iterator BI = F.begin(), BE = F.end(); BI != BE; ++BI) {
01359     // Disallow inlining of functions which contain indirect branches or
01360     // blockaddresses.
01361     if (isa<IndirectBrInst>(BI->getTerminator()) || BI->hasAddressTaken())
01362       return false;
01363 
01364     for (BasicBlock::iterator II = BI->begin(), IE = BI->end(); II != IE;
01365          ++II) {
01366       CallSite CS(II);
01367       if (!CS)
01368         continue;
01369 
01370       // Disallow recursive calls.
01371       if (&F == CS.getCalledFunction())
01372         return false;
01373 
01374       // Disallow calls which expose returns-twice to a function not previously
01375       // attributed as such.
01376       if (!ReturnsTwice && CS.isCall() &&
01377           cast<CallInst>(CS.getInstruction())->canReturnTwice())
01378         return false;
01379 
01380       // Disallow inlining functions that call @llvm.frameescape. Doing this
01381       // correctly would require major changes to the inliner.
01382       if (CS.getCalledFunction() &&
01383           CS.getCalledFunction()->getIntrinsicID() ==
01384               llvm::Intrinsic::frameescape)
01385         return false;
01386     }
01387   }
01388 
01389   return true;
01390 }