InlineCost.cpp
00001 //===- InlineCost.cpp - Cost analysis for inliner -------------------------===//
00002 //
00003 //                     The LLVM Compiler Infrastructure
00004 //
00005 // This file is distributed under the University of Illinois Open Source
00006 // License. See LICENSE.TXT for details.
00007 //
00008 //===----------------------------------------------------------------------===//
00009 //
00010 // This file implements inline cost analysis.
00011 //
00012 //===----------------------------------------------------------------------===//
00013 
00014 #include "llvm/Analysis/InlineCost.h"
00015 #include "llvm/ADT/STLExtras.h"
00016 #include "llvm/ADT/SetVector.h"
00017 #include "llvm/ADT/SmallPtrSet.h"
00018 #include "llvm/ADT/SmallVector.h"
00019 #include "llvm/ADT/Statistic.h"
00020 #include "llvm/Analysis/AssumptionCache.h"
00021 #include "llvm/Analysis/CodeMetrics.h"
00022 #include "llvm/Analysis/ConstantFolding.h"
00023 #include "llvm/Analysis/InstructionSimplify.h"
00024 #include "llvm/Analysis/TargetTransformInfo.h"
00025 #include "llvm/IR/CallSite.h"
00026 #include "llvm/IR/CallingConv.h"
00027 #include "llvm/IR/DataLayout.h"
00028 #include "llvm/IR/GetElementPtrTypeIterator.h"
00029 #include "llvm/IR/GlobalAlias.h"
00030 #include "llvm/IR/InstVisitor.h"
00031 #include "llvm/IR/IntrinsicInst.h"
00032 #include "llvm/IR/Operator.h"
00033 #include "llvm/Support/Debug.h"
00034 #include "llvm/Support/raw_ostream.h"
00035 
00036 using namespace llvm;
00037 
00038 #define DEBUG_TYPE "inline-cost"
00039 
00040 STATISTIC(NumCallsAnalyzed, "Number of call sites analyzed");
00041 
00042 namespace {
00043 
00044 class CallAnalyzer : public InstVisitor<CallAnalyzer, bool> {
00045   typedef InstVisitor<CallAnalyzer, bool> Base;
00046   friend class InstVisitor<CallAnalyzer, bool>;
00047 
00048   /// The TargetTransformInfo available for this compilation.
00049   const TargetTransformInfo &TTI;
00050 
00051   /// The cache of @llvm.assume intrinsics.
00052   AssumptionCacheTracker *ACT;
00053 
00054   /// The called function.
00055   Function &F;
00056 
00057   int Threshold;
00058   int Cost;
00059 
00060   bool IsCallerRecursive;
00061   bool IsRecursiveCall;
00062   bool ExposesReturnsTwice;
00063   bool HasDynamicAlloca;
00064   bool ContainsNoDuplicateCall;
00065   bool HasReturn;
00066   bool HasIndirectBr;
00067   bool HasFrameEscape;
00068 
00069   /// Number of bytes allocated statically by the callee.
00070   uint64_t AllocatedSize;
00071   unsigned NumInstructions, NumVectorInstructions;
00072   int FiftyPercentVectorBonus, TenPercentVectorBonus;
00073   int VectorBonus;
00074 
00075   // While we walk the potentially-inlined instructions, we build up and
00076   // maintain a mapping of simplified values specific to this callsite. The
00077   // idea is to propagate any special information we have about arguments to
00078   // this call through the inlinable section of the function, and account for
00079   // likely simplifications post-inlining. The most important aspect we track
00080   // is CFG altering simplifications -- when we prove a basic block dead, that
00081   // can cause dramatic shifts in the cost of inlining a function.
00082   DenseMap<Value *, Constant *> SimplifiedValues;
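  // For example (an illustrative sketch): if the call site passes the constant
  // 'true' for an i1 parameter %flag, analyzeCall seeds
  // SimplifiedValues[%flag] = true; a later 'br i1 %flag, ...' in the callee
  // then folds, and only the taken successor is added to the live-block
  // worklist, so the not-taken block's instructions never count toward Cost.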
00083 
00084   // Keep track of the values which map back (through function arguments) to
00085   // allocas on the caller stack which could be simplified through SROA.
00086   DenseMap<Value *, Value *> SROAArgValues;
00087 
00088   // The mapping of caller Alloca values to their accumulated cost savings. If
00089   // we have to disable SROA for one of the allocas, this tells us how much
00090   // cost must be added.
00091   DenseMap<Value *, int> SROAArgCosts;
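  // Bookkeeping sketch: a simple load or store through an SROA candidate
  // credits InlineConstants::InstrCost to that candidate's entry here (and to
  // SROACostSavings). If the candidate is later disqualified, for instance by
  // escaping into a call we cannot simplify, disableSROA() adds the
  // accumulated savings back onto Cost and records them in
  // SROACostSavingsLost.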
00092 
00093   // Keep track of values which map to a pointer base and constant offset.
00094   DenseMap<Value *, std::pair<Value *, APInt> > ConstantOffsetPtrs;
00095 
00096   // Custom simplification helper routines.
00097   bool isAllocaDerivedArg(Value *V);
00098   bool lookupSROAArgAndCost(Value *V, Value *&Arg,
00099                             DenseMap<Value *, int>::iterator &CostIt);
00100   void disableSROA(DenseMap<Value *, int>::iterator CostIt);
00101   void disableSROA(Value *V);
00102   void accumulateSROACost(DenseMap<Value *, int>::iterator CostIt,
00103                           int InstructionCost);
00104   bool isGEPOffsetConstant(GetElementPtrInst &GEP);
00105   bool accumulateGEPOffset(GEPOperator &GEP, APInt &Offset);
00106   bool simplifyCallSite(Function *F, CallSite CS);
00107   ConstantInt *stripAndComputeInBoundsConstantOffsets(Value *&V);
00108 
00109   // Custom analysis routines.
00110   bool analyzeBlock(BasicBlock *BB, SmallPtrSetImpl<const Value *> &EphValues);
00111 
00112   // Disable several entry points to the visitor so we don't accidentally use
00113   // them by declaring but not defining them here.
00114   void visit(Module *);     void visit(Module &);
00115   void visit(Function *);   void visit(Function &);
00116   void visit(BasicBlock *); void visit(BasicBlock &);
00117 
00118   // Provide base case for our instruction visit.
00119   bool visitInstruction(Instruction &I);
00120 
00121   // Our visit overrides.
00122   bool visitAlloca(AllocaInst &I);
00123   bool visitPHI(PHINode &I);
00124   bool visitGetElementPtr(GetElementPtrInst &I);
00125   bool visitBitCast(BitCastInst &I);
00126   bool visitPtrToInt(PtrToIntInst &I);
00127   bool visitIntToPtr(IntToPtrInst &I);
00128   bool visitCastInst(CastInst &I);
00129   bool visitUnaryInstruction(UnaryInstruction &I);
00130   bool visitCmpInst(CmpInst &I);
00131   bool visitSub(BinaryOperator &I);
00132   bool visitBinaryOperator(BinaryOperator &I);
00133   bool visitLoad(LoadInst &I);
00134   bool visitStore(StoreInst &I);
00135   bool visitExtractValue(ExtractValueInst &I);
00136   bool visitInsertValue(InsertValueInst &I);
00137   bool visitCallSite(CallSite CS);
00138   bool visitReturnInst(ReturnInst &RI);
00139   bool visitBranchInst(BranchInst &BI);
00140   bool visitSwitchInst(SwitchInst &SI);
00141   bool visitIndirectBrInst(IndirectBrInst &IBI);
00142   bool visitResumeInst(ResumeInst &RI);
00143   bool visitUnreachableInst(UnreachableInst &I);
00144 
00145 public:
00146   CallAnalyzer(const TargetTransformInfo &TTI, AssumptionCacheTracker *ACT,
00147                Function &Callee, int Threshold)
00148       : TTI(TTI), ACT(ACT), F(Callee), Threshold(Threshold), Cost(0),
00149         IsCallerRecursive(false), IsRecursiveCall(false),
00150         ExposesReturnsTwice(false), HasDynamicAlloca(false),
00151         ContainsNoDuplicateCall(false), HasReturn(false), HasIndirectBr(false),
00152         HasFrameEscape(false), AllocatedSize(0), NumInstructions(0),
00153         NumVectorInstructions(0), FiftyPercentVectorBonus(0),
00154         TenPercentVectorBonus(0), VectorBonus(0), NumConstantArgs(0),
00155         NumConstantOffsetPtrArgs(0), NumAllocaArgs(0), NumConstantPtrCmps(0),
00156         NumConstantPtrDiffs(0), NumInstructionsSimplified(0),
00157         SROACostSavings(0), SROACostSavingsLost(0) {}
00158 
00159   bool analyzeCall(CallSite CS);
00160 
00161   int getThreshold() { return Threshold; }
00162   int getCost() { return Cost; }
00163 
00164   // Keep a bunch of stats about the cost savings found so we can print them
00165   // out when debugging.
00166   unsigned NumConstantArgs;
00167   unsigned NumConstantOffsetPtrArgs;
00168   unsigned NumAllocaArgs;
00169   unsigned NumConstantPtrCmps;
00170   unsigned NumConstantPtrDiffs;
00171   unsigned NumInstructionsSimplified;
00172   unsigned SROACostSavings;
00173   unsigned SROACostSavingsLost;
00174 
00175   void dump();
00176 };
00177 
00178 } // namespace
00179 
00180 /// \brief Test whether the given value is an Alloca-derived function argument.
00181 bool CallAnalyzer::isAllocaDerivedArg(Value *V) {
00182   return SROAArgValues.count(V);
00183 }
00184 
00185 /// \brief Look up the SROA-candidate argument and cost iterator which V maps to.
00186 /// Returns false if V does not map to an SROA candidate.
00187 bool CallAnalyzer::lookupSROAArgAndCost(
00188     Value *V, Value *&Arg, DenseMap<Value *, int>::iterator &CostIt) {
00189   if (SROAArgValues.empty() || SROAArgCosts.empty())
00190     return false;
00191 
00192   DenseMap<Value *, Value *>::iterator ArgIt = SROAArgValues.find(V);
00193   if (ArgIt == SROAArgValues.end())
00194     return false;
00195 
00196   Arg = ArgIt->second;
00197   CostIt = SROAArgCosts.find(Arg);
00198   return CostIt != SROAArgCosts.end();
00199 }
00200 
00201 /// \brief Disable SROA for the candidate marked by this cost iterator.
00202 ///
00203 /// This marks the candidate as no longer viable for SROA, and adds the cost
00204 /// savings associated with it back into the inline cost measurement.
00205 void CallAnalyzer::disableSROA(DenseMap<Value *, int>::iterator CostIt) {
00206   // If we're no longer able to perform SROA we need to undo its cost savings
00207   // and prevent subsequent analysis.
00208   Cost += CostIt->second;
00209   SROACostSavings -= CostIt->second;
00210   SROACostSavingsLost += CostIt->second;
00211   SROAArgCosts.erase(CostIt);
00212 }
00213 
00214 /// \brief If 'V' maps to a SROA candidate, disable SROA for it.
00215 void CallAnalyzer::disableSROA(Value *V) {
00216   Value *SROAArg;
00217   DenseMap<Value *, int>::iterator CostIt;
00218   if (lookupSROAArgAndCost(V, SROAArg, CostIt))
00219     disableSROA(CostIt);
00220 }
00221 
00222 /// \brief Accumulate the given cost for a particular SROA candidate.
00223 void CallAnalyzer::accumulateSROACost(DenseMap<Value *, int>::iterator CostIt,
00224                                       int InstructionCost) {
00225   CostIt->second += InstructionCost;
00226   SROACostSavings += InstructionCost;
00227 }
00228 
00229 /// \brief Check whether a GEP's indices are all constant.
00230 ///
00231 /// Respects any simplified values known during the analysis of this callsite.
00232 bool CallAnalyzer::isGEPOffsetConstant(GetElementPtrInst &GEP) {
00233   for (User::op_iterator I = GEP.idx_begin(), E = GEP.idx_end(); I != E; ++I)
00234     if (!isa<Constant>(*I) && !SimplifiedValues.lookup(*I))
00235       return false;
00236 
00237   return true;
00238 }
00239 
00240 /// \brief Accumulate a constant GEP offset into an APInt if possible.
00241 ///
00242 /// Returns false if unable to compute the offset for any reason. Respects any
00243 /// simplified values known during the analysis of this callsite.
00244 bool CallAnalyzer::accumulateGEPOffset(GEPOperator &GEP, APInt &Offset) {
00245   const DataLayout &DL = F.getParent()->getDataLayout();
00246   unsigned IntPtrWidth = DL.getPointerSizeInBits();
00247   assert(IntPtrWidth == Offset.getBitWidth());
00248 
00249   for (gep_type_iterator GTI = gep_type_begin(GEP), GTE = gep_type_end(GEP);
00250        GTI != GTE; ++GTI) {
00251     ConstantInt *OpC = dyn_cast<ConstantInt>(GTI.getOperand());
00252     if (!OpC)
00253       if (Constant *SimpleOp = SimplifiedValues.lookup(GTI.getOperand()))
00254         OpC = dyn_cast<ConstantInt>(SimpleOp);
00255     if (!OpC)
00256       return false;
00257     if (OpC->isZero()) continue;
00258 
00259     // Handle a struct index, which adds its field offset to the pointer.
00260     if (StructType *STy = dyn_cast<StructType>(*GTI)) {
00261       unsigned ElementIdx = OpC->getZExtValue();
00262       const StructLayout *SL = DL.getStructLayout(STy);
00263       Offset += APInt(IntPtrWidth, SL->getElementOffset(ElementIdx));
00264       continue;
00265     }
00266 
00267     APInt TypeSize(IntPtrWidth, DL.getTypeAllocSize(GTI.getIndexedType()));
00268     Offset += OpC->getValue().sextOrTrunc(IntPtrWidth) * TypeSize;
00269   }
00270   return true;
00271 }
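// Worked example (illustrative, assuming i32 is 4 bytes with no struct
// padding): for a GEP into { i32, [4 x i32] } with constant indices 0, 1, 2,
// the walk above adds nothing for the leading zero index, the field offset of
// member 1 (4 bytes) from the StructLayout, and 2 * 4 bytes for the array
// index, accumulating Offset = 12.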
00272 
00273 bool CallAnalyzer::visitAlloca(AllocaInst &I) {
00274   // Check whether inlining will turn a dynamic alloca into a static
00275   // alloca, and handle that case.
00276   if (I.isArrayAllocation()) {
00277     if (Constant *Size = SimplifiedValues.lookup(I.getArraySize())) {
00278       ConstantInt *AllocSize = dyn_cast<ConstantInt>(Size);
00279       assert(AllocSize && "Allocation size not a constant int?");
00280       Type *Ty = I.getAllocatedType();
00281       AllocatedSize += F.getParent()->getDataLayout().getTypeAllocSize(Ty) * AllocSize->getZExtValue();
00282       return Base::visitAlloca(I);
00283     }
00284   }
00285 
00286   // Accumulate the allocated size.
00287   if (I.isStaticAlloca()) {
00288     const DataLayout &DL = F.getParent()->getDataLayout();
00289     Type *Ty = I.getAllocatedType();
00290     AllocatedSize += DL.getTypeAllocSize(Ty);
00291   }
00292 
00293   // We will happily inline static alloca instructions.
00294   if (I.isStaticAlloca())
00295     return Base::visitAlloca(I);
00296 
00297   // FIXME: This is overly conservative. Dynamic allocas are inefficient for
00298   // a variety of reasons, and so we would like to not inline them into
00299   // functions which don't currently have a dynamic alloca. This simply
00300   // disables inlining altogether in the presence of a dynamic alloca.
00301   HasDynamicAlloca = true;
00302   return false;
00303 }
00304 
00305 bool CallAnalyzer::visitPHI(PHINode &I) {
00306   // FIXME: We should potentially be tracking values through phi nodes,
00307   // especially when they collapse to a single value due to deleted CFG edges
00308   // during inlining.
00309 
00310   // FIXME: We need to propagate SROA *disabling* through phi nodes, even
00311   // though we don't want to propagate its bonuses. The idea is to disable
00312   // SROA if it *might* be used in an inappropriate manner.
00313 
00314   // Phi nodes are always zero-cost.
00315   return true;
00316 }
00317 
00318 bool CallAnalyzer::visitGetElementPtr(GetElementPtrInst &I) {
00319   Value *SROAArg;
00320   DenseMap<Value *, int>::iterator CostIt;
00321   bool SROACandidate = lookupSROAArgAndCost(I.getPointerOperand(),
00322                                             SROAArg, CostIt);
00323 
00324   // Try to fold GEPs of constant-offset call site argument pointers. This
00325   // requires target data and inbounds GEPs.
00326   if (I.isInBounds()) {
00327     // Check if we have a base + offset for the pointer.
00328     Value *Ptr = I.getPointerOperand();
00329     std::pair<Value *, APInt> BaseAndOffset = ConstantOffsetPtrs.lookup(Ptr);
00330     if (BaseAndOffset.first) {
00331       // Check if the offset of this GEP is constant, and if so accumulate it
00332       // into Offset.
00333       if (!accumulateGEPOffset(cast<GEPOperator>(I), BaseAndOffset.second)) {
00334         // Non-constant GEPs aren't folded, and they disable SROA.
00335         if (SROACandidate)
00336           disableSROA(CostIt);
00337         return false;
00338       }
00339 
00340       // Add the result as a new mapping to Base + Offset.
00341       ConstantOffsetPtrs[&I] = BaseAndOffset;
00342 
00343       // Also handle SROA candidates here; we already know that the GEP is
00344       // all-constant indexed.
00345       if (SROACandidate)
00346         SROAArgValues[&I] = SROAArg;
00347 
00348       return true;
00349     }
00350   }
00351 
00352   if (isGEPOffsetConstant(I)) {
00353     if (SROACandidate)
00354       SROAArgValues[&I] = SROAArg;
00355 
00356     // Constant GEPs are modeled as free.
00357     return true;
00358   }
00359 
00360   // Variable GEPs will require math and will disable SROA.
00361   if (SROACandidate)
00362     disableSROA(CostIt);
00363   return false;
00364 }
00365 
00366 bool CallAnalyzer::visitBitCast(BitCastInst &I) {
00367   // Propagate constants through bitcasts.
00368   Constant *COp = dyn_cast<Constant>(I.getOperand(0));
00369   if (!COp)
00370     COp = SimplifiedValues.lookup(I.getOperand(0));
00371   if (COp)
00372     if (Constant *C = ConstantExpr::getBitCast(COp, I.getType())) {
00373       SimplifiedValues[&I] = C;
00374       return true;
00375     }
00376 
00377   // Track base/offsets through casts
00378   std::pair<Value *, APInt> BaseAndOffset
00379     = ConstantOffsetPtrs.lookup(I.getOperand(0));
00380   // Casts don't change the offset, just wrap it up.
00381   if (BaseAndOffset.first)
00382     ConstantOffsetPtrs[&I] = BaseAndOffset;
00383 
00384   // Also look for SROA candidates here.
00385   Value *SROAArg;
00386   DenseMap<Value *, int>::iterator CostIt;
00387   if (lookupSROAArgAndCost(I.getOperand(0), SROAArg, CostIt))
00388     SROAArgValues[&I] = SROAArg;
00389 
00390   // Bitcasts are always zero cost.
00391   return true;
00392 }
00393 
00394 bool CallAnalyzer::visitPtrToInt(PtrToIntInst &I) {
00395   // Propagate constants through ptrtoint.
00396   Constant *COp = dyn_cast<Constant>(I.getOperand(0));
00397   if (!COp)
00398     COp = SimplifiedValues.lookup(I.getOperand(0));
00399   if (COp)
00400     if (Constant *C = ConstantExpr::getPtrToInt(COp, I.getType())) {
00401       SimplifiedValues[&I] = C;
00402       return true;
00403     }
00404 
00405   // Track base/offset pairs when converted to a plain integer provided the
00406   // integer is large enough to represent the pointer.
00407   unsigned IntegerSize = I.getType()->getScalarSizeInBits();
00408   const DataLayout &DL = F.getParent()->getDataLayout();
00409   if (IntegerSize >= DL.getPointerSizeInBits()) {
00410     std::pair<Value *, APInt> BaseAndOffset
00411       = ConstantOffsetPtrs.lookup(I.getOperand(0));
00412     if (BaseAndOffset.first)
00413       ConstantOffsetPtrs[&I] = BaseAndOffset;
00414   }
00415 
00416   // This is really weird. Technically, ptrtoint will disable SROA. However,
00417   // unless that ptrtoint is *used* somewhere in the live basic blocks after
00418   // inlining, it will be nuked, and SROA should proceed. All of the uses which
00419   // would block SROA would also block SROA if applied directly to a pointer,
00420   // and so we can just add the integer in here. The only places where SROA is
00421   // preserved either cannot fire on an integer, or won't in-and-of themselves
00422   // disable SROA (ext) w/o some later use that we would see and disable.
00423   Value *SROAArg;
00424   DenseMap<Value *, int>::iterator CostIt;
00425   if (lookupSROAArgAndCost(I.getOperand(0), SROAArg, CostIt))
00426     SROAArgValues[&I] = SROAArg;
00427 
00428   return TargetTransformInfo::TCC_Free == TTI.getUserCost(&I);
00429 }
00430 
00431 bool CallAnalyzer::visitIntToPtr(IntToPtrInst &I) {
00432   // Propagate constants through inttoptr.
00433   Constant *COp = dyn_cast<Constant>(I.getOperand(0));
00434   if (!COp)
00435     COp = SimplifiedValues.lookup(I.getOperand(0));
00436   if (COp)
00437     if (Constant *C = ConstantExpr::getIntToPtr(COp, I.getType())) {
00438       SimplifiedValues[&I] = C;
00439       return true;
00440     }
00441 
00442   // Track base/offset pairs when round-tripped through a pointer without
00443   // modifications provided the integer is not too large.
00444   Value *Op = I.getOperand(0);
00445   unsigned IntegerSize = Op->getType()->getScalarSizeInBits();
00446   const DataLayout &DL = F.getParent()->getDataLayout();
00447   if (IntegerSize <= DL.getPointerSizeInBits()) {
00448     std::pair<Value *, APInt> BaseAndOffset = ConstantOffsetPtrs.lookup(Op);
00449     if (BaseAndOffset.first)
00450       ConstantOffsetPtrs[&I] = BaseAndOffset;
00451   }
00452 
00453   // "Propagate" SROA here in the same manner as we do for ptrtoint above.
00454   Value *SROAArg;
00455   DenseMap<Value *, int>::iterator CostIt;
00456   if (lookupSROAArgAndCost(Op, SROAArg, CostIt))
00457     SROAArgValues[&I] = SROAArg;
00458 
00459   return TargetTransformInfo::TCC_Free == TTI.getUserCost(&I);
00460 }
00461 
00462 bool CallAnalyzer::visitCastInst(CastInst &I) {
00463   // Propagate constants through casts.
00464   Constant *COp = dyn_cast<Constant>(I.getOperand(0));
00465   if (!COp)
00466     COp = SimplifiedValues.lookup(I.getOperand(0));
00467   if (COp)
00468     if (Constant *C = ConstantExpr::getCast(I.getOpcode(), COp, I.getType())) {
00469       SimplifiedValues[&I] = C;
00470       return true;
00471     }
00472 
00473   // Disable SROA in the face of arbitrary casts we don't whitelist elsewhere.
00474   disableSROA(I.getOperand(0));
00475 
00476   return TargetTransformInfo::TCC_Free == TTI.getUserCost(&I);
00477 }
00478 
00479 bool CallAnalyzer::visitUnaryInstruction(UnaryInstruction &I) {
00480   Value *Operand = I.getOperand(0);
00481   Constant *COp = dyn_cast<Constant>(Operand);
00482   if (!COp)
00483     COp = SimplifiedValues.lookup(Operand);
00484   if (COp) {
00485     const DataLayout &DL = F.getParent()->getDataLayout();
00486     if (Constant *C = ConstantFoldInstOperands(I.getOpcode(), I.getType(),
00487                                                COp, DL)) {
00488       SimplifiedValues[&I] = C;
00489       return true;
00490     }
00491   }
00492 
00493   // Disable any SROA on the argument to arbitrary unary operators.
00494   disableSROA(Operand);
00495 
00496   return false;
00497 }
00498 
00499 bool CallAnalyzer::visitCmpInst(CmpInst &I) {
00500   Value *LHS = I.getOperand(0), *RHS = I.getOperand(1);
00501   // First try to handle simplified comparisons.
00502   if (!isa<Constant>(LHS))
00503     if (Constant *SimpleLHS = SimplifiedValues.lookup(LHS))
00504       LHS = SimpleLHS;
00505   if (!isa<Constant>(RHS))
00506     if (Constant *SimpleRHS = SimplifiedValues.lookup(RHS))
00507       RHS = SimpleRHS;
00508   if (Constant *CLHS = dyn_cast<Constant>(LHS)) {
00509     if (Constant *CRHS = dyn_cast<Constant>(RHS))
00510       if (Constant *C = ConstantExpr::getCompare(I.getPredicate(), CLHS, CRHS)) {
00511         SimplifiedValues[&I] = C;
00512         return true;
00513       }
00514   }
00515 
00516   if (I.getOpcode() == Instruction::FCmp)
00517     return false;
00518 
00519   // Otherwise look for a comparison between constant offset pointers with
00520   // a common base.
00521   Value *LHSBase, *RHSBase;
00522   APInt LHSOffset, RHSOffset;
00523   std::tie(LHSBase, LHSOffset) = ConstantOffsetPtrs.lookup(LHS);
00524   if (LHSBase) {
00525     std::tie(RHSBase, RHSOffset) = ConstantOffsetPtrs.lookup(RHS);
00526     if (RHSBase && LHSBase == RHSBase) {
00527       // We have common bases, fold the icmp to a constant based on the
00528       // offsets.
00529       Constant *CLHS = ConstantInt::get(LHS->getContext(), LHSOffset);
00530       Constant *CRHS = ConstantInt::get(RHS->getContext(), RHSOffset);
00531       if (Constant *C = ConstantExpr::getICmp(I.getPredicate(), CLHS, CRHS)) {
00532         SimplifiedValues[&I] = C;
00533         ++NumConstantPtrCmps;
00534         return true;
00535       }
00536     }
00537   }
00538 
00539   // If the comparison is an equality comparison with null, we can simplify it
00540   // for any alloca-derived argument.
00541   if (I.isEquality() && isa<ConstantPointerNull>(I.getOperand(1)))
00542     if (isAllocaDerivedArg(I.getOperand(0))) {
00543       // We can actually predict the result of comparisons between an
00544       // alloca-derived value and null. Note that this fires regardless of
00545       // SROA firing.
00546       bool IsNotEqual = I.getPredicate() == CmpInst::ICMP_NE;
00547       SimplifiedValues[&I] = IsNotEqual ? ConstantInt::getTrue(I.getType())
00548                                         : ConstantInt::getFalse(I.getType());
00549       return true;
00550     }
00551 
00552   // Finally check for SROA candidates in comparisons.
00553   Value *SROAArg;
00554   DenseMap<Value *, int>::iterator CostIt;
00555   if (lookupSROAArgAndCost(I.getOperand(0), SROAArg, CostIt)) {
00556     if (isa<ConstantPointerNull>(I.getOperand(1))) {
00557       accumulateSROACost(CostIt, InlineConstants::InstrCost);
00558       return true;
00559     }
00560 
00561     disableSROA(CostIt);
00562   }
00563 
00564   return false;
00565 }
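// For example (illustrative): if %p and %q were both traced to the same base
// pointer with constant offsets 4 and 8, an 'icmp ult %p, %q' is folded using
// only the two offset constants (4 < 8, so true), the result is recorded in
// SimplifiedValues, and NumConstantPtrCmps is incremented.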
00566 
00567 bool CallAnalyzer::visitSub(BinaryOperator &I) {
00568   // Try to handle a special case: we can fold computing the difference of two
00569   // constant-related pointers.
00570   Value *LHS = I.getOperand(0), *RHS = I.getOperand(1);
00571   Value *LHSBase, *RHSBase;
00572   APInt LHSOffset, RHSOffset;
00573   std::tie(LHSBase, LHSOffset) = ConstantOffsetPtrs.lookup(LHS);
00574   if (LHSBase) {
00575     std::tie(RHSBase, RHSOffset) = ConstantOffsetPtrs.lookup(RHS);
00576     if (RHSBase && LHSBase == RHSBase) {
00577       // We have common bases, fold the subtract to a constant based on the
00578       // offsets.
00579       Constant *CLHS = ConstantInt::get(LHS->getContext(), LHSOffset);
00580       Constant *CRHS = ConstantInt::get(RHS->getContext(), RHSOffset);
00581       if (Constant *C = ConstantExpr::getSub(CLHS, CRHS)) {
00582         SimplifiedValues[&I] = C;
00583         ++NumConstantPtrDiffs;
00584         return true;
00585       }
00586     }
00587   }
00588 
00589   // Otherwise, fall back to the generic logic for simplifying and handling
00590   // instructions.
00591   return Base::visitSub(I);
00592 }
00593 
00594 bool CallAnalyzer::visitBinaryOperator(BinaryOperator &I) {
00595   Value *LHS = I.getOperand(0), *RHS = I.getOperand(1);
00596   const DataLayout &DL = F.getParent()->getDataLayout();
00597   if (!isa<Constant>(LHS))
00598     if (Constant *SimpleLHS = SimplifiedValues.lookup(LHS))
00599       LHS = SimpleLHS;
00600   if (!isa<Constant>(RHS))
00601     if (Constant *SimpleRHS = SimplifiedValues.lookup(RHS))
00602       RHS = SimpleRHS;
00603   Value *SimpleV = nullptr;
00604   if (auto FI = dyn_cast<FPMathOperator>(&I))
00605     SimpleV =
00606         SimplifyFPBinOp(I.getOpcode(), LHS, RHS, FI->getFastMathFlags(), DL);
00607   else
00608     SimpleV = SimplifyBinOp(I.getOpcode(), LHS, RHS, DL);
00609 
00610   if (Constant *C = dyn_cast_or_null<Constant>(SimpleV)) {
00611     SimplifiedValues[&I] = C;
00612     return true;
00613   }
00614 
00615   // Disable any SROA on arguments to arbitrary, unsimplified binary operators.
00616   disableSROA(LHS);
00617   disableSROA(RHS);
00618 
00619   return false;
00620 }
00621 
00622 bool CallAnalyzer::visitLoad(LoadInst &I) {
00623   Value *SROAArg;
00624   DenseMap<Value *, int>::iterator CostIt;
00625   if (lookupSROAArgAndCost(I.getPointerOperand(), SROAArg, CostIt)) {
00626     if (I.isSimple()) {
00627       accumulateSROACost(CostIt, InlineConstants::InstrCost);
00628       return true;
00629     }
00630 
00631     disableSROA(CostIt);
00632   }
00633 
00634   return false;
00635 }
00636 
00637 bool CallAnalyzer::visitStore(StoreInst &I) {
00638   Value *SROAArg;
00639   DenseMap<Value *, int>::iterator CostIt;
00640   if (lookupSROAArgAndCost(I.getPointerOperand(), SROAArg, CostIt)) {
00641     if (I.isSimple()) {
00642       accumulateSROACost(CostIt, InlineConstants::InstrCost);
00643       return true;
00644     }
00645 
00646     disableSROA(CostIt);
00647   }
00648 
00649   return false;
00650 }
00651 
00652 bool CallAnalyzer::visitExtractValue(ExtractValueInst &I) {
00653   // Constant folding for extract value is trivial.
00654   Constant *C = dyn_cast<Constant>(I.getAggregateOperand());
00655   if (!C)
00656     C = SimplifiedValues.lookup(I.getAggregateOperand());
00657   if (C) {
00658     SimplifiedValues[&I] = ConstantExpr::getExtractValue(C, I.getIndices());
00659     return true;
00660   }
00661 
00662   // SROA can look through these but give them a cost.
00663   return false;
00664 }
00665 
00666 bool CallAnalyzer::visitInsertValue(InsertValueInst &I) {
00667   // Constant folding for insert value is trivial.
00668   Constant *AggC = dyn_cast<Constant>(I.getAggregateOperand());
00669   if (!AggC)
00670     AggC = SimplifiedValues.lookup(I.getAggregateOperand());
00671   Constant *InsertedC = dyn_cast<Constant>(I.getInsertedValueOperand());
00672   if (!InsertedC)
00673     InsertedC = SimplifiedValues.lookup(I.getInsertedValueOperand());
00674   if (AggC && InsertedC) {
00675     SimplifiedValues[&I] = ConstantExpr::getInsertValue(AggC, InsertedC,
00676                                                         I.getIndices());
00677     return true;
00678   }
00679 
00680   // SROA can look through these but give them a cost.
00681   return false;
00682 }
00683 
00684 /// \brief Try to simplify a call site.
00685 ///
00686 /// Takes a concrete function and callsite and tries to actually simplify it by
00687 /// analyzing the arguments and the call itself with instsimplify. Returns true if
00688 /// it has simplified the callsite to some other entity (a constant), making it
00689 /// free.
00690 bool CallAnalyzer::simplifyCallSite(Function *F, CallSite CS) {
00691   // FIXME: Using the instsimplify logic directly for this is inefficient
00692   // because we have to continually rebuild the argument list even when no
00693   // simplifications can be performed. Until that is fixed with remapping
00694   // inside of instsimplify, directly constant fold calls here.
00695   if (!canConstantFoldCallTo(F))
00696     return false;
00697 
00698   // Try to re-map the arguments to constants.
00699   SmallVector<Constant *, 4> ConstantArgs;
00700   ConstantArgs.reserve(CS.arg_size());
00701   for (CallSite::arg_iterator I = CS.arg_begin(), E = CS.arg_end();
00702        I != E; ++I) {
00703     Constant *C = dyn_cast<Constant>(*I);
00704     if (!C)
00705       C = dyn_cast_or_null<Constant>(SimplifiedValues.lookup(*I));
00706     if (!C)
00707       return false; // This argument doesn't map to a constant.
00708 
00709     ConstantArgs.push_back(C);
00710   }
00711   if (Constant *C = ConstantFoldCall(F, ConstantArgs)) {
00712     SimplifiedValues[CS.getInstruction()] = C;
00713     return true;
00714   }
00715 
00716   return false;
00717 }
00718 
00719 bool CallAnalyzer::visitCallSite(CallSite CS) {
00720   if (CS.hasFnAttr(Attribute::ReturnsTwice) &&
00721       !F.hasFnAttribute(Attribute::ReturnsTwice)) {
00722     // This aborts the entire analysis.
00723     ExposesReturnsTwice = true;
00724     return false;
00725   }
00726   if (CS.isCall() &&
00727       cast<CallInst>(CS.getInstruction())->cannotDuplicate())
00728     ContainsNoDuplicateCall = true;
00729 
00730   if (Function *F = CS.getCalledFunction()) {
00731     // When we have a concrete function, first try to simplify it directly.
00732     if (simplifyCallSite(F, CS))
00733       return true;
00734 
00735     // Next check if it is an intrinsic we know about.
00736     // FIXME: Lift this into part of the InstVisitor.
00737     if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(CS.getInstruction())) {
00738       switch (II->getIntrinsicID()) {
00739       default:
00740         return Base::visitCallSite(CS);
00741 
00742       case Intrinsic::memset:
00743       case Intrinsic::memcpy:
00744       case Intrinsic::memmove:
00745         // SROA can usually chew through these intrinsics, but they aren't free.
00746         return false;
00747       case Intrinsic::frameescape:
00748         HasFrameEscape = true;
00749         return false;
00750       }
00751     }
00752 
00753     if (F == CS.getInstruction()->getParent()->getParent()) {
00754       // This flag will fully abort the analysis, so don't bother with anything
00755       // else.
00756       IsRecursiveCall = true;
00757       return false;
00758     }
00759 
00760     if (TTI.isLoweredToCall(F)) {
00761       // We account for an average of one instruction of setup per call
00762       // argument here.
00763       Cost += CS.arg_size() * InlineConstants::InstrCost;
00764 
00765       // Everything other than inline ASM will also have a significant cost
00766       // merely from making the call.
00767       if (!isa<InlineAsm>(CS.getCalledValue()))
00768         Cost += InlineConstants::CallPenalty;
00769     }
00770 
00771     return Base::visitCallSite(CS);
00772   }
00773 
00774   // Otherwise we're in a very special case -- an indirect function call. See
00775   // if we can be particularly clever about this.
00776   Value *Callee = CS.getCalledValue();
00777 
00778   // First, pay the price of the argument setup. We account for an average of
00779   // one instruction of setup per call argument here.
00780   Cost += CS.arg_size() * InlineConstants::InstrCost;
00781 
00782   // Next, check if this happens to be an indirect function call to a known
00783   // function in this inline context. If not, we've done all we can.
00784   Function *F = dyn_cast_or_null<Function>(SimplifiedValues.lookup(Callee));
00785   if (!F)
00786     return Base::visitCallSite(CS);
00787 
00788   // If we have a constant that we are calling as a function, we can peer
00789   // through it and see the function target. This happens not infrequently
00790   // during devirtualization and so we want to give it a hefty bonus for
00791   // inlining, but cap that bonus in the event that inlining wouldn't pan
00792   // out. Pretend to inline the function, with a custom threshold.
00793   CallAnalyzer CA(TTI, ACT, *F, InlineConstants::IndirectCallThreshold);
00794   if (CA.analyzeCall(CS)) {
00795     // We were able to inline the indirect call! Subtract the cost from the
00796     // bonus we want to apply, but don't go below zero.
00797     Cost -= std::max(0, InlineConstants::IndirectCallThreshold - CA.getCost());
00798   }
00799 
00800   return Base::visitCallSite(CS);
00801 }
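// The indirect-call case above boils down to: if the callee operand is known
// (via SimplifiedValues) to be a particular function, pretend-inline that
// function with a budget of InlineConstants::IndirectCallThreshold; when the
// nested analysis succeeds with cost C, this call site's Cost is reduced by
// max(0, IndirectCallThreshold - C), so a tiny devirtualized target earns
// nearly the full bonus while a barely-fitting one earns almost nothing.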
00802 
00803 bool CallAnalyzer::visitReturnInst(ReturnInst &RI) {
00804   // At least one return instruction will be free after inlining.
00805   bool Free = !HasReturn;
00806   HasReturn = true;
00807   return Free;
00808 }
00809 
00810 bool CallAnalyzer::visitBranchInst(BranchInst &BI) {
00811   // We model unconditional branches as essentially free -- they really
00812   // shouldn't exist at all, but handling them makes the behavior of the
00813   // inliner more regular and predictable. Interestingly, conditional branches
00814   // which will fold away are also free.
00815   return BI.isUnconditional() || isa<ConstantInt>(BI.getCondition()) ||
00816          dyn_cast_or_null<ConstantInt>(
00817              SimplifiedValues.lookup(BI.getCondition()));
00818 }
00819 
00820 bool CallAnalyzer::visitSwitchInst(SwitchInst &SI) {
00821   // We model switches on a known-constant condition as free; see the comments
00822   // on handling branches.
00823   if (isa<ConstantInt>(SI.getCondition()))
00824     return true;
00825   if (Value *V = SimplifiedValues.lookup(SI.getCondition()))
00826     if (isa<ConstantInt>(V))
00827       return true;
00828 
00829   // Otherwise, we need to accumulate a cost proportional to the number of
00830   // distinct successor blocks. This fan-out in the CFG cannot be represented
00831   // for free even if we can represent the core switch as a jumptable that
00832   // takes a single instruction.
00833   //
00834   // NB: We convert large switches which are just used to initialize large phi
00835   // nodes to lookup tables instead in simplify-cfg, so this shouldn't prevent
00836   // inlining those. It will prevent inlining in cases where the optimization
00837   // does not (yet) fire.
00838   SmallPtrSet<BasicBlock *, 8> SuccessorBlocks;
00839   SuccessorBlocks.insert(SI.getDefaultDest());
00840   for (auto I = SI.case_begin(), E = SI.case_end(); I != E; ++I)
00841     SuccessorBlocks.insert(I.getCaseSuccessor());
00842   // Add cost corresponding to the number of distinct destinations. The first
00843   // we model as free because of fallthrough.
00844   Cost += (SuccessorBlocks.size() - 1) * InlineConstants::InstrCost;
00845   return false;
00846 }
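// For example (illustrative): a switch whose four cases branch to three
// distinct blocks, plus a distinct default, has four unique successors, so
// 3 * InlineConstants::InstrCost is charged; had the condition simplified to a
// constant, the switch would have been free instead.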
00847 
00848 bool CallAnalyzer::visitIndirectBrInst(IndirectBrInst &IBI) {
00849   // We never want to inline functions that contain an indirectbr.  Inlining
00850   // such a function would be incorrect because all the blockaddresses (in
00851   // static global initializers, for example) would still refer to the original
00852   // function, and the indirect jump would jump from the inlined copy of the
00853   // function into the original function, which is extremely undefined behavior.
00854   // FIXME: This logic isn't really right; we can safely inline functions with
00855   // indirectbr's as long as no other function or global references the
00856   // blockaddress of a block within the current function.
00857   HasIndirectBr = true;
00858   return false;
00859 }
00860 
00861 bool CallAnalyzer::visitResumeInst(ResumeInst &RI) {
00862   // FIXME: It's not clear that a single instruction is an accurate model for
00863   // the inline cost of a resume instruction.
00864   return false;
00865 }
00866 
00867 bool CallAnalyzer::visitUnreachableInst(UnreachableInst &I) {
00868   // FIXME: It might be reasonable to discount the cost of instructions leading
00869   // to unreachable as they have the lowest possible impact on both runtime and
00870   // code size.
00871   return true; // No actual code is needed for unreachable.
00872 }
00873 
00874 bool CallAnalyzer::visitInstruction(Instruction &I) {
00875   // Some instructions are free. All of the free intrinsics can also be
00876   // handled by SROA, etc.
00877   if (TargetTransformInfo::TCC_Free == TTI.getUserCost(&I))
00878     return true;
00879 
00880   // We found something we don't understand or can't handle. Mark any SROA-able
00881   // values in the operand list as no longer viable.
00882   for (User::op_iterator OI = I.op_begin(), OE = I.op_end(); OI != OE; ++OI)
00883     disableSROA(*OI);
00884 
00885   return false;
00886 }
00887 
00888 
00889 /// \brief Analyze a basic block for its contribution to the inline cost.
00890 ///
00891 /// This method walks the analyzer over every instruction in the given basic
00892 /// block and accounts for their cost during inlining at this callsite. It
00893 /// aborts early if the threshold has been exceeded or an impossible to inline
00894 /// construct has been detected. It returns false if inlining is no longer
00895 /// viable, and true if inlining remains viable.
00896 bool CallAnalyzer::analyzeBlock(BasicBlock *BB,
00897                                 SmallPtrSetImpl<const Value *> &EphValues) {
00898   for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I) {
00899     // FIXME: Currently, the number of instructions in a function, regardless of
00900     // our ability to simplify them during inlining to constants or dead code,
00901     // is actually used by the vector bonus heuristic. As long as that's true,
00902     // we have to special case debug intrinsics here to prevent differences in
00903     // inlining due to debug symbols. Eventually, the number of unsimplified
00904     // instructions shouldn't factor into the cost computation, but until then,
00905     // hack around it here.
00906     if (isa<DbgInfoIntrinsic>(I))
00907       continue;
00908 
00909     // Skip ephemeral values.
00910     if (EphValues.count(I))
00911       continue;
00912 
00913     ++NumInstructions;
00914     if (isa<ExtractElementInst>(I) || I->getType()->isVectorTy())
00915       ++NumVectorInstructions;
00916 
00917     // If the instruction is floating point, and the target says this operation is
00918     // expensive or the function has the "use-soft-float" attribute, this may
00919     // eventually become a library call.  Treat the cost as such.
00920     if (I->getType()->isFloatingPointTy()) {
00921       bool hasSoftFloatAttr = false;
00922 
00923       // If the function has the "use-soft-float" attribute, mark it as expensive.
00924       if (F.hasFnAttribute("use-soft-float")) {
00925         Attribute Attr = F.getFnAttribute("use-soft-float");
00926         StringRef Val = Attr.getValueAsString();
00927         if (Val == "true")
00928           hasSoftFloatAttr = true;
00929       }
00930 
00931       if (TTI.getFPOpCost(I->getType()) == TargetTransformInfo::TCC_Expensive ||
00932           hasSoftFloatAttr)
00933         Cost += InlineConstants::CallPenalty;
00934     }
00935 
00936     // If the instruction simplified to a constant, there is no cost to this
00937     // instruction. Visit the instructions using our InstVisitor to account for
00938     // all of the per-instruction logic. The visit tree returns true if we
00939     // consumed the instruction in any way, and false if the instruction's base
00940     // cost should count against inlining.
00941     if (Base::visit(I))
00942       ++NumInstructionsSimplified;
00943     else
00944       Cost += InlineConstants::InstrCost;
00945 
00946     // If visiting this instruction detected an uninlinable pattern, abort.
00947     if (IsRecursiveCall || ExposesReturnsTwice || HasDynamicAlloca ||
00948         HasIndirectBr || HasFrameEscape)
00949       return false;
00950 
00951     // If the caller is a recursive function then we don't want to inline
00952     // functions which allocate a lot of stack space because it would increase
00953     // the caller stack usage dramatically.
00954     if (IsCallerRecursive &&
00955         AllocatedSize > InlineConstants::TotalAllocaSizeRecursiveCaller)
00956       return false;
00957 
00958     // Check if we've passed the maximum possible threshold so we don't spin in
00959     // huge basic blocks that will never inline.
00960     if (Cost > Threshold)
00961       return false;
00962   }
00963 
00964   return true;
00965 }
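// In short, each instruction in a live block is either consumed by the visitor
// (and treated as free) or charges InlineConstants::InstrCost, with
// floating-point instructions additionally charging CallPenalty when the
// target reports FP ops as expensive or the function carries
// "use-soft-float"="true". Illustratively, a block of ten instructions where
// four fold away adds roughly 6 * InstrCost to Cost.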
00966 
00967 /// \brief Compute the base pointer and cumulative constant offsets for V.
00968 ///
00969 /// This strips all constant offsets off of V, leaving it the base pointer, and
00970 /// accumulates the total constant offset applied in the returned constant. It
00971 /// returns null if V is not a pointer, and returns the constant '0' if there are
00972 /// no constant offsets applied.
00973 ConstantInt *CallAnalyzer::stripAndComputeInBoundsConstantOffsets(Value *&V) {
00974   if (!V->getType()->isPointerTy())
00975     return nullptr;
00976 
00977   const DataLayout &DL = F.getParent()->getDataLayout();
00978   unsigned IntPtrWidth = DL.getPointerSizeInBits();
00979   APInt Offset = APInt::getNullValue(IntPtrWidth);
00980 
00981   // Even though we don't look through PHI nodes, we could be called on an
00982   // instruction in an unreachable block, which may be on a cycle.
00983   SmallPtrSet<Value *, 4> Visited;
00984   Visited.insert(V);
00985   do {
00986     if (GEPOperator *GEP = dyn_cast<GEPOperator>(V)) {
00987       if (!GEP->isInBounds() || !accumulateGEPOffset(*GEP, Offset))
00988         return nullptr;
00989       V = GEP->getPointerOperand();
00990     } else if (Operator::getOpcode(V) == Instruction::BitCast) {
00991       V = cast<Operator>(V)->getOperand(0);
00992     } else if (GlobalAlias *GA = dyn_cast<GlobalAlias>(V)) {
00993       if (GA->mayBeOverridden())
00994         break;
00995       V = GA->getAliasee();
00996     } else {
00997       break;
00998     }
00999     assert(V->getType()->isPointerTy() && "Unexpected operand type!");
01000   } while (Visited.insert(V).second);
01001 
01002   Type *IntPtrTy = DL.getIntPtrType(V->getContext());
01003   return cast<ConstantInt>(ConstantInt::get(IntPtrTy, Offset));
01004 }
01005 
01006 /// \brief Analyze a call site for potential inlining.
01007 ///
01008 /// Returns true if inlining this call is viable, and false if it is not
01009 /// viable. It computes the cost and adjusts the threshold based on numerous
01010 /// factors and heuristics. If this method returns false but the computed cost
01011 /// is below the computed threshold, then inlining was forcibly disabled by
01012 /// some artifact of the routine.
01013 bool CallAnalyzer::analyzeCall(CallSite CS) {
01014   ++NumCallsAnalyzed;
01015 
01016   // Perform some tweaks to the cost and threshold based on the direct
01017   // callsite information.
01018 
01019   // We want to more aggressively inline vector-dense kernels, so up the
01020   // threshold, and we'll lower it if the % of vector instructions gets too
01021   // low. Note that these bonuses are somewhat arbitrary and evolved over time
01022   // by accident as much as because they are principled bonuses.
01023   //
01024   // FIXME: It would be nice to remove all such bonuses. At least it would be
01025   // nice to base the bonus values on something more scientific.
01026   assert(NumInstructions == 0);
01027   assert(NumVectorInstructions == 0);
01028   FiftyPercentVectorBonus = 3 * Threshold / 2;
01029   TenPercentVectorBonus = 3 * Threshold / 4;
01030   const DataLayout &DL = F.getParent()->getDataLayout();
01031 
01032   // Track whether the post-inlining function would have more than one basic
01033   // block. A single basic block is often intended for inlining. Balloon the
01034   // threshold by 50% until we pass the single-BB phase.
01035   bool SingleBB = true;
01036   int SingleBBBonus = Threshold / 2;
01037 
01038   // Speculatively apply all possible bonuses to Threshold. If cost exceeds
01039   // this Threshold at any time, and cost cannot decrease, we can stop processing
01040   // the rest of the function body.
01041   Threshold += (SingleBBBonus + FiftyPercentVectorBonus);
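  // Worked example (illustrative): with an incoming Threshold of 200, the code
  // above computes FiftyPercentVectorBonus = 300, TenPercentVectorBonus = 150,
  // and SingleBBBonus = 100, so the speculative Threshold used while walking
  // the body is 200 + 100 + 300 = 600; the unused portion of these bonuses is
  // taken back before the final 'Cost < Threshold' comparison.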
01042 
01043   // Give out bonuses per argument, as the instructions setting them up will
01044   // be gone after inlining.
01045   for (unsigned I = 0, E = CS.arg_size(); I != E; ++I) {
01046     if (CS.isByValArgument(I)) {
01047       // We approximate the number of loads and stores needed by dividing the
01048       // size of the byval type by the target's pointer size.
01049       PointerType *PTy = cast<PointerType>(CS.getArgument(I)->getType());
01050       unsigned TypeSize = DL.getTypeSizeInBits(PTy->getElementType());
01051       unsigned PointerSize = DL.getPointerSizeInBits();
01052       // Ceiling division.
01053       unsigned NumStores = (TypeSize + PointerSize - 1) / PointerSize;
01054 
01055       // If it generates more than 8 stores it is likely to be expanded as an
01056       // inline memcpy so we take that as an upper bound. Otherwise we assume
01057       // one load and one store per word copied.
01058       // FIXME: The maxStoresPerMemcpy setting from the target should be used
01059       // here instead of a magic number of 8, but it's not available via
01060       // DataLayout.
01061       NumStores = std::min(NumStores, 8U);
01062 
01063       Cost -= 2 * NumStores * InlineConstants::InstrCost;
01064     } else {
01065       // For non-byval arguments subtract off one instruction per call
01066       // argument.
01067       Cost -= InlineConstants::InstrCost;
01068     }
01069   }
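  // Worked example (illustrative): a byval argument pointing at a 64-byte
  // struct on a target with 64-bit pointers yields NumStores =
  // ceil(512 / 64) = 8, which is not clamped, so 2 * 8 * InstrCost is credited
  // for the copy that inlining removes; each non-byval argument is credited a
  // single InstrCost for its setup.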
01070 
01071   // If there is only one call of the function, and it has internal linkage,
01072   // the cost of inlining it drops dramatically.
01073   bool OnlyOneCallAndLocalLinkage = F.hasLocalLinkage() && F.hasOneUse() &&
01074     &F == CS.getCalledFunction();
01075   if (OnlyOneCallAndLocalLinkage)
01076     Cost += InlineConstants::LastCallToStaticBonus;
01077 
01078   // If the instruction after the call, or the normal destination of the
01079   // invoke, is an unreachable instruction, the function is noreturn. As such,
01080   // there is little point in inlining this unless there is literally zero
01081   // cost.
01082   Instruction *Instr = CS.getInstruction();
01083   if (InvokeInst *II = dyn_cast<InvokeInst>(Instr)) {
01084     if (isa<UnreachableInst>(II->getNormalDest()->begin()))
01085       Threshold = 0;
01086   } else if (isa<UnreachableInst>(++BasicBlock::iterator(Instr)))
01087     Threshold = 0;
01088 
01089   // If this function uses the coldcc calling convention, prefer not to inline
01090   // it.
01091   if (F.getCallingConv() == CallingConv::Cold)
01092     Cost += InlineConstants::ColdccPenalty;
01093 
01094   // Check if we're done. This can happen due to bonuses and penalties.
01095   if (Cost > Threshold)
01096     return false;
01097 
01098   if (F.empty())
01099     return true;
01100 
01101   Function *Caller = CS.getInstruction()->getParent()->getParent();
01102   // Check if the caller function is recursive itself.
01103   for (User *U : Caller->users()) {
01104     CallSite Site(U);
01105     if (!Site)
01106       continue;
01107     Instruction *I = Site.getInstruction();
01108     if (I->getParent()->getParent() == Caller) {
01109       IsCallerRecursive = true;
01110       break;
01111     }
01112   }
01113 
01114   // Populate our simplified values by mapping from function arguments to call
01115   // arguments with known important simplifications.
01116   CallSite::arg_iterator CAI = CS.arg_begin();
01117   for (Function::arg_iterator FAI = F.arg_begin(), FAE = F.arg_end();
01118        FAI != FAE; ++FAI, ++CAI) {
01119     assert(CAI != CS.arg_end());
01120     if (Constant *C = dyn_cast<Constant>(CAI))
01121       SimplifiedValues[FAI] = C;
01122 
01123     Value *PtrArg = *CAI;
01124     if (ConstantInt *C = stripAndComputeInBoundsConstantOffsets(PtrArg)) {
01125       ConstantOffsetPtrs[FAI] = std::make_pair(PtrArg, C->getValue());
01126 
01127       // We can SROA any pointer arguments derived from alloca instructions.
01128       if (isa<AllocaInst>(PtrArg)) {
01129         SROAArgValues[FAI] = PtrArg;
01130         SROAArgCosts[PtrArg] = 0;
01131       }
01132     }
01133   }
01134   NumConstantArgs = SimplifiedValues.size();
01135   NumConstantOffsetPtrArgs = ConstantOffsetPtrs.size();
01136   NumAllocaArgs = SROAArgValues.size();
01137 
01138   // FIXME: If a caller has multiple calls to a callee, we end up recomputing
01139   // the ephemeral values multiple times (and they're completely determined by
01140   // the callee, so this is purely duplicate work).
01141   SmallPtrSet<const Value *, 32> EphValues;
01142   CodeMetrics::collectEphemeralValues(&F, &ACT->getAssumptionCache(F), EphValues);
01143 
01144   // The worklist of live basic blocks in the callee *after* inlining. We avoid
01145   // adding basic blocks of the callee which can be proven to be dead for this
01146   // particular call site in order to get more accurate cost estimates. This
01147   // requires a somewhat heavyweight iteration pattern: we need to walk the
01148   // basic blocks in a breadth-first order as we insert live successors. To
01149   // accomplish this, and because we expect to exit early once we cross our
01150   // threshold, we use a small-size optimized SetVector.
01151   typedef SetVector<BasicBlock *, SmallVector<BasicBlock *, 16>,
01152                                   SmallPtrSet<BasicBlock *, 16> > BBSetVector;
01153   BBSetVector BBWorklist;
01154   BBWorklist.insert(&F.getEntryBlock());
01155   // Note that we *must not* cache the size, this loop grows the worklist.
01156   for (unsigned Idx = 0; Idx != BBWorklist.size(); ++Idx) {
01157     // Bail out the moment we cross the threshold. This means we'll under-count
01158     // the cost, but only when undercounting doesn't matter.
01159     if (Cost > Threshold)
01160       break;
01161 
01162     BasicBlock *BB = BBWorklist[Idx];
01163     if (BB->empty())
01164       continue;
01165 
01166     // Disallow inlining a blockaddress. A blockaddress only has defined
01167     // behavior for an indirect branch in the same function, and we do not
01168     // currently support inlining indirect branches. But, the inliner may not
01169     // see an indirect branch that ends up being dead code at a particular call
01170     // site. If the blockaddress escapes the function, e.g., via a global
01171     // variable, inlining may lead to an invalid cross-function reference.
01172     if (BB->hasAddressTaken())
01173       return false;
01174 
01175     // Analyze the cost of this block. If we blow through the threshold, this
01176     // returns false, and we can bail out.
01177     if (!analyzeBlock(BB, EphValues)) {
01178       if (IsRecursiveCall || ExposesReturnsTwice || HasDynamicAlloca ||
01179           HasIndirectBr || HasFrameEscape)
01180         return false;
01181 
01182       // If the caller is a recursive function then we don't want to inline
01183       // functions which allocate a lot of stack space because it would increase
01184       // the caller stack usage dramatically.
01185       if (IsCallerRecursive &&
01186           AllocatedSize > InlineConstants::TotalAllocaSizeRecursiveCaller)
01187         return false;
01188 
01189       break;
01190     }
01191 
01192     TerminatorInst *TI = BB->getTerminator();
01193 
01194     // Add in the live successors by first checking whether we have a terminator
01195     // that may be simplified based on the values simplified by this call.
01196     if (BranchInst *BI = dyn_cast<BranchInst>(TI)) {
01197       if (BI->isConditional()) {
01198         Value *Cond = BI->getCondition();
01199         if (ConstantInt *SimpleCond
01200               = dyn_cast_or_null<ConstantInt>(SimplifiedValues.lookup(Cond))) {
01201           BBWorklist.insert(BI->getSuccessor(SimpleCond->isZero() ? 1 : 0));
01202           continue;
01203         }
01204       }
01205     } else if (SwitchInst *SI = dyn_cast<SwitchInst>(TI)) {
01206       Value *Cond = SI->getCondition();
01207       if (ConstantInt *SimpleCond
01208             = dyn_cast_or_null<ConstantInt>(SimplifiedValues.lookup(Cond))) {
01209         BBWorklist.insert(SI->findCaseValue(SimpleCond).getCaseSuccessor());
01210         continue;
01211       }
01212     }
01213 
01214     // If we're unable to select a particular successor, just count all of
01215     // them.
01216     for (unsigned TIdx = 0, TSize = TI->getNumSuccessors(); TIdx != TSize;
01217          ++TIdx)
01218       BBWorklist.insert(TI->getSuccessor(TIdx));
01219 
01220     // If we had any successors at this point, then post-inlining is likely to
01221     // have them as well. Note that we assume any basic blocks which existed
01222     // due to branches or switches which folded above will also fold after
01223     // inlining.
01224     if (SingleBB && TI->getNumSuccessors() > 1) {
01225       // Take off the bonus we applied to the threshold.
01226       Threshold -= SingleBBBonus;
01227       SingleBB = false;
01228     }
01229   }
01230 
01231   // If this is a noduplicate call, we can still inline as long as
01232   // inlining this would cause the removal of the caller (so the instruction
01233   // is not actually duplicated, just moved).
01234   if (!OnlyOneCallAndLocalLinkage && ContainsNoDuplicateCall)
01235     return false;
01236 
01237   // We applied the maximum possible vector bonus at the beginning. Now,
01238   // subtract the excess bonus, if any, from the Threshold before
01239   // comparing against Cost.
01240   if (NumVectorInstructions <= NumInstructions / 10)
01241     Threshold -= FiftyPercentVectorBonus;
01242   else if (NumVectorInstructions <= NumInstructions / 2)
01243     Threshold -= (FiftyPercentVectorBonus - TenPercentVectorBonus);
01244 
01245   return Cost < Threshold;
01246 }
01247 
01248 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
01249 /// \brief Dump stats about this call's analysis.
01250 void CallAnalyzer::dump() {
01251 #define DEBUG_PRINT_STAT(x) dbgs() << "      " #x ": " << x << "\n"
01252   DEBUG_PRINT_STAT(NumConstantArgs);
01253   DEBUG_PRINT_STAT(NumConstantOffsetPtrArgs);
01254   DEBUG_PRINT_STAT(NumAllocaArgs);
01255   DEBUG_PRINT_STAT(NumConstantPtrCmps);
01256   DEBUG_PRINT_STAT(NumConstantPtrDiffs);
01257   DEBUG_PRINT_STAT(NumInstructionsSimplified);
01258   DEBUG_PRINT_STAT(NumInstructions);
01259   DEBUG_PRINT_STAT(SROACostSavings);
01260   DEBUG_PRINT_STAT(SROACostSavingsLost);
01261   DEBUG_PRINT_STAT(ContainsNoDuplicateCall);
01262   DEBUG_PRINT_STAT(Cost);
01263   DEBUG_PRINT_STAT(Threshold);
01264 #undef DEBUG_PRINT_STAT
01265 }
01266 #endif
01267 
01268 INITIALIZE_PASS_BEGIN(InlineCostAnalysis, "inline-cost", "Inline Cost Analysis",
01269                       true, true)
01270 INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)
01271 INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
01272 INITIALIZE_PASS_END(InlineCostAnalysis, "inline-cost", "Inline Cost Analysis",
01273                     true, true)
01274 
01275 char InlineCostAnalysis::ID = 0;
01276 
01277 InlineCostAnalysis::InlineCostAnalysis() : CallGraphSCCPass(ID) {}
01278 
01279 InlineCostAnalysis::~InlineCostAnalysis() {}
01280 
01281 void InlineCostAnalysis::getAnalysisUsage(AnalysisUsage &AU) const {
01282   AU.setPreservesAll();
01283   AU.addRequired<AssumptionCacheTracker>();
01284   AU.addRequired<TargetTransformInfoWrapperPass>();
01285   CallGraphSCCPass::getAnalysisUsage(AU);
01286 }
01287 
01288 bool InlineCostAnalysis::runOnSCC(CallGraphSCC &SCC) {
01289   TTIWP = &getAnalysis<TargetTransformInfoWrapperPass>();
01290   ACT = &getAnalysis<AssumptionCacheTracker>();
01291   return false;
01292 }
01293 
01294 InlineCost InlineCostAnalysis::getInlineCost(CallSite CS, int Threshold) {
01295   return getInlineCost(CS, CS.getCalledFunction(), Threshold);
01296 }
01297 
01298 /// \brief Test that two functions either both have or both lack the given
01299 ///        attribute.
01300 template<typename AttrKind>
01301 static bool attributeMatches(Function *F1, Function *F2, AttrKind Attr) {
01302   return F1->getFnAttribute(Attr) == F2->getFnAttribute(Attr);
01303 }
01304 
01305 /// \brief Test that there are no attribute conflicts between Caller and Callee
01306 ///        that prevent inlining.
01307 static bool functionsHaveCompatibleAttributes(Function *Caller,
01308                                               Function *Callee) {
01309   return attributeMatches(Caller, Callee, "target-cpu") &&
01310          attributeMatches(Caller, Callee, "target-features") &&
01311          attributeMatches(Caller, Callee, Attribute::SanitizeAddress) &&
01312          attributeMatches(Caller, Callee, Attribute::SanitizeMemory) &&
01313          attributeMatches(Caller, Callee, Attribute::SanitizeThread);
01314 }
01315 
01316 InlineCost InlineCostAnalysis::getInlineCost(CallSite CS, Function *Callee,
01317                                              int Threshold) {
01318   // Cannot inline indirect calls.
01319   if (!Callee)
01320     return llvm::InlineCost::getNever();
01321 
01322   // Calls to functions with always-inline attributes should be inlined
01323   // whenever possible.
01324   if (CS.hasFnAttr(Attribute::AlwaysInline)) {
01325     if (isInlineViable(*Callee))
01326       return llvm::InlineCost::getAlways();
01327     return llvm::InlineCost::getNever();
01328   }
01329 
01330   // Never inline functions with conflicting attributes (unless callee has
01331   // always-inline attribute).
01332   if (!functionsHaveCompatibleAttributes(CS.getCaller(), Callee))
01333     return llvm::InlineCost::getNever();
01334 
01335   // Don't inline this call if the caller has the optnone attribute.
01336   if (CS.getCaller()->hasFnAttribute(Attribute::OptimizeNone))
01337     return llvm::InlineCost::getNever();
01338 
01339   // Don't inline functions which can be redefined at link-time to mean
01340   // something else.  Don't inline functions marked noinline or call sites
01341   // marked noinline.
01342   if (Callee->mayBeOverridden() ||
01343       Callee->hasFnAttribute(Attribute::NoInline) || CS.isNoInline())
01344     return llvm::InlineCost::getNever();
01345 
01346   DEBUG(llvm::dbgs() << "      Analyzing call of " << Callee->getName()
01347         << "...\n");
01348 
01349   CallAnalyzer CA(TTIWP->getTTI(*Callee), ACT, *Callee, Threshold);
01350   bool ShouldInline = CA.analyzeCall(CS);
01351 
01352   DEBUG(CA.dump());
01353 
01354   // Check if there was a reason to force inlining or no inlining.
01355   if (!ShouldInline && CA.getCost() < CA.getThreshold())
01356     return InlineCost::getNever();
01357   if (ShouldInline && CA.getCost() >= CA.getThreshold())
01358     return InlineCost::getAlways();
01359 
01360   return llvm::InlineCost::get(CA.getCost(), CA.getThreshold());
01361 }
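// Typical use (a sketch; the real driver logic lives in the inliner passes,
// not here): a pass holding this analysis can do roughly
//   InlineCost IC = ICA->getInlineCost(CS, SomeThreshold);
//   if (IC.isAlways())       { /* must inline */ }
//   else if (IC.isNever())   { /* never inline */ }
//   else                     { /* weigh IC.getCost() against the threshold */ }
// where 'ICA' and 'SomeThreshold' stand in for the caller's analysis pointer
// and threshold policy.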
01362 
01363 bool InlineCostAnalysis::isInlineViable(Function &F) {
01364   bool ReturnsTwice = F.hasFnAttribute(Attribute::ReturnsTwice);
01365   for (Function::iterator BI = F.begin(), BE = F.end(); BI != BE; ++BI) {
01366     // Disallow inlining of functions which contain indirect branches or
01367     // blockaddresses.
01368     if (isa<IndirectBrInst>(BI->getTerminator()) || BI->hasAddressTaken())
01369       return false;
01370 
01371     for (BasicBlock::iterator II = BI->begin(), IE = BI->end(); II != IE;
01372          ++II) {
01373       CallSite CS(II);
01374       if (!CS)
01375         continue;
01376 
01377       // Disallow recursive calls.
01378       if (&F == CS.getCalledFunction())
01379         return false;
01380 
01381       // Disallow calls which expose returns-twice to a function not previously
01382       // attributed as such.
01383       if (!ReturnsTwice && CS.isCall() &&
01384           cast<CallInst>(CS.getInstruction())->canReturnTwice())
01385         return false;
01386 
01387       // Disallow inlining functions that call @llvm.frameescape. Doing this
01388       // correctly would require major changes to the inliner.
01389       if (CS.getCalledFunction() &&
01390           CS.getCalledFunction()->getIntrinsicID() ==
01391               llvm::Intrinsic::frameescape)
01392         return false;
01393     }
01394   }
01395 
01396   return true;
01397 }