LLVM API Documentation
00001 //===- InlineCost.cpp - Cost analysis for inliner -------------------------===// 00002 // 00003 // The LLVM Compiler Infrastructure 00004 // 00005 // This file is distributed under the University of Illinois Open Source 00006 // License. See LICENSE.TXT for details. 00007 // 00008 //===----------------------------------------------------------------------===// 00009 // 00010 // This file implements inline cost analysis. 00011 // 00012 //===----------------------------------------------------------------------===// 00013 00014 #define DEBUG_TYPE "inline-cost" 00015 #include "llvm/Analysis/InlineCost.h" 00016 #include "llvm/ADT/STLExtras.h" 00017 #include "llvm/ADT/SetVector.h" 00018 #include "llvm/ADT/SmallPtrSet.h" 00019 #include "llvm/ADT/SmallVector.h" 00020 #include "llvm/ADT/Statistic.h" 00021 #include "llvm/Analysis/ConstantFolding.h" 00022 #include "llvm/Analysis/InstructionSimplify.h" 00023 #include "llvm/Analysis/TargetTransformInfo.h" 00024 #include "llvm/IR/CallingConv.h" 00025 #include "llvm/IR/DataLayout.h" 00026 #include "llvm/IR/GlobalAlias.h" 00027 #include "llvm/IR/IntrinsicInst.h" 00028 #include "llvm/IR/Operator.h" 00029 #include "llvm/InstVisitor.h" 00030 #include "llvm/Support/CallSite.h" 00031 #include "llvm/Support/Debug.h" 00032 #include "llvm/Support/GetElementPtrTypeIterator.h" 00033 #include "llvm/Support/raw_ostream.h" 00034 00035 using namespace llvm; 00036 00037 STATISTIC(NumCallsAnalyzed, "Number of call sites analyzed"); 00038 00039 namespace { 00040 00041 class CallAnalyzer : public InstVisitor<CallAnalyzer, bool> { 00042 typedef InstVisitor<CallAnalyzer, bool> Base; 00043 friend class InstVisitor<CallAnalyzer, bool>; 00044 00045 // DataLayout if available, or null. 00046 const DataLayout *const TD; 00047 00048 /// The TargetTransformInfo available for this compilation. 00049 const TargetTransformInfo &TTI; 00050 00051 // The called function. 00052 Function &F; 00053 00054 int Threshold; 00055 int Cost; 00056 00057 bool IsCallerRecursive; 00058 bool IsRecursiveCall; 00059 bool ExposesReturnsTwice; 00060 bool HasDynamicAlloca; 00061 bool ContainsNoDuplicateCall; 00062 00063 /// Number of bytes allocated statically by the callee. 00064 uint64_t AllocatedSize; 00065 unsigned NumInstructions, NumVectorInstructions; 00066 int FiftyPercentVectorBonus, TenPercentVectorBonus; 00067 int VectorBonus; 00068 00069 // While we walk the potentially-inlined instructions, we build up and 00070 // maintain a mapping of simplified values specific to this callsite. The 00071 // idea is to propagate any special information we have about arguments to 00072 // this call through the inlinable section of the function, and account for 00073 // likely simplifications post-inlining. The most important aspect we track 00074 // is CFG altering simplifications -- when we prove a basic block dead, that 00075 // can cause dramatic shifts in the cost of inlining a function. 00076 DenseMap<Value *, Constant *> SimplifiedValues; 00077 00078 // Keep track of the values which map back (through function arguments) to 00079 // allocas on the caller stack which could be simplified through SROA. 00080 DenseMap<Value *, Value *> SROAArgValues; 00081 00082 // The mapping of caller Alloca values to their accumulated cost savings. If 00083 // we have to disable SROA for one of the allocas, this tells us how much 00084 // cost must be added. 00085 DenseMap<Value *, int> SROAArgCosts; 00086 00087 // Keep track of values which map to a pointer base and constant offset. 00088 DenseMap<Value *, std::pair<Value *, APInt> > ConstantOffsetPtrs; 00089 00090 // Custom simplification helper routines. 00091 bool isAllocaDerivedArg(Value *V); 00092 bool lookupSROAArgAndCost(Value *V, Value *&Arg, 00093 DenseMap<Value *, int>::iterator &CostIt); 00094 void disableSROA(DenseMap<Value *, int>::iterator CostIt); 00095 void disableSROA(Value *V); 00096 void accumulateSROACost(DenseMap<Value *, int>::iterator CostIt, 00097 int InstructionCost); 00098 bool handleSROACandidate(bool IsSROAValid, 00099 DenseMap<Value *, int>::iterator CostIt, 00100 int InstructionCost); 00101 bool isGEPOffsetConstant(GetElementPtrInst &GEP); 00102 bool accumulateGEPOffset(GEPOperator &GEP, APInt &Offset); 00103 bool simplifyCallSite(Function *F, CallSite CS); 00104 ConstantInt *stripAndComputeInBoundsConstantOffsets(Value *&V); 00105 00106 // Custom analysis routines. 00107 bool analyzeBlock(BasicBlock *BB); 00108 00109 // Disable several entry points to the visitor so we don't accidentally use 00110 // them by declaring but not defining them here. 00111 void visit(Module *); void visit(Module &); 00112 void visit(Function *); void visit(Function &); 00113 void visit(BasicBlock *); void visit(BasicBlock &); 00114 00115 // Provide base case for our instruction visit. 00116 bool visitInstruction(Instruction &I); 00117 00118 // Our visit overrides. 00119 bool visitAlloca(AllocaInst &I); 00120 bool visitPHI(PHINode &I); 00121 bool visitGetElementPtr(GetElementPtrInst &I); 00122 bool visitBitCast(BitCastInst &I); 00123 bool visitPtrToInt(PtrToIntInst &I); 00124 bool visitIntToPtr(IntToPtrInst &I); 00125 bool visitCastInst(CastInst &I); 00126 bool visitUnaryInstruction(UnaryInstruction &I); 00127 bool visitICmp(ICmpInst &I); 00128 bool visitSub(BinaryOperator &I); 00129 bool visitBinaryOperator(BinaryOperator &I); 00130 bool visitLoad(LoadInst &I); 00131 bool visitStore(StoreInst &I); 00132 bool visitExtractValue(ExtractValueInst &I); 00133 bool visitInsertValue(InsertValueInst &I); 00134 bool visitCallSite(CallSite CS); 00135 00136 public: 00137 CallAnalyzer(const DataLayout *TD, const TargetTransformInfo &TTI, 00138 Function &Callee, int Threshold) 00139 : TD(TD), TTI(TTI), F(Callee), Threshold(Threshold), Cost(0), 00140 IsCallerRecursive(false), IsRecursiveCall(false), 00141 ExposesReturnsTwice(false), HasDynamicAlloca(false), 00142 ContainsNoDuplicateCall(false), AllocatedSize(0), NumInstructions(0), 00143 NumVectorInstructions(0), FiftyPercentVectorBonus(0), 00144 TenPercentVectorBonus(0), VectorBonus(0), NumConstantArgs(0), 00145 NumConstantOffsetPtrArgs(0), NumAllocaArgs(0), NumConstantPtrCmps(0), 00146 NumConstantPtrDiffs(0), NumInstructionsSimplified(0), 00147 SROACostSavings(0), SROACostSavingsLost(0) {} 00148 00149 bool analyzeCall(CallSite CS); 00150 00151 int getThreshold() { return Threshold; } 00152 int getCost() { return Cost; } 00153 00154 // Keep a bunch of stats about the cost savings found so we can print them 00155 // out when debugging. 00156 unsigned NumConstantArgs; 00157 unsigned NumConstantOffsetPtrArgs; 00158 unsigned NumAllocaArgs; 00159 unsigned NumConstantPtrCmps; 00160 unsigned NumConstantPtrDiffs; 00161 unsigned NumInstructionsSimplified; 00162 unsigned SROACostSavings; 00163 unsigned SROACostSavingsLost; 00164 00165 void dump(); 00166 }; 00167 00168 } // namespace 00169 00170 /// \brief Test whether the given value is an Alloca-derived function argument. 00171 bool CallAnalyzer::isAllocaDerivedArg(Value *V) { 00172 return SROAArgValues.count(V); 00173 } 00174 00175 /// \brief Lookup the SROA-candidate argument and cost iterator which V maps to. 00176 /// Returns false if V does not map to a SROA-candidate. 00177 bool CallAnalyzer::lookupSROAArgAndCost( 00178 Value *V, Value *&Arg, DenseMap<Value *, int>::iterator &CostIt) { 00179 if (SROAArgValues.empty() || SROAArgCosts.empty()) 00180 return false; 00181 00182 DenseMap<Value *, Value *>::iterator ArgIt = SROAArgValues.find(V); 00183 if (ArgIt == SROAArgValues.end()) 00184 return false; 00185 00186 Arg = ArgIt->second; 00187 CostIt = SROAArgCosts.find(Arg); 00188 return CostIt != SROAArgCosts.end(); 00189 } 00190 00191 /// \brief Disable SROA for the candidate marked by this cost iterator. 00192 /// 00193 /// This marks the candidate as no longer viable for SROA, and adds the cost 00194 /// savings associated with it back into the inline cost measurement. 00195 void CallAnalyzer::disableSROA(DenseMap<Value *, int>::iterator CostIt) { 00196 // If we're no longer able to perform SROA we need to undo its cost savings 00197 // and prevent subsequent analysis. 00198 Cost += CostIt->second; 00199 SROACostSavings -= CostIt->second; 00200 SROACostSavingsLost += CostIt->second; 00201 SROAArgCosts.erase(CostIt); 00202 } 00203 00204 /// \brief If 'V' maps to a SROA candidate, disable SROA for it. 00205 void CallAnalyzer::disableSROA(Value *V) { 00206 Value *SROAArg; 00207 DenseMap<Value *, int>::iterator CostIt; 00208 if (lookupSROAArgAndCost(V, SROAArg, CostIt)) 00209 disableSROA(CostIt); 00210 } 00211 00212 /// \brief Accumulate the given cost for a particular SROA candidate. 00213 void CallAnalyzer::accumulateSROACost(DenseMap<Value *, int>::iterator CostIt, 00214 int InstructionCost) { 00215 CostIt->second += InstructionCost; 00216 SROACostSavings += InstructionCost; 00217 } 00218 00219 /// \brief Helper for the common pattern of handling a SROA candidate. 00220 /// Either accumulates the cost savings if the SROA remains valid, or disables 00221 /// SROA for the candidate. 00222 bool CallAnalyzer::handleSROACandidate(bool IsSROAValid, 00223 DenseMap<Value *, int>::iterator CostIt, 00224 int InstructionCost) { 00225 if (IsSROAValid) { 00226 accumulateSROACost(CostIt, InstructionCost); 00227 return true; 00228 } 00229 00230 disableSROA(CostIt); 00231 return false; 00232 } 00233 00234 /// \brief Check whether a GEP's indices are all constant. 00235 /// 00236 /// Respects any simplified values known during the analysis of this callsite. 00237 bool CallAnalyzer::isGEPOffsetConstant(GetElementPtrInst &GEP) { 00238 for (User::op_iterator I = GEP.idx_begin(), E = GEP.idx_end(); I != E; ++I) 00239 if (!isa<Constant>(*I) && !SimplifiedValues.lookup(*I)) 00240 return false; 00241 00242 return true; 00243 } 00244 00245 /// \brief Accumulate a constant GEP offset into an APInt if possible. 00246 /// 00247 /// Returns false if unable to compute the offset for any reason. Respects any 00248 /// simplified values known during the analysis of this callsite. 00249 bool CallAnalyzer::accumulateGEPOffset(GEPOperator &GEP, APInt &Offset) { 00250 if (!TD) 00251 return false; 00252 00253 unsigned IntPtrWidth = TD->getPointerSizeInBits(); 00254 assert(IntPtrWidth == Offset.getBitWidth()); 00255 00256 for (gep_type_iterator GTI = gep_type_begin(GEP), GTE = gep_type_end(GEP); 00257 GTI != GTE; ++GTI) { 00258 ConstantInt *OpC = dyn_cast<ConstantInt>(GTI.getOperand()); 00259 if (!OpC) 00260 if (Constant *SimpleOp = SimplifiedValues.lookup(GTI.getOperand())) 00261 OpC = dyn_cast<ConstantInt>(SimpleOp); 00262 if (!OpC) 00263 return false; 00264 if (OpC->isZero()) continue; 00265 00266 // Handle a struct index, which adds its field offset to the pointer. 00267 if (StructType *STy = dyn_cast<StructType>(*GTI)) { 00268 unsigned ElementIdx = OpC->getZExtValue(); 00269 const StructLayout *SL = TD->getStructLayout(STy); 00270 Offset += APInt(IntPtrWidth, SL->getElementOffset(ElementIdx)); 00271 continue; 00272 } 00273 00274 APInt TypeSize(IntPtrWidth, TD->getTypeAllocSize(GTI.getIndexedType())); 00275 Offset += OpC->getValue().sextOrTrunc(IntPtrWidth) * TypeSize; 00276 } 00277 return true; 00278 } 00279 00280 bool CallAnalyzer::visitAlloca(AllocaInst &I) { 00281 // FIXME: Check whether inlining will turn a dynamic alloca into a static 00282 // alloca, and handle that case. 00283 00284 // Accumulate the allocated size. 00285 if (I.isStaticAlloca()) { 00286 Type *Ty = I.getAllocatedType(); 00287 AllocatedSize += (TD ? TD->getTypeAllocSize(Ty) : 00288 Ty->getPrimitiveSizeInBits()); 00289 } 00290 00291 // We will happily inline static alloca instructions. 00292 if (I.isStaticAlloca()) 00293 return Base::visitAlloca(I); 00294 00295 // FIXME: This is overly conservative. Dynamic allocas are inefficient for 00296 // a variety of reasons, and so we would like to not inline them into 00297 // functions which don't currently have a dynamic alloca. This simply 00298 // disables inlining altogether in the presence of a dynamic alloca. 00299 HasDynamicAlloca = true; 00300 return false; 00301 } 00302 00303 bool CallAnalyzer::visitPHI(PHINode &I) { 00304 // FIXME: We should potentially be tracking values through phi nodes, 00305 // especially when they collapse to a single value due to deleted CFG edges 00306 // during inlining. 00307 00308 // FIXME: We need to propagate SROA *disabling* through phi nodes, even 00309 // though we don't want to propagate it's bonuses. The idea is to disable 00310 // SROA if it *might* be used in an inappropriate manner. 00311 00312 // Phi nodes are always zero-cost. 00313 return true; 00314 } 00315 00316 bool CallAnalyzer::visitGetElementPtr(GetElementPtrInst &I) { 00317 Value *SROAArg; 00318 DenseMap<Value *, int>::iterator CostIt; 00319 bool SROACandidate = lookupSROAArgAndCost(I.getPointerOperand(), 00320 SROAArg, CostIt); 00321 00322 // Try to fold GEPs of constant-offset call site argument pointers. This 00323 // requires target data and inbounds GEPs. 00324 if (TD && I.isInBounds()) { 00325 // Check if we have a base + offset for the pointer. 00326 Value *Ptr = I.getPointerOperand(); 00327 std::pair<Value *, APInt> BaseAndOffset = ConstantOffsetPtrs.lookup(Ptr); 00328 if (BaseAndOffset.first) { 00329 // Check if the offset of this GEP is constant, and if so accumulate it 00330 // into Offset. 00331 if (!accumulateGEPOffset(cast<GEPOperator>(I), BaseAndOffset.second)) { 00332 // Non-constant GEPs aren't folded, and disable SROA. 00333 if (SROACandidate) 00334 disableSROA(CostIt); 00335 return false; 00336 } 00337 00338 // Add the result as a new mapping to Base + Offset. 00339 ConstantOffsetPtrs[&I] = BaseAndOffset; 00340 00341 // Also handle SROA candidates here, we already know that the GEP is 00342 // all-constant indexed. 00343 if (SROACandidate) 00344 SROAArgValues[&I] = SROAArg; 00345 00346 return true; 00347 } 00348 } 00349 00350 if (isGEPOffsetConstant(I)) { 00351 if (SROACandidate) 00352 SROAArgValues[&I] = SROAArg; 00353 00354 // Constant GEPs are modeled as free. 00355 return true; 00356 } 00357 00358 // Variable GEPs will require math and will disable SROA. 00359 if (SROACandidate) 00360 disableSROA(CostIt); 00361 return false; 00362 } 00363 00364 bool CallAnalyzer::visitBitCast(BitCastInst &I) { 00365 // Propagate constants through bitcasts. 00366 Constant *COp = dyn_cast<Constant>(I.getOperand(0)); 00367 if (!COp) 00368 COp = SimplifiedValues.lookup(I.getOperand(0)); 00369 if (COp) 00370 if (Constant *C = ConstantExpr::getBitCast(COp, I.getType())) { 00371 SimplifiedValues[&I] = C; 00372 return true; 00373 } 00374 00375 // Track base/offsets through casts 00376 std::pair<Value *, APInt> BaseAndOffset 00377 = ConstantOffsetPtrs.lookup(I.getOperand(0)); 00378 // Casts don't change the offset, just wrap it up. 00379 if (BaseAndOffset.first) 00380 ConstantOffsetPtrs[&I] = BaseAndOffset; 00381 00382 // Also look for SROA candidates here. 00383 Value *SROAArg; 00384 DenseMap<Value *, int>::iterator CostIt; 00385 if (lookupSROAArgAndCost(I.getOperand(0), SROAArg, CostIt)) 00386 SROAArgValues[&I] = SROAArg; 00387 00388 // Bitcasts are always zero cost. 00389 return true; 00390 } 00391 00392 bool CallAnalyzer::visitPtrToInt(PtrToIntInst &I) { 00393 // Propagate constants through ptrtoint. 00394 Constant *COp = dyn_cast<Constant>(I.getOperand(0)); 00395 if (!COp) 00396 COp = SimplifiedValues.lookup(I.getOperand(0)); 00397 if (COp) 00398 if (Constant *C = ConstantExpr::getPtrToInt(COp, I.getType())) { 00399 SimplifiedValues[&I] = C; 00400 return true; 00401 } 00402 00403 // Track base/offset pairs when converted to a plain integer provided the 00404 // integer is large enough to represent the pointer. 00405 unsigned IntegerSize = I.getType()->getScalarSizeInBits(); 00406 if (TD && IntegerSize >= TD->getPointerSizeInBits()) { 00407 std::pair<Value *, APInt> BaseAndOffset 00408 = ConstantOffsetPtrs.lookup(I.getOperand(0)); 00409 if (BaseAndOffset.first) 00410 ConstantOffsetPtrs[&I] = BaseAndOffset; 00411 } 00412 00413 // This is really weird. Technically, ptrtoint will disable SROA. However, 00414 // unless that ptrtoint is *used* somewhere in the live basic blocks after 00415 // inlining, it will be nuked, and SROA should proceed. All of the uses which 00416 // would block SROA would also block SROA if applied directly to a pointer, 00417 // and so we can just add the integer in here. The only places where SROA is 00418 // preserved either cannot fire on an integer, or won't in-and-of themselves 00419 // disable SROA (ext) w/o some later use that we would see and disable. 00420 Value *SROAArg; 00421 DenseMap<Value *, int>::iterator CostIt; 00422 if (lookupSROAArgAndCost(I.getOperand(0), SROAArg, CostIt)) 00423 SROAArgValues[&I] = SROAArg; 00424 00425 return TargetTransformInfo::TCC_Free == TTI.getUserCost(&I); 00426 } 00427 00428 bool CallAnalyzer::visitIntToPtr(IntToPtrInst &I) { 00429 // Propagate constants through ptrtoint. 00430 Constant *COp = dyn_cast<Constant>(I.getOperand(0)); 00431 if (!COp) 00432 COp = SimplifiedValues.lookup(I.getOperand(0)); 00433 if (COp) 00434 if (Constant *C = ConstantExpr::getIntToPtr(COp, I.getType())) { 00435 SimplifiedValues[&I] = C; 00436 return true; 00437 } 00438 00439 // Track base/offset pairs when round-tripped through a pointer without 00440 // modifications provided the integer is not too large. 00441 Value *Op = I.getOperand(0); 00442 unsigned IntegerSize = Op->getType()->getScalarSizeInBits(); 00443 if (TD && IntegerSize <= TD->getPointerSizeInBits()) { 00444 std::pair<Value *, APInt> BaseAndOffset = ConstantOffsetPtrs.lookup(Op); 00445 if (BaseAndOffset.first) 00446 ConstantOffsetPtrs[&I] = BaseAndOffset; 00447 } 00448 00449 // "Propagate" SROA here in the same manner as we do for ptrtoint above. 00450 Value *SROAArg; 00451 DenseMap<Value *, int>::iterator CostIt; 00452 if (lookupSROAArgAndCost(Op, SROAArg, CostIt)) 00453 SROAArgValues[&I] = SROAArg; 00454 00455 return TargetTransformInfo::TCC_Free == TTI.getUserCost(&I); 00456 } 00457 00458 bool CallAnalyzer::visitCastInst(CastInst &I) { 00459 // Propagate constants through ptrtoint. 00460 Constant *COp = dyn_cast<Constant>(I.getOperand(0)); 00461 if (!COp) 00462 COp = SimplifiedValues.lookup(I.getOperand(0)); 00463 if (COp) 00464 if (Constant *C = ConstantExpr::getCast(I.getOpcode(), COp, I.getType())) { 00465 SimplifiedValues[&I] = C; 00466 return true; 00467 } 00468 00469 // Disable SROA in the face of arbitrary casts we don't whitelist elsewhere. 00470 disableSROA(I.getOperand(0)); 00471 00472 return TargetTransformInfo::TCC_Free == TTI.getUserCost(&I); 00473 } 00474 00475 bool CallAnalyzer::visitUnaryInstruction(UnaryInstruction &I) { 00476 Value *Operand = I.getOperand(0); 00477 Constant *COp = dyn_cast<Constant>(Operand); 00478 if (!COp) 00479 COp = SimplifiedValues.lookup(Operand); 00480 if (COp) 00481 if (Constant *C = ConstantFoldInstOperands(I.getOpcode(), I.getType(), 00482 COp, TD)) { 00483 SimplifiedValues[&I] = C; 00484 return true; 00485 } 00486 00487 // Disable any SROA on the argument to arbitrary unary operators. 00488 disableSROA(Operand); 00489 00490 return false; 00491 } 00492 00493 bool CallAnalyzer::visitICmp(ICmpInst &I) { 00494 Value *LHS = I.getOperand(0), *RHS = I.getOperand(1); 00495 // First try to handle simplified comparisons. 00496 if (!isa<Constant>(LHS)) 00497 if (Constant *SimpleLHS = SimplifiedValues.lookup(LHS)) 00498 LHS = SimpleLHS; 00499 if (!isa<Constant>(RHS)) 00500 if (Constant *SimpleRHS = SimplifiedValues.lookup(RHS)) 00501 RHS = SimpleRHS; 00502 if (Constant *CLHS = dyn_cast<Constant>(LHS)) 00503 if (Constant *CRHS = dyn_cast<Constant>(RHS)) 00504 if (Constant *C = ConstantExpr::getICmp(I.getPredicate(), CLHS, CRHS)) { 00505 SimplifiedValues[&I] = C; 00506 return true; 00507 } 00508 00509 // Otherwise look for a comparison between constant offset pointers with 00510 // a common base. 00511 Value *LHSBase, *RHSBase; 00512 APInt LHSOffset, RHSOffset; 00513 llvm::tie(LHSBase, LHSOffset) = ConstantOffsetPtrs.lookup(LHS); 00514 if (LHSBase) { 00515 llvm::tie(RHSBase, RHSOffset) = ConstantOffsetPtrs.lookup(RHS); 00516 if (RHSBase && LHSBase == RHSBase) { 00517 // We have common bases, fold the icmp to a constant based on the 00518 // offsets. 00519 Constant *CLHS = ConstantInt::get(LHS->getContext(), LHSOffset); 00520 Constant *CRHS = ConstantInt::get(RHS->getContext(), RHSOffset); 00521 if (Constant *C = ConstantExpr::getICmp(I.getPredicate(), CLHS, CRHS)) { 00522 SimplifiedValues[&I] = C; 00523 ++NumConstantPtrCmps; 00524 return true; 00525 } 00526 } 00527 } 00528 00529 // If the comparison is an equality comparison with null, we can simplify it 00530 // for any alloca-derived argument. 00531 if (I.isEquality() && isa<ConstantPointerNull>(I.getOperand(1))) 00532 if (isAllocaDerivedArg(I.getOperand(0))) { 00533 // We can actually predict the result of comparisons between an 00534 // alloca-derived value and null. Note that this fires regardless of 00535 // SROA firing. 00536 bool IsNotEqual = I.getPredicate() == CmpInst::ICMP_NE; 00537 SimplifiedValues[&I] = IsNotEqual ? ConstantInt::getTrue(I.getType()) 00538 : ConstantInt::getFalse(I.getType()); 00539 return true; 00540 } 00541 00542 // Finally check for SROA candidates in comparisons. 00543 Value *SROAArg; 00544 DenseMap<Value *, int>::iterator CostIt; 00545 if (lookupSROAArgAndCost(I.getOperand(0), SROAArg, CostIt)) { 00546 if (isa<ConstantPointerNull>(I.getOperand(1))) { 00547 accumulateSROACost(CostIt, InlineConstants::InstrCost); 00548 return true; 00549 } 00550 00551 disableSROA(CostIt); 00552 } 00553 00554 return false; 00555 } 00556 00557 bool CallAnalyzer::visitSub(BinaryOperator &I) { 00558 // Try to handle a special case: we can fold computing the difference of two 00559 // constant-related pointers. 00560 Value *LHS = I.getOperand(0), *RHS = I.getOperand(1); 00561 Value *LHSBase, *RHSBase; 00562 APInt LHSOffset, RHSOffset; 00563 llvm::tie(LHSBase, LHSOffset) = ConstantOffsetPtrs.lookup(LHS); 00564 if (LHSBase) { 00565 llvm::tie(RHSBase, RHSOffset) = ConstantOffsetPtrs.lookup(RHS); 00566 if (RHSBase && LHSBase == RHSBase) { 00567 // We have common bases, fold the subtract to a constant based on the 00568 // offsets. 00569 Constant *CLHS = ConstantInt::get(LHS->getContext(), LHSOffset); 00570 Constant *CRHS = ConstantInt::get(RHS->getContext(), RHSOffset); 00571 if (Constant *C = ConstantExpr::getSub(CLHS, CRHS)) { 00572 SimplifiedValues[&I] = C; 00573 ++NumConstantPtrDiffs; 00574 return true; 00575 } 00576 } 00577 } 00578 00579 // Otherwise, fall back to the generic logic for simplifying and handling 00580 // instructions. 00581 return Base::visitSub(I); 00582 } 00583 00584 bool CallAnalyzer::visitBinaryOperator(BinaryOperator &I) { 00585 Value *LHS = I.getOperand(0), *RHS = I.getOperand(1); 00586 if (!isa<Constant>(LHS)) 00587 if (Constant *SimpleLHS = SimplifiedValues.lookup(LHS)) 00588 LHS = SimpleLHS; 00589 if (!isa<Constant>(RHS)) 00590 if (Constant *SimpleRHS = SimplifiedValues.lookup(RHS)) 00591 RHS = SimpleRHS; 00592 Value *SimpleV = SimplifyBinOp(I.getOpcode(), LHS, RHS, TD); 00593 if (Constant *C = dyn_cast_or_null<Constant>(SimpleV)) { 00594 SimplifiedValues[&I] = C; 00595 return true; 00596 } 00597 00598 // Disable any SROA on arguments to arbitrary, unsimplified binary operators. 00599 disableSROA(LHS); 00600 disableSROA(RHS); 00601 00602 return false; 00603 } 00604 00605 bool CallAnalyzer::visitLoad(LoadInst &I) { 00606 Value *SROAArg; 00607 DenseMap<Value *, int>::iterator CostIt; 00608 if (lookupSROAArgAndCost(I.getOperand(0), SROAArg, CostIt)) { 00609 if (I.isSimple()) { 00610 accumulateSROACost(CostIt, InlineConstants::InstrCost); 00611 return true; 00612 } 00613 00614 disableSROA(CostIt); 00615 } 00616 00617 return false; 00618 } 00619 00620 bool CallAnalyzer::visitStore(StoreInst &I) { 00621 Value *SROAArg; 00622 DenseMap<Value *, int>::iterator CostIt; 00623 if (lookupSROAArgAndCost(I.getOperand(0), SROAArg, CostIt)) { 00624 if (I.isSimple()) { 00625 accumulateSROACost(CostIt, InlineConstants::InstrCost); 00626 return true; 00627 } 00628 00629 disableSROA(CostIt); 00630 } 00631 00632 return false; 00633 } 00634 00635 bool CallAnalyzer::visitExtractValue(ExtractValueInst &I) { 00636 // Constant folding for extract value is trivial. 00637 Constant *C = dyn_cast<Constant>(I.getAggregateOperand()); 00638 if (!C) 00639 C = SimplifiedValues.lookup(I.getAggregateOperand()); 00640 if (C) { 00641 SimplifiedValues[&I] = ConstantExpr::getExtractValue(C, I.getIndices()); 00642 return true; 00643 } 00644 00645 // SROA can look through these but give them a cost. 00646 return false; 00647 } 00648 00649 bool CallAnalyzer::visitInsertValue(InsertValueInst &I) { 00650 // Constant folding for insert value is trivial. 00651 Constant *AggC = dyn_cast<Constant>(I.getAggregateOperand()); 00652 if (!AggC) 00653 AggC = SimplifiedValues.lookup(I.getAggregateOperand()); 00654 Constant *InsertedC = dyn_cast<Constant>(I.getInsertedValueOperand()); 00655 if (!InsertedC) 00656 InsertedC = SimplifiedValues.lookup(I.getInsertedValueOperand()); 00657 if (AggC && InsertedC) { 00658 SimplifiedValues[&I] = ConstantExpr::getInsertValue(AggC, InsertedC, 00659 I.getIndices()); 00660 return true; 00661 } 00662 00663 // SROA can look through these but give them a cost. 00664 return false; 00665 } 00666 00667 /// \brief Try to simplify a call site. 00668 /// 00669 /// Takes a concrete function and callsite and tries to actually simplify it by 00670 /// analyzing the arguments and call itself with instsimplify. Returns true if 00671 /// it has simplified the callsite to some other entity (a constant), making it 00672 /// free. 00673 bool CallAnalyzer::simplifyCallSite(Function *F, CallSite CS) { 00674 // FIXME: Using the instsimplify logic directly for this is inefficient 00675 // because we have to continually rebuild the argument list even when no 00676 // simplifications can be performed. Until that is fixed with remapping 00677 // inside of instsimplify, directly constant fold calls here. 00678 if (!canConstantFoldCallTo(F)) 00679 return false; 00680 00681 // Try to re-map the arguments to constants. 00682 SmallVector<Constant *, 4> ConstantArgs; 00683 ConstantArgs.reserve(CS.arg_size()); 00684 for (CallSite::arg_iterator I = CS.arg_begin(), E = CS.arg_end(); 00685 I != E; ++I) { 00686 Constant *C = dyn_cast<Constant>(*I); 00687 if (!C) 00688 C = dyn_cast_or_null<Constant>(SimplifiedValues.lookup(*I)); 00689 if (!C) 00690 return false; // This argument doesn't map to a constant. 00691 00692 ConstantArgs.push_back(C); 00693 } 00694 if (Constant *C = ConstantFoldCall(F, ConstantArgs)) { 00695 SimplifiedValues[CS.getInstruction()] = C; 00696 return true; 00697 } 00698 00699 return false; 00700 } 00701 00702 bool CallAnalyzer::visitCallSite(CallSite CS) { 00703 if (CS.isCall() && cast<CallInst>(CS.getInstruction())->canReturnTwice() && 00704 !F.getAttributes().hasAttribute(AttributeSet::FunctionIndex, 00705 Attribute::ReturnsTwice)) { 00706 // This aborts the entire analysis. 00707 ExposesReturnsTwice = true; 00708 return false; 00709 } 00710 if (CS.isCall() && 00711 cast<CallInst>(CS.getInstruction())->hasFnAttr(Attribute::NoDuplicate)) 00712 ContainsNoDuplicateCall = true; 00713 00714 if (Function *F = CS.getCalledFunction()) { 00715 // When we have a concrete function, first try to simplify it directly. 00716 if (simplifyCallSite(F, CS)) 00717 return true; 00718 00719 // Next check if it is an intrinsic we know about. 00720 // FIXME: Lift this into part of the InstVisitor. 00721 if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(CS.getInstruction())) { 00722 switch (II->getIntrinsicID()) { 00723 default: 00724 return Base::visitCallSite(CS); 00725 00726 case Intrinsic::memset: 00727 case Intrinsic::memcpy: 00728 case Intrinsic::memmove: 00729 // SROA can usually chew through these intrinsics, but they aren't free. 00730 return false; 00731 } 00732 } 00733 00734 if (F == CS.getInstruction()->getParent()->getParent()) { 00735 // This flag will fully abort the analysis, so don't bother with anything 00736 // else. 00737 IsRecursiveCall = true; 00738 return false; 00739 } 00740 00741 if (TTI.isLoweredToCall(F)) { 00742 // We account for the average 1 instruction per call argument setup 00743 // here. 00744 Cost += CS.arg_size() * InlineConstants::InstrCost; 00745 00746 // Everything other than inline ASM will also have a significant cost 00747 // merely from making the call. 00748 if (!isa<InlineAsm>(CS.getCalledValue())) 00749 Cost += InlineConstants::CallPenalty; 00750 } 00751 00752 return Base::visitCallSite(CS); 00753 } 00754 00755 // Otherwise we're in a very special case -- an indirect function call. See 00756 // if we can be particularly clever about this. 00757 Value *Callee = CS.getCalledValue(); 00758 00759 // First, pay the price of the argument setup. We account for the average 00760 // 1 instruction per call argument setup here. 00761 Cost += CS.arg_size() * InlineConstants::InstrCost; 00762 00763 // Next, check if this happens to be an indirect function call to a known 00764 // function in this inline context. If not, we've done all we can. 00765 Function *F = dyn_cast_or_null<Function>(SimplifiedValues.lookup(Callee)); 00766 if (!F) 00767 return Base::visitCallSite(CS); 00768 00769 // If we have a constant that we are calling as a function, we can peer 00770 // through it and see the function target. This happens not infrequently 00771 // during devirtualization and so we want to give it a hefty bonus for 00772 // inlining, but cap that bonus in the event that inlining wouldn't pan 00773 // out. Pretend to inline the function, with a custom threshold. 00774 CallAnalyzer CA(TD, TTI, *F, InlineConstants::IndirectCallThreshold); 00775 if (CA.analyzeCall(CS)) { 00776 // We were able to inline the indirect call! Subtract the cost from the 00777 // bonus we want to apply, but don't go below zero. 00778 Cost -= std::max(0, InlineConstants::IndirectCallThreshold - CA.getCost()); 00779 } 00780 00781 return Base::visitCallSite(CS); 00782 } 00783 00784 bool CallAnalyzer::visitInstruction(Instruction &I) { 00785 // Some instructions are free. All of the free intrinsics can also be 00786 // handled by SROA, etc. 00787 if (TargetTransformInfo::TCC_Free == TTI.getUserCost(&I)) 00788 return true; 00789 00790 // We found something we don't understand or can't handle. Mark any SROA-able 00791 // values in the operand list as no longer viable. 00792 for (User::op_iterator OI = I.op_begin(), OE = I.op_end(); OI != OE; ++OI) 00793 disableSROA(*OI); 00794 00795 return false; 00796 } 00797 00798 00799 /// \brief Analyze a basic block for its contribution to the inline cost. 00800 /// 00801 /// This method walks the analyzer over every instruction in the given basic 00802 /// block and accounts for their cost during inlining at this callsite. It 00803 /// aborts early if the threshold has been exceeded or an impossible to inline 00804 /// construct has been detected. It returns false if inlining is no longer 00805 /// viable, and true if inlining remains viable. 00806 bool CallAnalyzer::analyzeBlock(BasicBlock *BB) { 00807 for (BasicBlock::iterator I = BB->begin(), E = llvm::prior(BB->end()); 00808 I != E; ++I) { 00809 ++NumInstructions; 00810 if (isa<ExtractElementInst>(I) || I->getType()->isVectorTy()) 00811 ++NumVectorInstructions; 00812 00813 // If the instruction simplified to a constant, there is no cost to this 00814 // instruction. Visit the instructions using our InstVisitor to account for 00815 // all of the per-instruction logic. The visit tree returns true if we 00816 // consumed the instruction in any way, and false if the instruction's base 00817 // cost should count against inlining. 00818 if (Base::visit(I)) 00819 ++NumInstructionsSimplified; 00820 else 00821 Cost += InlineConstants::InstrCost; 00822 00823 // If the visit this instruction detected an uninlinable pattern, abort. 00824 if (IsRecursiveCall || ExposesReturnsTwice || HasDynamicAlloca) 00825 return false; 00826 00827 // If the caller is a recursive function then we don't want to inline 00828 // functions which allocate a lot of stack space because it would increase 00829 // the caller stack usage dramatically. 00830 if (IsCallerRecursive && 00831 AllocatedSize > InlineConstants::TotalAllocaSizeRecursiveCaller) 00832 return false; 00833 00834 if (NumVectorInstructions > NumInstructions/2) 00835 VectorBonus = FiftyPercentVectorBonus; 00836 else if (NumVectorInstructions > NumInstructions/10) 00837 VectorBonus = TenPercentVectorBonus; 00838 else 00839 VectorBonus = 0; 00840 00841 // Check if we've past the threshold so we don't spin in huge basic 00842 // blocks that will never inline. 00843 if (Cost > (Threshold + VectorBonus)) 00844 return false; 00845 } 00846 00847 return true; 00848 } 00849 00850 /// \brief Compute the base pointer and cumulative constant offsets for V. 00851 /// 00852 /// This strips all constant offsets off of V, leaving it the base pointer, and 00853 /// accumulates the total constant offset applied in the returned constant. It 00854 /// returns 0 if V is not a pointer, and returns the constant '0' if there are 00855 /// no constant offsets applied. 00856 ConstantInt *CallAnalyzer::stripAndComputeInBoundsConstantOffsets(Value *&V) { 00857 if (!TD || !V->getType()->isPointerTy()) 00858 return 0; 00859 00860 unsigned IntPtrWidth = TD->getPointerSizeInBits(); 00861 APInt Offset = APInt::getNullValue(IntPtrWidth); 00862 00863 // Even though we don't look through PHI nodes, we could be called on an 00864 // instruction in an unreachable block, which may be on a cycle. 00865 SmallPtrSet<Value *, 4> Visited; 00866 Visited.insert(V); 00867 do { 00868 if (GEPOperator *GEP = dyn_cast<GEPOperator>(V)) { 00869 if (!GEP->isInBounds() || !accumulateGEPOffset(*GEP, Offset)) 00870 return 0; 00871 V = GEP->getPointerOperand(); 00872 } else if (Operator::getOpcode(V) == Instruction::BitCast) { 00873 V = cast<Operator>(V)->getOperand(0); 00874 } else if (GlobalAlias *GA = dyn_cast<GlobalAlias>(V)) { 00875 if (GA->mayBeOverridden()) 00876 break; 00877 V = GA->getAliasee(); 00878 } else { 00879 break; 00880 } 00881 assert(V->getType()->isPointerTy() && "Unexpected operand type!"); 00882 } while (Visited.insert(V)); 00883 00884 Type *IntPtrTy = TD->getIntPtrType(V->getContext()); 00885 return cast<ConstantInt>(ConstantInt::get(IntPtrTy, Offset)); 00886 } 00887 00888 /// \brief Analyze a call site for potential inlining. 00889 /// 00890 /// Returns true if inlining this call is viable, and false if it is not 00891 /// viable. It computes the cost and adjusts the threshold based on numerous 00892 /// factors and heuristics. If this method returns false but the computed cost 00893 /// is below the computed threshold, then inlining was forcibly disabled by 00894 /// some artifact of the routine. 00895 bool CallAnalyzer::analyzeCall(CallSite CS) { 00896 ++NumCallsAnalyzed; 00897 00898 // Track whether the post-inlining function would have more than one basic 00899 // block. A single basic block is often intended for inlining. Balloon the 00900 // threshold by 50% until we pass the single-BB phase. 00901 bool SingleBB = true; 00902 int SingleBBBonus = Threshold / 2; 00903 Threshold += SingleBBBonus; 00904 00905 // Perform some tweaks to the cost and threshold based on the direct 00906 // callsite information. 00907 00908 // We want to more aggressively inline vector-dense kernels, so up the 00909 // threshold, and we'll lower it if the % of vector instructions gets too 00910 // low. 00911 assert(NumInstructions == 0); 00912 assert(NumVectorInstructions == 0); 00913 FiftyPercentVectorBonus = Threshold; 00914 TenPercentVectorBonus = Threshold / 2; 00915 00916 // Give out bonuses per argument, as the instructions setting them up will 00917 // be gone after inlining. 00918 for (unsigned I = 0, E = CS.arg_size(); I != E; ++I) { 00919 if (TD && CS.isByValArgument(I)) { 00920 // We approximate the number of loads and stores needed by dividing the 00921 // size of the byval type by the target's pointer size. 00922 PointerType *PTy = cast<PointerType>(CS.getArgument(I)->getType()); 00923 unsigned TypeSize = TD->getTypeSizeInBits(PTy->getElementType()); 00924 unsigned PointerSize = TD->getPointerSizeInBits(); 00925 // Ceiling division. 00926 unsigned NumStores = (TypeSize + PointerSize - 1) / PointerSize; 00927 00928 // If it generates more than 8 stores it is likely to be expanded as an 00929 // inline memcpy so we take that as an upper bound. Otherwise we assume 00930 // one load and one store per word copied. 00931 // FIXME: The maxStoresPerMemcpy setting from the target should be used 00932 // here instead of a magic number of 8, but it's not available via 00933 // DataLayout. 00934 NumStores = std::min(NumStores, 8U); 00935 00936 Cost -= 2 * NumStores * InlineConstants::InstrCost; 00937 } else { 00938 // For non-byval arguments subtract off one instruction per call 00939 // argument. 00940 Cost -= InlineConstants::InstrCost; 00941 } 00942 } 00943 00944 // If there is only one call of the function, and it has internal linkage, 00945 // the cost of inlining it drops dramatically. 00946 bool OnlyOneCallAndLocalLinkage = F.hasLocalLinkage() && F.hasOneUse() && 00947 &F == CS.getCalledFunction(); 00948 if (OnlyOneCallAndLocalLinkage) 00949 Cost += InlineConstants::LastCallToStaticBonus; 00950 00951 // If the instruction after the call, or if the normal destination of the 00952 // invoke is an unreachable instruction, the function is noreturn. As such, 00953 // there is little point in inlining this unless there is literally zero 00954 // cost. 00955 Instruction *Instr = CS.getInstruction(); 00956 if (InvokeInst *II = dyn_cast<InvokeInst>(Instr)) { 00957 if (isa<UnreachableInst>(II->getNormalDest()->begin())) 00958 Threshold = 1; 00959 } else if (isa<UnreachableInst>(++BasicBlock::iterator(Instr))) 00960 Threshold = 1; 00961 00962 // If this function uses the coldcc calling convention, prefer not to inline 00963 // it. 00964 if (F.getCallingConv() == CallingConv::Cold) 00965 Cost += InlineConstants::ColdccPenalty; 00966 00967 // Check if we're done. This can happen due to bonuses and penalties. 00968 if (Cost > Threshold) 00969 return false; 00970 00971 if (F.empty()) 00972 return true; 00973 00974 Function *Caller = CS.getInstruction()->getParent()->getParent(); 00975 // Check if the caller function is recursive itself. 00976 for (Value::use_iterator U = Caller->use_begin(), E = Caller->use_end(); 00977 U != E; ++U) { 00978 CallSite Site(cast<Value>(*U)); 00979 if (!Site) 00980 continue; 00981 Instruction *I = Site.getInstruction(); 00982 if (I->getParent()->getParent() == Caller) { 00983 IsCallerRecursive = true; 00984 break; 00985 } 00986 } 00987 00988 // Track whether we've seen a return instruction. The first return 00989 // instruction is free, as at least one will usually disappear in inlining. 00990 bool HasReturn = false; 00991 00992 // Populate our simplified values by mapping from function arguments to call 00993 // arguments with known important simplifications. 00994 CallSite::arg_iterator CAI = CS.arg_begin(); 00995 for (Function::arg_iterator FAI = F.arg_begin(), FAE = F.arg_end(); 00996 FAI != FAE; ++FAI, ++CAI) { 00997 assert(CAI != CS.arg_end()); 00998 if (Constant *C = dyn_cast<Constant>(CAI)) 00999 SimplifiedValues[FAI] = C; 01000 01001 Value *PtrArg = *CAI; 01002 if (ConstantInt *C = stripAndComputeInBoundsConstantOffsets(PtrArg)) { 01003 ConstantOffsetPtrs[FAI] = std::make_pair(PtrArg, C->getValue()); 01004 01005 // We can SROA any pointer arguments derived from alloca instructions. 01006 if (isa<AllocaInst>(PtrArg)) { 01007 SROAArgValues[FAI] = PtrArg; 01008 SROAArgCosts[PtrArg] = 0; 01009 } 01010 } 01011 } 01012 NumConstantArgs = SimplifiedValues.size(); 01013 NumConstantOffsetPtrArgs = ConstantOffsetPtrs.size(); 01014 NumAllocaArgs = SROAArgValues.size(); 01015 01016 // The worklist of live basic blocks in the callee *after* inlining. We avoid 01017 // adding basic blocks of the callee which can be proven to be dead for this 01018 // particular call site in order to get more accurate cost estimates. This 01019 // requires a somewhat heavyweight iteration pattern: we need to walk the 01020 // basic blocks in a breadth-first order as we insert live successors. To 01021 // accomplish this, prioritizing for small iterations because we exit after 01022 // crossing our threshold, we use a small-size optimized SetVector. 01023 typedef SetVector<BasicBlock *, SmallVector<BasicBlock *, 16>, 01024 SmallPtrSet<BasicBlock *, 16> > BBSetVector; 01025 BBSetVector BBWorklist; 01026 BBWorklist.insert(&F.getEntryBlock()); 01027 // Note that we *must not* cache the size, this loop grows the worklist. 01028 for (unsigned Idx = 0; Idx != BBWorklist.size(); ++Idx) { 01029 // Bail out the moment we cross the threshold. This means we'll under-count 01030 // the cost, but only when undercounting doesn't matter. 01031 if (Cost > (Threshold + VectorBonus)) 01032 break; 01033 01034 BasicBlock *BB = BBWorklist[Idx]; 01035 if (BB->empty()) 01036 continue; 01037 01038 // Handle the terminator cost here where we can track returns and other 01039 // function-wide constructs. 01040 TerminatorInst *TI = BB->getTerminator(); 01041 01042 // We never want to inline functions that contain an indirectbr. This is 01043 // incorrect because all the blockaddress's (in static global initializers 01044 // for example) would be referring to the original function, and this 01045 // indirect jump would jump from the inlined copy of the function into the 01046 // original function which is extremely undefined behavior. 01047 // FIXME: This logic isn't really right; we can safely inline functions 01048 // with indirectbr's as long as no other function or global references the 01049 // blockaddress of a block within the current function. And as a QOI issue, 01050 // if someone is using a blockaddress without an indirectbr, and that 01051 // reference somehow ends up in another function or global, we probably 01052 // don't want to inline this function. 01053 if (isa<IndirectBrInst>(TI)) 01054 return false; 01055 01056 if (!HasReturn && isa<ReturnInst>(TI)) 01057 HasReturn = true; 01058 else 01059 Cost += InlineConstants::InstrCost; 01060 01061 // Analyze the cost of this block. If we blow through the threshold, this 01062 // returns false, and we can bail on out. 01063 if (!analyzeBlock(BB)) { 01064 if (IsRecursiveCall || ExposesReturnsTwice || HasDynamicAlloca) 01065 return false; 01066 01067 // If the caller is a recursive function then we don't want to inline 01068 // functions which allocate a lot of stack space because it would increase 01069 // the caller stack usage dramatically. 01070 if (IsCallerRecursive && 01071 AllocatedSize > InlineConstants::TotalAllocaSizeRecursiveCaller) 01072 return false; 01073 01074 break; 01075 } 01076 01077 // Add in the live successors by first checking whether we have terminator 01078 // that may be simplified based on the values simplified by this call. 01079 if (BranchInst *BI = dyn_cast<BranchInst>(TI)) { 01080 if (BI->isConditional()) { 01081 Value *Cond = BI->getCondition(); 01082 if (ConstantInt *SimpleCond 01083 = dyn_cast_or_null<ConstantInt>(SimplifiedValues.lookup(Cond))) { 01084 BBWorklist.insert(BI->getSuccessor(SimpleCond->isZero() ? 1 : 0)); 01085 continue; 01086 } 01087 } 01088 } else if (SwitchInst *SI = dyn_cast<SwitchInst>(TI)) { 01089 Value *Cond = SI->getCondition(); 01090 if (ConstantInt *SimpleCond 01091 = dyn_cast_or_null<ConstantInt>(SimplifiedValues.lookup(Cond))) { 01092 BBWorklist.insert(SI->findCaseValue(SimpleCond).getCaseSuccessor()); 01093 continue; 01094 } 01095 } 01096 01097 // If we're unable to select a particular successor, just count all of 01098 // them. 01099 for (unsigned TIdx = 0, TSize = TI->getNumSuccessors(); TIdx != TSize; 01100 ++TIdx) 01101 BBWorklist.insert(TI->getSuccessor(TIdx)); 01102 01103 // If we had any successors at this point, than post-inlining is likely to 01104 // have them as well. Note that we assume any basic blocks which existed 01105 // due to branches or switches which folded above will also fold after 01106 // inlining. 01107 if (SingleBB && TI->getNumSuccessors() > 1) { 01108 // Take off the bonus we applied to the threshold. 01109 Threshold -= SingleBBBonus; 01110 SingleBB = false; 01111 } 01112 } 01113 01114 // If this is a noduplicate call, we can still inline as long as 01115 // inlining this would cause the removal of the caller (so the instruction 01116 // is not actually duplicated, just moved). 01117 if (!OnlyOneCallAndLocalLinkage && ContainsNoDuplicateCall) 01118 return false; 01119 01120 Threshold += VectorBonus; 01121 01122 return Cost < Threshold; 01123 } 01124 01125 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) 01126 /// \brief Dump stats about this call's analysis. 01127 void CallAnalyzer::dump() { 01128 #define DEBUG_PRINT_STAT(x) llvm::dbgs() << " " #x ": " << x << "\n" 01129 DEBUG_PRINT_STAT(NumConstantArgs); 01130 DEBUG_PRINT_STAT(NumConstantOffsetPtrArgs); 01131 DEBUG_PRINT_STAT(NumAllocaArgs); 01132 DEBUG_PRINT_STAT(NumConstantPtrCmps); 01133 DEBUG_PRINT_STAT(NumConstantPtrDiffs); 01134 DEBUG_PRINT_STAT(NumInstructionsSimplified); 01135 DEBUG_PRINT_STAT(SROACostSavings); 01136 DEBUG_PRINT_STAT(SROACostSavingsLost); 01137 DEBUG_PRINT_STAT(ContainsNoDuplicateCall); 01138 #undef DEBUG_PRINT_STAT 01139 } 01140 #endif 01141 01142 INITIALIZE_PASS_BEGIN(InlineCostAnalysis, "inline-cost", "Inline Cost Analysis", 01143 true, true) 01144 INITIALIZE_AG_DEPENDENCY(TargetTransformInfo) 01145 INITIALIZE_PASS_END(InlineCostAnalysis, "inline-cost", "Inline Cost Analysis", 01146 true, true) 01147 01148 char InlineCostAnalysis::ID = 0; 01149 01150 InlineCostAnalysis::InlineCostAnalysis() : CallGraphSCCPass(ID), TD(0) {} 01151 01152 InlineCostAnalysis::~InlineCostAnalysis() {} 01153 01154 void InlineCostAnalysis::getAnalysisUsage(AnalysisUsage &AU) const { 01155 AU.setPreservesAll(); 01156 AU.addRequired<TargetTransformInfo>(); 01157 CallGraphSCCPass::getAnalysisUsage(AU); 01158 } 01159 01160 bool InlineCostAnalysis::runOnSCC(CallGraphSCC &SCC) { 01161 TD = getAnalysisIfAvailable<DataLayout>(); 01162 TTI = &getAnalysis<TargetTransformInfo>(); 01163 return false; 01164 } 01165 01166 InlineCost InlineCostAnalysis::getInlineCost(CallSite CS, int Threshold) { 01167 return getInlineCost(CS, CS.getCalledFunction(), Threshold); 01168 } 01169 01170 InlineCost InlineCostAnalysis::getInlineCost(CallSite CS, Function *Callee, 01171 int Threshold) { 01172 // Cannot inline indirect calls. 01173 if (!Callee) 01174 return llvm::InlineCost::getNever(); 01175 01176 // Calls to functions with always-inline attributes should be inlined 01177 // whenever possible. 01178 if (Callee->getAttributes().hasAttribute(AttributeSet::FunctionIndex, 01179 Attribute::AlwaysInline)) { 01180 if (isInlineViable(*Callee)) 01181 return llvm::InlineCost::getAlways(); 01182 return llvm::InlineCost::getNever(); 01183 } 01184 01185 // Don't inline functions which can be redefined at link-time to mean 01186 // something else. Don't inline functions marked noinline or call sites 01187 // marked noinline. 01188 if (Callee->mayBeOverridden() || 01189 Callee->getAttributes().hasAttribute(AttributeSet::FunctionIndex, 01190 Attribute::NoInline) || 01191 CS.isNoInline()) 01192 return llvm::InlineCost::getNever(); 01193 01194 DEBUG(llvm::dbgs() << " Analyzing call of " << Callee->getName() 01195 << "...\n"); 01196 01197 CallAnalyzer CA(TD, *TTI, *Callee, Threshold); 01198 bool ShouldInline = CA.analyzeCall(CS); 01199 01200 DEBUG(CA.dump()); 01201 01202 // Check if there was a reason to force inlining or no inlining. 01203 if (!ShouldInline && CA.getCost() < CA.getThreshold()) 01204 return InlineCost::getNever(); 01205 if (ShouldInline && CA.getCost() >= CA.getThreshold()) 01206 return InlineCost::getAlways(); 01207 01208 return llvm::InlineCost::get(CA.getCost(), CA.getThreshold()); 01209 } 01210 01211 bool InlineCostAnalysis::isInlineViable(Function &F) { 01212 bool ReturnsTwice = 01213 F.getAttributes().hasAttribute(AttributeSet::FunctionIndex, 01214 Attribute::ReturnsTwice); 01215 for (Function::iterator BI = F.begin(), BE = F.end(); BI != BE; ++BI) { 01216 // Disallow inlining of functions which contain an indirect branch. 01217 if (isa<IndirectBrInst>(BI->getTerminator())) 01218 return false; 01219 01220 for (BasicBlock::iterator II = BI->begin(), IE = BI->end(); II != IE; 01221 ++II) { 01222 CallSite CS(II); 01223 if (!CS) 01224 continue; 01225 01226 // Disallow recursive calls. 01227 if (&F == CS.getCalledFunction()) 01228 return false; 01229 01230 // Disallow calls which expose returns-twice to a function not previously 01231 // attributed as such. 01232 if (!ReturnsTwice && CS.isCall() && 01233 cast<CallInst>(CS.getInstruction())->canReturnTwice()) 01234 return false; 01235 } 01236 } 01237 01238 return true; 01239 }