LLVM API Documentation
00001 //===- MemoryDependenceAnalysis.cpp - Mem Deps Implementation --*- C++ -*-===// 00002 // 00003 // The LLVM Compiler Infrastructure 00004 // 00005 // This file is distributed under the University of Illinois Open Source 00006 // License. See LICENSE.TXT for details. 00007 // 00008 //===----------------------------------------------------------------------===// 00009 // 00010 // This file implements an analysis that determines, for a given memory 00011 // operation, what preceding memory operations it depends on. It builds on 00012 // alias analysis information, and tries to provide a lazy, caching interface to 00013 // a common kind of alias information query. 00014 // 00015 //===----------------------------------------------------------------------===// 00016 00017 #define DEBUG_TYPE "memdep" 00018 #include "llvm/Analysis/MemoryDependenceAnalysis.h" 00019 #include "llvm/ADT/STLExtras.h" 00020 #include "llvm/ADT/Statistic.h" 00021 #include "llvm/Analysis/AliasAnalysis.h" 00022 #include "llvm/Analysis/Dominators.h" 00023 #include "llvm/Analysis/InstructionSimplify.h" 00024 #include "llvm/Analysis/MemoryBuiltins.h" 00025 #include "llvm/Analysis/PHITransAddr.h" 00026 #include "llvm/Analysis/ValueTracking.h" 00027 #include "llvm/IR/DataLayout.h" 00028 #include "llvm/IR/Function.h" 00029 #include "llvm/IR/Instructions.h" 00030 #include "llvm/IR/IntrinsicInst.h" 00031 #include "llvm/IR/LLVMContext.h" 00032 #include "llvm/Support/Debug.h" 00033 #include "llvm/Support/PredIteratorCache.h" 00034 using namespace llvm; 00035 00036 STATISTIC(NumCacheNonLocal, "Number of fully cached non-local responses"); 00037 STATISTIC(NumCacheDirtyNonLocal, "Number of dirty cached non-local responses"); 00038 STATISTIC(NumUncacheNonLocal, "Number of uncached non-local responses"); 00039 00040 STATISTIC(NumCacheNonLocalPtr, 00041 "Number of fully cached non-local ptr responses"); 00042 STATISTIC(NumCacheDirtyNonLocalPtr, 00043 "Number of cached, but dirty, non-local ptr responses"); 00044 STATISTIC(NumUncacheNonLocalPtr, 00045 "Number of uncached non-local ptr responses"); 00046 STATISTIC(NumCacheCompleteNonLocalPtr, 00047 "Number of block queries that were completely cached"); 00048 00049 // Limit for the number of instructions to scan in a block. 00050 static const int BlockScanLimit = 100; 00051 00052 char MemoryDependenceAnalysis::ID = 0; 00053 00054 // Register this pass... 00055 INITIALIZE_PASS_BEGIN(MemoryDependenceAnalysis, "memdep", 00056 "Memory Dependence Analysis", false, true) 00057 INITIALIZE_AG_DEPENDENCY(AliasAnalysis) 00058 INITIALIZE_PASS_END(MemoryDependenceAnalysis, "memdep", 00059 "Memory Dependence Analysis", false, true) 00060 00061 MemoryDependenceAnalysis::MemoryDependenceAnalysis() 00062 : FunctionPass(ID), PredCache(0) { 00063 initializeMemoryDependenceAnalysisPass(*PassRegistry::getPassRegistry()); 00064 } 00065 MemoryDependenceAnalysis::~MemoryDependenceAnalysis() { 00066 } 00067 00068 /// Clean up memory in between runs 00069 void MemoryDependenceAnalysis::releaseMemory() { 00070 LocalDeps.clear(); 00071 NonLocalDeps.clear(); 00072 NonLocalPointerDeps.clear(); 00073 ReverseLocalDeps.clear(); 00074 ReverseNonLocalDeps.clear(); 00075 ReverseNonLocalPtrDeps.clear(); 00076 PredCache->clear(); 00077 } 00078 00079 00080 00081 /// getAnalysisUsage - Does not modify anything. It uses Alias Analysis. 00082 /// 00083 void MemoryDependenceAnalysis::getAnalysisUsage(AnalysisUsage &AU) const { 00084 AU.setPreservesAll(); 00085 AU.addRequiredTransitive<AliasAnalysis>(); 00086 } 00087 00088 bool MemoryDependenceAnalysis::runOnFunction(Function &) { 00089 AA = &getAnalysis<AliasAnalysis>(); 00090 TD = getAnalysisIfAvailable<DataLayout>(); 00091 DT = getAnalysisIfAvailable<DominatorTree>(); 00092 if (!PredCache) 00093 PredCache.reset(new PredIteratorCache()); 00094 return false; 00095 } 00096 00097 /// RemoveFromReverseMap - This is a helper function that removes Val from 00098 /// 'Inst's set in ReverseMap. If the set becomes empty, remove Inst's entry. 00099 template <typename KeyTy> 00100 static void RemoveFromReverseMap(DenseMap<Instruction*, 00101 SmallPtrSet<KeyTy, 4> > &ReverseMap, 00102 Instruction *Inst, KeyTy Val) { 00103 typename DenseMap<Instruction*, SmallPtrSet<KeyTy, 4> >::iterator 00104 InstIt = ReverseMap.find(Inst); 00105 assert(InstIt != ReverseMap.end() && "Reverse map out of sync?"); 00106 bool Found = InstIt->second.erase(Val); 00107 assert(Found && "Invalid reverse map!"); (void)Found; 00108 if (InstIt->second.empty()) 00109 ReverseMap.erase(InstIt); 00110 } 00111 00112 /// GetLocation - If the given instruction references a specific memory 00113 /// location, fill in Loc with the details, otherwise set Loc.Ptr to null. 00114 /// Return a ModRefInfo value describing the general behavior of the 00115 /// instruction. 00116 static 00117 AliasAnalysis::ModRefResult GetLocation(const Instruction *Inst, 00118 AliasAnalysis::Location &Loc, 00119 AliasAnalysis *AA) { 00120 if (const LoadInst *LI = dyn_cast<LoadInst>(Inst)) { 00121 if (LI->isUnordered()) { 00122 Loc = AA->getLocation(LI); 00123 return AliasAnalysis::Ref; 00124 } 00125 if (LI->getOrdering() == Monotonic) { 00126 Loc = AA->getLocation(LI); 00127 return AliasAnalysis::ModRef; 00128 } 00129 Loc = AliasAnalysis::Location(); 00130 return AliasAnalysis::ModRef; 00131 } 00132 00133 if (const StoreInst *SI = dyn_cast<StoreInst>(Inst)) { 00134 if (SI->isUnordered()) { 00135 Loc = AA->getLocation(SI); 00136 return AliasAnalysis::Mod; 00137 } 00138 if (SI->getOrdering() == Monotonic) { 00139 Loc = AA->getLocation(SI); 00140 return AliasAnalysis::ModRef; 00141 } 00142 Loc = AliasAnalysis::Location(); 00143 return AliasAnalysis::ModRef; 00144 } 00145 00146 if (const VAArgInst *V = dyn_cast<VAArgInst>(Inst)) { 00147 Loc = AA->getLocation(V); 00148 return AliasAnalysis::ModRef; 00149 } 00150 00151 if (const CallInst *CI = isFreeCall(Inst, AA->getTargetLibraryInfo())) { 00152 // calls to free() deallocate the entire structure 00153 Loc = AliasAnalysis::Location(CI->getArgOperand(0)); 00154 return AliasAnalysis::Mod; 00155 } 00156 00157 if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(Inst)) 00158 switch (II->getIntrinsicID()) { 00159 case Intrinsic::lifetime_start: 00160 case Intrinsic::lifetime_end: 00161 case Intrinsic::invariant_start: 00162 Loc = AliasAnalysis::Location(II->getArgOperand(1), 00163 cast<ConstantInt>(II->getArgOperand(0)) 00164 ->getZExtValue(), 00165 II->getMetadata(LLVMContext::MD_tbaa)); 00166 // These intrinsics don't really modify the memory, but returning Mod 00167 // will allow them to be handled conservatively. 00168 return AliasAnalysis::Mod; 00169 case Intrinsic::invariant_end: 00170 Loc = AliasAnalysis::Location(II->getArgOperand(2), 00171 cast<ConstantInt>(II->getArgOperand(1)) 00172 ->getZExtValue(), 00173 II->getMetadata(LLVMContext::MD_tbaa)); 00174 // These intrinsics don't really modify the memory, but returning Mod 00175 // will allow them to be handled conservatively. 00176 return AliasAnalysis::Mod; 00177 default: 00178 break; 00179 } 00180 00181 // Otherwise, just do the coarse-grained thing that always works. 00182 if (Inst->mayWriteToMemory()) 00183 return AliasAnalysis::ModRef; 00184 if (Inst->mayReadFromMemory()) 00185 return AliasAnalysis::Ref; 00186 return AliasAnalysis::NoModRef; 00187 } 00188 00189 /// getCallSiteDependencyFrom - Private helper for finding the local 00190 /// dependencies of a call site. 00191 MemDepResult MemoryDependenceAnalysis:: 00192 getCallSiteDependencyFrom(CallSite CS, bool isReadOnlyCall, 00193 BasicBlock::iterator ScanIt, BasicBlock *BB) { 00194 unsigned Limit = BlockScanLimit; 00195 00196 // Walk backwards through the block, looking for dependencies 00197 while (ScanIt != BB->begin()) { 00198 // Limit the amount of scanning we do so we don't end up with quadratic 00199 // running time on extreme testcases. 00200 --Limit; 00201 if (!Limit) 00202 return MemDepResult::getUnknown(); 00203 00204 Instruction *Inst = --ScanIt; 00205 00206 // If this inst is a memory op, get the pointer it accessed 00207 AliasAnalysis::Location Loc; 00208 AliasAnalysis::ModRefResult MR = GetLocation(Inst, Loc, AA); 00209 if (Loc.Ptr) { 00210 // A simple instruction. 00211 if (AA->getModRefInfo(CS, Loc) != AliasAnalysis::NoModRef) 00212 return MemDepResult::getClobber(Inst); 00213 continue; 00214 } 00215 00216 if (CallSite InstCS = cast<Value>(Inst)) { 00217 // Debug intrinsics don't cause dependences. 00218 if (isa<DbgInfoIntrinsic>(Inst)) continue; 00219 // If these two calls do not interfere, look past it. 00220 switch (AA->getModRefInfo(CS, InstCS)) { 00221 case AliasAnalysis::NoModRef: 00222 // If the two calls are the same, return InstCS as a Def, so that 00223 // CS can be found redundant and eliminated. 00224 if (isReadOnlyCall && !(MR & AliasAnalysis::Mod) && 00225 CS.getInstruction()->isIdenticalToWhenDefined(Inst)) 00226 return MemDepResult::getDef(Inst); 00227 00228 // Otherwise if the two calls don't interact (e.g. InstCS is readnone) 00229 // keep scanning. 00230 continue; 00231 default: 00232 return MemDepResult::getClobber(Inst); 00233 } 00234 } 00235 00236 // If we could not obtain a pointer for the instruction and the instruction 00237 // touches memory then assume that this is a dependency. 00238 if (MR != AliasAnalysis::NoModRef) 00239 return MemDepResult::getClobber(Inst); 00240 } 00241 00242 // No dependence found. If this is the entry block of the function, it is 00243 // unknown, otherwise it is non-local. 00244 if (BB != &BB->getParent()->getEntryBlock()) 00245 return MemDepResult::getNonLocal(); 00246 return MemDepResult::getNonFuncLocal(); 00247 } 00248 00249 /// isLoadLoadClobberIfExtendedToFullWidth - Return true if LI is a load that 00250 /// would fully overlap MemLoc if done as a wider legal integer load. 00251 /// 00252 /// MemLocBase, MemLocOffset are lazily computed here the first time the 00253 /// base/offs of memloc is needed. 00254 static bool 00255 isLoadLoadClobberIfExtendedToFullWidth(const AliasAnalysis::Location &MemLoc, 00256 const Value *&MemLocBase, 00257 int64_t &MemLocOffs, 00258 const LoadInst *LI, 00259 const DataLayout *TD) { 00260 // If we have no target data, we can't do this. 00261 if (TD == 0) return false; 00262 00263 // If we haven't already computed the base/offset of MemLoc, do so now. 00264 if (MemLocBase == 0) 00265 MemLocBase = GetPointerBaseWithConstantOffset(MemLoc.Ptr, MemLocOffs, TD); 00266 00267 unsigned Size = MemoryDependenceAnalysis:: 00268 getLoadLoadClobberFullWidthSize(MemLocBase, MemLocOffs, MemLoc.Size, 00269 LI, *TD); 00270 return Size != 0; 00271 } 00272 00273 /// getLoadLoadClobberFullWidthSize - This is a little bit of analysis that 00274 /// looks at a memory location for a load (specified by MemLocBase, Offs, 00275 /// and Size) and compares it against a load. If the specified load could 00276 /// be safely widened to a larger integer load that is 1) still efficient, 00277 /// 2) safe for the target, and 3) would provide the specified memory 00278 /// location value, then this function returns the size in bytes of the 00279 /// load width to use. If not, this returns zero. 00280 unsigned MemoryDependenceAnalysis:: 00281 getLoadLoadClobberFullWidthSize(const Value *MemLocBase, int64_t MemLocOffs, 00282 unsigned MemLocSize, const LoadInst *LI, 00283 const DataLayout &TD) { 00284 // We can only extend simple integer loads. 00285 if (!isa<IntegerType>(LI->getType()) || !LI->isSimple()) return 0; 00286 00287 // Load widening is hostile to ThreadSanitizer: it may cause false positives 00288 // or make the reports more cryptic (access sizes are wrong). 00289 if (LI->getParent()->getParent()->getAttributes(). 00290 hasAttribute(AttributeSet::FunctionIndex, Attribute::SanitizeThread)) 00291 return 0; 00292 00293 // Get the base of this load. 00294 int64_t LIOffs = 0; 00295 const Value *LIBase = 00296 GetPointerBaseWithConstantOffset(LI->getPointerOperand(), LIOffs, &TD); 00297 00298 // If the two pointers are not based on the same pointer, we can't tell that 00299 // they are related. 00300 if (LIBase != MemLocBase) return 0; 00301 00302 // Okay, the two values are based on the same pointer, but returned as 00303 // no-alias. This happens when we have things like two byte loads at "P+1" 00304 // and "P+3". Check to see if increasing the size of the "LI" load up to its 00305 // alignment (or the largest native integer type) will allow us to load all 00306 // the bits required by MemLoc. 00307 00308 // If MemLoc is before LI, then no widening of LI will help us out. 00309 if (MemLocOffs < LIOffs) return 0; 00310 00311 // Get the alignment of the load in bytes. We assume that it is safe to load 00312 // any legal integer up to this size without a problem. For example, if we're 00313 // looking at an i8 load on x86-32 that is known 1024 byte aligned, we can 00314 // widen it up to an i32 load. If it is known 2-byte aligned, we can widen it 00315 // to i16. 00316 unsigned LoadAlign = LI->getAlignment(); 00317 00318 int64_t MemLocEnd = MemLocOffs+MemLocSize; 00319 00320 // If no amount of rounding up will let MemLoc fit into LI, then bail out. 00321 if (LIOffs+LoadAlign < MemLocEnd) return 0; 00322 00323 // This is the size of the load to try. Start with the next larger power of 00324 // two. 00325 unsigned NewLoadByteSize = LI->getType()->getPrimitiveSizeInBits()/8U; 00326 NewLoadByteSize = NextPowerOf2(NewLoadByteSize); 00327 00328 while (1) { 00329 // If this load size is bigger than our known alignment or would not fit 00330 // into a native integer register, then we fail. 00331 if (NewLoadByteSize > LoadAlign || 00332 !TD.fitsInLegalInteger(NewLoadByteSize*8)) 00333 return 0; 00334 00335 if (LIOffs+NewLoadByteSize > MemLocEnd && 00336 LI->getParent()->getParent()->getAttributes(). 00337 hasAttribute(AttributeSet::FunctionIndex, Attribute::SanitizeAddress)) 00338 // We will be reading past the location accessed by the original program. 00339 // While this is safe in a regular build, Address Safety analysis tools 00340 // may start reporting false warnings. So, don't do widening. 00341 return 0; 00342 00343 // If a load of this width would include all of MemLoc, then we succeed. 00344 if (LIOffs+NewLoadByteSize >= MemLocEnd) 00345 return NewLoadByteSize; 00346 00347 NewLoadByteSize <<= 1; 00348 } 00349 } 00350 00351 /// getPointerDependencyFrom - Return the instruction on which a memory 00352 /// location depends. If isLoad is true, this routine ignores may-aliases with 00353 /// read-only operations. If isLoad is false, this routine ignores may-aliases 00354 /// with reads from read-only locations. If possible, pass the query 00355 /// instruction as well; this function may take advantage of the metadata 00356 /// annotated to the query instruction to refine the result. 00357 MemDepResult MemoryDependenceAnalysis:: 00358 getPointerDependencyFrom(const AliasAnalysis::Location &MemLoc, bool isLoad, 00359 BasicBlock::iterator ScanIt, BasicBlock *BB, 00360 Instruction *QueryInst) { 00361 00362 const Value *MemLocBase = 0; 00363 int64_t MemLocOffset = 0; 00364 unsigned Limit = BlockScanLimit; 00365 bool isInvariantLoad = false; 00366 if (isLoad && QueryInst) { 00367 LoadInst *LI = dyn_cast<LoadInst>(QueryInst); 00368 if (LI && LI->getMetadata(LLVMContext::MD_invariant_load) != 0) 00369 isInvariantLoad = true; 00370 } 00371 00372 // Walk backwards through the basic block, looking for dependencies. 00373 while (ScanIt != BB->begin()) { 00374 // Limit the amount of scanning we do so we don't end up with quadratic 00375 // running time on extreme testcases. 00376 --Limit; 00377 if (!Limit) 00378 return MemDepResult::getUnknown(); 00379 00380 Instruction *Inst = --ScanIt; 00381 00382 if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(Inst)) { 00383 // Debug intrinsics don't (and can't) cause dependences. 00384 if (isa<DbgInfoIntrinsic>(II)) continue; 00385 00386 // If we reach a lifetime begin or end marker, then the query ends here 00387 // because the value is undefined. 00388 if (II->getIntrinsicID() == Intrinsic::lifetime_start) { 00389 // FIXME: This only considers queries directly on the invariant-tagged 00390 // pointer, not on query pointers that are indexed off of them. It'd 00391 // be nice to handle that at some point (the right approach is to use 00392 // GetPointerBaseWithConstantOffset). 00393 if (AA->isMustAlias(AliasAnalysis::Location(II->getArgOperand(1)), 00394 MemLoc)) 00395 return MemDepResult::getDef(II); 00396 continue; 00397 } 00398 } 00399 00400 // Values depend on loads if the pointers are must aliased. This means that 00401 // a load depends on another must aliased load from the same value. 00402 if (LoadInst *LI = dyn_cast<LoadInst>(Inst)) { 00403 // Atomic loads have complications involved. 00404 // FIXME: This is overly conservative. 00405 if (!LI->isUnordered()) 00406 return MemDepResult::getClobber(LI); 00407 00408 AliasAnalysis::Location LoadLoc = AA->getLocation(LI); 00409 00410 // If we found a pointer, check if it could be the same as our pointer. 00411 AliasAnalysis::AliasResult R = AA->alias(LoadLoc, MemLoc); 00412 00413 if (isLoad) { 00414 if (R == AliasAnalysis::NoAlias) { 00415 // If this is an over-aligned integer load (for example, 00416 // "load i8* %P, align 4") see if it would obviously overlap with the 00417 // queried location if widened to a larger load (e.g. if the queried 00418 // location is 1 byte at P+1). If so, return it as a load/load 00419 // clobber result, allowing the client to decide to widen the load if 00420 // it wants to. 00421 if (IntegerType *ITy = dyn_cast<IntegerType>(LI->getType())) 00422 if (LI->getAlignment()*8 > ITy->getPrimitiveSizeInBits() && 00423 isLoadLoadClobberIfExtendedToFullWidth(MemLoc, MemLocBase, 00424 MemLocOffset, LI, TD)) 00425 return MemDepResult::getClobber(Inst); 00426 00427 continue; 00428 } 00429 00430 // Must aliased loads are defs of each other. 00431 if (R == AliasAnalysis::MustAlias) 00432 return MemDepResult::getDef(Inst); 00433 00434 #if 0 // FIXME: Temporarily disabled. GVN is cleverly rewriting loads 00435 // in terms of clobbering loads, but since it does this by looking 00436 // at the clobbering load directly, it doesn't know about any 00437 // phi translation that may have happened along the way. 00438 00439 // If we have a partial alias, then return this as a clobber for the 00440 // client to handle. 00441 if (R == AliasAnalysis::PartialAlias) 00442 return MemDepResult::getClobber(Inst); 00443 #endif 00444 00445 // Random may-alias loads don't depend on each other without a 00446 // dependence. 00447 continue; 00448 } 00449 00450 // Stores don't depend on other no-aliased accesses. 00451 if (R == AliasAnalysis::NoAlias) 00452 continue; 00453 00454 // Stores don't alias loads from read-only memory. 00455 if (AA->pointsToConstantMemory(LoadLoc)) 00456 continue; 00457 00458 // Stores depend on may/must aliased loads. 00459 return MemDepResult::getDef(Inst); 00460 } 00461 00462 if (StoreInst *SI = dyn_cast<StoreInst>(Inst)) { 00463 // Atomic stores have complications involved. 00464 // FIXME: This is overly conservative. 00465 if (!SI->isUnordered()) 00466 return MemDepResult::getClobber(SI); 00467 00468 // If alias analysis can tell that this store is guaranteed to not modify 00469 // the query pointer, ignore it. Use getModRefInfo to handle cases where 00470 // the query pointer points to constant memory etc. 00471 if (AA->getModRefInfo(SI, MemLoc) == AliasAnalysis::NoModRef) 00472 continue; 00473 00474 // Ok, this store might clobber the query pointer. Check to see if it is 00475 // a must alias: in this case, we want to return this as a def. 00476 AliasAnalysis::Location StoreLoc = AA->getLocation(SI); 00477 00478 // If we found a pointer, check if it could be the same as our pointer. 00479 AliasAnalysis::AliasResult R = AA->alias(StoreLoc, MemLoc); 00480 00481 if (R == AliasAnalysis::NoAlias) 00482 continue; 00483 if (R == AliasAnalysis::MustAlias) 00484 return MemDepResult::getDef(Inst); 00485 if (isInvariantLoad) 00486 continue; 00487 return MemDepResult::getClobber(Inst); 00488 } 00489 00490 // If this is an allocation, and if we know that the accessed pointer is to 00491 // the allocation, return Def. This means that there is no dependence and 00492 // the access can be optimized based on that. For example, a load could 00493 // turn into undef. 00494 // Note: Only determine this to be a malloc if Inst is the malloc call, not 00495 // a subsequent bitcast of the malloc call result. There can be stores to 00496 // the malloced memory between the malloc call and its bitcast uses, and we 00497 // need to continue scanning until the malloc call. 00498 const TargetLibraryInfo *TLI = AA->getTargetLibraryInfo(); 00499 if (isa<AllocaInst>(Inst) || isNoAliasFn(Inst, TLI)) { 00500 const Value *AccessPtr = GetUnderlyingObject(MemLoc.Ptr, TD); 00501 00502 if (AccessPtr == Inst || AA->isMustAlias(Inst, AccessPtr)) 00503 return MemDepResult::getDef(Inst); 00504 // Be conservative if the accessed pointer may alias the allocation. 00505 if (AA->alias(Inst, AccessPtr) != AliasAnalysis::NoAlias) 00506 return MemDepResult::getClobber(Inst); 00507 // If the allocation is not aliased and does not read memory (like 00508 // strdup), it is safe to ignore. 00509 if (isa<AllocaInst>(Inst) || 00510 isMallocLikeFn(Inst, TLI) || isCallocLikeFn(Inst, TLI)) 00511 continue; 00512 } 00513 00514 // See if this instruction (e.g. a call or vaarg) mod/ref's the pointer. 00515 AliasAnalysis::ModRefResult MR = AA->getModRefInfo(Inst, MemLoc); 00516 // If necessary, perform additional analysis. 00517 if (MR == AliasAnalysis::ModRef) 00518 MR = AA->callCapturesBefore(Inst, MemLoc, DT); 00519 switch (MR) { 00520 case AliasAnalysis::NoModRef: 00521 // If the call has no effect on the queried pointer, just ignore it. 00522 continue; 00523 case AliasAnalysis::Mod: 00524 return MemDepResult::getClobber(Inst); 00525 case AliasAnalysis::Ref: 00526 // If the call is known to never store to the pointer, and if this is a 00527 // load query, we can safely ignore it (scan past it). 00528 if (isLoad) 00529 continue; 00530 default: 00531 // Otherwise, there is a potential dependence. Return a clobber. 00532 return MemDepResult::getClobber(Inst); 00533 } 00534 } 00535 00536 // No dependence found. If this is the entry block of the function, it is 00537 // unknown, otherwise it is non-local. 00538 if (BB != &BB->getParent()->getEntryBlock()) 00539 return MemDepResult::getNonLocal(); 00540 return MemDepResult::getNonFuncLocal(); 00541 } 00542 00543 /// getDependency - Return the instruction on which a memory operation 00544 /// depends. 00545 MemDepResult MemoryDependenceAnalysis::getDependency(Instruction *QueryInst) { 00546 Instruction *ScanPos = QueryInst; 00547 00548 // Check for a cached result 00549 MemDepResult &LocalCache = LocalDeps[QueryInst]; 00550 00551 // If the cached entry is non-dirty, just return it. Note that this depends 00552 // on MemDepResult's default constructing to 'dirty'. 00553 if (!LocalCache.isDirty()) 00554 return LocalCache; 00555 00556 // Otherwise, if we have a dirty entry, we know we can start the scan at that 00557 // instruction, which may save us some work. 00558 if (Instruction *Inst = LocalCache.getInst()) { 00559 ScanPos = Inst; 00560 00561 RemoveFromReverseMap(ReverseLocalDeps, Inst, QueryInst); 00562 } 00563 00564 BasicBlock *QueryParent = QueryInst->getParent(); 00565 00566 // Do the scan. 00567 if (BasicBlock::iterator(QueryInst) == QueryParent->begin()) { 00568 // No dependence found. If this is the entry block of the function, it is 00569 // unknown, otherwise it is non-local. 00570 if (QueryParent != &QueryParent->getParent()->getEntryBlock()) 00571 LocalCache = MemDepResult::getNonLocal(); 00572 else 00573 LocalCache = MemDepResult::getNonFuncLocal(); 00574 } else { 00575 AliasAnalysis::Location MemLoc; 00576 AliasAnalysis::ModRefResult MR = GetLocation(QueryInst, MemLoc, AA); 00577 if (MemLoc.Ptr) { 00578 // If we can do a pointer scan, make it happen. 00579 bool isLoad = !(MR & AliasAnalysis::Mod); 00580 if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(QueryInst)) 00581 isLoad |= II->getIntrinsicID() == Intrinsic::lifetime_start; 00582 00583 LocalCache = getPointerDependencyFrom(MemLoc, isLoad, ScanPos, 00584 QueryParent, QueryInst); 00585 } else if (isa<CallInst>(QueryInst) || isa<InvokeInst>(QueryInst)) { 00586 CallSite QueryCS(QueryInst); 00587 bool isReadOnly = AA->onlyReadsMemory(QueryCS); 00588 LocalCache = getCallSiteDependencyFrom(QueryCS, isReadOnly, ScanPos, 00589 QueryParent); 00590 } else 00591 // Non-memory instruction. 00592 LocalCache = MemDepResult::getUnknown(); 00593 } 00594 00595 // Remember the result! 00596 if (Instruction *I = LocalCache.getInst()) 00597 ReverseLocalDeps[I].insert(QueryInst); 00598 00599 return LocalCache; 00600 } 00601 00602 #ifndef NDEBUG 00603 /// AssertSorted - This method is used when -debug is specified to verify that 00604 /// cache arrays are properly kept sorted. 00605 static void AssertSorted(MemoryDependenceAnalysis::NonLocalDepInfo &Cache, 00606 int Count = -1) { 00607 if (Count == -1) Count = Cache.size(); 00608 if (Count == 0) return; 00609 00610 for (unsigned i = 1; i != unsigned(Count); ++i) 00611 assert(!(Cache[i] < Cache[i-1]) && "Cache isn't sorted!"); 00612 } 00613 #endif 00614 00615 /// getNonLocalCallDependency - Perform a full dependency query for the 00616 /// specified call, returning the set of blocks that the value is 00617 /// potentially live across. The returned set of results will include a 00618 /// "NonLocal" result for all blocks where the value is live across. 00619 /// 00620 /// This method assumes the instruction returns a "NonLocal" dependency 00621 /// within its own block. 00622 /// 00623 /// This returns a reference to an internal data structure that may be 00624 /// invalidated on the next non-local query or when an instruction is 00625 /// removed. Clients must copy this data if they want it around longer than 00626 /// that. 00627 const MemoryDependenceAnalysis::NonLocalDepInfo & 00628 MemoryDependenceAnalysis::getNonLocalCallDependency(CallSite QueryCS) { 00629 assert(getDependency(QueryCS.getInstruction()).isNonLocal() && 00630 "getNonLocalCallDependency should only be used on calls with non-local deps!"); 00631 PerInstNLInfo &CacheP = NonLocalDeps[QueryCS.getInstruction()]; 00632 NonLocalDepInfo &Cache = CacheP.first; 00633 00634 /// DirtyBlocks - This is the set of blocks that need to be recomputed. In 00635 /// the cached case, this can happen due to instructions being deleted etc. In 00636 /// the uncached case, this starts out as the set of predecessors we care 00637 /// about. 00638 SmallVector<BasicBlock*, 32> DirtyBlocks; 00639 00640 if (!Cache.empty()) { 00641 // Okay, we have a cache entry. If we know it is not dirty, just return it 00642 // with no computation. 00643 if (!CacheP.second) { 00644 ++NumCacheNonLocal; 00645 return Cache; 00646 } 00647 00648 // If we already have a partially computed set of results, scan them to 00649 // determine what is dirty, seeding our initial DirtyBlocks worklist. 00650 for (NonLocalDepInfo::iterator I = Cache.begin(), E = Cache.end(); 00651 I != E; ++I) 00652 if (I->getResult().isDirty()) 00653 DirtyBlocks.push_back(I->getBB()); 00654 00655 // Sort the cache so that we can do fast binary search lookups below. 00656 std::sort(Cache.begin(), Cache.end()); 00657 00658 ++NumCacheDirtyNonLocal; 00659 //cerr << "CACHED CASE: " << DirtyBlocks.size() << " dirty: " 00660 // << Cache.size() << " cached: " << *QueryInst; 00661 } else { 00662 // Seed DirtyBlocks with each of the preds of QueryInst's block. 00663 BasicBlock *QueryBB = QueryCS.getInstruction()->getParent(); 00664 for (BasicBlock **PI = PredCache->GetPreds(QueryBB); *PI; ++PI) 00665 DirtyBlocks.push_back(*PI); 00666 ++NumUncacheNonLocal; 00667 } 00668 00669 // isReadonlyCall - If this is a read-only call, we can be more aggressive. 00670 bool isReadonlyCall = AA->onlyReadsMemory(QueryCS); 00671 00672 SmallPtrSet<BasicBlock*, 64> Visited; 00673 00674 unsigned NumSortedEntries = Cache.size(); 00675 DEBUG(AssertSorted(Cache)); 00676 00677 // Iterate while we still have blocks to update. 00678 while (!DirtyBlocks.empty()) { 00679 BasicBlock *DirtyBB = DirtyBlocks.back(); 00680 DirtyBlocks.pop_back(); 00681 00682 // Already processed this block? 00683 if (!Visited.insert(DirtyBB)) 00684 continue; 00685 00686 // Do a binary search to see if we already have an entry for this block in 00687 // the cache set. If so, find it. 00688 DEBUG(AssertSorted(Cache, NumSortedEntries)); 00689 NonLocalDepInfo::iterator Entry = 00690 std::upper_bound(Cache.begin(), Cache.begin()+NumSortedEntries, 00691 NonLocalDepEntry(DirtyBB)); 00692 if (Entry != Cache.begin() && prior(Entry)->getBB() == DirtyBB) 00693 --Entry; 00694 00695 NonLocalDepEntry *ExistingResult = 0; 00696 if (Entry != Cache.begin()+NumSortedEntries && 00697 Entry->getBB() == DirtyBB) { 00698 // If we already have an entry, and if it isn't already dirty, the block 00699 // is done. 00700 if (!Entry->getResult().isDirty()) 00701 continue; 00702 00703 // Otherwise, remember this slot so we can update the value. 00704 ExistingResult = &*Entry; 00705 } 00706 00707 // If the dirty entry has a pointer, start scanning from it so we don't have 00708 // to rescan the entire block. 00709 BasicBlock::iterator ScanPos = DirtyBB->end(); 00710 if (ExistingResult) { 00711 if (Instruction *Inst = ExistingResult->getResult().getInst()) { 00712 ScanPos = Inst; 00713 // We're removing QueryInst's use of Inst. 00714 RemoveFromReverseMap(ReverseNonLocalDeps, Inst, 00715 QueryCS.getInstruction()); 00716 } 00717 } 00718 00719 // Find out if this block has a local dependency for QueryInst. 00720 MemDepResult Dep; 00721 00722 if (ScanPos != DirtyBB->begin()) { 00723 Dep = getCallSiteDependencyFrom(QueryCS, isReadonlyCall,ScanPos, DirtyBB); 00724 } else if (DirtyBB != &DirtyBB->getParent()->getEntryBlock()) { 00725 // No dependence found. If this is the entry block of the function, it is 00726 // a clobber, otherwise it is unknown. 00727 Dep = MemDepResult::getNonLocal(); 00728 } else { 00729 Dep = MemDepResult::getNonFuncLocal(); 00730 } 00731 00732 // If we had a dirty entry for the block, update it. Otherwise, just add 00733 // a new entry. 00734 if (ExistingResult) 00735 ExistingResult->setResult(Dep); 00736 else 00737 Cache.push_back(NonLocalDepEntry(DirtyBB, Dep)); 00738 00739 // If the block has a dependency (i.e. it isn't completely transparent to 00740 // the value), remember the association! 00741 if (!Dep.isNonLocal()) { 00742 // Keep the ReverseNonLocalDeps map up to date so we can efficiently 00743 // update this when we remove instructions. 00744 if (Instruction *Inst = Dep.getInst()) 00745 ReverseNonLocalDeps[Inst].insert(QueryCS.getInstruction()); 00746 } else { 00747 00748 // If the block *is* completely transparent to the load, we need to check 00749 // the predecessors of this block. Add them to our worklist. 00750 for (BasicBlock **PI = PredCache->GetPreds(DirtyBB); *PI; ++PI) 00751 DirtyBlocks.push_back(*PI); 00752 } 00753 } 00754 00755 return Cache; 00756 } 00757 00758 /// getNonLocalPointerDependency - Perform a full dependency query for an 00759 /// access to the specified (non-volatile) memory location, returning the 00760 /// set of instructions that either define or clobber the value. 00761 /// 00762 /// This method assumes the pointer has a "NonLocal" dependency within its 00763 /// own block. 00764 /// 00765 void MemoryDependenceAnalysis:: 00766 getNonLocalPointerDependency(const AliasAnalysis::Location &Loc, bool isLoad, 00767 BasicBlock *FromBB, 00768 SmallVectorImpl<NonLocalDepResult> &Result) { 00769 assert(Loc.Ptr->getType()->isPointerTy() && 00770 "Can't get pointer deps of a non-pointer!"); 00771 Result.clear(); 00772 00773 PHITransAddr Address(const_cast<Value *>(Loc.Ptr), TD); 00774 00775 // This is the set of blocks we've inspected, and the pointer we consider in 00776 // each block. Because of critical edges, we currently bail out if querying 00777 // a block with multiple different pointers. This can happen during PHI 00778 // translation. 00779 DenseMap<BasicBlock*, Value*> Visited; 00780 if (!getNonLocalPointerDepFromBB(Address, Loc, isLoad, FromBB, 00781 Result, Visited, true)) 00782 return; 00783 Result.clear(); 00784 Result.push_back(NonLocalDepResult(FromBB, 00785 MemDepResult::getUnknown(), 00786 const_cast<Value *>(Loc.Ptr))); 00787 } 00788 00789 /// GetNonLocalInfoForBlock - Compute the memdep value for BB with 00790 /// Pointer/PointeeSize using either cached information in Cache or by doing a 00791 /// lookup (which may use dirty cache info if available). If we do a lookup, 00792 /// add the result to the cache. 00793 MemDepResult MemoryDependenceAnalysis:: 00794 GetNonLocalInfoForBlock(const AliasAnalysis::Location &Loc, 00795 bool isLoad, BasicBlock *BB, 00796 NonLocalDepInfo *Cache, unsigned NumSortedEntries) { 00797 00798 // Do a binary search to see if we already have an entry for this block in 00799 // the cache set. If so, find it. 00800 NonLocalDepInfo::iterator Entry = 00801 std::upper_bound(Cache->begin(), Cache->begin()+NumSortedEntries, 00802 NonLocalDepEntry(BB)); 00803 if (Entry != Cache->begin() && (Entry-1)->getBB() == BB) 00804 --Entry; 00805 00806 NonLocalDepEntry *ExistingResult = 0; 00807 if (Entry != Cache->begin()+NumSortedEntries && Entry->getBB() == BB) 00808 ExistingResult = &*Entry; 00809 00810 // If we have a cached entry, and it is non-dirty, use it as the value for 00811 // this dependency. 00812 if (ExistingResult && !ExistingResult->getResult().isDirty()) { 00813 ++NumCacheNonLocalPtr; 00814 return ExistingResult->getResult(); 00815 } 00816 00817 // Otherwise, we have to scan for the value. If we have a dirty cache 00818 // entry, start scanning from its position, otherwise we scan from the end 00819 // of the block. 00820 BasicBlock::iterator ScanPos = BB->end(); 00821 if (ExistingResult && ExistingResult->getResult().getInst()) { 00822 assert(ExistingResult->getResult().getInst()->getParent() == BB && 00823 "Instruction invalidated?"); 00824 ++NumCacheDirtyNonLocalPtr; 00825 ScanPos = ExistingResult->getResult().getInst(); 00826 00827 // Eliminating the dirty entry from 'Cache', so update the reverse info. 00828 ValueIsLoadPair CacheKey(Loc.Ptr, isLoad); 00829 RemoveFromReverseMap(ReverseNonLocalPtrDeps, ScanPos, CacheKey); 00830 } else { 00831 ++NumUncacheNonLocalPtr; 00832 } 00833 00834 // Scan the block for the dependency. 00835 MemDepResult Dep = getPointerDependencyFrom(Loc, isLoad, ScanPos, BB); 00836 00837 // If we had a dirty entry for the block, update it. Otherwise, just add 00838 // a new entry. 00839 if (ExistingResult) 00840 ExistingResult->setResult(Dep); 00841 else 00842 Cache->push_back(NonLocalDepEntry(BB, Dep)); 00843 00844 // If the block has a dependency (i.e. it isn't completely transparent to 00845 // the value), remember the reverse association because we just added it 00846 // to Cache! 00847 if (!Dep.isDef() && !Dep.isClobber()) 00848 return Dep; 00849 00850 // Keep the ReverseNonLocalPtrDeps map up to date so we can efficiently 00851 // update MemDep when we remove instructions. 00852 Instruction *Inst = Dep.getInst(); 00853 assert(Inst && "Didn't depend on anything?"); 00854 ValueIsLoadPair CacheKey(Loc.Ptr, isLoad); 00855 ReverseNonLocalPtrDeps[Inst].insert(CacheKey); 00856 return Dep; 00857 } 00858 00859 /// SortNonLocalDepInfoCache - Sort the a NonLocalDepInfo cache, given a certain 00860 /// number of elements in the array that are already properly ordered. This is 00861 /// optimized for the case when only a few entries are added. 00862 static void 00863 SortNonLocalDepInfoCache(MemoryDependenceAnalysis::NonLocalDepInfo &Cache, 00864 unsigned NumSortedEntries) { 00865 switch (Cache.size() - NumSortedEntries) { 00866 case 0: 00867 // done, no new entries. 00868 break; 00869 case 2: { 00870 // Two new entries, insert the last one into place. 00871 NonLocalDepEntry Val = Cache.back(); 00872 Cache.pop_back(); 00873 MemoryDependenceAnalysis::NonLocalDepInfo::iterator Entry = 00874 std::upper_bound(Cache.begin(), Cache.end()-1, Val); 00875 Cache.insert(Entry, Val); 00876 // FALL THROUGH. 00877 } 00878 case 1: 00879 // One new entry, Just insert the new value at the appropriate position. 00880 if (Cache.size() != 1) { 00881 NonLocalDepEntry Val = Cache.back(); 00882 Cache.pop_back(); 00883 MemoryDependenceAnalysis::NonLocalDepInfo::iterator Entry = 00884 std::upper_bound(Cache.begin(), Cache.end(), Val); 00885 Cache.insert(Entry, Val); 00886 } 00887 break; 00888 default: 00889 // Added many values, do a full scale sort. 00890 std::sort(Cache.begin(), Cache.end()); 00891 break; 00892 } 00893 } 00894 00895 /// getNonLocalPointerDepFromBB - Perform a dependency query based on 00896 /// pointer/pointeesize starting at the end of StartBB. Add any clobber/def 00897 /// results to the results vector and keep track of which blocks are visited in 00898 /// 'Visited'. 00899 /// 00900 /// This has special behavior for the first block queries (when SkipFirstBlock 00901 /// is true). In this special case, it ignores the contents of the specified 00902 /// block and starts returning dependence info for its predecessors. 00903 /// 00904 /// This function returns false on success, or true to indicate that it could 00905 /// not compute dependence information for some reason. This should be treated 00906 /// as a clobber dependence on the first instruction in the predecessor block. 00907 bool MemoryDependenceAnalysis:: 00908 getNonLocalPointerDepFromBB(const PHITransAddr &Pointer, 00909 const AliasAnalysis::Location &Loc, 00910 bool isLoad, BasicBlock *StartBB, 00911 SmallVectorImpl<NonLocalDepResult> &Result, 00912 DenseMap<BasicBlock*, Value*> &Visited, 00913 bool SkipFirstBlock) { 00914 // Look up the cached info for Pointer. 00915 ValueIsLoadPair CacheKey(Pointer.getAddr(), isLoad); 00916 00917 // Set up a temporary NLPI value. If the map doesn't yet have an entry for 00918 // CacheKey, this value will be inserted as the associated value. Otherwise, 00919 // it'll be ignored, and we'll have to check to see if the cached size and 00920 // tbaa tag are consistent with the current query. 00921 NonLocalPointerInfo InitialNLPI; 00922 InitialNLPI.Size = Loc.Size; 00923 InitialNLPI.TBAATag = Loc.TBAATag; 00924 00925 // Get the NLPI for CacheKey, inserting one into the map if it doesn't 00926 // already have one. 00927 std::pair<CachedNonLocalPointerInfo::iterator, bool> Pair = 00928 NonLocalPointerDeps.insert(std::make_pair(CacheKey, InitialNLPI)); 00929 NonLocalPointerInfo *CacheInfo = &Pair.first->second; 00930 00931 // If we already have a cache entry for this CacheKey, we may need to do some 00932 // work to reconcile the cache entry and the current query. 00933 if (!Pair.second) { 00934 if (CacheInfo->Size < Loc.Size) { 00935 // The query's Size is greater than the cached one. Throw out the 00936 // cached data and proceed with the query at the greater size. 00937 CacheInfo->Pair = BBSkipFirstBlockPair(); 00938 CacheInfo->Size = Loc.Size; 00939 for (NonLocalDepInfo::iterator DI = CacheInfo->NonLocalDeps.begin(), 00940 DE = CacheInfo->NonLocalDeps.end(); DI != DE; ++DI) 00941 if (Instruction *Inst = DI->getResult().getInst()) 00942 RemoveFromReverseMap(ReverseNonLocalPtrDeps, Inst, CacheKey); 00943 CacheInfo->NonLocalDeps.clear(); 00944 } else if (CacheInfo->Size > Loc.Size) { 00945 // This query's Size is less than the cached one. Conservatively restart 00946 // the query using the greater size. 00947 return getNonLocalPointerDepFromBB(Pointer, 00948 Loc.getWithNewSize(CacheInfo->Size), 00949 isLoad, StartBB, Result, Visited, 00950 SkipFirstBlock); 00951 } 00952 00953 // If the query's TBAATag is inconsistent with the cached one, 00954 // conservatively throw out the cached data and restart the query with 00955 // no tag if needed. 00956 if (CacheInfo->TBAATag != Loc.TBAATag) { 00957 if (CacheInfo->TBAATag) { 00958 CacheInfo->Pair = BBSkipFirstBlockPair(); 00959 CacheInfo->TBAATag = 0; 00960 for (NonLocalDepInfo::iterator DI = CacheInfo->NonLocalDeps.begin(), 00961 DE = CacheInfo->NonLocalDeps.end(); DI != DE; ++DI) 00962 if (Instruction *Inst = DI->getResult().getInst()) 00963 RemoveFromReverseMap(ReverseNonLocalPtrDeps, Inst, CacheKey); 00964 CacheInfo->NonLocalDeps.clear(); 00965 } 00966 if (Loc.TBAATag) 00967 return getNonLocalPointerDepFromBB(Pointer, Loc.getWithoutTBAATag(), 00968 isLoad, StartBB, Result, Visited, 00969 SkipFirstBlock); 00970 } 00971 } 00972 00973 NonLocalDepInfo *Cache = &CacheInfo->NonLocalDeps; 00974 00975 // If we have valid cached information for exactly the block we are 00976 // investigating, just return it with no recomputation. 00977 if (CacheInfo->Pair == BBSkipFirstBlockPair(StartBB, SkipFirstBlock)) { 00978 // We have a fully cached result for this query then we can just return the 00979 // cached results and populate the visited set. However, we have to verify 00980 // that we don't already have conflicting results for these blocks. Check 00981 // to ensure that if a block in the results set is in the visited set that 00982 // it was for the same pointer query. 00983 if (!Visited.empty()) { 00984 for (NonLocalDepInfo::iterator I = Cache->begin(), E = Cache->end(); 00985 I != E; ++I) { 00986 DenseMap<BasicBlock*, Value*>::iterator VI = Visited.find(I->getBB()); 00987 if (VI == Visited.end() || VI->second == Pointer.getAddr()) 00988 continue; 00989 00990 // We have a pointer mismatch in a block. Just return clobber, saying 00991 // that something was clobbered in this result. We could also do a 00992 // non-fully cached query, but there is little point in doing this. 00993 return true; 00994 } 00995 } 00996 00997 Value *Addr = Pointer.getAddr(); 00998 for (NonLocalDepInfo::iterator I = Cache->begin(), E = Cache->end(); 00999 I != E; ++I) { 01000 Visited.insert(std::make_pair(I->getBB(), Addr)); 01001 if (I->getResult().isNonLocal()) { 01002 continue; 01003 } 01004 01005 if (!DT) { 01006 Result.push_back(NonLocalDepResult(I->getBB(), 01007 MemDepResult::getUnknown(), 01008 Addr)); 01009 } else if (DT->isReachableFromEntry(I->getBB())) { 01010 Result.push_back(NonLocalDepResult(I->getBB(), I->getResult(), Addr)); 01011 } 01012 } 01013 ++NumCacheCompleteNonLocalPtr; 01014 return false; 01015 } 01016 01017 // Otherwise, either this is a new block, a block with an invalid cache 01018 // pointer or one that we're about to invalidate by putting more info into it 01019 // than its valid cache info. If empty, the result will be valid cache info, 01020 // otherwise it isn't. 01021 if (Cache->empty()) 01022 CacheInfo->Pair = BBSkipFirstBlockPair(StartBB, SkipFirstBlock); 01023 else 01024 CacheInfo->Pair = BBSkipFirstBlockPair(); 01025 01026 SmallVector<BasicBlock*, 32> Worklist; 01027 Worklist.push_back(StartBB); 01028 01029 // PredList used inside loop. 01030 SmallVector<std::pair<BasicBlock*, PHITransAddr>, 16> PredList; 01031 01032 // Keep track of the entries that we know are sorted. Previously cached 01033 // entries will all be sorted. The entries we add we only sort on demand (we 01034 // don't insert every element into its sorted position). We know that we 01035 // won't get any reuse from currently inserted values, because we don't 01036 // revisit blocks after we insert info for them. 01037 unsigned NumSortedEntries = Cache->size(); 01038 DEBUG(AssertSorted(*Cache)); 01039 01040 while (!Worklist.empty()) { 01041 BasicBlock *BB = Worklist.pop_back_val(); 01042 01043 // Skip the first block if we have it. 01044 if (!SkipFirstBlock) { 01045 // Analyze the dependency of *Pointer in FromBB. See if we already have 01046 // been here. 01047 assert(Visited.count(BB) && "Should check 'visited' before adding to WL"); 01048 01049 // Get the dependency info for Pointer in BB. If we have cached 01050 // information, we will use it, otherwise we compute it. 01051 DEBUG(AssertSorted(*Cache, NumSortedEntries)); 01052 MemDepResult Dep = GetNonLocalInfoForBlock(Loc, isLoad, BB, Cache, 01053 NumSortedEntries); 01054 01055 // If we got a Def or Clobber, add this to the list of results. 01056 if (!Dep.isNonLocal()) { 01057 if (!DT) { 01058 Result.push_back(NonLocalDepResult(BB, 01059 MemDepResult::getUnknown(), 01060 Pointer.getAddr())); 01061 continue; 01062 } else if (DT->isReachableFromEntry(BB)) { 01063 Result.push_back(NonLocalDepResult(BB, Dep, Pointer.getAddr())); 01064 continue; 01065 } 01066 } 01067 } 01068 01069 // If 'Pointer' is an instruction defined in this block, then we need to do 01070 // phi translation to change it into a value live in the predecessor block. 01071 // If not, we just add the predecessors to the worklist and scan them with 01072 // the same Pointer. 01073 if (!Pointer.NeedsPHITranslationFromBlock(BB)) { 01074 SkipFirstBlock = false; 01075 SmallVector<BasicBlock*, 16> NewBlocks; 01076 for (BasicBlock **PI = PredCache->GetPreds(BB); *PI; ++PI) { 01077 // Verify that we haven't looked at this block yet. 01078 std::pair<DenseMap<BasicBlock*,Value*>::iterator, bool> 01079 InsertRes = Visited.insert(std::make_pair(*PI, Pointer.getAddr())); 01080 if (InsertRes.second) { 01081 // First time we've looked at *PI. 01082 NewBlocks.push_back(*PI); 01083 continue; 01084 } 01085 01086 // If we have seen this block before, but it was with a different 01087 // pointer then we have a phi translation failure and we have to treat 01088 // this as a clobber. 01089 if (InsertRes.first->second != Pointer.getAddr()) { 01090 // Make sure to clean up the Visited map before continuing on to 01091 // PredTranslationFailure. 01092 for (unsigned i = 0; i < NewBlocks.size(); i++) 01093 Visited.erase(NewBlocks[i]); 01094 goto PredTranslationFailure; 01095 } 01096 } 01097 Worklist.append(NewBlocks.begin(), NewBlocks.end()); 01098 continue; 01099 } 01100 01101 // We do need to do phi translation, if we know ahead of time we can't phi 01102 // translate this value, don't even try. 01103 if (!Pointer.IsPotentiallyPHITranslatable()) 01104 goto PredTranslationFailure; 01105 01106 // We may have added values to the cache list before this PHI translation. 01107 // If so, we haven't done anything to ensure that the cache remains sorted. 01108 // Sort it now (if needed) so that recursive invocations of 01109 // getNonLocalPointerDepFromBB and other routines that could reuse the cache 01110 // value will only see properly sorted cache arrays. 01111 if (Cache && NumSortedEntries != Cache->size()) { 01112 SortNonLocalDepInfoCache(*Cache, NumSortedEntries); 01113 NumSortedEntries = Cache->size(); 01114 } 01115 Cache = 0; 01116 01117 PredList.clear(); 01118 for (BasicBlock **PI = PredCache->GetPreds(BB); *PI; ++PI) { 01119 BasicBlock *Pred = *PI; 01120 PredList.push_back(std::make_pair(Pred, Pointer)); 01121 01122 // Get the PHI translated pointer in this predecessor. This can fail if 01123 // not translatable, in which case the getAddr() returns null. 01124 PHITransAddr &PredPointer = PredList.back().second; 01125 PredPointer.PHITranslateValue(BB, Pred, 0); 01126 01127 Value *PredPtrVal = PredPointer.getAddr(); 01128 01129 // Check to see if we have already visited this pred block with another 01130 // pointer. If so, we can't do this lookup. This failure can occur 01131 // with PHI translation when a critical edge exists and the PHI node in 01132 // the successor translates to a pointer value different than the 01133 // pointer the block was first analyzed with. 01134 std::pair<DenseMap<BasicBlock*,Value*>::iterator, bool> 01135 InsertRes = Visited.insert(std::make_pair(Pred, PredPtrVal)); 01136 01137 if (!InsertRes.second) { 01138 // We found the pred; take it off the list of preds to visit. 01139 PredList.pop_back(); 01140 01141 // If the predecessor was visited with PredPtr, then we already did 01142 // the analysis and can ignore it. 01143 if (InsertRes.first->second == PredPtrVal) 01144 continue; 01145 01146 // Otherwise, the block was previously analyzed with a different 01147 // pointer. We can't represent the result of this case, so we just 01148 // treat this as a phi translation failure. 01149 01150 // Make sure to clean up the Visited map before continuing on to 01151 // PredTranslationFailure. 01152 for (unsigned i = 0, n = PredList.size(); i < n; ++i) 01153 Visited.erase(PredList[i].first); 01154 01155 goto PredTranslationFailure; 01156 } 01157 } 01158 01159 // Actually process results here; this need to be a separate loop to avoid 01160 // calling getNonLocalPointerDepFromBB for blocks we don't want to return 01161 // any results for. (getNonLocalPointerDepFromBB will modify our 01162 // datastructures in ways the code after the PredTranslationFailure label 01163 // doesn't expect.) 01164 for (unsigned i = 0, n = PredList.size(); i < n; ++i) { 01165 BasicBlock *Pred = PredList[i].first; 01166 PHITransAddr &PredPointer = PredList[i].second; 01167 Value *PredPtrVal = PredPointer.getAddr(); 01168 01169 bool CanTranslate = true; 01170 // If PHI translation was unable to find an available pointer in this 01171 // predecessor, then we have to assume that the pointer is clobbered in 01172 // that predecessor. We can still do PRE of the load, which would insert 01173 // a computation of the pointer in this predecessor. 01174 if (PredPtrVal == 0) 01175 CanTranslate = false; 01176 01177 // FIXME: it is entirely possible that PHI translating will end up with 01178 // the same value. Consider PHI translating something like: 01179 // X = phi [x, bb1], [y, bb2]. PHI translating for bb1 doesn't *need* 01180 // to recurse here, pedantically speaking. 01181 01182 // If getNonLocalPointerDepFromBB fails here, that means the cached 01183 // result conflicted with the Visited list; we have to conservatively 01184 // assume it is unknown, but this also does not block PRE of the load. 01185 if (!CanTranslate || 01186 getNonLocalPointerDepFromBB(PredPointer, 01187 Loc.getWithNewPtr(PredPtrVal), 01188 isLoad, Pred, 01189 Result, Visited)) { 01190 // Add the entry to the Result list. 01191 NonLocalDepResult Entry(Pred, MemDepResult::getUnknown(), PredPtrVal); 01192 Result.push_back(Entry); 01193 01194 // Since we had a phi translation failure, the cache for CacheKey won't 01195 // include all of the entries that we need to immediately satisfy future 01196 // queries. Mark this in NonLocalPointerDeps by setting the 01197 // BBSkipFirstBlockPair pointer to null. This requires reuse of the 01198 // cached value to do more work but not miss the phi trans failure. 01199 NonLocalPointerInfo &NLPI = NonLocalPointerDeps[CacheKey]; 01200 NLPI.Pair = BBSkipFirstBlockPair(); 01201 continue; 01202 } 01203 } 01204 01205 // Refresh the CacheInfo/Cache pointer so that it isn't invalidated. 01206 CacheInfo = &NonLocalPointerDeps[CacheKey]; 01207 Cache = &CacheInfo->NonLocalDeps; 01208 NumSortedEntries = Cache->size(); 01209 01210 // Since we did phi translation, the "Cache" set won't contain all of the 01211 // results for the query. This is ok (we can still use it to accelerate 01212 // specific block queries) but we can't do the fastpath "return all 01213 // results from the set" Clear out the indicator for this. 01214 CacheInfo->Pair = BBSkipFirstBlockPair(); 01215 SkipFirstBlock = false; 01216 continue; 01217 01218 PredTranslationFailure: 01219 // The following code is "failure"; we can't produce a sane translation 01220 // for the given block. It assumes that we haven't modified any of 01221 // our datastructures while processing the current block. 01222 01223 if (Cache == 0) { 01224 // Refresh the CacheInfo/Cache pointer if it got invalidated. 01225 CacheInfo = &NonLocalPointerDeps[CacheKey]; 01226 Cache = &CacheInfo->NonLocalDeps; 01227 NumSortedEntries = Cache->size(); 01228 } 01229 01230 // Since we failed phi translation, the "Cache" set won't contain all of the 01231 // results for the query. This is ok (we can still use it to accelerate 01232 // specific block queries) but we can't do the fastpath "return all 01233 // results from the set". Clear out the indicator for this. 01234 CacheInfo->Pair = BBSkipFirstBlockPair(); 01235 01236 // If *nothing* works, mark the pointer as unknown. 01237 // 01238 // If this is the magic first block, return this as a clobber of the whole 01239 // incoming value. Since we can't phi translate to one of the predecessors, 01240 // we have to bail out. 01241 if (SkipFirstBlock) 01242 return true; 01243 01244 for (NonLocalDepInfo::reverse_iterator I = Cache->rbegin(); ; ++I) { 01245 assert(I != Cache->rend() && "Didn't find current block??"); 01246 if (I->getBB() != BB) 01247 continue; 01248 01249 assert(I->getResult().isNonLocal() && 01250 "Should only be here with transparent block"); 01251 I->setResult(MemDepResult::getUnknown()); 01252 Result.push_back(NonLocalDepResult(I->getBB(), I->getResult(), 01253 Pointer.getAddr())); 01254 break; 01255 } 01256 } 01257 01258 // Okay, we're done now. If we added new values to the cache, re-sort it. 01259 SortNonLocalDepInfoCache(*Cache, NumSortedEntries); 01260 DEBUG(AssertSorted(*Cache)); 01261 return false; 01262 } 01263 01264 /// RemoveCachedNonLocalPointerDependencies - If P exists in 01265 /// CachedNonLocalPointerInfo, remove it. 01266 void MemoryDependenceAnalysis:: 01267 RemoveCachedNonLocalPointerDependencies(ValueIsLoadPair P) { 01268 CachedNonLocalPointerInfo::iterator It = 01269 NonLocalPointerDeps.find(P); 01270 if (It == NonLocalPointerDeps.end()) return; 01271 01272 // Remove all of the entries in the BB->val map. This involves removing 01273 // instructions from the reverse map. 01274 NonLocalDepInfo &PInfo = It->second.NonLocalDeps; 01275 01276 for (unsigned i = 0, e = PInfo.size(); i != e; ++i) { 01277 Instruction *Target = PInfo[i].getResult().getInst(); 01278 if (Target == 0) continue; // Ignore non-local dep results. 01279 assert(Target->getParent() == PInfo[i].getBB()); 01280 01281 // Eliminating the dirty entry from 'Cache', so update the reverse info. 01282 RemoveFromReverseMap(ReverseNonLocalPtrDeps, Target, P); 01283 } 01284 01285 // Remove P from NonLocalPointerDeps (which deletes NonLocalDepInfo). 01286 NonLocalPointerDeps.erase(It); 01287 } 01288 01289 01290 /// invalidateCachedPointerInfo - This method is used to invalidate cached 01291 /// information about the specified pointer, because it may be too 01292 /// conservative in memdep. This is an optional call that can be used when 01293 /// the client detects an equivalence between the pointer and some other 01294 /// value and replaces the other value with ptr. This can make Ptr available 01295 /// in more places that cached info does not necessarily keep. 01296 void MemoryDependenceAnalysis::invalidateCachedPointerInfo(Value *Ptr) { 01297 // If Ptr isn't really a pointer, just ignore it. 01298 if (!Ptr->getType()->isPointerTy()) return; 01299 // Flush store info for the pointer. 01300 RemoveCachedNonLocalPointerDependencies(ValueIsLoadPair(Ptr, false)); 01301 // Flush load info for the pointer. 01302 RemoveCachedNonLocalPointerDependencies(ValueIsLoadPair(Ptr, true)); 01303 } 01304 01305 /// invalidateCachedPredecessors - Clear the PredIteratorCache info. 01306 /// This needs to be done when the CFG changes, e.g., due to splitting 01307 /// critical edges. 01308 void MemoryDependenceAnalysis::invalidateCachedPredecessors() { 01309 PredCache->clear(); 01310 } 01311 01312 /// removeInstruction - Remove an instruction from the dependence analysis, 01313 /// updating the dependence of instructions that previously depended on it. 01314 /// This method attempts to keep the cache coherent using the reverse map. 01315 void MemoryDependenceAnalysis::removeInstruction(Instruction *RemInst) { 01316 // Walk through the Non-local dependencies, removing this one as the value 01317 // for any cached queries. 01318 NonLocalDepMapType::iterator NLDI = NonLocalDeps.find(RemInst); 01319 if (NLDI != NonLocalDeps.end()) { 01320 NonLocalDepInfo &BlockMap = NLDI->second.first; 01321 for (NonLocalDepInfo::iterator DI = BlockMap.begin(), DE = BlockMap.end(); 01322 DI != DE; ++DI) 01323 if (Instruction *Inst = DI->getResult().getInst()) 01324 RemoveFromReverseMap(ReverseNonLocalDeps, Inst, RemInst); 01325 NonLocalDeps.erase(NLDI); 01326 } 01327 01328 // If we have a cached local dependence query for this instruction, remove it. 01329 // 01330 LocalDepMapType::iterator LocalDepEntry = LocalDeps.find(RemInst); 01331 if (LocalDepEntry != LocalDeps.end()) { 01332 // Remove us from DepInst's reverse set now that the local dep info is gone. 01333 if (Instruction *Inst = LocalDepEntry->second.getInst()) 01334 RemoveFromReverseMap(ReverseLocalDeps, Inst, RemInst); 01335 01336 // Remove this local dependency info. 01337 LocalDeps.erase(LocalDepEntry); 01338 } 01339 01340 // If we have any cached pointer dependencies on this instruction, remove 01341 // them. If the instruction has non-pointer type, then it can't be a pointer 01342 // base. 01343 01344 // Remove it from both the load info and the store info. The instruction 01345 // can't be in either of these maps if it is non-pointer. 01346 if (RemInst->getType()->isPointerTy()) { 01347 RemoveCachedNonLocalPointerDependencies(ValueIsLoadPair(RemInst, false)); 01348 RemoveCachedNonLocalPointerDependencies(ValueIsLoadPair(RemInst, true)); 01349 } 01350 01351 // Loop over all of the things that depend on the instruction we're removing. 01352 // 01353 SmallVector<std::pair<Instruction*, Instruction*>, 8> ReverseDepsToAdd; 01354 01355 // If we find RemInst as a clobber or Def in any of the maps for other values, 01356 // we need to replace its entry with a dirty version of the instruction after 01357 // it. If RemInst is a terminator, we use a null dirty value. 01358 // 01359 // Using a dirty version of the instruction after RemInst saves having to scan 01360 // the entire block to get to this point. 01361 MemDepResult NewDirtyVal; 01362 if (!RemInst->isTerminator()) 01363 NewDirtyVal = MemDepResult::getDirty(++BasicBlock::iterator(RemInst)); 01364 01365 ReverseDepMapType::iterator ReverseDepIt = ReverseLocalDeps.find(RemInst); 01366 if (ReverseDepIt != ReverseLocalDeps.end()) { 01367 SmallPtrSet<Instruction*, 4> &ReverseDeps = ReverseDepIt->second; 01368 // RemInst can't be the terminator if it has local stuff depending on it. 01369 assert(!ReverseDeps.empty() && !isa<TerminatorInst>(RemInst) && 01370 "Nothing can locally depend on a terminator"); 01371 01372 for (SmallPtrSet<Instruction*, 4>::iterator I = ReverseDeps.begin(), 01373 E = ReverseDeps.end(); I != E; ++I) { 01374 Instruction *InstDependingOnRemInst = *I; 01375 assert(InstDependingOnRemInst != RemInst && 01376 "Already removed our local dep info"); 01377 01378 LocalDeps[InstDependingOnRemInst] = NewDirtyVal; 01379 01380 // Make sure to remember that new things depend on NewDepInst. 01381 assert(NewDirtyVal.getInst() && "There is no way something else can have " 01382 "a local dep on this if it is a terminator!"); 01383 ReverseDepsToAdd.push_back(std::make_pair(NewDirtyVal.getInst(), 01384 InstDependingOnRemInst)); 01385 } 01386 01387 ReverseLocalDeps.erase(ReverseDepIt); 01388 01389 // Add new reverse deps after scanning the set, to avoid invalidating the 01390 // 'ReverseDeps' reference. 01391 while (!ReverseDepsToAdd.empty()) { 01392 ReverseLocalDeps[ReverseDepsToAdd.back().first] 01393 .insert(ReverseDepsToAdd.back().second); 01394 ReverseDepsToAdd.pop_back(); 01395 } 01396 } 01397 01398 ReverseDepIt = ReverseNonLocalDeps.find(RemInst); 01399 if (ReverseDepIt != ReverseNonLocalDeps.end()) { 01400 SmallPtrSet<Instruction*, 4> &Set = ReverseDepIt->second; 01401 for (SmallPtrSet<Instruction*, 4>::iterator I = Set.begin(), E = Set.end(); 01402 I != E; ++I) { 01403 assert(*I != RemInst && "Already removed NonLocalDep info for RemInst"); 01404 01405 PerInstNLInfo &INLD = NonLocalDeps[*I]; 01406 // The information is now dirty! 01407 INLD.second = true; 01408 01409 for (NonLocalDepInfo::iterator DI = INLD.first.begin(), 01410 DE = INLD.first.end(); DI != DE; ++DI) { 01411 if (DI->getResult().getInst() != RemInst) continue; 01412 01413 // Convert to a dirty entry for the subsequent instruction. 01414 DI->setResult(NewDirtyVal); 01415 01416 if (Instruction *NextI = NewDirtyVal.getInst()) 01417 ReverseDepsToAdd.push_back(std::make_pair(NextI, *I)); 01418 } 01419 } 01420 01421 ReverseNonLocalDeps.erase(ReverseDepIt); 01422 01423 // Add new reverse deps after scanning the set, to avoid invalidating 'Set' 01424 while (!ReverseDepsToAdd.empty()) { 01425 ReverseNonLocalDeps[ReverseDepsToAdd.back().first] 01426 .insert(ReverseDepsToAdd.back().second); 01427 ReverseDepsToAdd.pop_back(); 01428 } 01429 } 01430 01431 // If the instruction is in ReverseNonLocalPtrDeps then it appears as a 01432 // value in the NonLocalPointerDeps info. 01433 ReverseNonLocalPtrDepTy::iterator ReversePtrDepIt = 01434 ReverseNonLocalPtrDeps.find(RemInst); 01435 if (ReversePtrDepIt != ReverseNonLocalPtrDeps.end()) { 01436 SmallPtrSet<ValueIsLoadPair, 4> &Set = ReversePtrDepIt->second; 01437 SmallVector<std::pair<Instruction*, ValueIsLoadPair>,8> ReversePtrDepsToAdd; 01438 01439 for (SmallPtrSet<ValueIsLoadPair, 4>::iterator I = Set.begin(), 01440 E = Set.end(); I != E; ++I) { 01441 ValueIsLoadPair P = *I; 01442 assert(P.getPointer() != RemInst && 01443 "Already removed NonLocalPointerDeps info for RemInst"); 01444 01445 NonLocalDepInfo &NLPDI = NonLocalPointerDeps[P].NonLocalDeps; 01446 01447 // The cache is not valid for any specific block anymore. 01448 NonLocalPointerDeps[P].Pair = BBSkipFirstBlockPair(); 01449 01450 // Update any entries for RemInst to use the instruction after it. 01451 for (NonLocalDepInfo::iterator DI = NLPDI.begin(), DE = NLPDI.end(); 01452 DI != DE; ++DI) { 01453 if (DI->getResult().getInst() != RemInst) continue; 01454 01455 // Convert to a dirty entry for the subsequent instruction. 01456 DI->setResult(NewDirtyVal); 01457 01458 if (Instruction *NewDirtyInst = NewDirtyVal.getInst()) 01459 ReversePtrDepsToAdd.push_back(std::make_pair(NewDirtyInst, P)); 01460 } 01461 01462 // Re-sort the NonLocalDepInfo. Changing the dirty entry to its 01463 // subsequent value may invalidate the sortedness. 01464 std::sort(NLPDI.begin(), NLPDI.end()); 01465 } 01466 01467 ReverseNonLocalPtrDeps.erase(ReversePtrDepIt); 01468 01469 while (!ReversePtrDepsToAdd.empty()) { 01470 ReverseNonLocalPtrDeps[ReversePtrDepsToAdd.back().first] 01471 .insert(ReversePtrDepsToAdd.back().second); 01472 ReversePtrDepsToAdd.pop_back(); 01473 } 01474 } 01475 01476 01477 assert(!NonLocalDeps.count(RemInst) && "RemInst got reinserted?"); 01478 AA->deleteValue(RemInst); 01479 DEBUG(verifyRemoved(RemInst)); 01480 } 01481 /// verifyRemoved - Verify that the specified instruction does not occur 01482 /// in our internal data structures. 01483 void MemoryDependenceAnalysis::verifyRemoved(Instruction *D) const { 01484 for (LocalDepMapType::const_iterator I = LocalDeps.begin(), 01485 E = LocalDeps.end(); I != E; ++I) { 01486 assert(I->first != D && "Inst occurs in data structures"); 01487 assert(I->second.getInst() != D && 01488 "Inst occurs in data structures"); 01489 } 01490 01491 for (CachedNonLocalPointerInfo::const_iterator I =NonLocalPointerDeps.begin(), 01492 E = NonLocalPointerDeps.end(); I != E; ++I) { 01493 assert(I->first.getPointer() != D && "Inst occurs in NLPD map key"); 01494 const NonLocalDepInfo &Val = I->second.NonLocalDeps; 01495 for (NonLocalDepInfo::const_iterator II = Val.begin(), E = Val.end(); 01496 II != E; ++II) 01497 assert(II->getResult().getInst() != D && "Inst occurs as NLPD value"); 01498 } 01499 01500 for (NonLocalDepMapType::const_iterator I = NonLocalDeps.begin(), 01501 E = NonLocalDeps.end(); I != E; ++I) { 01502 assert(I->first != D && "Inst occurs in data structures"); 01503 const PerInstNLInfo &INLD = I->second; 01504 for (NonLocalDepInfo::const_iterator II = INLD.first.begin(), 01505 EE = INLD.first.end(); II != EE; ++II) 01506 assert(II->getResult().getInst() != D && "Inst occurs in data structures"); 01507 } 01508 01509 for (ReverseDepMapType::const_iterator I = ReverseLocalDeps.begin(), 01510 E = ReverseLocalDeps.end(); I != E; ++I) { 01511 assert(I->first != D && "Inst occurs in data structures"); 01512 for (SmallPtrSet<Instruction*, 4>::const_iterator II = I->second.begin(), 01513 EE = I->second.end(); II != EE; ++II) 01514 assert(*II != D && "Inst occurs in data structures"); 01515 } 01516 01517 for (ReverseDepMapType::const_iterator I = ReverseNonLocalDeps.begin(), 01518 E = ReverseNonLocalDeps.end(); 01519 I != E; ++I) { 01520 assert(I->first != D && "Inst occurs in data structures"); 01521 for (SmallPtrSet<Instruction*, 4>::const_iterator II = I->second.begin(), 01522 EE = I->second.end(); II != EE; ++II) 01523 assert(*II != D && "Inst occurs in data structures"); 01524 } 01525 01526 for (ReverseNonLocalPtrDepTy::const_iterator 01527 I = ReverseNonLocalPtrDeps.begin(), 01528 E = ReverseNonLocalPtrDeps.end(); I != E; ++I) { 01529 assert(I->first != D && "Inst occurs in rev NLPD map"); 01530 01531 for (SmallPtrSet<ValueIsLoadPair, 4>::const_iterator II = I->second.begin(), 01532 E = I->second.end(); II != E; ++II) 01533 assert(*II != ValueIsLoadPair(D, false) && 01534 *II != ValueIsLoadPair(D, true) && 01535 "Inst occurs in ReverseNonLocalPtrDeps map"); 01536 } 01537 01538 }