MemoryDependenceAnalysis.cpp
1 //===- MemoryDependenceAnalysis.cpp - Mem Deps Implementation -------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file implements an analysis that determines, for a given memory
10 // operation, what preceding memory operations it depends on. It builds on
11 // alias analysis information, and tries to provide a lazy, caching interface to
12 // a common kind of alias information query.
13 //
14 //===----------------------------------------------------------------------===//
15 
16 #include "llvm/Analysis/MemoryDependenceAnalysis.h"
17 #include "llvm/ADT/DenseMap.h"
18 #include "llvm/ADT/STLExtras.h"
19 #include "llvm/ADT/SmallPtrSet.h"
20 #include "llvm/ADT/SmallVector.h"
21 #include "llvm/ADT/Statistic.h"
22 #include "llvm/Analysis/AliasAnalysis.h"
23 #include "llvm/Analysis/AssumptionCache.h"
24 #include "llvm/Analysis/MemoryBuiltins.h"
25 #include "llvm/Analysis/MemoryLocation.h"
26 #include "llvm/Analysis/OrderedBasicBlock.h"
27 #include "llvm/Analysis/PHITransAddr.h"
28 #include "llvm/Analysis/PhiValues.h"
29 #include "llvm/Analysis/TargetLibraryInfo.h"
30 #include "llvm/Analysis/ValueTracking.h"
31 #include "llvm/IR/Attributes.h"
32 #include "llvm/IR/BasicBlock.h"
33 #include "llvm/IR/Constants.h"
34 #include "llvm/IR/DataLayout.h"
35 #include "llvm/IR/DerivedTypes.h"
36 #include "llvm/IR/Dominators.h"
37 #include "llvm/IR/Function.h"
38 #include "llvm/IR/InstrTypes.h"
39 #include "llvm/IR/Instruction.h"
40 #include "llvm/IR/Instructions.h"
41 #include "llvm/IR/IntrinsicInst.h"
42 #include "llvm/IR/LLVMContext.h"
43 #include "llvm/IR/Metadata.h"
44 #include "llvm/IR/Module.h"
45 #include "llvm/IR/PredIteratorCache.h"
46 #include "llvm/IR/Type.h"
47 #include "llvm/IR/Use.h"
48 #include "llvm/IR/User.h"
49 #include "llvm/IR/Value.h"
50 #include "llvm/Pass.h"
51 #include "llvm/Support/AtomicOrdering.h"
52 #include "llvm/Support/Casting.h"
53 #include "llvm/Support/CommandLine.h"
54 #include "llvm/Support/Compiler.h"
55 #include "llvm/Support/Debug.h"
56 #include "llvm/Support/MathExtras.h"
57 #include <algorithm>
58 #include <cassert>
59 #include <cstdint>
60 #include <iterator>
61 #include <utility>
62 
63 using namespace llvm;
64 
65 #define DEBUG_TYPE "memdep"
66 
67 STATISTIC(NumCacheNonLocal, "Number of fully cached non-local responses");
68 STATISTIC(NumCacheDirtyNonLocal, "Number of dirty cached non-local responses");
69 STATISTIC(NumUncacheNonLocal, "Number of uncached non-local responses");
70 
71 STATISTIC(NumCacheNonLocalPtr,
72  "Number of fully cached non-local ptr responses");
73 STATISTIC(NumCacheDirtyNonLocalPtr,
74  "Number of cached, but dirty, non-local ptr responses");
75 STATISTIC(NumUncacheNonLocalPtr, "Number of uncached non-local ptr responses");
76 STATISTIC(NumCacheCompleteNonLocalPtr,
77  "Number of block queries that were completely cached");
78 
79 // Limit for the number of instructions to scan in a block.
80 
81 static cl::opt<unsigned> BlockScanLimit(
82     "memdep-block-scan-limit", cl::Hidden, cl::init(100),
83  cl::desc("The number of instructions to scan in a block in memory "
84  "dependency analysis (default = 100)"));
85 
86 static cl::opt<unsigned>
87  BlockNumberLimit("memdep-block-number-limit", cl::Hidden, cl::init(1000),
88  cl::desc("The number of blocks to scan during memory "
89  "dependency analysis (default = 1000)"));
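// Note: both limits are hidden cl::opt flags and can be overridden explicitly
// when running opt, e.g. "opt -memdep-block-scan-limit=200 ...".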
90 
91 // Limit on the number of memdep results to process.
92 static const unsigned int NumResultsLimit = 100;
93 
94 /// This is a helper function that removes Val from 'Inst's set in ReverseMap.
95 ///
96 /// If the set becomes empty, remove Inst's entry.
97 template <typename KeyTy>
98 static void
99 RemoveFromReverseMap(DenseMap<Instruction *, SmallPtrSet<KeyTy, 4>> &ReverseMap,
100                      Instruction *Inst, KeyTy Val) {
101  typename DenseMap<Instruction *, SmallPtrSet<KeyTy, 4>>::iterator InstIt =
102  ReverseMap.find(Inst);
103  assert(InstIt != ReverseMap.end() && "Reverse map out of sync?");
104  bool Found = InstIt->second.erase(Val);
105  assert(Found && "Invalid reverse map!");
106  (void)Found;
107  if (InstIt->second.empty())
108  ReverseMap.erase(InstIt);
109 }
110 
111 /// If the given instruction references a specific memory location, fill in Loc
112 /// with the details, otherwise set Loc.Ptr to null.
113 ///
114 /// Returns a ModRefInfo value describing the general behavior of the
115 /// instruction.
116 static ModRefInfo GetLocation(const Instruction *Inst, MemoryLocation &Loc,
117                               const TargetLibraryInfo &TLI) {
118  if (const LoadInst *LI = dyn_cast<LoadInst>(Inst)) {
119  if (LI->isUnordered()) {
120  Loc = MemoryLocation::get(LI);
121  return ModRefInfo::Ref;
122  }
123  if (LI->getOrdering() == AtomicOrdering::Monotonic) {
124  Loc = MemoryLocation::get(LI);
125  return ModRefInfo::ModRef;
126  }
127  Loc = MemoryLocation();
128  return ModRefInfo::ModRef;
129  }
130 
131  if (const StoreInst *SI = dyn_cast<StoreInst>(Inst)) {
132  if (SI->isUnordered()) {
133  Loc = MemoryLocation::get(SI);
134  return ModRefInfo::Mod;
135  }
136  if (SI->getOrdering() == AtomicOrdering::Monotonic) {
137  Loc = MemoryLocation::get(SI);
138  return ModRefInfo::ModRef;
139  }
140  Loc = MemoryLocation();
141  return ModRefInfo::ModRef;
142  }
143 
144  if (const VAArgInst *V = dyn_cast<VAArgInst>(Inst)) {
145  Loc = MemoryLocation::get(V);
146  return ModRefInfo::ModRef;
147  }
148 
149  if (const CallInst *CI = isFreeCall(Inst, &TLI)) {
150  // calls to free() deallocate the entire structure
151  Loc = MemoryLocation(CI->getArgOperand(0));
152  return ModRefInfo::Mod;
153  }
154 
155  if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(Inst)) {
156  switch (II->getIntrinsicID()) {
157  case Intrinsic::lifetime_start:
158  case Intrinsic::lifetime_end:
159  case Intrinsic::invariant_start:
160  Loc = MemoryLocation::getForArgument(II, 1, TLI);
161  // These intrinsics don't really modify the memory, but returning Mod
162  // will allow them to be handled conservatively.
163  return ModRefInfo::Mod;
164  case Intrinsic::invariant_end:
165  Loc = MemoryLocation::getForArgument(II, 2, TLI);
166  // These intrinsics don't really modify the memory, but returning Mod
167  // will allow them to be handled conservatively.
168  return ModRefInfo::Mod;
169  default:
170  break;
171  }
172  }
173 
174  // Otherwise, just do the coarse-grained thing that always works.
175  if (Inst->mayWriteToMemory())
176  return ModRefInfo::ModRef;
177  if (Inst->mayReadFromMemory())
178  return ModRefInfo::Ref;
179  return ModRefInfo::NoModRef;
180 }
181 
182 /// Private helper for finding the local dependencies of a call site.
183 MemDepResult MemoryDependenceResults::getCallDependencyFrom(
184  CallBase *Call, bool isReadOnlyCall, BasicBlock::iterator ScanIt,
185  BasicBlock *BB) {
186  unsigned Limit = getDefaultBlockScanLimit();
187 
188  // Walk backwards through the block, looking for dependencies.
189  while (ScanIt != BB->begin()) {
190  Instruction *Inst = &*--ScanIt;
191  // Debug intrinsics don't cause dependences and should not affect Limit
192  if (isa<DbgInfoIntrinsic>(Inst))
193  continue;
194 
195  // Limit the amount of scanning we do so we don't end up with quadratic
196  // running time on extreme testcases.
197  --Limit;
198  if (!Limit)
199  return MemDepResult::getUnknown();
200 
201  // If this inst is a memory op, get the pointer it accessed
202  MemoryLocation Loc;
203  ModRefInfo MR = GetLocation(Inst, Loc, TLI);
204  if (Loc.Ptr) {
205  // A simple instruction.
206  if (isModOrRefSet(AA.getModRefInfo(Call, Loc)))
207  return MemDepResult::getClobber(Inst);
208  continue;
209  }
210 
211  if (auto *CallB = dyn_cast<CallBase>(Inst)) {
212  // If these two calls do not interfere, look past it.
213  if (isNoModRef(AA.getModRefInfo(Call, CallB))) {
214  // If the two calls are the same, return Inst as a Def, so that
215  // Call can be found redundant and eliminated.
216  if (isReadOnlyCall && !isModSet(MR) &&
217  Call->isIdenticalToWhenDefined(CallB))
218  return MemDepResult::getDef(Inst);
219 
220  // Otherwise if the two calls don't interact (e.g. CallB is readnone)
221  // keep scanning.
222  continue;
223  } else
224  return MemDepResult::getClobber(Inst);
225  }
226 
227  // If we could not obtain a pointer for the instruction and the instruction
228  // touches memory then assume that this is a dependency.
229  if (isModOrRefSet(MR))
230  return MemDepResult::getClobber(Inst);
231  }
232 
233  // No dependence found. If this is the entry block of the function, it is
234  // unknown, otherwise it is non-local.
235  if (BB != &BB->getParent()->getEntryBlock())
236  return MemDepResult::getNonLocal();
237   return MemDepResult::getNonFuncLocal();
238 }
239 
240 unsigned MemoryDependenceResults::getLoadLoadClobberFullWidthSize(
241     const Value *MemLocBase, int64_t MemLocOffs, unsigned MemLocSize,
242  const LoadInst *LI) {
243  // We can only extend simple integer loads.
244  if (!isa<IntegerType>(LI->getType()) || !LI->isSimple())
245  return 0;
246 
247  // Load widening is hostile to ThreadSanitizer: it may cause false positives
248  // or make the reports more cryptic (access sizes are wrong).
249  if (LI->getParent()->getParent()->hasFnAttribute(Attribute::SanitizeThread))
250  return 0;
251 
252  const DataLayout &DL = LI->getModule()->getDataLayout();
253 
254  // Get the base of this load.
255  int64_t LIOffs = 0;
256  const Value *LIBase =
257       GetPointerBaseWithConstantOffset(LI->getPointerOperand(), LIOffs, DL);
258 
259  // If the two pointers are not based on the same pointer, we can't tell that
260  // they are related.
261  if (LIBase != MemLocBase)
262  return 0;
263 
264  // Okay, the two values are based on the same pointer, but returned as
265  // no-alias. This happens when we have things like two byte loads at "P+1"
266  // and "P+3". Check to see if increasing the size of the "LI" load up to its
267  // alignment (or the largest native integer type) will allow us to load all
268  // the bits required by MemLoc.
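 // Illustrative example: if LI is a 4-byte-aligned i8 load of *P (LIOffs = 0)
 // and MemLoc is a one-byte access at P+3 (MemLocOffs = 3, MemLocSize = 1),
 // then widening LI to an i32 covers bytes 0..3, so this function returns 4
 // (assuming i32 is a legal integer type for the target).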
269 
270  // If MemLoc is before LI, then no widening of LI will help us out.
271  if (MemLocOffs < LIOffs)
272  return 0;
273 
274  // Get the alignment of the load in bytes. We assume that it is safe to load
275  // any legal integer up to this size without a problem. For example, if we're
276  // looking at an i8 load on x86-32 that is known 1024 byte aligned, we can
277  // widen it up to an i32 load. If it is known 2-byte aligned, we can widen it
278  // to i16.
279  unsigned LoadAlign = LI->getAlignment();
280 
281  int64_t MemLocEnd = MemLocOffs + MemLocSize;
282 
283  // If no amount of rounding up will let MemLoc fit into LI, then bail out.
284  if (LIOffs + LoadAlign < MemLocEnd)
285  return 0;
286 
287  // This is the size of the load to try. Start with the next larger power of
288  // two.
289  unsigned NewLoadByteSize = LI->getType()->getPrimitiveSizeInBits() / 8U;
290  NewLoadByteSize = NextPowerOf2(NewLoadByteSize);
291 
292  while (true) {
293  // If this load size is bigger than our known alignment or would not fit
294  // into a native integer register, then we fail.
295  if (NewLoadByteSize > LoadAlign ||
296  !DL.fitsInLegalInteger(NewLoadByteSize * 8))
297  return 0;
298 
299   if (LIOffs + NewLoadByteSize > MemLocEnd &&
300       (LI->getParent()->getParent()->hasFnAttribute(
301            Attribute::SanitizeAddress) ||
302        LI->getParent()->getParent()->hasFnAttribute(
303            Attribute::SanitizeHWAddress)))
304  // We will be reading past the location accessed by the original program.
305  // While this is safe in a regular build, Address Safety analysis tools
306  // may start reporting false warnings. So, don't do widening.
307  return 0;
308 
309  // If a load of this width would include all of MemLoc, then we succeed.
310  if (LIOffs + NewLoadByteSize >= MemLocEnd)
311  return NewLoadByteSize;
312 
313  NewLoadByteSize <<= 1;
314  }
315 }
316 
317 static bool isVolatile(Instruction *Inst) {
318  if (auto *LI = dyn_cast<LoadInst>(Inst))
319  return LI->isVolatile();
320  if (auto *SI = dyn_cast<StoreInst>(Inst))
321  return SI->isVolatile();
322  if (auto *AI = dyn_cast<AtomicCmpXchgInst>(Inst))
323  return AI->isVolatile();
324  return false;
325 }
326 
327 MemDepResult MemoryDependenceResults::getPointerDependencyFrom(
328     const MemoryLocation &MemLoc, bool isLoad, BasicBlock::iterator ScanIt,
329  BasicBlock *BB, Instruction *QueryInst, unsigned *Limit,
330  OrderedBasicBlock *OBB) {
331  MemDepResult InvariantGroupDependency = MemDepResult::getUnknown();
332  if (QueryInst != nullptr) {
333  if (auto *LI = dyn_cast<LoadInst>(QueryInst)) {
334  InvariantGroupDependency = getInvariantGroupPointerDependency(LI, BB);
335 
336  if (InvariantGroupDependency.isDef())
337  return InvariantGroupDependency;
338  }
339  }
340   MemDepResult SimpleDep = getSimplePointerDependencyFrom(
341       MemLoc, isLoad, ScanIt, BB, QueryInst, Limit, OBB);
342  if (SimpleDep.isDef())
343  return SimpleDep;
344   // A non-local invariant group dependency indicates there is a non-local Def
345   // (it only returns NonLocal if it finds a non-local Def), which is better than
346  // local clobber and everything else.
347  if (InvariantGroupDependency.isNonLocal())
348  return InvariantGroupDependency;
349 
350  assert(InvariantGroupDependency.isUnknown() &&
351  "InvariantGroupDependency should be only unknown at this point");
352  return SimpleDep;
353 }
354 
355 MemDepResult
356 MemoryDependenceResults::getInvariantGroupPointerDependency(LoadInst *LI,
357                                                              BasicBlock *BB) {
358 
359  if (!LI->hasMetadata(LLVMContext::MD_invariant_group))
360  return MemDepResult::getUnknown();
361 
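 // Illustrative example: given two loads of the same pointer that both carry
 // !invariant.group metadata, e.g.
 //   %a = load i32, i32* %p, !invariant.group !0
 //   ...
 //   %b = load i32, i32* %p, !invariant.group !0
 // the second load can be answered as a Def on the first, because the metadata
 // promises that the value stored at %p does not change between them.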
362  // Take the ptr operand after all casts and geps 0. This way we can search
363  // cast graph down only.
364  Value *LoadOperand = LI->getPointerOperand()->stripPointerCasts();
365 
366   // It is not safe to walk the use list of a global value, because function
367  // passes aren't allowed to look outside their functions.
368  // FIXME: this could be fixed by filtering instructions from outside
369  // of current function.
370  if (isa<GlobalValue>(LoadOperand))
371  return MemDepResult::getUnknown();
372 
373  // Queue to process all pointers that are equivalent to load operand.
374  SmallVector<const Value *, 8> LoadOperandsQueue;
375  LoadOperandsQueue.push_back(LoadOperand);
376 
377  Instruction *ClosestDependency = nullptr;
378   // Order of instructions in the uses list is unpredictable. In order to always
379  // get the same result, we will look for the closest dominance.
380  auto GetClosestDependency = [this](Instruction *Best, Instruction *Other) {
381  assert(Other && "Must call it with not null instruction");
382  if (Best == nullptr || DT.dominates(Best, Other))
383  return Other;
384  return Best;
385  };
386 
387  // FIXME: This loop is O(N^2) because dominates can be O(n) and in worst case
388  // we will see all the instructions. This should be fixed in MSSA.
389  while (!LoadOperandsQueue.empty()) {
390  const Value *Ptr = LoadOperandsQueue.pop_back_val();
391  assert(Ptr && !isa<GlobalValue>(Ptr) &&
392  "Null or GlobalValue should not be inserted");
393 
394  for (const Use &Us : Ptr->uses()) {
395  auto *U = dyn_cast<Instruction>(Us.getUser());
396  if (!U || U == LI || !DT.dominates(U, LI))
397  continue;
398 
399   // Bitcast or gep with zeros are using Ptr. Add to queue to check its
400  // users. U = bitcast Ptr
401  if (isa<BitCastInst>(U)) {
402  LoadOperandsQueue.push_back(U);
403  continue;
404  }
405  // Gep with zeros is equivalent to bitcast.
406  // FIXME: we are not sure if some bitcast should be canonicalized to gep 0
407  // or gep 0 to bitcast because of SROA, so there are 2 forms. When
408   // typeless pointers are ready, both cases will be gone
409  // (and this BFS also won't be needed).
410  if (auto *GEP = dyn_cast<GetElementPtrInst>(U))
411  if (GEP->hasAllZeroIndices()) {
412  LoadOperandsQueue.push_back(U);
413  continue;
414  }
415 
416  // If we hit load/store with the same invariant.group metadata (and the
417  // same pointer operand) we can assume that value pointed by pointer
418  // operand didn't change.
419  if ((isa<LoadInst>(U) || isa<StoreInst>(U)) &&
420  U->hasMetadata(LLVMContext::MD_invariant_group))
421  ClosestDependency = GetClosestDependency(ClosestDependency, U);
422  }
423  }
424 
425  if (!ClosestDependency)
426  return MemDepResult::getUnknown();
427  if (ClosestDependency->getParent() == BB)
428  return MemDepResult::getDef(ClosestDependency);
429   // Def(U) can't be returned here because it is non-local. If a local
430   // dependency is not found, return nonLocal, counting on the fact that the
431  // user will call getNonLocalPointerDependency, which will return cached
432  // result.
433  NonLocalDefsCache.try_emplace(
434  LI, NonLocalDepResult(ClosestDependency->getParent(),
435  MemDepResult::getDef(ClosestDependency), nullptr));
436  ReverseNonLocalDefsCache[ClosestDependency].insert(LI);
437  return MemDepResult::getNonLocal();
438 }
439 
440 MemDepResult MemoryDependenceResults::getSimplePointerDependencyFrom(
441     const MemoryLocation &MemLoc, bool isLoad, BasicBlock::iterator ScanIt,
442  BasicBlock *BB, Instruction *QueryInst, unsigned *Limit,
443  OrderedBasicBlock *OBB) {
444  bool isInvariantLoad = false;
445 
446  unsigned DefaultLimit = getDefaultBlockScanLimit();
447  if (!Limit)
448  Limit = &DefaultLimit;
449 
450  // We must be careful with atomic accesses, as they may allow another thread
451  // to touch this location, clobbering it. We are conservative: if the
452  // QueryInst is not a simple (non-atomic) memory access, we automatically
453  // return getClobber.
454  // If it is simple, we know based on the results of
455  // "Compiler testing via a theory of sound optimisations in the C11/C++11
456  // memory model" in PLDI 2013, that a non-atomic location can only be
457  // clobbered between a pair of a release and an acquire action, with no
458  // access to the location in between.
459  // Here is an example for giving the general intuition behind this rule.
460  // In the following code:
461  // store x 0;
462  // release action; [1]
463  // acquire action; [4]
464  // %val = load x;
465  // It is unsafe to replace %val by 0 because another thread may be running:
466  // acquire action; [2]
467  // store x 42;
468  // release action; [3]
469  // with synchronization from 1 to 2 and from 3 to 4, resulting in %val
470  // being 42. A key property of this program however is that if either
471  // 1 or 4 were missing, there would be a race between the store of 42
472   // and either the store of 0 or the load (making the whole program racy).
473  // The paper mentioned above shows that the same property is respected
474  // by every program that can detect any optimization of that kind: either
475  // it is racy (undefined) or there is a release followed by an acquire
476  // between the pair of accesses under consideration.
477 
478  // If the load is invariant, we "know" that it doesn't alias *any* write. We
479  // do want to respect mustalias results since defs are useful for value
480  // forwarding, but any mayalias write can be assumed to be noalias.
481  // Arguably, this logic should be pushed inside AliasAnalysis itself.
482  if (isLoad && QueryInst) {
483  LoadInst *LI = dyn_cast<LoadInst>(QueryInst);
484  if (LI && LI->hasMetadata(LLVMContext::MD_invariant_load))
485  isInvariantLoad = true;
486  }
487 
488  const DataLayout &DL = BB->getModule()->getDataLayout();
489 
490  // If the caller did not provide an ordered basic block,
491  // create one to lazily compute and cache instruction
492  // positions inside a BB. This is used to provide fast queries for relative
493  // position between two instructions in a BB and can be used by
494  // AliasAnalysis::callCapturesBefore.
495  OrderedBasicBlock OBBTmp(BB);
496  if (!OBB)
497  OBB = &OBBTmp;
498 
499  // Return "true" if and only if the instruction I is either a non-simple
500  // load or a non-simple store.
501  auto isNonSimpleLoadOrStore = [](Instruction *I) -> bool {
502  if (auto *LI = dyn_cast<LoadInst>(I))
503  return !LI->isSimple();
504  if (auto *SI = dyn_cast<StoreInst>(I))
505  return !SI->isSimple();
506  return false;
507  };
508 
509  // Return "true" if I is not a load and not a store, but it does access
510  // memory.
511  auto isOtherMemAccess = [](Instruction *I) -> bool {
512  return !isa<LoadInst>(I) && !isa<StoreInst>(I) && I->mayReadOrWriteMemory();
513  };
514 
515  // Walk backwards through the basic block, looking for dependencies.
516  while (ScanIt != BB->begin()) {
517  Instruction *Inst = &*--ScanIt;
518 
519  if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(Inst))
520  // Debug intrinsics don't (and can't) cause dependencies.
521  if (isa<DbgInfoIntrinsic>(II))
522  continue;
523 
524  // Limit the amount of scanning we do so we don't end up with quadratic
525  // running time on extreme testcases.
526  --*Limit;
527  if (!*Limit)
528  return MemDepResult::getUnknown();
529 
530  if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(Inst)) {
531  // If we reach a lifetime begin or end marker, then the query ends here
532  // because the value is undefined.
533  if (II->getIntrinsicID() == Intrinsic::lifetime_start) {
534  // FIXME: This only considers queries directly on the invariant-tagged
535  // pointer, not on query pointers that are indexed off of them. It'd
536  // be nice to handle that at some point (the right approach is to use
537  // GetPointerBaseWithConstantOffset).
538  if (AA.isMustAlias(MemoryLocation(II->getArgOperand(1)), MemLoc))
539  return MemDepResult::getDef(II);
540  continue;
541  }
542  }
543 
544  // Values depend on loads if the pointers are must aliased. This means
545  // that a load depends on another must aliased load from the same value.
546  // One exception is atomic loads: a value can depend on an atomic load that
547  // it does not alias with when this atomic load indicates that another
548  // thread may be accessing the location.
549  if (LoadInst *LI = dyn_cast<LoadInst>(Inst)) {
550   // While volatile accesses cannot be eliminated, they do not have to clobber
551  // non-aliasing locations, as normal accesses, for example, can be safely
552  // reordered with volatile accesses.
553  if (LI->isVolatile()) {
554  if (!QueryInst)
555  // Original QueryInst *may* be volatile
556  return MemDepResult::getClobber(LI);
557  if (isVolatile(QueryInst))
558  // Ordering required if QueryInst is itself volatile
559  return MemDepResult::getClobber(LI);
560  // Otherwise, volatile doesn't imply any special ordering
561  }
562 
563  // Atomic loads have complications involved.
564  // A Monotonic (or higher) load is OK if the query inst is itself not
565  // atomic.
566  // FIXME: This is overly conservative.
567  if (LI->isAtomic() && isStrongerThanUnordered(LI->getOrdering())) {
568  if (!QueryInst || isNonSimpleLoadOrStore(QueryInst) ||
569  isOtherMemAccess(QueryInst))
570  return MemDepResult::getClobber(LI);
571  if (LI->getOrdering() != AtomicOrdering::Monotonic)
572  return MemDepResult::getClobber(LI);
573  }
574 
575  MemoryLocation LoadLoc = MemoryLocation::get(LI);
576 
577  // If we found a pointer, check if it could be the same as our pointer.
578  AliasResult R = AA.alias(LoadLoc, MemLoc);
579 
580  if (isLoad) {
581  if (R == NoAlias)
582  continue;
583 
584  // Must aliased loads are defs of each other.
585  if (R == MustAlias)
586  return MemDepResult::getDef(Inst);
587 
588 #if 0 // FIXME: Temporarily disabled. GVN is cleverly rewriting loads
589  // in terms of clobbering loads, but since it does this by looking
590  // at the clobbering load directly, it doesn't know about any
591  // phi translation that may have happened along the way.
592 
593  // If we have a partial alias, then return this as a clobber for the
594  // client to handle.
595  if (R == PartialAlias)
596  return MemDepResult::getClobber(Inst);
597 #endif
598 
599  // Random may-alias loads don't depend on each other without a
600  // dependence.
601  continue;
602  }
603 
604  // Stores don't depend on other no-aliased accesses.
605  if (R == NoAlias)
606  continue;
607 
608  // Stores don't alias loads from read-only memory.
609  if (AA.pointsToConstantMemory(LoadLoc))
610  continue;
611 
612  // Stores depend on may/must aliased loads.
613  return MemDepResult::getDef(Inst);
614  }
615 
616  if (StoreInst *SI = dyn_cast<StoreInst>(Inst)) {
617  // Atomic stores have complications involved.
618  // A Monotonic store is OK if the query inst is itself not atomic.
619  // FIXME: This is overly conservative.
620  if (!SI->isUnordered() && SI->isAtomic()) {
621  if (!QueryInst || isNonSimpleLoadOrStore(QueryInst) ||
622  isOtherMemAccess(QueryInst))
623   return MemDepResult::getClobber(SI);
624   if (SI->getOrdering() != AtomicOrdering::Monotonic)
625   return MemDepResult::getClobber(SI);
626   }
627 
628  // FIXME: this is overly conservative.
629   // While volatile accesses cannot be eliminated, they do not have to clobber
630  // non-aliasing locations, as normal accesses can for example be reordered
631  // with volatile accesses.
632  if (SI->isVolatile())
633  if (!QueryInst || isNonSimpleLoadOrStore(QueryInst) ||
634  isOtherMemAccess(QueryInst))
635   return MemDepResult::getClobber(SI);
636 
637  // If alias analysis can tell that this store is guaranteed to not modify
638  // the query pointer, ignore it. Use getModRefInfo to handle cases where
639  // the query pointer points to constant memory etc.
640  if (!isModOrRefSet(AA.getModRefInfo(SI, MemLoc)))
641  continue;
642 
643  // Ok, this store might clobber the query pointer. Check to see if it is
644  // a must alias: in this case, we want to return this as a def.
645  // FIXME: Use ModRefInfo::Must bit from getModRefInfo call above.
646   MemoryLocation StoreLoc = MemoryLocation::get(SI);
647 
648  // If we found a pointer, check if it could be the same as our pointer.
649  AliasResult R = AA.alias(StoreLoc, MemLoc);
650 
651  if (R == NoAlias)
652  continue;
653  if (R == MustAlias)
654  return MemDepResult::getDef(Inst);
655  if (isInvariantLoad)
656  continue;
657  return MemDepResult::getClobber(Inst);
658  }
659 
660  // If this is an allocation, and if we know that the accessed pointer is to
661  // the allocation, return Def. This means that there is no dependence and
662  // the access can be optimized based on that. For example, a load could
663  // turn into undef. Note that we can bypass the allocation itself when
664  // looking for a clobber in many cases; that's an alias property and is
665  // handled by BasicAA.
666  if (isa<AllocaInst>(Inst) || isNoAliasFn(Inst, &TLI)) {
667  const Value *AccessPtr = GetUnderlyingObject(MemLoc.Ptr, DL);
668  if (AccessPtr == Inst || AA.isMustAlias(Inst, AccessPtr))
669  return MemDepResult::getDef(Inst);
670  }
671 
672  if (isInvariantLoad)
673  continue;
674 
675  // A release fence requires that all stores complete before it, but does
676  // not prevent the reordering of following loads or stores 'before' the
677  // fence. As a result, we look past it when finding a dependency for
678  // loads. DSE uses this to find preceding stores to delete and thus we
679  // can't bypass the fence if the query instruction is a store.
680  if (FenceInst *FI = dyn_cast<FenceInst>(Inst))
681  if (isLoad && FI->getOrdering() == AtomicOrdering::Release)
682  continue;
683 
684  // See if this instruction (e.g. a call or vaarg) mod/ref's the pointer.
685  ModRefInfo MR = AA.getModRefInfo(Inst, MemLoc);
686  // If necessary, perform additional analysis.
687  if (isModAndRefSet(MR))
688  MR = AA.callCapturesBefore(Inst, MemLoc, &DT, OBB);
689  switch (clearMust(MR)) {
690   case ModRefInfo::NoModRef:
691   // If the call has no effect on the queried pointer, just ignore it.
692  continue;
693  case ModRefInfo::Mod:
694  return MemDepResult::getClobber(Inst);
695  case ModRefInfo::Ref:
696  // If the call is known to never store to the pointer, and if this is a
697  // load query, we can safely ignore it (scan past it).
698  if (isLoad)
699  continue;
700   LLVM_FALLTHROUGH;
701   default:
702  // Otherwise, there is a potential dependence. Return a clobber.
703  return MemDepResult::getClobber(Inst);
704  }
705  }
706 
707  // No dependence found. If this is the entry block of the function, it is
708  // unknown, otherwise it is non-local.
709  if (BB != &BB->getParent()->getEntryBlock())
710  return MemDepResult::getNonLocal();
711   return MemDepResult::getNonFuncLocal();
712 }
713 
714 MemDepResult MemoryDependenceResults::getDependency(Instruction *QueryInst,
715                                                      OrderedBasicBlock *OBB) {
716  Instruction *ScanPos = QueryInst;
717 
718  // Check for a cached result
719  MemDepResult &LocalCache = LocalDeps[QueryInst];
720 
721  // If the cached entry is non-dirty, just return it. Note that this depends
722  // on MemDepResult's default constructing to 'dirty'.
723  if (!LocalCache.isDirty())
724  return LocalCache;
725 
726  // Otherwise, if we have a dirty entry, we know we can start the scan at that
727  // instruction, which may save us some work.
728  if (Instruction *Inst = LocalCache.getInst()) {
729  ScanPos = Inst;
730 
731  RemoveFromReverseMap(ReverseLocalDeps, Inst, QueryInst);
732  }
733 
734  BasicBlock *QueryParent = QueryInst->getParent();
735 
736  // Do the scan.
737  if (BasicBlock::iterator(QueryInst) == QueryParent->begin()) {
738  // No dependence found. If this is the entry block of the function, it is
739  // unknown, otherwise it is non-local.
740  if (QueryParent != &QueryParent->getParent()->getEntryBlock())
741  LocalCache = MemDepResult::getNonLocal();
742  else
743  LocalCache = MemDepResult::getNonFuncLocal();
744  } else {
745  MemoryLocation MemLoc;
746  ModRefInfo MR = GetLocation(QueryInst, MemLoc, TLI);
747  if (MemLoc.Ptr) {
748  // If we can do a pointer scan, make it happen.
749  bool isLoad = !isModSet(MR);
750  if (auto *II = dyn_cast<IntrinsicInst>(QueryInst))
751  isLoad |= II->getIntrinsicID() == Intrinsic::lifetime_start;
752 
753  LocalCache =
754  getPointerDependencyFrom(MemLoc, isLoad, ScanPos->getIterator(),
755  QueryParent, QueryInst, nullptr, OBB);
756  } else if (auto *QueryCall = dyn_cast<CallBase>(QueryInst)) {
757  bool isReadOnly = AA.onlyReadsMemory(QueryCall);
758  LocalCache = getCallDependencyFrom(QueryCall, isReadOnly,
759  ScanPos->getIterator(), QueryParent);
760  } else
761  // Non-memory instruction.
762  LocalCache = MemDepResult::getUnknown();
763  }
764 
765  // Remember the result!
766  if (Instruction *I = LocalCache.getInst())
767  ReverseLocalDeps[I].insert(QueryInst);
768 
769  return LocalCache;
770 }
771 
772 #ifndef NDEBUG
773 /// This method is used when -debug is specified to verify that cache arrays
774 /// are properly kept sorted.
775 static void AssertSorted(MemoryDependenceResults::NonLocalDepInfo &Cache,
776                          int Count = -1) {
777  if (Count == -1)
778  Count = Cache.size();
779  assert(std::is_sorted(Cache.begin(), Cache.begin() + Count) &&
780  "Cache isn't sorted!");
781 }
782 #endif
783 
784 const MemoryDependenceResults::NonLocalDepInfo &
785 MemoryDependenceResults::getNonLocalCallDependency(CallBase *QueryCall) {
786   assert(getDependency(QueryCall).isNonLocal() &&
787  "getNonLocalCallDependency should only be used on calls with "
788  "non-local deps!");
789  PerInstNLInfo &CacheP = NonLocalDeps[QueryCall];
790  NonLocalDepInfo &Cache = CacheP.first;
791 
792  // This is the set of blocks that need to be recomputed. In the cached case,
793  // this can happen due to instructions being deleted etc. In the uncached
794  // case, this starts out as the set of predecessors we care about.
795  SmallVector<BasicBlock *, 32> DirtyBlocks;
796 
797  if (!Cache.empty()) {
798  // Okay, we have a cache entry. If we know it is not dirty, just return it
799  // with no computation.
800  if (!CacheP.second) {
801  ++NumCacheNonLocal;
802  return Cache;
803  }
804 
805  // If we already have a partially computed set of results, scan them to
806  // determine what is dirty, seeding our initial DirtyBlocks worklist.
807  for (auto &Entry : Cache)
808  if (Entry.getResult().isDirty())
809  DirtyBlocks.push_back(Entry.getBB());
810 
811  // Sort the cache so that we can do fast binary search lookups below.
812  llvm::sort(Cache);
813 
814  ++NumCacheDirtyNonLocal;
815  // cerr << "CACHED CASE: " << DirtyBlocks.size() << " dirty: "
816  // << Cache.size() << " cached: " << *QueryInst;
817  } else {
818  // Seed DirtyBlocks with each of the preds of QueryInst's block.
819  BasicBlock *QueryBB = QueryCall->getParent();
820  for (BasicBlock *Pred : PredCache.get(QueryBB))
821  DirtyBlocks.push_back(Pred);
822  ++NumUncacheNonLocal;
823  }
824 
825  // isReadonlyCall - If this is a read-only call, we can be more aggressive.
826  bool isReadonlyCall = AA.onlyReadsMemory(QueryCall);
827 
828   SmallPtrSet<BasicBlock *, 32> Visited;
829 
830  unsigned NumSortedEntries = Cache.size();
831  LLVM_DEBUG(AssertSorted(Cache));
832 
833  // Iterate while we still have blocks to update.
834  while (!DirtyBlocks.empty()) {
835  BasicBlock *DirtyBB = DirtyBlocks.back();
836  DirtyBlocks.pop_back();
837 
838  // Already processed this block?
839  if (!Visited.insert(DirtyBB).second)
840  continue;
841 
842  // Do a binary search to see if we already have an entry for this block in
843  // the cache set. If so, find it.
844  LLVM_DEBUG(AssertSorted(Cache, NumSortedEntries));
845  NonLocalDepInfo::iterator Entry =
846  std::upper_bound(Cache.begin(), Cache.begin() + NumSortedEntries,
847  NonLocalDepEntry(DirtyBB));
848  if (Entry != Cache.begin() && std::prev(Entry)->getBB() == DirtyBB)
849  --Entry;
850 
851  NonLocalDepEntry *ExistingResult = nullptr;
852  if (Entry != Cache.begin() + NumSortedEntries &&
853  Entry->getBB() == DirtyBB) {
854  // If we already have an entry, and if it isn't already dirty, the block
855  // is done.
856  if (!Entry->getResult().isDirty())
857  continue;
858 
859  // Otherwise, remember this slot so we can update the value.
860  ExistingResult = &*Entry;
861  }
862 
863  // If the dirty entry has a pointer, start scanning from it so we don't have
864  // to rescan the entire block.
865  BasicBlock::iterator ScanPos = DirtyBB->end();
866  if (ExistingResult) {
867  if (Instruction *Inst = ExistingResult->getResult().getInst()) {
868  ScanPos = Inst->getIterator();
869  // We're removing QueryInst's use of Inst.
870  RemoveFromReverseMap<Instruction *>(ReverseNonLocalDeps, Inst,
871  QueryCall);
872  }
873  }
874 
875  // Find out if this block has a local dependency for QueryInst.
876  MemDepResult Dep;
877 
878  if (ScanPos != DirtyBB->begin()) {
879  Dep = getCallDependencyFrom(QueryCall, isReadonlyCall, ScanPos, DirtyBB);
880  } else if (DirtyBB != &DirtyBB->getParent()->getEntryBlock()) {
881  // No dependence found. If this is the entry block of the function, it is
882  // a clobber, otherwise it is unknown.
883   Dep = MemDepResult::getNonLocal();
884   } else {
885   Dep = MemDepResult::getNonFuncLocal();
886   }
887 
888  // If we had a dirty entry for the block, update it. Otherwise, just add
889  // a new entry.
890  if (ExistingResult)
891  ExistingResult->setResult(Dep);
892  else
893  Cache.push_back(NonLocalDepEntry(DirtyBB, Dep));
894 
895  // If the block has a dependency (i.e. it isn't completely transparent to
896  // the value), remember the association!
897  if (!Dep.isNonLocal()) {
898  // Keep the ReverseNonLocalDeps map up to date so we can efficiently
899  // update this when we remove instructions.
900  if (Instruction *Inst = Dep.getInst())
901  ReverseNonLocalDeps[Inst].insert(QueryCall);
902  } else {
903 
904  // If the block *is* completely transparent to the load, we need to check
905  // the predecessors of this block. Add them to our worklist.
906  for (BasicBlock *Pred : PredCache.get(DirtyBB))
907  DirtyBlocks.push_back(Pred);
908  }
909  }
910 
911  return Cache;
912 }
913 
914 void MemoryDependenceResults::getNonLocalPointerDependency(
915     Instruction *QueryInst, SmallVectorImpl<NonLocalDepResult> &Result) {
916  const MemoryLocation Loc = MemoryLocation::get(QueryInst);
917  bool isLoad = isa<LoadInst>(QueryInst);
918  BasicBlock *FromBB = QueryInst->getParent();
919  assert(FromBB);
920 
921  assert(Loc.Ptr->getType()->isPointerTy() &&
922  "Can't get pointer deps of a non-pointer!");
923  Result.clear();
924  {
925  // Check if there is cached Def with invariant.group.
926  auto NonLocalDefIt = NonLocalDefsCache.find(QueryInst);
927  if (NonLocalDefIt != NonLocalDefsCache.end()) {
928  Result.push_back(NonLocalDefIt->second);
929  ReverseNonLocalDefsCache[NonLocalDefIt->second.getResult().getInst()]
930  .erase(QueryInst);
931  NonLocalDefsCache.erase(NonLocalDefIt);
932  return;
933  }
934  }
935  // This routine does not expect to deal with volatile instructions.
936  // Doing so would require piping through the QueryInst all the way through.
937  // TODO: volatiles can't be elided, but they can be reordered with other
938  // non-volatile accesses.
939 
940  // We currently give up on any instruction which is ordered, but we do handle
941  // atomic instructions which are unordered.
942  // TODO: Handle ordered instructions
943  auto isOrdered = [](Instruction *Inst) {
944  if (LoadInst *LI = dyn_cast<LoadInst>(Inst)) {
945  return !LI->isUnordered();
946  } else if (StoreInst *SI = dyn_cast<StoreInst>(Inst)) {
947  return !SI->isUnordered();
948  }
949  return false;
950  };
951  if (isVolatile(QueryInst) || isOrdered(QueryInst)) {
952   Result.push_back(NonLocalDepResult(FromBB, MemDepResult::getUnknown(),
953   const_cast<Value *>(Loc.Ptr)));
954  return;
955  }
956  const DataLayout &DL = FromBB->getModule()->getDataLayout();
957  PHITransAddr Address(const_cast<Value *>(Loc.Ptr), DL, &AC);
958 
959  // This is the set of blocks we've inspected, and the pointer we consider in
960  // each block. Because of critical edges, we currently bail out if querying
961  // a block with multiple different pointers. This can happen during PHI
962  // translation.
963   DenseMap<BasicBlock *, Value *> Visited;
964   if (getNonLocalPointerDepFromBB(QueryInst, Address, Loc, isLoad, FromBB,
965  Result, Visited, true))
966  return;
967  Result.clear();
968   Result.push_back(NonLocalDepResult(FromBB, MemDepResult::getUnknown(),
969   const_cast<Value *>(Loc.Ptr)));
970 }
971 
972 /// Compute the memdep value for BB with Pointer/PointeeSize using either
973 /// cached information in Cache or by doing a lookup (which may use dirty cache
974 /// info if available).
975 ///
976 /// If we do a lookup, add the result to the cache.
977 MemDepResult MemoryDependenceResults::GetNonLocalInfoForBlock(
978  Instruction *QueryInst, const MemoryLocation &Loc, bool isLoad,
979  BasicBlock *BB, NonLocalDepInfo *Cache, unsigned NumSortedEntries) {
980 
981  // Do a binary search to see if we already have an entry for this block in
982  // the cache set. If so, find it.
983  NonLocalDepInfo::iterator Entry = std::upper_bound(
984  Cache->begin(), Cache->begin() + NumSortedEntries, NonLocalDepEntry(BB));
985  if (Entry != Cache->begin() && (Entry - 1)->getBB() == BB)
986  --Entry;
987 
988  NonLocalDepEntry *ExistingResult = nullptr;
989  if (Entry != Cache->begin() + NumSortedEntries && Entry->getBB() == BB)
990  ExistingResult = &*Entry;
991 
992  // If we have a cached entry, and it is non-dirty, use it as the value for
993  // this dependency.
994  if (ExistingResult && !ExistingResult->getResult().isDirty()) {
995  ++NumCacheNonLocalPtr;
996  return ExistingResult->getResult();
997  }
998 
999  // Otherwise, we have to scan for the value. If we have a dirty cache
1000  // entry, start scanning from its position, otherwise we scan from the end
1001  // of the block.
1002  BasicBlock::iterator ScanPos = BB->end();
1003  if (ExistingResult && ExistingResult->getResult().getInst()) {
1004  assert(ExistingResult->getResult().getInst()->getParent() == BB &&
1005  "Instruction invalidated?");
1006  ++NumCacheDirtyNonLocalPtr;
1007  ScanPos = ExistingResult->getResult().getInst()->getIterator();
1008 
1009  // Eliminating the dirty entry from 'Cache', so update the reverse info.
1010  ValueIsLoadPair CacheKey(Loc.Ptr, isLoad);
1011  RemoveFromReverseMap(ReverseNonLocalPtrDeps, &*ScanPos, CacheKey);
1012  } else {
1013  ++NumUncacheNonLocalPtr;
1014  }
1015 
1016  // Scan the block for the dependency.
1017  MemDepResult Dep =
1018  getPointerDependencyFrom(Loc, isLoad, ScanPos, BB, QueryInst);
1019 
1020  // If we had a dirty entry for the block, update it. Otherwise, just add
1021  // a new entry.
1022  if (ExistingResult)
1023  ExistingResult->setResult(Dep);
1024  else
1025  Cache->push_back(NonLocalDepEntry(BB, Dep));
1026 
1027  // If the block has a dependency (i.e. it isn't completely transparent to
1028  // the value), remember the reverse association because we just added it
1029  // to Cache!
1030  if (!Dep.isDef() && !Dep.isClobber())
1031  return Dep;
1032 
1033  // Keep the ReverseNonLocalPtrDeps map up to date so we can efficiently
1034  // update MemDep when we remove instructions.
1035  Instruction *Inst = Dep.getInst();
1036  assert(Inst && "Didn't depend on anything?");
1037  ValueIsLoadPair CacheKey(Loc.Ptr, isLoad);
1038  ReverseNonLocalPtrDeps[Inst].insert(CacheKey);
1039  return Dep;
1040 }
1041 
1042 /// Sort the NonLocalDepInfo cache, given a certain number of elements in the
1043 /// array that are already properly ordered.
1044 ///
1045 /// This is optimized for the case when only a few entries are added.
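1046 /// (One or two trailing entries are placed with a binary search and a single
1047 /// insertion each; anything more falls back to a full llvm::sort.)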
1046 static void
1047 SortNonLocalDepInfoCache(MemoryDependenceResults::NonLocalDepInfo &Cache,
1048                          unsigned NumSortedEntries) {
1049  switch (Cache.size() - NumSortedEntries) {
1050  case 0:
1051  // done, no new entries.
1052  break;
1053  case 2: {
1054  // Two new entries, insert the last one into place.
1055  NonLocalDepEntry Val = Cache.back();
1056  Cache.pop_back();
1057  MemoryDependenceResults::NonLocalDepInfo::iterator Entry =
1058  std::upper_bound(Cache.begin(), Cache.end() - 1, Val);
1059  Cache.insert(Entry, Val);
1060   LLVM_FALLTHROUGH;
1061   }
1062  case 1:
1063  // One new entry, Just insert the new value at the appropriate position.
1064  if (Cache.size() != 1) {
1065  NonLocalDepEntry Val = Cache.back();
1066  Cache.pop_back();
1067  MemoryDependenceResults::NonLocalDepInfo::iterator Entry =
1068  std::upper_bound(Cache.begin(), Cache.end(), Val);
1069  Cache.insert(Entry, Val);
1070  }
1071  break;
1072  default:
1073  // Added many values, do a full scale sort.
1074  llvm::sort(Cache);
1075  break;
1076  }
1077 }
1078 
1079 /// Perform a dependency query based on pointer/pointeesize starting at the end
1080 /// of StartBB.
1081 ///
1082 /// Add any clobber/def results to the results vector and keep track of which
1083 /// blocks are visited in 'Visited'.
1084 ///
1085 /// This has special behavior for the first block queries (when SkipFirstBlock
1086 /// is true). In this special case, it ignores the contents of the specified
1087 /// block and starts returning dependence info for its predecessors.
1088 ///
1089 /// This function returns true on success, or false to indicate that it could
1090 /// not compute dependence information for some reason. This should be treated
1091 /// as a clobber dependence on the first instruction in the predecessor block.
1092 bool MemoryDependenceResults::getNonLocalPointerDepFromBB(
1093  Instruction *QueryInst, const PHITransAddr &Pointer,
1094  const MemoryLocation &Loc, bool isLoad, BasicBlock *StartBB,
1095     SmallVectorImpl<NonLocalDepResult> &Result,
1096     DenseMap<BasicBlock *, Value *> &Visited, bool SkipFirstBlock) {
1097  // Look up the cached info for Pointer.
1098  ValueIsLoadPair CacheKey(Pointer.getAddr(), isLoad);
1099 
1100  // Set up a temporary NLPI value. If the map doesn't yet have an entry for
1101  // CacheKey, this value will be inserted as the associated value. Otherwise,
1102  // it'll be ignored, and we'll have to check to see if the cached size and
1103  // aa tags are consistent with the current query.
1104  NonLocalPointerInfo InitialNLPI;
1105  InitialNLPI.Size = Loc.Size;
1106  InitialNLPI.AATags = Loc.AATags;
1107 
1108  // Get the NLPI for CacheKey, inserting one into the map if it doesn't
1109  // already have one.
1110  std::pair<CachedNonLocalPointerInfo::iterator, bool> Pair =
1111  NonLocalPointerDeps.insert(std::make_pair(CacheKey, InitialNLPI));
1112  NonLocalPointerInfo *CacheInfo = &Pair.first->second;
1113 
1114  // If we already have a cache entry for this CacheKey, we may need to do some
1115  // work to reconcile the cache entry and the current query.
1116  if (!Pair.second) {
1117  if (CacheInfo->Size != Loc.Size) {
1118  bool ThrowOutEverything;
1119  if (CacheInfo->Size.hasValue() && Loc.Size.hasValue()) {
1120  // FIXME: We may be able to do better in the face of results with mixed
1121  // precision. We don't appear to get them in practice, though, so just
1122  // be conservative.
1123  ThrowOutEverything =
1124  CacheInfo->Size.isPrecise() != Loc.Size.isPrecise() ||
1125  CacheInfo->Size.getValue() < Loc.Size.getValue();
1126  } else {
1127  // For our purposes, unknown size > all others.
1128  ThrowOutEverything = !Loc.Size.hasValue();
1129  }
1130 
1131  if (ThrowOutEverything) {
1132  // The query's Size is greater than the cached one. Throw out the
1133  // cached data and proceed with the query at the greater size.
1134  CacheInfo->Pair = BBSkipFirstBlockPair();
1135  CacheInfo->Size = Loc.Size;
1136  for (auto &Entry : CacheInfo->NonLocalDeps)
1137  if (Instruction *Inst = Entry.getResult().getInst())
1138  RemoveFromReverseMap(ReverseNonLocalPtrDeps, Inst, CacheKey);
1139  CacheInfo->NonLocalDeps.clear();
1140  } else {
1141  // This query's Size is less than the cached one. Conservatively restart
1142  // the query using the greater size.
1143  return getNonLocalPointerDepFromBB(
1144  QueryInst, Pointer, Loc.getWithNewSize(CacheInfo->Size), isLoad,
1145  StartBB, Result, Visited, SkipFirstBlock);
1146  }
1147  }
1148 
1149  // If the query's AATags are inconsistent with the cached one,
1150  // conservatively throw out the cached data and restart the query with
1151  // no tag if needed.
1152  if (CacheInfo->AATags != Loc.AATags) {
1153  if (CacheInfo->AATags) {
1154  CacheInfo->Pair = BBSkipFirstBlockPair();
1155  CacheInfo->AATags = AAMDNodes();
1156  for (auto &Entry : CacheInfo->NonLocalDeps)
1157  if (Instruction *Inst = Entry.getResult().getInst())
1158  RemoveFromReverseMap(ReverseNonLocalPtrDeps, Inst, CacheKey);
1159  CacheInfo->NonLocalDeps.clear();
1160  }
1161  if (Loc.AATags)
1162  return getNonLocalPointerDepFromBB(
1163  QueryInst, Pointer, Loc.getWithoutAATags(), isLoad, StartBB, Result,
1164  Visited, SkipFirstBlock);
1165  }
1166  }
1167 
1168  NonLocalDepInfo *Cache = &CacheInfo->NonLocalDeps;
1169 
1170  // If we have valid cached information for exactly the block we are
1171  // investigating, just return it with no recomputation.
1172  if (CacheInfo->Pair == BBSkipFirstBlockPair(StartBB, SkipFirstBlock)) {
1173  // We have a fully cached result for this query then we can just return the
1174  // cached results and populate the visited set. However, we have to verify
1175  // that we don't already have conflicting results for these blocks. Check
1176  // to ensure that if a block in the results set is in the visited set that
1177  // it was for the same pointer query.
1178  if (!Visited.empty()) {
1179  for (auto &Entry : *Cache) {
1180   DenseMap<BasicBlock *, Value *>::iterator VI =
1181   Visited.find(Entry.getBB());
1182  if (VI == Visited.end() || VI->second == Pointer.getAddr())
1183  continue;
1184 
1185  // We have a pointer mismatch in a block. Just return false, saying
1186  // that something was clobbered in this result. We could also do a
1187  // non-fully cached query, but there is little point in doing this.
1188  return false;
1189  }
1190  }
1191 
1192  Value *Addr = Pointer.getAddr();
1193  for (auto &Entry : *Cache) {
1194  Visited.insert(std::make_pair(Entry.getBB(), Addr));
1195  if (Entry.getResult().isNonLocal()) {
1196  continue;
1197  }
1198 
1199  if (DT.isReachableFromEntry(Entry.getBB())) {
1200  Result.push_back(
1201  NonLocalDepResult(Entry.getBB(), Entry.getResult(), Addr));
1202  }
1203  }
1204  ++NumCacheCompleteNonLocalPtr;
1205  return true;
1206  }
1207 
1208  // Otherwise, either this is a new block, a block with an invalid cache
1209  // pointer or one that we're about to invalidate by putting more info into it
1210  // than its valid cache info. If empty, the result will be valid cache info,
1211  // otherwise it isn't.
1212  if (Cache->empty())
1213  CacheInfo->Pair = BBSkipFirstBlockPair(StartBB, SkipFirstBlock);
1214  else
1215  CacheInfo->Pair = BBSkipFirstBlockPair();
1216 
1217   SmallVector<BasicBlock *, 32> Worklist;
1218   Worklist.push_back(StartBB);
1219 
1220  // PredList used inside loop.
1221   SmallVector<std::pair<BasicBlock *, PHITransAddr>, 16> PredList;
1222 
1223  // Keep track of the entries that we know are sorted. Previously cached
1224  // entries will all be sorted. The entries we add we only sort on demand (we
1225  // don't insert every element into its sorted position). We know that we
1226  // won't get any reuse from currently inserted values, because we don't
1227  // revisit blocks after we insert info for them.
1228  unsigned NumSortedEntries = Cache->size();
1229  unsigned WorklistEntries = BlockNumberLimit;
1230  bool GotWorklistLimit = false;
1231  LLVM_DEBUG(AssertSorted(*Cache));
1232 
1233  while (!Worklist.empty()) {
1234  BasicBlock *BB = Worklist.pop_back_val();
1235 
1236  // If we do process a large number of blocks it becomes very expensive and
1237  // likely it isn't worth worrying about
1238  if (Result.size() > NumResultsLimit) {
1239  Worklist.clear();
1240  // Sort it now (if needed) so that recursive invocations of
1241  // getNonLocalPointerDepFromBB and other routines that could reuse the
1242  // cache value will only see properly sorted cache arrays.
1243  if (Cache && NumSortedEntries != Cache->size()) {
1244  SortNonLocalDepInfoCache(*Cache, NumSortedEntries);
1245  }
1246  // Since we bail out, the "Cache" set won't contain all of the
1247  // results for the query. This is ok (we can still use it to accelerate
1248  // specific block queries) but we can't do the fastpath "return all
1249  // results from the set". Clear out the indicator for this.
1250  CacheInfo->Pair = BBSkipFirstBlockPair();
1251  return false;
1252  }
1253 
1254  // Skip the first block if we have it.
1255  if (!SkipFirstBlock) {
1256  // Analyze the dependency of *Pointer in FromBB. See if we already have
1257  // been here.
1258  assert(Visited.count(BB) && "Should check 'visited' before adding to WL");
1259 
1260  // Get the dependency info for Pointer in BB. If we have cached
1261  // information, we will use it, otherwise we compute it.
1262  LLVM_DEBUG(AssertSorted(*Cache, NumSortedEntries));
1263  MemDepResult Dep = GetNonLocalInfoForBlock(QueryInst, Loc, isLoad, BB,
1264  Cache, NumSortedEntries);
1265 
1266  // If we got a Def or Clobber, add this to the list of results.
1267  if (!Dep.isNonLocal()) {
1268  if (DT.isReachableFromEntry(BB)) {
1269  Result.push_back(NonLocalDepResult(BB, Dep, Pointer.getAddr()));
1270  continue;
1271  }
1272  }
1273  }
1274 
1275  // If 'Pointer' is an instruction defined in this block, then we need to do
1276  // phi translation to change it into a value live in the predecessor block.
1277  // If not, we just add the predecessors to the worklist and scan them with
1278  // the same Pointer.
1279  if (!Pointer.NeedsPHITranslationFromBlock(BB)) {
1280  SkipFirstBlock = false;
1281   SmallVector<BasicBlock *, 16> NewBlocks;
1282   for (BasicBlock *Pred : PredCache.get(BB)) {
1283  // Verify that we haven't looked at this block yet.
1284  std::pair<DenseMap<BasicBlock *, Value *>::iterator, bool> InsertRes =
1285  Visited.insert(std::make_pair(Pred, Pointer.getAddr()));
1286  if (InsertRes.second) {
1287  // First time we've looked at *PI.
1288  NewBlocks.push_back(Pred);
1289  continue;
1290  }
1291 
1292  // If we have seen this block before, but it was with a different
1293  // pointer then we have a phi translation failure and we have to treat
1294  // this as a clobber.
1295  if (InsertRes.first->second != Pointer.getAddr()) {
1296  // Make sure to clean up the Visited map before continuing on to
1297  // PredTranslationFailure.
1298  for (unsigned i = 0; i < NewBlocks.size(); i++)
1299  Visited.erase(NewBlocks[i]);
1300  goto PredTranslationFailure;
1301  }
1302  }
1303  if (NewBlocks.size() > WorklistEntries) {
1304  // Make sure to clean up the Visited map before continuing on to
1305  // PredTranslationFailure.
1306  for (unsigned i = 0; i < NewBlocks.size(); i++)
1307  Visited.erase(NewBlocks[i]);
1308  GotWorklistLimit = true;
1309  goto PredTranslationFailure;
1310  }
1311  WorklistEntries -= NewBlocks.size();
1312  Worklist.append(NewBlocks.begin(), NewBlocks.end());
1313  continue;
1314  }
1315 
1316  // We do need to do phi translation, if we know ahead of time we can't phi
1317  // translate this value, don't even try.
1318  if (!Pointer.IsPotentiallyPHITranslatable())
1319  goto PredTranslationFailure;
1320 
1321  // We may have added values to the cache list before this PHI translation.
1322  // If so, we haven't done anything to ensure that the cache remains sorted.
1323  // Sort it now (if needed) so that recursive invocations of
1324  // getNonLocalPointerDepFromBB and other routines that could reuse the cache
1325  // value will only see properly sorted cache arrays.
1326  if (Cache && NumSortedEntries != Cache->size()) {
1327  SortNonLocalDepInfoCache(*Cache, NumSortedEntries);
1328  NumSortedEntries = Cache->size();
1329  }
1330  Cache = nullptr;
1331 
1332  PredList.clear();
1333  for (BasicBlock *Pred : PredCache.get(BB)) {
1334  PredList.push_back(std::make_pair(Pred, Pointer));
1335 
1336  // Get the PHI translated pointer in this predecessor. This can fail if
1337  // not translatable, in which case the getAddr() returns null.
1338  PHITransAddr &PredPointer = PredList.back().second;
1339  PredPointer.PHITranslateValue(BB, Pred, &DT, /*MustDominate=*/false);
1340  Value *PredPtrVal = PredPointer.getAddr();
1341 
1342  // Check to see if we have already visited this pred block with another
1343  // pointer. If so, we can't do this lookup. This failure can occur
1344  // with PHI translation when a critical edge exists and the PHI node in
1345  // the successor translates to a pointer value different than the
1346  // pointer the block was first analyzed with.
1347  std::pair<DenseMap<BasicBlock *, Value *>::iterator, bool> InsertRes =
1348  Visited.insert(std::make_pair(Pred, PredPtrVal));
1349 
1350  if (!InsertRes.second) {
1351  // We found the pred; take it off the list of preds to visit.
1352  PredList.pop_back();
1353 
1354  // If the predecessor was visited with PredPtr, then we already did
1355  // the analysis and can ignore it.
1356  if (InsertRes.first->second == PredPtrVal)
1357  continue;
1358 
1359  // Otherwise, the block was previously analyzed with a different
1360  // pointer. We can't represent the result of this case, so we just
1361  // treat this as a phi translation failure.
1362 
1363  // Make sure to clean up the Visited map before continuing on to
1364  // PredTranslationFailure.
1365  for (unsigned i = 0, n = PredList.size(); i < n; ++i)
1366  Visited.erase(PredList[i].first);
1367 
1368  goto PredTranslationFailure;
1369  }
1370  }
1371 
1372   // Actually process results here; this needs to be a separate loop to avoid
1373  // calling getNonLocalPointerDepFromBB for blocks we don't want to return
1374  // any results for. (getNonLocalPointerDepFromBB will modify our
1375  // datastructures in ways the code after the PredTranslationFailure label
1376  // doesn't expect.)
1377  for (unsigned i = 0, n = PredList.size(); i < n; ++i) {
1378  BasicBlock *Pred = PredList[i].first;
1379  PHITransAddr &PredPointer = PredList[i].second;
1380  Value *PredPtrVal = PredPointer.getAddr();
1381 
1382  bool CanTranslate = true;
1383  // If PHI translation was unable to find an available pointer in this
1384  // predecessor, then we have to assume that the pointer is clobbered in
1385  // that predecessor. We can still do PRE of the load, which would insert
1386  // a computation of the pointer in this predecessor.
1387  if (!PredPtrVal)
1388  CanTranslate = false;
1389 
1390  // FIXME: it is entirely possible that PHI translating will end up with
1391  // the same value. Consider PHI translating something like:
1392  // X = phi [x, bb1], [y, bb2]. PHI translating for bb1 doesn't *need*
1393  // to recurse here, pedantically speaking.
1394 
1395  // If getNonLocalPointerDepFromBB fails here, that means the cached
1396  // result conflicted with the Visited list; we have to conservatively
1397  // assume it is unknown, but this also does not block PRE of the load.
1398  if (!CanTranslate ||
1399  !getNonLocalPointerDepFromBB(QueryInst, PredPointer,
1400  Loc.getWithNewPtr(PredPtrVal), isLoad,
1401  Pred, Result, Visited)) {
1402  // Add the entry to the Result list.
1403  NonLocalDepResult Entry(Pred, MemDepResult::getUnknown(), PredPtrVal);
1404  Result.push_back(Entry);
1405 
1406  // Since we had a phi translation failure, the cache for CacheKey won't
1407  // include all of the entries that we need to immediately satisfy future
1408  // queries. Mark this in NonLocalPointerDeps by setting the
1409  // BBSkipFirstBlockPair pointer to null. This requires reuse of the
1410  // cached value to do more work but not miss the phi trans failure.
1411  NonLocalPointerInfo &NLPI = NonLocalPointerDeps[CacheKey];
1412  NLPI.Pair = BBSkipFirstBlockPair();
1413  continue;
1414  }
1415  }
1416 
1417  // Refresh the CacheInfo/Cache pointer so that it isn't invalidated.
1418  CacheInfo = &NonLocalPointerDeps[CacheKey];
1419  Cache = &CacheInfo->NonLocalDeps;
1420  NumSortedEntries = Cache->size();
1421 
1422  // Since we did phi translation, the "Cache" set won't contain all of the
1423  // results for the query. This is ok (we can still use it to accelerate
1424  // specific block queries) but we can't do the fastpath "return all
1425  // results from the set" Clear out the indicator for this.
1426  CacheInfo->Pair = BBSkipFirstBlockPair();
1427  SkipFirstBlock = false;
1428  continue;
1429 
1430  PredTranslationFailure:
1431  // The following code is "failure"; we can't produce a sane translation
1432  // for the given block. It assumes that we haven't modified any of
1433  // our datastructures while processing the current block.
1434 
1435  if (!Cache) {
1436  // Refresh the CacheInfo/Cache pointer if it got invalidated.
1437  CacheInfo = &NonLocalPointerDeps[CacheKey];
1438  Cache = &CacheInfo->NonLocalDeps;
1439  NumSortedEntries = Cache->size();
1440  }
1441 
1442  // Since we failed phi translation, the "Cache" set won't contain all of the
1443  // results for the query. This is ok (we can still use it to accelerate
1444  // specific block queries) but we can't do the fastpath "return all
1445  // results from the set". Clear out the indicator for this.
1446  CacheInfo->Pair = BBSkipFirstBlockPair();
1447 
1448  // If *nothing* works, mark the pointer as unknown.
1449  //
1450  // If this is the magic first block, return this as a clobber of the whole
1451  // incoming value. Since we can't phi translate to one of the predecessors,
1452  // we have to bail out.
1453  if (SkipFirstBlock)
1454  return false;
1455 
1456  bool foundBlock = false;
1457  for (NonLocalDepEntry &I : llvm::reverse(*Cache)) {
1458  if (I.getBB() != BB)
1459  continue;
1460 
1461  assert((GotWorklistLimit || I.getResult().isNonLocal() ||
1462  !DT.isReachableFromEntry(BB)) &&
1463  "Should only be here with transparent block");
1464  foundBlock = true;
1465  I.setResult(MemDepResult::getUnknown());
1466  Result.push_back(
1467  NonLocalDepResult(I.getBB(), I.getResult(), Pointer.getAddr()));
1468  break;
1469  }
1470  (void)foundBlock; (void)GotWorklistLimit;
1471  assert((foundBlock || GotWorklistLimit) && "Current block not in cache?");
1472  }
1473 
1474  // Okay, we're done now. If we added new values to the cache, re-sort it.
1475  SortNonLocalDepInfoCache(*Cache, NumSortedEntries);
1476  LLVM_DEBUG(AssertSorted(*Cache));
1477  return true;
1478 }
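// (Illustrative sketch, not part of the original source.) A client such as GVN
// reaches the machinery above through the public getNonLocalPointerDependency()
// entry point, roughly as follows; the variable names are hypothetical:
//
//   SmallVector<NonLocalDepResult, 64> Deps;
//   MD.getNonLocalPointerDependency(TheLoad, Deps);
//   for (const NonLocalDepResult &Dep : Deps) {
//     if (Dep.getResult().isDef())
//       ; // Dep.getBB() ends with a definition of the queried location.
//     else if (Dep.getResult().isClobber() || Dep.getResult().isUnknown())
//       ; // The value is not available through this block without more work.
//   }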
1479 
1480 /// If P exists in CachedNonLocalPointerInfo or NonLocalDefsCache, remove it.
1481 void MemoryDependenceResults::RemoveCachedNonLocalPointerDependencies(
1482  ValueIsLoadPair P) {
1483 
1484  // Most of the time this cache is empty.
1485  if (!NonLocalDefsCache.empty()) {
1486  auto it = NonLocalDefsCache.find(P.getPointer());
1487  if (it != NonLocalDefsCache.end()) {
1488  RemoveFromReverseMap(ReverseNonLocalDefsCache,
1489  it->second.getResult().getInst(), P.getPointer());
1490  NonLocalDefsCache.erase(it);
1491  }
1492 
1493  if (auto *I = dyn_cast<Instruction>(P.getPointer())) {
1494  auto toRemoveIt = ReverseNonLocalDefsCache.find(I);
1495  if (toRemoveIt != ReverseNonLocalDefsCache.end()) {
1496  for (const auto &entry : toRemoveIt->second)
1497  NonLocalDefsCache.erase(entry);
1498  ReverseNonLocalDefsCache.erase(toRemoveIt);
1499  }
1500  }
1501  }
1502 
1503  CachedNonLocalPointerInfo::iterator It = NonLocalPointerDeps.find(P);
1504  if (It == NonLocalPointerDeps.end())
1505  return;
1506 
1507  // Remove all of the entries in the BB->val map. This involves removing
1508  // instructions from the reverse map.
1509  NonLocalDepInfo &PInfo = It->second.NonLocalDeps;
1510 
1511  for (unsigned i = 0, e = PInfo.size(); i != e; ++i) {
1512  Instruction *Target = PInfo[i].getResult().getInst();
1513  if (!Target)
1514  continue; // Ignore non-local dep results.
1515  assert(Target->getParent() == PInfo[i].getBB());
1516 
1517  // Eliminating the dirty entry from 'Cache', so update the reverse info.
1518  RemoveFromReverseMap(ReverseNonLocalPtrDeps, Target, P);
1519  }
1520 
1521  // Remove P from NonLocalPointerDeps (which deletes NonLocalDepInfo).
1522  NonLocalPointerDeps.erase(It);
1523 }
1524 
1525 void MemoryDependenceResults::invalidateCachedPointerInfo(Value *Ptr) {
1526  // If Ptr isn't really a pointer, just ignore it.
1527  if (!Ptr->getType()->isPointerTy())
1528  return;
1529  // Flush store info for the pointer.
1530  RemoveCachedNonLocalPointerDependencies(ValueIsLoadPair(Ptr, false));
1531  // Flush load info for the pointer.
1532  RemoveCachedNonLocalPointerDependencies(ValueIsLoadPair(Ptr, true));
1533  // Invalidate phis that use the pointer.
1534  PV.invalidateValue(Ptr);
1535 }
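// (Illustrative sketch, not part of the original source.) A transformation that
// replaces one pointer value with another while keeping this analysis alive
// would typically invalidate the cached info keyed on the surviving pointer:
//
//   OldPtr->replaceAllUsesWith(NewPtr);
//   MD.invalidateCachedPointerInfo(NewPtr);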
1536 
1537 void MemoryDependenceResults::invalidateCachedPredecessors() {
1538  PredCache.clear();
1539 }
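// (Illustrative note, not part of the original source.) A pass that changes the
// CFG while this analysis is alive would typically drop the predecessor cache:
//
//   // e.g. after splitting an edge or removing a basic block
//   MD.invalidateCachedPredecessors();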
1540 
1541 void MemoryDependenceResults::removeInstruction(Instruction *RemInst) {
1542  // Walk through the Non-local dependencies, removing this one as the value
1543  // for any cached queries.
1544  NonLocalDepMapType::iterator NLDI = NonLocalDeps.find(RemInst);
1545  if (NLDI != NonLocalDeps.end()) {
1546  NonLocalDepInfo &BlockMap = NLDI->second.first;
1547  for (auto &Entry : BlockMap)
1548  if (Instruction *Inst = Entry.getResult().getInst())
1549  RemoveFromReverseMap(ReverseNonLocalDeps, Inst, RemInst);
1550  NonLocalDeps.erase(NLDI);
1551  }
1552 
1553  // If we have a cached local dependence query for this instruction, remove it.
1554  LocalDepMapType::iterator LocalDepEntry = LocalDeps.find(RemInst);
1555  if (LocalDepEntry != LocalDeps.end()) {
1556  // Remove us from DepInst's reverse set now that the local dep info is gone.
1557  if (Instruction *Inst = LocalDepEntry->second.getInst())
1558  RemoveFromReverseMap(ReverseLocalDeps, Inst, RemInst);
1559 
1560  // Remove this local dependency info.
1561  LocalDeps.erase(LocalDepEntry);
1562  }
1563 
1564  // If we have any cached pointer dependencies on this instruction, remove
1565  // them. If the instruction has non-pointer type, then it can't be a pointer
1566  // base.
1567 
1568  // Remove it from both the load info and the store info. The instruction
1569  // can't be in either of these maps if it is non-pointer.
1570  if (RemInst->getType()->isPointerTy()) {
1571  RemoveCachedNonLocalPointerDependencies(ValueIsLoadPair(RemInst, false));
1572  RemoveCachedNonLocalPointerDependencies(ValueIsLoadPair(RemInst, true));
1573  }
1574 
1575  // Loop over all of the things that depend on the instruction we're removing.
1576  SmallVector<std::pair<Instruction *, Instruction *>, 8> ReverseDepsToAdd;
1577 
1578  // If we find RemInst as a clobber or Def in any of the maps for other values,
1579  // we need to replace its entry with a dirty version of the instruction after
1580  // it. If RemInst is a terminator, we use a null dirty value.
1581  //
1582  // Using a dirty version of the instruction after RemInst saves having to scan
1583  // the entire block to get to this point.
1584  MemDepResult NewDirtyVal;
1585  if (!RemInst->isTerminator())
1586  NewDirtyVal = MemDepResult::getDirty(&*++RemInst->getIterator());
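  // (Illustrative note, not part of the original source.) For example, if the
  // block contains
  //   store i32 0, i32* %p     ; RemInst being removed
  //   %v = load i32, i32* %q
  // then cached results that named the store are rewritten to a "dirty" entry
  // pointing at the load that follows it, so the next query re-scans from there
  // rather than from the end of the block.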
1587 
1588  ReverseDepMapType::iterator ReverseDepIt = ReverseLocalDeps.find(RemInst);
1589  if (ReverseDepIt != ReverseLocalDeps.end()) {
1590  // RemInst can't be the terminator if it has local stuff depending on it.
1591  assert(!ReverseDepIt->second.empty() && !RemInst->isTerminator() &&
1592  "Nothing can locally depend on a terminator");
1593 
1594  for (Instruction *InstDependingOnRemInst : ReverseDepIt->second) {
1595  assert(InstDependingOnRemInst != RemInst &&
1596  "Already removed our local dep info");
1597 
1598  LocalDeps[InstDependingOnRemInst] = NewDirtyVal;
1599 
1600  // Make sure to remember that new things depend on NewDirtyVal's instruction.
1601  assert(NewDirtyVal.getInst() &&
1602  "There is no way something else can have "
1603  "a local dep on this if it is a terminator!");
1604  ReverseDepsToAdd.push_back(
1605  std::make_pair(NewDirtyVal.getInst(), InstDependingOnRemInst));
1606  }
1607 
1608  ReverseLocalDeps.erase(ReverseDepIt);
1609 
1610  // Add new reverse deps after scanning the set, to avoid invalidating the
1611  // 'ReverseDeps' reference.
1612  while (!ReverseDepsToAdd.empty()) {
1613  ReverseLocalDeps[ReverseDepsToAdd.back().first].insert(
1614  ReverseDepsToAdd.back().second);
1615  ReverseDepsToAdd.pop_back();
1616  }
1617  }
1618 
1619  ReverseDepIt = ReverseNonLocalDeps.find(RemInst);
1620  if (ReverseDepIt != ReverseNonLocalDeps.end()) {
1621  for (Instruction *I : ReverseDepIt->second) {
1622  assert(I != RemInst && "Already removed NonLocalDep info for RemInst");
1623 
1624  PerInstNLInfo &INLD = NonLocalDeps[I];
1625  // The information is now dirty!
1626  INLD.second = true;
1627 
1628  for (auto &Entry : INLD.first) {
1629  if (Entry.getResult().getInst() != RemInst)
1630  continue;
1631 
1632  // Convert to a dirty entry for the subsequent instruction.
1633  Entry.setResult(NewDirtyVal);
1634 
1635  if (Instruction *NextI = NewDirtyVal.getInst())
1636  ReverseDepsToAdd.push_back(std::make_pair(NextI, I));
1637  }
1638  }
1639 
1640  ReverseNonLocalDeps.erase(ReverseDepIt);
1641 
1642  // Add new reverse deps after scanning the set, to avoid invalidating it while iterating.
1643  while (!ReverseDepsToAdd.empty()) {
1644  ReverseNonLocalDeps[ReverseDepsToAdd.back().first].insert(
1645  ReverseDepsToAdd.back().second);
1646  ReverseDepsToAdd.pop_back();
1647  }
1648  }
1649 
1650  // If the instruction is in ReverseNonLocalPtrDeps then it appears as a
1651  // value in the NonLocalPointerDeps info.
1652  ReverseNonLocalPtrDepTy::iterator ReversePtrDepIt =
1653  ReverseNonLocalPtrDeps.find(RemInst);
1654  if (ReversePtrDepIt != ReverseNonLocalPtrDeps.end()) {
1655  SmallVector<std::pair<Instruction *, ValueIsLoadPair>, 8>
1656  ReversePtrDepsToAdd;
1657 
1658  for (ValueIsLoadPair P : ReversePtrDepIt->second) {
1659  assert(P.getPointer() != RemInst &&
1660  "Already removed NonLocalPointerDeps info for RemInst");
1661 
1662  NonLocalDepInfo &NLPDI = NonLocalPointerDeps[P].NonLocalDeps;
1663 
1664  // The cache is not valid for any specific block anymore.
1665  NonLocalPointerDeps[P].Pair = BBSkipFirstBlockPair();
1666 
1667  // Update any entries for RemInst to use the instruction after it.
1668  for (auto &Entry : NLPDI) {
1669  if (Entry.getResult().getInst() != RemInst)
1670  continue;
1671 
1672  // Convert to a dirty entry for the subsequent instruction.
1673  Entry.setResult(NewDirtyVal);
1674 
1675  if (Instruction *NewDirtyInst = NewDirtyVal.getInst())
1676  ReversePtrDepsToAdd.push_back(std::make_pair(NewDirtyInst, P));
1677  }
1678 
1679  // Re-sort the NonLocalDepInfo. Changing the dirty entry to its
1680  // subsequent value may invalidate the sortedness.
1681  llvm::sort(NLPDI);
1682  }
1683 
1684  ReverseNonLocalPtrDeps.erase(ReversePtrDepIt);
1685 
1686  while (!ReversePtrDepsToAdd.empty()) {
1687  ReverseNonLocalPtrDeps[ReversePtrDepsToAdd.back().first].insert(
1688  ReversePtrDepsToAdd.back().second);
1689  ReversePtrDepsToAdd.pop_back();
1690  }
1691  }
1692 
1693  // Invalidate phis that use the removed instruction.
1694  PV.invalidateValue(RemInst);
1695 
1696  assert(!NonLocalDeps.count(RemInst) && "RemInst got reinserted?");
1697  LLVM_DEBUG(verifyRemoved(RemInst));
1698 }
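// (Illustrative sketch, not part of the original source.) A pass that deletes
// an instruction while keeping MemoryDependenceResults alive would typically
// purge the cached information first and only then erase the IR:
//
//   MD.removeInstruction(DeadInst);
//   DeadInst->eraseFromParent();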
1699 
1700 /// Verify that the specified instruction does not occur in our internal data
1701 /// structures.
1702 ///
1703 /// This function verifies by asserting in debug builds.
1704 void MemoryDependenceResults::verifyRemoved(Instruction *D) const {
1705 #ifndef NDEBUG
1706  for (const auto &DepKV : LocalDeps) {
1707  assert(DepKV.first != D && "Inst occurs in data structures");
1708  assert(DepKV.second.getInst() != D && "Inst occurs in data structures");
1709  }
1710 
1711  for (const auto &DepKV : NonLocalPointerDeps) {
1712  assert(DepKV.first.getPointer() != D && "Inst occurs in NLPD map key");
1713  for (const auto &Entry : DepKV.second.NonLocalDeps)
1714  assert(Entry.getResult().getInst() != D && "Inst occurs as NLPD value");
1715  }
1716 
1717  for (const auto &DepKV : NonLocalDeps) {
1718  assert(DepKV.first != D && "Inst occurs in data structures");
1719  const PerInstNLInfo &INLD = DepKV.second;
1720  for (const auto &Entry : INLD.first)
1721  assert(Entry.getResult().getInst() != D &&
1722  "Inst occurs in data structures");
1723  }
1724 
1725  for (const auto &DepKV : ReverseLocalDeps) {
1726  assert(DepKV.first != D && "Inst occurs in data structures");
1727  for (Instruction *Inst : DepKV.second)
1728  assert(Inst != D && "Inst occurs in data structures");
1729  }
1730 
1731  for (const auto &DepKV : ReverseNonLocalDeps) {
1732  assert(DepKV.first != D && "Inst occurs in data structures");
1733  for (Instruction *Inst : DepKV.second)
1734  assert(Inst != D && "Inst occurs in data structures");
1735  }
1736 
1737  for (const auto &DepKV : ReverseNonLocalPtrDeps) {
1738  assert(DepKV.first != D && "Inst occurs in rev NLPD map");
1739 
1740  for (ValueIsLoadPair P : DepKV.second)
1741  assert(P != ValueIsLoadPair(D, false) && P != ValueIsLoadPair(D, true) &&
1742  "Inst occurs in ReverseNonLocalPtrDeps map");
1743  }
1744 #endif
1745 }
1746 
1747 AnalysisKey MemoryDependenceAnalysis::Key;
1748 
1749 MemoryDependenceAnalysis::MemoryDependenceAnalysis()
1750  : DefaultBlockScanLimit(BlockScanLimit) {}
1751 
1752 MemoryDependenceResults
1753 MemoryDependenceAnalysis::run(Function &F, FunctionAnalysisManager &AM) {
1754  auto &AA = AM.getResult<AAManager>(F);
1755  auto &AC = AM.getResult<AssumptionAnalysis>(F);
1756  auto &TLI = AM.getResult<TargetLibraryAnalysis>(F);
1757  auto &DT = AM.getResult<DominatorTreeAnalysis>(F);
1758  auto &PV = AM.getResult<PhiValuesAnalysis>(F);
1759  return MemoryDependenceResults(AA, AC, TLI, DT, PV, DefaultBlockScanLimit);
1760 }
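// (Illustrative sketch, not part of the original source.) Under the new pass
// manager another pass or analysis obtains the result like so; the query
// instruction here is hypothetical:
//
//   MemoryDependenceResults &MD = AM.getResult<MemoryDependenceAnalysis>(F);
//   MemDepResult Dep = MD.getDependency(TheLoadOrStore);  // hypothetical Instruction*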
1761 
1762 char MemoryDependenceWrapperPass::ID = 0;
1763 
1764 INITIALIZE_PASS_BEGIN(MemoryDependenceWrapperPass, "memdep",
1765  "Memory Dependence Analysis", false, true)
1766 INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
1767 INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
1768 INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
1769 INITIALIZE_PASS_DEPENDENCY(PhiValuesWrapperPass)
1770 INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
1771 INITIALIZE_PASS_END(MemoryDependenceWrapperPass, "memdep",
1772  "Memory Dependence Analysis", false, true)
1773 
1774 MemoryDependenceWrapperPass::MemoryDependenceWrapperPass() : FunctionPass(ID) {
1775  initializeMemoryDependenceWrapperPassPass(*PassRegistry::getPassRegistry());
1776 }
1777 
1778 MemoryDependenceWrapperPass::~MemoryDependenceWrapperPass() = default;
1779 
1780 void MemoryDependenceWrapperPass::releaseMemory() {
1781  MemDep.reset();
1782 }
1783 
1784 void MemoryDependenceWrapperPass::getAnalysisUsage(AnalysisUsage &AU) const {
1785  AU.setPreservesAll();
1786  AU.addRequired<AssumptionCacheTracker>();
1787  AU.addRequired<DominatorTreeWrapperPass>();
1788  AU.addRequired<PhiValuesWrapperPass>();
1789  AU.addRequiredTransitive<AAResultsWrapperPass>();
1790  AU.addRequiredTransitive<TargetLibraryInfoWrapperPass>();
1791 }
1792 
1793 bool MemoryDependenceResults::invalidate(Function &F, const PreservedAnalyses &PA,
1794  FunctionAnalysisManager::Invalidator &Inv) {
1795  // Check whether our analysis is preserved.
1796  auto PAC = PA.getChecker<MemoryDependenceAnalysis>();
1797  if (!PAC.preserved() && !PAC.preservedSet<AllAnalysesOn<Function>>())
1798  // If not, give up now.
1799  return true;
1800 
1801  // Check whether the analyses we depend on became invalid for any reason.
1802  if (Inv.invalidate<AAManager>(F, PA) ||
1803  Inv.invalidate<AssumptionAnalysis>(F, PA) ||
1804  Inv.invalidate<DominatorTreeAnalysis>(F, PA) ||
1805  Inv.invalidate<PhiValuesAnalysis>(F, PA))
1806  return true;
1807 
1808  // Otherwise this analysis result remains valid.
1809  return false;
1810 }
1811 
1812 unsigned MemoryDependenceResults::getDefaultBlockScanLimit() const {
1813  return DefaultBlockScanLimit;
1814 }
1815 
1816 bool MemoryDependenceWrapperPass::runOnFunction(Function &F) {
1817  auto &AA = getAnalysis<AAResultsWrapperPass>().getAAResults();
1818  auto &AC = getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
1819  auto &TLI = getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F);
1820  auto &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
1821  auto &PV = getAnalysis<PhiValuesWrapperPass>().getResult();
1822  MemDep.emplace(AA, AC, TLI, DT, PV, BlockScanLimit);
1823  return false;
1824 }
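// (Illustrative sketch, not part of the original source.) Under the legacy pass
// manager a client declares the dependency and then asks the wrapper pass for
// the underlying results object; "MyPass" is a hypothetical client:
//
//   void MyPass::getAnalysisUsage(AnalysisUsage &AU) const {
//     AU.addRequired<MemoryDependenceWrapperPass>();
//   }
//   ...
//   MemoryDependenceResults &MD =
//       getAnalysis<MemoryDependenceWrapperPass>().getMemDep();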