LLVM  4.0.0
MemoryDependenceAnalysis.cpp
1 //===- MemoryDependenceAnalysis.cpp - Mem Deps Implementation -------------===//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This file implements an analysis that determines, for a given memory
11 // operation, what preceding memory operations it depends on. It builds on
12 // alias analysis information, and tries to provide a lazy, caching interface to
13 // a common kind of alias information query.
14 //
15 //===----------------------------------------------------------------------===//
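//
// Illustrative sketch of a typical client query (the pass-manager objects AM,
// F and the instruction I are assumed context, not defined here):
//
//   MemoryDependenceResults &MD = AM.getResult<MemoryDependenceAnalysis>(F);
//   if (auto *LI = dyn_cast<LoadInst>(&I)) {
//     MemDepResult Dep = MD.getDependency(LI);
//     if (Dep.isDef())
//       Instruction *DefInst = Dep.getInst(); // e.g. a must-aliased store
//     else if (Dep.isNonLocal())
//       ; // the answer lives in a predecessor block; see
//         // getNonLocalPointerDependency() below.
//   }
//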
16 
17 #include "llvm/Analysis/MemoryDependenceAnalysis.h"
18 #include "llvm/ADT/SmallSet.h"
19 #include "llvm/ADT/SmallVector.h"
20 #include "llvm/ADT/STLExtras.h"
21 #include "llvm/ADT/Statistic.h"
22 #include "llvm/Analysis/AliasAnalysis.h"
23 #include "llvm/Analysis/AssumptionCache.h"
24 #include "llvm/Analysis/MemoryBuiltins.h"
25 #include "llvm/Analysis/OrderedBasicBlock.h"
26 #include "llvm/Analysis/PHITransAddr.h"
27 #include "llvm/Analysis/TargetLibraryInfo.h"
28 #include "llvm/Analysis/ValueTracking.h"
29 #include "llvm/IR/CallSite.h"
30 #include "llvm/IR/Constants.h"
31 #include "llvm/IR/DataLayout.h"
32 #include "llvm/IR/DerivedTypes.h"
33 #include "llvm/IR/Dominators.h"
34 #include "llvm/IR/Function.h"
35 #include "llvm/IR/Instruction.h"
36 #include "llvm/IR/Instructions.h"
37 #include "llvm/IR/IntrinsicInst.h"
38 #include "llvm/IR/LLVMContext.h"
39 #include "llvm/IR/Metadata.h"
40 #include "llvm/IR/Module.h"
41 #include "llvm/Support/Casting.h"
42 #include "llvm/Support/CommandLine.h"
43 #include "llvm/Support/Compiler.h"
44 #include "llvm/Support/Debug.h"
45 #include "llvm/Support/MathExtras.h"
46 #include <algorithm>
47 #include <cassert>
48 #include <iterator>
49 
50 using namespace llvm;
51 
52 #define DEBUG_TYPE "memdep"
53 
54 STATISTIC(NumCacheNonLocal, "Number of fully cached non-local responses");
55 STATISTIC(NumCacheDirtyNonLocal, "Number of dirty cached non-local responses");
56 STATISTIC(NumUncacheNonLocal, "Number of uncached non-local responses");
57 
58 STATISTIC(NumCacheNonLocalPtr,
59  "Number of fully cached non-local ptr responses");
60 STATISTIC(NumCacheDirtyNonLocalPtr,
61  "Number of cached, but dirty, non-local ptr responses");
62 STATISTIC(NumUncacheNonLocalPtr, "Number of uncached non-local ptr responses");
63 STATISTIC(NumCacheCompleteNonLocalPtr,
64  "Number of block queries that were completely cached");
65 
66 // Limit for the number of instructions to scan in a block.
67 
68 static cl::opt<unsigned> BlockScanLimit(
69  "memdep-block-scan-limit", cl::Hidden, cl::init(100),
70  cl::desc("The number of instructions to scan in a block in memory "
71  "dependency analysis (default = 100)"));
72 
73 static cl::opt<unsigned>
74  BlockNumberLimit("memdep-block-number-limit", cl::Hidden, cl::init(1000),
75  cl::desc("The number of blocks to scan during memory "
76  "dependency analysis (default = 1000)"));
77 
78 // Limit on the number of memdep results to process.
79 static const unsigned int NumResultsLimit = 100;
80 
81 /// This is a helper function that removes Val from 'Inst's set in ReverseMap.
82 ///
83 /// If the set becomes empty, remove Inst's entry.
84 template <typename KeyTy>
85 static void
86 RemoveFromReverseMap(DenseMap<Instruction *, SmallPtrSet<KeyTy, 4>> &ReverseMap,
87  Instruction *Inst, KeyTy Val) {
88  typename DenseMap<Instruction *, SmallPtrSet<KeyTy, 4>>::iterator InstIt =
89  ReverseMap.find(Inst);
90  assert(InstIt != ReverseMap.end() && "Reverse map out of sync?");
91  bool Found = InstIt->second.erase(Val);
92  assert(Found && "Invalid reverse map!");
93  (void)Found;
94  if (InstIt->second.empty())
95  ReverseMap.erase(InstIt);
96 }
97 
98 /// If the given instruction references a specific memory location, fill in Loc
99 /// with the details, otherwise set Loc.Ptr to null.
100 ///
101 /// Returns a ModRefInfo value describing the general behavior of the
102 /// instruction.
103 static ModRefInfo GetLocation(const Instruction *Inst, MemoryLocation &Loc,
104  const TargetLibraryInfo &TLI) {
105  if (const LoadInst *LI = dyn_cast<LoadInst>(Inst)) {
106  if (LI->isUnordered()) {
107  Loc = MemoryLocation::get(LI);
108  return MRI_Ref;
109  }
110  if (LI->getOrdering() == AtomicOrdering::Monotonic) {
111  Loc = MemoryLocation::get(LI);
112  return MRI_ModRef;
113  }
114  Loc = MemoryLocation();
115  return MRI_ModRef;
116  }
117 
118  if (const StoreInst *SI = dyn_cast<StoreInst>(Inst)) {
119  if (SI->isUnordered()) {
120  Loc = MemoryLocation::get(SI);
121  return MRI_Mod;
122  }
123  if (SI->getOrdering() == AtomicOrdering::Monotonic) {
124  Loc = MemoryLocation::get(SI);
125  return MRI_ModRef;
126  }
127  Loc = MemoryLocation();
128  return MRI_ModRef;
129  }
130 
131  if (const VAArgInst *V = dyn_cast<VAArgInst>(Inst)) {
132  Loc = MemoryLocation::get(V);
133  return MRI_ModRef;
134  }
135 
136  if (const CallInst *CI = isFreeCall(Inst, &TLI)) {
137  // calls to free() deallocate the entire structure
138  Loc = MemoryLocation(CI->getArgOperand(0));
139  return MRI_Mod;
140  }
141 
142  if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(Inst)) {
143  AAMDNodes AAInfo;
144 
145  switch (II->getIntrinsicID()) {
146  case Intrinsic::lifetime_start:
147  case Intrinsic::lifetime_end:
148  case Intrinsic::invariant_start:
149  II->getAAMetadata(AAInfo);
150  Loc = MemoryLocation(
151  II->getArgOperand(1),
152  cast<ConstantInt>(II->getArgOperand(0))->getZExtValue(), AAInfo);
153  // These intrinsics don't really modify the memory, but returning Mod
154  // will allow them to be handled conservatively.
155  return MRI_Mod;
156  case Intrinsic::invariant_end:
157  II->getAAMetadata(AAInfo);
158  Loc = MemoryLocation(
159  II->getArgOperand(2),
160  cast<ConstantInt>(II->getArgOperand(1))->getZExtValue(), AAInfo);
161  // These intrinsics don't really modify the memory, but returning Mod
162  // will allow them to be handled conservatively.
163  return MRI_Mod;
164  default:
165  break;
166  }
167  }
168 
169  // Otherwise, just do the coarse-grained thing that always works.
170  if (Inst->mayWriteToMemory())
171  return MRI_ModRef;
172  if (Inst->mayReadFromMemory())
173  return MRI_Ref;
174  return MRI_NoModRef;
175 }
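//
// A few worked classifications for GetLocation above (hypothetical IR):
//
//   %v = load i32, i32* %p                      -> Loc = %p, MRI_Ref
//   %v = load atomic i32, i32* %p monotonic, align 4
//                                               -> Loc = %p, MRI_ModRef
//   store i32 %v, i32* %p                       -> Loc = %p, MRI_Mod
//   call void @free(i8* %buf)                   -> Loc = %buf, MRI_Mod
//   call void @llvm.lifetime.start(i64 8, i8* %buf)
//                                               -> Loc = %buf (8 bytes), MRI_Mod
//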
176 
177 /// Private helper for finding the local dependencies of a call site.
178 MemDepResult MemoryDependenceResults::getCallSiteDependencyFrom(
179  CallSite CS, bool isReadOnlyCall, BasicBlock::iterator ScanIt,
180  BasicBlock *BB) {
181  unsigned Limit = BlockScanLimit;
182 
183  // Walk backwards through the block, looking for dependencies.
184  while (ScanIt != BB->begin()) {
185  // Limit the amount of scanning we do so we don't end up with quadratic
186  // running time on extreme testcases.
187  --Limit;
188  if (!Limit)
189  return MemDepResult::getUnknown();
190 
191  Instruction *Inst = &*--ScanIt;
192 
193  // If this inst is a memory op, get the pointer it accessed
194  MemoryLocation Loc;
195  ModRefInfo MR = GetLocation(Inst, Loc, TLI);
196  if (Loc.Ptr) {
197  // A simple instruction.
198  if (AA.getModRefInfo(CS, Loc) != MRI_NoModRef)
199  return MemDepResult::getClobber(Inst);
200  continue;
201  }
202 
203  if (auto InstCS = CallSite(Inst)) {
204  // Debug intrinsics don't cause dependences.
205  if (isa<DbgInfoIntrinsic>(Inst))
206  continue;
207  // If these two calls do not interfere, look past it.
208  switch (AA.getModRefInfo(CS, InstCS)) {
209  case MRI_NoModRef:
210  // If the two calls are the same, return InstCS as a Def, so that
211  // CS can be found redundant and eliminated.
212  if (isReadOnlyCall && !(MR & MRI_Mod) &&
213  CS.getInstruction()->isIdenticalToWhenDefined(Inst))
214  return MemDepResult::getDef(Inst);
215 
216  // Otherwise if the two calls don't interact (e.g. InstCS is readnone)
217  // keep scanning.
218  continue;
219  default:
220  return MemDepResult::getClobber(Inst);
221  }
222  }
223 
224  // If we could not obtain a pointer for the instruction and the instruction
225  // touches memory then assume that this is a dependency.
226  if (MR != MRI_NoModRef)
227  return MemDepResult::getClobber(Inst);
228  }
229 
230  // No dependence found. If this is the entry block of the function, it is
231  // unknown, otherwise it is non-local.
232  if (BB != &BB->getParent()->getEntryBlock())
233  return MemDepResult::getNonLocal();
234  return MemDepResult::getNonFuncLocal();
235 }
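//
// Illustrative example (hypothetical IR): querying the second of two identical
// read-only calls with nothing that writes memory in between,
//
//   %a = call i64 @strlen(i8* %s)
//   %b = call i64 @strlen(i8* %s)    ; query
//
// yields Def(%a), so a client such as GVN can eliminate the duplicate call.
// Any intervening instruction that may write the memory the call reads would
// instead be returned as a Clobber.
//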
236 
237 unsigned MemoryDependenceResults::getLoadLoadClobberFullWidthSize(
238  const Value *MemLocBase, int64_t MemLocOffs, unsigned MemLocSize,
239  const LoadInst *LI) {
240  // We can only extend simple integer loads.
241  if (!isa<IntegerType>(LI->getType()) || !LI->isSimple())
242  return 0;
243 
244  // Load widening is hostile to ThreadSanitizer: it may cause false positives
245  // or make the reports more cryptic (access sizes are wrong).
246  if (LI->getParent()->getParent()->hasFnAttribute(Attribute::SanitizeThread))
247  return 0;
248 
249  const DataLayout &DL = LI->getModule()->getDataLayout();
250 
251  // Get the base of this load.
252  int64_t LIOffs = 0;
253  const Value *LIBase =
254  GetPointerBaseWithConstantOffset(LI->getPointerOperand(), LIOffs, DL);
255 
256  // If the two pointers are not based on the same pointer, we can't tell that
257  // they are related.
258  if (LIBase != MemLocBase)
259  return 0;
260 
261  // Okay, the two values are based on the same pointer, but returned as
262  // no-alias. This happens when we have things like two byte loads at "P+1"
263  // and "P+3". Check to see if increasing the size of the "LI" load up to its
264  // alignment (or the largest native integer type) will allow us to load all
265  // the bits required by MemLoc.
266 
267  // If MemLoc is before LI, then no widening of LI will help us out.
268  if (MemLocOffs < LIOffs)
269  return 0;
270 
271  // Get the alignment of the load in bytes. We assume that it is safe to load
272  // any legal integer up to this size without a problem. For example, if we're
273  // looking at an i8 load on x86-32 that is known 1024 byte aligned, we can
274  // widen it up to an i32 load. If it is known 2-byte aligned, we can widen it
275  // to i16.
276  unsigned LoadAlign = LI->getAlignment();
277 
278  int64_t MemLocEnd = MemLocOffs + MemLocSize;
279 
280  // If no amount of rounding up will let MemLoc fit into LI, then bail out.
281  if (LIOffs + LoadAlign < MemLocEnd)
282  return 0;
283 
284  // This is the size of the load to try. Start with the next larger power of
285  // two.
286  unsigned NewLoadByteSize = LI->getType()->getPrimitiveSizeInBits() / 8U;
287  NewLoadByteSize = NextPowerOf2(NewLoadByteSize);
288 
289  while (true) {
290  // If this load size is bigger than our known alignment or would not fit
291  // into a native integer register, then we fail.
292  if (NewLoadByteSize > LoadAlign ||
293  !DL.fitsInLegalInteger(NewLoadByteSize * 8))
294  return 0;
295 
296  if (LIOffs + NewLoadByteSize > MemLocEnd &&
297  LI->getParent()->getParent()->hasFnAttribute(
298  Attribute::SanitizeAddress))
299  // We will be reading past the location accessed by the original program.
300  // While this is safe in a regular build, Address Safety analysis tools
301  // may start reporting false warnings. So, don't do widening.
302  return 0;
303 
304  // If a load of this width would include all of MemLoc, then we succeed.
305  if (LIOffs + NewLoadByteSize >= MemLocEnd)
306  return NewLoadByteSize;
307 
308  NewLoadByteSize <<= 1;
309  }
310 }
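//
// Worked example for the widening computation above (illustrative numbers
// only): assume LIBase == MemLocBase, LIOffs == 0, LI is an i8 load with
// 4-byte alignment, and MemLocOffs == 3 with MemLocSize == 1, so
// MemLocEnd == 4.
//
//   NewLoadByteSize = NextPowerOf2(1) = 2   -> 0 + 2 < 4, keep widening
//   NewLoadByteSize = 4                     -> 4 <= LoadAlign, fits a legal
//                                              integer, and 0 + 4 >= 4
//
// The function returns 4: widening LI to an i32 load covers every byte that
// MemLoc needs (assuming no sanitizers are enabled on the function).
//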
311 
312 static bool isVolatile(Instruction *Inst) {
313  if (LoadInst *LI = dyn_cast<LoadInst>(Inst))
314  return LI->isVolatile();
315  else if (StoreInst *SI = dyn_cast<StoreInst>(Inst))
316  return SI->isVolatile();
317  else if (AtomicCmpXchgInst *AI = dyn_cast<AtomicCmpXchgInst>(Inst))
318  return AI->isVolatile();
319  return false;
320 }
321 
322 MemDepResult MemoryDependenceResults::getPointerDependencyFrom(
323  const MemoryLocation &MemLoc, bool isLoad, BasicBlock::iterator ScanIt,
324  BasicBlock *BB, Instruction *QueryInst, unsigned *Limit) {
325 
326  MemDepResult InvariantGroupDependency = MemDepResult::getUnknown();
327  if (QueryInst != nullptr) {
328  if (auto *LI = dyn_cast<LoadInst>(QueryInst)) {
329  InvariantGroupDependency = getInvariantGroupPointerDependency(LI, BB);
330 
331  if (InvariantGroupDependency.isDef())
332  return InvariantGroupDependency;
333  }
334  }
335  MemDepResult SimpleDep = getSimplePointerDependencyFrom(
336  MemLoc, isLoad, ScanIt, BB, QueryInst, Limit);
337  if (SimpleDep.isDef())
338  return SimpleDep;
339  // A non-local invariant.group dependency indicates there is a non-local Def
340  // (it only returns NonLocal if it finds a non-local Def), which is better
341  // than a local clobber and everything else.
342  if (InvariantGroupDependency.isNonLocal())
343  return InvariantGroupDependency;
344 
345  assert(InvariantGroupDependency.isUnknown() &&
346  "InvariantGroupDependency should be only unknown at this point");
347  return SimpleDep;
348 }
349 
350 MemDepResult
351 MemoryDependenceResults::getInvariantGroupPointerDependency(LoadInst *LI,
352  BasicBlock *BB) {
353 
354  auto *InvariantGroupMD = LI->getMetadata(LLVMContext::MD_invariant_group);
355  if (!InvariantGroupMD)
356  return MemDepResult::getUnknown();
357 
358  // Take the pointer operand after stripping all casts and all-zero GEPs.
359  // This way we only have to search the cast graph downwards.
360  Value *LoadOperand = LI->getPointerOperand()->stripPointerCasts();
361 
362  // It is not safe to walk the use list of a global value, because function
363  // passes aren't allowed to look outside their functions.
364  // FIXME: this could be fixed by filtering instructions from outside
365  // of current function.
366  if (isa<GlobalValue>(LoadOperand))
367  return MemDepResult::getUnknown();
368 
369  // Queue to process all pointers that are equivalent to load operand.
370  SmallVector<const Value *, 8> LoadOperandsQueue;
371  LoadOperandsQueue.push_back(LoadOperand);
372 
373  Instruction *ClosestDependency = nullptr;
374  // The order of instructions in the use list is unpredictable. In order to
375  // always get the same result, we keep the closest (most dominated) candidate.
376  auto GetClosestDependency = [this](Instruction *Best, Instruction *Other) {
377  assert(Other && "Must be called with a non-null instruction");
378  if (Best == nullptr || DT.dominates(Best, Other))
379  return Other;
380  return Best;
381  };
382 
383 
384  // FIXME: This loop is O(N^2) because dominates can be O(n) and in worst case
385  // we will see all the instructions. This should be fixed in MSSA.
386  while (!LoadOperandsQueue.empty()) {
387  const Value *Ptr = LoadOperandsQueue.pop_back_val();
388  assert(Ptr && !isa<GlobalValue>(Ptr) &&
389  "Null or GlobalValue should not be inserted");
390 
391  for (const Use &Us : Ptr->uses()) {
392  auto *U = dyn_cast<Instruction>(Us.getUser());
393  if (!U || U == LI || !DT.dominates(U, LI))
394  continue;
395 
396  // A bitcast or all-zero GEP is a user of Ptr. Add it to the queue to check
397  // its users, e.g. U = bitcast Ptr.
398  if (isa<BitCastInst>(U)) {
399  LoadOperandsQueue.push_back(U);
400  continue;
401  }
402  // Gep with zeros is equivalent to bitcast.
403  // FIXME: we are not sure whether a bitcast should be canonicalized to a
404  // gep 0 or a gep 0 to a bitcast because of SROA, so there are 2 forms. Once
405  // typeless pointers are ready, both cases will be gone
406  // (and this BFS also won't be needed).
407  if (auto *GEP = dyn_cast<GetElementPtrInst>(U))
408  if (GEP->hasAllZeroIndices()) {
409  LoadOperandsQueue.push_back(U);
410  continue;
411  }
412 
413  // If we hit load/store with the same invariant.group metadata (and the
414  // same pointer operand) we can assume that the value pointed to by the
415  // pointer operand didn't change.
416  if ((isa<LoadInst>(U) || isa<StoreInst>(U)) &&
417  U->getMetadata(LLVMContext::MD_invariant_group) == InvariantGroupMD)
418  ClosestDependency = GetClosestDependency(ClosestDependency, U);
419  }
420  }
421 
422  if (!ClosestDependency)
423  return MemDepResult::getUnknown();
424  if (ClosestDependency->getParent() == BB)
425  return MemDepResult::getDef(ClosestDependency);
426  // Def(U) can't be returned here because it is non-local. If no local
427  // dependency is found, we return NonLocal instead, counting on the user
428  // calling getNonLocalPointerDependency, which will return the cached
429  // result.
430  NonLocalDefsCache.try_emplace(
431  LI, NonLocalDepResult(ClosestDependency->getParent(),
432  MemDepResult::getDef(ClosestDependency), nullptr));
433  return MemDepResult::getNonLocal();
434 }
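//
// Illustrative IR for the invariant.group walk above (hypothetical names):
//
//   store i8 42, i8* %p, !invariant.group !0
//   call void @clobber(i8* %p)
//   %v = load i8, i8* %p, !invariant.group !0   ; the query instruction
//
// Both accesses carry the same !invariant.group metadata on the same pointer
// (bitcasts and all-zero GEPs of %p would be followed by the BFS as well), so
// the load is answered as Def(store) despite the intervening call. If the
// store lived in a different block, the result would be NonLocal and the Def
// would be stashed in NonLocalDefsCache instead.
//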
435 
436 MemDepResult MemoryDependenceResults::getSimplePointerDependencyFrom(
437  const MemoryLocation &MemLoc, bool isLoad, BasicBlock::iterator ScanIt,
438  BasicBlock *BB, Instruction *QueryInst, unsigned *Limit) {
439  bool isInvariantLoad = false;
440 
441  if (!Limit) {
442  unsigned DefaultLimit = BlockScanLimit;
443  return getSimplePointerDependencyFrom(MemLoc, isLoad, ScanIt, BB, QueryInst,
444  &DefaultLimit);
445  }
446 
447  // We must be careful with atomic accesses, as they may allow another thread
448  // to touch this location, clobbering it. We are conservative: if the
449  // QueryInst is not a simple (non-atomic) memory access, we automatically
450  // return getClobber.
451  // If it is simple, we know based on the results of
452  // "Compiler testing via a theory of sound optimisations in the C11/C++11
453  // memory model" in PLDI 2013, that a non-atomic location can only be
454  // clobbered between a pair of a release and an acquire action, with no
455  // access to the location in between.
456  // Here is an example for giving the general intuition behind this rule.
457  // In the following code:
458  // store x 0;
459  // release action; [1]
460  // acquire action; [4]
461  // %val = load x;
462  // It is unsafe to replace %val by 0 because another thread may be running:
463  // acquire action; [2]
464  // store x 42;
465  // release action; [3]
466  // with synchronization from 1 to 2 and from 3 to 4, resulting in %val
467  // being 42. A key property of this program however is that if either
468  // 1 or 4 were missing, there would be a race between the store of 42
469  // and either the store of 0 or the load (making the whole program racy).
470  // The paper mentioned above shows that the same property is respected
471  // by every program that can detect any optimization of that kind: either
472  // it is racy (undefined) or there is a release followed by an acquire
473  // between the pair of accesses under consideration.
474 
475  // If the load is invariant, we "know" that it doesn't alias *any* write. We
476  // do want to respect mustalias results since defs are useful for value
477  // forwarding, but any mayalias write can be assumed to be noalias.
478  // Arguably, this logic should be pushed inside AliasAnalysis itself.
479  if (isLoad && QueryInst) {
480  LoadInst *LI = dyn_cast<LoadInst>(QueryInst);
481  if (LI && LI->getMetadata(LLVMContext::MD_invariant_load) != nullptr)
482  isInvariantLoad = true;
483  }
484 
485  const DataLayout &DL = BB->getModule()->getDataLayout();
486 
487  // Create a numbered basic block to lazily compute and cache instruction
488  // positions inside a BB. This is used to provide fast queries for relative
489  // position between two instructions in a BB and can be used by
490  // AliasAnalysis::callCapturesBefore.
491  OrderedBasicBlock OBB(BB);
492 
493  // Return "true" if and only if the instruction I is either a non-simple
494  // load or a non-simple store.
495  auto isNonSimpleLoadOrStore = [](Instruction *I) -> bool {
496  if (auto *LI = dyn_cast<LoadInst>(I))
497  return !LI->isSimple();
498  if (auto *SI = dyn_cast<StoreInst>(I))
499  return !SI->isSimple();
500  return false;
501  };
502 
503  // Return "true" if I is not a load and not a store, but it does access
504  // memory.
505  auto isOtherMemAccess = [](Instruction *I) -> bool {
506  return !isa<LoadInst>(I) && !isa<StoreInst>(I) && I->mayReadOrWriteMemory();
507  };
508 
509  // Walk backwards through the basic block, looking for dependencies.
510  while (ScanIt != BB->begin()) {
511  Instruction *Inst = &*--ScanIt;
512 
513  if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(Inst))
514  // Debug intrinsics don't (and can't) cause dependencies.
515  if (isa<DbgInfoIntrinsic>(II))
516  continue;
517 
518  // Limit the amount of scanning we do so we don't end up with quadratic
519  // running time on extreme testcases.
520  --*Limit;
521  if (!*Limit)
522  return MemDepResult::getUnknown();
523 
524  if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(Inst)) {
525  // If we reach a lifetime begin or end marker, then the query ends here
526  // because the value is undefined.
527  if (II->getIntrinsicID() == Intrinsic::lifetime_start) {
528  // FIXME: This only considers queries directly on the invariant-tagged
529  // pointer, not on query pointers that are indexed off of them. It'd
530  // be nice to handle that at some point (the right approach is to use
531  // GetPointerBaseWithConstantOffset).
532  if (AA.isMustAlias(MemoryLocation(II->getArgOperand(1)), MemLoc))
533  return MemDepResult::getDef(II);
534  continue;
535  }
536  }
537 
538  // Values depend on loads if the pointers are must aliased. This means
539  // that a load depends on another must aliased load from the same value.
540  // One exception is atomic loads: a value can depend on an atomic load that
541  // it does not alias with when this atomic load indicates that another
542  // thread may be accessing the location.
543  if (LoadInst *LI = dyn_cast<LoadInst>(Inst)) {
544 
545  // While volatile accesses cannot be eliminated, they do not have to clobber
546  // non-aliasing locations, as normal accesses, for example, can be safely
547  // reordered with volatile accesses.
548  if (LI->isVolatile()) {
549  if (!QueryInst)
550  // Original QueryInst *may* be volatile
551  return MemDepResult::getClobber(LI);
552  if (isVolatile(QueryInst))
553  // Ordering required if QueryInst is itself volatile
554  return MemDepResult::getClobber(LI);
555  // Otherwise, volatile doesn't imply any special ordering
556  }
557 
558  // Atomic loads have complications involved.
559  // A Monotonic (or higher) load is OK if the query inst is itself not
560  // atomic.
561  // FIXME: This is overly conservative.
562  if (LI->isAtomic() && isStrongerThanUnordered(LI->getOrdering())) {
563  if (!QueryInst || isNonSimpleLoadOrStore(QueryInst) ||
564  isOtherMemAccess(QueryInst))
565  return MemDepResult::getClobber(LI);
566  if (LI->getOrdering() != AtomicOrdering::Monotonic)
567  return MemDepResult::getClobber(LI);
568  }
569 
570  MemoryLocation LoadLoc = MemoryLocation::get(LI);
571 
572  // If we found a pointer, check if it could be the same as our pointer.
573  AliasResult R = AA.alias(LoadLoc, MemLoc);
574 
575  if (isLoad) {
576  if (R == NoAlias)
577  continue;
578 
579  // Must aliased loads are defs of each other.
580  if (R == MustAlias)
581  return MemDepResult::getDef(Inst);
582 
583 #if 0 // FIXME: Temporarily disabled. GVN is cleverly rewriting loads
584  // in terms of clobbering loads, but since it does this by looking
585  // at the clobbering load directly, it doesn't know about any
586  // phi translation that may have happened along the way.
587 
588  // If we have a partial alias, then return this as a clobber for the
589  // client to handle.
590  if (R == PartialAlias)
591  return MemDepResult::getClobber(Inst);
592 #endif
593 
594  // Random may-alias loads don't depend on each other without a
595  // dependence.
596  continue;
597  }
598 
599  // Stores don't depend on other no-aliased accesses.
600  if (R == NoAlias)
601  continue;
602 
603  // Stores don't alias loads from read-only memory.
604  if (AA.pointsToConstantMemory(LoadLoc))
605  continue;
606 
607  // Stores depend on may/must aliased loads.
608  return MemDepResult::getDef(Inst);
609  }
610 
611  if (StoreInst *SI = dyn_cast<StoreInst>(Inst)) {
612  // Atomic stores have complications involved.
613  // A Monotonic store is OK if the query inst is itself not atomic.
614  // FIXME: This is overly conservative.
615  if (!SI->isUnordered() && SI->isAtomic()) {
616  if (!QueryInst || isNonSimpleLoadOrStore(QueryInst) ||
617  isOtherMemAccess(QueryInst))
618  return MemDepResult::getClobber(SI);
619  if (SI->getOrdering() != AtomicOrdering::Monotonic)
620  return MemDepResult::getClobber(SI);
621  }
622 
623  // FIXME: this is overly conservative.
624  // While volatile accesses cannot be eliminated, they do not have to clobber
625  // non-aliasing locations, as normal accesses can for example be reordered
626  // with volatile accesses.
627  if (SI->isVolatile())
628  if (!QueryInst || isNonSimpleLoadOrStore(QueryInst) ||
629  isOtherMemAccess(QueryInst))
630  return MemDepResult::getClobber(SI);
631 
632  // If alias analysis can tell that this store is guaranteed to not modify
633  // the query pointer, ignore it. Use getModRefInfo to handle cases where
634  // the query pointer points to constant memory etc.
635  if (AA.getModRefInfo(SI, MemLoc) == MRI_NoModRef)
636  continue;
637 
638  // Ok, this store might clobber the query pointer. Check to see if it is
639  // a must alias: in this case, we want to return this as a def.
640  MemoryLocation StoreLoc = MemoryLocation::get(SI);
641 
642  // If we found a pointer, check if it could be the same as our pointer.
643  AliasResult R = AA.alias(StoreLoc, MemLoc);
644 
645  if (R == NoAlias)
646  continue;
647  if (R == MustAlias)
648  return MemDepResult::getDef(Inst);
649  if (isInvariantLoad)
650  continue;
651  return MemDepResult::getClobber(Inst);
652  }
653 
654  // If this is an allocation, and if we know that the accessed pointer is to
655  // the allocation, return Def. This means that there is no dependence and
656  // the access can be optimized based on that. For example, a load could
657  // turn into undef. Note that we can bypass the allocation itself when
658  // looking for a clobber in many cases; that's an alias property and is
659  // handled by BasicAA.
660  if (isa<AllocaInst>(Inst) || isNoAliasFn(Inst, &TLI)) {
661  const Value *AccessPtr = GetUnderlyingObject(MemLoc.Ptr, DL);
662  if (AccessPtr == Inst || AA.isMustAlias(Inst, AccessPtr))
663  return MemDepResult::getDef(Inst);
664  }
665 
666  if (isInvariantLoad)
667  continue;
668 
669  // A release fence requires that all stores complete before it, but does
670  // not prevent the reordering of following loads or stores 'before' the
671  // fence. As a result, we look past it when finding a dependency for
672  // loads. DSE uses this to find preceding stores to delete and thus we
673  // can't bypass the fence if the query instruction is a store.
674  if (FenceInst *FI = dyn_cast<FenceInst>(Inst))
675  if (isLoad && FI->getOrdering() == AtomicOrdering::Release)
676  continue;
677 
678  // See if this instruction (e.g. a call or vaarg) mod/ref's the pointer.
679  ModRefInfo MR = AA.getModRefInfo(Inst, MemLoc);
680  // If necessary, perform additional analysis.
681  if (MR == MRI_ModRef)
682  MR = AA.callCapturesBefore(Inst, MemLoc, &DT, &OBB);
683  switch (MR) {
684  case MRI_NoModRef:
685  // If the call has no effect on the queried pointer, just ignore it.
686  continue;
687  case MRI_Mod:
688  return MemDepResult::getClobber(Inst);
689  case MRI_Ref:
690  // If the call is known to never store to the pointer, and if this is a
691  // load query, we can safely ignore it (scan past it).
692  if (isLoad)
693  continue;
694  default:
695  // Otherwise, there is a potential dependence. Return a clobber.
696  return MemDepResult::getClobber(Inst);
697  }
698  }
699 
700  // No dependence found. If this is the entry block of the function, it is
701  // unknown, otherwise it is non-local.
702  if (BB != &BB->getParent()->getEntryBlock())
703  return MemDepResult::getNonLocal();
704  return MemDepResult::getNonFuncLocal();
705 }
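//
// Small illustrative scan (hypothetical IR), querying the final load:
//
//   store i32 1, i32* %a
//   store i32 2, i32* %b
//   %v = load i32, i32* %a          ; query
//
// Walking backwards, the store to %b is skipped once AA reports it cannot
// modify the queried location; the store to %a is MustAlias with it and is
// returned as a Def, letting a client forward the stored value. A MayAlias
// store would be returned as a Clobber instead (unless the query is an
// invariant load).
//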
706 
707 MemDepResult MemoryDependenceResults::getDependency(Instruction *QueryInst) {
708  Instruction *ScanPos = QueryInst;
709 
710  // Check for a cached result
711  MemDepResult &LocalCache = LocalDeps[QueryInst];
712 
713  // If the cached entry is non-dirty, just return it. Note that this depends
714  // on MemDepResult's default constructing to 'dirty'.
715  if (!LocalCache.isDirty())
716  return LocalCache;
717 
718  // Otherwise, if we have a dirty entry, we know we can start the scan at that
719  // instruction, which may save us some work.
720  if (Instruction *Inst = LocalCache.getInst()) {
721  ScanPos = Inst;
722 
723  RemoveFromReverseMap(ReverseLocalDeps, Inst, QueryInst);
724  }
725 
726  BasicBlock *QueryParent = QueryInst->getParent();
727 
728  // Do the scan.
729  if (BasicBlock::iterator(QueryInst) == QueryParent->begin()) {
730  // No dependence found. If this is the entry block of the function, it is
731  // unknown, otherwise it is non-local.
732  if (QueryParent != &QueryParent->getParent()->getEntryBlock())
733  LocalCache = MemDepResult::getNonLocal();
734  else
735  LocalCache = MemDepResult::getNonFuncLocal();
736  } else {
737  MemoryLocation MemLoc;
738  ModRefInfo MR = GetLocation(QueryInst, MemLoc, TLI);
739  if (MemLoc.Ptr) {
740  // If we can do a pointer scan, make it happen.
741  bool isLoad = !(MR & MRI_Mod);
742  if (auto *II = dyn_cast<IntrinsicInst>(QueryInst))
743  isLoad |= II->getIntrinsicID() == Intrinsic::lifetime_start;
744 
745  LocalCache = getPointerDependencyFrom(
746  MemLoc, isLoad, ScanPos->getIterator(), QueryParent, QueryInst);
747  } else if (isa<CallInst>(QueryInst) || isa<InvokeInst>(QueryInst)) {
748  CallSite QueryCS(QueryInst);
749  bool isReadOnly = AA.onlyReadsMemory(QueryCS);
750  LocalCache = getCallSiteDependencyFrom(
751  QueryCS, isReadOnly, ScanPos->getIterator(), QueryParent);
752  } else
753  // Non-memory instruction.
754  LocalCache = MemDepResult::getUnknown();
755  }
756 
757  // Remember the result!
758  if (Instruction *I = LocalCache.getInst())
759  ReverseLocalDeps[I].insert(QueryInst);
760 
761  return LocalCache;
762 }
763 
764 #ifndef NDEBUG
765 /// This method is used when -debug is specified to verify that cache arrays
766 /// are properly kept sorted.
767 static void AssertSorted(MemoryDependenceResults::NonLocalDepInfo &Cache,
768  int Count = -1) {
769  if (Count == -1)
770  Count = Cache.size();
771  assert(std::is_sorted(Cache.begin(), Cache.begin() + Count) &&
772  "Cache isn't sorted!");
773 }
774 #endif
775 
776 const MemoryDependenceResults::NonLocalDepInfo &
777 MemoryDependenceResults::getNonLocalCallDependency(CallSite QueryCS) {
778  assert(getDependency(QueryCS.getInstruction()).isNonLocal() &&
779  "getNonLocalCallDependency should only be used on calls with "
780  "non-local deps!");
781  PerInstNLInfo &CacheP = NonLocalDeps[QueryCS.getInstruction()];
782  NonLocalDepInfo &Cache = CacheP.first;
783 
784  // This is the set of blocks that need to be recomputed. In the cached case,
785  // this can happen due to instructions being deleted etc. In the uncached
786  // case, this starts out as the set of predecessors we care about.
787  SmallVector<BasicBlock *, 32> DirtyBlocks;
788 
789  if (!Cache.empty()) {
790  // Okay, we have a cache entry. If we know it is not dirty, just return it
791  // with no computation.
792  if (!CacheP.second) {
793  ++NumCacheNonLocal;
794  return Cache;
795  }
796 
797  // If we already have a partially computed set of results, scan them to
798  // determine what is dirty, seeding our initial DirtyBlocks worklist.
799  for (auto &Entry : Cache)
800  if (Entry.getResult().isDirty())
801  DirtyBlocks.push_back(Entry.getBB());
802 
803  // Sort the cache so that we can do fast binary search lookups below.
804  std::sort(Cache.begin(), Cache.end());
805 
806  ++NumCacheDirtyNonLocal;
807  // cerr << "CACHED CASE: " << DirtyBlocks.size() << " dirty: "
808  // << Cache.size() << " cached: " << *QueryInst;
809  } else {
810  // Seed DirtyBlocks with each of the preds of QueryInst's block.
811  BasicBlock *QueryBB = QueryCS.getInstruction()->getParent();
812  for (BasicBlock *Pred : PredCache.get(QueryBB))
813  DirtyBlocks.push_back(Pred);
814  ++NumUncacheNonLocal;
815  }
816 
817  // isReadonlyCall - If this is a read-only call, we can be more aggressive.
818  bool isReadonlyCall = AA.onlyReadsMemory(QueryCS);
819 
820  SmallPtrSet<BasicBlock *, 32> Visited;
821 
822  unsigned NumSortedEntries = Cache.size();
823  DEBUG(AssertSorted(Cache));
824 
825  // Iterate while we still have blocks to update.
826  while (!DirtyBlocks.empty()) {
827  BasicBlock *DirtyBB = DirtyBlocks.back();
828  DirtyBlocks.pop_back();
829 
830  // Already processed this block?
831  if (!Visited.insert(DirtyBB).second)
832  continue;
833 
834  // Do a binary search to see if we already have an entry for this block in
835  // the cache set. If so, find it.
836  DEBUG(AssertSorted(Cache, NumSortedEntries));
837  NonLocalDepInfo::iterator Entry =
838  std::upper_bound(Cache.begin(), Cache.begin() + NumSortedEntries,
839  NonLocalDepEntry(DirtyBB));
840  if (Entry != Cache.begin() && std::prev(Entry)->getBB() == DirtyBB)
841  --Entry;
842 
843  NonLocalDepEntry *ExistingResult = nullptr;
844  if (Entry != Cache.begin() + NumSortedEntries &&
845  Entry->getBB() == DirtyBB) {
846  // If we already have an entry, and if it isn't already dirty, the block
847  // is done.
848  if (!Entry->getResult().isDirty())
849  continue;
850 
851  // Otherwise, remember this slot so we can update the value.
852  ExistingResult = &*Entry;
853  }
854 
855  // If the dirty entry has a pointer, start scanning from it so we don't have
856  // to rescan the entire block.
857  BasicBlock::iterator ScanPos = DirtyBB->end();
858  if (ExistingResult) {
859  if (Instruction *Inst = ExistingResult->getResult().getInst()) {
860  ScanPos = Inst->getIterator();
861  // We're removing QueryInst's use of Inst.
862  RemoveFromReverseMap(ReverseNonLocalDeps, Inst,
863  QueryCS.getInstruction());
864  }
865  }
866 
867  // Find out if this block has a local dependency for QueryInst.
868  MemDepResult Dep;
869 
870  if (ScanPos != DirtyBB->begin()) {
871  Dep =
872  getCallSiteDependencyFrom(QueryCS, isReadonlyCall, ScanPos, DirtyBB);
873  } else if (DirtyBB != &DirtyBB->getParent()->getEntryBlock()) {
874  // No dependence found. If this is the entry block of the function, it is
875  // a clobber, otherwise it is unknown.
876  Dep = MemDepResult::getNonLocal();
877  } else {
878  Dep = MemDepResult::getNonFuncLocal();
879  }
880 
881  // If we had a dirty entry for the block, update it. Otherwise, just add
882  // a new entry.
883  if (ExistingResult)
884  ExistingResult->setResult(Dep);
885  else
886  Cache.push_back(NonLocalDepEntry(DirtyBB, Dep));
887 
888  // If the block has a dependency (i.e. it isn't completely transparent to
889  // the value), remember the association!
890  if (!Dep.isNonLocal()) {
891  // Keep the ReverseNonLocalDeps map up to date so we can efficiently
892  // update this when we remove instructions.
893  if (Instruction *Inst = Dep.getInst())
894  ReverseNonLocalDeps[Inst].insert(QueryCS.getInstruction());
895  } else {
896 
897  // If the block *is* completely transparent to the load, we need to check
898  // the predecessors of this block. Add them to our worklist.
899  for (BasicBlock *Pred : PredCache.get(DirtyBB))
900  DirtyBlocks.push_back(Pred);
901  }
902  }
903 
904  return Cache;
905 }
906 
907 void MemoryDependenceResults::getNonLocalPointerDependency(
908  Instruction *QueryInst, SmallVectorImpl<NonLocalDepResult> &Result) {
909  const MemoryLocation Loc = MemoryLocation::get(QueryInst);
910  bool isLoad = isa<LoadInst>(QueryInst);
911  BasicBlock *FromBB = QueryInst->getParent();
912  assert(FromBB);
913 
914  assert(Loc.Ptr->getType()->isPointerTy() &&
915  "Can't get pointer deps of a non-pointer!");
916  Result.clear();
917  {
918  // Check if there is cached Def with invariant.group. FIXME: cache might be
919  // invalidated if the cached instruction is removed between the call to
920  // getPointerDependencyFrom and this function.
921  auto NonLocalDefIt = NonLocalDefsCache.find(QueryInst);
922  if (NonLocalDefIt != NonLocalDefsCache.end()) {
923  Result.push_back(std::move(NonLocalDefIt->second));
924  NonLocalDefsCache.erase(NonLocalDefIt);
925  return;
926  }
927  }
928  // This routine does not expect to deal with volatile instructions.
929  // Doing so would require piping through the QueryInst all the way through.
930  // TODO: volatiles can't be elided, but they can be reordered with other
931  // non-volatile accesses.
932 
933  // We currently give up on any instruction which is ordered, but we do handle
934  // atomic instructions which are unordered.
935  // TODO: Handle ordered instructions
936  auto isOrdered = [](Instruction *Inst) {
937  if (LoadInst *LI = dyn_cast<LoadInst>(Inst)) {
938  return !LI->isUnordered();
939  } else if (StoreInst *SI = dyn_cast<StoreInst>(Inst)) {
940  return !SI->isUnordered();
941  }
942  return false;
943  };
944  if (isVolatile(QueryInst) || isOrdered(QueryInst)) {
945  Result.push_back(NonLocalDepResult(FromBB, MemDepResult::getUnknown(),
946  const_cast<Value *>(Loc.Ptr)));
947  return;
948  }
949  const DataLayout &DL = FromBB->getModule()->getDataLayout();
950  PHITransAddr Address(const_cast<Value *>(Loc.Ptr), DL, &AC);
951 
952  // This is the set of blocks we've inspected, and the pointer we consider in
953  // each block. Because of critical edges, we currently bail out if querying
954  // a block with multiple different pointers. This can happen during PHI
955  // translation.
956  DenseMap<BasicBlock *, Value *> Visited;
957  if (getNonLocalPointerDepFromBB(QueryInst, Address, Loc, isLoad, FromBB,
958  Result, Visited, true))
959  return;
960  Result.clear();
961  Result.push_back(NonLocalDepResult(FromBB, MemDepResult::getUnknown(),
962  const_cast<Value *>(Loc.Ptr)));
963 }
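//
// Illustrative client use of the non-local query above (hypothetical code):
//
//   SmallVector<NonLocalDepResult, 8> Deps;
//   MD.getNonLocalPointerDependency(LI, Deps);
//   for (const NonLocalDepResult &D : Deps) {
//     // D.getBB() is the block this entry answers for; D.getResult() is the
//     // Def/Clobber/Unknown found there, e.g. a store feeding LI along one
//     // incoming path.
//   }
//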
964 
965 /// Compute the memdep value for BB with Pointer/PointeeSize using either
966 /// cached information in Cache or by doing a lookup (which may use dirty cache
967 /// info if available).
968 ///
969 /// If we do a lookup, add the result to the cache.
970 MemDepResult MemoryDependenceResults::GetNonLocalInfoForBlock(
971  Instruction *QueryInst, const MemoryLocation &Loc, bool isLoad,
972  BasicBlock *BB, NonLocalDepInfo *Cache, unsigned NumSortedEntries) {
973 
974  // Do a binary search to see if we already have an entry for this block in
975  // the cache set. If so, find it.
976  NonLocalDepInfo::iterator Entry = std::upper_bound(
977  Cache->begin(), Cache->begin() + NumSortedEntries, NonLocalDepEntry(BB));
978  if (Entry != Cache->begin() && (Entry - 1)->getBB() == BB)
979  --Entry;
980 
981  NonLocalDepEntry *ExistingResult = nullptr;
982  if (Entry != Cache->begin() + NumSortedEntries && Entry->getBB() == BB)
983  ExistingResult = &*Entry;
984 
985  // If we have a cached entry, and it is non-dirty, use it as the value for
986  // this dependency.
987  if (ExistingResult && !ExistingResult->getResult().isDirty()) {
988  ++NumCacheNonLocalPtr;
989  return ExistingResult->getResult();
990  }
991 
992  // Otherwise, we have to scan for the value. If we have a dirty cache
993  // entry, start scanning from its position, otherwise we scan from the end
994  // of the block.
995  BasicBlock::iterator ScanPos = BB->end();
996  if (ExistingResult && ExistingResult->getResult().getInst()) {
997  assert(ExistingResult->getResult().getInst()->getParent() == BB &&
998  "Instruction invalidated?");
999  ++NumCacheDirtyNonLocalPtr;
1000  ScanPos = ExistingResult->getResult().getInst()->getIterator();
1001 
1002  // Eliminating the dirty entry from 'Cache', so update the reverse info.
1003  ValueIsLoadPair CacheKey(Loc.Ptr, isLoad);
1004  RemoveFromReverseMap(ReverseNonLocalPtrDeps, &*ScanPos, CacheKey);
1005  } else {
1006  ++NumUncacheNonLocalPtr;
1007  }
1008 
1009  // Scan the block for the dependency.
1010  MemDepResult Dep =
1011  getPointerDependencyFrom(Loc, isLoad, ScanPos, BB, QueryInst);
1012 
1013  // If we had a dirty entry for the block, update it. Otherwise, just add
1014  // a new entry.
1015  if (ExistingResult)
1016  ExistingResult->setResult(Dep);
1017  else
1018  Cache->push_back(NonLocalDepEntry(BB, Dep));
1019 
1020  // If the block has a dependency (i.e. it isn't completely transparent to
1021  // the value), remember the reverse association because we just added it
1022  // to Cache!
1023  if (!Dep.isDef() && !Dep.isClobber())
1024  return Dep;
1025 
1026  // Keep the ReverseNonLocalPtrDeps map up to date so we can efficiently
1027  // update MemDep when we remove instructions.
1028  Instruction *Inst = Dep.getInst();
1029  assert(Inst && "Didn't depend on anything?");
1030  ValueIsLoadPair CacheKey(Loc.Ptr, isLoad);
1031  ReverseNonLocalPtrDeps[Inst].insert(CacheKey);
1032  return Dep;
1033 }
1034 
1035 /// Sort the NonLocalDepInfo cache, given a certain number of elements in the
1036 /// array that are already properly ordered.
1037 ///
1038 /// This is optimized for the case when only a few entries are added.
1039 static void
1040 SortNonLocalDepInfoCache(MemoryDependenceResults::NonLocalDepInfo &Cache,
1041  unsigned NumSortedEntries) {
1042  switch (Cache.size() - NumSortedEntries) {
1043  case 0:
1044  // done, no new entries.
1045  break;
1046  case 2: {
1047  // Two new entries, insert the last one into place.
1048  NonLocalDepEntry Val = Cache.back();
1049  Cache.pop_back();
1050  MemoryDependenceResults::NonLocalDepInfo::iterator Entry =
1051  std::upper_bound(Cache.begin(), Cache.end() - 1, Val);
1052  Cache.insert(Entry, Val);
1053  LLVM_FALLTHROUGH;
1054  }
1055  case 1:
1056  // One new entry, Just insert the new value at the appropriate position.
1057  if (Cache.size() != 1) {
1058  NonLocalDepEntry Val = Cache.back();
1059  Cache.pop_back();
1060  MemoryDependenceResults::NonLocalDepInfo::iterator Entry =
1061  std::upper_bound(Cache.begin(), Cache.end(), Val);
1062  Cache.insert(Entry, Val);
1063  }
1064  break;
1065  default:
1066  // Added many values, do a full scale sort.
1067  std::sort(Cache.begin(), Cache.end());
1068  break;
1069  }
1070 }
1071 
1072 /// Perform a dependency query based on pointer/pointeesize starting at the end
1073 /// of StartBB.
1074 ///
1075 /// Add any clobber/def results to the results vector and keep track of which
1076 /// blocks are visited in 'Visited'.
1077 ///
1078 /// This has special behavior for the first block queries (when SkipFirstBlock
1079 /// is true). In this special case, it ignores the contents of the specified
1080 /// block and starts returning dependence info for its predecessors.
1081 ///
1082 /// This function returns true on success, or false to indicate that it could
1083 /// not compute dependence information for some reason. This should be treated
1084 /// as a clobber dependence on the first instruction in the predecessor block.
1085 bool MemoryDependenceResults::getNonLocalPointerDepFromBB(
1086  Instruction *QueryInst, const PHITransAddr &Pointer,
1087  const MemoryLocation &Loc, bool isLoad, BasicBlock *StartBB,
1088  SmallVectorImpl<NonLocalDepResult> &Result,
1089  DenseMap<BasicBlock *, Value *> &Visited, bool SkipFirstBlock) {
1090  // Look up the cached info for Pointer.
1091  ValueIsLoadPair CacheKey(Pointer.getAddr(), isLoad);
1092 
1093  // Set up a temporary NLPI value. If the map doesn't yet have an entry for
1094  // CacheKey, this value will be inserted as the associated value. Otherwise,
1095  // it'll be ignored, and we'll have to check to see if the cached size and
1096  // aa tags are consistent with the current query.
1097  NonLocalPointerInfo InitialNLPI;
1098  InitialNLPI.Size = Loc.Size;
1099  InitialNLPI.AATags = Loc.AATags;
1100 
1101  // Get the NLPI for CacheKey, inserting one into the map if it doesn't
1102  // already have one.
1103  std::pair<CachedNonLocalPointerInfo::iterator, bool> Pair =
1104  NonLocalPointerDeps.insert(std::make_pair(CacheKey, InitialNLPI));
1105  NonLocalPointerInfo *CacheInfo = &Pair.first->second;
1106 
1107  // If we already have a cache entry for this CacheKey, we may need to do some
1108  // work to reconcile the cache entry and the current query.
1109  if (!Pair.second) {
1110  if (CacheInfo->Size < Loc.Size) {
1111  // The query's Size is greater than the cached one. Throw out the
1112  // cached data and proceed with the query at the greater size.
1113  CacheInfo->Pair = BBSkipFirstBlockPair();
1114  CacheInfo->Size = Loc.Size;
1115  for (auto &Entry : CacheInfo->NonLocalDeps)
1116  if (Instruction *Inst = Entry.getResult().getInst())
1117  RemoveFromReverseMap(ReverseNonLocalPtrDeps, Inst, CacheKey);
1118  CacheInfo->NonLocalDeps.clear();
1119  } else if (CacheInfo->Size > Loc.Size) {
1120  // This query's Size is less than the cached one. Conservatively restart
1121  // the query using the greater size.
1122  return getNonLocalPointerDepFromBB(
1123  QueryInst, Pointer, Loc.getWithNewSize(CacheInfo->Size), isLoad,
1124  StartBB, Result, Visited, SkipFirstBlock);
1125  }
1126 
1127  // If the query's AATags are inconsistent with the cached one,
1128  // conservatively throw out the cached data and restart the query with
1129  // no tag if needed.
1130  if (CacheInfo->AATags != Loc.AATags) {
1131  if (CacheInfo->AATags) {
1132  CacheInfo->Pair = BBSkipFirstBlockPair();
1133  CacheInfo->AATags = AAMDNodes();
1134  for (auto &Entry : CacheInfo->NonLocalDeps)
1135  if (Instruction *Inst = Entry.getResult().getInst())
1136  RemoveFromReverseMap(ReverseNonLocalPtrDeps, Inst, CacheKey);
1137  CacheInfo->NonLocalDeps.clear();
1138  }
1139  if (Loc.AATags)
1140  return getNonLocalPointerDepFromBB(
1141  QueryInst, Pointer, Loc.getWithoutAATags(), isLoad, StartBB, Result,
1142  Visited, SkipFirstBlock);
1143  }
1144  }
1145 
1146  NonLocalDepInfo *Cache = &CacheInfo->NonLocalDeps;
1147 
1148  // If we have valid cached information for exactly the block we are
1149  // investigating, just return it with no recomputation.
1150  if (CacheInfo->Pair == BBSkipFirstBlockPair(StartBB, SkipFirstBlock)) {
1151  // We have a fully cached result for this query, so we can just return the
1152  // cached results and populate the visited set. However, we have to verify
1153  // that we don't already have conflicting results for these blocks. Check
1154  // to ensure that if a block in the results set is in the visited set that
1155  // it was for the same pointer query.
1156  if (!Visited.empty()) {
1157  for (auto &Entry : *Cache) {
1158  DenseMap<BasicBlock *, Value *>::iterator VI =
1159  Visited.find(Entry.getBB());
1160  if (VI == Visited.end() || VI->second == Pointer.getAddr())
1161  continue;
1162 
1163  // We have a pointer mismatch in a block. Just return false, saying
1164  // that something was clobbered in this result. We could also do a
1165  // non-fully cached query, but there is little point in doing this.
1166  return false;
1167  }
1168  }
1169 
1170  Value *Addr = Pointer.getAddr();
1171  for (auto &Entry : *Cache) {
1172  Visited.insert(std::make_pair(Entry.getBB(), Addr));
1173  if (Entry.getResult().isNonLocal()) {
1174  continue;
1175  }
1176 
1177  if (DT.isReachableFromEntry(Entry.getBB())) {
1178  Result.push_back(
1179  NonLocalDepResult(Entry.getBB(), Entry.getResult(), Addr));
1180  }
1181  }
1182  ++NumCacheCompleteNonLocalPtr;
1183  return true;
1184  }
1185 
1186  // Otherwise, either this is a new block, a block with an invalid cache
1187  // pointer or one that we're about to invalidate by putting more info into it
1188  // than its valid cache info. If empty, the result will be valid cache info,
1189  // otherwise it isn't.
1190  if (Cache->empty())
1191  CacheInfo->Pair = BBSkipFirstBlockPair(StartBB, SkipFirstBlock);
1192  else
1193  CacheInfo->Pair = BBSkipFirstBlockPair();
1194 
1195  SmallVector<BasicBlock *, 32> Worklist;
1196  Worklist.push_back(StartBB);
1197 
1198  // PredList used inside loop.
1199  SmallVector<std::pair<BasicBlock *, PHITransAddr>, 16> PredList;
1200 
1201  // Keep track of the entries that we know are sorted. Previously cached
1202  // entries will all be sorted. The entries we add we only sort on demand (we
1203  // don't insert every element into its sorted position). We know that we
1204  // won't get any reuse from currently inserted values, because we don't
1205  // revisit blocks after we insert info for them.
1206  unsigned NumSortedEntries = Cache->size();
1207  unsigned WorklistEntries = BlockNumberLimit;
1208  bool GotWorklistLimit = false;
1209  DEBUG(AssertSorted(*Cache));
1210 
1211  while (!Worklist.empty()) {
1212  BasicBlock *BB = Worklist.pop_back_val();
1213 
1214  // If we do process a large number of blocks it becomes very expensive and
1215  // likely it isn't worth worrying about.
1216  if (Result.size() > NumResultsLimit) {
1217  Worklist.clear();
1218  // Sort it now (if needed) so that recursive invocations of
1219  // getNonLocalPointerDepFromBB and other routines that could reuse the
1220  // cache value will only see properly sorted cache arrays.
1221  if (Cache && NumSortedEntries != Cache->size()) {
1222  SortNonLocalDepInfoCache(*Cache, NumSortedEntries);
1223  }
1224  // Since we bail out, the "Cache" set won't contain all of the
1225  // results for the query. This is ok (we can still use it to accelerate
1226  // specific block queries) but we can't do the fastpath "return all
1227  // results from the set". Clear out the indicator for this.
1228  CacheInfo->Pair = BBSkipFirstBlockPair();
1229  return false;
1230  }
1231 
1232  // Skip the first block if we have it.
1233  if (!SkipFirstBlock) {
1234  // Analyze the dependency of *Pointer in FromBB. See if we already have
1235  // been here.
1236  assert(Visited.count(BB) && "Should check 'visited' before adding to WL");
1237 
1238  // Get the dependency info for Pointer in BB. If we have cached
1239  // information, we will use it, otherwise we compute it.
1240  DEBUG(AssertSorted(*Cache, NumSortedEntries));
1241  MemDepResult Dep = GetNonLocalInfoForBlock(QueryInst, Loc, isLoad, BB,
1242  Cache, NumSortedEntries);
1243 
1244  // If we got a Def or Clobber, add this to the list of results.
1245  if (!Dep.isNonLocal()) {
1246  if (DT.isReachableFromEntry(BB)) {
1247  Result.push_back(NonLocalDepResult(BB, Dep, Pointer.getAddr()));
1248  continue;
1249  }
1250  }
1251  }
1252 
1253  // If 'Pointer' is an instruction defined in this block, then we need to do
1254  // phi translation to change it into a value live in the predecessor block.
1255  // If not, we just add the predecessors to the worklist and scan them with
1256  // the same Pointer.
1257  if (!Pointer.NeedsPHITranslationFromBlock(BB)) {
1258  SkipFirstBlock = false;
1259  SmallVector<BasicBlock *, 16> NewBlocks;
1260  for (BasicBlock *Pred : PredCache.get(BB)) {
1261  // Verify that we haven't looked at this block yet.
1262  std::pair<DenseMap<BasicBlock *, Value *>::iterator, bool> InsertRes =
1263  Visited.insert(std::make_pair(Pred, Pointer.getAddr()));
1264  if (InsertRes.second) {
1265  // First time we've looked at *PI.
1266  NewBlocks.push_back(Pred);
1267  continue;
1268  }
1269 
1270  // If we have seen this block before, but it was with a different
1271  // pointer then we have a phi translation failure and we have to treat
1272  // this as a clobber.
1273  if (InsertRes.first->second != Pointer.getAddr()) {
1274  // Make sure to clean up the Visited map before continuing on to
1275  // PredTranslationFailure.
1276  for (unsigned i = 0; i < NewBlocks.size(); i++)
1277  Visited.erase(NewBlocks[i]);
1278  goto PredTranslationFailure;
1279  }
1280  }
1281  if (NewBlocks.size() > WorklistEntries) {
1282  // Make sure to clean up the Visited map before continuing on to
1283  // PredTranslationFailure.
1284  for (unsigned i = 0; i < NewBlocks.size(); i++)
1285  Visited.erase(NewBlocks[i]);
1286  GotWorklistLimit = true;
1287  goto PredTranslationFailure;
1288  }
1289  WorklistEntries -= NewBlocks.size();
1290  Worklist.append(NewBlocks.begin(), NewBlocks.end());
1291  continue;
1292  }
1293 
1294  // We do need to do phi translation, if we know ahead of time we can't phi
1295  // translate this value, don't even try.
1296  if (!Pointer.IsPotentiallyPHITranslatable())
1297  goto PredTranslationFailure;
1298 
1299  // We may have added values to the cache list before this PHI translation.
1300  // If so, we haven't done anything to ensure that the cache remains sorted.
1301  // Sort it now (if needed) so that recursive invocations of
1302  // getNonLocalPointerDepFromBB and other routines that could reuse the cache
1303  // value will only see properly sorted cache arrays.
1304  if (Cache && NumSortedEntries != Cache->size()) {
1305  SortNonLocalDepInfoCache(*Cache, NumSortedEntries);
1306  NumSortedEntries = Cache->size();
1307  }
1308  Cache = nullptr;
1309 
1310  PredList.clear();
1311  for (BasicBlock *Pred : PredCache.get(BB)) {
1312  PredList.push_back(std::make_pair(Pred, Pointer));
1313 
1314  // Get the PHI translated pointer in this predecessor. This can fail if
1315  // not translatable, in which case the getAddr() returns null.
1316  PHITransAddr &PredPointer = PredList.back().second;
1317  PredPointer.PHITranslateValue(BB, Pred, &DT, /*MustDominate=*/false);
1318  Value *PredPtrVal = PredPointer.getAddr();
1319 
1320  // Check to see if we have already visited this pred block with another
1321  // pointer. If so, we can't do this lookup. This failure can occur
1322  // with PHI translation when a critical edge exists and the PHI node in
1323  // the successor translates to a pointer value different than the
1324  // pointer the block was first analyzed with.
1325  std::pair<DenseMap<BasicBlock *, Value *>::iterator, bool> InsertRes =
1326  Visited.insert(std::make_pair(Pred, PredPtrVal));
1327 
1328  if (!InsertRes.second) {
1329  // We found the pred; take it off the list of preds to visit.
1330  PredList.pop_back();
1331 
1332  // If the predecessor was visited with PredPtr, then we already did
1333  // the analysis and can ignore it.
1334  if (InsertRes.first->second == PredPtrVal)
1335  continue;
1336 
1337  // Otherwise, the block was previously analyzed with a different
1338  // pointer. We can't represent the result of this case, so we just
1339  // treat this as a phi translation failure.
1340 
1341  // Make sure to clean up the Visited map before continuing on to
1342  // PredTranslationFailure.
1343  for (unsigned i = 0, n = PredList.size(); i < n; ++i)
1344  Visited.erase(PredList[i].first);
1345 
1346  goto PredTranslationFailure;
1347  }
1348  }
1349 
1350  // Actually process results here; this needs to be a separate loop to avoid
1351  // calling getNonLocalPointerDepFromBB for blocks we don't want to return
1352  // any results for. (getNonLocalPointerDepFromBB will modify our
1353  // datastructures in ways the code after the PredTranslationFailure label
1354  // doesn't expect.)
1355  for (unsigned i = 0, n = PredList.size(); i < n; ++i) {
1356  BasicBlock *Pred = PredList[i].first;
1357  PHITransAddr &PredPointer = PredList[i].second;
1358  Value *PredPtrVal = PredPointer.getAddr();
1359 
1360  bool CanTranslate = true;
1361  // If PHI translation was unable to find an available pointer in this
1362  // predecessor, then we have to assume that the pointer is clobbered in
1363  // that predecessor. We can still do PRE of the load, which would insert
1364  // a computation of the pointer in this predecessor.
1365  if (!PredPtrVal)
1366  CanTranslate = false;
1367 
1368  // FIXME: it is entirely possible that PHI translating will end up with
1369  // the same value. Consider PHI translating something like:
1370  // X = phi [x, bb1], [y, bb2]. PHI translating for bb1 doesn't *need*
1371  // to recurse here, pedantically speaking.
1372 
1373  // If getNonLocalPointerDepFromBB fails here, that means the cached
1374  // result conflicted with the Visited list; we have to conservatively
1375  // assume it is unknown, but this also does not block PRE of the load.
1376  if (!CanTranslate ||
1377  !getNonLocalPointerDepFromBB(QueryInst, PredPointer,
1378  Loc.getWithNewPtr(PredPtrVal), isLoad,
1379  Pred, Result, Visited)) {
1380  // Add the entry to the Result list.
1381  NonLocalDepResult Entry(Pred, MemDepResult::getUnknown(), PredPtrVal);
1382  Result.push_back(Entry);
1383 
1384  // Since we had a phi translation failure, the cache for CacheKey won't
1385  // include all of the entries that we need to immediately satisfy future
1386  // queries. Mark this in NonLocalPointerDeps by setting the
1387  // BBSkipFirstBlockPair pointer to null. This requires reuse of the
1388  // cached value to do more work but not miss the phi trans failure.
1389  NonLocalPointerInfo &NLPI = NonLocalPointerDeps[CacheKey];
1390  NLPI.Pair = BBSkipFirstBlockPair();
1391  continue;
1392  }
1393  }
1394 
1395  // Refresh the CacheInfo/Cache pointer so that it isn't invalidated.
1396  CacheInfo = &NonLocalPointerDeps[CacheKey];
1397  Cache = &CacheInfo->NonLocalDeps;
1398  NumSortedEntries = Cache->size();
1399 
1400  // Since we did phi translation, the "Cache" set won't contain all of the
1401  // results for the query. This is ok (we can still use it to accelerate
1402  // specific block queries) but we can't do the fastpath "return all
1403  // results from the set" Clear out the indicator for this.
1404  CacheInfo->Pair = BBSkipFirstBlockPair();
1405  SkipFirstBlock = false;
1406  continue;
1407 
1408  PredTranslationFailure:
1409  // The following code is "failure"; we can't produce a sane translation
1410  // for the given block. It assumes that we haven't modified any of
1411  // our datastructures while processing the current block.
1412 
1413  if (!Cache) {
1414  // Refresh the CacheInfo/Cache pointer if it got invalidated.
1415  CacheInfo = &NonLocalPointerDeps[CacheKey];
1416  Cache = &CacheInfo->NonLocalDeps;
1417  NumSortedEntries = Cache->size();
1418  }
1419 
1420  // Since we failed phi translation, the "Cache" set won't contain all of the
1421  // results for the query. This is ok (we can still use it to accelerate
1422  // specific block queries) but we can't do the fastpath "return all
1423  // results from the set". Clear out the indicator for this.
1424  CacheInfo->Pair = BBSkipFirstBlockPair();
1425 
1426  // If *nothing* works, mark the pointer as unknown.
1427  //
1428  // If this is the magic first block, return this as a clobber of the whole
1429  // incoming value. Since we can't phi translate to one of the predecessors,
1430  // we have to bail out.
1431  if (SkipFirstBlock)
1432  return false;
1433 
1434  bool foundBlock = false;
1435  for (NonLocalDepEntry &I : llvm::reverse(*Cache)) {
1436  if (I.getBB() != BB)
1437  continue;
1438 
1439  assert((GotWorklistLimit || I.getResult().isNonLocal() ||
1440  !DT.isReachableFromEntry(BB)) &&
1441  "Should only be here with transparent block");
1442  foundBlock = true;
1443  I.setResult(MemDepResult::getUnknown());
1444  Result.push_back(
1445  NonLocalDepResult(I.getBB(), I.getResult(), Pointer.getAddr()));
1446  break;
1447  }
1448  (void)foundBlock; (void)GotWorklistLimit;
1449  assert((foundBlock || GotWorklistLimit) && "Current block not in cache?");
1450  }
1451 
1452  // Okay, we're done now. If we added new values to the cache, re-sort it.
1453  SortNonLocalDepInfoCache(*Cache, NumSortedEntries);
1454  DEBUG(AssertSorted(*Cache));
1455  return true;
1456 }
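getNonLocalPointerDepFromBB is the worker behind the public getNonLocalPointerDependency entry point: each element appended to Result pairs a block with the dependency found for the (phi-translated) pointer there, or an unknown marker when translation or the scan limits gave up. A minimal sketch of how a caller might consume those results; the helper and its variable names below are illustrative, not part of this file.

// Illustrative sketch only: 'MD' and 'Load' are assumed to be supplied by
// the surrounding pass.
static void visitNonLocalDeps(MemoryDependenceResults &MD, LoadInst *Load) {
  SmallVector<NonLocalDepResult, 16> Deps;
  MD.getNonLocalPointerDependency(Load, Deps);
  for (const NonLocalDepResult &Dep : Deps) {
    const MemDepResult &Res = Dep.getResult();
    if (Res.isDef() || Res.isClobber())
      (void)Res.getInst(); // Defining/clobbering instruction in Dep.getBB().
    else if (Res.isUnknown())
      ; // Phi translation failed or a scan limit was hit; stay conservative.
  }
}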
1457 
1458 /// If P exists in CachedNonLocalPointerInfo, remove it.
1459 void MemoryDependenceResults::RemoveCachedNonLocalPointerDependencies(
1460  ValueIsLoadPair P) {
1461  CachedNonLocalPointerInfo::iterator It = NonLocalPointerDeps.find(P);
1462  if (It == NonLocalPointerDeps.end())
1463  return;
1464 
1465  // Remove all of the entries in the BB->val map. This involves removing
1466  // instructions from the reverse map.
1467  NonLocalDepInfo &PInfo = It->second.NonLocalDeps;
1468 
1469  for (unsigned i = 0, e = PInfo.size(); i != e; ++i) {
1470  Instruction *Target = PInfo[i].getResult().getInst();
1471  if (!Target)
1472  continue; // Ignore non-local dep results.
1473  assert(Target->getParent() == PInfo[i].getBB());
1474 
1475  // Eliminating the dirty entry from 'Cache', so update the reverse info.
1476  RemoveFromReverseMap(ReverseNonLocalPtrDeps, Target, P);
1477  }
1478 
1479  // Remove P from NonLocalPointerDeps (which deletes NonLocalDepInfo).
1480  NonLocalPointerDeps.erase(It);
1481 }
1482 
1483 void MemoryDependenceResults::invalidateCachedPointerInfo(Value *Ptr) {
1484  // If Ptr isn't really a pointer, just ignore it.
1485  if (!Ptr->getType()->isPointerTy())
1486  return;
1487  // Flush store info for the pointer.
1488  RemoveCachedNonLocalPointerDependencies(ValueIsLoadPair(Ptr, false));
1489  // Flush load info for the pointer.
1490  RemoveCachedNonLocalPointerDependencies(ValueIsLoadPair(Ptr, true));
1491 }
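A pass that keeps a MemoryDependenceResults object alive across its own IR rewrites is expected to call invalidateCachedPointerInfo whenever the meaning of a pointer value changes, for example after replacing one pointer with another. A hedged sketch of that pattern; OldPtr, NewPtr, and MD are placeholders owned by the caller.

// Sketch: after rewriting uses of OldPtr, flush stale cached pointer info
// for both values so later queries do not reuse out-of-date answers.
OldPtr->replaceAllUsesWith(NewPtr);
MD.invalidateCachedPointerInfo(OldPtr);
MD.invalidateCachedPointerInfo(NewPtr);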
1492 
1493 void MemoryDependenceResults::invalidateCachedPredecessors() {
1494  PredCache.clear();
1495 }
1496 
1497 void MemoryDependenceResults::removeInstruction(Instruction *RemInst) {
1498  // Walk through the Non-local dependencies, removing this one as the value
1499  // for any cached queries.
1500  NonLocalDepMapType::iterator NLDI = NonLocalDeps.find(RemInst);
1501  if (NLDI != NonLocalDeps.end()) {
1502  NonLocalDepInfo &BlockMap = NLDI->second.first;
1503  for (auto &Entry : BlockMap)
1504  if (Instruction *Inst = Entry.getResult().getInst())
1505  RemoveFromReverseMap(ReverseNonLocalDeps, Inst, RemInst);
1506  NonLocalDeps.erase(NLDI);
1507  }
1508 
1509  // If we have a cached local dependence query for this instruction, remove it.
1510  //
1511  LocalDepMapType::iterator LocalDepEntry = LocalDeps.find(RemInst);
1512  if (LocalDepEntry != LocalDeps.end()) {
1513  // Remove us from DepInst's reverse set now that the local dep info is gone.
1514  if (Instruction *Inst = LocalDepEntry->second.getInst())
1515  RemoveFromReverseMap(ReverseLocalDeps, Inst, RemInst);
1516 
1517  // Remove this local dependency info.
1518  LocalDeps.erase(LocalDepEntry);
1519  }
1520 
1521  // If we have any cached pointer dependencies on this instruction, remove
1522  // them. If the instruction has non-pointer type, then it can't be a pointer
1523  // base.
1524 
1525  // Remove it from both the load info and the store info. The instruction
1526  // can't be in either of these maps if it is non-pointer.
1527  if (RemInst->getType()->isPointerTy()) {
1528  RemoveCachedNonLocalPointerDependencies(ValueIsLoadPair(RemInst, false));
1529  RemoveCachedNonLocalPointerDependencies(ValueIsLoadPair(RemInst, true));
1530  }
1531 
1532  // Loop over all of the things that depend on the instruction we're removing.
1533  //
1534  SmallVector<std::pair<Instruction *, Instruction *>, 8> ReverseDepsToAdd;
1535 
1536  // If we find RemInst as a clobber or Def in any of the maps for other values,
1537  // we need to replace its entry with a dirty version of the instruction after
1538  // it. If RemInst is a terminator, we use a null dirty value.
1539  //
1540  // Using a dirty version of the instruction after RemInst saves having to scan
1541  // the entire block to get to this point.
1542  MemDepResult NewDirtyVal;
1543  if (!RemInst->isTerminator())
1544  NewDirtyVal = MemDepResult::getDirty(&*++RemInst->getIterator());
1545 
1546  ReverseDepMapType::iterator ReverseDepIt = ReverseLocalDeps.find(RemInst);
1547  if (ReverseDepIt != ReverseLocalDeps.end()) {
1548  // RemInst can't be the terminator if it has local stuff depending on it.
1549  assert(!ReverseDepIt->second.empty() && !isa<TerminatorInst>(RemInst) &&
1550  "Nothing can locally depend on a terminator");
1551 
1552  for (Instruction *InstDependingOnRemInst : ReverseDepIt->second) {
1553  assert(InstDependingOnRemInst != RemInst &&
1554  "Already removed our local dep info");
1555 
1556  LocalDeps[InstDependingOnRemInst] = NewDirtyVal;
1557 
1558  // Make sure to remember that new things depend on NewDirtyVal's instruction.
1559  assert(NewDirtyVal.getInst() &&
1560  "There is no way something else can have "
1561  "a local dep on this if it is a terminator!");
1562  ReverseDepsToAdd.push_back(
1563  std::make_pair(NewDirtyVal.getInst(), InstDependingOnRemInst));
1564  }
1565 
1566  ReverseLocalDeps.erase(ReverseDepIt);
1567 
1568  // Add new reverse deps after scanning the set, to avoid invalidating the
1569  // 'ReverseDeps' reference.
1570  while (!ReverseDepsToAdd.empty()) {
1571  ReverseLocalDeps[ReverseDepsToAdd.back().first].insert(
1572  ReverseDepsToAdd.back().second);
1573  ReverseDepsToAdd.pop_back();
1574  }
1575  }
1576 
1577  ReverseDepIt = ReverseNonLocalDeps.find(RemInst);
1578  if (ReverseDepIt != ReverseNonLocalDeps.end()) {
1579  for (Instruction *I : ReverseDepIt->second) {
1580  assert(I != RemInst && "Already removed NonLocalDep info for RemInst");
1581 
1582  PerInstNLInfo &INLD = NonLocalDeps[I];
1583  // The information is now dirty!
1584  INLD.second = true;
1585 
1586  for (auto &Entry : INLD.first) {
1587  if (Entry.getResult().getInst() != RemInst)
1588  continue;
1589 
1590  // Convert to a dirty entry for the subsequent instruction.
1591  Entry.setResult(NewDirtyVal);
1592 
1593  if (Instruction *NextI = NewDirtyVal.getInst())
1594  ReverseDepsToAdd.push_back(std::make_pair(NextI, I));
1595  }
1596  }
1597 
1598  ReverseNonLocalDeps.erase(ReverseDepIt);
1599 
1600  // Add new reverse deps after scanning the set, to avoid invalidating 'Set'
1601  while (!ReverseDepsToAdd.empty()) {
1602  ReverseNonLocalDeps[ReverseDepsToAdd.back().first].insert(
1603  ReverseDepsToAdd.back().second);
1604  ReverseDepsToAdd.pop_back();
1605  }
1606  }
1607 
1608  // If the instruction is in ReverseNonLocalPtrDeps then it appears as a
1609  // value in the NonLocalPointerDeps info.
1610  ReverseNonLocalPtrDepTy::iterator ReversePtrDepIt =
1611  ReverseNonLocalPtrDeps.find(RemInst);
1612  if (ReversePtrDepIt != ReverseNonLocalPtrDeps.end()) {
1613  SmallVector<std::pair<Instruction *, ValueIsLoadPair>, 8>
1614  ReversePtrDepsToAdd;
1615 
1616  for (ValueIsLoadPair P : ReversePtrDepIt->second) {
1617  assert(P.getPointer() != RemInst &&
1618  "Already removed NonLocalPointerDeps info for RemInst");
1619 
1620  NonLocalDepInfo &NLPDI = NonLocalPointerDeps[P].NonLocalDeps;
1621 
1622  // The cache is not valid for any specific block anymore.
1623  NonLocalPointerDeps[P].Pair = BBSkipFirstBlockPair();
1624 
1625  // Update any entries for RemInst to use the instruction after it.
1626  for (auto &Entry : NLPDI) {
1627  if (Entry.getResult().getInst() != RemInst)
1628  continue;
1629 
1630  // Convert to a dirty entry for the subsequent instruction.
1631  Entry.setResult(NewDirtyVal);
1632 
1633  if (Instruction *NewDirtyInst = NewDirtyVal.getInst())
1634  ReversePtrDepsToAdd.push_back(std::make_pair(NewDirtyInst, P));
1635  }
1636 
1637  // Re-sort the NonLocalDepInfo. Changing the dirty entry to its
1638  // subsequent value may invalidate the sortedness.
1639  std::sort(NLPDI.begin(), NLPDI.end());
1640  }
1641 
1642  ReverseNonLocalPtrDeps.erase(ReversePtrDepIt);
1643 
1644  while (!ReversePtrDepsToAdd.empty()) {
1645  ReverseNonLocalPtrDeps[ReversePtrDepsToAdd.back().first].insert(
1646  ReversePtrDepsToAdd.back().second);
1647  ReversePtrDepsToAdd.pop_back();
1648  }
1649  }
1650 
1651  assert(!NonLocalDeps.count(RemInst) && "RemInst got reinserted?");
1652  DEBUG(verifyRemoved(RemInst));
1653 }
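Callers that delete instructions while holding memdep results must invoke removeInstruction before erasing the IR, so the local, non-local, and reverse maps stop pointing at freed memory. A minimal sketch, assuming MD and DeadInst come from the surrounding code.

// Sketch: purge memdep's caches, then delete the instruction itself.
static void eraseWithMemDep(MemoryDependenceResults &MD, Instruction *DeadInst) {
  MD.removeInstruction(DeadInst); // Update caches and reverse maps first.
  DeadInst->eraseFromParent();    // Only now is it safe to delete the IR.
}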
1654 
1655 /// Verify that the specified instruction does not occur in our internal data
1656 /// structures.
1657 ///
1658 /// This function verifies by asserting in debug builds.
1659 void MemoryDependenceResults::verifyRemoved(Instruction *D) const {
1660 #ifndef NDEBUG
1661  for (const auto &DepKV : LocalDeps) {
1662  assert(DepKV.first != D && "Inst occurs in data structures");
1663  assert(DepKV.second.getInst() != D && "Inst occurs in data structures");
1664  }
1665 
1666  for (const auto &DepKV : NonLocalPointerDeps) {
1667  assert(DepKV.first.getPointer() != D && "Inst occurs in NLPD map key");
1668  for (const auto &Entry : DepKV.second.NonLocalDeps)
1669  assert(Entry.getResult().getInst() != D && "Inst occurs as NLPD value");
1670  }
1671 
1672  for (const auto &DepKV : NonLocalDeps) {
1673  assert(DepKV.first != D && "Inst occurs in data structures");
1674  const PerInstNLInfo &INLD = DepKV.second;
1675  for (const auto &Entry : INLD.first)
1676  assert(Entry.getResult().getInst() != D &&
1677  "Inst occurs in data structures");
1678  }
1679 
1680  for (const auto &DepKV : ReverseLocalDeps) {
1681  assert(DepKV.first != D && "Inst occurs in data structures");
1682  for (Instruction *Inst : DepKV.second)
1683  assert(Inst != D && "Inst occurs in data structures");
1684  }
1685 
1686  for (const auto &DepKV : ReverseNonLocalDeps) {
1687  assert(DepKV.first != D && "Inst occurs in data structures");
1688  for (Instruction *Inst : DepKV.second)
1689  assert(Inst != D && "Inst occurs in data structures");
1690  }
1691 
1692  for (const auto &DepKV : ReverseNonLocalPtrDeps) {
1693  assert(DepKV.first != D && "Inst occurs in rev NLPD map");
1694 
1695  for (ValueIsLoadPair P : DepKV.second)
1696  assert(P != ValueIsLoadPair(D, false) && P != ValueIsLoadPair(D, true) &&
1697  "Inst occurs in ReverseNonLocalPtrDeps map");
1698  }
1699 #endif
1700 }
1701 
1702 AnalysisKey MemoryDependenceAnalysis::Key;
1703 
1704 MemoryDependenceResults
1705 MemoryDependenceAnalysis::run(Function &F, FunctionAnalysisManager &AM) {
1706  auto &AA = AM.getResult<AAManager>(F);
1707  auto &AC = AM.getResult<AssumptionAnalysis>(F);
1708  auto &TLI = AM.getResult<TargetLibraryAnalysis>(F);
1709  auto &DT = AM.getResult<DominatorTreeAnalysis>(F);
1710  return MemoryDependenceResults(AA, AC, TLI, DT);
1711 }
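Under the new pass manager, clients obtain these results from the analysis manager rather than constructing them by hand. A sketch of that usage inside a hypothetical pass; only the memdep and analysis-manager calls are real API.

// Sketch: 'ExamplePass' is hypothetical; it asks for memdep results and
// issues a local dependency query for the first memory access it sees.
struct ExamplePass : PassInfoMixin<ExamplePass> {
  PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM) {
    MemoryDependenceResults &MD = AM.getResult<MemoryDependenceAnalysis>(F);
    for (Instruction &I : F.getEntryBlock())
      if (I.mayReadOrWriteMemory()) {
        (void)MD.getDependency(&I); // Same-block (local) dependency.
        break;
      }
    return PreservedAnalyses::all();
  }
};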
1712 
1713 char MemoryDependenceWrapperPass::ID = 0;
1714 
1715 INITIALIZE_PASS_BEGIN(MemoryDependenceWrapperPass, "memdep",
1716                       "Memory Dependence Analysis", false, true)
1717 INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
1718 INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
1719 INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
1720 INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
1721 INITIALIZE_PASS_END(MemoryDependenceWrapperPass, "memdep",
1722                     "Memory Dependence Analysis", false, true)
1723 
1724 MemoryDependenceWrapperPass::MemoryDependenceWrapperPass() : FunctionPass(ID) {
1725  initializeMemoryDependenceWrapperPassPass(*PassRegistry::getPassRegistry());
1726 }
1727 
1728 MemoryDependenceWrapperPass::~MemoryDependenceWrapperPass() {}
1729 
1730 void MemoryDependenceWrapperPass::releaseMemory() {
1731  MemDep.reset();
1732 }
1733 
1734 void MemoryDependenceWrapperPass::getAnalysisUsage(AnalysisUsage &AU) const {
1735  AU.setPreservesAll();
1736  AU.addRequired<AssumptionCacheTracker>();
1737  AU.addRequired<DominatorTreeWrapperPass>();
1738  AU.addRequiredTransitive<AAResultsWrapperPass>();
1739  AU.addRequiredTransitive<TargetLibraryInfoWrapperPass>();
1740 }
1741 
1742 bool MemoryDependenceResults::invalidate(Function &F, const PreservedAnalyses &PA,
1743                                          FunctionAnalysisManager::Invalidator &Inv) {
1744  // Check whether our analysis is preserved.
1745  auto PAC = PA.getChecker<MemoryDependenceAnalysis>();
1746  if (!PAC.preserved() && !PAC.preservedSet<AllAnalysesOn<Function>>())
1747  // If not, give up now.
1748  return true;
1749 
1750  // Check whether the analyses we depend on became invalid for any reason.
1751  if (Inv.invalidate<AAManager>(F, PA) ||
1752  Inv.invalidate<AssumptionAnalysis>(F, PA) ||
1753  Inv.invalidate<DominatorTreeAnalysis>(F, PA))
1754  return true;
1755 
1756  // Otherwise this analysis result remains valid.
1757  return false;
1758 }
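The converse of this invalidation hook: a transformation that keeps the caches consistent (via removeInstruction / invalidateCachedPointerInfo above) may mark the analysis preserved so the pass manager does not throw it away. A sketch of what such a pass might return; note that, per the checks above, the analyses memdep builds on must survive as well.

// Sketch: preserved-analyses set a memdep-friendly transformation might report.
static PreservedAnalyses reportPreserved() {
  PreservedAnalyses PA;
  PA.preserve<MemoryDependenceAnalysis>();
  // invalidate() above also drops the result if AA, assumption, or dominator
  // information is invalidated, so those need to be preserved too.
  PA.preserve<DominatorTreeAnalysis>();
  return PA;
}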
1759 
1760 unsigned MemoryDependenceResults::getDefaultBlockScanLimit() const {
1761  return BlockScanLimit;
1762 }
1763 
1764 bool MemoryDependenceWrapperPass::runOnFunction(Function &F) {
1765  auto &AA = getAnalysis<AAResultsWrapperPass>().getAAResults();
1766  auto &AC = getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
1767  auto &TLI = getAnalysis<TargetLibraryInfoWrapperPass>().getTLI();
1768  auto &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
1769  MemDep.emplace(AA, AC, TLI, DT);
1770  return false;
1771 }
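For the legacy pass manager, clients declare a dependency on MemoryDependenceWrapperPass and read the results out of it; getMemDep() is the wrapper's accessor declared in the corresponding header. A sketch using a hypothetical legacy pass.

// Sketch: 'ExampleLegacyPass' is hypothetical; only the analysis-usage and
// memdep calls reflect real API.
void ExampleLegacyPass::getAnalysisUsage(AnalysisUsage &AU) const {
  AU.addRequired<MemoryDependenceWrapperPass>();
  AU.setPreservesAll();
}

bool ExampleLegacyPass::runOnFunction(Function &F) {
  MemoryDependenceResults &MD =
      getAnalysis<MemoryDependenceWrapperPass>().getMemDep();
  for (Instruction &I : F.getEntryBlock())
    if (I.mayReadOrWriteMemory())
      (void)MD.getDependency(&I); // Local dependency within the entry block.
  return false; // Analysis only; the IR is not modified.
}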