LLVM  6.0.0svn
MemoryDependenceAnalysis.cpp
1 //===- MemoryDependenceAnalysis.cpp - Mem Deps Implementation -------------===//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This file implements an analysis that determines, for a given memory
11 // operation, what preceding memory operations it depends on. It builds on
12 // alias analysis information, and tries to provide a lazy, caching interface to
13 // a common kind of alias information query.
14 //
15 //===----------------------------------------------------------------------===//
16 
17 #include "llvm/Analysis/MemoryDependenceAnalysis.h"
18 #include "llvm/ADT/STLExtras.h"
19 #include "llvm/ADT/SmallSet.h"
20 #include "llvm/ADT/SmallVector.h"
21 #include "llvm/ADT/Statistic.h"
29 #include "llvm/IR/CallSite.h"
30 #include "llvm/IR/Constants.h"
31 #include "llvm/IR/DataLayout.h"
32 #include "llvm/IR/DerivedTypes.h"
33 #include "llvm/IR/Dominators.h"
34 #include "llvm/IR/Function.h"
35 #include "llvm/IR/Instruction.h"
36 #include "llvm/IR/Instructions.h"
37 #include "llvm/IR/IntrinsicInst.h"
38 #include "llvm/IR/LLVMContext.h"
41 #include "llvm/Support/Casting.h"
42 #include "llvm/Support/CommandLine.h"
43 #include "llvm/Support/Compiler.h"
44 #include "llvm/Support/Debug.h"
45 #include "llvm/Support/MathExtras.h"
46 #include <algorithm>
47 #include <cassert>
48 #include <iterator>
49 
50 using namespace llvm;
51 
52 #define DEBUG_TYPE "memdep"
53 
54 STATISTIC(NumCacheNonLocal, "Number of fully cached non-local responses");
55 STATISTIC(NumCacheDirtyNonLocal, "Number of dirty cached non-local responses");
56 STATISTIC(NumUncacheNonLocal, "Number of uncached non-local responses");
57 
58 STATISTIC(NumCacheNonLocalPtr,
59  "Number of fully cached non-local ptr responses");
60 STATISTIC(NumCacheDirtyNonLocalPtr,
61  "Number of cached, but dirty, non-local ptr responses");
62 STATISTIC(NumUncacheNonLocalPtr, "Number of uncached non-local ptr responses");
63 STATISTIC(NumCacheCompleteNonLocalPtr,
64  "Number of block queries that were completely cached");
65 
66 // Limit for the number of instructions to scan in a block.
67 
68 static cl::opt<unsigned> BlockScanLimit(
69  "memdep-block-scan-limit", cl::Hidden, cl::init(100),
70  cl::desc("The number of instructions to scan in a block in memory "
71  "dependency analysis (default = 100)"));
72 
73 static cl::opt<unsigned>
74  BlockNumberLimit("memdep-block-number-limit", cl::Hidden, cl::init(1000),
75  cl::desc("The number of blocks to scan during memory "
76  "dependency analysis (default = 1000)"));
77 
78 // Limit on the number of memdep results to process.
79 static const unsigned int NumResultsLimit = 100;
80 
81 /// This is a helper function that removes Val from 'Inst's set in ReverseMap.
82 ///
83 /// If the set becomes empty, remove Inst's entry.
84 template <typename KeyTy>
85 static void
86 RemoveFromReverseMap(DenseMap<Instruction *, SmallPtrSet<KeyTy, 4>> &ReverseMap,
87                      Instruction *Inst, KeyTy Val) {
88  typename DenseMap<Instruction *, SmallPtrSet<KeyTy, 4>>::iterator InstIt =
89  ReverseMap.find(Inst);
90  assert(InstIt != ReverseMap.end() && "Reverse map out of sync?");
91  bool Found = InstIt->second.erase(Val);
92  assert(Found && "Invalid reverse map!");
93  (void)Found;
94  if (InstIt->second.empty())
95  ReverseMap.erase(InstIt);
96 }
97 
98 /// If the given instruction references a specific memory location, fill in Loc
99 /// with the details, otherwise set Loc.Ptr to null.
100 ///
101 /// Returns a ModRefInfo value describing the general behavior of the
102 /// instruction.
103 static ModRefInfo GetLocation(const Instruction *Inst, MemoryLocation &Loc,
104                               const TargetLibraryInfo &TLI) {
105  if (const LoadInst *LI = dyn_cast<LoadInst>(Inst)) {
106  if (LI->isUnordered()) {
107  Loc = MemoryLocation::get(LI);
108  return MRI_Ref;
109  }
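  // Note: a Monotonic (relaxed) atomic load still has a precise location, but
  // it may take part in cross-thread ordering, so the cases below
  // conservatively report ModRef instead of Ref.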
110  if (LI->getOrdering() == AtomicOrdering::Monotonic) {
111  Loc = MemoryLocation::get(LI);
112  return MRI_ModRef;
113  }
114  Loc = MemoryLocation();
115  return MRI_ModRef;
116  }
117 
118  if (const StoreInst *SI = dyn_cast<StoreInst>(Inst)) {
119  if (SI->isUnordered()) {
120  Loc = MemoryLocation::get(SI);
121  return MRI_Mod;
122  }
123  if (SI->getOrdering() == AtomicOrdering::Monotonic) {
124  Loc = MemoryLocation::get(SI);
125  return MRI_ModRef;
126  }
127  Loc = MemoryLocation();
128  return MRI_ModRef;
129  }
130 
131  if (const VAArgInst *V = dyn_cast<VAArgInst>(Inst)) {
132  Loc = MemoryLocation::get(V);
133  return MRI_ModRef;
134  }
135 
136  if (const CallInst *CI = isFreeCall(Inst, &TLI)) {
137  // calls to free() deallocate the entire structure
138  Loc = MemoryLocation(CI->getArgOperand(0));
139  return MRI_Mod;
140  }
141 
142  if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(Inst)) {
143  AAMDNodes AAInfo;
144 
145  switch (II->getIntrinsicID()) {
146  case Intrinsic::lifetime_start:
147  case Intrinsic::lifetime_end:
148  case Intrinsic::invariant_start:
149  II->getAAMetadata(AAInfo);
150  Loc = MemoryLocation(
151  II->getArgOperand(1),
152  cast<ConstantInt>(II->getArgOperand(0))->getZExtValue(), AAInfo);
153  // These intrinsics don't really modify the memory, but returning Mod
154  // will allow them to be handled conservatively.
155  return MRI_Mod;
156  case Intrinsic::invariant_end:
157  II->getAAMetadata(AAInfo);
158  Loc = MemoryLocation(
159  II->getArgOperand(2),
160  cast<ConstantInt>(II->getArgOperand(1))->getZExtValue(), AAInfo);
161  // These intrinsics don't really modify the memory, but returning Mod
162  // will allow them to be handled conservatively.
163  return MRI_Mod;
164  default:
165  break;
166  }
167  }
168 
169  // Otherwise, just do the coarse-grained thing that always works.
170  if (Inst->mayWriteToMemory())
171  return MRI_ModRef;
172  if (Inst->mayReadFromMemory())
173  return MRI_Ref;
174  return MRI_NoModRef;
175 }
176 
177 /// Private helper for finding the local dependencies of a call site.
178 MemDepResult MemoryDependenceResults::getCallSiteDependencyFrom(
179  CallSite CS, bool isReadOnlyCall, BasicBlock::iterator ScanIt,
180  BasicBlock *BB) {
181  unsigned Limit = BlockScanLimit;
182 
183  // Walk backwards through the block, looking for dependencies.
184  while (ScanIt != BB->begin()) {
185  // Limit the amount of scanning we do so we don't end up with quadratic
186  // running time on extreme testcases.
187  --Limit;
188  if (!Limit)
189  return MemDepResult::getUnknown();
190 
191  Instruction *Inst = &*--ScanIt;
192 
193  // If this inst is a memory op, get the pointer it accessed
194  MemoryLocation Loc;
195  ModRefInfo MR = GetLocation(Inst, Loc, TLI);
196  if (Loc.Ptr) {
197  // A simple instruction.
198  if (AA.getModRefInfo(CS, Loc) != MRI_NoModRef)
199  return MemDepResult::getClobber(Inst);
200  continue;
201  }
202 
203  if (auto InstCS = CallSite(Inst)) {
204  // Debug intrinsics don't cause dependences.
205  if (isa<DbgInfoIntrinsic>(Inst))
206  continue;
207  // If these two calls do not interfere, look past it.
208  switch (AA.getModRefInfo(CS, InstCS)) {
209  case MRI_NoModRef:
210  // If the two calls are the same, return InstCS as a Def, so that
211  // CS can be found redundant and eliminated.
212  if (isReadOnlyCall && !(MR & MRI_Mod) &&
213  CS.getInstruction()->isIdenticalToWhenDefined(Inst))
214  return MemDepResult::getDef(Inst);
215 
216  // Otherwise if the two calls don't interact (e.g. InstCS is readnone)
217  // keep scanning.
218  continue;
219  default:
220  return MemDepResult::getClobber(Inst);
221  }
222  }
223 
224  // If we could not obtain a pointer for the instruction and the instruction
225  // touches memory then assume that this is a dependency.
226  if (MR != MRI_NoModRef)
227  return MemDepResult::getClobber(Inst);
228  }
229 
230  // No dependence found. If this is the entry block of the function, it is
231  // unknown, otherwise it is non-local.
232  if (BB != &BB->getParent()->getEntryBlock())
233  return MemDepResult::getNonLocal();
234  return MemDepResult::getNonFuncLocal();
235 }
236 
237 unsigned MemoryDependenceResults::getLoadLoadClobberFullWidthSize(
238     const Value *MemLocBase, int64_t MemLocOffs, unsigned MemLocSize,
239  const LoadInst *LI) {
240  // We can only extend simple integer loads.
241  if (!isa<IntegerType>(LI->getType()) || !LI->isSimple())
242  return 0;
243 
244  // Load widening is hostile to ThreadSanitizer: it may cause false positives
245  // or make the reports more cryptic (access sizes are wrong).
246  if (LI->getParent()->getParent()->hasFnAttribute(Attribute::SanitizeThread))
247  return 0;
248 
249  const DataLayout &DL = LI->getModule()->getDataLayout();
250 
251  // Get the base of this load.
252  int64_t LIOffs = 0;
253  const Value *LIBase =
254      GetPointerBaseWithConstantOffset(LI->getPointerOperand(), LIOffs, DL);
255 
256  // If the two pointers are not based on the same pointer, we can't tell that
257  // they are related.
258  if (LIBase != MemLocBase)
259  return 0;
260 
261  // Okay, the two values are based on the same pointer, but returned as
262  // no-alias. This happens when we have things like two byte loads at "P+1"
263  // and "P+3". Check to see if increasing the size of the "LI" load up to its
264  // alignment (or the largest native integer type) will allow us to load all
265  // the bits required by MemLoc.
266 
267  // If MemLoc is before LI, then no widening of LI will help us out.
268  if (MemLocOffs < LIOffs)
269  return 0;
270 
271  // Get the alignment of the load in bytes. We assume that it is safe to load
272  // any legal integer up to this size without a problem. For example, if we're
273  // looking at an i8 load on x86-32 that is known 1024 byte aligned, we can
274  // widen it up to an i32 load. If it is known 2-byte aligned, we can widen it
275  // to i16.
276  unsigned LoadAlign = LI->getAlignment();
277 
278  int64_t MemLocEnd = MemLocOffs + MemLocSize;
279 
280  // If no amount of rounding up will let MemLoc fit into LI, then bail out.
281  if (LIOffs + LoadAlign < MemLocEnd)
282  return 0;
283 
284  // This is the size of the load to try. Start with the next larger power of
285  // two.
286  unsigned NewLoadByteSize = LI->getType()->getPrimitiveSizeInBits() / 8U;
287  NewLoadByteSize = NextPowerOf2(NewLoadByteSize);
288 
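  // The loop below keeps doubling the candidate width: it fails once the width
  // exceeds the load's alignment or the largest legal integer, and succeeds as
  // soon as the widened load would cover all of MemLoc.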
289  while (true) {
290  // If this load size is bigger than our known alignment or would not fit
291  // into a native integer register, then we fail.
292  if (NewLoadByteSize > LoadAlign ||
293  !DL.fitsInLegalInteger(NewLoadByteSize * 8))
294  return 0;
295 
296  if (LIOffs + NewLoadByteSize > MemLocEnd &&
297  LI->getParent()->getParent()->hasFnAttribute(
298      Attribute::SanitizeAddress))
299  // We will be reading past the location accessed by the original program.
300  // While this is safe in a regular build, Address Safety analysis tools
301  // may start reporting false warnings. So, don't do widening.
302  return 0;
303 
304  // If a load of this width would include all of MemLoc, then we succeed.
305  if (LIOffs + NewLoadByteSize >= MemLocEnd)
306  return NewLoadByteSize;
307 
308  NewLoadByteSize <<= 1;
309  }
310 }
311 
312 static bool isVolatile(Instruction *Inst) {
313  if (auto *LI = dyn_cast<LoadInst>(Inst))
314  return LI->isVolatile();
315  if (auto *SI = dyn_cast<StoreInst>(Inst))
316  return SI->isVolatile();
317  if (auto *AI = dyn_cast<AtomicCmpXchgInst>(Inst))
318  return AI->isVolatile();
319  return false;
320 }
321 
322 MemDepResult MemoryDependenceResults::getPointerDependencyFrom(
323     const MemoryLocation &MemLoc, bool isLoad, BasicBlock::iterator ScanIt,
324  BasicBlock *BB, Instruction *QueryInst, unsigned *Limit) {
325 
326  MemDepResult InvariantGroupDependency = MemDepResult::getUnknown();
327  if (QueryInst != nullptr) {
328  if (auto *LI = dyn_cast<LoadInst>(QueryInst)) {
329  InvariantGroupDependency = getInvariantGroupPointerDependency(LI, BB);
330 
331  if (InvariantGroupDependency.isDef())
332  return InvariantGroupDependency;
333  }
334  }
335  MemDepResult SimpleDep = getSimplePointerDependencyFrom(
336      MemLoc, isLoad, ScanIt, BB, QueryInst, Limit);
337  if (SimpleDep.isDef())
338  return SimpleDep;
339  // A non-local invariant.group dependency indicates there is a non-local Def
340  // (it only returns NonLocal if it finds a non-local Def), which is better than
341  // a local clobber and everything else.
342  if (InvariantGroupDependency.isNonLocal())
343  return InvariantGroupDependency;
344 
345  assert(InvariantGroupDependency.isUnknown() &&
346  "InvariantGroupDependency should be only unknown at this point");
347  return SimpleDep;
348 }
349 
350 MemDepResult
351 MemoryDependenceResults::getInvariantGroupPointerDependency(LoadInst *LI,
352                                                              BasicBlock *BB) {
353 
354  auto *InvariantGroupMD = LI->getMetadata(LLVMContext::MD_invariant_group);
355  if (!InvariantGroupMD)
356  return MemDepResult::getUnknown();
357 
358  // Take the pointer operand after all casts and zero-index GEPs. This way we
359  // only need to search down the cast graph.
360  Value *LoadOperand = LI->getPointerOperand()->stripPointerCasts();
361 
362  // It is not safe to walk the use list of a global value, because function
363  // passes aren't allowed to look outside their functions.
364  // FIXME: this could be fixed by filtering instructions from outside
365  // of current function.
366  if (isa<GlobalValue>(LoadOperand))
367  return MemDepResult::getUnknown();
368 
369  // Queue to process all pointers that are equivalent to load operand.
370  SmallVector<const Value *, 8> LoadOperandsQueue;
371  LoadOperandsQueue.push_back(LoadOperand);
372 
373  Instruction *ClosestDependency = nullptr;
374  // The order of instructions in the use list is unpredictable. In order to always
375  // get the same result, we will look for the closest dominance.
376  auto GetClosestDependency = [this](Instruction *Best, Instruction *Other) {
377  assert(Other && "Must call it with not null instruction");
378  if (Best == nullptr || DT.dominates(Best, Other))
379  return Other;
380  return Best;
381  };
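  // Note: GetClosestDependency prefers the candidate that is dominated by the
  // current best, i.e. the one nearest to LI in the dominator tree, which keeps
  // the result deterministic regardless of use-list order.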
382 
383 
384  // FIXME: This loop is O(N^2) because dominates can be O(n) and in worst case
385  // we will see all the instructions. This should be fixed in MSSA.
386  while (!LoadOperandsQueue.empty()) {
387  const Value *Ptr = LoadOperandsQueue.pop_back_val();
388  assert(Ptr && !isa<GlobalValue>(Ptr) &&
389  "Null or GlobalValue should not be inserted");
390 
391  for (const Use &Us : Ptr->uses()) {
392  auto *U = dyn_cast<Instruction>(Us.getUser());
393  if (!U || U == LI || !DT.dominates(U, LI))
394  continue;
395 
396  // A bitcast or all-zero GEP is a use of Ptr. Add it to the queue to check its
397  // users. U = bitcast Ptr
398  if (isa<BitCastInst>(U)) {
399  LoadOperandsQueue.push_back(U);
400  continue;
401  }
402  // Gep with zeros is equivalent to bitcast.
403  // FIXME: we are not sure if some bitcast should be canonicalized to gep 0
404  // or gep 0 to bitcast because of SROA, so there are 2 forms. When
405  // typeless pointers are ready, both cases will be gone
406  // (and this BFS also won't be needed).
407  if (auto *GEP = dyn_cast<GetElementPtrInst>(U))
408  if (GEP->hasAllZeroIndices()) {
409  LoadOperandsQueue.push_back(U);
410  continue;
411  }
412 
413  // If we hit load/store with the same invariant.group metadata (and the
414  // same pointer operand) we can assume that the value pointed to by the
415  // pointer operand didn't change.
416  if ((isa<LoadInst>(U) || isa<StoreInst>(U)) &&
417  U->getMetadata(LLVMContext::MD_invariant_group) == InvariantGroupMD)
418  ClosestDependency = GetClosestDependency(ClosestDependency, U);
419  }
420  }
421 
422  if (!ClosestDependency)
423  return MemDepResult::getUnknown();
424  if (ClosestDependency->getParent() == BB)
425  return MemDepResult::getDef(ClosestDependency);
426  // Def(U) can't be returned here because it is non-local. If no local
427  // dependency is found, return NonLocal, counting on the user to call
428  // getNonLocalPointerDependency, which will return the cached
429  // result.
430  NonLocalDefsCache.try_emplace(
431  LI, NonLocalDepResult(ClosestDependency->getParent(),
432  MemDepResult::getDef(ClosestDependency), nullptr));
433  return MemDepResult::getNonLocal();
434 }
435 
436 MemDepResult MemoryDependenceResults::getSimplePointerDependencyFrom(
437     const MemoryLocation &MemLoc, bool isLoad, BasicBlock::iterator ScanIt,
438  BasicBlock *BB, Instruction *QueryInst, unsigned *Limit) {
439  bool isInvariantLoad = false;
440 
441  if (!Limit) {
442  unsigned DefaultLimit = BlockScanLimit;
443  return getSimplePointerDependencyFrom(MemLoc, isLoad, ScanIt, BB, QueryInst,
444  &DefaultLimit);
445  }
446 
447  // We must be careful with atomic accesses, as they may allow another thread
448  // to touch this location, clobbering it. We are conservative: if the
449  // QueryInst is not a simple (non-atomic) memory access, we automatically
450  // return getClobber.
451  // If it is simple, we know based on the results of
452  // "Compiler testing via a theory of sound optimisations in the C11/C++11
453  // memory model" in PLDI 2013, that a non-atomic location can only be
454  // clobbered between a pair of a release and an acquire action, with no
455  // access to the location in between.
456  // Here is an example for giving the general intuition behind this rule.
457  // In the following code:
458  // store x 0;
459  // release action; [1]
460  // acquire action; [4]
461  // %val = load x;
462  // It is unsafe to replace %val by 0 because another thread may be running:
463  // acquire action; [2]
464  // store x 42;
465  // release action; [3]
466  // with synchronization from 1 to 2 and from 3 to 4, resulting in %val
467  // being 42. A key property of this program however is that if either
468  // 1 or 4 were missing, there would be a race between the store of 42 and
469  // either the store of 0 or the load (making the whole program racy).
470  // The paper mentioned above shows that the same property is respected
471  // by every program that can detect any optimization of that kind: either
472  // it is racy (undefined) or there is a release followed by an acquire
473  // between the pair of accesses under consideration.
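  // In short: the code below only scans past a Monotonic access when the query
  // itself is a simple (non-atomic, non-volatile) access; anything stronger is
  // treated as a clobber.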
474 
475  // If the load is invariant, we "know" that it doesn't alias *any* write. We
476  // do want to respect mustalias results since defs are useful for value
477  // forwarding, but any mayalias write can be assumed to be noalias.
478  // Arguably, this logic should be pushed inside AliasAnalysis itself.
479  if (isLoad && QueryInst) {
480  LoadInst *LI = dyn_cast<LoadInst>(QueryInst);
481  if (LI && LI->getMetadata(LLVMContext::MD_invariant_load) != nullptr)
482  isInvariantLoad = true;
483  }
484 
485  const DataLayout &DL = BB->getModule()->getDataLayout();
486 
487  // Create a numbered basic block to lazily compute and cache instruction
488  // positions inside a BB. This is used to provide fast queries for relative
489  // position between two instructions in a BB and can be used by
490  // AliasAnalysis::callCapturesBefore.
491  OrderedBasicBlock OBB(BB);
492 
493  // Return "true" if and only if the instruction I is either a non-simple
494  // load or a non-simple store.
495  auto isNonSimpleLoadOrStore = [](Instruction *I) -> bool {
496  if (auto *LI = dyn_cast<LoadInst>(I))
497  return !LI->isSimple();
498  if (auto *SI = dyn_cast<StoreInst>(I))
499  return !SI->isSimple();
500  return false;
501  };
502 
503  // Return "true" if I is not a load and not a store, but it does access
504  // memory.
505  auto isOtherMemAccess = [](Instruction *I) -> bool {
506  return !isa<LoadInst>(I) && !isa<StoreInst>(I) && I->mayReadOrWriteMemory();
507  };
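  // Together, these two predicates identify queries that are not simple memory
  // accesses; such queries cannot be moved past atomic or volatile operations,
  // so the scan below reports a clobber for them.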
508 
509  // Walk backwards through the basic block, looking for dependencies.
510  while (ScanIt != BB->begin()) {
511  Instruction *Inst = &*--ScanIt;
512 
513  if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(Inst))
514  // Debug intrinsics don't (and can't) cause dependencies.
515  if (isa<DbgInfoIntrinsic>(II))
516  continue;
517 
518  // Limit the amount of scanning we do so we don't end up with quadratic
519  // running time on extreme testcases.
520  --*Limit;
521  if (!*Limit)
522  return MemDepResult::getUnknown();
523 
524  if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(Inst)) {
525  // If we reach a lifetime begin or end marker, then the query ends here
526  // because the value is undefined.
527  if (II->getIntrinsicID() == Intrinsic::lifetime_start) {
528  // FIXME: This only considers queries directly on the invariant-tagged
529  // pointer, not on query pointers that are indexed off of them. It'd
530  // be nice to handle that at some point (the right approach is to use
531  // GetPointerBaseWithConstantOffset).
532  if (AA.isMustAlias(MemoryLocation(II->getArgOperand(1)), MemLoc))
533  return MemDepResult::getDef(II);
534  continue;
535  }
536  }
537 
538  // Values depend on loads if the pointers are must aliased. This means
539  // that a load depends on another must aliased load from the same value.
540  // One exception is atomic loads: a value can depend on an atomic load that
541  // it does not alias with when this atomic load indicates that another
542  // thread may be accessing the location.
543  if (LoadInst *LI = dyn_cast<LoadInst>(Inst)) {
544 
545  // While volatile accesses cannot be eliminated, they do not have to clobber
546  // non-aliasing locations; normal accesses, for example, can be safely
547  // reordered with volatile accesses.
548  if (LI->isVolatile()) {
549  if (!QueryInst)
550  // Original QueryInst *may* be volatile
551  return MemDepResult::getClobber(LI);
552  if (isVolatile(QueryInst))
553  // Ordering required if QueryInst is itself volatile
554  return MemDepResult::getClobber(LI);
555  // Otherwise, volatile doesn't imply any special ordering
556  }
557 
558  // Atomic loads have complications involved.
559  // A Monotonic (or higher) load is OK if the query inst is itself not
560  // atomic.
561  // FIXME: This is overly conservative.
562  if (LI->isAtomic() && isStrongerThanUnordered(LI->getOrdering())) {
563  if (!QueryInst || isNonSimpleLoadOrStore(QueryInst) ||
564  isOtherMemAccess(QueryInst))
565  return MemDepResult::getClobber(LI);
566  if (LI->getOrdering() != AtomicOrdering::Monotonic)
567  return MemDepResult::getClobber(LI);
568  }
569 
570  MemoryLocation LoadLoc = MemoryLocation::get(LI);
571 
572  // If we found a pointer, check if it could be the same as our pointer.
573  AliasResult R = AA.alias(LoadLoc, MemLoc);
574 
575  if (isLoad) {
576  if (R == NoAlias)
577  continue;
578 
579  // Must aliased loads are defs of each other.
580  if (R == MustAlias)
581  return MemDepResult::getDef(Inst);
582 
583 #if 0 // FIXME: Temporarily disabled. GVN is cleverly rewriting loads
584  // in terms of clobbering loads, but since it does this by looking
585  // at the clobbering load directly, it doesn't know about any
586  // phi translation that may have happened along the way.
587 
588  // If we have a partial alias, then return this as a clobber for the
589  // client to handle.
590  if (R == PartialAlias)
591  return MemDepResult::getClobber(Inst);
592 #endif
593 
594  // Arbitrary may-alias loads don't impose a dependence on each
595  // other.
596  continue;
597  }
598 
599  // Stores don't depend on other no-aliased accesses.
600  if (R == NoAlias)
601  continue;
602 
603  // Stores don't alias loads from read-only memory.
604  if (AA.pointsToConstantMemory(LoadLoc))
605  continue;
606 
607  // Stores depend on may/must aliased loads.
608  return MemDepResult::getDef(Inst);
609  }
610 
611  if (StoreInst *SI = dyn_cast<StoreInst>(Inst)) {
612  // Atomic stores have complications involved.
613  // A Monotonic store is OK if the query inst is itself not atomic.
614  // FIXME: This is overly conservative.
615  if (!SI->isUnordered() && SI->isAtomic()) {
616  if (!QueryInst || isNonSimpleLoadOrStore(QueryInst) ||
617  isOtherMemAccess(QueryInst))
618  return MemDepResult::getClobber(SI);
619  if (SI->getOrdering() != AtomicOrdering::Monotonic)
620  return MemDepResult::getClobber(SI);
621  }
622 
623  // FIXME: this is overly conservative.
624  // While volatile accesses cannot be eliminated, they do not have to clobber
625  // non-aliasing locations; normal accesses can, for example, be reordered
626  // with volatile accesses.
627  if (SI->isVolatile())
628  if (!QueryInst || isNonSimpleLoadOrStore(QueryInst) ||
629  isOtherMemAccess(QueryInst))
630  return MemDepResult::getClobber(SI);
631 
632  // If alias analysis can tell that this store is guaranteed to not modify
633  // the query pointer, ignore it. Use getModRefInfo to handle cases where
634  // the query pointer points to constant memory etc.
635  if (AA.getModRefInfo(SI, MemLoc) == MRI_NoModRef)
636  continue;
637 
638  // Ok, this store might clobber the query pointer. Check to see if it is
639  // a must alias: in this case, we want to return this as a def.
640  MemoryLocation StoreLoc = MemoryLocation::get(SI);
641 
642  // If we found a pointer, check if it could be the same as our pointer.
643  AliasResult R = AA.alias(StoreLoc, MemLoc);
644 
645  if (R == NoAlias)
646  continue;
647  if (R == MustAlias)
648  return MemDepResult::getDef(Inst);
649  if (isInvariantLoad)
650  continue;
651  return MemDepResult::getClobber(Inst);
652  }
653 
654  // If this is an allocation, and if we know that the accessed pointer is to
655  // the allocation, return Def. This means that there is no dependence and
656  // the access can be optimized based on that. For example, a load could
657  // turn into undef. Note that we can bypass the allocation itself when
658  // looking for a clobber in many cases; that's an alias property and is
659  // handled by BasicAA.
660  if (isa<AllocaInst>(Inst) || isNoAliasFn(Inst, &TLI)) {
661  const Value *AccessPtr = GetUnderlyingObject(MemLoc.Ptr, DL);
662  if (AccessPtr == Inst || AA.isMustAlias(Inst, AccessPtr))
663  return MemDepResult::getDef(Inst);
664  }
665 
666  if (isInvariantLoad)
667  continue;
668 
669  // A release fence requires that all stores complete before it, but does
670  // not prevent the reordering of following loads or stores 'before' the
671  // fence. As a result, we look past it when finding a dependency for
672  // loads. DSE uses this to find preceding stores to delete and thus we
673  // can't bypass the fence if the query instruction is a store.
674  if (FenceInst *FI = dyn_cast<FenceInst>(Inst))
675  if (isLoad && FI->getOrdering() == AtomicOrdering::Release)
676  continue;
677 
678  // See if this instruction (e.g. a call or vaarg) mod/ref's the pointer.
679  ModRefInfo MR = AA.getModRefInfo(Inst, MemLoc);
680  // If necessary, perform additional analysis.
681  if (MR == MRI_ModRef)
682  MR = AA.callCapturesBefore(Inst, MemLoc, &DT, &OBB);
683  switch (MR) {
684  case MRI_NoModRef:
685  // If the call has no effect on the queried pointer, just ignore it.
686  continue;
687  case MRI_Mod:
688  return MemDepResult::getClobber(Inst);
689  case MRI_Ref:
690  // If the call is known to never store to the pointer, and if this is a
691  // load query, we can safely ignore it (scan past it).
692  if (isLoad)
693  continue;
694  LLVM_FALLTHROUGH;
695  default:
696  // Otherwise, there is a potential dependence. Return a clobber.
697  return MemDepResult::getClobber(Inst);
698  }
699  }
700 
701  // No dependence found. If this is the entry block of the function, it is
702  // unknown, otherwise it is non-local.
703  if (BB != &BB->getParent()->getEntryBlock())
704  return MemDepResult::getNonLocal();
705  return MemDepResult::getNonFuncLocal();
706 }
707 
708 MemDepResult MemoryDependenceResults::getDependency(Instruction *QueryInst) {
709  Instruction *ScanPos = QueryInst;
710 
711  // Check for a cached result
712  MemDepResult &LocalCache = LocalDeps[QueryInst];
713 
714  // If the cached entry is non-dirty, just return it. Note that this depends
715  // on MemDepResult's default constructing to 'dirty'.
716  if (!LocalCache.isDirty())
717  return LocalCache;
718 
719  // Otherwise, if we have a dirty entry, we know we can start the scan at that
720  // instruction, which may save us some work.
721  if (Instruction *Inst = LocalCache.getInst()) {
722  ScanPos = Inst;
723 
724  RemoveFromReverseMap(ReverseLocalDeps, Inst, QueryInst);
725  }
726 
727  BasicBlock *QueryParent = QueryInst->getParent();
728 
729  // Do the scan.
730  if (BasicBlock::iterator(QueryInst) == QueryParent->begin()) {
731  // No dependence found. If this is the entry block of the function, it is
732  // unknown, otherwise it is non-local.
733  if (QueryParent != &QueryParent->getParent()->getEntryBlock())
734  LocalCache = MemDepResult::getNonLocal();
735  else
736  LocalCache = MemDepResult::getNonFuncLocal();
737  } else {
738  MemoryLocation MemLoc;
739  ModRefInfo MR = GetLocation(QueryInst, MemLoc, TLI);
740  if (MemLoc.Ptr) {
741  // If we can do a pointer scan, make it happen.
742  bool isLoad = !(MR & MRI_Mod);
743  if (auto *II = dyn_cast<IntrinsicInst>(QueryInst))
744  isLoad |= II->getIntrinsicID() == Intrinsic::lifetime_start;
745 
746  LocalCache = getPointerDependencyFrom(
747  MemLoc, isLoad, ScanPos->getIterator(), QueryParent, QueryInst);
748  } else if (isa<CallInst>(QueryInst) || isa<InvokeInst>(QueryInst)) {
749  CallSite QueryCS(QueryInst);
750  bool isReadOnly = AA.onlyReadsMemory(QueryCS);
751  LocalCache = getCallSiteDependencyFrom(
752  QueryCS, isReadOnly, ScanPos->getIterator(), QueryParent);
753  } else
754  // Non-memory instruction.
755  LocalCache = MemDepResult::getUnknown();
756  }
757 
758  // Remember the result!
759  if (Instruction *I = LocalCache.getInst())
760  ReverseLocalDeps[I].insert(QueryInst);
761 
762  return LocalCache;
763 }
764 
765 #ifndef NDEBUG
766 /// This method is used when -debug is specified to verify that cache arrays
767 /// are properly kept sorted.
768 static void AssertSorted(MemoryDependenceResults::NonLocalDepInfo &Cache,
769                          int Count = -1) {
770  if (Count == -1)
771  Count = Cache.size();
772  assert(std::is_sorted(Cache.begin(), Cache.begin() + Count) &&
773  "Cache isn't sorted!");
774 }
775 #endif
776 
777 const MemoryDependenceResults::NonLocalDepInfo &
778 MemoryDependenceResults::getNonLocalCallDependency(CallSite QueryCS) {
779  assert(getDependency(QueryCS.getInstruction()).isNonLocal() &&
780  "getNonLocalCallDependency should only be used on calls with "
781  "non-local deps!");
782  PerInstNLInfo &CacheP = NonLocalDeps[QueryCS.getInstruction()];
783  NonLocalDepInfo &Cache = CacheP.first;
784 
785  // This is the set of blocks that need to be recomputed. In the cached case,
786  // this can happen due to instructions being deleted etc. In the uncached
787  // case, this starts out as the set of predecessors we care about.
788  SmallVector<BasicBlock *, 32> DirtyBlocks;
789 
790  if (!Cache.empty()) {
791  // Okay, we have a cache entry. If we know it is not dirty, just return it
792  // with no computation.
793  if (!CacheP.second) {
794  ++NumCacheNonLocal;
795  return Cache;
796  }
797 
798  // If we already have a partially computed set of results, scan them to
799  // determine what is dirty, seeding our initial DirtyBlocks worklist.
800  for (auto &Entry : Cache)
801  if (Entry.getResult().isDirty())
802  DirtyBlocks.push_back(Entry.getBB());
803 
804  // Sort the cache so that we can do fast binary search lookups below.
805  std::sort(Cache.begin(), Cache.end());
806 
807  ++NumCacheDirtyNonLocal;
808  // cerr << "CACHED CASE: " << DirtyBlocks.size() << " dirty: "
809  // << Cache.size() << " cached: " << *QueryInst;
810  } else {
811  // Seed DirtyBlocks with each of the preds of QueryInst's block.
812  BasicBlock *QueryBB = QueryCS.getInstruction()->getParent();
813  for (BasicBlock *Pred : PredCache.get(QueryBB))
814  DirtyBlocks.push_back(Pred);
815  ++NumUncacheNonLocal;
816  }
817 
818  // isReadonlyCall - If this is a read-only call, we can be more aggressive.
819  bool isReadonlyCall = AA.onlyReadsMemory(QueryCS);
820 
821  SmallPtrSet<BasicBlock *, 32> Visited;
822 
823  unsigned NumSortedEntries = Cache.size();
824  DEBUG(AssertSorted(Cache));
825 
826  // Iterate while we still have blocks to update.
827  while (!DirtyBlocks.empty()) {
828  BasicBlock *DirtyBB = DirtyBlocks.back();
829  DirtyBlocks.pop_back();
830 
831  // Already processed this block?
832  if (!Visited.insert(DirtyBB).second)
833  continue;
834 
835  // Do a binary search to see if we already have an entry for this block in
836  // the cache set. If so, find it.
837  DEBUG(AssertSorted(Cache, NumSortedEntries));
838  NonLocalDepInfo::iterator Entry =
839  std::upper_bound(Cache.begin(), Cache.begin() + NumSortedEntries,
840  NonLocalDepEntry(DirtyBB));
841  if (Entry != Cache.begin() && std::prev(Entry)->getBB() == DirtyBB)
842  --Entry;
843 
844  NonLocalDepEntry *ExistingResult = nullptr;
845  if (Entry != Cache.begin() + NumSortedEntries &&
846  Entry->getBB() == DirtyBB) {
847  // If we already have an entry, and if it isn't already dirty, the block
848  // is done.
849  if (!Entry->getResult().isDirty())
850  continue;
851 
852  // Otherwise, remember this slot so we can update the value.
853  ExistingResult = &*Entry;
854  }
855 
856  // If the dirty entry has a pointer, start scanning from it so we don't have
857  // to rescan the entire block.
858  BasicBlock::iterator ScanPos = DirtyBB->end();
859  if (ExistingResult) {
860  if (Instruction *Inst = ExistingResult->getResult().getInst()) {
861  ScanPos = Inst->getIterator();
862  // We're removing QueryInst's use of Inst.
863  RemoveFromReverseMap(ReverseNonLocalDeps, Inst,
864  QueryCS.getInstruction());
865  }
866  }
867 
868  // Find out if this block has a local dependency for QueryInst.
869  MemDepResult Dep;
870 
871  if (ScanPos != DirtyBB->begin()) {
872  Dep =
873  getCallSiteDependencyFrom(QueryCS, isReadonlyCall, ScanPos, DirtyBB);
874  } else if (DirtyBB != &DirtyBB->getParent()->getEntryBlock()) {
875  // No dependence found. If this is the entry block of the function, it is
876  // a clobber, otherwise it is unknown.
877  Dep = MemDepResult::getNonLocal();
878  } else {
879  Dep = MemDepResult::getNonFuncLocal();
880  }
881 
882  // If we had a dirty entry for the block, update it. Otherwise, just add
883  // a new entry.
884  if (ExistingResult)
885  ExistingResult->setResult(Dep);
886  else
887  Cache.push_back(NonLocalDepEntry(DirtyBB, Dep));
888 
889  // If the block has a dependency (i.e. it isn't completely transparent to
890  // the value), remember the association!
891  if (!Dep.isNonLocal()) {
892  // Keep the ReverseNonLocalDeps map up to date so we can efficiently
893  // update this when we remove instructions.
894  if (Instruction *Inst = Dep.getInst())
895  ReverseNonLocalDeps[Inst].insert(QueryCS.getInstruction());
896  } else {
897 
898  // If the block *is* completely transparent to the load, we need to check
899  // the predecessors of this block. Add them to our worklist.
900  for (BasicBlock *Pred : PredCache.get(DirtyBB))
901  DirtyBlocks.push_back(Pred);
902  }
903  }
904 
905  return Cache;
906 }
907 
908 void MemoryDependenceResults::getNonLocalPointerDependency(
909     Instruction *QueryInst, SmallVectorImpl<NonLocalDepResult> &Result) {
910  const MemoryLocation Loc = MemoryLocation::get(QueryInst);
911  bool isLoad = isa<LoadInst>(QueryInst);
912  BasicBlock *FromBB = QueryInst->getParent();
913  assert(FromBB);
914 
915  assert(Loc.Ptr->getType()->isPointerTy() &&
916  "Can't get pointer deps of a non-pointer!");
917  Result.clear();
918  {
919  // Check if there is a cached Def with invariant.group. FIXME: the cache might
920  // be invalid if the cached instruction is removed between the call to
921  // getPointerDependencyFrom and this function.
922  auto NonLocalDefIt = NonLocalDefsCache.find(QueryInst);
923  if (NonLocalDefIt != NonLocalDefsCache.end()) {
924  Result.push_back(std::move(NonLocalDefIt->second));
925  NonLocalDefsCache.erase(NonLocalDefIt);
926  return;
927  }
928  }
929  // This routine does not expect to deal with volatile instructions.
930  // Doing so would require piping the QueryInst all the way through.
931  // TODO: volatiles can't be elided, but they can be reordered with other
932  // non-volatile accesses.
933 
934  // We currently give up on any instruction which is ordered, but we do handle
935  // atomic instructions which are unordered.
936  // TODO: Handle ordered instructions
937  auto isOrdered = [](Instruction *Inst) {
938  if (LoadInst *LI = dyn_cast<LoadInst>(Inst)) {
939  return !LI->isUnordered();
940  } else if (StoreInst *SI = dyn_cast<StoreInst>(Inst)) {
941  return !SI->isUnordered();
942  }
943  return false;
944  };
945  if (isVolatile(QueryInst) || isOrdered(QueryInst)) {
946  Result.push_back(NonLocalDepResult(FromBB, MemDepResult::getUnknown(),
947                                     const_cast<Value *>(Loc.Ptr)));
948  return;
949  }
950  const DataLayout &DL = FromBB->getModule()->getDataLayout();
951  PHITransAddr Address(const_cast<Value *>(Loc.Ptr), DL, &AC);
952 
953  // This is the set of blocks we've inspected, and the pointer we consider in
954  // each block. Because of critical edges, we currently bail out if querying
955  // a block with multiple different pointers. This can happen during PHI
956  // translation.
957  DenseMap<BasicBlock *, Value *> Visited;
958  if (getNonLocalPointerDepFromBB(QueryInst, Address, Loc, isLoad, FromBB,
959  Result, Visited, true))
960  return;
961  Result.clear();
962  Result.push_back(NonLocalDepResult(FromBB, MemDepResult::getUnknown(),
963                                     const_cast<Value *>(Loc.Ptr)));
964 }
965 
966 /// Compute the memdep value for BB with Pointer/PointeeSize using either
967 /// cached information in Cache or by doing a lookup (which may use dirty cache
968 /// info if available).
969 ///
970 /// If we do a lookup, add the result to the cache.
971 MemDepResult MemoryDependenceResults::GetNonLocalInfoForBlock(
972  Instruction *QueryInst, const MemoryLocation &Loc, bool isLoad,
973  BasicBlock *BB, NonLocalDepInfo *Cache, unsigned NumSortedEntries) {
974 
975  // Do a binary search to see if we already have an entry for this block in
976  // the cache set. If so, find it.
977  NonLocalDepInfo::iterator Entry = std::upper_bound(
978  Cache->begin(), Cache->begin() + NumSortedEntries, NonLocalDepEntry(BB));
979  if (Entry != Cache->begin() && (Entry - 1)->getBB() == BB)
980  --Entry;
981 
982  NonLocalDepEntry *ExistingResult = nullptr;
983  if (Entry != Cache->begin() + NumSortedEntries && Entry->getBB() == BB)
984  ExistingResult = &*Entry;
985 
986  // If we have a cached entry, and it is non-dirty, use it as the value for
987  // this dependency.
988  if (ExistingResult && !ExistingResult->getResult().isDirty()) {
989  ++NumCacheNonLocalPtr;
990  return ExistingResult->getResult();
991  }
992 
993  // Otherwise, we have to scan for the value. If we have a dirty cache
994  // entry, start scanning from its position, otherwise we scan from the end
995  // of the block.
996  BasicBlock::iterator ScanPos = BB->end();
997  if (ExistingResult && ExistingResult->getResult().getInst()) {
998  assert(ExistingResult->getResult().getInst()->getParent() == BB &&
999  "Instruction invalidated?");
1000  ++NumCacheDirtyNonLocalPtr;
1001  ScanPos = ExistingResult->getResult().getInst()->getIterator();
1002 
1003  // Eliminating the dirty entry from 'Cache', so update the reverse info.
1004  ValueIsLoadPair CacheKey(Loc.Ptr, isLoad);
1005  RemoveFromReverseMap(ReverseNonLocalPtrDeps, &*ScanPos, CacheKey);
1006  } else {
1007  ++NumUncacheNonLocalPtr;
1008  }
1009 
1010  // Scan the block for the dependency.
1011  MemDepResult Dep =
1012  getPointerDependencyFrom(Loc, isLoad, ScanPos, BB, QueryInst);
1013 
1014  // If we had a dirty entry for the block, update it. Otherwise, just add
1015  // a new entry.
1016  if (ExistingResult)
1017  ExistingResult->setResult(Dep);
1018  else
1019  Cache->push_back(NonLocalDepEntry(BB, Dep));
1020 
1021  // If the block has a dependency (i.e. it isn't completely transparent to
1022  // the value), remember the reverse association because we just added it
1023  // to Cache!
1024  if (!Dep.isDef() && !Dep.isClobber())
1025  return Dep;
1026 
1027  // Keep the ReverseNonLocalPtrDeps map up to date so we can efficiently
1028  // update MemDep when we remove instructions.
1029  Instruction *Inst = Dep.getInst();
1030  assert(Inst && "Didn't depend on anything?");
1031  ValueIsLoadPair CacheKey(Loc.Ptr, isLoad);
1032  ReverseNonLocalPtrDeps[Inst].insert(CacheKey);
1033  return Dep;
1034 }
1035 
1036 /// Sort the NonLocalDepInfo cache, given a certain number of elements in the
1037 /// array that are already properly ordered.
1038 ///
1039 /// This is optimized for the case when only a few entries are added.
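/// One or two appended entries are rotated into their sorted position; adding
/// more than two falls back to a full std::sort of the cache.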
1040 static void
1041 SortNonLocalDepInfoCache(MemoryDependenceResults::NonLocalDepInfo &Cache,
1042                          unsigned NumSortedEntries) {
1043  switch (Cache.size() - NumSortedEntries) {
1044  case 0:
1045  // done, no new entries.
1046  break;
1047  case 2: {
1048  // Two new entries, insert the last one into place.
1049  NonLocalDepEntry Val = Cache.back();
1050  Cache.pop_back();
1051  MemoryDependenceResults::NonLocalDepInfo::iterator Entry =
1052  std::upper_bound(Cache.begin(), Cache.end() - 1, Val);
1053  Cache.insert(Entry, Val);
1054  LLVM_FALLTHROUGH;
1055  }
1056  case 1:
1057  // One new entry, Just insert the new value at the appropriate position.
1058  if (Cache.size() != 1) {
1059  NonLocalDepEntry Val = Cache.back();
1060  Cache.pop_back();
1061  MemoryDependenceResults::NonLocalDepInfo::iterator Entry =
1062  std::upper_bound(Cache.begin(), Cache.end(), Val);
1063  Cache.insert(Entry, Val);
1064  }
1065  break;
1066  default:
1067  // Added many values, do a full scale sort.
1068  std::sort(Cache.begin(), Cache.end());
1069  break;
1070  }
1071 }
1072 
1073 /// Perform a dependency query based on pointer/pointeesize starting at the end
1074 /// of StartBB.
1075 ///
1076 /// Add any clobber/def results to the results vector and keep track of which
1077 /// blocks are visited in 'Visited'.
1078 ///
1079 /// This has special behavior for the first block queries (when SkipFirstBlock
1080 /// is true). In this special case, it ignores the contents of the specified
1081 /// block and starts returning dependence info for its predecessors.
1082 ///
1083 /// This function returns true on success, or false to indicate that it could
1084 /// not compute dependence information for some reason. This should be treated
1085 /// as a clobber dependence on the first instruction in the predecessor block.
1086 bool MemoryDependenceResults::getNonLocalPointerDepFromBB(
1087  Instruction *QueryInst, const PHITransAddr &Pointer,
1088  const MemoryLocation &Loc, bool isLoad, BasicBlock *StartBB,
1089     SmallVectorImpl<NonLocalDepResult> &Result,
1090     DenseMap<BasicBlock *, Value *> &Visited, bool SkipFirstBlock) {
1091  // Look up the cached info for Pointer.
1092  ValueIsLoadPair CacheKey(Pointer.getAddr(), isLoad);
1093 
1094  // Set up a temporary NLPI value. If the map doesn't yet have an entry for
1095  // CacheKey, this value will be inserted as the associated value. Otherwise,
1096  // it'll be ignored, and we'll have to check to see if the cached size and
1097  // aa tags are consistent with the current query.
1098  NonLocalPointerInfo InitialNLPI;
1099  InitialNLPI.Size = Loc.Size;
1100  InitialNLPI.AATags = Loc.AATags;
1101 
1102  // Get the NLPI for CacheKey, inserting one into the map if it doesn't
1103  // already have one.
1104  std::pair<CachedNonLocalPointerInfo::iterator, bool> Pair =
1105  NonLocalPointerDeps.insert(std::make_pair(CacheKey, InitialNLPI));
1106  NonLocalPointerInfo *CacheInfo = &Pair.first->second;
1107 
1108  // If we already have a cache entry for this CacheKey, we may need to do some
1109  // work to reconcile the cache entry and the current query.
1110  if (!Pair.second) {
1111  if (CacheInfo->Size < Loc.Size) {
1112  // The query's Size is greater than the cached one. Throw out the
1113  // cached data and proceed with the query at the greater size.
1114  CacheInfo->Pair = BBSkipFirstBlockPair();
1115  CacheInfo->Size = Loc.Size;
1116  for (auto &Entry : CacheInfo->NonLocalDeps)
1117  if (Instruction *Inst = Entry.getResult().getInst())
1118  RemoveFromReverseMap(ReverseNonLocalPtrDeps, Inst, CacheKey);
1119  CacheInfo->NonLocalDeps.clear();
1120  } else if (CacheInfo->Size > Loc.Size) {
1121  // This query's Size is less than the cached one. Conservatively restart
1122  // the query using the greater size.
1123  return getNonLocalPointerDepFromBB(
1124  QueryInst, Pointer, Loc.getWithNewSize(CacheInfo->Size), isLoad,
1125  StartBB, Result, Visited, SkipFirstBlock);
1126  }
1127 
1128  // If the query's AATags are inconsistent with the cached one,
1129  // conservatively throw out the cached data and restart the query with
1130  // no tag if needed.
1131  if (CacheInfo->AATags != Loc.AATags) {
1132  if (CacheInfo->AATags) {
1133  CacheInfo->Pair = BBSkipFirstBlockPair();
1134  CacheInfo->AATags = AAMDNodes();
1135  for (auto &Entry : CacheInfo->NonLocalDeps)
1136  if (Instruction *Inst = Entry.getResult().getInst())
1137  RemoveFromReverseMap(ReverseNonLocalPtrDeps, Inst, CacheKey);
1138  CacheInfo->NonLocalDeps.clear();
1139  }
1140  if (Loc.AATags)
1141  return getNonLocalPointerDepFromBB(
1142  QueryInst, Pointer, Loc.getWithoutAATags(), isLoad, StartBB, Result,
1143  Visited, SkipFirstBlock);
1144  }
1145  }
1146 
1147  NonLocalDepInfo *Cache = &CacheInfo->NonLocalDeps;
1148 
1149  // If we have valid cached information for exactly the block we are
1150  // investigating, just return it with no recomputation.
1151  if (CacheInfo->Pair == BBSkipFirstBlockPair(StartBB, SkipFirstBlock)) {
1152  // We have a fully cached result for this query, so we can just return the
1153  // cached results and populate the visited set. However, we have to verify
1154  // that we don't already have conflicting results for these blocks. Check
1155  // to ensure that if a block in the results set is in the visited set that
1156  // it was for the same pointer query.
1157  if (!Visited.empty()) {
1158  for (auto &Entry : *Cache) {
1159  DenseMap<BasicBlock *, Value *>::iterator VI =
1160      Visited.find(Entry.getBB());
1161  if (VI == Visited.end() || VI->second == Pointer.getAddr())
1162  continue;
1163 
1164  // We have a pointer mismatch in a block. Just return false, saying
1165  // that something was clobbered in this result. We could also do a
1166  // non-fully cached query, but there is little point in doing this.
1167  return false;
1168  }
1169  }
1170 
1171  Value *Addr = Pointer.getAddr();
1172  for (auto &Entry : *Cache) {
1173  Visited.insert(std::make_pair(Entry.getBB(), Addr));
1174  if (Entry.getResult().isNonLocal()) {
1175  continue;
1176  }
1177 
1178  if (DT.isReachableFromEntry(Entry.getBB())) {
1179  Result.push_back(
1180  NonLocalDepResult(Entry.getBB(), Entry.getResult(), Addr));
1181  }
1182  }
1183  ++NumCacheCompleteNonLocalPtr;
1184  return true;
1185  }
1186 
1187  // Otherwise, either this is a new block, a block with an invalid cache
1188  // pointer or one that we're about to invalidate by putting more info into it
1189  // than its valid cache info. If empty, the result will be valid cache info,
1190  // otherwise it isn't.
1191  if (Cache->empty())
1192  CacheInfo->Pair = BBSkipFirstBlockPair(StartBB, SkipFirstBlock);
1193  else
1194  CacheInfo->Pair = BBSkipFirstBlockPair();
1195 
1196  SmallVector<BasicBlock *, 32> Worklist;
1197  Worklist.push_back(StartBB);
1198 
1199  // PredList used inside loop.
1200  SmallVector<std::pair<BasicBlock *, PHITransAddr>, 16> PredList;
1201 
1202  // Keep track of the entries that we know are sorted. Previously cached
1203  // entries will all be sorted. The entries we add we only sort on demand (we
1204  // don't insert every element into its sorted position). We know that we
1205  // won't get any reuse from currently inserted values, because we don't
1206  // revisit blocks after we insert info for them.
1207  unsigned NumSortedEntries = Cache->size();
1208  unsigned WorklistEntries = BlockNumberLimit;
1209  bool GotWorklistLimit = false;
1210  DEBUG(AssertSorted(*Cache));
1211 
1212  while (!Worklist.empty()) {
1213  BasicBlock *BB = Worklist.pop_back_val();
1214 
1215  // If we do process a large number of blocks, it becomes very expensive and
1216  // likely isn't worth worrying about.
1217  if (Result.size() > NumResultsLimit) {
1218  Worklist.clear();
1219  // Sort it now (if needed) so that recursive invocations of
1220  // getNonLocalPointerDepFromBB and other routines that could reuse the
1221  // cache value will only see properly sorted cache arrays.
1222  if (Cache && NumSortedEntries != Cache->size()) {
1223  SortNonLocalDepInfoCache(*Cache, NumSortedEntries);
1224  }
1225  // Since we bail out, the "Cache" set won't contain all of the
1226  // results for the query. This is ok (we can still use it to accelerate
1227  // specific block queries) but we can't do the fastpath "return all
1228  // results from the set". Clear out the indicator for this.
1229  CacheInfo->Pair = BBSkipFirstBlockPair();
1230  return false;
1231  }
1232 
1233  // Skip the first block if we have it.
1234  if (!SkipFirstBlock) {
1235  // Analyze the dependency of *Pointer in FromBB. See if we already have
1236  // been here.
1237  assert(Visited.count(BB) && "Should check 'visited' before adding to WL");
1238 
1239  // Get the dependency info for Pointer in BB. If we have cached
1240  // information, we will use it, otherwise we compute it.
1241  DEBUG(AssertSorted(*Cache, NumSortedEntries));
1242  MemDepResult Dep = GetNonLocalInfoForBlock(QueryInst, Loc, isLoad, BB,
1243  Cache, NumSortedEntries);
1244 
1245  // If we got a Def or Clobber, add this to the list of results.
1246  if (!Dep.isNonLocal()) {
1247  if (DT.isReachableFromEntry(BB)) {
1248  Result.push_back(NonLocalDepResult(BB, Dep, Pointer.getAddr()));
1249  continue;
1250  }
1251  }
1252  }
1253 
1254  // If 'Pointer' is an instruction defined in this block, then we need to do
1255  // phi translation to change it into a value live in the predecessor block.
1256  // If not, we just add the predecessors to the worklist and scan them with
1257  // the same Pointer.
1258  if (!Pointer.NeedsPHITranslationFromBlock(BB)) {
1259  SkipFirstBlock = false;
1260  SmallVector<BasicBlock *, 16> NewBlocks;
1261  for (BasicBlock *Pred : PredCache.get(BB)) {
1262  // Verify that we haven't looked at this block yet.
1263  std::pair<DenseMap<BasicBlock *, Value *>::iterator, bool> InsertRes =
1264  Visited.insert(std::make_pair(Pred, Pointer.getAddr()));
1265  if (InsertRes.second) {
1266  // First time we've looked at *PI.
1267  NewBlocks.push_back(Pred);
1268  continue;
1269  }
1270 
1271  // If we have seen this block before, but it was with a different
1272  // pointer then we have a phi translation failure and we have to treat
1273  // this as a clobber.
1274  if (InsertRes.first->second != Pointer.getAddr()) {
1275  // Make sure to clean up the Visited map before continuing on to
1276  // PredTranslationFailure.
1277  for (unsigned i = 0; i < NewBlocks.size(); i++)
1278  Visited.erase(NewBlocks[i]);
1279  goto PredTranslationFailure;
1280  }
1281  }
1282  if (NewBlocks.size() > WorklistEntries) {
1283  // Make sure to clean up the Visited map before continuing on to
1284  // PredTranslationFailure.
1285  for (unsigned i = 0; i < NewBlocks.size(); i++)
1286  Visited.erase(NewBlocks[i]);
1287  GotWorklistLimit = true;
1288  goto PredTranslationFailure;
1289  }
1290  WorklistEntries -= NewBlocks.size();
1291  Worklist.append(NewBlocks.begin(), NewBlocks.end());
1292  continue;
1293  }
1294 
1295  // We do need to do phi translation. If we know ahead of time we can't phi
1296  // translate this value, don't even try.
1297  if (!Pointer.IsPotentiallyPHITranslatable())
1298  goto PredTranslationFailure;
1299 
1300  // We may have added values to the cache list before this PHI translation.
1301  // If so, we haven't done anything to ensure that the cache remains sorted.
1302  // Sort it now (if needed) so that recursive invocations of
1303  // getNonLocalPointerDepFromBB and other routines that could reuse the cache
1304  // value will only see properly sorted cache arrays.
1305  if (Cache && NumSortedEntries != Cache->size()) {
1306  SortNonLocalDepInfoCache(*Cache, NumSortedEntries);
1307  NumSortedEntries = Cache->size();
1308  }
1309  Cache = nullptr;
1310 
1311  PredList.clear();
1312  for (BasicBlock *Pred : PredCache.get(BB)) {
1313  PredList.push_back(std::make_pair(Pred, Pointer));
1314 
1315  // Get the PHI translated pointer in this predecessor. This can fail if
1316  // not translatable, in which case the getAddr() returns null.
1317  PHITransAddr &PredPointer = PredList.back().second;
1318  PredPointer.PHITranslateValue(BB, Pred, &DT, /*MustDominate=*/false);
1319  Value *PredPtrVal = PredPointer.getAddr();
1320 
1321  // Check to see if we have already visited this pred block with another
1322  // pointer. If so, we can't do this lookup. This failure can occur
1323  // with PHI translation when a critical edge exists and the PHI node in
1324  // the successor translates to a pointer value different than the
1325  // pointer the block was first analyzed with.
1326  std::pair<DenseMap<BasicBlock *, Value *>::iterator, bool> InsertRes =
1327  Visited.insert(std::make_pair(Pred, PredPtrVal));
1328 
1329  if (!InsertRes.second) {
1330  // We found the pred; take it off the list of preds to visit.
1331  PredList.pop_back();
1332 
1333  // If the predecessor was visited with PredPtr, then we already did
1334  // the analysis and can ignore it.
1335  if (InsertRes.first->second == PredPtrVal)
1336  continue;
1337 
1338  // Otherwise, the block was previously analyzed with a different
1339  // pointer. We can't represent the result of this case, so we just
1340  // treat this as a phi translation failure.
1341 
1342  // Make sure to clean up the Visited map before continuing on to
1343  // PredTranslationFailure.
1344  for (unsigned i = 0, n = PredList.size(); i < n; ++i)
1345  Visited.erase(PredList[i].first);
1346 
1347  goto PredTranslationFailure;
1348  }
1349  }
1350 
1351  // Actually process results here; this needs to be a separate loop to avoid
1352  // calling getNonLocalPointerDepFromBB for blocks we don't want to return
1353  // any results for. (getNonLocalPointerDepFromBB will modify our
1354  // datastructures in ways the code after the PredTranslationFailure label
1355  // doesn't expect.)
1356  for (unsigned i = 0, n = PredList.size(); i < n; ++i) {
1357  BasicBlock *Pred = PredList[i].first;
1358  PHITransAddr &PredPointer = PredList[i].second;
1359  Value *PredPtrVal = PredPointer.getAddr();
1360 
1361  bool CanTranslate = true;
1362  // If PHI translation was unable to find an available pointer in this
1363  // predecessor, then we have to assume that the pointer is clobbered in
1364  // that predecessor. We can still do PRE of the load, which would insert
1365  // a computation of the pointer in this predecessor.
1366  if (!PredPtrVal)
1367  CanTranslate = false;
1368 
1369  // FIXME: it is entirely possible that PHI translating will end up with
1370  // the same value. Consider PHI translating something like:
1371  // X = phi [x, bb1], [y, bb2]. PHI translating for bb1 doesn't *need*
1372  // to recurse here, pedantically speaking.
1373 
1374  // If getNonLocalPointerDepFromBB fails here, that means the cached
1375  // result conflicted with the Visited list; we have to conservatively
1376  // assume it is unknown, but this also does not block PRE of the load.
1377  if (!CanTranslate ||
1378  !getNonLocalPointerDepFromBB(QueryInst, PredPointer,
1379  Loc.getWithNewPtr(PredPtrVal), isLoad,
1380  Pred, Result, Visited)) {
1381  // Add the entry to the Result list.
1382  NonLocalDepResult Entry(Pred, MemDepResult::getUnknown(), PredPtrVal);
1383  Result.push_back(Entry);
1384 
1385  // Since we had a phi translation failure, the cache for CacheKey won't
1386  // include all of the entries that we need to immediately satisfy future
1387  // queries. Mark this in NonLocalPointerDeps by setting the
1388  // BBSkipFirstBlockPair pointer to null. This requires reuse of the
1389  // cached value to do more work but not miss the phi trans failure.
1390  NonLocalPointerInfo &NLPI = NonLocalPointerDeps[CacheKey];
1391  NLPI.Pair = BBSkipFirstBlockPair();
1392  continue;
1393  }
1394  }
1395 
1396  // Refresh the CacheInfo/Cache pointer so that it isn't invalidated.
1397  CacheInfo = &NonLocalPointerDeps[CacheKey];
1398  Cache = &CacheInfo->NonLocalDeps;
1399  NumSortedEntries = Cache->size();
1400 
1401  // Since we did phi translation, the "Cache" set won't contain all of the
1402  // results for the query. This is ok (we can still use it to accelerate
1403  // specific block queries) but we can't do the fastpath "return all
1404  // results from the set". Clear out the indicator for this.
1405  CacheInfo->Pair = BBSkipFirstBlockPair();
1406  SkipFirstBlock = false;
1407  continue;
1408 
1409  PredTranslationFailure:
1410  // The following code is "failure"; we can't produce a sane translation
1411  // for the given block. It assumes that we haven't modified any of
1412  // our datastructures while processing the current block.
1413 
1414  if (!Cache) {
1415  // Refresh the CacheInfo/Cache pointer if it got invalidated.
1416  CacheInfo = &NonLocalPointerDeps[CacheKey];
1417  Cache = &CacheInfo->NonLocalDeps;
1418  NumSortedEntries = Cache->size();
1419  }
1420 
1421  // Since we failed phi translation, the "Cache" set won't contain all of the
1422  // results for the query. This is ok (we can still use it to accelerate
1423  // specific block queries) but we can't do the fastpath "return all
1424  // results from the set". Clear out the indicator for this.
1425  CacheInfo->Pair = BBSkipFirstBlockPair();
1426 
1427  // If *nothing* works, mark the pointer as unknown.
1428  //
1429  // If this is the magic first block, return this as a clobber of the whole
1430  // incoming value. Since we can't phi translate to one of the predecessors,
1431  // we have to bail out.
1432  if (SkipFirstBlock)
1433  return false;
1434 
1435  bool foundBlock = false;
1436  for (NonLocalDepEntry &I : llvm::reverse(*Cache)) {
1437  if (I.getBB() != BB)
1438  continue;
1439 
1440  assert((GotWorklistLimit || I.getResult().isNonLocal() ||
1441  !DT.isReachableFromEntry(BB)) &&
1442  "Should only be here with transparent block");
1443  foundBlock = true;
1444  I.setResult(MemDepResult::getUnknown());
1445  Result.push_back(
1446  NonLocalDepResult(I.getBB(), I.getResult(), Pointer.getAddr()));
1447  break;
1448  }
1449  (void)foundBlock; (void)GotWorklistLimit;
1450  assert((foundBlock || GotWorklistLimit) && "Current block not in cache?");
1451  }
1452 
1453  // Okay, we're done now. If we added new values to the cache, re-sort it.
1454  SortNonLocalDepInfoCache(*Cache, NumSortedEntries);
1455  DEBUG(AssertSorted(*Cache));
1456  return true;
1457 }
1458 
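The walk above is what backs the public getNonLocalPointerDependency() query. A minimal sketch of a caller follows, assuming the usual MemoryDependenceAnalysis.h include; the helpers handleDef, handleClobber and giveUp are placeholders, not real functions:

// Illustrative client of the non-local pointer query (not from this file).
void processLoad(MemoryDependenceResults &MD, LoadInst *LI) {
  SmallVector<NonLocalDepResult, 16> Deps;
  MD.getNonLocalPointerDependency(LI, Deps); // one entry per relevant block
  for (const NonLocalDepResult &NLDR : Deps) {
    const MemDepResult &Res = NLDR.getResult();
    if (Res.isDef())          // a must-aliasing store/load in that block
      handleDef(NLDR.getBB(), Res.getInst());
    else if (Res.isClobber()) // something may write the location there
      handleClobber(NLDR.getBB(), Res.getInst());
    else if (Res.isUnknown()) // e.g. the phi-translation failures cached above
      giveUp();
  }
}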
1459 /// If P exists in CachedNonLocalPointerInfo, remove it.
1460 void MemoryDependenceResults::RemoveCachedNonLocalPointerDependencies(
1461  ValueIsLoadPair P) {
1462  CachedNonLocalPointerInfo::iterator It = NonLocalPointerDeps.find(P);
1463  if (It == NonLocalPointerDeps.end())
1464  return;
1465 
1466  // Remove all of the entries in the BB->val map. This involves removing
1467  // instructions from the reverse map.
1468  NonLocalDepInfo &PInfo = It->second.NonLocalDeps;
1469 
1470  for (unsigned i = 0, e = PInfo.size(); i != e; ++i) {
1471  Instruction *Target = PInfo[i].getResult().getInst();
1472  if (!Target)
1473  continue; // Ignore non-local dep results.
1474  assert(Target->getParent() == PInfo[i].getBB());
1475 
1476  // Eliminating the dirty entry from 'Cache', so update the reverse info.
1477  RemoveFromReverseMap(ReverseNonLocalPtrDeps, Target, P);
1478  }
1479 
1480  // Remove P from NonLocalPointerDeps (which deletes NonLocalDepInfo).
1481  NonLocalPointerDeps.erase(It);
1482 }
1483 
1484 void MemoryDependenceResults::invalidateCachedPointerInfo(Value *Ptr) {
1485  // If Ptr isn't really a pointer, just ignore it.
1486  if (!Ptr->getType()->isPointerTy())
1487  return;
1488  // Flush store info for the pointer.
1489  RemoveCachedNonLocalPointerDependencies(ValueIsLoadPair(Ptr, false));
1490  // Flush load info for the pointer.
1491  RemoveCachedNonLocalPointerDependencies(ValueIsLoadPair(Ptr, true));
1492 }
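Transforms that rewrite pointer values typically pair replaceAllUsesWith() with this hook. A hedged sketch of that pattern (the free function replacePointer is invented for illustration):

// Sketch: after redirecting uses to a new pointer, drop any cached memdep
// info keyed on it so stale aliasing conclusions are not reused.
static void replacePointer(Value *Old, Value *New, MemoryDependenceResults *MD) {
  Old->replaceAllUsesWith(New);
  if (MD && New->getType()->isPointerTy())
    MD->invalidateCachedPointerInfo(New);
}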
1493 
1494 void MemoryDependenceResults::invalidateCachedPredecessors() {
1495  PredCache.clear();
1496 }
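Clients that restructure the CFG are likewise expected to flush the predecessor cache; a minimal sketch, with the callback name invented for illustration:

// Sketch: after adding or removing CFG edges, the cached predecessor lists
// used by the non-local walk are stale and must be dropped.
static void afterCFGChange(MemoryDependenceResults &MD) {
  MD.invalidateCachedPredecessors();
}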
1497 
1498 void MemoryDependenceResults::removeInstruction(Instruction *RemInst) {
1499  // Walk through the non-local dependencies, removing this one as the value
1500  // for any cached queries.
1501  NonLocalDepMapType::iterator NLDI = NonLocalDeps.find(RemInst);
1502  if (NLDI != NonLocalDeps.end()) {
1503  NonLocalDepInfo &BlockMap = NLDI->second.first;
1504  for (auto &Entry : BlockMap)
1505  if (Instruction *Inst = Entry.getResult().getInst())
1506  RemoveFromReverseMap(ReverseNonLocalDeps, Inst, RemInst);
1507  NonLocalDeps.erase(NLDI);
1508  }
1509 
1510  // If we have a cached local dependence query for this instruction, remove it.
1511  //
1512  LocalDepMapType::iterator LocalDepEntry = LocalDeps.find(RemInst);
1513  if (LocalDepEntry != LocalDeps.end()) {
1514  // Remove us from DepInst's reverse set now that the local dep info is gone.
1515  if (Instruction *Inst = LocalDepEntry->second.getInst())
1516  RemoveFromReverseMap(ReverseLocalDeps, Inst, RemInst);
1517 
1518  // Remove this local dependency info.
1519  LocalDeps.erase(LocalDepEntry);
1520  }
1521 
1522  // If we have any cached pointer dependencies on this instruction, remove
1523  // them. If the instruction has non-pointer type, then it can't be a pointer
1524  // base.
1525 
1526  // Remove it from both the load info and the store info. The instruction
1527  // can't be in either of these maps if it is non-pointer.
1528  if (RemInst->getType()->isPointerTy()) {
1529  RemoveCachedNonLocalPointerDependencies(ValueIsLoadPair(RemInst, false));
1530  RemoveCachedNonLocalPointerDependencies(ValueIsLoadPair(RemInst, true));
1531  }
1532 
1533  // Loop over all of the things that depend on the instruction we're removing.
1534  //
1535  SmallVector<std::pair<Instruction *, Instruction *>, 8> ReverseDepsToAdd;
1536 
1537  // If we find RemInst as a clobber or Def in any of the maps for other values,
1538  // we need to replace its entry with a dirty version of the instruction after
1539  // it. If RemInst is a terminator, we use a null dirty value.
1540  //
1541  // Using a dirty version of the instruction after RemInst saves having to scan
1542  // the entire block to get to this point.
1543  MemDepResult NewDirtyVal;
1544  if (!RemInst->isTerminator())
1545  NewDirtyVal = MemDepResult::getDirty(&*++RemInst->getIterator());
1546 
1547  ReverseDepMapType::iterator ReverseDepIt = ReverseLocalDeps.find(RemInst);
1548  if (ReverseDepIt != ReverseLocalDeps.end()) {
1549  // RemInst can't be the terminator if it has local stuff depending on it.
1550  assert(!ReverseDepIt->second.empty() && !isa<TerminatorInst>(RemInst) &&
1551  "Nothing can locally depend on a terminator");
1552 
1553  for (Instruction *InstDependingOnRemInst : ReverseDepIt->second) {
1554  assert(InstDependingOnRemInst != RemInst &&
1555  "Already removed our local dep info");
1556 
1557  LocalDeps[InstDependingOnRemInst] = NewDirtyVal;
1558 
1559  // Make sure to remember that new things depend on NewDirtyVal.
1560  assert(NewDirtyVal.getInst() &&
1561  "There is no way something else can have "
1562  "a local dep on this if it is a terminator!");
1563  ReverseDepsToAdd.push_back(
1564  std::make_pair(NewDirtyVal.getInst(), InstDependingOnRemInst));
1565  }
1566 
1567  ReverseLocalDeps.erase(ReverseDepIt);
1568 
1569  // Add new reverse deps after scanning the set, to avoid invalidating the
1570  // 'ReverseDeps' reference.
1571  while (!ReverseDepsToAdd.empty()) {
1572  ReverseLocalDeps[ReverseDepsToAdd.back().first].insert(
1573  ReverseDepsToAdd.back().second);
1574  ReverseDepsToAdd.pop_back();
1575  }
1576  }
1577 
1578  ReverseDepIt = ReverseNonLocalDeps.find(RemInst);
1579  if (ReverseDepIt != ReverseNonLocalDeps.end()) {
1580  for (Instruction *I : ReverseDepIt->second) {
1581  assert(I != RemInst && "Already removed NonLocalDep info for RemInst");
1582 
1583  PerInstNLInfo &INLD = NonLocalDeps[I];
1584  // The information is now dirty!
1585  INLD.second = true;
1586 
1587  for (auto &Entry : INLD.first) {
1588  if (Entry.getResult().getInst() != RemInst)
1589  continue;
1590 
1591  // Convert to a dirty entry for the subsequent instruction.
1592  Entry.setResult(NewDirtyVal);
1593 
1594  if (Instruction *NextI = NewDirtyVal.getInst())
1595  ReverseDepsToAdd.push_back(std::make_pair(NextI, I));
1596  }
1597  }
1598 
1599  ReverseNonLocalDeps.erase(ReverseDepIt);
1600 
1601  // Add new reverse deps after scanning the set, to avoid invalidating it.
1602  while (!ReverseDepsToAdd.empty()) {
1603  ReverseNonLocalDeps[ReverseDepsToAdd.back().first].insert(
1604  ReverseDepsToAdd.back().second);
1605  ReverseDepsToAdd.pop_back();
1606  }
1607  }
1608 
1609  // If the instruction is in ReverseNonLocalPtrDeps then it appears as a
1610  // value in the NonLocalPointerDeps info.
1611  ReverseNonLocalPtrDepTy::iterator ReversePtrDepIt =
1612  ReverseNonLocalPtrDeps.find(RemInst);
1613  if (ReversePtrDepIt != ReverseNonLocalPtrDeps.end()) {
1614  SmallVector<std::pair<Instruction *, ValueIsLoadPair>, 8>
1615  ReversePtrDepsToAdd;
1616 
1617  for (ValueIsLoadPair P : ReversePtrDepIt->second) {
1618  assert(P.getPointer() != RemInst &&
1619  "Already removed NonLocalPointerDeps info for RemInst");
1620 
1621  NonLocalDepInfo &NLPDI = NonLocalPointerDeps[P].NonLocalDeps;
1622 
1623  // The cache is not valid for any specific block anymore.
1624  NonLocalPointerDeps[P].Pair = BBSkipFirstBlockPair();
1625 
1626  // Update any entries for RemInst to use the instruction after it.
1627  for (auto &Entry : NLPDI) {
1628  if (Entry.getResult().getInst() != RemInst)
1629  continue;
1630 
1631  // Convert to a dirty entry for the subsequent instruction.
1632  Entry.setResult(NewDirtyVal);
1633 
1634  if (Instruction *NewDirtyInst = NewDirtyVal.getInst())
1635  ReversePtrDepsToAdd.push_back(std::make_pair(NewDirtyInst, P));
1636  }
1637 
1638  // Re-sort the NonLocalDepInfo. Changing the dirty entry to its
1639  // subsequent value may invalidate the sortedness.
1640  std::sort(NLPDI.begin(), NLPDI.end());
1641  }
1642 
1643  ReverseNonLocalPtrDeps.erase(ReversePtrDepIt);
1644 
1645  while (!ReversePtrDepsToAdd.empty()) {
1646  ReverseNonLocalPtrDeps[ReversePtrDepsToAdd.back().first].insert(
1647  ReversePtrDepsToAdd.back().second);
1648  ReversePtrDepsToAdd.pop_back();
1649  }
1650  }
1651 
1652  assert(!NonLocalDeps.count(RemInst) && "RemInst got reinserted?");
1653  DEBUG(verifyRemoved(RemInst));
1654 }
1655 
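Callers are expected to detach an instruction from the dependence cache before erasing it from the IR; a sketch of the usual deletion order (the helper name is hypothetical):

// Sketch: memdep must still be able to inspect the instruction (e.g. to find
// the one after it), so remove it from the cache first, then erase it.
static void deleteDeadInstruction(Instruction *I, MemoryDependenceResults &MD) {
  MD.removeInstruction(I);
  I->eraseFromParent();
}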
1656 /// Verify that the specified instruction does not occur in our internal data
1657 /// structures.
1658 ///
1659 /// This function verifies by asserting in debug builds.
1660 void MemoryDependenceResults::verifyRemoved(Instruction *D) const {
1661 #ifndef NDEBUG
1662  for (const auto &DepKV : LocalDeps) {
1663  assert(DepKV.first != D && "Inst occurs in data structures");
1664  assert(DepKV.second.getInst() != D && "Inst occurs in data structures");
1665  }
1666 
1667  for (const auto &DepKV : NonLocalPointerDeps) {
1668  assert(DepKV.first.getPointer() != D && "Inst occurs in NLPD map key");
1669  for (const auto &Entry : DepKV.second.NonLocalDeps)
1670  assert(Entry.getResult().getInst() != D && "Inst occurs as NLPD value");
1671  }
1672 
1673  for (const auto &DepKV : NonLocalDeps) {
1674  assert(DepKV.first != D && "Inst occurs in data structures");
1675  const PerInstNLInfo &INLD = DepKV.second;
1676  for (const auto &Entry : INLD.first)
1677  assert(Entry.getResult().getInst() != D &&
1678  "Inst occurs in data structures");
1679  }
1680 
1681  for (const auto &DepKV : ReverseLocalDeps) {
1682  assert(DepKV.first != D && "Inst occurs in data structures");
1683  for (Instruction *Inst : DepKV.second)
1684  assert(Inst != D && "Inst occurs in data structures");
1685  }
1686 
1687  for (const auto &DepKV : ReverseNonLocalDeps) {
1688  assert(DepKV.first != D && "Inst occurs in data structures");
1689  for (Instruction *Inst : DepKV.second)
1690  assert(Inst != D && "Inst occurs in data structures");
1691  }
1692 
1693  for (const auto &DepKV : ReverseNonLocalPtrDeps) {
1694  assert(DepKV.first != D && "Inst occurs in rev NLPD map");
1695 
1696  for (ValueIsLoadPair P : DepKV.second)
1697  assert(P != ValueIsLoadPair(D, false) && P != ValueIsLoadPair(D, true) &&
1698  "Inst occurs in ReverseNonLocalPtrDeps map");
1699  }
1700 #endif
1701 }
1702 
1703 AnalysisKey MemoryDependenceAnalysis::Key;
1704 
1705 MemoryDependenceResults
1706 MemoryDependenceAnalysis::run(Function &F, FunctionAnalysisManager &AM) {
1707  auto &AA = AM.getResult<AAManager>(F);
1708  auto &AC = AM.getResult<AssumptionAnalysis>(F);
1709  auto &TLI = AM.getResult<TargetLibraryAnalysis>(F);
1710  auto &DT = AM.getResult<DominatorTreeAnalysis>(F);
1711  return MemoryDependenceResults(AA, AC, TLI, DT);
1712 }
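Under the new pass manager a transform obtains the cached result through its FunctionAnalysisManager; a minimal sketch (MyPass is a placeholder pass, and llvm/IR/InstIterator.h is assumed for instructions()):

// Sketch: querying memdep from a new-pass-manager function pass.
PreservedAnalyses MyPass::run(Function &F, FunctionAnalysisManager &AM) {
  auto &MD = AM.getResult<MemoryDependenceAnalysis>(F);
  for (Instruction &I : instructions(F))
    if (auto *LI = dyn_cast<LoadInst>(&I)) {
      MemDepResult Dep = MD.getDependency(LI); // local (same-block) query
      (void)Dep; // use Dep.isDef()/isClobber()/isNonLocal() as needed
    }
  return PreservedAnalyses::all();
}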
1713 
1714 char MemoryDependenceWrapperPass::ID = 0;
1715 
1716 INITIALIZE_PASS_BEGIN(MemoryDependenceWrapperPass, "memdep",
1717  "Memory Dependence Analysis", false, true)
1718 INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
1719 INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
1720 INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
1721 INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
1722 INITIALIZE_PASS_END(MemoryDependenceWrapperPass, "memdep",
1723  "Memory Dependence Analysis", false, true)
1724 
1725 MemoryDependenceWrapperPass::MemoryDependenceWrapperPass() : FunctionPass(ID) {
1726  initializeMemoryDependenceWrapperPassPass(*PassRegistry::getPassRegistry());
1727 }
1728 
1729 MemoryDependenceWrapperPass::~MemoryDependenceWrapperPass() = default;
1730 
1731 void MemoryDependenceWrapperPass::releaseMemory() {
1732  MemDep.reset();
1733 }
1734 
1735 void MemoryDependenceWrapperPass::getAnalysisUsage(AnalysisUsage &AU) const {
1736  AU.setPreservesAll();
1737  AU.addRequired<AssumptionCacheTracker>();
1738  AU.addRequiredTransitive<AAResultsWrapperPass>();
1739  AU.addRequiredTransitive<DominatorTreeWrapperPass>();
1740  AU.addRequiredTransitive<TargetLibraryInfoWrapperPass>();
1741 }
1742 
1743 bool MemoryDependenceResults::invalidate(Function &F, const PreservedAnalyses &PA,
1744  FunctionAnalysisManager::Invalidator &Inv) {
1745  // Check whether our analysis is preserved.
1746  auto PAC = PA.getChecker<MemoryDependenceAnalysis>();
1747  if (!PAC.preserved() && !PAC.preservedSet<AllAnalysesOn<Function>>())
1748  // If not, give up now.
1749  return true;
1750 
1751  // Check whether the analyses we depend on became invalid for any reason.
1752  if (Inv.invalidate<AAManager>(F, PA) ||
1753  Inv.invalidate<AssumptionAnalysis>(F, PA) ||
1754  Inv.invalidate<DominatorTreeAnalysis>(F, PA))
1755  return true;
1756 
1757  // Otherwise this analysis result remains valid.
1758  return false;
1759 }
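For this invalidation logic to keep the result alive across a transform, the transform has to report the analysis (and the analyses it depends on) as preserved. A sketch, with reportPreserved() as a placeholder helper:

// Sketch: a pass that kept memdep up to date incrementally (removeInstruction,
// invalidateCachedPointerInfo, ...) can mark it preserved so invalidate()
// above returns false.
static PreservedAnalyses reportPreserved() {
  PreservedAnalyses PA;
  PA.preserve<MemoryDependenceAnalysis>();
  PA.preserve<DominatorTreeAnalysis>(); // invalidate() also re-checks DT/AA/AC
  return PA;
}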
1760 
1761 unsigned MemoryDependenceResults::getDefaultBlockScanLimit() const {
1762  return BlockScanLimit;
1763 }
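The returned value is the -memdep-block-scan-limit budget; callers that drive the lower-level scan can thread it through explicitly. A sketch (scanWithBudget is a hypothetical helper):

// Sketch: start from the default instruction budget; the scan decrements the
// counter and gives up once it is exhausted.
static MemDepResult scanWithBudget(MemoryDependenceResults &MD, LoadInst *LI) {
  BasicBlock *BB = LI->getParent();
  unsigned Limit = MD.getDefaultBlockScanLimit();
  return MD.getPointerDependencyFrom(MemoryLocation::get(LI), /*isLoad=*/true,
                                     LI->getIterator(), BB, LI, &Limit);
}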
1764 
1765 bool MemoryDependenceWrapperPass::runOnFunction(Function &F) {
1766  auto &AA = getAnalysis<AAResultsWrapperPass>().getAAResults();
1767  auto &AC = getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
1768  auto &TLI = getAnalysis<TargetLibraryInfoWrapperPass>().getTLI();
1769  auto &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
1770  MemDep.emplace(AA, AC, TLI, DT);
1771  return false;
1772 }
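Under the legacy pass manager, clients declare the wrapper as required and fetch the MemoryDependenceResults from it; a hedged sketch (MyLegacyPass is a placeholder pass):

// Sketch: legacy-pass-manager access to the same results object.
void MyLegacyPass::getAnalysisUsage(AnalysisUsage &AU) const {
  AU.addRequired<MemoryDependenceWrapperPass>();
}

bool MyLegacyPass::runOnFunction(Function &F) {
  MemoryDependenceResults &MD =
      getAnalysis<MemoryDependenceWrapperPass>().getMemDep();
  // ... issue MD.getDependency() / MD.getNonLocalPointerDependency() queries ...
  return false;
}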