1 //===- MemoryDependenceAnalysis.cpp - Mem Deps Implementation -------------===//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This file implements an analysis that determines, for a given memory
11 // operation, what preceding memory operations it depends on. It builds on
12 // alias analysis information, and tries to provide a lazy, caching interface to
13 // a common kind of alias information query.
14 //
15 //===----------------------------------------------------------------------===//
16 
17 #include "llvm/Analysis/MemoryDependenceAnalysis.h"
18 #include "llvm/ADT/DenseMap.h"
19 #include "llvm/ADT/STLExtras.h"
20 #include "llvm/ADT/SmallPtrSet.h"
21 #include "llvm/ADT/SmallVector.h"
22 #include "llvm/ADT/Statistic.h"
23 #include "llvm/Analysis/AliasAnalysis.h"
24 #include "llvm/Analysis/AssumptionCache.h"
25 #include "llvm/Analysis/MemoryBuiltins.h"
26 #include "llvm/Analysis/MemoryLocation.h"
27 #include "llvm/Analysis/OrderedBasicBlock.h"
28 #include "llvm/Analysis/PHITransAddr.h"
29 #include "llvm/Analysis/TargetLibraryInfo.h"
30 #include "llvm/Analysis/ValueTracking.h"
31 #include "llvm/IR/Attributes.h"
32 #include "llvm/IR/BasicBlock.h"
33 #include "llvm/IR/CallSite.h"
34 #include "llvm/IR/Constants.h"
35 #include "llvm/IR/DataLayout.h"
36 #include "llvm/IR/DerivedTypes.h"
37 #include "llvm/IR/Dominators.h"
38 #include "llvm/IR/Function.h"
39 #include "llvm/IR/InstrTypes.h"
40 #include "llvm/IR/Instruction.h"
41 #include "llvm/IR/Instructions.h"
42 #include "llvm/IR/IntrinsicInst.h"
43 #include "llvm/IR/LLVMContext.h"
44 #include "llvm/IR/Metadata.h"
45 #include "llvm/IR/Module.h"
46 #include "llvm/IR/PredIteratorCache.h"
47 #include "llvm/IR/Type.h"
48 #include "llvm/IR/Use.h"
49 #include "llvm/IR/User.h"
50 #include "llvm/IR/Value.h"
51 #include "llvm/Pass.h"
52 #include "llvm/Support/AtomicOrdering.h"
53 #include "llvm/Support/Casting.h"
54 #include "llvm/Support/CommandLine.h"
55 #include "llvm/Support/Compiler.h"
56 #include "llvm/Support/Debug.h"
57 #include "llvm/Support/MathExtras.h"
58 #include <algorithm>
59 #include <cassert>
60 #include <cstdint>
61 #include <iterator>
62 #include <utility>
63 
64 using namespace llvm;
65 
66 #define DEBUG_TYPE "memdep"
67 
68 STATISTIC(NumCacheNonLocal, "Number of fully cached non-local responses");
69 STATISTIC(NumCacheDirtyNonLocal, "Number of dirty cached non-local responses");
70 STATISTIC(NumUncacheNonLocal, "Number of uncached non-local responses");
71 
72 STATISTIC(NumCacheNonLocalPtr,
73  "Number of fully cached non-local ptr responses");
74 STATISTIC(NumCacheDirtyNonLocalPtr,
75  "Number of cached, but dirty, non-local ptr responses");
76 STATISTIC(NumUncacheNonLocalPtr, "Number of uncached non-local ptr responses");
77 STATISTIC(NumCacheCompleteNonLocalPtr,
78  "Number of block queries that were completely cached");
79 
80 // Limit for the number of instructions to scan in a block.
81 
82 static cl::opt<unsigned> BlockScanLimit(
83  "memdep-block-scan-limit", cl::Hidden, cl::init(100),
84  cl::desc("The number of instructions to scan in a block in memory "
85  "dependency analysis (default = 100)"));
86 
87 static cl::opt<unsigned>
88  BlockNumberLimit("memdep-block-number-limit", cl::Hidden, cl::init(1000),
89  cl::desc("The number of blocks to scan during memory "
90  "dependency analysis (default = 1000)"));
91 
92 // Limit on the number of memdep results to process.
93 static const unsigned int NumResultsLimit = 100;
94 
95 /// This is a helper function that removes Val from 'Inst's set in ReverseMap.
96 ///
97 /// If the set becomes empty, remove Inst's entry.
98 template <typename KeyTy>
99 static void
100 RemoveFromReverseMap(DenseMap<Instruction *, SmallPtrSet<KeyTy, 4>> &ReverseMap,
101  Instruction *Inst, KeyTy Val) {
102  typename DenseMap<Instruction *, SmallPtrSet<KeyTy, 4>>::iterator InstIt =
103  ReverseMap.find(Inst);
104  assert(InstIt != ReverseMap.end() && "Reverse map out of sync?");
105  bool Found = InstIt->second.erase(Val);
106  assert(Found && "Invalid reverse map!");
107  (void)Found;
108  if (InstIt->second.empty())
109  ReverseMap.erase(InstIt);
110 }
111 
112 /// If the given instruction references a specific memory location, fill in Loc
113 /// with the details, otherwise set Loc.Ptr to null.
114 ///
115 /// Returns a ModRefInfo value describing the general behavior of the
116 /// instruction.
117 static ModRefInfo GetLocation(const Instruction *Inst, MemoryLocation &Loc,
118  const TargetLibraryInfo &TLI) {
119  if (const LoadInst *LI = dyn_cast<LoadInst>(Inst)) {
120  if (LI->isUnordered()) {
121  Loc = MemoryLocation::get(LI);
122  return MRI_Ref;
123  }
124  if (LI->getOrdering() == AtomicOrdering::Monotonic) {
125  Loc = MemoryLocation::get(LI);
126  return MRI_ModRef;
127  }
128  Loc = MemoryLocation();
129  return MRI_ModRef;
130  }
131 
132  if (const StoreInst *SI = dyn_cast<StoreInst>(Inst)) {
133  if (SI->isUnordered()) {
134  Loc = MemoryLocation::get(SI);
135  return MRI_Mod;
136  }
137  if (SI->getOrdering() == AtomicOrdering::Monotonic) {
138  Loc = MemoryLocation::get(SI);
139  return MRI_ModRef;
140  }
141  Loc = MemoryLocation();
142  return MRI_ModRef;
143  }
144 
145  if (const VAArgInst *V = dyn_cast<VAArgInst>(Inst)) {
146  Loc = MemoryLocation::get(V);
147  return MRI_ModRef;
148  }
149 
150  if (const CallInst *CI = isFreeCall(Inst, &TLI)) {
151  // calls to free() deallocate the entire structure
152  Loc = MemoryLocation(CI->getArgOperand(0));
153  return MRI_Mod;
154  }
155 
156  if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(Inst)) {
157  AAMDNodes AAInfo;
158 
159  switch (II->getIntrinsicID()) {
160  case Intrinsic::lifetime_start:
161  case Intrinsic::lifetime_end:
162  case Intrinsic::invariant_start:
163  II->getAAMetadata(AAInfo);
164  Loc = MemoryLocation(
165  II->getArgOperand(1),
166  cast<ConstantInt>(II->getArgOperand(0))->getZExtValue(), AAInfo);
167  // These intrinsics don't really modify the memory, but returning Mod
168  // will allow them to be handled conservatively.
169  return MRI_Mod;
170  case Intrinsic::invariant_end:
171  II->getAAMetadata(AAInfo);
172  Loc = MemoryLocation(
173  II->getArgOperand(2),
174  cast<ConstantInt>(II->getArgOperand(1))->getZExtValue(), AAInfo);
175  // These intrinsics don't really modify the memory, but returning Mod
176  // will allow them to be handled conservatively.
177  return MRI_Mod;
178  default:
179  break;
180  }
181  }
182 
183  // Otherwise, just do the coarse-grained thing that always works.
184  if (Inst->mayWriteToMemory())
185  return MRI_ModRef;
186  if (Inst->mayReadFromMemory())
187  return MRI_Ref;
188  return MRI_NoModRef;
189 }
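// Illustrative sketch (not part of the original file, hence disabled): one way
// a client might use GetLocation. For an unordered load it fills Loc with the
// load's location and returns MRI_Ref; for an unordered store it returns
// MRI_Mod. The helper name below is hypothetical.
#if 0
static bool readsKnownLocation(Instruction *I, const TargetLibraryInfo &TLI) {
  MemoryLocation Loc;
  ModRefInfo MR = GetLocation(I, Loc, TLI);
  // A non-null Loc.Ptr means the instruction accesses a specific location.
  return Loc.Ptr && MR == MRI_Ref;
}
#endif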
190 
191 /// Private helper for finding the local dependencies of a call site.
192 MemDepResult MemoryDependenceResults::getCallSiteDependencyFrom(
193  CallSite CS, bool isReadOnlyCall, BasicBlock::iterator ScanIt,
194  BasicBlock *BB) {
195  unsigned Limit = BlockScanLimit;
196 
197  // Walk backwards through the block, looking for dependencies.
198  while (ScanIt != BB->begin()) {
199  // Limit the amount of scanning we do so we don't end up with quadratic
200  // running time on extreme testcases.
201  --Limit;
202  if (!Limit)
203  return MemDepResult::getUnknown();
204 
205  Instruction *Inst = &*--ScanIt;
206 
207  // If this inst is a memory op, get the pointer it accessed
208  MemoryLocation Loc;
209  ModRefInfo MR = GetLocation(Inst, Loc, TLI);
210  if (Loc.Ptr) {
211  // A simple instruction.
212  if (AA.getModRefInfo(CS, Loc) != MRI_NoModRef)
213  return MemDepResult::getClobber(Inst);
214  continue;
215  }
216 
217  if (auto InstCS = CallSite(Inst)) {
218  // Debug intrinsics don't cause dependences.
219  if (isa<DbgInfoIntrinsic>(Inst))
220  continue;
221  // If these two calls do not interfere, look past it.
222  switch (AA.getModRefInfo(CS, InstCS)) {
223  case MRI_NoModRef:
224  // If the two calls are the same, return InstCS as a Def, so that
225  // CS can be found redundant and eliminated.
226  if (isReadOnlyCall && !(MR & MRI_Mod) &&
227  CS.getInstruction()->isIdenticalToWhenDefined(Inst))
228  return MemDepResult::getDef(Inst);
229 
230  // Otherwise if the two calls don't interact (e.g. InstCS is readnone)
231  // keep scanning.
232  continue;
233  default:
234  return MemDepResult::getClobber(Inst);
235  }
236  }
237 
238  // If we could not obtain a pointer for the instruction and the instruction
239  // touches memory then assume that this is a dependency.
240  if (MR != MRI_NoModRef)
241  return MemDepResult::getClobber(Inst);
242  }
243 
244  // No dependence found. If this is the entry block of the function, it is
245  // unknown, otherwise it is non-local.
246  if (BB != &BB->getParent()->getEntryBlock())
247  return MemDepResult::getNonLocal();
248  return MemDepResult::getNonFuncLocal();
249 }
250 
251 unsigned MemoryDependenceResults::getLoadLoadClobberFullWidthSize(
252  const Value *MemLocBase, int64_t MemLocOffs, unsigned MemLocSize,
253  const LoadInst *LI) {
254  // We can only extend simple integer loads.
255  if (!isa<IntegerType>(LI->getType()) || !LI->isSimple())
256  return 0;
257 
258  // Load widening is hostile to ThreadSanitizer: it may cause false positives
259  // or make the reports more cryptic (access sizes are wrong).
260  if (LI->getParent()->getParent()->hasFnAttribute(Attribute::SanitizeThread))
261  return 0;
262 
263  const DataLayout &DL = LI->getModule()->getDataLayout();
264 
265  // Get the base of this load.
266  int64_t LIOffs = 0;
267  const Value *LIBase =
268  GetPointerBaseWithConstantOffset(LI->getPointerOperand(), LIOffs, DL);
269 
270  // If the two pointers are not based on the same pointer, we can't tell that
271  // they are related.
272  if (LIBase != MemLocBase)
273  return 0;
274 
275  // Okay, the two values are based on the same pointer, but returned as
276  // no-alias. This happens when we have things like two byte loads at "P+1"
277  // and "P+3". Check to see if increasing the size of the "LI" load up to its
278  // alignment (or the largest native integer type) will allow us to load all
279  // the bits required by MemLoc.
280 
281  // If MemLoc is before LI, then no widening of LI will help us out.
282  if (MemLocOffs < LIOffs)
283  return 0;
284 
285  // Get the alignment of the load in bytes. We assume that it is safe to load
286  // any legal integer up to this size without a problem. For example, if we're
287  // looking at an i8 load on x86-32 that is known 1024 byte aligned, we can
288  // widen it up to an i32 load. If it is known 2-byte aligned, we can widen it
289  // to i16.
290  unsigned LoadAlign = LI->getAlignment();
291 
292  int64_t MemLocEnd = MemLocOffs + MemLocSize;
293 
294  // If no amount of rounding up will let MemLoc fit into LI, then bail out.
295  if (LIOffs + LoadAlign < MemLocEnd)
296  return 0;
297 
298  // This is the size of the load to try. Start with the next larger power of
299  // two.
300  unsigned NewLoadByteSize = LI->getType()->getPrimitiveSizeInBits() / 8U;
301  NewLoadByteSize = NextPowerOf2(NewLoadByteSize);
302 
303  while (true) {
304  // If this load size is bigger than our known alignment or would not fit
305  // into a native integer register, then we fail.
306  if (NewLoadByteSize > LoadAlign ||
307  !DL.fitsInLegalInteger(NewLoadByteSize * 8))
308  return 0;
309 
310  if (LIOffs + NewLoadByteSize > MemLocEnd &&
311  LI->getParent()->getParent()->hasFnAttribute(
312  Attribute::SanitizeAddress))
313  // We will be reading past the location accessed by the original program.
314  // While this is safe in a regular build, Address Safety analysis tools
315  // may start reporting false warnings. So, don't do widening.
316  return 0;
317 
318  // If a load of this width would include all of MemLoc, then we succeed.
319  if (LIOffs + NewLoadByteSize >= MemLocEnd)
320  return NewLoadByteSize;
321 
322  NewLoadByteSize <<= 1;
323  }
324 }
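// Worked example (illustrative, not from the original source): for an i8 load
// of *P that is 4-byte aligned (LIOffs = 0, LoadAlign = 4) and a query for the
// two bytes at P+2 (MemLocOffs = 2, MemLocSize = 2, so MemLocEnd = 4), the
// loop above starts at NewLoadByteSize = 2 and then tries 4; a 4-byte load
// covers bytes [0, 4) >= MemLocEnd, so the function returns 4, assuming i32 is
// a legal integer type for the target.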
325 
326 static bool isVolatile(Instruction *Inst) {
327  if (auto *LI = dyn_cast<LoadInst>(Inst))
328  return LI->isVolatile();
329  if (auto *SI = dyn_cast<StoreInst>(Inst))
330  return SI->isVolatile();
331  if (auto *AI = dyn_cast<AtomicCmpXchgInst>(Inst))
332  return AI->isVolatile();
333  return false;
334 }
335 
336 MemDepResult MemoryDependenceResults::getPointerDependencyFrom(
337  const MemoryLocation &MemLoc, bool isLoad, BasicBlock::iterator ScanIt,
338  BasicBlock *BB, Instruction *QueryInst, unsigned *Limit) {
339  MemDepResult InvariantGroupDependency = MemDepResult::getUnknown();
340  if (QueryInst != nullptr) {
341  if (auto *LI = dyn_cast<LoadInst>(QueryInst)) {
342  InvariantGroupDependency = getInvariantGroupPointerDependency(LI, BB);
343 
344  if (InvariantGroupDependency.isDef())
345  return InvariantGroupDependency;
346  }
347  }
348  MemDepResult SimpleDep = getSimplePointerDependencyFrom(
349  MemLoc, isLoad, ScanIt, BB, QueryInst, Limit);
350  if (SimpleDep.isDef())
351  return SimpleDep;
352  // A non-local invariant.group dependency indicates there is a non-local Def
353  // (it only returns NonLocal if it finds a non-local Def), which is better
354  // than a local clobber and everything else.
355  if (InvariantGroupDependency.isNonLocal())
356  return InvariantGroupDependency;
357 
358  assert(InvariantGroupDependency.isUnknown() &&
359  "InvariantGroupDependency should be only unknown at this point");
360  return SimpleDep;
361 }
362 
363 MemDepResult
364 MemoryDependenceResults::getInvariantGroupPointerDependency(LoadInst *LI,
365  BasicBlock *BB) {
366  auto *InvariantGroupMD = LI->getMetadata(LLVMContext::MD_invariant_group);
367  if (!InvariantGroupMD)
368  return MemDepResult::getUnknown();
369 
370  // Take the pointer operand after looking through all casts and zero-index
371  // GEPs. This way we only have to search the cast graph downwards.
372  Value *LoadOperand = LI->getPointerOperand()->stripPointerCasts();
373 
374  // It is not safe to walk the use list of a global value, because function
375  // passes aren't allowed to look outside their functions.
376  // FIXME: this could be fixed by filtering out instructions from outside
377  // of the current function.
378  if (isa<GlobalValue>(LoadOperand))
379  return MemDepResult::getUnknown();
380 
381  // Queue to process all pointers that are equivalent to load operand.
382  SmallVector<const Value *, 8> LoadOperandsQueue;
383  LoadOperandsQueue.push_back(LoadOperand);
384 
385  Instruction *ClosestDependency = nullptr;
386  // The order of instructions in the use list is unpredictable. In order to
387  // always get the same result, we look for the closest dominating user.
388  auto GetClosestDependency = [this](Instruction *Best, Instruction *Other) {
389  assert(Other && "Must call it with not null instruction");
390  if (Best == nullptr || DT.dominates(Best, Other))
391  return Other;
392  return Best;
393  };
394 
395  // FIXME: This loop is O(N^2) because dominates can be O(n) and in worst case
396  // we will see all the instructions. This should be fixed in MSSA.
397  while (!LoadOperandsQueue.empty()) {
398  const Value *Ptr = LoadOperandsQueue.pop_back_val();
399  assert(Ptr && !isa<GlobalValue>(Ptr) &&
400  "Null or GlobalValue should not be inserted");
401 
402  for (const Use &Us : Ptr->uses()) {
403  auto *U = dyn_cast<Instruction>(Us.getUser());
404  if (!U || U == LI || !DT.dominates(U, LI))
405  continue;
406 
407  // A bitcast or gep with all-zero indices uses Ptr. Add it to the queue to
408  // check its users. U = bitcast Ptr
409  if (isa<BitCastInst>(U)) {
410  LoadOperandsQueue.push_back(U);
411  continue;
412  }
413  // Gep with zeros is equivalent to bitcast.
414  // FIXME: we are not sure whether some bitcasts should be canonicalized to
415  // gep 0, or gep 0 to bitcast, because of SROA, so there are 2 forms. When
416  // typeless pointers are ready, both cases will be gone
417  // (and this BFS also won't be needed).
418  if (auto *GEP = dyn_cast<GetElementPtrInst>(U))
419  if (GEP->hasAllZeroIndices()) {
420  LoadOperandsQueue.push_back(U);
421  continue;
422  }
423 
424  // If we hit a load/store with the same invariant.group metadata (and the
425  // same pointer operand), we can assume that the value pointed to by the
426  // pointer operand didn't change.
427  if ((isa<LoadInst>(U) || isa<StoreInst>(U)) &&
428  U->getMetadata(LLVMContext::MD_invariant_group) == InvariantGroupMD)
429  ClosestDependency = GetClosestDependency(ClosestDependency, U);
430  }
431  }
432 
433  if (!ClosestDependency)
434  return MemDepResult::getUnknown();
435  if (ClosestDependency->getParent() == BB)
436  return MemDepResult::getDef(ClosestDependency);
437  // Def(U) can't be returned here because it is non-local. If no local
438  // dependency is found, return NonLocal, counting on the fact that the
439  // caller will then call getNonLocalPointerDependency, which returns the
440  // cached result.
441  NonLocalDefsCache.try_emplace(
442  LI, NonLocalDepResult(ClosestDependency->getParent(),
443  MemDepResult::getDef(ClosestDependency), nullptr));
444  return MemDepResult::getNonLocal();
445 }
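// Illustrative example (hypothetical IR, not from the original file): given
//   store i32 42, i32* %p, !invariant.group !0
//   %v = load i32, i32* %p, !invariant.group !0
// the use-list walk above finds the store as the closest dominating user
// carrying the same !invariant.group metadata, so the load gets a Def on the
// store (or a cached non-local Def if the store is in another block).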
446 
447 MemDepResult MemoryDependenceResults::getSimplePointerDependencyFrom(
448  const MemoryLocation &MemLoc, bool isLoad, BasicBlock::iterator ScanIt,
449  BasicBlock *BB, Instruction *QueryInst, unsigned *Limit) {
450  bool isInvariantLoad = false;
451 
452  if (!Limit) {
453  unsigned DefaultLimit = BlockScanLimit;
454  return getSimplePointerDependencyFrom(MemLoc, isLoad, ScanIt, BB, QueryInst,
455  &DefaultLimit);
456  }
457 
458  // We must be careful with atomic accesses, as they may allow another thread
459  // to touch this location, clobbering it. We are conservative: if the
460  // QueryInst is not a simple (non-atomic) memory access, we automatically
461  // return getClobber.
462  // If it is simple, we know based on the results of
463  // "Compiler testing via a theory of sound optimisations in the C11/C++11
464  // memory model" in PLDI 2013, that a non-atomic location can only be
465  // clobbered between a pair of a release and an acquire action, with no
466  // access to the location in between.
467  // Here is an example for giving the general intuition behind this rule.
468  // In the following code:
469  // store x 0;
470  // release action; [1]
471  // acquire action; [4]
472  // %val = load x;
473  // It is unsafe to replace %val by 0 because another thread may be running:
474  // acquire action; [2]
475  // store x 42;
476  // release action; [3]
477  // with synchronization from 1 to 2 and from 3 to 4, resulting in %val
478  // being 42. A key property of this program, however, is that if either
479  // 1 or 4 were missing, there would be a race between the store of 42 and
480  // either the store of 0 or the load (making the whole program racy).
481  // The paper mentioned above shows that the same property is respected
482  // by every program that can detect any optimization of that kind: either
483  // it is racy (undefined) or there is a release followed by an acquire
484  // between the pair of accesses under consideration.
485 
486  // If the load is invariant, we "know" that it doesn't alias *any* write. We
487  // do want to respect mustalias results since defs are useful for value
488  // forwarding, but any mayalias write can be assumed to be noalias.
489  // Arguably, this logic should be pushed inside AliasAnalysis itself.
490  if (isLoad && QueryInst) {
491  LoadInst *LI = dyn_cast<LoadInst>(QueryInst);
492  if (LI && LI->getMetadata(LLVMContext::MD_invariant_load) != nullptr)
493  isInvariantLoad = true;
494  }
495 
496  const DataLayout &DL = BB->getModule()->getDataLayout();
497 
498  // Create a numbered basic block to lazily compute and cache instruction
499  // positions inside a BB. This is used to provide fast queries for relative
500  // position between two instructions in a BB and can be used by
501  // AliasAnalysis::callCapturesBefore.
502  OrderedBasicBlock OBB(BB);
503 
504  // Return "true" if and only if the instruction I is either a non-simple
505  // load or a non-simple store.
506  auto isNonSimpleLoadOrStore = [](Instruction *I) -> bool {
507  if (auto *LI = dyn_cast<LoadInst>(I))
508  return !LI->isSimple();
509  if (auto *SI = dyn_cast<StoreInst>(I))
510  return !SI->isSimple();
511  return false;
512  };
513 
514  // Return "true" if I is not a load and not a store, but it does access
515  // memory.
516  auto isOtherMemAccess = [](Instruction *I) -> bool {
517  return !isa<LoadInst>(I) && !isa<StoreInst>(I) && I->mayReadOrWriteMemory();
518  };
519 
520  // Walk backwards through the basic block, looking for dependencies.
521  while (ScanIt != BB->begin()) {
522  Instruction *Inst = &*--ScanIt;
523 
524  if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(Inst))
525  // Debug intrinsics don't (and can't) cause dependencies.
526  if (isa<DbgInfoIntrinsic>(II))
527  continue;
528 
529  // Limit the amount of scanning we do so we don't end up with quadratic
530  // running time on extreme testcases.
531  --*Limit;
532  if (!*Limit)
533  return MemDepResult::getUnknown();
534 
535  if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(Inst)) {
536  // If we reach a lifetime begin or end marker, then the query ends here
537  // because the value is undefined.
538  if (II->getIntrinsicID() == Intrinsic::lifetime_start) {
539  // FIXME: This only considers queries directly on the invariant-tagged
540  // pointer, not on query pointers that are indexed off of them. It'd
541  // be nice to handle that at some point (the right approach is to use
542  // GetPointerBaseWithConstantOffset).
543  if (AA.isMustAlias(MemoryLocation(II->getArgOperand(1)), MemLoc))
544  return MemDepResult::getDef(II);
545  continue;
546  }
547  }
548 
549  // Values depend on loads if the pointers are must aliased. This means
550  // that a load depends on another must aliased load from the same value.
551  // One exception is atomic loads: a value can depend on an atomic load that
552  // it does not alias with when this atomic load indicates that another
553  // thread may be accessing the location.
554  if (LoadInst *LI = dyn_cast<LoadInst>(Inst)) {
555  // While volatile accesses cannot be eliminated, they do not have to clobber
556  // non-aliasing locations; normal accesses, for example, can be safely
557  // reordered with volatile accesses.
558  if (LI->isVolatile()) {
559  if (!QueryInst)
560  // Original QueryInst *may* be volatile
561  return MemDepResult::getClobber(LI);
562  if (isVolatile(QueryInst))
563  // Ordering required if QueryInst is itself volatile
564  return MemDepResult::getClobber(LI);
565  // Otherwise, volatile doesn't imply any special ordering
566  }
567 
568  // Atomic loads have complications involved.
569  // A Monotonic (or higher) load is OK if the query inst is itself not
570  // atomic.
571  // FIXME: This is overly conservative.
572  if (LI->isAtomic() && isStrongerThanUnordered(LI->getOrdering())) {
573  if (!QueryInst || isNonSimpleLoadOrStore(QueryInst) ||
574  isOtherMemAccess(QueryInst))
575  return MemDepResult::getClobber(LI);
576  if (LI->getOrdering() != AtomicOrdering::Monotonic)
577  return MemDepResult::getClobber(LI);
578  }
579 
580  MemoryLocation LoadLoc = MemoryLocation::get(LI);
581 
582  // If we found a pointer, check if it could be the same as our pointer.
583  AliasResult R = AA.alias(LoadLoc, MemLoc);
584 
585  if (isLoad) {
586  if (R == NoAlias)
587  continue;
588 
589  // Must aliased loads are defs of each other.
590  if (R == MustAlias)
591  return MemDepResult::getDef(Inst);
592 
593 #if 0 // FIXME: Temporarily disabled. GVN is cleverly rewriting loads
594  // in terms of clobbering loads, but since it does this by looking
595  // at the clobbering load directly, it doesn't know about any
596  // phi translation that may have happened along the way.
597 
598  // If we have a partial alias, then return this as a clobber for the
599  // client to handle.
600  if (R == PartialAlias)
601  return MemDepResult::getClobber(Inst);
602 #endif
603 
604  // Random may-alias loads don't depend on each other without a
605  // dependence.
606  continue;
607  }
608 
609  // Stores don't depend on other no-aliased accesses.
610  if (R == NoAlias)
611  continue;
612 
613  // Stores don't alias loads from read-only memory.
614  if (AA.pointsToConstantMemory(LoadLoc))
615  continue;
616 
617  // Stores depend on may/must aliased loads.
618  return MemDepResult::getDef(Inst);
619  }
620 
621  if (StoreInst *SI = dyn_cast<StoreInst>(Inst)) {
622  // Atomic stores have complications involved.
623  // A Monotonic store is OK if the query inst is itself not atomic.
624  // FIXME: This is overly conservative.
625  if (!SI->isUnordered() && SI->isAtomic()) {
626  if (!QueryInst || isNonSimpleLoadOrStore(QueryInst) ||
627  isOtherMemAccess(QueryInst))
628  return MemDepResult::getClobber(SI);
629  if (SI->getOrdering() != AtomicOrdering::Monotonic)
630  return MemDepResult::getClobber(SI);
631  }
632 
633  // FIXME: this is overly conservative.
634  // While volatile accesses cannot be eliminated, they do not have to clobber
635  // non-aliasing locations; normal accesses can, for example, be reordered
636  // with volatile accesses.
637  if (SI->isVolatile())
638  if (!QueryInst || isNonSimpleLoadOrStore(QueryInst) ||
639  isOtherMemAccess(QueryInst))
640  return MemDepResult::getClobber(SI);
641 
642  // If alias analysis can tell that this store is guaranteed to not modify
643  // the query pointer, ignore it. Use getModRefInfo to handle cases where
644  // the query pointer points to constant memory etc.
645  if (AA.getModRefInfo(SI, MemLoc) == MRI_NoModRef)
646  continue;
647 
648  // Ok, this store might clobber the query pointer. Check to see if it is
649  // a must alias: in this case, we want to return this as a def.
650  MemoryLocation StoreLoc = MemoryLocation::get(SI);
651 
652  // If we found a pointer, check if it could be the same as our pointer.
653  AliasResult R = AA.alias(StoreLoc, MemLoc);
654 
655  if (R == NoAlias)
656  continue;
657  if (R == MustAlias)
658  return MemDepResult::getDef(Inst);
659  if (isInvariantLoad)
660  continue;
661  return MemDepResult::getClobber(Inst);
662  }
663 
664  // If this is an allocation, and if we know that the accessed pointer is to
665  // the allocation, return Def. This means that there is no dependence and
666  // the access can be optimized based on that. For example, a load could
667  // turn into undef. Note that we can bypass the allocation itself when
668  // looking for a clobber in many cases; that's an alias property and is
669  // handled by BasicAA.
670  if (isa<AllocaInst>(Inst) || isNoAliasFn(Inst, &TLI)) {
671  const Value *AccessPtr = GetUnderlyingObject(MemLoc.Ptr, DL);
672  if (AccessPtr == Inst || AA.isMustAlias(Inst, AccessPtr))
673  return MemDepResult::getDef(Inst);
674  }
675 
676  if (isInvariantLoad)
677  continue;
678 
679  // A release fence requires that all stores complete before it, but does
680  // not prevent the reordering of following loads or stores 'before' the
681  // fence. As a result, we look past it when finding a dependency for
682  // loads. DSE uses this to find preceding stores to delete and thus we
683  // can't bypass the fence if the query instruction is a store.
684  if (FenceInst *FI = dyn_cast<FenceInst>(Inst))
685  if (isLoad && FI->getOrdering() == AtomicOrdering::Release)
686  continue;
687 
688  // See if this instruction (e.g. a call or vaarg) mod/ref's the pointer.
689  ModRefInfo MR = AA.getModRefInfo(Inst, MemLoc);
690  // If necessary, perform additional analysis.
691  if (MR == MRI_ModRef)
692  MR = AA.callCapturesBefore(Inst, MemLoc, &DT, &OBB);
693  switch (MR) {
694  case MRI_NoModRef:
695  // If the call has no effect on the queried pointer, just ignore it.
696  continue;
697  case MRI_Mod:
698  return MemDepResult::getClobber(Inst);
699  case MRI_Ref:
700  // If the call is known to never store to the pointer, and if this is a
701  // load query, we can safely ignore it (scan past it).
702  if (isLoad)
703  continue;
704  LLVM_FALLTHROUGH;
705  default:
706  // Otherwise, there is a potential dependence. Return a clobber.
707  return MemDepResult::getClobber(Inst);
708  }
709  }
710 
711  // No dependence found. If this is the entry block of the function, it is
712  // unknown, otherwise it is non-local.
713  if (BB != &BB->getParent()->getEntryBlock())
714  return MemDepResult::getNonLocal();
715  return MemDepResult::getNonFuncLocal();
716 }
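// Illustrative example (hypothetical IR, not from the original file): scanning
// backwards from "%v = load i32, i32* %p" and reaching "store i32 1, i32* %p",
// the alias query above reports MustAlias, so the scan returns a Def on the
// store; a store that AA proves NoAlias with %p is simply skipped and the
// backward scan continues.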
717 
718 MemDepResult MemoryDependenceResults::getDependency(Instruction *QueryInst) {
719  Instruction *ScanPos = QueryInst;
720 
721  // Check for a cached result
722  MemDepResult &LocalCache = LocalDeps[QueryInst];
723 
724  // If the cached entry is non-dirty, just return it. Note that this depends
725  // on MemDepResult's default constructing to 'dirty'.
726  if (!LocalCache.isDirty())
727  return LocalCache;
728 
729  // Otherwise, if we have a dirty entry, we know we can start the scan at that
730  // instruction, which may save us some work.
731  if (Instruction *Inst = LocalCache.getInst()) {
732  ScanPos = Inst;
733 
734  RemoveFromReverseMap(ReverseLocalDeps, Inst, QueryInst);
735  }
736 
737  BasicBlock *QueryParent = QueryInst->getParent();
738 
739  // Do the scan.
740  if (BasicBlock::iterator(QueryInst) == QueryParent->begin()) {
741  // No dependence found. If this is the entry block of the function, it is
742  // unknown, otherwise it is non-local.
743  if (QueryParent != &QueryParent->getParent()->getEntryBlock())
744  LocalCache = MemDepResult::getNonLocal();
745  else
746  LocalCache = MemDepResult::getNonFuncLocal();
747  } else {
748  MemoryLocation MemLoc;
749  ModRefInfo MR = GetLocation(QueryInst, MemLoc, TLI);
750  if (MemLoc.Ptr) {
751  // If we can do a pointer scan, make it happen.
752  bool isLoad = !(MR & MRI_Mod);
753  if (auto *II = dyn_cast<IntrinsicInst>(QueryInst))
754  isLoad |= II->getIntrinsicID() == Intrinsic::lifetime_start;
755 
756  LocalCache = getPointerDependencyFrom(
757  MemLoc, isLoad, ScanPos->getIterator(), QueryParent, QueryInst);
758  } else if (isa<CallInst>(QueryInst) || isa<InvokeInst>(QueryInst)) {
759  CallSite QueryCS(QueryInst);
760  bool isReadOnly = AA.onlyReadsMemory(QueryCS);
761  LocalCache = getCallSiteDependencyFrom(
762  QueryCS, isReadOnly, ScanPos->getIterator(), QueryParent);
763  } else
764  // Non-memory instruction.
765  LocalCache = MemDepResult::getUnknown();
766  }
767 
768  // Remember the result!
769  if (Instruction *I = LocalCache.getInst())
770  ReverseLocalDeps[I].insert(QueryInst);
771 
772  return LocalCache;
773 }
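// Illustrative sketch (not part of the original file, hence disabled): a
// typical client-side query against this analysis. The function name below is
// hypothetical.
#if 0
static void exampleQuery(MemoryDependenceResults &MD, LoadInst *LI) {
  MemDepResult Dep = MD.getDependency(LI);
  if (Dep.isDef()) {
    // LI is defined by Dep.getInst() within the same basic block.
  } else if (Dep.isNonLocal()) {
    // The dependency lives in a predecessor block; clients would use
    // getNonLocalPointerDependency to enumerate the per-block results.
  }
}
#endif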
774 
775 #ifndef NDEBUG
776 /// This method is used when -debug is specified to verify that cache arrays
777 /// are properly kept sorted.
778 static void AssertSorted(MemoryDependenceResults::NonLocalDepInfo &Cache,
779  int Count = -1) {
780  if (Count == -1)
781  Count = Cache.size();
782  assert(std::is_sorted(Cache.begin(), Cache.begin() + Count) &&
783  "Cache isn't sorted!");
784 }
785 #endif
786 
787 const MemoryDependenceResults::NonLocalDepInfo &
788 MemoryDependenceResults::getNonLocalCallDependency(CallSite QueryCS) {
789  assert(getDependency(QueryCS.getInstruction()).isNonLocal() &&
790  "getNonLocalCallDependency should only be used on calls with "
791  "non-local deps!");
792  PerInstNLInfo &CacheP = NonLocalDeps[QueryCS.getInstruction()];
793  NonLocalDepInfo &Cache = CacheP.first;
794 
795  // This is the set of blocks that need to be recomputed. In the cached case,
796  // this can happen due to instructions being deleted etc. In the uncached
797  // case, this starts out as the set of predecessors we care about.
798  SmallVector<BasicBlock *, 32> DirtyBlocks;
799 
800  if (!Cache.empty()) {
801  // Okay, we have a cache entry. If we know it is not dirty, just return it
802  // with no computation.
803  if (!CacheP.second) {
804  ++NumCacheNonLocal;
805  return Cache;
806  }
807 
808  // If we already have a partially computed set of results, scan them to
809  // determine what is dirty, seeding our initial DirtyBlocks worklist.
810  for (auto &Entry : Cache)
811  if (Entry.getResult().isDirty())
812  DirtyBlocks.push_back(Entry.getBB());
813 
814  // Sort the cache so that we can do fast binary search lookups below.
815  std::sort(Cache.begin(), Cache.end());
816 
817  ++NumCacheDirtyNonLocal;
818  // cerr << "CACHED CASE: " << DirtyBlocks.size() << " dirty: "
819  // << Cache.size() << " cached: " << *QueryInst;
820  } else {
821  // Seed DirtyBlocks with each of the preds of QueryInst's block.
822  BasicBlock *QueryBB = QueryCS.getInstruction()->getParent();
823  for (BasicBlock *Pred : PredCache.get(QueryBB))
824  DirtyBlocks.push_back(Pred);
825  ++NumUncacheNonLocal;
826  }
827 
828  // isReadonlyCall - If this is a read-only call, we can be more aggressive.
829  bool isReadonlyCall = AA.onlyReadsMemory(QueryCS);
830 
831  SmallPtrSet<BasicBlock *, 32> Visited;
832 
833  unsigned NumSortedEntries = Cache.size();
834  DEBUG(AssertSorted(Cache));
835 
836  // Iterate while we still have blocks to update.
837  while (!DirtyBlocks.empty()) {
838  BasicBlock *DirtyBB = DirtyBlocks.back();
839  DirtyBlocks.pop_back();
840 
841  // Already processed this block?
842  if (!Visited.insert(DirtyBB).second)
843  continue;
844 
845  // Do a binary search to see if we already have an entry for this block in
846  // the cache set. If so, find it.
847  DEBUG(AssertSorted(Cache, NumSortedEntries));
848  NonLocalDepInfo::iterator Entry =
849  std::upper_bound(Cache.begin(), Cache.begin() + NumSortedEntries,
850  NonLocalDepEntry(DirtyBB));
851  if (Entry != Cache.begin() && std::prev(Entry)->getBB() == DirtyBB)
852  --Entry;
853 
854  NonLocalDepEntry *ExistingResult = nullptr;
855  if (Entry != Cache.begin() + NumSortedEntries &&
856  Entry->getBB() == DirtyBB) {
857  // If we already have an entry, and if it isn't already dirty, the block
858  // is done.
859  if (!Entry->getResult().isDirty())
860  continue;
861 
862  // Otherwise, remember this slot so we can update the value.
863  ExistingResult = &*Entry;
864  }
865 
866  // If the dirty entry has a pointer, start scanning from it so we don't have
867  // to rescan the entire block.
868  BasicBlock::iterator ScanPos = DirtyBB->end();
869  if (ExistingResult) {
870  if (Instruction *Inst = ExistingResult->getResult().getInst()) {
871  ScanPos = Inst->getIterator();
872  // We're removing QueryInst's use of Inst.
873  RemoveFromReverseMap(ReverseNonLocalDeps, Inst,
874  QueryCS.getInstruction());
875  }
876  }
877 
878  // Find out if this block has a local dependency for QueryInst.
879  MemDepResult Dep;
880 
881  if (ScanPos != DirtyBB->begin()) {
882  Dep =
883  getCallSiteDependencyFrom(QueryCS, isReadonlyCall, ScanPos, DirtyBB);
884  } else if (DirtyBB != &DirtyBB->getParent()->getEntryBlock()) {
885  // No dependence found. If this is the entry block of the function, the
886  // result is non-func-local, otherwise it is non-local.
887  Dep = MemDepResult::getNonLocal();
888  } else {
889  Dep = MemDepResult::getNonFuncLocal();
890  }
891 
892  // If we had a dirty entry for the block, update it. Otherwise, just add
893  // a new entry.
894  if (ExistingResult)
895  ExistingResult->setResult(Dep);
896  else
897  Cache.push_back(NonLocalDepEntry(DirtyBB, Dep));
898 
899  // If the block has a dependency (i.e. it isn't completely transparent to
900  // the value), remember the association!
901  if (!Dep.isNonLocal()) {
902  // Keep the ReverseNonLocalDeps map up to date so we can efficiently
903  // update this when we remove instructions.
904  if (Instruction *Inst = Dep.getInst())
905  ReverseNonLocalDeps[Inst].insert(QueryCS.getInstruction());
906  } else {
907 
908  // If the block *is* completely transparent to the load, we need to check
909  // the predecessors of this block. Add them to our worklist.
910  for (BasicBlock *Pred : PredCache.get(DirtyBB))
911  DirtyBlocks.push_back(Pred);
912  }
913  }
914 
915  return Cache;
916 }
917 
918 void MemoryDependenceResults::getNonLocalPointerDependency(
919  Instruction *QueryInst, SmallVectorImpl<NonLocalDepResult> &Result) {
920  const MemoryLocation Loc = MemoryLocation::get(QueryInst);
921  bool isLoad = isa<LoadInst>(QueryInst);
922  BasicBlock *FromBB = QueryInst->getParent();
923  assert(FromBB);
924 
925  assert(Loc.Ptr->getType()->isPointerTy() &&
926  "Can't get pointer deps of a non-pointer!");
927  Result.clear();
928  {
929  // Check if there is a cached Def with invariant.group. FIXME: the cache
930  // might be invalid if the cached instruction is removed between the call
931  // to getPointerDependencyFrom and this function.
932  auto NonLocalDefIt = NonLocalDefsCache.find(QueryInst);
933  if (NonLocalDefIt != NonLocalDefsCache.end()) {
934  Result.push_back(std::move(NonLocalDefIt->second));
935  NonLocalDefsCache.erase(NonLocalDefIt);
936  return;
937  }
938  }
939  // This routine does not expect to deal with volatile instructions.
940  // Doing so would require piping through the QueryInst all the way through.
941  // TODO: volatiles can't be elided, but they can be reordered with other
942  // non-volatile accesses.
943 
944  // We currently give up on any instruction which is ordered, but we do handle
945  // atomic instructions which are unordered.
946  // TODO: Handle ordered instructions
947  auto isOrdered = [](Instruction *Inst) {
948  if (LoadInst *LI = dyn_cast<LoadInst>(Inst)) {
949  return !LI->isUnordered();
950  } else if (StoreInst *SI = dyn_cast<StoreInst>(Inst)) {
951  return !SI->isUnordered();
952  }
953  return false;
954  };
955  if (isVolatile(QueryInst) || isOrdered(QueryInst)) {
956  Result.push_back(NonLocalDepResult(FromBB, MemDepResult::getUnknown(),
957  const_cast<Value *>(Loc.Ptr)));
958  return;
959  }
960  const DataLayout &DL = FromBB->getModule()->getDataLayout();
961  PHITransAddr Address(const_cast<Value *>(Loc.Ptr), DL, &AC);
962 
963  // This is the set of blocks we've inspected, and the pointer we consider in
964  // each block. Because of critical edges, we currently bail out if querying
965  // a block with multiple different pointers. This can happen during PHI
966  // translation.
967  DenseMap<BasicBlock *, Value *> Visited;
968  if (getNonLocalPointerDepFromBB(QueryInst, Address, Loc, isLoad, FromBB,
969  Result, Visited, true))
970  return;
971  Result.clear();
972  Result.push_back(NonLocalDepResult(FromBB, MemDepResult::getUnknown(),
973  const_cast<Value *>(Loc.Ptr)));
974 }
975 
976 /// Compute the memdep value for BB with Pointer/PointeeSize using either
977 /// cached information in Cache or by doing a lookup (which may use dirty cache
978 /// info if available).
979 ///
980 /// If we do a lookup, add the result to the cache.
981 MemDepResult MemoryDependenceResults::GetNonLocalInfoForBlock(
982  Instruction *QueryInst, const MemoryLocation &Loc, bool isLoad,
983  BasicBlock *BB, NonLocalDepInfo *Cache, unsigned NumSortedEntries) {
984 
985  // Do a binary search to see if we already have an entry for this block in
986  // the cache set. If so, find it.
987  NonLocalDepInfo::iterator Entry = std::upper_bound(
988  Cache->begin(), Cache->begin() + NumSortedEntries, NonLocalDepEntry(BB));
989  if (Entry != Cache->begin() && (Entry - 1)->getBB() == BB)
990  --Entry;
991 
992  NonLocalDepEntry *ExistingResult = nullptr;
993  if (Entry != Cache->begin() + NumSortedEntries && Entry->getBB() == BB)
994  ExistingResult = &*Entry;
995 
996  // If we have a cached entry, and it is non-dirty, use it as the value for
997  // this dependency.
998  if (ExistingResult && !ExistingResult->getResult().isDirty()) {
999  ++NumCacheNonLocalPtr;
1000  return ExistingResult->getResult();
1001  }
1002 
1003  // Otherwise, we have to scan for the value. If we have a dirty cache
1004  // entry, start scanning from its position, otherwise we scan from the end
1005  // of the block.
1006  BasicBlock::iterator ScanPos = BB->end();
1007  if (ExistingResult && ExistingResult->getResult().getInst()) {
1008  assert(ExistingResult->getResult().getInst()->getParent() == BB &&
1009  "Instruction invalidated?");
1010  ++NumCacheDirtyNonLocalPtr;
1011  ScanPos = ExistingResult->getResult().getInst()->getIterator();
1012 
1013  // Eliminating the dirty entry from 'Cache', so update the reverse info.
1014  ValueIsLoadPair CacheKey(Loc.Ptr, isLoad);
1015  RemoveFromReverseMap(ReverseNonLocalPtrDeps, &*ScanPos, CacheKey);
1016  } else {
1017  ++NumUncacheNonLocalPtr;
1018  }
1019 
1020  // Scan the block for the dependency.
1021  MemDepResult Dep =
1022  getPointerDependencyFrom(Loc, isLoad, ScanPos, BB, QueryInst);
1023 
1024  // If we had a dirty entry for the block, update it. Otherwise, just add
1025  // a new entry.
1026  if (ExistingResult)
1027  ExistingResult->setResult(Dep);
1028  else
1029  Cache->push_back(NonLocalDepEntry(BB, Dep));
1030 
1031  // If the block has a dependency (i.e. it isn't completely transparent to
1032  // the value), remember the reverse association because we just added it
1033  // to Cache!
1034  if (!Dep.isDef() && !Dep.isClobber())
1035  return Dep;
1036 
1037  // Keep the ReverseNonLocalPtrDeps map up to date so we can efficiently
1038  // update MemDep when we remove instructions.
1039  Instruction *Inst = Dep.getInst();
1040  assert(Inst && "Didn't depend on anything?");
1041  ValueIsLoadPair CacheKey(Loc.Ptr, isLoad);
1042  ReverseNonLocalPtrDeps[Inst].insert(CacheKey);
1043  return Dep;
1044 }
1045 
1046 /// Sort the NonLocalDepInfo cache, given a certain number of elements in the
1047 /// array that are already properly ordered.
1048 ///
1049 /// This is optimized for the case when only a few entries are added.
1050 static void
1051 SortNonLocalDepInfoCache(MemoryDependenceResults::NonLocalDepInfo &Cache,
1052  unsigned NumSortedEntries) {
1053  switch (Cache.size() - NumSortedEntries) {
1054  case 0:
1055  // done, no new entries.
1056  break;
1057  case 2: {
1058  // Two new entries, insert the last one into place.
1059  NonLocalDepEntry Val = Cache.back();
1060  Cache.pop_back();
1061  MemoryDependenceResults::NonLocalDepInfo::iterator Entry =
1062  std::upper_bound(Cache.begin(), Cache.end() - 1, Val);
1063  Cache.insert(Entry, Val);
1064  LLVM_FALLTHROUGH;
1065  }
1066  case 1:
1067  // One new entry, Just insert the new value at the appropriate position.
1068  if (Cache.size() != 1) {
1069  NonLocalDepEntry Val = Cache.back();
1070  Cache.pop_back();
1071  MemoryDependenceResults::NonLocalDepInfo::iterator Entry =
1072  std::upper_bound(Cache.begin(), Cache.end(), Val);
1073  Cache.insert(Entry, Val);
1074  }
1075  break;
1076  default:
1077  // Added many values, do a full scale sort.
1078  std::sort(Cache.begin(), Cache.end());
1079  break;
1080  }
1081 }
1082 
1083 /// Perform a dependency query based on pointer/pointeesize starting at the end
1084 /// of StartBB.
1085 ///
1086 /// Add any clobber/def results to the results vector and keep track of which
1087 /// blocks are visited in 'Visited'.
1088 ///
1089 /// This has special behavior for the first block queries (when SkipFirstBlock
1090 /// is true). In this special case, it ignores the contents of the specified
1091 /// block and starts returning dependence info for its predecessors.
1092 ///
1093 /// This function returns true on success, or false to indicate that it could
1094 /// not compute dependence information for some reason. This should be treated
1095 /// as a clobber dependence on the first instruction in the predecessor block.
1096 bool MemoryDependenceResults::getNonLocalPointerDepFromBB(
1097  Instruction *QueryInst, const PHITransAddr &Pointer,
1098  const MemoryLocation &Loc, bool isLoad, BasicBlock *StartBB,
1099  SmallVectorImpl<NonLocalDepResult> &Result,
1100  DenseMap<BasicBlock *, Value *> &Visited, bool SkipFirstBlock) {
1101  // Look up the cached info for Pointer.
1102  ValueIsLoadPair CacheKey(Pointer.getAddr(), isLoad);
1103 
1104  // Set up a temporary NLPI value. If the map doesn't yet have an entry for
1105  // CacheKey, this value will be inserted as the associated value. Otherwise,
1106  // it'll be ignored, and we'll have to check to see if the cached size and
1107  // aa tags are consistent with the current query.
1108  NonLocalPointerInfo InitialNLPI;
1109  InitialNLPI.Size = Loc.Size;
1110  InitialNLPI.AATags = Loc.AATags;
1111 
1112  // Get the NLPI for CacheKey, inserting one into the map if it doesn't
1113  // already have one.
1114  std::pair<CachedNonLocalPointerInfo::iterator, bool> Pair =
1115  NonLocalPointerDeps.insert(std::make_pair(CacheKey, InitialNLPI));
1116  NonLocalPointerInfo *CacheInfo = &Pair.first->second;
1117 
1118  // If we already have a cache entry for this CacheKey, we may need to do some
1119  // work to reconcile the cache entry and the current query.
1120  if (!Pair.second) {
1121  if (CacheInfo->Size < Loc.Size) {
1122  // The query's Size is greater than the cached one. Throw out the
1123  // cached data and proceed with the query at the greater size.
1124  CacheInfo->Pair = BBSkipFirstBlockPair();
1125  CacheInfo->Size = Loc.Size;
1126  for (auto &Entry : CacheInfo->NonLocalDeps)
1127  if (Instruction *Inst = Entry.getResult().getInst())
1128  RemoveFromReverseMap(ReverseNonLocalPtrDeps, Inst, CacheKey);
1129  CacheInfo->NonLocalDeps.clear();
1130  } else if (CacheInfo->Size > Loc.Size) {
1131  // This query's Size is less than the cached one. Conservatively restart
1132  // the query using the greater size.
1133  return getNonLocalPointerDepFromBB(
1134  QueryInst, Pointer, Loc.getWithNewSize(CacheInfo->Size), isLoad,
1135  StartBB, Result, Visited, SkipFirstBlock);
1136  }
1137 
1138  // If the query's AATags are inconsistent with the cached one,
1139  // conservatively throw out the cached data and restart the query with
1140  // no tag if needed.
1141  if (CacheInfo->AATags != Loc.AATags) {
1142  if (CacheInfo->AATags) {
1143  CacheInfo->Pair = BBSkipFirstBlockPair();
1144  CacheInfo->AATags = AAMDNodes();
1145  for (auto &Entry : CacheInfo->NonLocalDeps)
1146  if (Instruction *Inst = Entry.getResult().getInst())
1147  RemoveFromReverseMap(ReverseNonLocalPtrDeps, Inst, CacheKey);
1148  CacheInfo->NonLocalDeps.clear();
1149  }
1150  if (Loc.AATags)
1151  return getNonLocalPointerDepFromBB(
1152  QueryInst, Pointer, Loc.getWithoutAATags(), isLoad, StartBB, Result,
1153  Visited, SkipFirstBlock);
1154  }
1155  }
1156 
1157  NonLocalDepInfo *Cache = &CacheInfo->NonLocalDeps;
1158 
1159  // If we have valid cached information for exactly the block we are
1160  // investigating, just return it with no recomputation.
1161  if (CacheInfo->Pair == BBSkipFirstBlockPair(StartBB, SkipFirstBlock)) {
1162  // If we have a fully cached result for this query, we can just return the
1163  // cached results and populate the visited set. However, we have to verify
1164  // that we don't already have conflicting results for these blocks. Check
1165  // to ensure that if a block in the results set is in the visited set that
1166  // it was for the same pointer query.
1167  if (!Visited.empty()) {
1168  for (auto &Entry : *Cache) {
1169  DenseMap<BasicBlock *, Value *>::iterator VI =
1170  Visited.find(Entry.getBB());
1171  if (VI == Visited.end() || VI->second == Pointer.getAddr())
1172  continue;
1173 
1174  // We have a pointer mismatch in a block. Just return false, saying
1175  // that something was clobbered in this result. We could also do a
1176  // non-fully cached query, but there is little point in doing this.
1177  return false;
1178  }
1179  }
1180 
1181  Value *Addr = Pointer.getAddr();
1182  for (auto &Entry : *Cache) {
1183  Visited.insert(std::make_pair(Entry.getBB(), Addr));
1184  if (Entry.getResult().isNonLocal()) {
1185  continue;
1186  }
1187 
1188  if (DT.isReachableFromEntry(Entry.getBB())) {
1189  Result.push_back(
1190  NonLocalDepResult(Entry.getBB(), Entry.getResult(), Addr));
1191  }
1192  }
1193  ++NumCacheCompleteNonLocalPtr;
1194  return true;
1195  }
1196 
1197  // Otherwise, either this is a new block, a block with an invalid cache
1198  // pointer or one that we're about to invalidate by putting more info into it
1199  // than its valid cache info. If empty, the result will be valid cache info,
1200  // otherwise it isn't.
1201  if (Cache->empty())
1202  CacheInfo->Pair = BBSkipFirstBlockPair(StartBB, SkipFirstBlock);
1203  else
1204  CacheInfo->Pair = BBSkipFirstBlockPair();
1205 
1206  SmallVector<BasicBlock *, 32> Worklist;
1207  Worklist.push_back(StartBB);
1208 
1209  // PredList used inside loop.
1210  SmallVector<std::pair<BasicBlock *, PHITransAddr>, 16> PredList;
1211 
1212  // Keep track of the entries that we know are sorted. Previously cached
1213  // entries will all be sorted. The entries we add we only sort on demand (we
1214  // don't insert every element into its sorted position). We know that we
1215  // won't get any reuse from currently inserted values, because we don't
1216  // revisit blocks after we insert info for them.
1217  unsigned NumSortedEntries = Cache->size();
1218  unsigned WorklistEntries = BlockNumberLimit;
1219  bool GotWorklistLimit = false;
1220  DEBUG(AssertSorted(*Cache));
1221 
1222  while (!Worklist.empty()) {
1223  BasicBlock *BB = Worklist.pop_back_val();
1224 
1225  // If we do process a large number of blocks, it becomes very expensive and
1226  // likely isn't worth worrying about.
1227  if (Result.size() > NumResultsLimit) {
1228  Worklist.clear();
1229  // Sort it now (if needed) so that recursive invocations of
1230  // getNonLocalPointerDepFromBB and other routines that could reuse the
1231  // cache value will only see properly sorted cache arrays.
1232  if (Cache && NumSortedEntries != Cache->size()) {
1233  SortNonLocalDepInfoCache(*Cache, NumSortedEntries);
1234  }
1235  // Since we bail out, the "Cache" set won't contain all of the
1236  // results for the query. This is ok (we can still use it to accelerate
1237  // specific block queries) but we can't do the fastpath "return all
1238  // results from the set". Clear out the indicator for this.
1239  CacheInfo->Pair = BBSkipFirstBlockPair();
1240  return false;
1241  }
1242 
1243  // Skip the first block if we have it.
1244  if (!SkipFirstBlock) {
1245  // Analyze the dependency of *Pointer in FromBB. See if we already have
1246  // been here.
1247  assert(Visited.count(BB) && "Should check 'visited' before adding to WL");
1248 
1249  // Get the dependency info for Pointer in BB. If we have cached
1250  // information, we will use it, otherwise we compute it.
1251  DEBUG(AssertSorted(*Cache, NumSortedEntries));
1252  MemDepResult Dep = GetNonLocalInfoForBlock(QueryInst, Loc, isLoad, BB,
1253  Cache, NumSortedEntries);
1254 
1255  // If we got a Def or Clobber, add this to the list of results.
1256  if (!Dep.isNonLocal()) {
1257  if (DT.isReachableFromEntry(BB)) {
1258  Result.push_back(NonLocalDepResult(BB, Dep, Pointer.getAddr()));
1259  continue;
1260  }
1261  }
1262  }
1263 
1264  // If 'Pointer' is an instruction defined in this block, then we need to do
1265  // phi translation to change it into a value live in the predecessor block.
1266  // If not, we just add the predecessors to the worklist and scan them with
1267  // the same Pointer.
1268  if (!Pointer.NeedsPHITranslationFromBlock(BB)) {
1269  SkipFirstBlock = false;
1270  SmallVector<BasicBlock *, 16> NewBlocks;
1271  for (BasicBlock *Pred : PredCache.get(BB)) {
1272  // Verify that we haven't looked at this block yet.
1273  std::pair<DenseMap<BasicBlock *, Value *>::iterator, bool> InsertRes =
1274  Visited.insert(std::make_pair(Pred, Pointer.getAddr()));
1275  if (InsertRes.second) {
1276  // First time we've looked at this predecessor.
1277  NewBlocks.push_back(Pred);
1278  continue;
1279  }
1280 
1281  // If we have seen this block before, but it was with a different
1282  // pointer then we have a phi translation failure and we have to treat
1283  // this as a clobber.
1284  if (InsertRes.first->second != Pointer.getAddr()) {
1285  // Make sure to clean up the Visited map before continuing on to
1286  // PredTranslationFailure.
1287  for (unsigned i = 0; i < NewBlocks.size(); i++)
1288  Visited.erase(NewBlocks[i]);
1289  goto PredTranslationFailure;
1290  }
1291  }
1292  if (NewBlocks.size() > WorklistEntries) {
1293  // Make sure to clean up the Visited map before continuing on to
1294  // PredTranslationFailure.
1295  for (unsigned i = 0; i < NewBlocks.size(); i++)
1296  Visited.erase(NewBlocks[i]);
1297  GotWorklistLimit = true;
1298  goto PredTranslationFailure;
1299  }
1300  WorklistEntries -= NewBlocks.size();
1301  Worklist.append(NewBlocks.begin(), NewBlocks.end());
1302  continue;
1303  }
1304 
1305  // We do need to do phi translation, if we know ahead of time we can't phi
1306  // translate this value, don't even try.
1307  if (!Pointer.IsPotentiallyPHITranslatable())
1308  goto PredTranslationFailure;
1309 
1310  // We may have added values to the cache list before this PHI translation.
1311  // If so, we haven't done anything to ensure that the cache remains sorted.
1312  // Sort it now (if needed) so that recursive invocations of
1313  // getNonLocalPointerDepFromBB and other routines that could reuse the cache
1314  // value will only see properly sorted cache arrays.
1315  if (Cache && NumSortedEntries != Cache->size()) {
1316  SortNonLocalDepInfoCache(*Cache, NumSortedEntries);
1317  NumSortedEntries = Cache->size();
1318  }
1319  Cache = nullptr;
1320 
1321  PredList.clear();
1322  for (BasicBlock *Pred : PredCache.get(BB)) {
1323  PredList.push_back(std::make_pair(Pred, Pointer));
1324 
1325  // Get the PHI translated pointer in this predecessor. This can fail if
1326  // not translatable, in which case the getAddr() returns null.
1327  PHITransAddr &PredPointer = PredList.back().second;
1328  PredPointer.PHITranslateValue(BB, Pred, &DT, /*MustDominate=*/false);
1329  Value *PredPtrVal = PredPointer.getAddr();
1330 
1331  // Check to see if we have already visited this pred block with another
1332  // pointer. If so, we can't do this lookup. This failure can occur
1333  // with PHI translation when a critical edge exists and the PHI node in
1334  // the successor translates to a pointer value different than the
1335  // pointer the block was first analyzed with.
1336  std::pair<DenseMap<BasicBlock *, Value *>::iterator, bool> InsertRes =
1337  Visited.insert(std::make_pair(Pred, PredPtrVal));
1338 
1339  if (!InsertRes.second) {
1340  // We found the pred; take it off the list of preds to visit.
1341  PredList.pop_back();
1342 
1343  // If the predecessor was visited with PredPtr, then we already did
1344  // the analysis and can ignore it.
1345  if (InsertRes.first->second == PredPtrVal)
1346  continue;
1347 
1348  // Otherwise, the block was previously analyzed with a different
1349  // pointer. We can't represent the result of this case, so we just
1350  // treat this as a phi translation failure.
1351 
1352  // Make sure to clean up the Visited map before continuing on to
1353  // PredTranslationFailure.
1354  for (unsigned i = 0, n = PredList.size(); i < n; ++i)
1355  Visited.erase(PredList[i].first);
1356 
1357  goto PredTranslationFailure;
1358  }
1359  }
1360 
1361  // Actually process results here; this needs to be a separate loop to avoid
1362  // calling getNonLocalPointerDepFromBB for blocks we don't want to return
1363  // any results for. (getNonLocalPointerDepFromBB will modify our
1364  // datastructures in ways the code after the PredTranslationFailure label
1365  // doesn't expect.)
1366  for (unsigned i = 0, n = PredList.size(); i < n; ++i) {
1367  BasicBlock *Pred = PredList[i].first;
1368  PHITransAddr &PredPointer = PredList[i].second;
1369  Value *PredPtrVal = PredPointer.getAddr();
1370 
1371  bool CanTranslate = true;
1372  // If PHI translation was unable to find an available pointer in this
1373  // predecessor, then we have to assume that the pointer is clobbered in
1374  // that predecessor. We can still do PRE of the load, which would insert
1375  // a computation of the pointer in this predecessor.
1376  if (!PredPtrVal)
1377  CanTranslate = false;
1378 
1379  // FIXME: it is entirely possible that PHI translating will end up with
1380  // the same value. Consider PHI translating something like:
1381  // X = phi [x, bb1], [y, bb2]. PHI translating for bb1 doesn't *need*
1382  // to recurse here, pedantically speaking.
1383 
1384  // If getNonLocalPointerDepFromBB fails here, that means the cached
1385  // result conflicted with the Visited list; we have to conservatively
1386  // assume it is unknown, but this also does not block PRE of the load.
1387  if (!CanTranslate ||
1388  !getNonLocalPointerDepFromBB(QueryInst, PredPointer,
1389  Loc.getWithNewPtr(PredPtrVal), isLoad,
1390  Pred, Result, Visited)) {
1391  // Add the entry to the Result list.
1392  NonLocalDepResult Entry(Pred, MemDepResult::getUnknown(), PredPtrVal);
1393  Result.push_back(Entry);
1394 
1395  // Since we had a phi translation failure, the cache for CacheKey won't
1396  // include all of the entries that we need to immediately satisfy future
1397  // queries. Mark this in NonLocalPointerDeps by setting the
1398  // BBSkipFirstBlockPair pointer to null. This makes later reuse of the
1399  // cached value do more work but ensures the phi trans failure isn't missed.
1400  NonLocalPointerInfo &NLPI = NonLocalPointerDeps[CacheKey];
1401  NLPI.Pair = BBSkipFirstBlockPair();
1402  continue;
1403  }
1404  }
1405 
1406  // Refresh the CacheInfo/Cache pointer so that it isn't invalidated.
1407  CacheInfo = &NonLocalPointerDeps[CacheKey];
1408  Cache = &CacheInfo->NonLocalDeps;
1409  NumSortedEntries = Cache->size();
1410 
1411  // Since we did phi translation, the "Cache" set won't contain all of the
1412  // results for the query. This is ok (we can still use it to accelerate
1413  // specific block queries) but we can't do the fastpath "return all
1414  // results from the set" Clear out the indicator for this.
1415  CacheInfo->Pair = BBSkipFirstBlockPair();
1416  SkipFirstBlock = false;
1417  continue;
1418 
1419  PredTranslationFailure:
1420  // The following code is "failure"; we can't produce a sane translation
1421  // for the given block. It assumes that we haven't modified any of
1422  // our datastructures while processing the current block.
1423 
1424  if (!Cache) {
1425  // Refresh the CacheInfo/Cache pointer if it got invalidated.
1426  CacheInfo = &NonLocalPointerDeps[CacheKey];
1427  Cache = &CacheInfo->NonLocalDeps;
1428  NumSortedEntries = Cache->size();
1429  }
1430 
1431  // Since we failed phi translation, the "Cache" set won't contain all of the
1432  // results for the query. This is ok (we can still use it to accelerate
1433  // specific block queries) but we can't do the fastpath "return all
1434  // results from the set". Clear out the indicator for this.
1435  CacheInfo->Pair = BBSkipFirstBlockPair();
1436 
1437  // If *nothing* works, mark the pointer as unknown.
1438  //
1439  // If this is the magic first block, return this as a clobber of the whole
1440  // incoming value. Since we can't phi translate to one of the predecessors,
1441  // we have to bail out.
1442  if (SkipFirstBlock)
1443  return false;
1444 
1445  bool foundBlock = false;
1446  for (NonLocalDepEntry &I : llvm::reverse(*Cache)) {
1447  if (I.getBB() != BB)
1448  continue;
1449 
1450  assert((GotWorklistLimit || I.getResult().isNonLocal() ||
1451  !DT.isReachableFromEntry(BB)) &&
1452  "Should only be here with transparent block");
1453  foundBlock = true;
1454  I.setResult(MemDepResult::getUnknown());
1455  Result.push_back(
1456  NonLocalDepResult(I.getBB(), I.getResult(), Pointer.getAddr()));
1457  break;
1458  }
1459  (void)foundBlock; (void)GotWorklistLimit;
1460  assert((foundBlock || GotWorklistLimit) && "Current block not in cache?");
1461  }
1462 
1463  // Okay, we're done now. If we added new values to the cache, re-sort it.
1464  SortNonLocalDepInfoCache(*Cache, NumSortedEntries);
1465  DEBUG(AssertSorted(*Cache));
1466  return true;
1467 }
1468 
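For readers unfamiliar with how the results built above are consumed, here is an illustrative sketch (not part of this file) of a GVN/PRE-style client. The function name and the MD/Load parameters are assumed for illustration only:

// Illustrative only -- not code from MemoryDependenceAnalysis.cpp.
static void collectAvailableValues(MemoryDependenceResults &MD, LoadInst *Load) {
  SmallVector<NonLocalDepResult, 16> Deps;
  MD.getNonLocalPointerDependency(Load, Deps);
  for (const NonLocalDepResult &Dep : Deps) {
    const MemDepResult &Res = Dep.getResult();
    if (Res.isDef()) {
      // Res.getInst() produces the value on entry from Dep.getBB(); it could
      // feed a phi that makes the load redundant.
    } else if (Res.isClobber() || Res.isUnknown()) {
      // Not available on this path; PRE would have to insert a re-load of the
      // (possibly phi-translated) address returned by Dep.getAddress().
    }
  }
}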
1469 /// If P exists in CachedNonLocalPointerInfo, remove it.
1470 void MemoryDependenceResults::RemoveCachedNonLocalPointerDependencies(
1471  ValueIsLoadPair P) {
1472  CachedNonLocalPointerInfo::iterator It = NonLocalPointerDeps.find(P);
1473  if (It == NonLocalPointerDeps.end())
1474  return;
1475 
1476  // Remove all of the entries in the BB->val map. This involves removing
1477  // instructions from the reverse map.
1478  NonLocalDepInfo &PInfo = It->second.NonLocalDeps;
1479 
1480  for (unsigned i = 0, e = PInfo.size(); i != e; ++i) {
1481  Instruction *Target = PInfo[i].getResult().getInst();
1482  if (!Target)
1483  continue; // Ignore non-local dep results.
1484  assert(Target->getParent() == PInfo[i].getBB());
1485 
1486  // Eliminating the dirty entry from 'Cache', so update the reverse info.
1487  RemoveFromReverseMap(ReverseNonLocalPtrDeps, Target, P);
1488  }
1489 
1490  // Remove P from NonLocalPointerDeps (which deletes NonLocalDepInfo).
1491  NonLocalPointerDeps.erase(It);
1492 }
1493 
1494 void MemoryDependenceResults::invalidateCachedPointerInfo(Value *Ptr) {
1495  // If Ptr isn't really a pointer, just ignore it.
1496  if (!Ptr->getType()->isPointerTy())
1497  return;
1498  // Flush store info for the pointer.
1499  RemoveCachedNonLocalPointerDependencies(ValueIsLoadPair(Ptr, false));
1500  // Flush load info for the pointer.
1501  RemoveCachedNonLocalPointerDependencies(ValueIsLoadPair(Ptr, true));
1502 }
1503 
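As a usage note (an assumed scenario, not code from this file): a transform that keeps a MemoryDependenceResults object alive while rewriting a pointer value should flush the cached information for that pointer, roughly as follows:

// Illustrative only: NewPtr replaces OldPtr in a pass that keeps MD alive.
OldPtr->replaceAllUsesWith(NewPtr);
// Results cached under OldPtr (for both loads and stores) may now be stale
// or needlessly conservative, so drop them before issuing further queries.
MD.invalidateCachedPointerInfo(OldPtr);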
1504 void MemoryDependenceResults::invalidateCachedPredecessors() {
1505  PredCache.clear();
1506 }
1507 
1508 void MemoryDependenceResults::removeInstruction(Instruction *RemInst) {
1509  // Walk through the Non-local dependencies, removing this one as the value
1510  // for any cached queries.
1511  NonLocalDepMapType::iterator NLDI = NonLocalDeps.find(RemInst);
1512  if (NLDI != NonLocalDeps.end()) {
1513  NonLocalDepInfo &BlockMap = NLDI->second.first;
1514  for (auto &Entry : BlockMap)
1515  if (Instruction *Inst = Entry.getResult().getInst())
1516  RemoveFromReverseMap(ReverseNonLocalDeps, Inst, RemInst);
1517  NonLocalDeps.erase(NLDI);
1518  }
1519 
1520  // If we have a cached local dependence query for this instruction, remove it.
1521  LocalDepMapType::iterator LocalDepEntry = LocalDeps.find(RemInst);
1522  if (LocalDepEntry != LocalDeps.end()) {
1523  // Remove us from DepInst's reverse set now that the local dep info is gone.
1524  if (Instruction *Inst = LocalDepEntry->second.getInst())
1525  RemoveFromReverseMap(ReverseLocalDeps, Inst, RemInst);
1526 
1527  // Remove this local dependency info.
1528  LocalDeps.erase(LocalDepEntry);
1529  }
1530 
1531  // If we have any cached pointer dependencies on this instruction, remove
1532  // them. If the instruction has non-pointer type, then it can't be a pointer
1533  // base.
1534 
1535  // Remove it from both the load info and the store info. The instruction
1536  // can't be in either of these maps if it is non-pointer.
1537  if (RemInst->getType()->isPointerTy()) {
1538  RemoveCachedNonLocalPointerDependencies(ValueIsLoadPair(RemInst, false));
1539  RemoveCachedNonLocalPointerDependencies(ValueIsLoadPair(RemInst, true));
1540  }
1541 
1542  // Loop over all of the things that depend on the instruction we're removing.
1543  SmallVector<std::pair<Instruction *, Instruction *>, 8> ReverseDepsToAdd;
1544 
1545  // If we find RemInst as a clobber or Def in any of the maps for other values,
1546  // we need to replace its entry with a dirty version of the instruction after
1547  // it. If RemInst is a terminator, we use a null dirty value.
1548  //
1549  // Using a dirty version of the instruction after RemInst saves having to scan
1550  // the entire block to get to this point.
1551  MemDepResult NewDirtyVal;
1552  if (!RemInst->isTerminator())
1553  NewDirtyVal = MemDepResult::getDirty(&*++RemInst->getIterator());
1554 
1555  ReverseDepMapType::iterator ReverseDepIt = ReverseLocalDeps.find(RemInst);
1556  if (ReverseDepIt != ReverseLocalDeps.end()) {
1557  // RemInst can't be the terminator if it has local stuff depending on it.
1558  assert(!ReverseDepIt->second.empty() && !isa<TerminatorInst>(RemInst) &&
1559  "Nothing can locally depend on a terminator");
1560 
1561  for (Instruction *InstDependingOnRemInst : ReverseDepIt->second) {
1562  assert(InstDependingOnRemInst != RemInst &&
1563  "Already removed our local dep info");
1564 
1565  LocalDeps[InstDependingOnRemInst] = NewDirtyVal;
1566 
1567  // Make sure to remember that new things depend on the instruction in NewDirtyVal.
1568  assert(NewDirtyVal.getInst() &&
1569  "There is no way something else can have "
1570  "a local dep on this if it is a terminator!");
1571  ReverseDepsToAdd.push_back(
1572  std::make_pair(NewDirtyVal.getInst(), InstDependingOnRemInst));
1573  }
1574 
1575  ReverseLocalDeps.erase(ReverseDepIt);
1576 
1577  // Add new reverse deps after scanning the set, to avoid invalidating the
1578  // 'ReverseDeps' reference.
1579  while (!ReverseDepsToAdd.empty()) {
1580  ReverseLocalDeps[ReverseDepsToAdd.back().first].insert(
1581  ReverseDepsToAdd.back().second);
1582  ReverseDepsToAdd.pop_back();
1583  }
1584  }
1585 
1586  ReverseDepIt = ReverseNonLocalDeps.find(RemInst);
1587  if (ReverseDepIt != ReverseNonLocalDeps.end()) {
1588  for (Instruction *I : ReverseDepIt->second) {
1589  assert(I != RemInst && "Already removed NonLocalDep info for RemInst");
1590 
1591  PerInstNLInfo &INLD = NonLocalDeps[I];
1592  // The information is now dirty!
1593  INLD.second = true;
1594 
1595  for (auto &Entry : INLD.first) {
1596  if (Entry.getResult().getInst() != RemInst)
1597  continue;
1598 
1599  // Convert to a dirty entry for the subsequent instruction.
1600  Entry.setResult(NewDirtyVal);
1601 
1602  if (Instruction *NextI = NewDirtyVal.getInst())
1603  ReverseDepsToAdd.push_back(std::make_pair(NextI, I));
1604  }
1605  }
1606 
1607  ReverseNonLocalDeps.erase(ReverseDepIt);
1608 
1609  // Add new reverse deps after scanning the set, to avoid invalidating the set we just iterated.
1610  while (!ReverseDepsToAdd.empty()) {
1611  ReverseNonLocalDeps[ReverseDepsToAdd.back().first].insert(
1612  ReverseDepsToAdd.back().second);
1613  ReverseDepsToAdd.pop_back();
1614  }
1615  }
1616 
1617  // If the instruction is in ReverseNonLocalPtrDeps then it appears as a
1618  // value in the NonLocalPointerDeps info.
1619  ReverseNonLocalPtrDepTy::iterator ReversePtrDepIt =
1620  ReverseNonLocalPtrDeps.find(RemInst);
1621  if (ReversePtrDepIt != ReverseNonLocalPtrDeps.end()) {
1622  SmallVector<std::pair<Instruction *, ValueIsLoadPair>, 8>
1623  ReversePtrDepsToAdd;
1624 
1625  for (ValueIsLoadPair P : ReversePtrDepIt->second) {
1626  assert(P.getPointer() != RemInst &&
1627  "Already removed NonLocalPointerDeps info for RemInst");
1628 
1629  NonLocalDepInfo &NLPDI = NonLocalPointerDeps[P].NonLocalDeps;
1630 
1631  // The cache is not valid for any specific block anymore.
1632  NonLocalPointerDeps[P].Pair = BBSkipFirstBlockPair();
1633 
1634  // Update any entries for RemInst to use the instruction after it.
1635  for (auto &Entry : NLPDI) {
1636  if (Entry.getResult().getInst() != RemInst)
1637  continue;
1638 
1639  // Convert to a dirty entry for the subsequent instruction.
1640  Entry.setResult(NewDirtyVal);
1641 
1642  if (Instruction *NewDirtyInst = NewDirtyVal.getInst())
1643  ReversePtrDepsToAdd.push_back(std::make_pair(NewDirtyInst, P));
1644  }
1645 
1646  // Re-sort the NonLocalDepInfo. Changing the dirty entry to its
1647  // subsequent value may invalidate the sortedness.
1648  std::sort(NLPDI.begin(), NLPDI.end());
1649  }
1650 
1651  ReverseNonLocalPtrDeps.erase(ReversePtrDepIt);
1652 
1653  while (!ReversePtrDepsToAdd.empty()) {
1654  ReverseNonLocalPtrDeps[ReversePtrDepsToAdd.back().first].insert(
1655  ReversePtrDepsToAdd.back().second);
1656  ReversePtrDepsToAdd.pop_back();
1657  }
1658  }
1659 
1660  assert(!NonLocalDeps.count(RemInst) && "RemInst got reinserted?");
1661  DEBUG(verifyRemoved(RemInst));
1662 }
1663 
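The usual calling pattern (an assumed client, not part of this file) is to notify the analysis before actually deleting the instruction, so that the reverse maps can be rewritten to dirty entries first:

// Illustrative only: erasing a dead store while MemDep results are cached.
MD.removeInstruction(DeadStore);   // fix up caches and reverse maps first
DeadStore->eraseFromParent();      // only then delete the IR instruction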
1664 /// Verify that the specified instruction does not occur in our internal data
1665 /// structures.
1666 ///
1667 /// The verification is done with assertions, so it is a no-op in release builds.
1668 void MemoryDependenceResults::verifyRemoved(Instruction *D) const {
1669 #ifndef NDEBUG
1670  for (const auto &DepKV : LocalDeps) {
1671  assert(DepKV.first != D && "Inst occurs in data structures");
1672  assert(DepKV.second.getInst() != D && "Inst occurs in data structures");
1673  }
1674 
1675  for (const auto &DepKV : NonLocalPointerDeps) {
1676  assert(DepKV.first.getPointer() != D && "Inst occurs in NLPD map key");
1677  for (const auto &Entry : DepKV.second.NonLocalDeps)
1678  assert(Entry.getResult().getInst() != D && "Inst occurs as NLPD value");
1679  }
1680 
1681  for (const auto &DepKV : NonLocalDeps) {
1682  assert(DepKV.first != D && "Inst occurs in data structures");
1683  const PerInstNLInfo &INLD = DepKV.second;
1684  for (const auto &Entry : INLD.first)
1685  assert(Entry.getResult().getInst() != D &&
1686  "Inst occurs in data structures");
1687  }
1688 
1689  for (const auto &DepKV : ReverseLocalDeps) {
1690  assert(DepKV.first != D && "Inst occurs in data structures");
1691  for (Instruction *Inst : DepKV.second)
1692  assert(Inst != D && "Inst occurs in data structures");
1693  }
1694 
1695  for (const auto &DepKV : ReverseNonLocalDeps) {
1696  assert(DepKV.first != D && "Inst occurs in data structures");
1697  for (Instruction *Inst : DepKV.second)
1698  assert(Inst != D && "Inst occurs in data structures");
1699  }
1700 
1701  for (const auto &DepKV : ReverseNonLocalPtrDeps) {
1702  assert(DepKV.first != D && "Inst occurs in rev NLPD map");
1703 
1704  for (ValueIsLoadPair P : DepKV.second)
1705  assert(P != ValueIsLoadPair(D, false) && P != ValueIsLoadPair(D, true) &&
1706  "Inst occurs in ReverseNonLocalPtrDeps map");
1707  }
1708 #endif
1709 }
1710 
1711 AnalysisKey MemoryDependenceAnalysis::Key;
1712 
1713 MemoryDependenceResults
1714 MemoryDependenceAnalysis::run(Function &F, FunctionAnalysisManager &AM) {
1715  auto &AA = AM.getResult<AAManager>(F);
1716  auto &AC = AM.getResult<AssumptionAnalysis>(F);
1717  auto &TLI = AM.getResult<TargetLibraryAnalysis>(F);
1718  auto &DT = AM.getResult<DominatorTreeAnalysis>(F);
1719  return MemoryDependenceResults(AA, AC, TLI, DT);
1720 }
1721 
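A minimal sketch of how a new-pass-manager pass would obtain and query this result; the pass name and body are assumed for illustration:

// Illustrative only.
PreservedAnalyses MyMemDepConsumerPass::run(Function &F,
                                            FunctionAnalysisManager &AM) {
  auto &MD = AM.getResult<MemoryDependenceAnalysis>(F);
  for (BasicBlock &BB : F)
    for (Instruction &I : BB)
      if (auto *LI = dyn_cast<LoadInst>(&I)) {
        MemDepResult Dep = MD.getDependency(LI);
        (void)Dep; // e.g. feed a redundancy-elimination decision
      }
  return PreservedAnalyses::all();
}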
1722 char MemoryDependenceWrapperPass::ID = 0;
1723 
1724 INITIALIZE_PASS_BEGIN(MemoryDependenceWrapperPass, "memdep",
1725                       "Memory Dependence Analysis", false, true)
1726 INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
1727 INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
1728 INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
1729 INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
1730 INITIALIZE_PASS_END(MemoryDependenceWrapperPass, "memdep",
1731                     "Memory Dependence Analysis", false, true)
1732 
1733 MemoryDependenceWrapperPass::MemoryDependenceWrapperPass() : FunctionPass(ID) {
1734  initializeMemoryDependenceWrapperPassPass(*PassRegistry::getPassRegistry());
1735 }
1736 
1737 MemoryDependenceWrapperPass::~MemoryDependenceWrapperPass() = default;
1738 
1739 void MemoryDependenceWrapperPass::releaseMemory() {
1740  MemDep.reset();
1741 }
1742 
1743 void MemoryDependenceWrapperPass::getAnalysisUsage(AnalysisUsage &AU) const {
1744  AU.setPreservesAll();
1745  AU.addRequired<AssumptionCacheTracker>();
1746  AU.addRequiredTransitive<AAResultsWrapperPass>();
1747  AU.addRequiredTransitive<DominatorTreeWrapperPass>();
1748  AU.addRequiredTransitive<TargetLibraryInfoWrapperPass>();
1749 }
1750 
1751 bool MemoryDependenceResults::invalidate(Function &F, const PreservedAnalyses &PA,
1752                                          FunctionAnalysisManager::Invalidator &Inv) {
1753  // Check whether our analysis is preserved.
1754  auto PAC = PA.getChecker<MemoryDependenceAnalysis>();
1755  if (!PAC.preserved() && !PAC.preservedSet<AllAnalysesOn<Function>>())
1756  // If not, give up now.
1757  return true;
1758 
1759  // Check whether the analyses we depend on became invalid for any reason.
1760  if (Inv.invalidate<AAManager>(F, PA) ||
1761  Inv.invalidate<AssumptionAnalysis>(F, PA) ||
1762  Inv.invalidate<DominatorTreeAnalysis>(F, PA))
1763  return true;
1764 
1765  // Otherwise this analysis result remains valid.
1766  return false;
1767 }
1768 
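For the invalidation logic above to report the result as still valid, a transform has to preserve both this analysis and the analyses it depends on. A hedged sketch of the tail of such a pass's run() (assumed context, not code from this file):

// Illustrative only: what a memdep-preserving transform might return.
PreservedAnalyses PA = PreservedAnalyses::none();
PA.preserve<MemoryDependenceAnalysis>();
PA.preserve<AAManager>();
PA.preserve<AssumptionAnalysis>();
PA.preserve<DominatorTreeAnalysis>();
return PA;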
1769 unsigned MemoryDependenceResults::getDefaultBlockScanLimit() const {
1770  return BlockScanLimit;
1771 }
1772 
1773 bool MemoryDependenceWrapperPass::runOnFunction(Function &F) {
1774  auto &AA = getAnalysis<AAResultsWrapperPass>().getAAResults();
1775  auto &AC = getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
1776  auto &TLI = getAnalysis<TargetLibraryInfoWrapperPass>().getTLI();
1777  auto &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
1778  MemDep.emplace(AA, AC, TLI, DT);
1779  return false;
1780 }
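And the corresponding pattern for a legacy-pass-manager client (the pass name is assumed for illustration): declare the dependency in getAnalysisUsage and pull the results out of the wrapper:

// Illustrative only.
void MyLegacyClientPass::getAnalysisUsage(AnalysisUsage &AU) const {
  AU.addRequired<MemoryDependenceWrapperPass>();
  AU.setPreservesAll();
}

bool MyLegacyClientPass::runOnFunction(Function &F) {
  MemoryDependenceResults &MD =
      getAnalysis<MemoryDependenceWrapperPass>().getMemDep();
  // ... query MD.getDependency(...) / MD.getNonLocalPointerDependency(...) ...
  return false;
}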