LLVM  9.0.0svn
MemoryDependenceAnalysis.cpp
1 //===- MemoryDependenceAnalysis.cpp - Mem Deps Implementation -------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file implements an analysis that determines, for a given memory
10 // operation, what preceding memory operations it depends on. It builds on
11 // alias analysis information, and tries to provide a lazy, caching interface to
12 // a common kind of alias information query.
13 //
14 //===----------------------------------------------------------------------===//
15 
16 #include "llvm/Analysis/MemoryDependenceAnalysis.h"
17 #include "llvm/ADT/DenseMap.h"
18 #include "llvm/ADT/STLExtras.h"
19 #include "llvm/ADT/SmallPtrSet.h"
20 #include "llvm/ADT/SmallVector.h"
21 #include "llvm/ADT/Statistic.h"
22 #include "llvm/Analysis/AliasAnalysis.h"
23 #include "llvm/Analysis/AssumptionCache.h"
24 #include "llvm/Analysis/MemoryBuiltins.h"
25 #include "llvm/Analysis/MemoryLocation.h"
26 #include "llvm/Analysis/OrderedBasicBlock.h"
27 #include "llvm/Analysis/PHITransAddr.h"
28 #include "llvm/Analysis/PhiValues.h"
29 #include "llvm/Analysis/TargetLibraryInfo.h"
30 #include "llvm/Analysis/ValueTracking.h"
31 #include "llvm/IR/Attributes.h"
32 #include "llvm/IR/BasicBlock.h"
33 #include "llvm/IR/Constants.h"
34 #include "llvm/IR/DataLayout.h"
35 #include "llvm/IR/DerivedTypes.h"
36 #include "llvm/IR/Dominators.h"
37 #include "llvm/IR/Function.h"
38 #include "llvm/IR/InstrTypes.h"
39 #include "llvm/IR/Instruction.h"
40 #include "llvm/IR/Instructions.h"
41 #include "llvm/IR/IntrinsicInst.h"
42 #include "llvm/IR/LLVMContext.h"
43 #include "llvm/IR/Metadata.h"
44 #include "llvm/IR/Module.h"
45 #include "llvm/IR/PredIteratorCache.h"
46 #include "llvm/IR/Type.h"
47 #include "llvm/IR/Use.h"
48 #include "llvm/IR/User.h"
49 #include "llvm/IR/Value.h"
50 #include "llvm/Pass.h"
51 #include "llvm/Support/AtomicOrdering.h"
52 #include "llvm/Support/Casting.h"
53 #include "llvm/Support/CommandLine.h"
54 #include "llvm/Support/Compiler.h"
55 #include "llvm/Support/Debug.h"
56 #include "llvm/Support/MathExtras.h"
57 #include <algorithm>
58 #include <cassert>
59 #include <cstdint>
60 #include <iterator>
61 #include <utility>
62 
63 using namespace llvm;
64 
65 #define DEBUG_TYPE "memdep"
66 
67 STATISTIC(NumCacheNonLocal, "Number of fully cached non-local responses");
68 STATISTIC(NumCacheDirtyNonLocal, "Number of dirty cached non-local responses");
69 STATISTIC(NumUncacheNonLocal, "Number of uncached non-local responses");
70 
71 STATISTIC(NumCacheNonLocalPtr,
72  "Number of fully cached non-local ptr responses");
73 STATISTIC(NumCacheDirtyNonLocalPtr,
74  "Number of cached, but dirty, non-local ptr responses");
75 STATISTIC(NumUncacheNonLocalPtr, "Number of uncached non-local ptr responses");
76 STATISTIC(NumCacheCompleteNonLocalPtr,
77  "Number of block queries that were completely cached");
78 
79 // Limit for the number of instructions to scan in a block.
80 
81 static cl::opt<unsigned> BlockScanLimit(
82  "memdep-block-scan-limit", cl::Hidden, cl::init(100),
83  cl::desc("The number of instructions to scan in a block in memory "
84  "dependency analysis (default = 100)"));
85 
86 static cl::opt<unsigned>
87  BlockNumberLimit("memdep-block-number-limit", cl::Hidden, cl::init(1000),
88  cl::desc("The number of blocks to scan during memory "
89  "dependency analysis (default = 1000)"));
90 
91 // Limit on the number of memdep results to process.
92 static const unsigned int NumResultsLimit = 100;
93 
94 /// This is a helper function that removes Val from 'Inst's set in ReverseMap.
95 ///
96 /// If the set becomes empty, remove Inst's entry.
97 template <typename KeyTy>
98 static void
99 RemoveFromReverseMap(DenseMap<Instruction *, SmallPtrSet<KeyTy, 4>> &ReverseMap,
100  Instruction *Inst, KeyTy Val) {
101  typename DenseMap<Instruction *, SmallPtrSet<KeyTy, 4>>::iterator InstIt =
102  ReverseMap.find(Inst);
103  assert(InstIt != ReverseMap.end() && "Reverse map out of sync?");
104  bool Found = InstIt->second.erase(Val);
105  assert(Found && "Invalid reverse map!");
106  (void)Found;
107  if (InstIt->second.empty())
108  ReverseMap.erase(InstIt);
109 }
110 
111 /// If the given instruction references a specific memory location, fill in Loc
112 /// with the details, otherwise set Loc.Ptr to null.
113 ///
114 /// Returns a ModRefInfo value describing the general behavior of the
115 /// instruction.
116 static ModRefInfo GetLocation(const Instruction *Inst, MemoryLocation &Loc,
117  const TargetLibraryInfo &TLI) {
118  if (const LoadInst *LI = dyn_cast<LoadInst>(Inst)) {
119  if (LI->isUnordered()) {
120  Loc = MemoryLocation::get(LI);
121  return ModRefInfo::Ref;
122  }
123  if (LI->getOrdering() == AtomicOrdering::Monotonic) {
124  Loc = MemoryLocation::get(LI);
125  return ModRefInfo::ModRef;
126  }
127  Loc = MemoryLocation();
128  return ModRefInfo::ModRef;
129  }
130 
131  if (const StoreInst *SI = dyn_cast<StoreInst>(Inst)) {
132  if (SI->isUnordered()) {
133  Loc = MemoryLocation::get(SI);
134  return ModRefInfo::Mod;
135  }
136  if (SI->getOrdering() == AtomicOrdering::Monotonic) {
137  Loc = MemoryLocation::get(SI);
138  return ModRefInfo::ModRef;
139  }
140  Loc = MemoryLocation();
141  return ModRefInfo::ModRef;
142  }
143 
144  if (const VAArgInst *V = dyn_cast<VAArgInst>(Inst)) {
145  Loc = MemoryLocation::get(V);
146  return ModRefInfo::ModRef;
147  }
148 
149  if (const CallInst *CI = isFreeCall(Inst, &TLI)) {
150  // calls to free() deallocate the entire structure
151  Loc = MemoryLocation(CI->getArgOperand(0));
152  return ModRefInfo::Mod;
153  }
154 
155  if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(Inst)) {
156  switch (II->getIntrinsicID()) {
157  case Intrinsic::lifetime_start:
158  case Intrinsic::lifetime_end:
159  case Intrinsic::invariant_start:
160  Loc = MemoryLocation::getForArgument(II, 1, TLI);
161  // These intrinsics don't really modify the memory, but returning Mod
162  // will allow them to be handled conservatively.
163  return ModRefInfo::Mod;
164  case Intrinsic::invariant_end:
165  Loc = MemoryLocation::getForArgument(II, 2, TLI);
166  // These intrinsics don't really modify the memory, but returning Mod
167  // will allow them to be handled conservatively.
168  return ModRefInfo::Mod;
169  default:
170  break;
171  }
172  }
173 
174  // Otherwise, just do the coarse-grained thing that always works.
175  if (Inst->mayWriteToMemory())
176  return ModRefInfo::ModRef;
177  if (Inst->mayReadFromMemory())
178  return ModRefInfo::Ref;
179  return ModRefInfo::NoModRef;
180 }
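// For illustration (a rough summary, not exhaustive), the classification above
// works out as:
//   %v = load i32, i32* %p             -> Loc = %p, 4 bytes, ModRefInfo::Ref
//   store i32 %v, i32* %p              -> Loc = %p, 4 bytes, ModRefInfo::Mod
//   call void @free(i8* %p)            -> Loc = %p,          ModRefInfo::Mod
//   monotonic atomic load/store of %p  -> Loc = %p,          ModRefInfo::ModRef
// Anything else that may read or write memory gets a null Loc.Ptr and only the
// coarse Ref/ModRef answer.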
181 
182 /// Private helper for finding the local dependencies of a call site.
183 MemDepResult MemoryDependenceResults::getCallDependencyFrom(
184  CallBase *Call, bool isReadOnlyCall, BasicBlock::iterator ScanIt,
185  BasicBlock *BB) {
186  unsigned Limit = BlockScanLimit;
187 
188  // Walk backwards through the block, looking for dependencies.
189  while (ScanIt != BB->begin()) {
190  Instruction *Inst = &*--ScanIt;
191  // Debug intrinsics don't cause dependences and should not affect Limit
192  if (isa<DbgInfoIntrinsic>(Inst))
193  continue;
194 
195  // Limit the amount of scanning we do so we don't end up with quadratic
196  // running time on extreme testcases.
197  --Limit;
198  if (!Limit)
199  return MemDepResult::getUnknown();
200 
201  // If this inst is a memory op, get the pointer it accessed
202  MemoryLocation Loc;
203  ModRefInfo MR = GetLocation(Inst, Loc, TLI);
204  if (Loc.Ptr) {
205  // A simple instruction.
206  if (isModOrRefSet(AA.getModRefInfo(Call, Loc)))
207  return MemDepResult::getClobber(Inst);
208  continue;
209  }
210 
211  if (auto *CallB = dyn_cast<CallBase>(Inst)) {
212  // If these two calls do not interfere, look past it.
213  if (isNoModRef(AA.getModRefInfo(Call, CallB))) {
214  // If the two calls are the same, return Inst as a Def, so that
215  // Call can be found redundant and eliminated.
216  if (isReadOnlyCall && !isModSet(MR) &&
217  Call->isIdenticalToWhenDefined(CallB))
218  return MemDepResult::getDef(Inst);
219 
220  // Otherwise if the two calls don't interact (e.g. CallB is readnone)
221  // keep scanning.
222  continue;
223  } else
224  return MemDepResult::getClobber(Inst);
225  }
226 
227  // If we could not obtain a pointer for the instruction and the instruction
228  // touches memory then assume that this is a dependency.
229  if (isModOrRefSet(MR))
230  return MemDepResult::getClobber(Inst);
231  }
232 
233  // No dependence found. If this is the entry block of the function, it is
234  // unknown, otherwise it is non-local.
235  if (BB != &BB->getParent()->getEntryBlock())
236  return MemDepResult::getNonLocal();
237  return MemDepResult::getNonFuncLocal();
238 }
239 
240 unsigned MemoryDependenceResults::getLoadLoadClobberFullWidthSize(
241  const Value *MemLocBase, int64_t MemLocOffs, unsigned MemLocSize,
242  const LoadInst *LI) {
243  // We can only extend simple integer loads.
244  if (!isa<IntegerType>(LI->getType()) || !LI->isSimple())
245  return 0;
246 
247  // Load widening is hostile to ThreadSanitizer: it may cause false positives
248  // or make the reports more cryptic (access sizes are wrong).
249  if (LI->getParent()->getParent()->hasFnAttribute(Attribute::SanitizeThread))
250  return 0;
251 
252  const DataLayout &DL = LI->getModule()->getDataLayout();
253 
254  // Get the base of this load.
255  int64_t LIOffs = 0;
256  const Value *LIBase =
257  GetPointerBaseWithConstantOffset(LI->getPointerOperand(), LIOffs, DL);
258 
259  // If the two pointers are not based on the same pointer, we can't tell that
260  // they are related.
261  if (LIBase != MemLocBase)
262  return 0;
263 
264  // Okay, the two values are based on the same pointer, but returned as
265  // no-alias. This happens when we have things like two byte loads at "P+1"
266  // and "P+3". Check to see if increasing the size of the "LI" load up to its
267  // alignment (or the largest native integer type) will allow us to load all
268  // the bits required by MemLoc.
269 
270  // If MemLoc is before LI, then no widening of LI will help us out.
271  if (MemLocOffs < LIOffs)
272  return 0;
273 
274  // Get the alignment of the load in bytes. We assume that it is safe to load
275  // any legal integer up to this size without a problem. For example, if we're
276  // looking at an i8 load on x86-32 that is known 1024 byte aligned, we can
277  // widen it up to an i32 load. If it is known 2-byte aligned, we can widen it
278  // to i16.
279  unsigned LoadAlign = LI->getAlignment();
280 
281  int64_t MemLocEnd = MemLocOffs + MemLocSize;
282 
283  // If no amount of rounding up will let MemLoc fit into LI, then bail out.
284  if (LIOffs + LoadAlign < MemLocEnd)
285  return 0;
286 
287  // This is the size of the load to try. Start with the next larger power of
288  // two.
289  unsigned NewLoadByteSize = LI->getType()->getPrimitiveSizeInBits() / 8U;
290  NewLoadByteSize = NextPowerOf2(NewLoadByteSize);
291 
292  while (true) {
293  // If this load size is bigger than our known alignment or would not fit
294  // into a native integer register, then we fail.
295  if (NewLoadByteSize > LoadAlign ||
296  !DL.fitsInLegalInteger(NewLoadByteSize * 8))
297  return 0;
298 
299  if (LIOffs + NewLoadByteSize > MemLocEnd &&
300  (LI->getParent()->getParent()->hasFnAttribute(
301  Attribute::SanitizeAddress) ||
302  LI->getParent()->getParent()->hasFnAttribute(
303  Attribute::SanitizeHWAddress)))
304  // We will be reading past the location accessed by the original program.
305  // While this is safe in a regular build, Address Safety analysis tools
306  // may start reporting false warnings. So, don't do widening.
307  return 0;
308 
309  // If a load of this width would include all of MemLoc, then we succeed.
310  if (LIOffs + NewLoadByteSize >= MemLocEnd)
311  return NewLoadByteSize;
312 
313  NewLoadByteSize <<= 1;
314  }
315 }
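// Worked example with made-up numbers: suppose LI is an i8 load that is 4-byte
// aligned at offset 4 from the common base, and MemLoc covers 1 byte at offset
// 6, so MemLocEnd = 7. The candidate width starts at NextPowerOf2(1) = 2;
// 4 + 2 = 6 < 7, so it doubles to 4; 4 + 4 = 8 >= 7, and (assuming i32 is a
// legal integer for the target) the function returns 4, i.e. the i8 load may
// be widened to an i32 load that covers both locations.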
316 
317 static bool isVolatile(Instruction *Inst) {
318  if (auto *LI = dyn_cast<LoadInst>(Inst))
319  return LI->isVolatile();
320  if (auto *SI = dyn_cast<StoreInst>(Inst))
321  return SI->isVolatile();
322  if (auto *AI = dyn_cast<AtomicCmpXchgInst>(Inst))
323  return AI->isVolatile();
324  return false;
325 }
326 
327 MemDepResult MemoryDependenceResults::getPointerDependencyFrom(
328  const MemoryLocation &MemLoc, bool isLoad, BasicBlock::iterator ScanIt,
329  BasicBlock *BB, Instruction *QueryInst, unsigned *Limit) {
330  MemDepResult InvariantGroupDependency = MemDepResult::getUnknown();
331  if (QueryInst != nullptr) {
332  if (auto *LI = dyn_cast<LoadInst>(QueryInst)) {
333  InvariantGroupDependency = getInvariantGroupPointerDependency(LI, BB);
334 
335  if (InvariantGroupDependency.isDef())
336  return InvariantGroupDependency;
337  }
338  }
339  MemDepResult SimpleDep = getSimplePointerDependencyFrom(
340  MemLoc, isLoad, ScanIt, BB, QueryInst, Limit);
341  if (SimpleDep.isDef())
342  return SimpleDep;
343  // A non-local invariant.group dependency indicates there is a non-local Def
344  // (it only returns nonLocal if it finds a nonLocal def), which is better than
345  // a local clobber and everything else.
346  if (InvariantGroupDependency.isNonLocal())
347  return InvariantGroupDependency;
348 
349  assert(InvariantGroupDependency.isUnknown() &&
350  "InvariantGroupDependency should be only unknown at this point");
351  return SimpleDep;
352 }
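// In short, getPointerDependencyFrom's precedence is: a local invariant.group
// Def wins, then a Def from the simple walk, then a non-local invariant.group
// Def, and only then whatever clobber/unknown result the simple walk produced.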
353 
354 MemDepResult
355 MemoryDependenceResults::getInvariantGroupPointerDependency(LoadInst *LI,
356  BasicBlock *BB) {
357 
358  if (!LI->getMetadata(LLVMContext::MD_invariant_group))
359  return MemDepResult::getUnknown();
360 
361  // Take the pointer operand after stripping all casts and zero-index geps.
362  // This way we only need to search the cast graph downward.
363  Value *LoadOperand = LI->getPointerOperand()->stripPointerCasts();
364 
365  // It is not safe to walk the use list of a global value, because function
366  // passes aren't allowed to look outside their functions.
367  // FIXME: this could be fixed by filtering instructions from outside
368  // of the current function.
369  if (isa<GlobalValue>(LoadOperand))
370  return MemDepResult::getUnknown();
371 
372  // Queue to process all pointers that are equivalent to load operand.
373  SmallVector<const Value *, 8> LoadOperandsQueue;
374  LoadOperandsQueue.push_back(LoadOperand);
375 
376  Instruction *ClosestDependency = nullptr;
377  // The order of instructions in the uses list is unpredictable. In order to
378  // always get the same result, we look for the closest dominating instruction.
379  auto GetClosestDependency = [this](Instruction *Best, Instruction *Other) {
380  assert(Other && "Must call it with not null instruction");
381  if (Best == nullptr || DT.dominates(Best, Other))
382  return Other;
383  return Best;
384  };
385 
386  // FIXME: This loop is O(N^2) because dominates can be O(n) and in worst case
387  // we will see all the instructions. This should be fixed in MSSA.
388  while (!LoadOperandsQueue.empty()) {
389  const Value *Ptr = LoadOperandsQueue.pop_back_val();
390  assert(Ptr && !isa<GlobalValue>(Ptr) &&
391  "Null or GlobalValue should not be inserted");
392 
393  for (const Use &Us : Ptr->uses()) {
394  auto *U = dyn_cast<Instruction>(Us.getUser());
395  if (!U || U == LI || !DT.dominates(U, LI))
396  continue;
397 
398  // A bitcast or gep with all-zero indices uses Ptr. Add it to the queue to
399  // check its users. U = bitcast Ptr
400  if (isa<BitCastInst>(U)) {
401  LoadOperandsQueue.push_back(U);
402  continue;
403  }
404  // Gep with zeros is equivalent to bitcast.
405  // FIXME: we are not sure if some bitcast should be canonicalized to gep 0
406  // or gep 0 to bitcast because of SROA, so there are 2 forms. When
407  // typeless pointers are ready, both cases will be gone
408  // (and this BFS also won't be needed).
409  if (auto *GEP = dyn_cast<GetElementPtrInst>(U))
410  if (GEP->hasAllZeroIndices()) {
411  LoadOperandsQueue.push_back(U);
412  continue;
413  }
414 
415  // If we hit load/store with the same invariant.group metadata (and the
416  // same pointer operand), we can assume that the value pointed to by the
417  // pointer operand didn't change.
418  if ((isa<LoadInst>(U) || isa<StoreInst>(U)) &&
419  U->getMetadata(LLVMContext::MD_invariant_group) != nullptr)
420  ClosestDependency = GetClosestDependency(ClosestDependency, U);
421  }
422  }
423 
424  if (!ClosestDependency)
425  return MemDepResult::getUnknown();
426  if (ClosestDependency->getParent() == BB)
427  return MemDepResult::getDef(ClosestDependency);
428  // Def(U) can't be returned here because it is non-local. If no local
429  // dependency is found, return nonLocal, counting on the caller to invoke
430  // getNonLocalPointerDependency, which will return the cached
431  // result.
432  NonLocalDefsCache.try_emplace(
433  LI, NonLocalDepResult(ClosestDependency->getParent(),
434  MemDepResult::getDef(ClosestDependency), nullptr));
435  ReverseNonLocalDefsCache[ClosestDependency].insert(LI);
436  return MemDepResult::getNonLocal();
437 }
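// Illustrative IR sketch (names invented) of what this walk can catch:
//   store i32 42, i32* %ptr, !invariant.group !0
//   ...
//   %v = load i32, i32* %ptr, !invariant.group !0
// If the store dominates the load and both are tagged !invariant.group on the
// same (cast/zero-gep equivalent) pointer, the store is reported as the load's
// Def, either directly or via NonLocalDefsCache when it sits in another block.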
438 
439 MemDepResult MemoryDependenceResults::getSimplePointerDependencyFrom(
440  const MemoryLocation &MemLoc, bool isLoad, BasicBlock::iterator ScanIt,
441  BasicBlock *BB, Instruction *QueryInst, unsigned *Limit) {
442  bool isInvariantLoad = false;
443 
444  if (!Limit) {
445  unsigned DefaultLimit = BlockScanLimit;
446  return getSimplePointerDependencyFrom(MemLoc, isLoad, ScanIt, BB, QueryInst,
447  &DefaultLimit);
448  }
449 
450  // We must be careful with atomic accesses, as they may allow another thread
451  // to touch this location, clobbering it. We are conservative: if the
452  // QueryInst is not a simple (non-atomic) memory access, we automatically
453  // return getClobber.
454  // If it is simple, we know based on the results of
455  // "Compiler testing via a theory of sound optimisations in the C11/C++11
456  // memory model" in PLDI 2013, that a non-atomic location can only be
457  // clobbered between a pair of a release and an acquire action, with no
458  // access to the location in between.
459  // Here is an example for giving the general intuition behind this rule.
460  // In the following code:
461  // store x 0;
462  // release action; [1]
463  // acquire action; [4]
464  // %val = load x;
465  // It is unsafe to replace %val by 0 because another thread may be running:
466  // acquire action; [2]
467  // store x 42;
468  // release action; [3]
469  // with synchronization from 1 to 2 and from 3 to 4, resulting in %val
470  // being 42. A key property of this program however is that if either
471  // 1 or 4 were missing, there would be a race between the store of 42 and
472  // either the store of 0 or the load (making the whole program racy).
473  // The paper mentioned above shows that the same property is respected
474  // by every program that can detect any optimization of that kind: either
475  // it is racy (undefined) or there is a release followed by an acquire
476  // between the pair of accesses under consideration.
477 
478  // If the load is invariant, we "know" that it doesn't alias *any* write. We
479  // do want to respect mustalias results since defs are useful for value
480  // forwarding, but any mayalias write can be assumed to be noalias.
481  // Arguably, this logic should be pushed inside AliasAnalysis itself.
482  if (isLoad && QueryInst) {
483  LoadInst *LI = dyn_cast<LoadInst>(QueryInst);
484  if (LI && LI->getMetadata(LLVMContext::MD_invariant_load) != nullptr)
485  isInvariantLoad = true;
486  }
487 
488  const DataLayout &DL = BB->getModule()->getDataLayout();
489 
490  // Create a numbered basic block to lazily compute and cache instruction
491  // positions inside a BB. This is used to provide fast queries for relative
492  // position between two instructions in a BB and can be used by
493  // AliasAnalysis::callCapturesBefore.
494  OrderedBasicBlock OBB(BB);
495 
496  // Return "true" if and only if the instruction I is either a non-simple
497  // load or a non-simple store.
498  auto isNonSimpleLoadOrStore = [](Instruction *I) -> bool {
499  if (auto *LI = dyn_cast<LoadInst>(I))
500  return !LI->isSimple();
501  if (auto *SI = dyn_cast<StoreInst>(I))
502  return !SI->isSimple();
503  return false;
504  };
505 
506  // Return "true" if I is not a load and not a store, but it does access
507  // memory.
508  auto isOtherMemAccess = [](Instruction *I) -> bool {
509  return !isa<LoadInst>(I) && !isa<StoreInst>(I) && I->mayReadOrWriteMemory();
510  };
511 
512  // Walk backwards through the basic block, looking for dependencies.
513  while (ScanIt != BB->begin()) {
514  Instruction *Inst = &*--ScanIt;
515 
516  if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(Inst))
517  // Debug intrinsics don't (and can't) cause dependencies.
518  if (isa<DbgInfoIntrinsic>(II))
519  continue;
520 
521  // Limit the amount of scanning we do so we don't end up with quadratic
522  // running time on extreme testcases.
523  --*Limit;
524  if (!*Limit)
525  return MemDepResult::getUnknown();
526 
527  if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(Inst)) {
528  // If we reach a lifetime begin or end marker, then the query ends here
529  // because the value is undefined.
530  if (II->getIntrinsicID() == Intrinsic::lifetime_start) {
531  // FIXME: This only considers queries directly on the invariant-tagged
532  // pointer, not on query pointers that are indexed off of them. It'd
533  // be nice to handle that at some point (the right approach is to use
534  // GetPointerBaseWithConstantOffset).
535  if (AA.isMustAlias(MemoryLocation(II->getArgOperand(1)), MemLoc))
536  return MemDepResult::getDef(II);
537  continue;
538  }
539  }
540 
541  // Values depend on loads if the pointers are must aliased. This means
542  // that a load depends on another must aliased load from the same value.
543  // One exception is atomic loads: a value can depend on an atomic load that
544  // it does not alias with when this atomic load indicates that another
545  // thread may be accessing the location.
546  if (LoadInst *LI = dyn_cast<LoadInst>(Inst)) {
547  // While volatile accesses cannot be eliminated, they do not have to clobber
548  // non-aliasing locations, as normal accesses, for example, can be safely
549  // reordered with volatile accesses.
550  if (LI->isVolatile()) {
551  if (!QueryInst)
552  // Original QueryInst *may* be volatile
553  return MemDepResult::getClobber(LI);
554  if (isVolatile(QueryInst))
555  // Ordering required if QueryInst is itself volatile
556  return MemDepResult::getClobber(LI);
557  // Otherwise, volatile doesn't imply any special ordering
558  }
559 
560  // Atomic loads have complications involved.
561  // A Monotonic (or higher) load is OK if the query inst is itself not
562  // atomic.
563  // FIXME: This is overly conservative.
564  if (LI->isAtomic() && isStrongerThanUnordered(LI->getOrdering())) {
565  if (!QueryInst || isNonSimpleLoadOrStore(QueryInst) ||
566  isOtherMemAccess(QueryInst))
567  return MemDepResult::getClobber(LI);
568  if (LI->getOrdering() != AtomicOrdering::Monotonic)
569  return MemDepResult::getClobber(LI);
570  }
571 
572  MemoryLocation LoadLoc = MemoryLocation::get(LI);
573 
574  // If we found a pointer, check if it could be the same as our pointer.
575  AliasResult R = AA.alias(LoadLoc, MemLoc);
576 
577  if (isLoad) {
578  if (R == NoAlias)
579  continue;
580 
581  // Must aliased loads are defs of each other.
582  if (R == MustAlias)
583  return MemDepResult::getDef(Inst);
584 
585 #if 0 // FIXME: Temporarily disabled. GVN is cleverly rewriting loads
586  // in terms of clobbering loads, but since it does this by looking
587  // at the clobbering load directly, it doesn't know about any
588  // phi translation that may have happened along the way.
589 
590  // If we have a partial alias, then return this as a clobber for the
591  // client to handle.
592  if (R == PartialAlias)
593  return MemDepResult::getClobber(Inst);
594 #endif
595 
596  // Random may-alias loads don't depend on each other without a
597  // dependence.
598  continue;
599  }
600 
601  // Stores don't depend on other non-aliased accesses.
602  if (R == NoAlias)
603  continue;
604 
605  // Stores don't alias loads from read-only memory.
606  if (AA.pointsToConstantMemory(LoadLoc))
607  continue;
608 
609  // Stores depend on may/must aliased loads.
610  return MemDepResult::getDef(Inst);
611  }
612 
613  if (StoreInst *SI = dyn_cast<StoreInst>(Inst)) {
614  // Atomic stores have complications involved.
615  // A Monotonic store is OK if the query inst is itself not atomic.
616  // FIXME: This is overly conservative.
617  if (!SI->isUnordered() && SI->isAtomic()) {
618  if (!QueryInst || isNonSimpleLoadOrStore(QueryInst) ||
619  isOtherMemAccess(QueryInst))
620  return MemDepResult::getClobber(SI);
621  if (SI->getOrdering() != AtomicOrdering::Monotonic)
622  return MemDepResult::getClobber(SI);
623  }
624 
625  // FIXME: this is overly conservative.
626  // While volatile accesses cannot be eliminated, they do not have to clobber
627  // non-aliasing locations, as normal accesses can for example be reordered
628  // with volatile accesses.
629  if (SI->isVolatile())
630  if (!QueryInst || isNonSimpleLoadOrStore(QueryInst) ||
631  isOtherMemAccess(QueryInst))
632  return MemDepResult::getClobber(SI);
633 
634  // If alias analysis can tell that this store is guaranteed to not modify
635  // the query pointer, ignore it. Use getModRefInfo to handle cases where
636  // the query pointer points to constant memory etc.
637  if (!isModOrRefSet(AA.getModRefInfo(SI, MemLoc)))
638  continue;
639 
640  // Ok, this store might clobber the query pointer. Check to see if it is
641  // a must alias: in this case, we want to return this as a def.
642  // FIXME: Use ModRefInfo::Must bit from getModRefInfo call above.
643  MemoryLocation StoreLoc = MemoryLocation::get(SI);
644 
645  // If we found a pointer, check if it could be the same as our pointer.
646  AliasResult R = AA.alias(StoreLoc, MemLoc);
647 
648  if (R == NoAlias)
649  continue;
650  if (R == MustAlias)
651  return MemDepResult::getDef(Inst);
652  if (isInvariantLoad)
653  continue;
654  return MemDepResult::getClobber(Inst);
655  }
656 
657  // If this is an allocation, and if we know that the accessed pointer is to
658  // the allocation, return Def. This means that there is no dependence and
659  // the access can be optimized based on that. For example, a load could
660  // turn into undef. Note that we can bypass the allocation itself when
661  // looking for a clobber in many cases; that's an alias property and is
662  // handled by BasicAA.
663  if (isa<AllocaInst>(Inst) || isNoAliasFn(Inst, &TLI)) {
664  const Value *AccessPtr = GetUnderlyingObject(MemLoc.Ptr, DL);
665  if (AccessPtr == Inst || AA.isMustAlias(Inst, AccessPtr))
666  return MemDepResult::getDef(Inst);
667  }
668 
669  if (isInvariantLoad)
670  continue;
671 
672  // A release fence requires that all stores complete before it, but does
673  // not prevent the reordering of following loads or stores 'before' the
674  // fence. As a result, we look past it when finding a dependency for
675  // loads. DSE uses this to find preceding stores to delete and thus we
676  // can't bypass the fence if the query instruction is a store.
677  if (FenceInst *FI = dyn_cast<FenceInst>(Inst))
678  if (isLoad && FI->getOrdering() == AtomicOrdering::Release)
679  continue;
680 
681  // See if this instruction (e.g. a call or vaarg) mod/ref's the pointer.
682  ModRefInfo MR = AA.getModRefInfo(Inst, MemLoc);
683  // If necessary, perform additional analysis.
684  if (isModAndRefSet(MR))
685  MR = AA.callCapturesBefore(Inst, MemLoc, &DT, &OBB);
686  switch (clearMust(MR)) {
687  case ModRefInfo::NoModRef:
688  // If the call has no effect on the queried pointer, just ignore it.
689  continue;
690  case ModRefInfo::Mod:
691  return MemDepResult::getClobber(Inst);
692  case ModRefInfo::Ref:
693  // If the call is known to never store to the pointer, and if this is a
694  // load query, we can safely ignore it (scan past it).
695  if (isLoad)
696  continue;
697  LLVM_FALLTHROUGH;
698  default:
699  // Otherwise, there is a potential dependence. Return a clobber.
700  return MemDepResult::getClobber(Inst);
701  }
702  }
703 
704  // No dependence found. If this is the entry block of the function, it is
705  // unknown, otherwise it is non-local.
706  if (BB != &BB->getParent()->getEntryBlock())
707  return MemDepResult::getNonLocal();
708  return MemDepResult::getNonFuncLocal();
709 }
710 
711 MemDepResult MemoryDependenceResults::getDependency(Instruction *QueryInst) {
712  Instruction *ScanPos = QueryInst;
713 
714  // Check for a cached result
715  MemDepResult &LocalCache = LocalDeps[QueryInst];
716 
717  // If the cached entry is non-dirty, just return it. Note that this depends
718  // on MemDepResult's default constructing to 'dirty'.
719  if (!LocalCache.isDirty())
720  return LocalCache;
721 
722  // Otherwise, if we have a dirty entry, we know we can start the scan at that
723  // instruction, which may save us some work.
724  if (Instruction *Inst = LocalCache.getInst()) {
725  ScanPos = Inst;
726 
727  RemoveFromReverseMap(ReverseLocalDeps, Inst, QueryInst);
728  }
729 
730  BasicBlock *QueryParent = QueryInst->getParent();
731 
732  // Do the scan.
733  if (BasicBlock::iterator(QueryInst) == QueryParent->begin()) {
734  // No dependence found. If this is the entry block of the function, it is
735  // unknown, otherwise it is non-local.
736  if (QueryParent != &QueryParent->getParent()->getEntryBlock())
737  LocalCache = MemDepResult::getNonLocal();
738  else
739  LocalCache = MemDepResult::getNonFuncLocal();
740  } else {
741  MemoryLocation MemLoc;
742  ModRefInfo MR = GetLocation(QueryInst, MemLoc, TLI);
743  if (MemLoc.Ptr) {
744  // If we can do a pointer scan, make it happen.
745  bool isLoad = !isModSet(MR);
746  if (auto *II = dyn_cast<IntrinsicInst>(QueryInst))
747  isLoad |= II->getIntrinsicID() == Intrinsic::lifetime_start;
748 
749  LocalCache = getPointerDependencyFrom(
750  MemLoc, isLoad, ScanPos->getIterator(), QueryParent, QueryInst);
751  } else if (auto *QueryCall = dyn_cast<CallBase>(QueryInst)) {
752  bool isReadOnly = AA.onlyReadsMemory(QueryCall);
753  LocalCache = getCallDependencyFrom(QueryCall, isReadOnly,
754  ScanPos->getIterator(), QueryParent);
755  } else
756  // Non-memory instruction.
757  LocalCache = MemDepResult::getUnknown();
758  }
759 
760  // Remember the result!
761  if (Instruction *I = LocalCache.getInst())
762  ReverseLocalDeps[I].insert(QueryInst);
763 
764  return LocalCache;
765 }
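// Minimal usage sketch (hypothetical client code; assumes a pass already holds
// a MemoryDependenceResults &MD):
//   if (auto *Load = dyn_cast<LoadInst>(&I)) {
//     MemDepResult Dep = MD.getDependency(Load);
//     if (Dep.isDef()) {
//       if (auto *Store = dyn_cast<StoreInst>(Dep.getInst()))
//         ; // Load reads exactly what Store wrote: a forwarding candidate.
//     } else if (Dep.isNonLocal()) {
//       ; // Fall back to the non-local query interfaces.
//     }
//   }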
766 
767 #ifndef NDEBUG
768 /// This method is used when -debug is specified to verify that cache arrays
769 /// are properly kept sorted.
770 static void AssertSorted(MemoryDependenceResults::NonLocalDepInfo &Cache,
771  int Count = -1) {
772  if (Count == -1)
773  Count = Cache.size();
774  assert(std::is_sorted(Cache.begin(), Cache.begin() + Count) &&
775  "Cache isn't sorted!");
776 }
777 #endif
778 
779 const MemoryDependenceResults::NonLocalDepInfo &
780 MemoryDependenceResults::getNonLocalCallDependency(CallBase *QueryCall) {
781  assert(getDependency(QueryCall).isNonLocal() &&
782  "getNonLocalCallDependency should only be used on calls with "
783  "non-local deps!");
784  PerInstNLInfo &CacheP = NonLocalDeps[QueryCall];
785  NonLocalDepInfo &Cache = CacheP.first;
786 
787  // This is the set of blocks that need to be recomputed. In the cached case,
788  // this can happen due to instructions being deleted etc. In the uncached
789  // case, this starts out as the set of predecessors we care about.
790  SmallVector<BasicBlock *, 32> DirtyBlocks;
791 
792  if (!Cache.empty()) {
793  // Okay, we have a cache entry. If we know it is not dirty, just return it
794  // with no computation.
795  if (!CacheP.second) {
796  ++NumCacheNonLocal;
797  return Cache;
798  }
799 
800  // If we already have a partially computed set of results, scan them to
801  // determine what is dirty, seeding our initial DirtyBlocks worklist.
802  for (auto &Entry : Cache)
803  if (Entry.getResult().isDirty())
804  DirtyBlocks.push_back(Entry.getBB());
805 
806  // Sort the cache so that we can do fast binary search lookups below.
807  llvm::sort(Cache);
808 
809  ++NumCacheDirtyNonLocal;
810  // cerr << "CACHED CASE: " << DirtyBlocks.size() << " dirty: "
811  // << Cache.size() << " cached: " << *QueryInst;
812  } else {
813  // Seed DirtyBlocks with each of the preds of QueryInst's block.
814  BasicBlock *QueryBB = QueryCall->getParent();
815  for (BasicBlock *Pred : PredCache.get(QueryBB))
816  DirtyBlocks.push_back(Pred);
817  ++NumUncacheNonLocal;
818  }
819 
820  // isReadonlyCall - If this is a read-only call, we can be more aggressive.
821  bool isReadonlyCall = AA.onlyReadsMemory(QueryCall);
822 
823  SmallPtrSet<BasicBlock *, 32> Visited;
824 
825  unsigned NumSortedEntries = Cache.size();
826  LLVM_DEBUG(AssertSorted(Cache));
827 
828  // Iterate while we still have blocks to update.
829  while (!DirtyBlocks.empty()) {
830  BasicBlock *DirtyBB = DirtyBlocks.back();
831  DirtyBlocks.pop_back();
832 
833  // Already processed this block?
834  if (!Visited.insert(DirtyBB).second)
835  continue;
836 
837  // Do a binary search to see if we already have an entry for this block in
838  // the cache set. If so, find it.
839  LLVM_DEBUG(AssertSorted(Cache, NumSortedEntries));
840  NonLocalDepInfo::iterator Entry =
841  std::upper_bound(Cache.begin(), Cache.begin() + NumSortedEntries,
842  NonLocalDepEntry(DirtyBB));
843  if (Entry != Cache.begin() && std::prev(Entry)->getBB() == DirtyBB)
844  --Entry;
845 
846  NonLocalDepEntry *ExistingResult = nullptr;
847  if (Entry != Cache.begin() + NumSortedEntries &&
848  Entry->getBB() == DirtyBB) {
849  // If we already have an entry, and if it isn't already dirty, the block
850  // is done.
851  if (!Entry->getResult().isDirty())
852  continue;
853 
854  // Otherwise, remember this slot so we can update the value.
855  ExistingResult = &*Entry;
856  }
857 
858  // If the dirty entry has a pointer, start scanning from it so we don't have
859  // to rescan the entire block.
860  BasicBlock::iterator ScanPos = DirtyBB->end();
861  if (ExistingResult) {
862  if (Instruction *Inst = ExistingResult->getResult().getInst()) {
863  ScanPos = Inst->getIterator();
864  // We're removing QueryInst's use of Inst.
865  RemoveFromReverseMap<Instruction *>(ReverseNonLocalDeps, Inst,
866  QueryCall);
867  }
868  }
869 
870  // Find out if this block has a local dependency for QueryInst.
871  MemDepResult Dep;
872 
873  if (ScanPos != DirtyBB->begin()) {
874  Dep = getCallDependencyFrom(QueryCall, isReadonlyCall, ScanPos, DirtyBB);
875  } else if (DirtyBB != &DirtyBB->getParent()->getEntryBlock()) {
876  // No dependence found. If this is the entry block of the function, it is
877  // a clobber, otherwise it is unknown.
878  Dep = MemDepResult::getNonLocal();
879  } else {
880  Dep = MemDepResult::getNonFuncLocal();
881  }
882 
883  // If we had a dirty entry for the block, update it. Otherwise, just add
884  // a new entry.
885  if (ExistingResult)
886  ExistingResult->setResult(Dep);
887  else
888  Cache.push_back(NonLocalDepEntry(DirtyBB, Dep));
889 
890  // If the block has a dependency (i.e. it isn't completely transparent to
891  // the value), remember the association!
892  if (!Dep.isNonLocal()) {
893  // Keep the ReverseNonLocalDeps map up to date so we can efficiently
894  // update this when we remove instructions.
895  if (Instruction *Inst = Dep.getInst())
896  ReverseNonLocalDeps[Inst].insert(QueryCall);
897  } else {
898 
899  // If the block *is* completely transparent to the load, we need to check
900  // the predecessors of this block. Add them to our worklist.
901  for (BasicBlock *Pred : PredCache.get(DirtyBB))
902  DirtyBlocks.push_back(Pred);
903  }
904  }
905 
906  return Cache;
907 }
908 
909 void MemoryDependenceResults::getNonLocalPointerDependency(
910  Instruction *QueryInst, SmallVectorImpl<NonLocalDepResult> &Result) {
911  const MemoryLocation Loc = MemoryLocation::get(QueryInst);
912  bool isLoad = isa<LoadInst>(QueryInst);
913  BasicBlock *FromBB = QueryInst->getParent();
914  assert(FromBB);
915 
916  assert(Loc.Ptr->getType()->isPointerTy() &&
917  "Can't get pointer deps of a non-pointer!");
918  Result.clear();
919  {
920  // Check if there is cached Def with invariant.group.
921  auto NonLocalDefIt = NonLocalDefsCache.find(QueryInst);
922  if (NonLocalDefIt != NonLocalDefsCache.end()) {
923  Result.push_back(NonLocalDefIt->second);
924  ReverseNonLocalDefsCache[NonLocalDefIt->second.getResult().getInst()]
925  .erase(QueryInst);
926  NonLocalDefsCache.erase(NonLocalDefIt);
927  return;
928  }
929  }
930  // This routine does not expect to deal with volatile instructions.
931  // Doing so would require piping the QueryInst all the way through.
932  // TODO: volatiles can't be elided, but they can be reordered with other
933  // non-volatile accesses.
934 
935  // We currently give up on any instruction which is ordered, but we do handle
936  // atomic instructions which are unordered.
937  // TODO: Handle ordered instructions
938  auto isOrdered = [](Instruction *Inst) {
939  if (LoadInst *LI = dyn_cast<LoadInst>(Inst)) {
940  return !LI->isUnordered();
941  } else if (StoreInst *SI = dyn_cast<StoreInst>(Inst)) {
942  return !SI->isUnordered();
943  }
944  return false;
945  };
946  if (isVolatile(QueryInst) || isOrdered(QueryInst)) {
947  Result.push_back(NonLocalDepResult(FromBB, MemDepResult::getUnknown(),
948  const_cast<Value *>(Loc.Ptr)));
949  return;
950  }
951  const DataLayout &DL = FromBB->getModule()->getDataLayout();
952  PHITransAddr Address(const_cast<Value *>(Loc.Ptr), DL, &AC);
953 
954  // This is the set of blocks we've inspected, and the pointer we consider in
955  // each block. Because of critical edges, we currently bail out if querying
956  // a block with multiple different pointers. This can happen during PHI
957  // translation.
958  DenseMap<BasicBlock *, Value *> Visited;
959  if (getNonLocalPointerDepFromBB(QueryInst, Address, Loc, isLoad, FromBB,
960  Result, Visited, true))
961  return;
962  Result.clear();
963  Result.push_back(NonLocalDepResult(FromBB, MemDepResult::getUnknown(),
964  const_cast<Value *>(Loc.Ptr)));
965 }
966 
967 /// Compute the memdep value for BB with Pointer/PointeeSize using either
968 /// cached information in Cache or by doing a lookup (which may use dirty cache
969 /// info if available).
970 ///
971 /// If we do a lookup, add the result to the cache.
972 MemDepResult MemoryDependenceResults::GetNonLocalInfoForBlock(
973  Instruction *QueryInst, const MemoryLocation &Loc, bool isLoad,
974  BasicBlock *BB, NonLocalDepInfo *Cache, unsigned NumSortedEntries) {
975 
976  // Do a binary search to see if we already have an entry for this block in
977  // the cache set. If so, find it.
978  NonLocalDepInfo::iterator Entry = std::upper_bound(
979  Cache->begin(), Cache->begin() + NumSortedEntries, NonLocalDepEntry(BB));
980  if (Entry != Cache->begin() && (Entry - 1)->getBB() == BB)
981  --Entry;
982 
983  NonLocalDepEntry *ExistingResult = nullptr;
984  if (Entry != Cache->begin() + NumSortedEntries && Entry->getBB() == BB)
985  ExistingResult = &*Entry;
986 
987  // If we have a cached entry, and it is non-dirty, use it as the value for
988  // this dependency.
989  if (ExistingResult && !ExistingResult->getResult().isDirty()) {
990  ++NumCacheNonLocalPtr;
991  return ExistingResult->getResult();
992  }
993 
994  // Otherwise, we have to scan for the value. If we have a dirty cache
995  // entry, start scanning from its position, otherwise we scan from the end
996  // of the block.
997  BasicBlock::iterator ScanPos = BB->end();
998  if (ExistingResult && ExistingResult->getResult().getInst()) {
999  assert(ExistingResult->getResult().getInst()->getParent() == BB &&
1000  "Instruction invalidated?");
1001  ++NumCacheDirtyNonLocalPtr;
1002  ScanPos = ExistingResult->getResult().getInst()->getIterator();
1003 
1004  // Eliminating the dirty entry from 'Cache', so update the reverse info.
1005  ValueIsLoadPair CacheKey(Loc.Ptr, isLoad);
1006  RemoveFromReverseMap(ReverseNonLocalPtrDeps, &*ScanPos, CacheKey);
1007  } else {
1008  ++NumUncacheNonLocalPtr;
1009  }
1010 
1011  // Scan the block for the dependency.
1012  MemDepResult Dep =
1013  getPointerDependencyFrom(Loc, isLoad, ScanPos, BB, QueryInst);
1014 
1015  // If we had a dirty entry for the block, update it. Otherwise, just add
1016  // a new entry.
1017  if (ExistingResult)
1018  ExistingResult->setResult(Dep);
1019  else
1020  Cache->push_back(NonLocalDepEntry(BB, Dep));
1021 
1022  // If the block has a dependency (i.e. it isn't completely transparent to
1023  // the value), remember the reverse association because we just added it
1024  // to Cache!
1025  if (!Dep.isDef() && !Dep.isClobber())
1026  return Dep;
1027 
1028  // Keep the ReverseNonLocalPtrDeps map up to date so we can efficiently
1029  // update MemDep when we remove instructions.
1030  Instruction *Inst = Dep.getInst();
1031  assert(Inst && "Didn't depend on anything?");
1032  ValueIsLoadPair CacheKey(Loc.Ptr, isLoad);
1033  ReverseNonLocalPtrDeps[Inst].insert(CacheKey);
1034  return Dep;
1035 }
1036 
1037 /// Sort the NonLocalDepInfo cache, given a certain number of elements in the
1038 /// array that are already properly ordered.
1039 ///
1040 /// This is optimized for the case when only a few entries are added.
1041 static void
1042 SortNonLocalDepInfoCache(MemoryDependenceResults::NonLocalDepInfo &Cache,
1043  unsigned NumSortedEntries) {
1044  switch (Cache.size() - NumSortedEntries) {
1045  case 0:
1046  // done, no new entries.
1047  break;
1048  case 2: {
1049  // Two new entries, insert the last one into place.
1050  NonLocalDepEntry Val = Cache.back();
1051  Cache.pop_back();
1052  MemoryDependenceResults::NonLocalDepInfo::iterator Entry =
1053  std::upper_bound(Cache.begin(), Cache.end() - 1, Val);
1054  Cache.insert(Entry, Val);
1055  LLVM_FALLTHROUGH;
1056  }
1057  case 1:
1058  // One new entry, Just insert the new value at the appropriate position.
1059  if (Cache.size() != 1) {
1060  NonLocalDepEntry Val = Cache.back();
1061  Cache.pop_back();
1062  MemoryDependenceResults::NonLocalDepInfo::iterator Entry =
1063  std::upper_bound(Cache.begin(), Cache.end(), Val);
1064  Cache.insert(Entry, Val);
1065  }
1066  break;
1067  default:
1068  // Added many values, do a full scale sort.
1069  llvm::sort(Cache);
1070  break;
1071  }
1072 }
1073 
1074 /// Perform a dependency query based on pointer/pointeesize starting at the end
1075 /// of StartBB.
1076 ///
1077 /// Add any clobber/def results to the results vector and keep track of which
1078 /// blocks are visited in 'Visited'.
1079 ///
1080 /// This has special behavior for the first block queries (when SkipFirstBlock
1081 /// is true). In this special case, it ignores the contents of the specified
1082 /// block and starts returning dependence info for its predecessors.
1083 ///
1084 /// This function returns true on success, or false to indicate that it could
1085 /// not compute dependence information for some reason. This should be treated
1086 /// as a clobber dependence on the first instruction in the predecessor block.
1087 bool MemoryDependenceResults::getNonLocalPointerDepFromBB(
1088  Instruction *QueryInst, const PHITransAddr &Pointer,
1089  const MemoryLocation &Loc, bool isLoad, BasicBlock *StartBB,
1090  SmallVectorImpl<NonLocalDepResult> &Result,
1091  DenseMap<BasicBlock *, Value *> &Visited, bool SkipFirstBlock) {
1092  // Look up the cached info for Pointer.
1093  ValueIsLoadPair CacheKey(Pointer.getAddr(), isLoad);
1094 
1095  // Set up a temporary NLPI value. If the map doesn't yet have an entry for
1096  // CacheKey, this value will be inserted as the associated value. Otherwise,
1097  // it'll be ignored, and we'll have to check to see if the cached size and
1098  // aa tags are consistent with the current query.
1099  NonLocalPointerInfo InitialNLPI;
1100  InitialNLPI.Size = Loc.Size;
1101  InitialNLPI.AATags = Loc.AATags;
1102 
1103  // Get the NLPI for CacheKey, inserting one into the map if it doesn't
1104  // already have one.
1105  std::pair<CachedNonLocalPointerInfo::iterator, bool> Pair =
1106  NonLocalPointerDeps.insert(std::make_pair(CacheKey, InitialNLPI));
1107  NonLocalPointerInfo *CacheInfo = &Pair.first->second;
1108 
1109  // If we already have a cache entry for this CacheKey, we may need to do some
1110  // work to reconcile the cache entry and the current query.
1111  if (!Pair.second) {
1112  if (CacheInfo->Size != Loc.Size) {
1113  bool ThrowOutEverything;
1114  if (CacheInfo->Size.hasValue() && Loc.Size.hasValue()) {
1115  // FIXME: We may be able to do better in the face of results with mixed
1116  // precision. We don't appear to get them in practice, though, so just
1117  // be conservative.
1118  ThrowOutEverything =
1119  CacheInfo->Size.isPrecise() != Loc.Size.isPrecise() ||
1120  CacheInfo->Size.getValue() < Loc.Size.getValue();
1121  } else {
1122  // For our purposes, unknown size > all others.
1123  ThrowOutEverything = !Loc.Size.hasValue();
1124  }
1125 
1126  if (ThrowOutEverything) {
1127  // The query's Size is greater than the cached one. Throw out the
1128  // cached data and proceed with the query at the greater size.
1129  CacheInfo->Pair = BBSkipFirstBlockPair();
1130  CacheInfo->Size = Loc.Size;
1131  for (auto &Entry : CacheInfo->NonLocalDeps)
1132  if (Instruction *Inst = Entry.getResult().getInst())
1133  RemoveFromReverseMap(ReverseNonLocalPtrDeps, Inst, CacheKey);
1134  CacheInfo->NonLocalDeps.clear();
1135  } else {
1136  // This query's Size is less than the cached one. Conservatively restart
1137  // the query using the greater size.
1138  return getNonLocalPointerDepFromBB(
1139  QueryInst, Pointer, Loc.getWithNewSize(CacheInfo->Size), isLoad,
1140  StartBB, Result, Visited, SkipFirstBlock);
1141  }
1142  }
1143 
1144  // If the query's AATags are inconsistent with the cached one,
1145  // conservatively throw out the cached data and restart the query with
1146  // no tag if needed.
1147  if (CacheInfo->AATags != Loc.AATags) {
1148  if (CacheInfo->AATags) {
1149  CacheInfo->Pair = BBSkipFirstBlockPair();
1150  CacheInfo->AATags = AAMDNodes();
1151  for (auto &Entry : CacheInfo->NonLocalDeps)
1152  if (Instruction *Inst = Entry.getResult().getInst())
1153  RemoveFromReverseMap(ReverseNonLocalPtrDeps, Inst, CacheKey);
1154  CacheInfo->NonLocalDeps.clear();
1155  }
1156  if (Loc.AATags)
1157  return getNonLocalPointerDepFromBB(
1158  QueryInst, Pointer, Loc.getWithoutAATags(), isLoad, StartBB, Result,
1159  Visited, SkipFirstBlock);
1160  }
1161  }
1162 
1163  NonLocalDepInfo *Cache = &CacheInfo->NonLocalDeps;
1164 
1165  // If we have valid cached information for exactly the block we are
1166  // investigating, just return it with no recomputation.
1167  if (CacheInfo->Pair == BBSkipFirstBlockPair(StartBB, SkipFirstBlock)) {
1168  // We have a fully cached result for this query then we can just return the
1169  // cached results and populate the visited set. However, we have to verify
1170  // that we don't already have conflicting results for these blocks. Check
1171  // to ensure that if a block in the results set is in the visited set that
1172  // it was for the same pointer query.
1173  if (!Visited.empty()) {
1174  for (auto &Entry : *Cache) {
1175  DenseMap<BasicBlock *, Value *>::iterator VI =
1176  Visited.find(Entry.getBB());
1177  if (VI == Visited.end() || VI->second == Pointer.getAddr())
1178  continue;
1179 
1180  // We have a pointer mismatch in a block. Just return false, saying
1181  // that something was clobbered in this result. We could also do a
1182  // non-fully cached query, but there is little point in doing this.
1183  return false;
1184  }
1185  }
1186 
1187  Value *Addr = Pointer.getAddr();
1188  for (auto &Entry : *Cache) {
1189  Visited.insert(std::make_pair(Entry.getBB(), Addr));
1190  if (Entry.getResult().isNonLocal()) {
1191  continue;
1192  }
1193 
1194  if (DT.isReachableFromEntry(Entry.getBB())) {
1195  Result.push_back(
1196  NonLocalDepResult(Entry.getBB(), Entry.getResult(), Addr));
1197  }
1198  }
1199  ++NumCacheCompleteNonLocalPtr;
1200  return true;
1201  }
1202 
1203  // Otherwise, either this is a new block, a block with an invalid cache
1204  // pointer or one that we're about to invalidate by putting more info into it
1205  // than its valid cache info. If empty, the result will be valid cache info,
1206  // otherwise it isn't.
1207  if (Cache->empty())
1208  CacheInfo->Pair = BBSkipFirstBlockPair(StartBB, SkipFirstBlock);
1209  else
1210  CacheInfo->Pair = BBSkipFirstBlockPair();
1211 
1212  SmallVector<BasicBlock *, 32> Worklist;
1213  Worklist.push_back(StartBB);
1214 
1215  // PredList used inside loop.
1216  SmallVector<std::pair<BasicBlock *, PHITransAddr>, 16> PredList;
1217 
1218  // Keep track of the entries that we know are sorted. Previously cached
1219  // entries will all be sorted. The entries we add we only sort on demand (we
1220  // don't insert every element into its sorted position). We know that we
1221  // won't get any reuse from currently inserted values, because we don't
1222  // revisit blocks after we insert info for them.
1223  unsigned NumSortedEntries = Cache->size();
1224  unsigned WorklistEntries = BlockNumberLimit;
1225  bool GotWorklistLimit = false;
1226  LLVM_DEBUG(AssertSorted(*Cache));
1227 
1228  while (!Worklist.empty()) {
1229  BasicBlock *BB = Worklist.pop_back_val();
1230 
1231  // If we do process a large number of blocks, it becomes very expensive and
1232  // likely isn't worth worrying about.
1233  if (Result.size() > NumResultsLimit) {
1234  Worklist.clear();
1235  // Sort it now (if needed) so that recursive invocations of
1236  // getNonLocalPointerDepFromBB and other routines that could reuse the
1237  // cache value will only see properly sorted cache arrays.
1238  if (Cache && NumSortedEntries != Cache->size()) {
1239  SortNonLocalDepInfoCache(*Cache, NumSortedEntries);
1240  }
1241  // Since we bail out, the "Cache" set won't contain all of the
1242  // results for the query. This is ok (we can still use it to accelerate
1243  // specific block queries) but we can't do the fastpath "return all
1244  // results from the set". Clear out the indicator for this.
1245  CacheInfo->Pair = BBSkipFirstBlockPair();
1246  return false;
1247  }
1248 
1249  // Skip the first block if we have it.
1250  if (!SkipFirstBlock) {
1251  // Analyze the dependency of *Pointer in FromBB. See if we already have
1252  // been here.
1253  assert(Visited.count(BB) && "Should check 'visited' before adding to WL");
1254 
1255  // Get the dependency info for Pointer in BB. If we have cached
1256  // information, we will use it, otherwise we compute it.
1257  LLVM_DEBUG(AssertSorted(*Cache, NumSortedEntries));
1258  MemDepResult Dep = GetNonLocalInfoForBlock(QueryInst, Loc, isLoad, BB,
1259  Cache, NumSortedEntries);
1260 
1261  // If we got a Def or Clobber, add this to the list of results.
1262  if (!Dep.isNonLocal()) {
1263  if (DT.isReachableFromEntry(BB)) {
1264  Result.push_back(NonLocalDepResult(BB, Dep, Pointer.getAddr()));
1265  continue;
1266  }
1267  }
1268  }
1269 
1270  // If 'Pointer' is an instruction defined in this block, then we need to do
1271  // phi translation to change it into a value live in the predecessor block.
1272  // If not, we just add the predecessors to the worklist and scan them with
1273  // the same Pointer.
1274  if (!Pointer.NeedsPHITranslationFromBlock(BB)) {
1275  SkipFirstBlock = false;
1276  SmallVector<BasicBlock *, 16> NewBlocks;
1277  for (BasicBlock *Pred : PredCache.get(BB)) {
1278  // Verify that we haven't looked at this block yet.
1279  std::pair<DenseMap<BasicBlock *, Value *>::iterator, bool> InsertRes =
1280  Visited.insert(std::make_pair(Pred, Pointer.getAddr()));
1281  if (InsertRes.second) {
1282  // First time we've looked at *PI.
1283  NewBlocks.push_back(Pred);
1284  continue;
1285  }
1286 
1287  // If we have seen this block before, but it was with a different
1288  // pointer then we have a phi translation failure and we have to treat
1289  // this as a clobber.
1290  if (InsertRes.first->second != Pointer.getAddr()) {
1291  // Make sure to clean up the Visited map before continuing on to
1292  // PredTranslationFailure.
1293  for (unsigned i = 0; i < NewBlocks.size(); i++)
1294  Visited.erase(NewBlocks[i]);
1295  goto PredTranslationFailure;
1296  }
1297  }
1298  if (NewBlocks.size() > WorklistEntries) {
1299  // Make sure to clean up the Visited map before continuing on to
1300  // PredTranslationFailure.
1301  for (unsigned i = 0; i < NewBlocks.size(); i++)
1302  Visited.erase(NewBlocks[i]);
1303  GotWorklistLimit = true;
1304  goto PredTranslationFailure;
1305  }
1306  WorklistEntries -= NewBlocks.size();
1307  Worklist.append(NewBlocks.begin(), NewBlocks.end());
1308  continue;
1309  }
1310 
1311  // We do need to do phi translation, if we know ahead of time we can't phi
1312  // translate this value, don't even try.
1313  if (!Pointer.IsPotentiallyPHITranslatable())
1314  goto PredTranslationFailure;
1315 
1316  // We may have added values to the cache list before this PHI translation.
1317  // If so, we haven't done anything to ensure that the cache remains sorted.
1318  // Sort it now (if needed) so that recursive invocations of
1319  // getNonLocalPointerDepFromBB and other routines that could reuse the cache
1320  // value will only see properly sorted cache arrays.
1321  if (Cache && NumSortedEntries != Cache->size()) {
1322  SortNonLocalDepInfoCache(*Cache, NumSortedEntries);
1323  NumSortedEntries = Cache->size();
1324  }
1325  Cache = nullptr;
1326 
1327  PredList.clear();
1328  for (BasicBlock *Pred : PredCache.get(BB)) {
1329  PredList.push_back(std::make_pair(Pred, Pointer));
1330 
1331  // Get the PHI translated pointer in this predecessor. This can fail if
1332  // not translatable, in which case the getAddr() returns null.
1333  PHITransAddr &PredPointer = PredList.back().second;
1334  PredPointer.PHITranslateValue(BB, Pred, &DT, /*MustDominate=*/false);
1335  Value *PredPtrVal = PredPointer.getAddr();
1336 
1337  // Check to see if we have already visited this pred block with another
1338  // pointer. If so, we can't do this lookup. This failure can occur
1339  // with PHI translation when a critical edge exists and the PHI node in
1340  // the successor translates to a pointer value different than the
1341  // pointer the block was first analyzed with.
1342  std::pair<DenseMap<BasicBlock *, Value *>::iterator, bool> InsertRes =
1343  Visited.insert(std::make_pair(Pred, PredPtrVal));
1344 
1345  if (!InsertRes.second) {
1346  // We found the pred; take it off the list of preds to visit.
1347  PredList.pop_back();
1348 
1349  // If the predecessor was visited with PredPtr, then we already did
1350  // the analysis and can ignore it.
1351  if (InsertRes.first->second == PredPtrVal)
1352  continue;
1353 
1354  // Otherwise, the block was previously analyzed with a different
1355  // pointer. We can't represent the result of this case, so we just
1356  // treat this as a phi translation failure.
1357 
1358  // Make sure to clean up the Visited map before continuing on to
1359  // PredTranslationFailure.
1360  for (unsigned i = 0, n = PredList.size(); i < n; ++i)
1361  Visited.erase(PredList[i].first);
1362 
1363  goto PredTranslationFailure;
1364  }
1365  }
1366 
1367  // Actually process results here; this needs to be a separate loop to avoid
1368  // calling getNonLocalPointerDepFromBB for blocks we don't want to return
1369  // any results for. (getNonLocalPointerDepFromBB will modify our
1370  // datastructures in ways the code after the PredTranslationFailure label
1371  // doesn't expect.)
1372  for (unsigned i = 0, n = PredList.size(); i < n; ++i) {
1373  BasicBlock *Pred = PredList[i].first;
1374  PHITransAddr &PredPointer = PredList[i].second;
1375  Value *PredPtrVal = PredPointer.getAddr();
1376 
1377  bool CanTranslate = true;
1378  // If PHI translation was unable to find an available pointer in this
1379  // predecessor, then we have to assume that the pointer is clobbered in
1380  // that predecessor. We can still do PRE of the load, which would insert
1381  // a computation of the pointer in this predecessor.
1382  if (!PredPtrVal)
1383  CanTranslate = false;
1384 
1385  // FIXME: it is entirely possible that PHI translating will end up with
1386  // the same value. Consider PHI translating something like:
1387  // X = phi [x, bb1], [y, bb2]. PHI translating for bb1 doesn't *need*
1388  // to recurse here, pedantically speaking.
1389 
1390  // If getNonLocalPointerDepFromBB fails here, that means the cached
1391  // result conflicted with the Visited list; we have to conservatively
1392  // assume it is unknown, but this also does not block PRE of the load.
1393  if (!CanTranslate ||
1394  !getNonLocalPointerDepFromBB(QueryInst, PredPointer,
1395  Loc.getWithNewPtr(PredPtrVal), isLoad,
1396  Pred, Result, Visited)) {
1397  // Add the entry to the Result list.
1398  NonLocalDepResult Entry(Pred, MemDepResult::getUnknown(), PredPtrVal);
1399  Result.push_back(Entry);
1400 
1401  // Since we had a phi translation failure, the cache for CacheKey won't
1402  // include all of the entries that we need to immediately satisfy future
1403  // queries. Mark this in NonLocalPointerDeps by setting the
1404  // BBSkipFirstBlockPair pointer to null. This requires reuse of the
1405  // cached value to do more work but not miss the phi trans failure.
1406  NonLocalPointerInfo &NLPI = NonLocalPointerDeps[CacheKey];
1407  NLPI.Pair = BBSkipFirstBlockPair();
1408  continue;
1409  }
1410  }
1411 
1412  // Refresh the CacheInfo/Cache pointer so that it isn't invalidated.
1413  CacheInfo = &NonLocalPointerDeps[CacheKey];
1414  Cache = &CacheInfo->NonLocalDeps;
1415  NumSortedEntries = Cache->size();
1416 
1417  // Since we did phi translation, the "Cache" set won't contain all of the
1418  // results for the query. This is ok (we can still use it to accelerate
1419  // specific block queries) but we can't do the fastpath "return all
1420  // results from the set" Clear out the indicator for this.
1421  CacheInfo->Pair = BBSkipFirstBlockPair();
1422  SkipFirstBlock = false;
1423  continue;
1424 
1425  PredTranslationFailure:
1426  // The following code is "failure"; we can't produce a sane translation
1427  // for the given block. It assumes that we haven't modified any of
1428  // our datastructures while processing the current block.
1429 
1430  if (!Cache) {
1431  // Refresh the CacheInfo/Cache pointer if it got invalidated.
1432  CacheInfo = &NonLocalPointerDeps[CacheKey];
1433  Cache = &CacheInfo->NonLocalDeps;
1434  NumSortedEntries = Cache->size();
1435  }
1436 
1437  // Since we failed phi translation, the "Cache" set won't contain all of the
1438  // results for the query. This is ok (we can still use it to accelerate
1439  // specific block queries) but we can't do the fastpath "return all
1440  // results from the set". Clear out the indicator for this.
1441  CacheInfo->Pair = BBSkipFirstBlockPair();
1442 
1443  // If *nothing* works, mark the pointer as unknown.
1444  //
1445  // If this is the magic first block, return this as a clobber of the whole
1446  // incoming value. Since we can't phi translate to one of the predecessors,
1447  // we have to bail out.
1448  if (SkipFirstBlock)
1449  return false;
1450 
1451  bool foundBlock = false;
1452  for (NonLocalDepEntry &I : llvm::reverse(*Cache)) {
1453  if (I.getBB() != BB)
1454  continue;
1455 
1456  assert((GotWorklistLimit || I.getResult().isNonLocal() ||
1457  !DT.isReachableFromEntry(BB)) &&
1458  "Should only be here with transparent block");
1459  foundBlock = true;
1460  I.setResult(MemDepResult::getUnknown());
1461  Result.push_back(
1462  NonLocalDepResult(I.getBB(), I.getResult(), Pointer.getAddr()));
1463  break;
1464  }
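  // The casts to void keep release builds (where the assert below compiles
  // away) from warning about unused variables.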
1465  (void)foundBlock; (void)GotWorklistLimit;
1466  assert((foundBlock || GotWorklistLimit) && "Current block not in cache?");
1467  }
1468 
1469  // Okay, we're done now. If we added new values to the cache, re-sort it.
1470  SortNonLocalDepInfoCache(*Cache, NumSortedEntries);
1471  LLVM_DEBUG(AssertSorted(*Cache));
1472  return true;
1473 }
1474 
1475 /// If P exists in CachedNonLocalPointerInfo or NonLocalDefsCache, remove it.
1476 void MemoryDependenceResults::RemoveCachedNonLocalPointerDependencies(
1477  ValueIsLoadPair P) {
1478 
1479  // Most of the time this cache is empty.
1480  if (!NonLocalDefsCache.empty()) {
1481  auto it = NonLocalDefsCache.find(P.getPointer());
1482  if (it != NonLocalDefsCache.end()) {
1483  RemoveFromReverseMap(ReverseNonLocalDefsCache,
1484  it->second.getResult().getInst(), P.getPointer());
1485  NonLocalDefsCache.erase(it);
1486  }
1487 
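  // If the pointer being invalidated is itself an instruction, other pointers'
  // cached defs may name it; erase those forward entries along with its entry
  // in the reverse map.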
1488  if (auto *I = dyn_cast<Instruction>(P.getPointer())) {
1489  auto toRemoveIt = ReverseNonLocalDefsCache.find(I);
1490  if (toRemoveIt != ReverseNonLocalDefsCache.end()) {
1491  for (const auto &entry : toRemoveIt->second)
1492  NonLocalDefsCache.erase(entry);
1493  ReverseNonLocalDefsCache.erase(toRemoveIt);
1494  }
1495  }
1496  }
1497 
1498  CachedNonLocalPointerInfo::iterator It = NonLocalPointerDeps.find(P);
1499  if (It == NonLocalPointerDeps.end())
1500  return;
1501 
1502  // Remove all of the entries in the BB->val map. This involves removing
1503  // instructions from the reverse map.
1504  NonLocalDepInfo &PInfo = It->second.NonLocalDeps;
1505 
1506  for (unsigned i = 0, e = PInfo.size(); i != e; ++i) {
1507  Instruction *Target = PInfo[i].getResult().getInst();
1508  if (!Target)
1509  continue; // Ignore non-local dep results.
1510  assert(Target->getParent() == PInfo[i].getBB());
1511 
1512  // We are removing this entry from 'Cache', so update the reverse map too.
1513  RemoveFromReverseMap(ReverseNonLocalPtrDeps, Target, P);
1514  }
1515 
1516  // Remove P from NonLocalPointerDeps (which deletes NonLocalDepInfo).
1517  NonLocalPointerDeps.erase(It);
1518 }
1519 
1520 void MemoryDependenceResults::invalidateCachedPointerInfo(Value *Ptr) {
1521  // If Ptr isn't really a pointer, just ignore it.
1522  if (!Ptr->getType()->isPointerTy())
1523  return;
1524  // Flush store info for the pointer.
1525  RemoveCachedNonLocalPointerDependencies(ValueIsLoadPair(Ptr, false));
1526  // Flush load info for the pointer.
1527  RemoveCachedNonLocalPointerDependencies(ValueIsLoadPair(Ptr, true));
1528  // Invalidate phis that use the pointer.
1529  PV.invalidateValue(Ptr);
1530 }
1531 
1532 void MemoryDependenceResults::invalidateCachedPredecessors() {
1533  PredCache.clear();
1534 }
1535 
1536 void MemoryDependenceResults::removeInstruction(Instruction *RemInst) {
1537  // Walk through the non-local dependencies, removing this one as the value
1538  // for any cached queries.
1539  NonLocalDepMapType::iterator NLDI = NonLocalDeps.find(RemInst);
1540  if (NLDI != NonLocalDeps.end()) {
1541  NonLocalDepInfo &BlockMap = NLDI->second.first;
1542  for (auto &Entry : BlockMap)
1543  if (Instruction *Inst = Entry.getResult().getInst())
1544  RemoveFromReverseMap(ReverseNonLocalDeps, Inst, RemInst);
1545  NonLocalDeps.erase(NLDI);
1546  }
1547 
1548  // If we have a cached local dependence query for this instruction, remove it.
1549  LocalDepMapType::iterator LocalDepEntry = LocalDeps.find(RemInst);
1550  if (LocalDepEntry != LocalDeps.end()) {
1551  // Remove us from DepInst's reverse set now that the local dep info is gone.
1552  if (Instruction *Inst = LocalDepEntry->second.getInst())
1553  RemoveFromReverseMap(ReverseLocalDeps, Inst, RemInst);
1554 
1555  // Remove this local dependency info.
1556  LocalDeps.erase(LocalDepEntry);
1557  }
1558 
1559  // If we have any cached pointer dependencies on this instruction, remove
1560  // them. If the instruction has non-pointer type, then it can't be a pointer
1561  // base.
1562 
1563  // Remove it from both the load info and the store info. The instruction
1564  // can't be in either of these maps if it is non-pointer.
1565  if (RemInst->getType()->isPointerTy()) {
1566  RemoveCachedNonLocalPointerDependencies(ValueIsLoadPair(RemInst, false));
1567  RemoveCachedNonLocalPointerDependencies(ValueIsLoadPair(RemInst, true));
1568  }
1569 
1570  // Loop over all of the things that depend on the instruction we're removing.
1571  SmallVector<std::pair<Instruction *, Instruction *>, 8> ReverseDepsToAdd;
1572 
1573  // If we find RemInst as a clobber or Def in any of the maps for other values,
1574  // we need to replace its entry with a dirty version of the instruction after
1575  // it. If RemInst is a terminator, we use a null dirty value.
1576  //
1577  // Using a dirty version of the instruction after RemInst saves having to scan
1578  // the entire block to get to this point.
1579  MemDepResult NewDirtyVal;
1580  if (!RemInst->isTerminator())
1581  NewDirtyVal = MemDepResult::getDirty(&*++RemInst->getIterator());
1582 
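  // First patch up local dependencies: anything whose cached local dependency
  // was RemInst is redirected to the dirty entry computed above.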
1583  ReverseDepMapType::iterator ReverseDepIt = ReverseLocalDeps.find(RemInst);
1584  if (ReverseDepIt != ReverseLocalDeps.end()) {
1585  // RemInst can't be the terminator if it has local stuff depending on it.
1586  assert(!ReverseDepIt->second.empty() && !RemInst->isTerminator() &&
1587  "Nothing can locally depend on a terminator");
1588 
1589  for (Instruction *InstDependingOnRemInst : ReverseDepIt->second) {
1590  assert(InstDependingOnRemInst != RemInst &&
1591  "Already removed our local dep info");
1592 
1593  LocalDeps[InstDependingOnRemInst] = NewDirtyVal;
1594 
1595  // Make sure to remember that new things depend on NewDirtyVal's instruction.
1596  assert(NewDirtyVal.getInst() &&
1597  "There is no way something else can have "
1598  "a local dep on this if it is a terminator!");
1599  ReverseDepsToAdd.push_back(
1600  std::make_pair(NewDirtyVal.getInst(), InstDependingOnRemInst));
1601  }
1602 
1603  ReverseLocalDeps.erase(ReverseDepIt);
1604 
1605  // Add new reverse deps after scanning the set, to avoid invalidating the
1606  // set we are iterating over.
1607  while (!ReverseDepsToAdd.empty()) {
1608  ReverseLocalDeps[ReverseDepsToAdd.back().first].insert(
1609  ReverseDepsToAdd.back().second);
1610  ReverseDepsToAdd.pop_back();
1611  }
1612  }
1613 
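  // Next, patch up cached non-local (call) dependencies: flag each dependent
  // instruction's cache as dirty and redirect entries that named RemInst to
  // the dirty value.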
1614  ReverseDepIt = ReverseNonLocalDeps.find(RemInst);
1615  if (ReverseDepIt != ReverseNonLocalDeps.end()) {
1616  for (Instruction *I : ReverseDepIt->second) {
1617  assert(I != RemInst && "Already removed NonLocalDep info for RemInst");
1618 
1619  PerInstNLInfo &INLD = NonLocalDeps[I];
1620  // The information is now dirty!
1621  INLD.second = true;
1622 
1623  for (auto &Entry : INLD.first) {
1624  if (Entry.getResult().getInst() != RemInst)
1625  continue;
1626 
1627  // Convert to a dirty entry for the subsequent instruction.
1628  Entry.setResult(NewDirtyVal);
1629 
1630  if (Instruction *NextI = NewDirtyVal.getInst())
1631  ReverseDepsToAdd.push_back(std::make_pair(NextI, I));
1632  }
1633  }
1634 
1635  ReverseNonLocalDeps.erase(ReverseDepIt);
1636 
1637  // Add new reverse deps after scanning the set, to avoid invalidating it.
1638  while (!ReverseDepsToAdd.empty()) {
1639  ReverseNonLocalDeps[ReverseDepsToAdd.back().first].insert(
1640  ReverseDepsToAdd.back().second);
1641  ReverseDepsToAdd.pop_back();
1642  }
1643  }
1644 
1645  // If the instruction is in ReverseNonLocalPtrDeps then it appears as a
1646  // value in the NonLocalPointerDeps info.
1647  ReverseNonLocalPtrDepTy::iterator ReversePtrDepIt =
1648  ReverseNonLocalPtrDeps.find(RemInst);
1649  if (ReversePtrDepIt != ReverseNonLocalPtrDeps.end()) {
1650  SmallVector<std::pair<Instruction *, ValueIsLoadPair>, 8>
1651  ReversePtrDepsToAdd;
1652 
1653  for (ValueIsLoadPair P : ReversePtrDepIt->second) {
1654  assert(P.getPointer() != RemInst &&
1655  "Already removed NonLocalPointerDeps info for RemInst");
1656 
1657  NonLocalDepInfo &NLPDI = NonLocalPointerDeps[P].NonLocalDeps;
1658 
1659  // The cache is not valid for any specific block anymore.
1660  NonLocalPointerDeps[P].Pair = BBSkipFirstBlockPair();
1661 
1662  // Update any entries for RemInst to use the instruction after it.
1663  for (auto &Entry : NLPDI) {
1664  if (Entry.getResult().getInst() != RemInst)
1665  continue;
1666 
1667  // Convert to a dirty entry for the subsequent instruction.
1668  Entry.setResult(NewDirtyVal);
1669 
1670  if (Instruction *NewDirtyInst = NewDirtyVal.getInst())
1671  ReversePtrDepsToAdd.push_back(std::make_pair(NewDirtyInst, P));
1672  }
1673 
1674  // Re-sort the NonLocalDepInfo. Changing the dirty entry to its
1675  // subsequent value may invalidate the sortedness.
1676  llvm::sort(NLPDI);
1677  }
1678 
1679  ReverseNonLocalPtrDeps.erase(ReversePtrDepIt);
1680 
1681  while (!ReversePtrDepsToAdd.empty()) {
1682  ReverseNonLocalPtrDeps[ReversePtrDepsToAdd.back().first].insert(
1683  ReversePtrDepsToAdd.back().second);
1684  ReversePtrDepsToAdd.pop_back();
1685  }
1686  }
1687 
1688  // Invalidate phis that use the removed instruction.
1689  PV.invalidateValue(RemInst);
1690 
1691  assert(!NonLocalDeps.count(RemInst) && "RemInst got reinserted?");
1692  LLVM_DEBUG(verifyRemoved(RemInst));
1693 }
1694 
1695 /// Verify that the specified instruction does not occur in our internal data
1696 /// structures.
1697 ///
1698 /// This function verifies by asserting in debug builds.
1699 void MemoryDependenceResults::verifyRemoved(Instruction *D) const {
1700 #ifndef NDEBUG
1701  for (const auto &DepKV : LocalDeps) {
1702  assert(DepKV.first != D && "Inst occurs in data structures");
1703  assert(DepKV.second.getInst() != D && "Inst occurs in data structures");
1704  }
1705 
1706  for (const auto &DepKV : NonLocalPointerDeps) {
1707  assert(DepKV.first.getPointer() != D && "Inst occurs in NLPD map key");
1708  for (const auto &Entry : DepKV.second.NonLocalDeps)
1709  assert(Entry.getResult().getInst() != D && "Inst occurs as NLPD value");
1710  }
1711 
1712  for (const auto &DepKV : NonLocalDeps) {
1713  assert(DepKV.first != D && "Inst occurs in data structures");
1714  const PerInstNLInfo &INLD = DepKV.second;
1715  for (const auto &Entry : INLD.first)
1716  assert(Entry.getResult().getInst() != D &&
1717  "Inst occurs in data structures");
1718  }
1719 
1720  for (const auto &DepKV : ReverseLocalDeps) {
1721  assert(DepKV.first != D && "Inst occurs in data structures");
1722  for (Instruction *Inst : DepKV.second)
1723  assert(Inst != D && "Inst occurs in data structures");
1724  }
1725 
1726  for (const auto &DepKV : ReverseNonLocalDeps) {
1727  assert(DepKV.first != D && "Inst occurs in data structures");
1728  for (Instruction *Inst : DepKV.second)
1729  assert(Inst != D && "Inst occurs in data structures");
1730  }
1731 
1732  for (const auto &DepKV : ReverseNonLocalPtrDeps) {
1733  assert(DepKV.first != D && "Inst occurs in rev NLPD map");
1734 
1735  for (ValueIsLoadPair P : DepKV.second)
1736  assert(P != ValueIsLoadPair(D, false) && P != ValueIsLoadPair(D, true) &&
1737  "Inst occurs in ReverseNonLocalPtrDeps map");
1738  }
1739 #endif
1740 }
1741 
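// Opaque key the new pass manager uses to identify results of this analysis.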
1742 AnalysisKey MemoryDependenceAnalysis::Key;
1743 
1744 MemoryDependenceResults MemoryDependenceAnalysis::run(Function &F,
1745  FunctionAnalysisManager &AM) {
1746  auto &AA = AM.getResult<AAManager>(F);
1747  auto &AC = AM.getResult<AssumptionAnalysis>(F);
1748  auto &TLI = AM.getResult<TargetLibraryAnalysis>(F);
1749  auto &DT = AM.getResult<DominatorTreeAnalysis>(F);
1750  auto &PV = AM.getResult<PhiValuesAnalysis>(F);
1751  return MemoryDependenceResults(AA, AC, TLI, DT, PV);
1752 }
1753 
1754 char MemoryDependenceWrapperPass::ID = 0;
1755 
1756 INITIALIZE_PASS_BEGIN(MemoryDependenceWrapperPass, "memdep",
1757  "Memory Dependence Analysis", false, true)
1758 INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
1759 INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
1760 INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
1761 INITIALIZE_PASS_DEPENDENCY(PhiValuesWrapperPass)
1762 INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
1763 INITIALIZE_PASS_END(MemoryDependenceWrapperPass, "memdep",
1764  "Memory Dependence Analysis", false, true)
1765 
1766 MemoryDependenceWrapperPass::MemoryDependenceWrapperPass() : FunctionPass(ID) {
1767  initializeMemoryDependenceWrapperPassPass(*PassRegistry::getPassRegistry());
1768 }
1769 
1770 MemoryDependenceWrapperPass::~MemoryDependenceWrapperPass() = default;
1771 
1772 void MemoryDependenceWrapperPass::releaseMemory() {
1773  MemDep.reset();
1774 }
1775 
1776 void MemoryDependenceWrapperPass::getAnalysisUsage(AnalysisUsage &AU) const {
1777  AU.setPreservesAll();
1778  AU.addRequired<AssumptionCacheTracker>();
1779  AU.addRequired<DominatorTreeWrapperPass>();
1780  AU.addRequired<PhiValuesWrapperPass>();
1781  AU.addRequiredTransitive<AAResultsWrapperPass>();
1782  AU.addRequiredTransitive<TargetLibraryInfoWrapperPass>();
1783 }
1784 
1785 bool MemoryDependenceResults::invalidate(Function &F, const PreservedAnalyses &PA,
1786  FunctionAnalysisManager::Invalidator &Inv) {
1787  // Check whether our analysis is preserved.
1788  auto PAC = PA.getChecker<MemoryDependenceAnalysis>();
1789  if (!PAC.preserved() && !PAC.preservedSet<AllAnalysesOn<Function>>())
1790  // If not, give up now.
1791  return true;
1792 
1793  // Check whether the analyses we depend on became invalid for any reason.
1794  if (Inv.invalidate<AAManager>(F, PA) ||
1795  Inv.invalidate<AssumptionAnalysis>(F, PA) ||
1796  Inv.invalidate<DominatorTreeAnalysis>(F, PA) ||
1797  Inv.invalidate<PhiValuesAnalysis>(F, PA))
1798  return true;
1799 
1800  // Otherwise this analysis result remains valid.
1801  return false;
1802 }
1803 
1804 unsigned MemoryDependenceResults::getDefaultBlockScanLimit() const {
1805  return BlockScanLimit;
1806 }
1807 
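// Legacy pass entry point: fetch the analyses requested in getAnalysisUsage
// and (re)build the per-function result.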
1808 bool MemoryDependenceWrapperPass::runOnFunction(Function &F) {
1809  auto &AA = getAnalysis<AAResultsWrapperPass>().getAAResults();
1810  auto &AC = getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
1811  auto &TLI = getAnalysis<TargetLibraryInfoWrapperPass>().getTLI();
1812  auto &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
1813  auto &PV = getAnalysis<PhiValuesWrapperPass>().getResult();
1814  MemDep.emplace(AA, AC, TLI, DT, PV);
1815  return false;
1816 }