1//===- MemoryDependenceAnalysis.cpp - Mem Deps Implementation -------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file implements an analysis that determines, for a given memory
10// operation, what preceding memory operations it depends on. It builds on
11// alias analysis information, and tries to provide a lazy, caching interface to
12// a common kind of alias information query.
13//
14//===----------------------------------------------------------------------===//
15
16#include "llvm/Analysis/MemoryDependenceAnalysis.h"
17#include "llvm/ADT/DenseMap.h"
18#include "llvm/ADT/STLExtras.h"
19#include "llvm/ADT/SmallPtrSet.h"
20#include "llvm/ADT/SmallVector.h"
21#include "llvm/ADT/Statistic.h"
22#include "llvm/Analysis/AliasAnalysis.h"
23#include "llvm/Analysis/AssumptionCache.h"
24#include "llvm/Analysis/MemoryBuiltins.h"
25#include "llvm/Analysis/MemoryLocation.h"
26#include "llvm/Analysis/PHITransAddr.h"
27#include "llvm/Analysis/TargetLibraryInfo.h"
28#include "llvm/Analysis/ValueTracking.h"
29#include "llvm/IR/BasicBlock.h"
30#include "llvm/IR/Dominators.h"
31#include "llvm/IR/Function.h"
32#include "llvm/IR/InstrTypes.h"
33#include "llvm/IR/Instruction.h"
34#include "llvm/IR/Instructions.h"
35#include "llvm/IR/IntrinsicInst.h"
36#include "llvm/IR/LLVMContext.h"
37#include "llvm/IR/Metadata.h"
38#include "llvm/IR/Module.h"
39#include "llvm/IR/PredIteratorCache.h"
40#include "llvm/IR/Type.h"
41#include "llvm/IR/Use.h"
42#include "llvm/IR/Value.h"
43#include "llvm/InitializePasses.h"
44#include "llvm/Pass.h"
45#include "llvm/Support/AtomicOrdering.h"
46#include "llvm/Support/Casting.h"
47#include "llvm/Support/CommandLine.h"
48#include "llvm/Support/Compiler.h"
49#include "llvm/Support/Debug.h"
50#include <algorithm>
51#include <cassert>
52#include <iterator>
53#include <utility>
54
55using namespace llvm;
56
57#define DEBUG_TYPE "memdep"
58
59STATISTIC(NumCacheNonLocal, "Number of fully cached non-local responses");
60STATISTIC(NumCacheDirtyNonLocal, "Number of dirty cached non-local responses");
61STATISTIC(NumUncacheNonLocal, "Number of uncached non-local responses");
62
63STATISTIC(NumCacheNonLocalPtr,
64 "Number of fully cached non-local ptr responses");
65STATISTIC(NumCacheDirtyNonLocalPtr,
66 "Number of cached, but dirty, non-local ptr responses");
67STATISTIC(NumUncacheNonLocalPtr, "Number of uncached non-local ptr responses");
68STATISTIC(NumCacheCompleteNonLocalPtr,
69 "Number of block queries that were completely cached");
70
71// Limit for the number of instructions to scan in a block.
72
73static cl::opt<unsigned> BlockScanLimit(
74 "memdep-block-scan-limit", cl::Hidden, cl::init(100),
75 cl::desc("The number of instructions to scan in a block in memory "
76 "dependency analysis (default = 100)"));
77
78static cl::opt<unsigned>
79 BlockNumberLimit("memdep-block-number-limit", cl::Hidden, cl::init(200),
80 cl::desc("The number of blocks to scan during memory "
81 "dependency analysis (default = 200)"));
82
83// Limit on the number of memdep results to process.
84static const unsigned int NumResultsLimit = 100;
85
86/// This is a helper function that removes Val from 'Inst's set in ReverseMap.
87///
88/// If the set becomes empty, remove Inst's entry.
89template <typename KeyTy>
90static void
91RemoveFromReverseMap(DenseMap<Instruction *, SmallPtrSet<KeyTy, 4>> &ReverseMap,
92 Instruction *Inst, KeyTy Val) {
93 typename DenseMap<Instruction *, SmallPtrSet<KeyTy, 4>>::iterator InstIt =
94 ReverseMap.find(Inst);
95 assert(InstIt != ReverseMap.end() && "Reverse map out of sync?");
96 bool Found = InstIt->second.erase(Val);
97 assert(Found && "Invalid reverse map!");
98 (void)Found;
99 if (InstIt->second.empty())
100 ReverseMap.erase(InstIt);
101}
102
103/// If the given instruction references a specific memory location, fill in Loc
104/// with the details, otherwise set Loc.Ptr to null.
105///
106/// Returns a ModRefInfo value describing the general behavior of the
107/// instruction.
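/// For example (descriptive summary of the cases handled below): a simple
/// unordered load yields Ref with Loc set to the loaded location, a simple
/// store yields Mod, and an instruction with no single affected location
/// (such as a fence or an unknown call) leaves Loc.Ptr null so only the
/// coarse mayReadFromMemory/mayWriteToMemory answer applies.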
108static ModRefInfo GetLocation(const Instruction *Inst, MemoryLocation &Loc,
109 const TargetLibraryInfo &TLI) {
110 if (const LoadInst *LI = dyn_cast<LoadInst>(Inst)) {
111 if (LI->isUnordered()) {
112 Loc = MemoryLocation::get(LI);
113 return ModRefInfo::Ref;
114 }
115 if (LI->getOrdering() == AtomicOrdering::Monotonic) {
116 Loc = MemoryLocation::get(LI);
117 return ModRefInfo::ModRef;
118 }
119 Loc = MemoryLocation();
120 return ModRefInfo::ModRef;
121 }
122
123 if (const StoreInst *SI = dyn_cast<StoreInst>(Inst)) {
124 if (SI->isUnordered()) {
125 Loc = MemoryLocation::get(SI);
126 return ModRefInfo::Mod;
127 }
128 if (SI->getOrdering() == AtomicOrdering::Monotonic) {
129 Loc = MemoryLocation::get(SI);
130 return ModRefInfo::ModRef;
131 }
132 Loc = MemoryLocation();
133 return ModRefInfo::ModRef;
134 }
135
136 if (const VAArgInst *V = dyn_cast<VAArgInst>(Inst)) {
137 Loc = MemoryLocation::get(V);
138 return ModRefInfo::ModRef;
139 }
140
141 if (const CallBase *CB = dyn_cast<CallBase>(Inst)) {
142 if (Value *FreedOp = getFreedOperand(CB, &TLI)) {
143 // calls to free() deallocate the entire structure
144 Loc = MemoryLocation::getAfter(FreedOp);
145 return ModRefInfo::Mod;
146 }
147 }
148
149 if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(Inst)) {
150 switch (II->getIntrinsicID()) {
151 case Intrinsic::lifetime_start:
152 case Intrinsic::lifetime_end:
153 case Intrinsic::invariant_start:
154 Loc = MemoryLocation::getForArgument(II, 1, TLI);
155 // These intrinsics don't really modify the memory, but returning Mod
156 // will allow them to be handled conservatively.
157 return ModRefInfo::Mod;
158 case Intrinsic::invariant_end:
159 Loc = MemoryLocation::getForArgument(II, 2, TLI);
160 // These intrinsics don't really modify the memory, but returning Mod
161 // will allow them to be handled conservatively.
162 return ModRefInfo::Mod;
163 case Intrinsic::masked_load:
164 Loc = MemoryLocation::getForArgument(II, 0, TLI);
165 return ModRefInfo::Ref;
166 case Intrinsic::masked_store:
167 Loc = MemoryLocation::getForArgument(II, 1, TLI);
168 return ModRefInfo::Mod;
169 default:
170 break;
171 }
172 }
173
174 // Otherwise, just do the coarse-grained thing that always works.
175 if (Inst->mayWriteToMemory())
176 return ModRefInfo::ModRef;
177 if (Inst->mayReadFromMemory())
178 return ModRefInfo::Ref;
179 return ModRefInfo::NoModRef;
180}
181
182/// Private helper for finding the local dependencies of a call site.
183MemDepResult MemoryDependenceResults::getCallDependencyFrom(
184 CallBase *Call, bool isReadOnlyCall, BasicBlock::iterator ScanIt,
185 BasicBlock *BB) {
186 unsigned Limit = getDefaultBlockScanLimit();
187
188 // Walk backwards through the block, looking for dependencies.
189 while (ScanIt != BB->begin()) {
190 Instruction *Inst = &*--ScanIt;
191 // Debug intrinsics don't cause dependences and should not affect Limit
192 if (isa<DbgInfoIntrinsic>(Inst))
193 continue;
194
195 // Limit the amount of scanning we do so we don't end up with quadratic
196 // running time on extreme testcases.
197 --Limit;
198 if (!Limit)
199 return MemDepResult::getUnknown();
200
201 // If this inst is a memory op, get the pointer it accessed
202 MemoryLocation Loc;
203 ModRefInfo MR = GetLocation(Inst, Loc, TLI);
204 if (Loc.Ptr) {
205 // A simple instruction.
206 if (isModOrRefSet(AA.getModRefInfo(Call, Loc)))
207 return MemDepResult::getClobber(Inst);
208 continue;
209 }
210
211 if (auto *CallB = dyn_cast<CallBase>(Inst)) {
212 // If these two calls do not interfere, look past it.
213 if (isNoModRef(AA.getModRefInfo(Call, CallB))) {
214 // If the two calls are the same, return Inst as a Def, so that
215 // Call can be found redundant and eliminated.
216 if (isReadOnlyCall && !isModSet(MR) &&
217 Call->isIdenticalToWhenDefined(CallB))
218 return MemDepResult::getDef(Inst);
219
220 // Otherwise if the two calls don't interact (e.g. CallB is readnone)
221 // keep scanning.
222 continue;
223 } else
224 return MemDepResult::getClobber(Inst);
225 }
226
227 // If we could not obtain a pointer for the instruction and the instruction
228 // touches memory then assume that this is a dependency.
229 if (isModOrRefSet(MR))
230 return MemDepResult::getClobber(Inst);
231 }
232
233 // No dependence found. If this is the entry block of the function, it is
234 // unknown, otherwise it is non-local.
235 if (BB != &BB->getParent()->getEntryBlock())
236 return MemDepResult::getNonLocal();
237 return MemDepResult::getNonFuncLocal();
238}
239
240MemDepResult MemoryDependenceResults::getPointerDependencyFrom(
241 const MemoryLocation &MemLoc, bool isLoad, BasicBlock::iterator ScanIt,
242 BasicBlock *BB, Instruction *QueryInst, unsigned *Limit,
243 BatchAAResults &BatchAA) {
244 MemDepResult InvariantGroupDependency = MemDepResult::getUnknown();
245 if (QueryInst != nullptr) {
246 if (auto *LI = dyn_cast<LoadInst>(QueryInst)) {
247 InvariantGroupDependency = getInvariantGroupPointerDependency(LI, BB);
248
249 if (InvariantGroupDependency.isDef())
250 return InvariantGroupDependency;
251 }
252 }
253 MemDepResult SimpleDep = getSimplePointerDependencyFrom(
254 MemLoc, isLoad, ScanIt, BB, QueryInst, Limit, BatchAA);
255 if (SimpleDep.isDef())
256 return SimpleDep;
257 // A non-local invariant.group dependency indicates there is a non-local Def
258 // (it only returns NonLocal if it finds a non-local Def), which is better
259 // than a local clobber or anything else.
260 if (InvariantGroupDependency.isNonLocal())
261 return InvariantGroupDependency;
262
263 assert(InvariantGroupDependency.isUnknown() &&
264 "InvariantGroupDependency should be only unknown at this point");
265 return SimpleDep;
266}
267
268MemDepResult MemoryDependenceResults::getPointerDependencyFrom(
269 const MemoryLocation &MemLoc, bool isLoad, BasicBlock::iterator ScanIt,
270 BasicBlock *BB, Instruction *QueryInst, unsigned *Limit) {
271 BatchAAResults BatchAA(AA, &EII);
272 return getPointerDependencyFrom(MemLoc, isLoad, ScanIt, BB, QueryInst, Limit,
273 BatchAA);
274}
275
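// Illustrative IR sketch (not from the original source): given
//   %a = load i32, ptr %p, !invariant.group !0
//   ...
//   %b = load i32, ptr %p, !invariant.group !0
// the second load can report a Def dependency on the first, because
// invariant.group promises that the value seen at %p through such accesses
// does not change.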
276MemDepResult
277MemoryDependenceResults::getInvariantGroupPointerDependency(LoadInst *LI,
278 BasicBlock *BB) {
279
280 if (!LI->hasMetadata(LLVMContext::MD_invariant_group))
281 return MemDepResult::getUnknown();
282
283 // Take the pointer operand after all casts and zero-index GEPs. This way we
284 // only need to search the cast graph downward.
285 Value *LoadOperand = LI->getPointerOperand()->stripPointerCasts();
286
287 // It is not safe to walk the use list of a global value, because function
288 // passes aren't allowed to look outside their functions.
289 // FIXME: this could be fixed by filtering instructions from outside
290 // of the current function.
291 if (isa<GlobalValue>(LoadOperand))
292 return MemDepResult::getUnknown();
293
294 Instruction *ClosestDependency = nullptr;
295 // The order of instructions in the use list is unpredictable. To always get
296 // the same result, we pick the candidate that most closely dominates LI.
297 auto GetClosestDependency = [this](Instruction *Best, Instruction *Other) {
298 assert(Other && "Must call it with not null instruction");
299 if (Best == nullptr || DT.dominates(Best, Other))
300 return Other;
301 return Best;
302 };
303
304 for (const Use &Us : LoadOperand->uses()) {
305 auto *U = dyn_cast<Instruction>(Us.getUser());
306 if (!U || U == LI || !DT.dominates(U, LI))
307 continue;
308
309 // If we hit a load/store with the same invariant.group metadata (and the
310 // same pointer operand), we can assume that the value pointed to by the
311 // pointer operand didn't change.
312 if ((isa<LoadInst>(U) ||
313 (isa<StoreInst>(U) &&
314 cast<StoreInst>(U)->getPointerOperand() == LoadOperand)) &&
315 U->hasMetadata(LLVMContext::MD_invariant_group))
316 ClosestDependency = GetClosestDependency(ClosestDependency, U);
317 }
318
319 if (!ClosestDependency)
320 return MemDepResult::getUnknown();
321 if (ClosestDependency->getParent() == BB)
322 return MemDepResult::getDef(ClosestDependency);
323 // Def(U) can't be returned here because it is non-local. If no local
324 // dependency is found, return NonLocal, counting on the user to call
325 // getNonLocalPointerDependency, which will return the cached
326 // result.
327 NonLocalDefsCache.try_emplace(
328 LI, NonLocalDepResult(ClosestDependency->getParent(),
329 MemDepResult::getDef(ClosestDependency), nullptr));
330 ReverseNonLocalDefsCache[ClosestDependency].insert(LI);
331 return MemDepResult::getNonLocal();
332}
333
334// Check if SI, which may alias with MemLoc, can be safely skipped. This is
335// possible if SI can only must-alias or no-alias with MemLoc (no partial
336// overlap is possible) and it writes the same value that MemLoc contains
337// now (the value was loaded before this store and was not modified in
338// between).
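// Illustrative IR sketch (not from the original source) of the pattern this
// helper recognizes while scanning for a dependency of an access to %p:
//   %v = load i32, ptr %p      ; same size/location as MemLoc
//   ...                        ; nothing in between modifies %p
//   store i32 %v, ptr %p       ; stores back the value already in memory,
//                              ; so it can be skipped as a clobber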
339static bool canSkipClobberingStore(const StoreInst *SI,
340 const MemoryLocation &MemLoc,
341 Align MemLocAlign, BatchAAResults &BatchAA,
342 unsigned ScanLimit) {
343 if (!MemLoc.Size.hasValue())
344 return false;
345 if (MemoryLocation::get(SI).Size != MemLoc.Size)
346 return false;
347 if (MemLoc.Size.isScalable())
348 return false;
349 if (std::min(MemLocAlign, SI->getAlign()).value() <
350 MemLoc.Size.getValue().getKnownMinValue())
351 return false;
352
353 auto *LI = dyn_cast<LoadInst>(SI->getValueOperand());
354 if (!LI || LI->getParent() != SI->getParent())
355 return false;
356 if (BatchAA.alias(MemoryLocation::get(LI), MemLoc) != AliasResult::MustAlias)
357 return false;
358 unsigned NumVisitedInsts = 0;
359 for (const Instruction *I = LI; I != SI; I = I->getNextNonDebugInstruction())
360 if (++NumVisitedInsts > ScanLimit ||
361 isModSet(BatchAA.getModRefInfo(I, MemLoc)))
362 return false;
363
364 return true;
365}
366
367MemDepResult MemoryDependenceResults::getSimplePointerDependencyFrom(
368 const MemoryLocation &MemLoc, bool isLoad, BasicBlock::iterator ScanIt,
369 BasicBlock *BB, Instruction *QueryInst, unsigned *Limit,
370 BatchAAResults &BatchAA) {
371 bool isInvariantLoad = false;
372 Align MemLocAlign =
374
375 unsigned DefaultLimit = getDefaultBlockScanLimit();
376 if (!Limit)
377 Limit = &DefaultLimit;
378
379 // We must be careful with atomic accesses, as they may allow another thread
380 // to touch this location, clobbering it. We are conservative: if the
381 // QueryInst is not a simple (non-atomic) memory access, we automatically
382 // return getClobber.
383 // If it is simple, we know based on the results of
384 // "Compiler testing via a theory of sound optimisations in the C11/C++11
385 // memory model" in PLDI 2013, that a non-atomic location can only be
386 // clobbered between a pair of a release and an acquire action, with no
387 // access to the location in between.
388 // Here is an example for giving the general intuition behind this rule.
389 // In the following code:
390 // store x 0;
391 // release action; [1]
392 // acquire action; [4]
393 // %val = load x;
394 // It is unsafe to replace %val by 0 because another thread may be running:
395 // acquire action; [2]
396 // store x 42;
397 // release action; [3]
398 // with synchronization from 1 to 2 and from 3 to 4, resulting in %val
399 // being 42. A key property of this program, however, is that if either
400 // 1 or 4 were missing, there would be a race between the store of 42 and
401 // either the store of 0 or the load (making the whole program racy).
402 // The paper mentioned above shows that the same property is respected
403 // by every program that can detect any optimization of that kind: either
404 // it is racy (undefined) or there is a release followed by an acquire
405 // between the pair of accesses under consideration.
406
407 // If the load is invariant, we "know" that it doesn't alias *any* write. We
408 // do want to respect mustalias results since defs are useful for value
409 // forwarding, but any mayalias write can be assumed to be noalias.
410 // Arguably, this logic should be pushed inside AliasAnalysis itself.
411 if (isLoad && QueryInst)
412 if (LoadInst *LI = dyn_cast<LoadInst>(QueryInst)) {
413 if (LI->hasMetadata(LLVMContext::MD_invariant_load))
414 isInvariantLoad = true;
415 MemLocAlign = LI->getAlign();
416 }
417
418 // Returns true for a volatile instruction.
419 // For a load/store, returns true if its atomic ordering is stronger than AO;
420 // for any other instruction, returns true if it can read or write memory.
421 auto isComplexForReordering = [](Instruction * I, AtomicOrdering AO)->bool {
422 if (I->isVolatile())
423 return true;
424 if (auto *LI = dyn_cast<LoadInst>(I))
425 return isStrongerThan(LI->getOrdering(), AO);
426 if (auto *SI = dyn_cast<StoreInst>(I))
427 return isStrongerThan(SI->getOrdering(), AO);
428 return I->mayReadOrWriteMemory();
429 };
430
431 // Walk backwards through the basic block, looking for dependencies.
432 while (ScanIt != BB->begin()) {
433 Instruction *Inst = &*--ScanIt;
434
435 if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(Inst))
436 // Debug intrinsics don't (and can't) cause dependencies.
437 if (isa<DbgInfoIntrinsic>(II))
438 continue;
439
440 // Limit the amount of scanning we do so we don't end up with quadratic
441 // running time on extreme testcases.
442 --*Limit;
443 if (!*Limit)
444 return MemDepResult::getUnknown();
445
446 if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(Inst)) {
447 // If we reach a lifetime begin or end marker, then the query ends here
448 // because the value is undefined.
449 Intrinsic::ID ID = II->getIntrinsicID();
450 switch (ID) {
451 case Intrinsic::lifetime_start: {
452 // FIXME: This only considers queries directly on the invariant-tagged
453 // pointer, not on query pointers that are indexed off of them. It'd
454 // be nice to handle that at some point (the right approach is to use
455 // GetPointerBaseWithConstantOffset).
456 MemoryLocation ArgLoc = MemoryLocation::getAfter(II->getArgOperand(1));
457 if (BatchAA.isMustAlias(ArgLoc, MemLoc))
458 return MemDepResult::getDef(II);
459 continue;
460 }
461 case Intrinsic::masked_load:
462 case Intrinsic::masked_store: {
463 MemoryLocation Loc;
464 /*ModRefInfo MR =*/ GetLocation(II, Loc, TLI);
465 AliasResult R = BatchAA.alias(Loc, MemLoc);
466 if (R == AliasResult::NoAlias)
467 continue;
468 if (R == AliasResult::MustAlias)
469 return MemDepResult::getDef(II);
470 if (ID == Intrinsic::masked_load)
471 continue;
472 return MemDepResult::getClobber(II);
473 }
474 }
475 }
476
477 // Values depend on loads if the pointers are must aliased. This means
478 // that a load depends on another must aliased load from the same value.
479 // One exception is atomic loads: a value can depend on an atomic load that
480 // it does not alias with when this atomic load indicates that another
481 // thread may be accessing the location.
482 if (LoadInst *LI = dyn_cast<LoadInst>(Inst)) {
483 // While volatile accesses cannot be eliminated, they do not have to clobber
484 // non-aliasing locations, as normal accesses, for example, can be safely
485 // reordered with volatile accesses.
486 if (LI->isVolatile()) {
487 if (!QueryInst)
488 // Original QueryInst *may* be volatile
489 return MemDepResult::getClobber(LI);
490 if (QueryInst->isVolatile())
491 // Ordering required if QueryInst is itself volatile
492 return MemDepResult::getClobber(LI);
493 // Otherwise, volatile doesn't imply any special ordering
494 }
495
496 // Atomic loads have complications involved.
497 // A Monotonic (or higher) load is OK if the query inst is itself not
498 // atomic.
499 // FIXME: This is overly conservative.
500 if (LI->isAtomic() && isStrongerThanUnordered(LI->getOrdering())) {
501 if (!QueryInst ||
502 isComplexForReordering(QueryInst, AtomicOrdering::NotAtomic))
503 return MemDepResult::getClobber(LI);
504 if (LI->getOrdering() != AtomicOrdering::Monotonic)
505 return MemDepResult::getClobber(LI);
506 }
507
508 MemoryLocation LoadLoc = MemoryLocation::get(LI);
509
510 // If we found a pointer, check if it could be the same as our pointer.
511 AliasResult R = BatchAA.alias(LoadLoc, MemLoc);
512
513 if (R == AliasResult::NoAlias)
514 continue;
515
516 if (isLoad) {
517 // Must aliased loads are defs of each other.
518 if (R == AliasResult::MustAlias)
519 return MemDepResult::getDef(Inst);
520
521 // If we have a partial alias, then return this as a clobber for the
522 // client to handle.
523 if (R == AliasResult::PartialAlias && R.hasOffset()) {
524 ClobberOffsets[LI] = R.getOffset();
525 return MemDepResult::getClobber(Inst);
526 }
527
528 // Other may-alias loads don't impose a dependence on each other; keep
529 // scanning.
530 continue;
531 }
532
533 // Stores don't alias loads from read-only memory.
534 if (!isModSet(BatchAA.getModRefInfoMask(LoadLoc)))
535 continue;
536
537 // Stores depend on may/must aliased loads.
538 return MemDepResult::getDef(Inst);
539 }
540
541 if (StoreInst *SI = dyn_cast<StoreInst>(Inst)) {
542 // Atomic stores have complications involved.
543 // A Monotonic store is OK if the query inst is itself not atomic.
544 // FIXME: This is overly conservative.
545 if (!SI->isUnordered() && SI->isAtomic()) {
546 if (!QueryInst ||
547 isComplexForReordering(QueryInst, AtomicOrdering::Unordered))
548 return MemDepResult::getClobber(SI);
549 // Ok, if we are here, the guard above guarantees that
550 // QueryInst is a non-atomic or unordered load/store.
551 // SI is atomic with monotonic or release semantics (seq_cst for a store
552 // is really release semantics plus a total order over other seq_cst
553 // instructions; since QueryInst is not seq_cst, we can treat it
554 // as plain release semantics).
555 // Monotonic and release semantics allow reordering before the store,
556 // so we are safe to go further and check the aliasing. The aliasing check
557 // prohibits reordering in case the locations may or must alias.
558 }
559
560 // While volatile accesses cannot be eliminated, they do not have to clobber
561 // non-aliasing locations, as normal accesses can, for example, be reordered
562 // with volatile accesses.
563 if (SI->isVolatile())
564 if (!QueryInst || QueryInst->isVolatile())
565 return MemDepResult::getClobber(SI);
566
567 // If alias analysis can tell that this store is guaranteed to not modify
568 // the query pointer, ignore it. Use getModRefInfo to handle cases where
569 // the query pointer points to constant memory etc.
570 if (!isModOrRefSet(BatchAA.getModRefInfo(SI, MemLoc)))
571 continue;
572
573 // Ok, this store might clobber the query pointer. Check to see if it is
574 // a must alias: in this case, we want to return this as a def.
575 // FIXME: Use ModRefInfo::Must bit from getModRefInfo call above.
576 MemoryLocation StoreLoc = MemoryLocation::get(SI);
577
578 // If we found a pointer, check if it could be the same as our pointer.
579 AliasResult R = BatchAA.alias(StoreLoc, MemLoc);
580
581 if (R == AliasResult::NoAlias)
582 continue;
583 if (R == AliasResult::MustAlias)
584 return MemDepResult::getDef(Inst);
585 if (isInvariantLoad)
586 continue;
587 if (canSkipClobberingStore(SI, MemLoc, MemLocAlign, BatchAA, *Limit))
588 continue;
589 return MemDepResult::getClobber(Inst);
590 }
591
592 // If this is an allocation, and if we know that the accessed pointer is to
593 // the allocation, return Def. This means that there is no dependence and
594 // the access can be optimized based on that. For example, a load could
595 // turn into undef. Note that we can bypass the allocation itself when
596 // looking for a clobber in many cases; that's an alias property and is
597 // handled by BasicAA.
598 if (isa<AllocaInst>(Inst) || isNoAliasCall(Inst)) {
599 const Value *AccessPtr = getUnderlyingObject(MemLoc.Ptr);
600 if (AccessPtr == Inst || BatchAA.isMustAlias(Inst, AccessPtr))
601 return MemDepResult::getDef(Inst);
602 }
603
604 // If we found a select instruction for MemLoc pointer, return it as Def
605 // dependency.
606 if (isa<SelectInst>(Inst) && MemLoc.Ptr == Inst)
607 return MemDepResult::getDef(Inst);
608
609 if (isInvariantLoad)
610 continue;
611
612 // A release fence requires that all stores complete before it, but does
613 // not prevent the reordering of following loads or stores 'before' the
614 // fence. As a result, we look past it when finding a dependency for
615 // loads. DSE uses this to find preceding stores to delete and thus we
616 // can't bypass the fence if the query instruction is a store.
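// Illustrative: for a load query, a preceding "fence release" is scanned
// past, since the fence does not order later loads; for a store query it is
// not skipped, so DSE stays conservative.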
617 if (FenceInst *FI = dyn_cast<FenceInst>(Inst))
618 if (isLoad && FI->getOrdering() == AtomicOrdering::Release)
619 continue;
620
621 // See if this instruction (e.g. a call or vaarg) mod/ref's the pointer.
622 switch (BatchAA.getModRefInfo(Inst, MemLoc)) {
623 case ModRefInfo::NoModRef:
624 // If the call has no effect on the queried pointer, just ignore it.
625 continue;
626 case ModRefInfo::Mod:
627 return MemDepResult::getClobber(Inst);
628 case ModRefInfo::Ref:
629 // If the call is known to never store to the pointer, and if this is a
630 // load query, we can safely ignore it (scan past it).
631 if (isLoad)
632 continue;
633 [[fallthrough]];
634 default:
635 // Otherwise, there is a potential dependence. Return a clobber.
636 return MemDepResult::getClobber(Inst);
637 }
638 }
639
640 // No dependence found. If this is the entry block of the function, it is
641 // unknown, otherwise it is non-local.
642 if (BB != &BB->getParent()->getEntryBlock())
643 return MemDepResult::getNonLocal();
644 return MemDepResult::getNonFuncLocal();
645}
646
647MemDepResult MemoryDependenceResults::getDependency(Instruction *QueryInst) {
648 ClobberOffsets.clear();
649 Instruction *ScanPos = QueryInst;
650
651 // Check for a cached result
652 MemDepResult &LocalCache = LocalDeps[QueryInst];
653
654 // If the cached entry is non-dirty, just return it. Note that this depends
655 // on MemDepResult's default constructing to 'dirty'.
656 if (!LocalCache.isDirty())
657 return LocalCache;
658
659 // Otherwise, if we have a dirty entry, we know we can start the scan at that
660 // instruction, which may save us some work.
661 if (Instruction *Inst = LocalCache.getInst()) {
662 ScanPos = Inst;
663
664 RemoveFromReverseMap(ReverseLocalDeps, Inst, QueryInst);
665 }
666
667 BasicBlock *QueryParent = QueryInst->getParent();
668
669 // Do the scan.
670 if (BasicBlock::iterator(QueryInst) == QueryParent->begin()) {
671 // No dependence found. If this is the entry block of the function, it is
672 // unknown, otherwise it is non-local.
673 if (QueryParent != &QueryParent->getParent()->getEntryBlock())
674 LocalCache = MemDepResult::getNonLocal();
675 else
676 LocalCache = MemDepResult::getNonFuncLocal();
677 } else {
678 MemoryLocation MemLoc;
679 ModRefInfo MR = GetLocation(QueryInst, MemLoc, TLI);
680 if (MemLoc.Ptr) {
681 // If we can do a pointer scan, make it happen.
682 bool isLoad = !isModSet(MR);
683 if (auto *II = dyn_cast<IntrinsicInst>(QueryInst))
684 isLoad |= II->getIntrinsicID() == Intrinsic::lifetime_start;
685
686 LocalCache =
687 getPointerDependencyFrom(MemLoc, isLoad, ScanPos->getIterator(),
688 QueryParent, QueryInst, nullptr);
689 } else if (auto *QueryCall = dyn_cast<CallBase>(QueryInst)) {
690 bool isReadOnly = AA.onlyReadsMemory(QueryCall);
691 LocalCache = getCallDependencyFrom(QueryCall, isReadOnly,
692 ScanPos->getIterator(), QueryParent);
693 } else
694 // Non-memory instruction.
695 LocalCache = MemDepResult::getUnknown();
696 }
697
698 // Remember the result!
699 if (Instruction *I = LocalCache.getInst())
700 ReverseLocalDeps[I].insert(QueryInst);
701
702 return LocalCache;
703}
704
705#ifndef NDEBUG
706/// This method is used when -debug is specified to verify that cache arrays
707/// are properly kept sorted.
708static void AssertSorted(MemoryDependenceResults::NonLocalDepInfo &Cache,
709 int Count = -1) {
710 if (Count == -1)
711 Count = Cache.size();
712 assert(std::is_sorted(Cache.begin(), Cache.begin() + Count) &&
713 "Cache isn't sorted!");
714}
715#endif
716
717const MemoryDependenceResults::NonLocalDepInfo &
718MemoryDependenceResults::getNonLocalCallDependency(CallBase *QueryCall) {
719 assert(getDependency(QueryCall).isNonLocal() &&
720 "getNonLocalCallDependency should only be used on calls with "
721 "non-local deps!");
722 PerInstNLInfo &CacheP = NonLocalDepsMap[QueryCall];
723 NonLocalDepInfo &Cache = CacheP.first;
724
725 // This is the set of blocks that need to be recomputed. In the cached case,
726 // this can happen due to instructions being deleted etc. In the uncached
727 // case, this starts out as the set of predecessors we care about.
728 SmallVector<BasicBlock *, 32> DirtyBlocks;
729
730 if (!Cache.empty()) {
731 // Okay, we have a cache entry. If we know it is not dirty, just return it
732 // with no computation.
733 if (!CacheP.second) {
734 ++NumCacheNonLocal;
735 return Cache;
736 }
737
738 // If we already have a partially computed set of results, scan them to
739 // determine what is dirty, seeding our initial DirtyBlocks worklist.
740 for (auto &Entry : Cache)
741 if (Entry.getResult().isDirty())
742 DirtyBlocks.push_back(Entry.getBB());
743
744 // Sort the cache so that we can do fast binary search lookups below.
745 llvm::sort(Cache);
746
747 ++NumCacheDirtyNonLocal;
748 } else {
749 // Seed DirtyBlocks with each of the preds of QueryInst's block.
750 BasicBlock *QueryBB = QueryCall->getParent();
751 append_range(DirtyBlocks, PredCache.get(QueryBB));
752 ++NumUncacheNonLocal;
753 }
754
755 // isReadonlyCall - If this is a read-only call, we can be more aggressive.
756 bool isReadonlyCall = AA.onlyReadsMemory(QueryCall);
757
758 SmallPtrSet<BasicBlock *, 32> Visited;
759
760 unsigned NumSortedEntries = Cache.size();
761 LLVM_DEBUG(AssertSorted(Cache));
762
763 // Iterate while we still have blocks to update.
764 while (!DirtyBlocks.empty()) {
765 BasicBlock *DirtyBB = DirtyBlocks.pop_back_val();
766
767 // Already processed this block?
768 if (!Visited.insert(DirtyBB).second)
769 continue;
770
771 // Do a binary search to see if we already have an entry for this block in
772 // the cache set. If so, find it.
773 LLVM_DEBUG(AssertSorted(Cache, NumSortedEntries));
774 NonLocalDepInfo::iterator Entry =
775 std::upper_bound(Cache.begin(), Cache.begin() + NumSortedEntries,
776 NonLocalDepEntry(DirtyBB));
777 if (Entry != Cache.begin() && std::prev(Entry)->getBB() == DirtyBB)
778 --Entry;
779
780 NonLocalDepEntry *ExistingResult = nullptr;
781 if (Entry != Cache.begin() + NumSortedEntries &&
782 Entry->getBB() == DirtyBB) {
783 // If we already have an entry, and if it isn't already dirty, the block
784 // is done.
785 if (!Entry->getResult().isDirty())
786 continue;
787
788 // Otherwise, remember this slot so we can update the value.
789 ExistingResult = &*Entry;
790 }
791
792 // If the dirty entry has a pointer, start scanning from it so we don't have
793 // to rescan the entire block.
794 BasicBlock::iterator ScanPos = DirtyBB->end();
795 if (ExistingResult) {
796 if (Instruction *Inst = ExistingResult->getResult().getInst()) {
797 ScanPos = Inst->getIterator();
798 // We're removing QueryInst's use of Inst.
799 RemoveFromReverseMap<Instruction *>(ReverseNonLocalDeps, Inst,
800 QueryCall);
801 }
802 }
803
804 // Find out if this block has a local dependency for QueryInst.
805 MemDepResult Dep;
806
807 if (ScanPos != DirtyBB->begin()) {
808 Dep = getCallDependencyFrom(QueryCall, isReadonlyCall, ScanPos, DirtyBB);
809 } else if (DirtyBB != &DirtyBB->getParent()->getEntryBlock()) {
810 // No dependence found. If this is the entry block of the function, it is
811 // unknown, otherwise it is non-local.
812 Dep = MemDepResult::getNonLocal();
813 } else {
814 Dep = MemDepResult::getNonFuncLocal();
815 }
816
817 // If we had a dirty entry for the block, update it. Otherwise, just add
818 // a new entry.
819 if (ExistingResult)
820 ExistingResult->setResult(Dep);
821 else
822 Cache.push_back(NonLocalDepEntry(DirtyBB, Dep));
823
824 // If the block has a dependency (i.e. it isn't completely transparent to
825 // the value), remember the association!
826 if (!Dep.isNonLocal()) {
827 // Keep the ReverseNonLocalDeps map up to date so we can efficiently
828 // update this when we remove instructions.
829 if (Instruction *Inst = Dep.getInst())
830 ReverseNonLocalDeps[Inst].insert(QueryCall);
831 } else {
832
833 // If the block *is* completely transparent to the load, we need to check
834 // the predecessors of this block. Add them to our worklist.
835 append_range(DirtyBlocks, PredCache.get(DirtyBB));
836 }
837 }
838
839 return Cache;
840}
841
842void MemoryDependenceResults::getNonLocalPointerDependency(
843 Instruction *QueryInst, SmallVectorImpl<NonLocalDepResult> &Result) {
844 const MemoryLocation Loc = MemoryLocation::get(QueryInst);
845 bool isLoad = isa<LoadInst>(QueryInst);
846 BasicBlock *FromBB = QueryInst->getParent();
847 assert(FromBB);
848
849 assert(Loc.Ptr->getType()->isPointerTy() &&
850 "Can't get pointer deps of a non-pointer!");
851 Result.clear();
852 {
853 // Check if there is cached Def with invariant.group.
854 auto NonLocalDefIt = NonLocalDefsCache.find(QueryInst);
855 if (NonLocalDefIt != NonLocalDefsCache.end()) {
856 Result.push_back(NonLocalDefIt->second);
857 ReverseNonLocalDefsCache[NonLocalDefIt->second.getResult().getInst()]
858 .erase(QueryInst);
859 NonLocalDefsCache.erase(NonLocalDefIt);
860 return;
861 }
862 }
863 // This routine does not expect to deal with volatile instructions.
864 // Doing so would require piping through the QueryInst all the way through.
865 // TODO: volatiles can't be elided, but they can be reordered with other
866 // non-volatile accesses.
867
868 // We currently give up on any instruction which is ordered, but we do handle
869 // atomic instructions which are unordered.
870 // TODO: Handle ordered instructions
871 auto isOrdered = [](Instruction *Inst) {
872 if (LoadInst *LI = dyn_cast<LoadInst>(Inst)) {
873 return !LI->isUnordered();
874 } else if (StoreInst *SI = dyn_cast<StoreInst>(Inst)) {
875 return !SI->isUnordered();
876 }
877 return false;
878 };
879 if (QueryInst->isVolatile() || isOrdered(QueryInst)) {
880 Result.push_back(NonLocalDepResult(FromBB, MemDepResult::getUnknown(),
881 const_cast<Value *>(Loc.Ptr)));
882 return;
883 }
884 const DataLayout &DL = FromBB->getDataLayout();
885 PHITransAddr Address(const_cast<Value *>(Loc.Ptr), DL, &AC);
886
887 // This is the set of blocks we've inspected, and the pointer we consider in
888 // each block. Because of critical edges, we currently bail out if querying
889 // a block with multiple different pointers. This can happen during PHI
890 // translation.
891 DenseMap<BasicBlock *, Value *> Visited;
892 if (getNonLocalPointerDepFromBB(QueryInst, Address, Loc, isLoad, FromBB,
893 Result, Visited, true))
894 return;
895 Result.clear();
896 Result.push_back(NonLocalDepResult(FromBB, MemDepResult::getUnknown(),
897 const_cast<Value *>(Loc.Ptr)));
898}
899
900/// Compute the memdep value for BB with Pointer/PointeeSize using either
901/// cached information in Cache or by doing a lookup (which may use dirty cache
902/// info if available).
903///
904/// If we do a lookup, add the result to the cache.
905MemDepResult MemoryDependenceResults::getNonLocalInfoForBlock(
906 Instruction *QueryInst, const MemoryLocation &Loc, bool isLoad,
907 BasicBlock *BB, NonLocalDepInfo *Cache, unsigned NumSortedEntries,
908 BatchAAResults &BatchAA) {
909
910 bool isInvariantLoad = false;
911
912 if (LoadInst *LI = dyn_cast_or_null<LoadInst>(QueryInst))
913 isInvariantLoad = LI->getMetadata(LLVMContext::MD_invariant_load);
914
915 // Do a binary search to see if we already have an entry for this block in
916 // the cache set. If so, find it.
917 NonLocalDepInfo::iterator Entry = std::upper_bound(
918 Cache->begin(), Cache->begin() + NumSortedEntries, NonLocalDepEntry(BB));
919 if (Entry != Cache->begin() && (Entry - 1)->getBB() == BB)
920 --Entry;
921
922 NonLocalDepEntry *ExistingResult = nullptr;
923 if (Entry != Cache->begin() + NumSortedEntries && Entry->getBB() == BB)
924 ExistingResult = &*Entry;
925
926 // Use the cached result for an invariant load only if the non-invariant
927 // query found no dependency at all. In that case the invariant load cannot
928 // have any dependency either.
929 if (ExistingResult && isInvariantLoad &&
930 !ExistingResult->getResult().isNonFuncLocal())
931 ExistingResult = nullptr;
932
933 // If we have a cached entry, and it is non-dirty, use it as the value for
934 // this dependency.
935 if (ExistingResult && !ExistingResult->getResult().isDirty()) {
936 ++NumCacheNonLocalPtr;
937 return ExistingResult->getResult();
938 }
939
940 // Otherwise, we have to scan for the value. If we have a dirty cache
941 // entry, start scanning from its position, otherwise we scan from the end
942 // of the block.
943 BasicBlock::iterator ScanPos = BB->end();
944 if (ExistingResult && ExistingResult->getResult().getInst()) {
945 assert(ExistingResult->getResult().getInst()->getParent() == BB &&
946 "Instruction invalidated?");
947 ++NumCacheDirtyNonLocalPtr;
948 ScanPos = ExistingResult->getResult().getInst()->getIterator();
949
950 // Eliminating the dirty entry from 'Cache', so update the reverse info.
951 ValueIsLoadPair CacheKey(Loc.Ptr, isLoad);
952 RemoveFromReverseMap(ReverseNonLocalPtrDeps, &*ScanPos, CacheKey);
953 } else {
954 ++NumUncacheNonLocalPtr;
955 }
956
957 // Scan the block for the dependency.
958 MemDepResult Dep = getPointerDependencyFrom(Loc, isLoad, ScanPos, BB,
959 QueryInst, nullptr, BatchAA);
960
961 // Don't cache results for invariant load.
962 if (isInvariantLoad)
963 return Dep;
964
965 // If we had a dirty entry for the block, update it. Otherwise, just add
966 // a new entry.
967 if (ExistingResult)
968 ExistingResult->setResult(Dep);
969 else
970 Cache->push_back(NonLocalDepEntry(BB, Dep));
971
972 // If the block has a dependency (i.e. it isn't completely transparent to
973 // the value), remember the reverse association because we just added it
974 // to Cache!
975 if (!Dep.isLocal())
976 return Dep;
977
978 // Keep the ReverseNonLocalPtrDeps map up to date so we can efficiently
979 // update MemDep when we remove instructions.
980 Instruction *Inst = Dep.getInst();
981 assert(Inst && "Didn't depend on anything?");
982 ValueIsLoadPair CacheKey(Loc.Ptr, isLoad);
983 ReverseNonLocalPtrDeps[Inst].insert(CacheKey);
984 return Dep;
985}
986
987/// Sort the NonLocalDepInfo cache, given a certain number of elements in the
988/// array that are already properly ordered.
989///
990/// This is optimized for the case when only a few entries are added.
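// Descriptive note on the strategy below: zero new entries is a no-op, one or
// two new entries are inserted into place with an upper_bound search, and
// anything larger falls back to a full llvm::sort of the cache.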
991static void
992SortNonLocalDepInfoCache(MemoryDependenceResults::NonLocalDepInfo &Cache,
993 unsigned NumSortedEntries) {
994 switch (Cache.size() - NumSortedEntries) {
995 case 0:
996 // done, no new entries.
997 break;
998 case 2: {
999 // Two new entries, insert the last one into place.
1000 NonLocalDepEntry Val = Cache.back();
1001 Cache.pop_back();
1002 MemoryDependenceResults::NonLocalDepInfo::iterator Entry =
1003 std::upper_bound(Cache.begin(), Cache.end() - 1, Val);
1004 Cache.insert(Entry, Val);
1005 [[fallthrough]];
1006 }
1007 case 1:
1008 // One new entry, just insert the new value at the appropriate position.
1009 if (Cache.size() != 1) {
1010 NonLocalDepEntry Val = Cache.back();
1011 Cache.pop_back();
1012 MemoryDependenceResults::NonLocalDepInfo::iterator Entry =
1013 llvm::upper_bound(Cache, Val);
1014 Cache.insert(Entry, Val);
1015 }
1016 break;
1017 default:
1018 // Added many values, do a full scale sort.
1019 llvm::sort(Cache);
1020 break;
1021 }
1022}
1023
1024/// Perform a dependency query based on pointer/pointeesize starting at the end
1025/// of StartBB.
1026///
1027/// Add any clobber/def results to the results vector and keep track of which
1028/// blocks are visited in 'Visited'.
1029///
1030/// This has special behavior for the first block queries (when SkipFirstBlock
1031/// is true). In this special case, it ignores the contents of the specified
1032/// block and starts returning dependence info for its predecessors.
1033///
1034/// This function returns true on success, or false to indicate that it could
1035/// not compute dependence information for some reason. This should be treated
1036/// as a clobber dependence on the first instruction in the predecessor block.
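// Descriptive outline (not in the original source): results are cached per
// (pointer, is-load) key in NonLocalPointerDeps; the search walks a worklist
// of blocks backwards through predecessors, PHI-translating Pointer across
// blocks that define it. Blocks where PHI translation fails are recorded as
// unknown, and the function returns false when the result limit is exceeded,
// when cached results conflict with the Visited map, or when the very first
// block itself cannot be handled.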
1037bool MemoryDependenceResults::getNonLocalPointerDepFromBB(
1038 Instruction *QueryInst, const PHITransAddr &Pointer,
1039 const MemoryLocation &Loc, bool isLoad, BasicBlock *StartBB,
1040 SmallVectorImpl<NonLocalDepResult> &Result,
1041 DenseMap<BasicBlock *, Value *> &Visited, bool SkipFirstBlock,
1042 bool IsIncomplete) {
1043 // Look up the cached info for Pointer.
1044 ValueIsLoadPair CacheKey(Pointer.getAddr(), isLoad);
1045
1046 // Set up a temporary NLPI value. If the map doesn't yet have an entry for
1047 // CacheKey, this value will be inserted as the associated value. Otherwise,
1048 // it'll be ignored, and we'll have to check to see if the cached size and
1049 // aa tags are consistent with the current query.
1050 NonLocalPointerInfo InitialNLPI;
1051 InitialNLPI.Size = Loc.Size;
1052 InitialNLPI.AATags = Loc.AATags;
1053
1054 bool isInvariantLoad = false;
1055 if (LoadInst *LI = dyn_cast_or_null<LoadInst>(QueryInst))
1056 isInvariantLoad = LI->getMetadata(LLVMContext::MD_invariant_load);
1057
1058 // Get the NLPI for CacheKey, inserting one into the map if it doesn't
1059 // already have one.
1060 std::pair<CachedNonLocalPointerInfo::iterator, bool> Pair =
1061 NonLocalPointerDeps.insert(std::make_pair(CacheKey, InitialNLPI));
1062 NonLocalPointerInfo *CacheInfo = &Pair.first->second;
1063
1064 // If we already have a cache entry for this CacheKey, we may need to do some
1065 // work to reconcile the cache entry and the current query.
1066 // Invariant loads don't participate in caching. Thus no need to reconcile.
1067 if (!isInvariantLoad && !Pair.second) {
1068 if (CacheInfo->Size != Loc.Size) {
1069 bool ThrowOutEverything;
1070 if (CacheInfo->Size.hasValue() && Loc.Size.hasValue()) {
1071 // FIXME: We may be able to do better in the face of results with mixed
1072 // precision. We don't appear to get them in practice, though, so just
1073 // be conservative.
1074 ThrowOutEverything =
1075 CacheInfo->Size.isPrecise() != Loc.Size.isPrecise() ||
1076 !TypeSize::isKnownGE(CacheInfo->Size.getValue(),
1077 Loc.Size.getValue());
1078 } else {
1079 // For our purposes, unknown size > all others.
1080 ThrowOutEverything = !Loc.Size.hasValue();
1081 }
1082
1083 if (ThrowOutEverything) {
1084 // The query's Size is greater than the cached one. Throw out the
1085 // cached data and proceed with the query at the greater size.
1086 CacheInfo->Pair = BBSkipFirstBlockPair();
1087 CacheInfo->Size = Loc.Size;
1088 for (auto &Entry : CacheInfo->NonLocalDeps)
1089 if (Instruction *Inst = Entry.getResult().getInst())
1090 RemoveFromReverseMap(ReverseNonLocalPtrDeps, Inst, CacheKey);
1091 CacheInfo->NonLocalDeps.clear();
1092 // The cache is cleared (in the above line) so we will have lost
1093 // information about blocks we have already visited. We therefore must
1094 // assume that the cache information is incomplete.
1095 IsIncomplete = true;
1096 } else {
1097 // This query's Size is less than the cached one. Conservatively restart
1098 // the query using the greater size.
1099 return getNonLocalPointerDepFromBB(
1100 QueryInst, Pointer, Loc.getWithNewSize(CacheInfo->Size), isLoad,
1101 StartBB, Result, Visited, SkipFirstBlock, IsIncomplete);
1102 }
1103 }
1104
1105 // If the query's AATags are inconsistent with the cached one,
1106 // conservatively throw out the cached data and restart the query with
1107 // no tag if needed.
1108 if (CacheInfo->AATags != Loc.AATags) {
1109 if (CacheInfo->AATags) {
1110 CacheInfo->Pair = BBSkipFirstBlockPair();
1111 CacheInfo->AATags = AAMDNodes();
1112 for (auto &Entry : CacheInfo->NonLocalDeps)
1113 if (Instruction *Inst = Entry.getResult().getInst())
1114 RemoveFromReverseMap(ReverseNonLocalPtrDeps, Inst, CacheKey);
1115 CacheInfo->NonLocalDeps.clear();
1116 // The cache is cleared (in the above line) so we will have lost
1117 // information about blocks we have already visited. We therefore must
1118 // assume that the cache information is incomplete.
1119 IsIncomplete = true;
1120 }
1121 if (Loc.AATags)
1122 return getNonLocalPointerDepFromBB(
1123 QueryInst, Pointer, Loc.getWithoutAATags(), isLoad, StartBB, Result,
1124 Visited, SkipFirstBlock, IsIncomplete);
1125 }
1126 }
1127
1128 NonLocalDepInfo *Cache = &CacheInfo->NonLocalDeps;
1129
1130 // If we have valid cached information for exactly the block we are
1131 // investigating, just return it with no recomputation.
1132 // Don't use cached information for invariant loads since it is valid for
1133 // non-invariant loads only.
1134 if (!IsIncomplete && !isInvariantLoad &&
1135 CacheInfo->Pair == BBSkipFirstBlockPair(StartBB, SkipFirstBlock)) {
1136 // We have a fully cached result for this query then we can just return the
1137 // cached results and populate the visited set. However, we have to verify
1138 // that we don't already have conflicting results for these blocks. Check
1139 // to ensure that if a block in the results set is in the visited set that
1140 // it was for the same pointer query.
1141 if (!Visited.empty()) {
1142 for (auto &Entry : *Cache) {
1143 DenseMap<BasicBlock *, Value *>::iterator VI =
1144 Visited.find(Entry.getBB());
1145 if (VI == Visited.end() || VI->second == Pointer.getAddr())
1146 continue;
1147
1148 // We have a pointer mismatch in a block. Just return false, saying
1149 // that something was clobbered in this result. We could also do a
1150 // non-fully cached query, but there is little point in doing this.
1151 return false;
1152 }
1153 }
1154
1155 Value *Addr = Pointer.getAddr();
1156 for (auto &Entry : *Cache) {
1157 Visited.insert(std::make_pair(Entry.getBB(), Addr));
1158 if (Entry.getResult().isNonLocal()) {
1159 continue;
1160 }
1161
1162 if (DT.isReachableFromEntry(Entry.getBB())) {
1163 Result.push_back(
1164 NonLocalDepResult(Entry.getBB(), Entry.getResult(), Addr));
1165 }
1166 }
1167 ++NumCacheCompleteNonLocalPtr;
1168 return true;
1169 }
1170
1171 // Otherwise, either this is a new block, a block with an invalid cache
1172 // pointer, or one that we're about to invalidate by putting more info into
1173 // it than its valid cache info holds. If it is empty and not explicitly
1174 // marked incomplete, the result will be valid cache info; otherwise it isn't.
1175 //
1176 // Invariant loads don't affect cache in any way thus no need to update
1177 // CacheInfo as well.
1178 if (!isInvariantLoad) {
1179 if (!IsIncomplete && Cache->empty())
1180 CacheInfo->Pair = BBSkipFirstBlockPair(StartBB, SkipFirstBlock);
1181 else
1182 CacheInfo->Pair = BBSkipFirstBlockPair();
1183 }
1184
1185 SmallVector<BasicBlock *, 32> Worklist;
1186 Worklist.push_back(StartBB);
1187
1188 // PredList used inside loop.
1189 SmallVector<std::pair<BasicBlock *, PHITransAddr>, 16> PredList;
1190
1191 // Keep track of the entries that we know are sorted. Previously cached
1192 // entries will all be sorted. The entries we add we only sort on demand (we
1193 // don't insert every element into its sorted position). We know that we
1194 // won't get any reuse from currently inserted values, because we don't
1195 // revisit blocks after we insert info for them.
1196 unsigned NumSortedEntries = Cache->size();
1197 unsigned WorklistEntries = BlockNumberLimit;
1198 bool GotWorklistLimit = false;
1199 LLVM_DEBUG(AssertSorted(*Cache));
1200
1201 BatchAAResults BatchAA(AA, &EII);
1202 while (!Worklist.empty()) {
1203 BasicBlock *BB = Worklist.pop_back_val();
1204
1205 // If we process a large number of blocks it becomes very expensive and
1206 // likely isn't worth worrying about.
1207 if (Result.size() > NumResultsLimit) {
1208 // Sort it now (if needed) so that recursive invocations of
1209 // getNonLocalPointerDepFromBB and other routines that could reuse the
1210 // cache value will only see properly sorted cache arrays.
1211 if (Cache && NumSortedEntries != Cache->size()) {
1212 SortNonLocalDepInfoCache(*Cache, NumSortedEntries);
1213 }
1214 // Since we bail out, the "Cache" set won't contain all of the
1215 // results for the query. This is ok (we can still use it to accelerate
1216 // specific block queries) but we can't do the fastpath "return all
1217 // results from the set". Clear out the indicator for this.
1218 CacheInfo->Pair = BBSkipFirstBlockPair();
1219 return false;
1220 }
1221
1222 // Skip the first block if we have it.
1223 if (!SkipFirstBlock) {
1224 // Analyze the dependency of *Pointer in FromBB. See if we already have
1225 // been here.
1226 assert(Visited.count(BB) && "Should check 'visited' before adding to WL");
1227
1228 // Get the dependency info for Pointer in BB. If we have cached
1229 // information, we will use it, otherwise we compute it.
1230 LLVM_DEBUG(AssertSorted(*Cache, NumSortedEntries));
1231 MemDepResult Dep = getNonLocalInfoForBlock(
1232 QueryInst, Loc, isLoad, BB, Cache, NumSortedEntries, BatchAA);
1233
1234 // If we got a Def or Clobber, add this to the list of results.
1235 if (!Dep.isNonLocal()) {
1236 if (DT.isReachableFromEntry(BB)) {
1237 Result.push_back(NonLocalDepResult(BB, Dep, Pointer.getAddr()));
1238 continue;
1239 }
1240 }
1241 }
1242
1243 // If 'Pointer' is an instruction defined in this block, then we need to do
1244 // phi translation to change it into a value live in the predecessor block.
1245 // If not, we just add the predecessors to the worklist and scan them with
1246 // the same Pointer.
1247 if (!Pointer.needsPHITranslationFromBlock(BB)) {
1248 SkipFirstBlock = false;
1249 SmallVector<BasicBlock *, 16> NewBlocks;
1250 for (BasicBlock *Pred : PredCache.get(BB)) {
1251 // Verify that we haven't looked at this block yet.
1252 std::pair<DenseMap<BasicBlock *, Value *>::iterator, bool> InsertRes =
1253 Visited.insert(std::make_pair(Pred, Pointer.getAddr()));
1254 if (InsertRes.second) {
1255 // First time we've looked at this predecessor.
1256 NewBlocks.push_back(Pred);
1257 continue;
1258 }
1259
1260 // If we have seen this block before, but it was with a different
1261 // pointer then we have a phi translation failure and we have to treat
1262 // this as a clobber.
1263 if (InsertRes.first->second != Pointer.getAddr()) {
1264 // Make sure to clean up the Visited map before continuing on to
1265 // PredTranslationFailure.
1266 for (auto *NewBlock : NewBlocks)
1267 Visited.erase(NewBlock);
1268 goto PredTranslationFailure;
1269 }
1270 }
1271 if (NewBlocks.size() > WorklistEntries) {
1272 // Make sure to clean up the Visited map before continuing on to
1273 // PredTranslationFailure.
1274 for (auto *NewBlock : NewBlocks)
1275 Visited.erase(NewBlock);
1276 GotWorklistLimit = true;
1277 goto PredTranslationFailure;
1278 }
1279 WorklistEntries -= NewBlocks.size();
1280 Worklist.append(NewBlocks.begin(), NewBlocks.end());
1281 continue;
1282 }
1283
1284 // We do need to do phi translation, if we know ahead of time we can't phi
1285 // translate this value, don't even try.
1286 if (!Pointer.isPotentiallyPHITranslatable())
1287 goto PredTranslationFailure;
1288
1289 // We may have added values to the cache list before this PHI translation.
1290 // If so, we haven't done anything to ensure that the cache remains sorted.
1291 // Sort it now (if needed) so that recursive invocations of
1292 // getNonLocalPointerDepFromBB and other routines that could reuse the cache
1293 // value will only see properly sorted cache arrays.
1294 if (Cache && NumSortedEntries != Cache->size()) {
1295 SortNonLocalDepInfoCache(*Cache, NumSortedEntries);
1296 NumSortedEntries = Cache->size();
1297 }
1298 Cache = nullptr;
1299
1300 PredList.clear();
1301 for (BasicBlock *Pred : PredCache.get(BB)) {
1302 PredList.push_back(std::make_pair(Pred, Pointer));
1303
1304 // Get the PHI translated pointer in this predecessor. This can fail if
1305 // not translatable, in which case the getAddr() returns null.
1306 PHITransAddr &PredPointer = PredList.back().second;
1307 Value *PredPtrVal =
1308 PredPointer.translateValue(BB, Pred, &DT, /*MustDominate=*/false);
1309
1310 // Check to see if we have already visited this pred block with another
1311 // pointer. If so, we can't do this lookup. This failure can occur
1312 // with PHI translation when a critical edge exists and the PHI node in
1313 // the successor translates to a pointer value different than the
1314 // pointer the block was first analyzed with.
1315 std::pair<DenseMap<BasicBlock *, Value *>::iterator, bool> InsertRes =
1316 Visited.insert(std::make_pair(Pred, PredPtrVal));
1317
1318 if (!InsertRes.second) {
1319 // We found the pred; take it off the list of preds to visit.
1320 PredList.pop_back();
1321
1322 // If the predecessor was visited with PredPtr, then we already did
1323 // the analysis and can ignore it.
1324 if (InsertRes.first->second == PredPtrVal)
1325 continue;
1326
1327 // Otherwise, the block was previously analyzed with a different
1328 // pointer. We can't represent the result of this case, so we just
1329 // treat this as a phi translation failure.
1330
1331 // Make sure to clean up the Visited map before continuing on to
1332 // PredTranslationFailure.
1333 for (const auto &Pred : PredList)
1334 Visited.erase(Pred.first);
1335
1336 goto PredTranslationFailure;
1337 }
1338 }
1339
1340 // Actually process results here; this needs to be a separate loop to avoid
1341 // calling getNonLocalPointerDepFromBB for blocks we don't want to return
1342 // any results for. (getNonLocalPointerDepFromBB will modify our
1343 // datastructures in ways the code after the PredTranslationFailure label
1344 // doesn't expect.)
1345 for (auto &I : PredList) {
1346 BasicBlock *Pred = I.first;
1347 PHITransAddr &PredPointer = I.second;
1348 Value *PredPtrVal = PredPointer.getAddr();
1349
1350 bool CanTranslate = true;
1351 // If PHI translation was unable to find an available pointer in this
1352 // predecessor, then we have to assume that the pointer is clobbered in
1353 // that predecessor. We can still do PRE of the load, which would insert
1354 // a computation of the pointer in this predecessor.
1355 if (!PredPtrVal)
1356 CanTranslate = false;
1357
1358 // FIXME: it is entirely possible that PHI translating will end up with
1359 // the same value. Consider PHI translating something like:
1360 // X = phi [x, bb1], [y, bb2]. PHI translating for bb1 doesn't *need*
1361 // to recurse here, pedantically speaking.
1362
1363 // If getNonLocalPointerDepFromBB fails here, that means the cached
1364 // result conflicted with the Visited list; we have to conservatively
1365 // assume it is unknown, but this also does not block PRE of the load.
1366 if (!CanTranslate ||
1367 !getNonLocalPointerDepFromBB(QueryInst, PredPointer,
1368 Loc.getWithNewPtr(PredPtrVal), isLoad,
1369 Pred, Result, Visited)) {
1370 // Add the entry to the Result list.
1371 NonLocalDepResult Entry(Pred, MemDepResult::getUnknown(), PredPtrVal);
1372 Result.push_back(Entry);
1373
1374 // Since we had a phi translation failure, the cache for CacheKey won't
1375 // include all of the entries that we need to immediately satisfy future
1376 // queries. Mark this in NonLocalPointerDeps by setting the
1377 // BBSkipFirstBlockPair pointer to null. Later reuse of the cached value
1378 // will then do more work, but it will not miss the phi translation failure.
1379 NonLocalPointerInfo &NLPI = NonLocalPointerDeps[CacheKey];
1380 NLPI.Pair = BBSkipFirstBlockPair();
1381 continue;
1382 }
1383 }
1384
1385 // Refresh the CacheInfo/Cache pointer so that it isn't invalidated.
1386 CacheInfo = &NonLocalPointerDeps[CacheKey];
1387 Cache = &CacheInfo->NonLocalDeps;
1388 NumSortedEntries = Cache->size();
1389
1390 // Since we did phi translation, the "Cache" set won't contain all of the
1391 // results for the query. This is ok (we can still use it to accelerate
1392 // specific block queries) but we can't do the fastpath "return all
1393 // results from the set". Clear out the indicator for this.
1394 CacheInfo->Pair = BBSkipFirstBlockPair();
1395 SkipFirstBlock = false;
1396 continue;
1397
1398 PredTranslationFailure:
1399 // The following code is "failure"; we can't produce a sane translation
1400 // for the given block. It assumes that we haven't modified any of
1401 // our datastructures while processing the current block.
1402
1403 if (!Cache) {
1404 // Refresh the CacheInfo/Cache pointer if it got invalidated.
1405 CacheInfo = &NonLocalPointerDeps[CacheKey];
1406 Cache = &CacheInfo->NonLocalDeps;
1407 NumSortedEntries = Cache->size();
1408 }
1409
1410 // Since we failed phi translation, the "Cache" set won't contain all of the
1411 // results for the query. This is ok (we can still use it to accelerate
1412 // specific block queries) but we can't do the fastpath "return all
1413 // results from the set". Clear out the indicator for this.
1414 CacheInfo->Pair = BBSkipFirstBlockPair();
1415
1416 // If *nothing* works, mark the pointer as unknown.
1417 //
1418 // If this is the magic first block, return this as a clobber of the whole
1419 // incoming value. Since we can't phi translate to one of the predecessors,
1420 // we have to bail out.
1421 if (SkipFirstBlock)
1422 return false;
1423
1424 // Results of invariant loads are not cached thus no need to update cached
1425 // information.
1426 if (!isInvariantLoad) {
1427 for (NonLocalDepEntry &I : llvm::reverse(*Cache)) {
1428 if (I.getBB() != BB)
1429 continue;
1430
1431 assert((GotWorklistLimit || I.getResult().isNonLocal() ||
1432 !DT.isReachableFromEntry(BB)) &&
1433 "Should only be here with transparent block");
1434
1435 I.setResult(MemDepResult::getUnknown());
1436
1437
1438 break;
1439 }
1440 }
1441 (void)GotWorklistLimit;
1442 // Go ahead and report unknown dependence.
1443 Result.push_back(
1444 NonLocalDepResult(BB, MemDepResult::getUnknown(), Pointer.getAddr()));
1445 }
1446
1447 // Okay, we're done now. If we added new values to the cache, re-sort it.
1448 SortNonLocalDepInfoCache(*Cache, NumSortedEntries);
1449 LLVM_DEBUG(AssertSorted(*Cache));
1450 return true;
1451}
1452
1453/// If P exists in CachedNonLocalPointerInfo or NonLocalDefsCache, remove it.
1454void MemoryDependenceResults::removeCachedNonLocalPointerDependencies(
1455 ValueIsLoadPair P) {
1456
1457 // Most of the time this cache is empty.
1458 if (!NonLocalDefsCache.empty()) {
1459 auto it = NonLocalDefsCache.find(P.getPointer());
1460 if (it != NonLocalDefsCache.end()) {
1461 RemoveFromReverseMap(ReverseNonLocalDefsCache,
1462 it->second.getResult().getInst(), P.getPointer());
1463 NonLocalDefsCache.erase(it);
1464 }
1465
1466 if (auto *I = dyn_cast<Instruction>(P.getPointer())) {
1467 auto toRemoveIt = ReverseNonLocalDefsCache.find(I);
1468 if (toRemoveIt != ReverseNonLocalDefsCache.end()) {
1469 for (const auto *entry : toRemoveIt->second)
1470 NonLocalDefsCache.erase(entry);
1471 ReverseNonLocalDefsCache.erase(toRemoveIt);
1472 }
1473 }
1474 }
1475
1476 CachedNonLocalPointerInfo::iterator It = NonLocalPointerDeps.find(P);
1477 if (It == NonLocalPointerDeps.end())
1478 return;
1479
1480 // Remove all of the entries in the BB->val map. This involves removing
1481 // instructions from the reverse map.
1482 NonLocalDepInfo &PInfo = It->second.NonLocalDeps;
1483
1484 for (const NonLocalDepEntry &DE : PInfo) {
1485 Instruction *Target = DE.getResult().getInst();
1486 if (!Target)
1487 continue; // Ignore non-local dep results.
1488 assert(Target->getParent() == DE.getBB());
1489
1490 // Eliminating the dirty entry from 'Cache', so update the reverse info.
1491 RemoveFromReverseMap(ReverseNonLocalPtrDeps, Target, P);
1492 }
1493
1494 // Remove P from NonLocalPointerDeps (which deletes NonLocalDepInfo).
1495 NonLocalPointerDeps.erase(It);
1496}
1497
1498void MemoryDependenceResults::invalidateCachedPointerInfo(Value *Ptr) {
1499 // If Ptr isn't really a pointer, just ignore it.
1500 if (!Ptr->getType()->isPointerTy())
1501 return;
1502 // Flush store info for the pointer.
1503 removeCachedNonLocalPointerDependencies(ValueIsLoadPair(Ptr, false));
1504 // Flush load info for the pointer.
1505 removeCachedNonLocalPointerDependencies(ValueIsLoadPair(Ptr, true));
1506}
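// --- Illustrative usage sketch (not part of the upstream file) --------------
// A transformation that replaces one value with another (as GVN-style passes
// do) should flush the cached info for the replacement pointer, since the
// cached results about it may now be overly conservative. `MD`, `From`, and
// `To` are hypothetical.
static void exampleReplaceAndInvalidate(MemoryDependenceResults &MD,
                                        Instruction *From, Value *To) {
  From->replaceAllUsesWith(To);         // hypothetical GVN-style replacement
  if (To->getType()->isPointerTy())
    MD.invalidateCachedPointerInfo(To); // cached info about To may now be stale
}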
1507
1508void MemoryDependenceResults::invalidateCachedPredecessors() {
1509 PredCache.clear();
1510}
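// --- Illustrative usage sketch (not part of the upstream file) --------------
// Any client that edits the CFG must flush the predecessor cache, because
// non-local queries walk cached predecessor lists. `MD` is hypothetical and
// the actual CFG edit is elided.
static void exampleAfterCFGEdit(MemoryDependenceResults &MD) {
  // ... insert or remove a CFG edge here (details elided) ...
  MD.invalidateCachedPredecessors();
}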
1511
1512void MemoryDependenceResults::removeInstruction(Instruction *RemInst) {
1513 EII.removeInstruction(RemInst);
1514
1515 // Walk through the Non-local dependencies, removing this one as the value
1516 // for any cached queries.
1517 NonLocalDepMapType::iterator NLDI = NonLocalDepsMap.find(RemInst);
1518 if (NLDI != NonLocalDepsMap.end()) {
1519 NonLocalDepInfo &BlockMap = NLDI->second.first;
1520 for (auto &Entry : BlockMap)
1521 if (Instruction *Inst = Entry.getResult().getInst())
1522 RemoveFromReverseMap(ReverseNonLocalDeps, Inst, RemInst);
1523 NonLocalDepsMap.erase(NLDI);
1524 }
1525
1526 // If we have a cached local dependence query for this instruction, remove it.
1527 LocalDepMapType::iterator LocalDepEntry = LocalDeps.find(RemInst);
1528 if (LocalDepEntry != LocalDeps.end()) {
1529 // Remove us from DepInst's reverse set now that the local dep info is gone.
1530 if (Instruction *Inst = LocalDepEntry->second.getInst())
1531 RemoveFromReverseMap(ReverseLocalDeps, Inst, RemInst);
1532
1533 // Remove this local dependency info.
1534 LocalDeps.erase(LocalDepEntry);
1535 }
1536
1537 // If we have any cached dependencies on this instruction, remove
1538 // them.
1539
1540 // If the instruction is a pointer, remove it from both the load info and the
1541 // store info.
1542 if (RemInst->getType()->isPointerTy()) {
1543 removeCachedNonLocalPointerDependencies(ValueIsLoadPair(RemInst, false));
1544 removeCachedNonLocalPointerDependencies(ValueIsLoadPair(RemInst, true));
1545 } else {
1546 // Otherwise, if the instruction is in the map directly, it must be a load.
1547 // Remove it.
1548 auto toRemoveIt = NonLocalDefsCache.find(RemInst);
1549 if (toRemoveIt != NonLocalDefsCache.end()) {
1550 assert(isa<LoadInst>(RemInst) &&
1551 "only load instructions should be added directly");
1552 const Instruction *DepV = toRemoveIt->second.getResult().getInst();
1553 ReverseNonLocalDefsCache.find(DepV)->second.erase(RemInst);
1554 NonLocalDefsCache.erase(toRemoveIt);
1555 }
1556 }
1557
1558 // Loop over all of the things that depend on the instruction we're removing.
1559 SmallVector<std::pair<Instruction *, Instruction *>, 8> ReverseDepsToAdd;
1560
1561 // If we find RemInst as a clobber or Def in any of the maps for other values,
1562 // we need to replace its entry with a dirty version of the instruction after
1563 // it. If RemInst is a terminator, we use a null dirty value.
1564 //
1565 // Using a dirty version of the instruction after RemInst saves having to scan
1566 // the entire block to get to this point.
1567 MemDepResult NewDirtyVal;
1568 if (!RemInst->isTerminator())
1569 NewDirtyVal = MemDepResult::getDirty(&*++RemInst->getIterator());
1570
1571 ReverseDepMapType::iterator ReverseDepIt = ReverseLocalDeps.find(RemInst);
1572 if (ReverseDepIt != ReverseLocalDeps.end()) {
1573 // RemInst can't be the terminator if it has local stuff depending on it.
1574 assert(!ReverseDepIt->second.empty() && !RemInst->isTerminator() &&
1575 "Nothing can locally depend on a terminator");
1576
1577 for (Instruction *InstDependingOnRemInst : ReverseDepIt->second) {
1578 assert(InstDependingOnRemInst != RemInst &&
1579 "Already removed our local dep info");
1580
1581 LocalDeps[InstDependingOnRemInst] = NewDirtyVal;
1582
1583 // Make sure to remember that new things depend on NewDirtyVal's instruction.
1584 assert(NewDirtyVal.getInst() &&
1585 "There is no way something else can have "
1586 "a local dep on this if it is a terminator!");
1587 ReverseDepsToAdd.push_back(
1588 std::make_pair(NewDirtyVal.getInst(), InstDependingOnRemInst));
1589 }
1590
1591 ReverseLocalDeps.erase(ReverseDepIt);
1592
1593 // Add new reverse deps after scanning the set, to avoid invalidating the
1594 // 'ReverseDeps' reference.
1595 while (!ReverseDepsToAdd.empty()) {
1596 ReverseLocalDeps[ReverseDepsToAdd.back().first].insert(
1597 ReverseDepsToAdd.back().second);
1598 ReverseDepsToAdd.pop_back();
1599 }
1600 }
1601
1602 ReverseDepIt = ReverseNonLocalDeps.find(RemInst);
1603 if (ReverseDepIt != ReverseNonLocalDeps.end()) {
1604 for (Instruction *I : ReverseDepIt->second) {
1605 assert(I != RemInst && "Already removed NonLocalDep info for RemInst");
1606
1607 PerInstNLInfo &INLD = NonLocalDepsMap[I];
1608 // The information is now dirty!
1609 INLD.second = true;
1610
1611 for (auto &Entry : INLD.first) {
1612 if (Entry.getResult().getInst() != RemInst)
1613 continue;
1614
1615 // Convert to a dirty entry for the subsequent instruction.
1616 Entry.setResult(NewDirtyVal);
1617
1618 if (Instruction *NextI = NewDirtyVal.getInst())
1619 ReverseDepsToAdd.push_back(std::make_pair(NextI, I));
1620 }
1621 }
1622
1623 ReverseNonLocalDeps.erase(ReverseDepIt);
1624
1625 // Add new reverse deps after scanning the set, to avoid invalidating it.
1626 while (!ReverseDepsToAdd.empty()) {
1627 ReverseNonLocalDeps[ReverseDepsToAdd.back().first].insert(
1628 ReverseDepsToAdd.back().second);
1629 ReverseDepsToAdd.pop_back();
1630 }
1631 }
1632
1633 // If the instruction is in ReverseNonLocalPtrDeps then it appears as a
1634 // value in the NonLocalPointerDeps info.
1635 ReverseNonLocalPtrDepTy::iterator ReversePtrDepIt =
1636 ReverseNonLocalPtrDeps.find(RemInst);
1637 if (ReversePtrDepIt != ReverseNonLocalPtrDeps.end()) {
1638 SmallVector<std::pair<Instruction *, ValueIsLoadPair>, 8>
1639 ReversePtrDepsToAdd;
1640
1641 for (ValueIsLoadPair P : ReversePtrDepIt->second) {
1642 assert(P.getPointer() != RemInst &&
1643 "Already removed NonLocalPointerDeps info for RemInst");
1644
1645 NonLocalDepInfo &NLPDI = NonLocalPointerDeps[P].NonLocalDeps;
1646
1647 // The cache is not valid for any specific block anymore.
1648 NonLocalPointerDeps[P].Pair = BBSkipFirstBlockPair();
1649
1650 // Update any entries for RemInst to use the instruction after it.
1651 for (auto &Entry : NLPDI) {
1652 if (Entry.getResult().getInst() != RemInst)
1653 continue;
1654
1655 // Convert to a dirty entry for the subsequent instruction.
1656 Entry.setResult(NewDirtyVal);
1657
1658 if (Instruction *NewDirtyInst = NewDirtyVal.getInst())
1659 ReversePtrDepsToAdd.push_back(std::make_pair(NewDirtyInst, P));
1660 }
1661
1662 // Re-sort the NonLocalDepInfo. Changing the dirty entry to its
1663 // subsequent value may have broken the sort order.
1664 llvm::sort(NLPDI);
1665 }
1666
1667 ReverseNonLocalPtrDeps.erase(ReversePtrDepIt);
1668
1669 while (!ReversePtrDepsToAdd.empty()) {
1670 ReverseNonLocalPtrDeps[ReversePtrDepsToAdd.back().first].insert(
1671 ReversePtrDepsToAdd.back().second);
1672 ReversePtrDepsToAdd.pop_back();
1673 }
1674 }
1675
1676 assert(!NonLocalDepsMap.count(RemInst) && "RemInst got reinserted?");
1677 LLVM_DEBUG(verifyRemoved(RemInst));
1678}
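// --- Illustrative usage sketch (not part of the upstream file) --------------
// Clients must notify the analysis before erasing an instruction so that
// cached queries are converted to dirty entries instead of dangling pointers.
// `MD` and `DeadInst` are hypothetical.
static void exampleEraseDeadInstruction(MemoryDependenceResults &MD,
                                        Instruction *DeadInst) {
  MD.removeInstruction(DeadInst); // dirty every cached dep that names DeadInst
  DeadInst->eraseFromParent();    // only now is it safe to delete the IR
}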
1679
1680/// Verify that the specified instruction does not occur in our internal data
1681/// structures.
1682///
1683/// This function verifies by asserting in debug builds.
1684void MemoryDependenceResults::verifyRemoved(Instruction *D) const {
1685#ifndef NDEBUG
1686 for (const auto &DepKV : LocalDeps) {
1687 assert(DepKV.first != D && "Inst occurs in data structures");
1688 assert(DepKV.second.getInst() != D && "Inst occurs in data structures");
1689 }
1690
1691 for (const auto &DepKV : NonLocalPointerDeps) {
1692 assert(DepKV.first.getPointer() != D && "Inst occurs in NLPD map key");
1693 for (const auto &Entry : DepKV.second.NonLocalDeps)
1694 assert(Entry.getResult().getInst() != D && "Inst occurs as NLPD value");
1695 }
1696
1697 for (const auto &DepKV : NonLocalDepsMap) {
1698 assert(DepKV.first != D && "Inst occurs in data structures");
1699 const PerInstNLInfo &INLD = DepKV.second;
1700 for (const auto &Entry : INLD.first)
1701 assert(Entry.getResult().getInst() != D &&
1702 "Inst occurs in data structures");
1703 }
1704
1705 for (const auto &DepKV : ReverseLocalDeps) {
1706 assert(DepKV.first != D && "Inst occurs in data structures");
1707 for (Instruction *Inst : DepKV.second)
1708 assert(Inst != D && "Inst occurs in data structures");
1709 }
1710
1711 for (const auto &DepKV : ReverseNonLocalDeps) {
1712 assert(DepKV.first != D && "Inst occurs in data structures");
1713 for (Instruction *Inst : DepKV.second)
1714 assert(Inst != D && "Inst occurs in data structures");
1715 }
1716
1717 for (const auto &DepKV : ReverseNonLocalPtrDeps) {
1718 assert(DepKV.first != D && "Inst occurs in rev NLPD map");
1719
1720 for (ValueIsLoadPair P : DepKV.second)
1721 assert(P != ValueIsLoadPair(D, false) && P != ValueIsLoadPair(D, true) &&
1722 "Inst occurs in ReverseNonLocalPtrDeps map");
1723 }
1724#endif
1725}
1726
1727AnalysisKey MemoryDependenceAnalysis::Key;
1728
1729MemoryDependenceAnalysis::MemoryDependenceAnalysis()
1730 : DefaultBlockScanLimit(BlockScanLimit) {}
1731
1732MemoryDependenceResults
1733MemoryDependenceAnalysis::run(Function &F, FunctionAnalysisManager &AM) {
1734 auto &AA = AM.getResult<AAManager>(F);
1735 auto &AC = AM.getResult<AssumptionAnalysis>(F);
1736 auto &TLI = AM.getResult<TargetLibraryAnalysis>(F);
1737 auto &DT = AM.getResult<DominatorTreeAnalysis>(F);
1738 return MemoryDependenceResults(AA, AC, TLI, DT, DefaultBlockScanLimit);
1739}
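// --- Illustrative usage sketch (not part of the upstream file) --------------
// Under the new pass manager, a client obtains the results through its
// analysis manager and can then issue local or non-local queries. The helper
// name is hypothetical.
static MemDepResult exampleQueryViaManager(Function &F,
                                           FunctionAnalysisManager &FAM) {
  auto &MD = FAM.getResult<MemoryDependenceAnalysis>(F);
  Instruction &FirstI = *F.getEntryBlock().begin();
  return MD.getDependency(&FirstI); // local dependency of the first instruction
}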
1740
1741char MemoryDependenceWrapperPass::ID = 0;
1742
1743INITIALIZE_PASS_BEGIN(MemoryDependenceWrapperPass, "memdep",
1744 "Memory Dependence Analysis", false, true)
1745INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
1746INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
1747INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
1748INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
1749INITIALIZE_PASS_END(MemoryDependenceWrapperPass, "memdep",
1750 "Memory Dependence Analysis", false, true)
1751
1752MemoryDependenceWrapperPass::MemoryDependenceWrapperPass() : FunctionPass(ID) {
1753 initializeMemoryDependenceWrapperPassPass(*PassRegistry::getPassRegistry());
1754}
1755
1756MemoryDependenceWrapperPass::~MemoryDependenceWrapperPass() = default;
1757
1758void MemoryDependenceWrapperPass::releaseMemory() {
1759 MemDep.reset();
1760}
1761
1762void MemoryDependenceWrapperPass::getAnalysisUsage(AnalysisUsage &AU) const {
1763 AU.setPreservesAll();
1764 AU.addRequired<AssumptionCacheTracker>();
1765 AU.addRequiredTransitive<AAResultsWrapperPass>();
1766 AU.addRequiredTransitive<TargetLibraryInfoWrapperPass>();
1767 AU.addRequiredTransitive<DominatorTreeWrapperPass>();
1768}
1769
1770bool MemoryDependenceResults::invalidate(Function &F, const PreservedAnalyses &PA,
1771 FunctionAnalysisManager::Invalidator &Inv) {
1772 // Check whether our analysis is preserved.
1773 auto PAC = PA.getChecker<MemoryDependenceAnalysis>();
1774 if (!PAC.preserved() && !PAC.preservedSet<AllAnalysesOn<Function>>())
1775 // If not, give up now.
1776 return true;
1777
1778 // Check whether the analyses we depend on became invalid for any reason.
1779 if (Inv.invalidate<AAManager>(F, PA) ||
1780 Inv.invalidate<AssumptionAnalysis>(F, PA) ||
1781 Inv.invalidate<DominatorTreeAnalysis>(F, PA))
1782 return true;
1783
1784 // Otherwise this analysis result remains valid.
1785 return false;
1786}
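// --- Illustrative usage sketch (not part of the upstream file) --------------
// A transformation that keeps this analysis up to date (for example by calling
// removeInstruction for every deleted instruction) can mark it preserved so
// that invalidate() above returns false and the cache survives the pass. The
// helper name is hypothetical.
static PreservedAnalyses examplePreserveMemDep() {
  PreservedAnalyses PA = PreservedAnalyses::none();
  PA.preserve<MemoryDependenceAnalysis>();
  return PA;
}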
1787
1788unsigned MemoryDependenceResults::getDefaultBlockScanLimit() const {
1789 return DefaultBlockScanLimit;
1790}
1791
1792bool MemoryDependenceWrapperPass::runOnFunction(Function &F) {
1793 auto &AA = getAnalysis<AAResultsWrapperPass>().getAAResults();
1794 auto &AC = getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
1795 auto &TLI = getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F);
1796 auto &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
1797 MemDep.emplace(AA, AC, TLI, DT, BlockScanLimit);
1798 return false;
1799}
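// --- Illustrative usage sketch (not part of the upstream file) --------------
// A legacy-pass-manager client declares the dependency in getAnalysisUsage and
// pulls the results out of the wrapper in runOnFunction. `ExampleLegacyPass`
// is hypothetical; getMemDep() is the wrapper's accessor declared in the
// header.
namespace {
struct ExampleLegacyPass : FunctionPass {
  static char ID;
  ExampleLegacyPass() : FunctionPass(ID) {}
  void getAnalysisUsage(AnalysisUsage &AU) const override {
    AU.setPreservesAll();
    AU.addRequired<MemoryDependenceWrapperPass>();
  }
  bool runOnFunction(Function &F) override {
    MemoryDependenceResults &MD =
        getAnalysis<MemoryDependenceWrapperPass>().getMemDep();
    (void)MD; // issue getDependency / getNonLocalPointerDependency queries here
    return false;
  }
};
char ExampleLegacyPass::ID = 0;
} // end anonymous namespace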