LLVM 23.0.0git
MemoryDependenceAnalysis.cpp
Go to the documentation of this file.
1//===- MemoryDependenceAnalysis.cpp - Mem Deps Implementation -------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file implements an analysis that determines, for a given memory
10// operation, what preceding memory operations it depends on. It builds on
11// alias analysis information, and tries to provide a lazy, caching interface to
12// a common kind of alias information query.
13//
14//===----------------------------------------------------------------------===//
15
17#include "llvm/ADT/DenseMap.h"
18#include "llvm/ADT/STLExtras.h"
21#include "llvm/ADT/Statistic.h"
29#include "llvm/IR/BasicBlock.h"
30#include "llvm/IR/Dominators.h"
31#include "llvm/IR/Function.h"
32#include "llvm/IR/InstrTypes.h"
33#include "llvm/IR/Instruction.h"
36#include "llvm/IR/LLVMContext.h"
37#include "llvm/IR/Metadata.h"
38#include "llvm/IR/Module.h"
40#include "llvm/IR/Type.h"
41#include "llvm/IR/Use.h"
42#include "llvm/IR/Value.h"
44#include "llvm/Pass.h"
49#include "llvm/Support/Debug.h"
50#include <algorithm>
51#include <cassert>
52#include <iterator>
53#include <utility>
54
55using namespace llvm;
56
57#define DEBUG_TYPE "memdep"
58
// -stats counters tracking cache hit/miss behavior of the non-local
// dependence queries implemented below.
59STATISTIC(NumCacheNonLocal, "Number of fully cached non-local responses");
60STATISTIC(NumCacheDirtyNonLocal, "Number of dirty cached non-local responses");
61STATISTIC(NumUncacheNonLocal, "Number of uncached non-local responses");
62
63STATISTIC(NumCacheNonLocalPtr,
64 "Number of fully cached non-local ptr responses");
65STATISTIC(NumCacheDirtyNonLocalPtr,
66 "Number of cached, but dirty, non-local ptr responses");
67STATISTIC(NumUncacheNonLocalPtr, "Number of uncached non-local ptr responses");
68STATISTIC(NumCacheCompleteNonLocalPtr,
69 "Number of block queries that were completely cached");
70
71// Limit for the number of instructions to scan in a block.
72
// NOTE(review): extraction dropped the declaration line here (likely
// `static cl::opt<unsigned> BlockScanLimit(`) -- restore from upstream.
74 "memdep-block-scan-limit", cl::Hidden, cl::init(100),
75 cl::desc("The number of instructions to scan in a block in memory "
76 "dependency analysis (default = 100)"));
77
// NOTE(review): the `static cl::opt<unsigned>` introducer line for
// BlockNumberLimit appears to be missing here -- confirm against upstream.
79 BlockNumberLimit("memdep-block-number-limit", cl::Hidden, cl::init(200),
80 cl::desc("The number of blocks to scan during memory "
81 "dependency analysis (default = 200)"));
82
// NOTE(review): extraction dropped the declaration line here (likely a
// `static cl::opt<unsigned>` for the cache-global limit) -- confirm upstream.
84 "memdep-cache-global-limit", cl::Hidden, cl::init(10000),
85 cl::desc("The max number of entries allowed in a cache (default = 10000)"));
86
87// Limit on the number of memdep results to process.
88static const unsigned int NumResultsLimit = 100;
89
90/// This is a helper function that removes Val from 'Inst's set in ReverseMap.
91///
92/// If the set becomes empty, remove Inst's entry.
93template <typename KeyTy>
94static void
// NOTE(review): the function-name/first-parameter line (the
// `RemoveFromReverseMap(DenseMap<...> &ReverseMap,` header) was dropped by
// extraction -- restore from upstream.
96 Instruction *Inst, KeyTy Val) {
// NOTE(review): the iterator declaration that receives the result of
// `ReverseMap.find(Inst)` (presumably `InstIt`, used by the asserts below)
// was dropped here -- restore from upstream.
98 ReverseMap.find(Inst);
99 assert(InstIt != ReverseMap.end() && "Reverse map out of sync?");
100 bool Found = InstIt->second.erase(Val);
101 assert(Found && "Invalid reverse map!");
102 (void)Found;
103 if (InstIt->second.empty())
104 ReverseMap.erase(InstIt);
105}
106
107/// If the given instruction references a specific memory location, fill in Loc
108/// with the details, otherwise set Loc.Ptr to null.
109///
110/// Returns a ModRefInfo value describing the general behavior of the
111/// instruction.
// NOTE(review): the signature line (likely `static ModRefInfo
// GetLocation(const Instruction *Inst, MemoryLocation &Loc,`) was dropped by
// extraction. Several `Loc = MemoryLocation::get(...)` assignment lines in
// the branches below also appear to be missing -- confirm against upstream.
113 const TargetLibraryInfo &TLI) {
114 if (const LoadInst *LI = dyn_cast<LoadInst>(Inst)) {
115 if (LI->isUnordered()) {
117 return ModRefInfo::Ref;
118 }
119 if (LI->getOrdering() == AtomicOrdering::Monotonic) {
121 return ModRefInfo::ModRef;
122 }
124 return ModRefInfo::ModRef;
125 }
126
127 if (const StoreInst *SI = dyn_cast<StoreInst>(Inst)) {
128 if (SI->isUnordered()) {
130 return ModRefInfo::Mod;
131 }
132 if (SI->getOrdering() == AtomicOrdering::Monotonic) {
134 return ModRefInfo::ModRef;
135 }
137 return ModRefInfo::ModRef;
138 }
139
140 if (const VAArgInst *V = dyn_cast<VAArgInst>(Inst)) {
142 return ModRefInfo::ModRef;
143 }
144
145 if (const CallBase *CB = dyn_cast<CallBase>(Inst)) {
146 if (Value *FreedOp = getFreedOperand(CB, &TLI)) {
147 // calls to free() deallocate the entire structure
148 Loc = MemoryLocation::getAfter(FreedOp);
149 return ModRefInfo::Mod;
150 }
151 }
152
153 if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(Inst)) {
154 switch (II->getIntrinsicID()) {
155 case Intrinsic::lifetime_start:
156 case Intrinsic::lifetime_end:
158 // These intrinsics don't really modify the memory, but returning Mod
159 // will allow them to be handled conservatively.
160 return ModRefInfo::Mod;
161 case Intrinsic::invariant_start:
163 // These intrinsics don't really modify the memory, but returning Mod
164 // will allow them to be handled conservatively.
165 return ModRefInfo::Mod;
166 case Intrinsic::invariant_end:
168 // These intrinsics don't really modify the memory, but returning Mod
169 // will allow them to be handled conservatively.
170 return ModRefInfo::Mod;
171 case Intrinsic::masked_load:
173 return ModRefInfo::Ref;
174 case Intrinsic::masked_store:
176 return ModRefInfo::Mod;
177 default:
178 break;
179 }
180 }
181
182 // Otherwise, just do the coarse-grained thing that always works.
183 if (Inst->mayWriteToMemory())
184 return ModRefInfo::ModRef;
185 if (Inst->mayReadFromMemory())
186 return ModRefInfo::Ref;
// NOTE(review): the fall-through return (likely `return
// ModRefInfo::NoModRef;`) was dropped here by extraction -- restore upstream.
188}
189
190/// Private helper for finding the local dependencies of a call site.
191MemDepResult MemoryDependenceResults::getCallDependencyFrom(
192 CallBase *Call, bool isReadOnlyCall, BasicBlock::iterator ScanIt,
193 BasicBlock *BB) {
194 unsigned Limit = getDefaultBlockScanLimit();
195
196 // Walk backwards through the block, looking for dependencies.
197 while (ScanIt != BB->begin()) {
198 Instruction *Inst = &*--ScanIt;
199
200 // Limit the amount of scanning we do so we don't end up with quadratic
201 // running time on extreme testcases.
202 --Limit;
203 if (!Limit)
// NOTE(review): the body of this `if` (likely `return
// MemDepResult::getUnknown();`) was dropped here by extraction.
205
206 // If this inst is a memory op, get the pointer it accessed
207 MemoryLocation Loc;
208 ModRefInfo MR = GetLocation(Inst, Loc, TLI);
209 if (Loc.Ptr) {
210 // A simple instruction.
211 if (isModOrRefSet(AA.getModRefInfo(Call, Loc)))
212 return MemDepResult::getClobber(Inst);
213 continue;
214 }
215
216 if (auto *CallB = dyn_cast<CallBase>(Inst)) {
217 // If these two calls do not interfere, look past it.
218 if (isNoModRef(AA.getModRefInfo(Call, CallB))) {
219 // If the two calls are the same, return Inst as a Def, so that
220 // Call can be found redundant and eliminated.
// NOTE(review): the second half of this condition (presumably the
// identical-call check against CallB) was dropped here by extraction --
// restore from upstream before compiling.
221 if (isReadOnlyCall && !isModSet(MR) &&
223 return MemDepResult::getDef(Inst);
224
225 // Otherwise if the two calls don't interact (e.g. CallB is readnone)
226 // keep scanning.
227 continue;
228 } else
229 return MemDepResult::getClobber(Inst);
230 }
231
232 // If we could not obtain a pointer for the instruction and the instruction
233 // touches memory then assume that this is a dependency.
234 if (isModOrRefSet(MR))
235 return MemDepResult::getClobber(Inst);
236 }
237
238 // No dependence found. If this is the entry block of the function, it is
239 // unknown, otherwise it is non-local.
240 if (BB != &BB->getParent()->getEntryBlock())
// NOTE(review): the two returns (likely `return MemDepResult::getNonLocal();`
// and `return MemDepResult::getNonFuncLocal();`) were dropped here.
243}
244
// Combines the invariant.group dependency (for loads) with the ordinary
// pointer dependency scan, preferring a Def from either source.
// NOTE(review): the signature line (likely `MemDepResult
// MemoryDependenceResults::getPointerDependencyFrom(`) was dropped by
// extraction -- restore from upstream.
246 const MemoryLocation &MemLoc, bool isLoad, BasicBlock::iterator ScanIt,
247 BasicBlock *BB, Instruction *QueryInst, unsigned *Limit,
248 BatchAAResults &BatchAA) {
249 MemDepResult InvariantGroupDependency = MemDepResult::getUnknown();
250 if (QueryInst != nullptr) {
251 if (auto *LI = dyn_cast<LoadInst>(QueryInst)) {
252 InvariantGroupDependency = getInvariantGroupPointerDependency(LI, BB);
253
254 if (InvariantGroupDependency.isDef())
255 return InvariantGroupDependency;
256 }
257 }
// NOTE(review): the declaration line feeding these arguments (likely
// `MemDepResult SimpleDep = getSimplePointerDependencyFrom(`) was dropped
// here by extraction -- SimpleDep is used below.
259 MemLoc, isLoad, ScanIt, BB, QueryInst, Limit, BatchAA);
260 if (SimpleDep.isDef())
261 return SimpleDep;
262 // Non-local invariant group dependency indicates there is non local Def
263 // (it only returns nonLocal if it finds nonLocal def), which is better than
264 // local clobber and everything else.
265 if (InvariantGroupDependency.isNonLocal())
266 return InvariantGroupDependency;
267
268 assert(InvariantGroupDependency.isUnknown() &&
269 "InvariantGroupDependency should be only unknown at this point");
270 return SimpleDep;
271}
272
// Convenience overload: builds a local BatchAAResults and forwards to the
// BatchAA-taking overload above.
// NOTE(review): the signature line (likely `MemDepResult
// MemoryDependenceResults::getPointerDependencyFrom(`) was dropped by
// extraction -- restore from upstream.
274 const MemoryLocation &MemLoc, bool isLoad, BasicBlock::iterator ScanIt,
275 BasicBlock *BB, Instruction *QueryInst, unsigned *Limit) {
276 BatchAAResults BatchAA(AA, &EEA);
277 return getPointerDependencyFrom(MemLoc, isLoad, ScanIt, BB, QueryInst, Limit,
278 BatchAA);
279}
280
// Looks for another load/store tagged with !invariant.group on the same
// pointer operand that dominates LI; a local hit is returned as a Def, a
// non-local hit is stashed in NonLocalDefsCache for later retrieval.
// NOTE(review): the two signature lines (likely `MemDepResult
// MemoryDependenceResults::getInvariantGroupPointerDependency(LoadInst *LI,`)
// were dropped by extraction -- restore from upstream.
283 BasicBlock *BB) {
284
285 if (!LI->hasMetadata(LLVMContext::MD_invariant_group))
// NOTE(review): the early return (likely `return MemDepResult::getUnknown();`)
// was dropped here by extraction.
287
288 // Take the ptr operand after all casts and geps 0. This way we can search
289 // cast graph down only.
290 Value *LoadOperand = LI->getPointerOperand()->stripPointerCasts();
291
292 // It's is not safe to walk the use list of global value, because function
293 // passes aren't allowed to look outside their functions.
294 // FIXME: this could be fixed by filtering instructions from outside
295 // of current function.
296 if (isa<GlobalValue>(LoadOperand))
// NOTE(review): the early return (likely `return MemDepResult::getUnknown();`)
// was dropped here by extraction.
298
299 Instruction *ClosestDependency = nullptr;
300 // Order of instructions in uses list is unpredictible. In order to always
301 // get the same result, we will look for the closest dominance.
302 auto GetClosestDependency = [this](Instruction *Best, Instruction *Other) {
303 assert(Other && "Must call it with not null instruction");
304 if (Best == nullptr || DT.dominates(Best, Other))
305 return Other;
306 return Best;
307 };
308
309 for (const Use &Us : LoadOperand->uses()) {
310 auto *U = dyn_cast<Instruction>(Us.getUser());
311 if (!U || U == LI || !DT.dominates(U, LI))
312 continue;
313
314 // If we hit load/store with the same invariant.group metadata (and the
315 // same pointer operand) we can assume that value pointed by pointer
316 // operand didn't change.
317 if ((isa<LoadInst>(U) ||
318 (isa<StoreInst>(U) &&
319 cast<StoreInst>(U)->getPointerOperand() == LoadOperand)) &&
320 U->hasMetadata(LLVMContext::MD_invariant_group))
321 ClosestDependency = GetClosestDependency(ClosestDependency, U);
322 }
323
324 if (!ClosestDependency)
// NOTE(review): the early return (likely `return MemDepResult::getUnknown();`)
// was dropped here by extraction.
326 if (ClosestDependency->getParent() == BB)
327 return MemDepResult::getDef(ClosestDependency);
328 // Def(U) can't be returned here because it is non-local. If local
329 // dependency won't be found then return nonLocal counting that the
330 // user will call getNonLocalPointerDependency, which will return cached
331 // result.
332 NonLocalDefsCache.try_emplace(
333 LI, NonLocalDepResult(ClosestDependency->getParent(),
334 MemDepResult::getDef(ClosestDependency), nullptr));
335 ReverseNonLocalDefsCache[ClosestDependency].insert(LI);
// NOTE(review): the final return (likely `return
// MemDepResult::getNonLocal();`) was dropped here by extraction.
337}
338
339// Check if SI that may alias with MemLoc can be safely skipped. This is
340// possible in case if SI can only must alias or no alias with MemLoc (no
341// partial overlapping possible) and it writes the same value that MemLoc
342// contains now (it was loaded before this store and was not modified in
343// between).
// NOTE(review): the signature's first line (likely `static bool
// canSkipClobberingStore(const StoreInst *SI,`) was dropped by extraction.
345 const MemoryLocation &MemLoc,
346 Align MemLocAlign, BatchAAResults &BatchAA,
347 unsigned ScanLimit) {
// Sizes must be known, equal, and non-scalable; the common alignment must
// cover the whole access so partial overlap is impossible.
348 if (!MemLoc.Size.hasValue())
349 return false;
350 if (MemoryLocation::get(SI).Size != MemLoc.Size)
351 return false;
352 if (MemLoc.Size.isScalable())
353 return false;
354 if (std::min(MemLocAlign, SI->getAlign()).value() <
355 MemLoc.Size.getValue().getKnownMinValue())
356 return false;
357
// The stored value must be a load from the same block that must-aliases
// MemLoc, with no intervening modification between the load and the store.
358 auto *LI = dyn_cast<LoadInst>(SI->getValueOperand());
359 if (!LI || LI->getParent() != SI->getParent())
360 return false;
361 if (BatchAA.alias(MemoryLocation::get(LI), MemLoc) != AliasResult::MustAlias)
362 return false;
363 unsigned NumVisitedInsts = 0;
364 for (const Instruction *I = LI; I != SI; I = I->getNextNode())
365 if (++NumVisitedInsts > ScanLimit ||
366 isModSet(BatchAA.getModRefInfo(I, MemLoc)))
367 return false;
368
369 return true;
370}
371
// Core backward scan of a single block for the instruction that a memory
// location depends on.
// NOTE(review): the signature line (likely `MemDepResult
// MemoryDependenceResults::getSimplePointerDependencyFrom(`) was dropped by
// extraction -- restore from upstream.
373 const MemoryLocation &MemLoc, bool isLoad, BasicBlock::iterator ScanIt,
374 BasicBlock *BB, Instruction *QueryInst, unsigned *Limit,
375 BatchAAResults &BatchAA) {
376 bool isInvariantLoad = false;
377 Align MemLocAlign =
// NOTE(review): the initializer for MemLocAlign was dropped here by
// extraction -- restore from upstream.
379
380 unsigned DefaultLimit = getDefaultBlockScanLimit();
381 if (!Limit)
382 Limit = &DefaultLimit;
383
384 // We must be careful with atomic accesses, as they may allow another thread
385 // to touch this location, clobbering it. We are conservative: if the
386 // QueryInst is not a simple (non-atomic) memory access, we automatically
387 // return getClobber.
388 // If it is simple, we know based on the results of
389 // "Compiler testing via a theory of sound optimisations in the C11/C++11
390 // memory model" in PLDI 2013, that a non-atomic location can only be
391 // clobbered between a pair of a release and an acquire action, with no
392 // access to the location in between.
393 // Here is an example for giving the general intuition behind this rule.
394 // In the following code:
395 //   store x 0;
396 //   release action; [1]
397 //   acquire action; [4]
398 //   %val = load x;
399 // It is unsafe to replace %val by 0 because another thread may be running:
400 //   acquire action; [2]
401 //   store x 42;
402 //   release action; [3]
403 // with synchronization from 1 to 2 and from 3 to 4, resulting in %val
404 // being 42. A key property of this program however is that if either
405 // 1 or 4 were missing, there would be a race between the store of 42
406 // either the store of 0 or the load (making the whole program racy).
407 // The paper mentioned above shows that the same property is respected
408 // by every program that can detect any optimization of that kind: either
409 // it is racy (undefined) or there is a release followed by an acquire
410 // between the pair of accesses under consideration.
411
412 // If the load is invariant, we "know" that it doesn't alias *any* write. We
413 // do want to respect mustalias results since defs are useful for value
414 // forwarding, but any mayalias write can be assumed to be noalias.
415 // Arguably, this logic should be pushed inside AliasAnalysis itself.
416 if (isLoad && QueryInst)
417 if (LoadInst *LI = dyn_cast<LoadInst>(QueryInst)) {
418 if (LI->hasMetadata(LLVMContext::MD_invariant_load))
419 isInvariantLoad = true;
420 MemLocAlign = LI->getAlign();
421 }
422
423 // True for volatile instruction.
424 // For Load/Store return true if atomic ordering is stronger than AO,
425 // for other instruction just true if it can read or write to memory.
426 auto isComplexForReordering = [](Instruction * I, AtomicOrdering AO)->bool {
427 if (I->isVolatile())
428 return true;
429 if (auto *LI = dyn_cast<LoadInst>(I))
430 return isStrongerThan(LI->getOrdering(), AO);
431 if (auto *SI = dyn_cast<StoreInst>(I))
432 return isStrongerThan(SI->getOrdering(), AO);
433 return I->mayReadOrWriteMemory();
434 };
435
436 // Walk backwards through the basic block, looking for dependencies.
437 while (ScanIt != BB->begin()) {
438 Instruction *Inst = &*--ScanIt;
439
440 // Limit the amount of scanning we do so we don't end up with quadratic
441 // running time on extreme testcases.
442 --*Limit;
443 if (!*Limit)
// NOTE(review): the body of this `if` (likely `return
// MemDepResult::getUnknown();`) was dropped here by extraction.
445
// NOTE(review): the guarding line (likely `if (auto *II =
// dyn_cast<IntrinsicInst>(Inst)) {`) was dropped here -- II is used below.
447 // If we reach a lifetime begin or end marker, then the query ends here
448 // because the value is undefined.
449 Intrinsic::ID ID = II->getIntrinsicID();
450 switch (ID) {
451 case Intrinsic::lifetime_start: {
452 MemoryLocation ArgLoc = MemoryLocation::getAfter(II->getArgOperand(0));
453 if (BatchAA.isMustAlias(ArgLoc, MemLoc))
454 return MemDepResult::getDef(II);
455 continue;
456 }
457 case Intrinsic::masked_load:
458 case Intrinsic::masked_store: {
// NOTE(review): a declaration line (likely `MemoryLocation Loc;`) was
// dropped here by extraction -- Loc is used on the next lines.
460 /*ModRefInfo MR =*/ GetLocation(II, Loc, TLI);
461 AliasResult R = BatchAA.alias(Loc, MemLoc);
462 if (R == AliasResult::NoAlias)
463 continue;
464 if (R == AliasResult::MustAlias)
465 return MemDepResult::getDef(II);
466 if (ID == Intrinsic::masked_load)
467 continue;
// NOTE(review): the fall-through return (likely `return
// MemDepResult::getClobber(II);`) was dropped here by extraction.
469 }
470 }
471 }
472
473 // Values depend on loads if the pointers are must aliased. This means
474 // that a load depends on another must aliased load from the same value.
475 // One exception is atomic loads: a value can depend on an atomic load that
476 // it does not alias with when this atomic load indicates that another
477 // thread may be accessing the location.
478 if (LoadInst *LI = dyn_cast<LoadInst>(Inst)) {
479 // While volatile access cannot be eliminated, they do not have to clobber
480 // non-aliasing locations, as normal accesses, for example, can be safely
481 // reordered with volatile accesses.
482 if (LI->isVolatile()) {
483 if (!QueryInst)
484 // Original QueryInst *may* be volatile
485 return MemDepResult::getClobber(LI);
486 if (QueryInst->isVolatile())
487 // Ordering required if QueryInst is itself volatile
488 return MemDepResult::getClobber(LI);
489 // Otherwise, volatile doesn't imply any special ordering
490 }
491
492 // Atomic loads have complications involved.
493 // A Monotonic (or higher) load is OK if the query inst is itself not
494 // atomic.
495 // FIXME: This is overly conservative.
496 if (LI->isAtomic() && isStrongerThanUnordered(LI->getOrdering())) {
497 if (!QueryInst ||
498 isComplexForReordering(QueryInst, AtomicOrdering::NotAtomic))
499 return MemDepResult::getClobber(LI);
500 if (LI->getOrdering() != AtomicOrdering::Monotonic)
501 return MemDepResult::getClobber(LI);
502 }
503
// NOTE(review): a declaration line (likely `MemoryLocation LoadLoc =
// MemoryLocation::get(LI);`) was dropped here -- LoadLoc is used below.
505
506 // If we found a pointer, check if it could be the same as our pointer.
507 AliasResult R = BatchAA.alias(LoadLoc, MemLoc);
508
509 if (R == AliasResult::NoAlias)
510 continue;
511
512 if (isLoad) {
513 // Must aliased loads are defs of each other.
514 if (R == AliasResult::MustAlias)
515 return MemDepResult::getDef(Inst);
516
517 // If we have a partial alias, then return this as a clobber for the
518 // client to handle.
519 if (R == AliasResult::PartialAlias && R.hasOffset()) {
520 ClobberOffsets[LI] = R.getOffset();
521 return MemDepResult::getClobber(Inst);
522 }
523
524 // Random may-alias loads don't depend on each other without a
525 // dependence.
526 continue;
527 }
528
529 // Stores don't alias loads from read-only memory.
530 if (!isModSet(BatchAA.getModRefInfoMask(LoadLoc)))
531 continue;
532
533 // Stores depend on may/must aliased loads.
534 return MemDepResult::getDef(Inst);
535 }
536
537 if (StoreInst *SI = dyn_cast<StoreInst>(Inst)) {
538 // Atomic stores have complications involved.
539 // A Monotonic store is OK if the query inst is itself not atomic.
540 // FIXME: This is overly conservative.
541 if (!SI->isUnordered() && SI->isAtomic()) {
542 if (!QueryInst ||
543 isComplexForReordering(QueryInst, AtomicOrdering::Unordered))
// NOTE(review): the conditional return (likely `return
// MemDepResult::getClobber(SI);`) was dropped here by extraction.
545 // Ok, if we are here the guard above guarantee us that
546 // QueryInst is a non-atomic or unordered load/store.
547 // SI is atomic with monotonic or release semantic (seq_cst for store
548 // is actually a release semantic plus total order over other seq_cst
549 // instructions, as soon as QueryInst is not seq_cst we can consider it
550 // as simple release semantic).
551 // Monotonic and Release semantic allows re-ordering before store
552 // so we are safe to go further and check the aliasing. It will prohibit
553 // re-ordering in case locations are may or must alias.
554 }
555
556 // While volatile access cannot be eliminated, they do not have to clobber
557 // non-aliasing locations, as normal accesses can for example be reordered
558 // with volatile accesses.
559 if (SI->isVolatile())
560 if (!QueryInst || QueryInst->isVolatile())
// NOTE(review): the conditional return (likely `return
// MemDepResult::getClobber(SI);`) was dropped here by extraction.
562
563 // If alias analysis can tell that this store is guaranteed to not modify
564 // the query pointer, ignore it. Use getModRefInfo to handle cases where
565 // the query pointer points to constant memory etc.
566 if (!isModOrRefSet(BatchAA.getModRefInfo(SI, MemLoc)))
567 continue;
568
569 // Ok, this store might clobber the query pointer. Check to see if it is
570 // a must alias: in this case, we want to return this as a def.
571 // FIXME: Use ModRefInfo::Must bit from getModRefInfo call above.
// NOTE(review): a declaration line (likely `MemoryLocation StoreLoc =
// MemoryLocation::get(SI);`) was dropped here -- StoreLoc is used below.
573
574 // If we found a pointer, check if it could be the same as our pointer.
575 AliasResult R = BatchAA.alias(StoreLoc, MemLoc);
576
577 if (R == AliasResult::NoAlias)
578 continue;
579 if (R == AliasResult::MustAlias)
580 return MemDepResult::getDef(Inst);
581 if (isInvariantLoad)
582 continue;
583 if (canSkipClobberingStore(SI, MemLoc, MemLocAlign, BatchAA, *Limit))
584 continue;
585 return MemDepResult::getClobber(Inst);
586 }
587
588 // If this is an allocation, and if we know that the accessed pointer is to
589 // the allocation, return Def. This means that there is no dependence and
590 // the access can be optimized based on that. For example, a load could
591 // turn into undef. Note that we can bypass the allocation itself when
592 // looking for a clobber in many cases; that's an alias property and is
593 // handled by BasicAA.
594 if (isa<AllocaInst>(Inst) || isNoAliasCall(Inst)) {
595 const Value *AccessPtr = getUnderlyingObject(MemLoc.Ptr);
596 if (AccessPtr == Inst || BatchAA.isMustAlias(Inst, AccessPtr))
597 return MemDepResult::getDef(Inst);
598 }
599
600 // If we found a select instruction for MemLoc pointer, return it as Def
601 // dependency.
602 if (isa<SelectInst>(Inst) && MemLoc.Ptr == Inst)
603 return MemDepResult::getDef(Inst);
604
605 if (isInvariantLoad)
606 continue;
607
608 // A release fence requires that all stores complete before it, but does
609 // not prevent the reordering of following loads or stores 'before' the
610 // fence. As a result, we look past it when finding a dependency for
611 // loads. DSE uses this to find preceding stores to delete and thus we
612 // can't bypass the fence if the query instruction is a store.
613 if (FenceInst *FI = dyn_cast<FenceInst>(Inst))
614 if (isLoad && FI->getOrdering() == AtomicOrdering::Release)
615 continue;
616
617 // See if this instruction (e.g. a call or vaarg) mod/ref's the pointer.
618 switch (BatchAA.getModRefInfo(Inst, MemLoc)) {
// NOTE(review): a case label (likely `case ModRefInfo::NoModRef:`) was
// dropped here by extraction -- the comment below belongs to it.
620 // If the call has no effect on the queried pointer, just ignore it.
621 continue;
622 case ModRefInfo::Mod:
623 return MemDepResult::getClobber(Inst);
624 case ModRefInfo::Ref:
625 // If the call is known to never store to the pointer, and if this is a
626 // load query, we can safely ignore it (scan past it).
627 if (isLoad)
628 continue;
629 [[fallthrough]];
630 default:
631 // Otherwise, there is a potential dependence. Return a clobber.
632 return MemDepResult::getClobber(Inst);
633 }
634 }
635
636 // No dependence found. If this is the entry block of the function, it is
637 // unknown, otherwise it is non-local.
638 if (BB != &BB->getParent()->getEntryBlock())
// NOTE(review): the two returns (likely `return MemDepResult::getNonLocal();`
// and `return MemDepResult::getNonFuncLocal();`) were dropped here.
641}
642
// Returns (and caches) the local dependency of QueryInst, using the dirty
// LocalDeps cache entry as a resume point when available.
// NOTE(review): the signature line (likely `MemDepResult
// MemoryDependenceResults::getDependency(Instruction *QueryInst) {`) was
// dropped by extraction -- restore from upstream.
644 ClobberOffsets.clear();
645 Instruction *ScanPos = QueryInst;
646
647 // Check for a cached result
648 MemDepResult &LocalCache = LocalDeps[QueryInst];
649
650 // If the cached entry is non-dirty, just return it. Note that this depends
651 // on MemDepResult's default constructing to 'dirty'.
652 if (!LocalCache.isDirty())
653 return LocalCache;
654
655 // Otherwise, if we have a dirty entry, we know we can start the scan at that
656 // instruction, which may save us some work.
657 if (Instruction *Inst = LocalCache.getInst()) {
658 ScanPos = Inst;
659
660 RemoveFromReverseMap(ReverseLocalDeps, Inst, QueryInst);
661 }
662
663 BasicBlock *QueryParent = QueryInst->getParent();
664
665 // Do the scan.
666 if (BasicBlock::iterator(QueryInst) == QueryParent->begin()) {
667 // No dependence found. If this is the entry block of the function, it is
668 // unknown, otherwise it is non-local.
669 if (QueryParent != &QueryParent->getParent()->getEntryBlock())
670 LocalCache = MemDepResult::getNonLocal();
671 else
672 LocalCache = MemDepResult::getNonFuncLocal();
673 } else {
674 MemoryLocation MemLoc;
675 ModRefInfo MR = GetLocation(QueryInst, MemLoc, TLI);
676 if (MemLoc.Ptr) {
677 // If we can do a pointer scan, make it happen.
678 bool isLoad = !isModSet(MR);
679 if (auto *II = dyn_cast<IntrinsicInst>(QueryInst))
680 isLoad |= II->getIntrinsicID() == Intrinsic::lifetime_start;
681
682 LocalCache =
683 getPointerDependencyFrom(MemLoc, isLoad, ScanPos->getIterator(),
684 QueryParent, QueryInst, nullptr);
685 } else if (auto *QueryCall = dyn_cast<CallBase>(QueryInst)) {
686 bool isReadOnly = AA.onlyReadsMemory(QueryCall);
687 LocalCache = getCallDependencyFrom(QueryCall, isReadOnly,
688 ScanPos->getIterator(), QueryParent);
689 } else
690 // Non-memory instruction.
691 LocalCache = MemDepResult::getUnknown();
692 }
693
694 // Remember the result!
695 if (Instruction *I = LocalCache.getInst())
696 ReverseLocalDeps[I].insert(QueryInst);
697
698 return LocalCache;
699}
700
701#ifndef NDEBUG
702/// This method is used when -debug is specified to verify that cache arrays
703/// are properly kept sorted.
// NOTE(review): the signature's first line (likely `static void
// AssertSorted(MemoryDependenceResults::NonLocalDepInfo &Cache,`) was
// dropped by extraction -- restore from upstream.
705 int Count = -1) {
706 if (Count == -1)
707 Count = Cache.size();
708 assert(std::is_sorted(Cache.begin(), Cache.begin() + Count) &&
709 "Cache isn't sorted!");
710}
711#endif
712
// Computes (or refreshes) the cached non-local dependencies of a call,
// recomputing only blocks whose cached entries are dirty.
// NOTE(review): the signature lines (likely `const
// MemoryDependenceResults::NonLocalDepInfo &` /
// `MemoryDependenceResults::getNonLocalCallDependency(CallBase *QueryCall) {`)
// were dropped by extraction -- restore from upstream.
715 assert(getDependency(QueryCall).isNonLocal() &&
716 "getNonLocalCallDependency should only be used on calls with "
717 "non-local deps!");
718 PerInstNLInfo &CacheP = NonLocalDepsMap[QueryCall];
719 NonLocalDepInfo &Cache = CacheP.first;
720
721 // This is the set of blocks that need to be recomputed. In the cached case,
722 // this can happen due to instructions being deleted etc. In the uncached
723 // case, this starts out as the set of predecessors we care about.
// NOTE(review): the worklist declaration (likely `SmallVector<BasicBlock *,
// 32> DirtyBlocks;`) was dropped here by extraction.
725
726 if (!Cache.empty()) {
727 // Okay, we have a cache entry. If we know it is not dirty, just return it
728 // with no computation.
729 if (!CacheP.second) {
730 ++NumCacheNonLocal;
731 return Cache;
732 }
733
734 // If we already have a partially computed set of results, scan them to
735 // determine what is dirty, seeding our initial DirtyBlocks worklist.
736 for (auto &Entry : Cache)
737 if (Entry.getResult().isDirty())
738 DirtyBlocks.push_back(Entry.getBB());
739
740 // Sort the cache so that we can do fast binary search lookups below.
741 llvm::sort(Cache);
742
743 ++NumCacheDirtyNonLocal;
744 } else {
745 // Seed DirtyBlocks with each of the preds of QueryInst's block.
746 BasicBlock *QueryBB = QueryCall->getParent();
747 append_range(DirtyBlocks, PredCache.get(QueryBB));
748 ++NumUncacheNonLocal;
749 }
750
751 // isReadonlyCall - If this is a read-only call, we can be more aggressive.
752 bool isReadonlyCall = AA.onlyReadsMemory(QueryCall);
753
// NOTE(review): the visited-set declaration (likely `SmallPtrSet<BasicBlock
// *, 32> Visited;`) was dropped here by extraction -- Visited is used below.
755
756 unsigned NumSortedEntries = Cache.size();
757 LLVM_DEBUG(AssertSorted(Cache));
758
759 // Iterate while we still have blocks to update.
760 while (!DirtyBlocks.empty()) {
761 BasicBlock *DirtyBB = DirtyBlocks.pop_back_val();
762
763 // Already processed this block?
764 if (!Visited.insert(DirtyBB).second)
765 continue;
766
767 // Do a binary search to see if we already have an entry for this block in
768 // the cache set. If so, find it.
769 LLVM_DEBUG(AssertSorted(Cache, NumSortedEntries));
770 NonLocalDepInfo::iterator Entry =
771 std::upper_bound(Cache.begin(), Cache.begin() + NumSortedEntries,
772 NonLocalDepEntry(DirtyBB));
773 if (Entry != Cache.begin() && std::prev(Entry)->getBB() == DirtyBB)
774 --Entry;
775
776 NonLocalDepEntry *ExistingResult = nullptr;
777 if (Entry != Cache.begin() + NumSortedEntries &&
778 Entry->getBB() == DirtyBB) {
779 // If we already have an entry, and if it isn't already dirty, the block
780 // is done.
781 if (!Entry->getResult().isDirty())
782 continue;
783
784 // Otherwise, remember this slot so we can update the value.
785 ExistingResult = &*Entry;
786 }
787
788 // If the dirty entry has a pointer, start scanning from it so we don't have
789 // to rescan the entire block.
790 BasicBlock::iterator ScanPos = DirtyBB->end();
791 if (ExistingResult) {
792 if (Instruction *Inst = ExistingResult->getResult().getInst()) {
793 ScanPos = Inst->getIterator();
794 // We're removing QueryInst's use of Inst.
795 RemoveFromReverseMap<Instruction *>(ReverseNonLocalDeps, Inst,
796 QueryCall);
797 }
798 }
799
800 // Find out if this block has a local dependency for QueryInst.
801 MemDepResult Dep;
802
803 if (ScanPos != DirtyBB->begin()) {
804 Dep = getCallDependencyFrom(QueryCall, isReadonlyCall, ScanPos, DirtyBB);
805 } else if (DirtyBB != &DirtyBB->getParent()->getEntryBlock()) {
806 // No dependence found. If this is the entry block of the function, it is
807 // a clobber, otherwise it is unknown.
// NOTE(review): an assignment (likely `Dep = MemDepResult::getNonLocal();`)
// was dropped here by extraction.
809 } else {
// NOTE(review): an assignment (likely `Dep =
// MemDepResult::getNonFuncLocal();`) was dropped here by extraction.
811 }
812
813 // If we had a dirty entry for the block, update it. Otherwise, just add
814 // a new entry.
815 if (ExistingResult)
816 ExistingResult->setResult(Dep);
817 else
818 Cache.push_back(NonLocalDepEntry(DirtyBB, Dep));
819
820 // If the block has a dependency (i.e. it isn't completely transparent to
821 // the value), remember the association!
822 if (!Dep.isNonLocal()) {
823 // Keep the ReverseNonLocalDeps map up to date so we can efficiently
824 // update this when we remove instructions.
825 if (Instruction *Inst = Dep.getInst())
826 ReverseNonLocalDeps[Inst].insert(QueryCall);
827 } else {
828
829 // If the block *is* completely transparent to the load, we need to check
830 // the predecessors of this block. Add them to our worklist.
831 append_range(DirtyBlocks, PredCache.get(DirtyBB));
832 }
833 }
834
835 return Cache;
836}
837
// Entry point for non-local pointer dependence queries; fills Result with
// one NonLocalDepResult per block, short-circuiting through the
// invariant.group cache and bailing out on volatile/ordered accesses.
// NOTE(review): the signature lines (likely `void
// MemoryDependenceResults::getNonLocalPointerDependency(` /
// `Instruction *QueryInst, SmallVectorImpl<NonLocalDepResult> &Result) {`)
// were dropped by extraction -- restore from upstream.
840 const MemoryLocation Loc = MemoryLocation::get(QueryInst);
841 bool isLoad = isa<LoadInst>(QueryInst);
842 BasicBlock *FromBB = QueryInst->getParent();
843 assert(FromBB);
844
845 assert(Loc.Ptr->getType()->isPointerTy() &&
846 "Can't get pointer deps of a non-pointer!");
847 Result.clear();
848 {
849 // Check if there is cached Def with invariant.group.
850 auto NonLocalDefIt = NonLocalDefsCache.find(QueryInst);
851 if (NonLocalDefIt != NonLocalDefsCache.end()) {
852 Result.push_back(NonLocalDefIt->second);
853 ReverseNonLocalDefsCache[NonLocalDefIt->second.getResult().getInst()]
854 .erase(QueryInst);
855 NonLocalDefsCache.erase(NonLocalDefIt);
856 return;
857 }
858 }
859 // This routine does not expect to deal with volatile instructions.
860 // Doing so would require piping through the QueryInst all the way through.
861 // TODO: volatiles can't be elided, but they can be reordered with other
862 // non-volatile accesses.
863
864 // We currently give up on any instruction which is ordered, but we do handle
865 // atomic instructions which are unordered.
866 // TODO: Handle ordered instructions
867 auto isOrdered = [](Instruction *Inst) {
868 if (LoadInst *LI = dyn_cast<LoadInst>(Inst)) {
869 return !LI->isUnordered();
870 } else if (StoreInst *SI = dyn_cast<StoreInst>(Inst)) {
871 return !SI->isUnordered();
872 }
873 return false;
874 };
875 if (QueryInst->isVolatile() || isOrdered(QueryInst)) {
876 Result.push_back(NonLocalDepResult(FromBB, MemDepResult::getUnknown(),
877 const_cast<Value *>(Loc.Ptr)));
878 return;
879 }
880 const DataLayout &DL = FromBB->getDataLayout();
881 PHITransAddr Address(const_cast<Value *>(Loc.Ptr), DL, &AC);
882
883 // NonLocalPointerDepVisited is the set of blocks we've inspected, and the
884 // pointer we consider in each block. Because of critical edges, we currently
885 // bail out if querying a block with multiple different pointers. This can
886 // happen during PHI translation.
887 ++NonLocalPointerDepEpoch;
888 assert(NonLocalPointerDepEpoch > 0 &&
889 "NonLocalPointerDepVisitedEpoch overflow");
890 NonLocalPointerDepVisited.resize(FromBB->getParent()->getMaxBlockNumber());
891 if (getNonLocalPointerDepFromBB(QueryInst, Address, Loc, isLoad, FromBB,
892 Result, true))
893 return;
894 Result.clear();
895 Result.push_back(NonLocalDepResult(FromBB, MemDepResult::getUnknown(),
896 const_cast<Value *>(Loc.Ptr)));
897}
898
/// Compute the memdep value for BB with Pointer/PointeeSize using either
/// cached information in Cache or by doing a lookup (which may use dirty cache
/// info if available).
///
/// If we do a lookup, add the result to the cache.
///
/// \param QueryInst The access being analyzed (may be null).
/// \param Loc       Memory location being queried.
/// \param isLoad    True when querying on behalf of a load.
/// \param BB        Block whose dependency result is requested.
/// \param Cache     Per-pointer cache; only the first NumSortedEntries
///                  elements are sorted by block, later ones are appended
///                  unsorted by this function.
/// \param BatchAA   Batched alias-analysis context shared by this query.
MemDepResult MemoryDependenceResults::getNonLocalInfoForBlock(
    Instruction *QueryInst, const MemoryLocation &Loc, bool isLoad,
    BasicBlock *BB, NonLocalDepInfo *Cache, unsigned NumSortedEntries,
    BatchAAResults &BatchAA) {

  bool isInvariantLoad = false;

  // !invariant.load results never change, so they get special cache handling
  // below (they may reuse stronger cached results but are never cached).
  if (LoadInst *LI = dyn_cast_or_null<LoadInst>(QueryInst))
    isInvariantLoad = LI->getMetadata(LLVMContext::MD_invariant_load);

  // Do a binary search to see if we already have an entry for this block in
  // the cache set.  If so, find it.  Only the sorted prefix is searched; new
  // entries past NumSortedEntries are not yet ordered.
  NonLocalDepInfo::iterator Entry = std::upper_bound(
      Cache->begin(), Cache->begin() + NumSortedEntries, NonLocalDepEntry(BB));
  if (Entry != Cache->begin() && (Entry - 1)->getBB() == BB)
    --Entry;

  NonLocalDepEntry *ExistingResult = nullptr;
  if (Entry != Cache->begin() + NumSortedEntries && Entry->getBB() == BB)
    ExistingResult = &*Entry;

  // Use cached result for invariant load only if there is no dependency for non
  // invariant load. In this case invariant load can not have any dependency as
  // well.
  if (ExistingResult && isInvariantLoad &&
      !ExistingResult->getResult().isNonFuncLocal())
    ExistingResult = nullptr;

  // If we have a cached entry, and it is non-dirty, use it as the value for
  // this dependency.
  if (ExistingResult && !ExistingResult->getResult().isDirty()) {
    ++NumCacheNonLocalPtr;
    return ExistingResult->getResult();
  }

  // Otherwise, we have to scan for the value.  If we have a dirty cache
  // entry, start scanning from its position, otherwise we scan from the end
  // of the block.
  BasicBlock::iterator ScanPos = BB->end();
  if (ExistingResult && ExistingResult->getResult().getInst()) {
    assert(ExistingResult->getResult().getInst()->getParent() == BB &&
           "Instruction invalidated?");
    ++NumCacheDirtyNonLocalPtr;
    ScanPos = ExistingResult->getResult().getInst()->getIterator();

    // Eliminating the dirty entry from 'Cache', so update the reverse info.
    ValueIsLoadPair CacheKey(Loc.Ptr, isLoad);
    RemoveFromReverseMap(ReverseNonLocalPtrDeps, &*ScanPos, CacheKey);
  } else {
    ++NumUncacheNonLocalPtr;
  }

  // Scan the block for the dependency.
  MemDepResult Dep = getPointerDependencyFrom(Loc, isLoad, ScanPos, BB,
                                              QueryInst, nullptr, BatchAA);

  // Don't cache results for invariant load.
  if (isInvariantLoad)
    return Dep;

  // If we had a dirty entry for the block, update it.  Otherwise, just add
  // a new entry.
  if (ExistingResult)
    ExistingResult->setResult(Dep);
  else
    Cache->push_back(NonLocalDepEntry(BB, Dep));

  // If the block has a dependency (i.e. it isn't completely transparent to
  // the value), remember the reverse association because we just added it
  // to Cache!
  if (!Dep.isLocal())
    return Dep;

  // Keep the ReverseNonLocalPtrDeps map up to date so we can efficiently
  // update MemDep when we remove instructions.
  Instruction *Inst = Dep.getInst();
  assert(Inst && "Didn't depend on anything?");
  ValueIsLoadPair CacheKey(Loc.Ptr, isLoad);
  ReverseNonLocalPtrDeps[Inst].insert(CacheKey);
  return Dep;
}
985
986/// Sort the NonLocalDepInfo cache, given a certain number of elements in the
987/// array that are already properly ordered.
988///
989/// This is optimized for the case when only a few entries are added.
990static void
992 unsigned NumSortedEntries) {
993
994 // If only one entry, don't sort.
995 if (Cache.size() < 2)
996 return;
997
998 unsigned s = Cache.size() - NumSortedEntries;
999
1000 // If the cache is already sorted, don't sort it again.
1001 if (s == 0)
1002 return;
1003
1004 // If no entry is sorted, sort the whole cache.
1005 if (NumSortedEntries == 0) {
1006 llvm::sort(Cache);
1007 return;
1008 }
1009
1010 // If the number of unsorted entires is small and the cache size is big, using
1011 // insertion sort is faster. Here use Log2_32 to quickly choose the sort
1012 // method.
1013 if (s < Log2_32(Cache.size())) {
1014 while (s > 0) {
1015 NonLocalDepEntry Val = Cache.back();
1016 Cache.pop_back();
1017 MemoryDependenceResults::NonLocalDepInfo::iterator Entry =
1018 std::upper_bound(Cache.begin(), Cache.end() - s + 1, Val);
1019 Cache.insert(Entry, Val);
1020 s--;
1021 }
1022 } else {
1023 llvm::sort(Cache);
1024 }
1025}
1026
1027void MemoryDependenceResults::setNonLocalPointerDepVisited(BasicBlock *BB,
1028 Value *V) {
1029 NonLocalPointerDepVisited[BB->getNumber()] = {V, NonLocalPointerDepEpoch};
1030}
1031
1032Value *
1033MemoryDependenceResults::lookupNonLocalPointerDepVisited(BasicBlock *BB) const {
1034 auto &Entry = NonLocalPointerDepVisited[BB->getNumber()];
1035 return Entry.second == NonLocalPointerDepEpoch ? Entry.first : nullptr;
1036}
1037
1038/// Perform a dependency query based on pointer/pointeesize starting at the end
1039/// of StartBB.
1040///
1041/// Add any clobber/def results to the results vector and keep track of which
1042/// blocks are visited in 'NonLocalPointerDepVisited'.
1043///
1044/// This has special behavior for the first block queries (when SkipFirstBlock
1045/// is true). In this special case, it ignores the contents of the specified
1046/// block and starts returning dependence info for its predecessors.
1047///
1048/// This function returns true on success, or false to indicate that it could
1049/// not compute dependence information for some reason. This should be treated
1050/// as a clobber dependence on the first instruction in the predecessor block.
1051bool MemoryDependenceResults::getNonLocalPointerDepFromBB(
1052 Instruction *QueryInst, const PHITransAddr &Pointer,
1053 const MemoryLocation &Loc, bool isLoad, BasicBlock *StartBB,
1054 SmallVectorImpl<NonLocalDepResult> &Result, bool SkipFirstBlock,
1055 bool IsIncomplete) {
1056 // Look up the cached info for Pointer.
1057 ValueIsLoadPair CacheKey(Pointer.getAddr(), isLoad);
1058
1059 // Set up a temporary NLPI value. If the map doesn't yet have an entry for
1060 // CacheKey, this value will be inserted as the associated value. Otherwise,
1061 // it'll be ignored, and we'll have to check to see if the cached size and
1062 // aa tags are consistent with the current query.
1063 NonLocalPointerInfo InitialNLPI;
1064 InitialNLPI.Size = Loc.Size;
1065 InitialNLPI.AATags = Loc.AATags;
1066
1067 bool isInvariantLoad = false;
1068 if (LoadInst *LI = dyn_cast_or_null<LoadInst>(QueryInst))
1069 isInvariantLoad = LI->getMetadata(LLVMContext::MD_invariant_load);
1070
1071 // Get the NLPI for CacheKey, inserting one into the map if it doesn't
1072 // already have one.
1073 std::pair<CachedNonLocalPointerInfo::iterator, bool> Pair =
1074 NonLocalPointerDeps.insert(std::make_pair(CacheKey, InitialNLPI));
1075 NonLocalPointerInfo *CacheInfo = &Pair.first->second;
1076
1077 // If we already have a cache entry for this CacheKey, we may need to do some
1078 // work to reconcile the cache entry and the current query.
1079 // Invariant loads don't participate in caching. Thus no need to reconcile.
1080 if (!isInvariantLoad && !Pair.second) {
1081 if (CacheInfo->Size != Loc.Size) {
1082 // The query's Size is not equal to the cached one. Throw out the cached
1083 // data and proceed with the query with the new size.
1084 CacheInfo->Pair = BBSkipFirstBlockPair();
1085 CacheInfo->Size = Loc.Size;
1086 for (auto &Entry : CacheInfo->NonLocalDeps)
1087 if (Instruction *Inst = Entry.getResult().getInst())
1088 RemoveFromReverseMap(ReverseNonLocalPtrDeps, Inst, CacheKey);
1089 CacheInfo->NonLocalDeps.clear();
1090 // The cache is cleared (in the above line) so we will have lost
1091 // information about blocks we have already visited. We therefore must
1092 // assume that the cache information is incomplete.
1093 IsIncomplete = true;
1094 }
1095
1096 // If the query's AATags are inconsistent with the cached one,
1097 // conservatively throw out the cached data and restart the query with
1098 // no tag if needed.
1099 if (CacheInfo->AATags != Loc.AATags) {
1100 if (CacheInfo->AATags) {
1101 CacheInfo->Pair = BBSkipFirstBlockPair();
1102 CacheInfo->AATags = AAMDNodes();
1103 for (auto &Entry : CacheInfo->NonLocalDeps)
1104 if (Instruction *Inst = Entry.getResult().getInst())
1105 RemoveFromReverseMap(ReverseNonLocalPtrDeps, Inst, CacheKey);
1106 CacheInfo->NonLocalDeps.clear();
1107 // The cache is cleared (in the above line) so we will have lost
1108 // information about blocks we have already visited. We therefore must
1109 // assume that the cache information is incomplete.
1110 IsIncomplete = true;
1111 }
1112 if (Loc.AATags)
1113 return getNonLocalPointerDepFromBB(
1114 QueryInst, Pointer, Loc.getWithoutAATags(), isLoad, StartBB, Result,
1115 SkipFirstBlock, IsIncomplete);
1116 }
1117 }
1118
1119 NonLocalDepInfo *Cache = &CacheInfo->NonLocalDeps;
1120
1121 // If we have valid cached information for exactly the block we are
1122 // investigating, just return it with no recomputation.
1123 // Don't use cached information for invariant loads since it is valid for
1124 // non-invariant loads only.
1125 if (!IsIncomplete && !isInvariantLoad &&
1126 CacheInfo->Pair == BBSkipFirstBlockPair(StartBB, SkipFirstBlock)) {
1127 // We have a fully cached result for this query then we can just return the
1128 // cached results and populate the visited set. However, we have to verify
1129 // that we don't already have conflicting results for these blocks. Check
1130 // to ensure that if a block in the results set is in the visited set that
1131 // it was for the same pointer query.
1132 for (auto &Entry : *Cache) {
1133 Value *Prev = lookupNonLocalPointerDepVisited(Entry.getBB());
1134 if (!Prev || Prev == Pointer.getAddr())
1135 continue;
1136
1137 // We have a pointer mismatch in a block. Just return false, saying
1138 // that something was clobbered in this result. We could also do a
1139 // non-fully cached query, but there is little point in doing this.
1140 return false;
1141 }
1142
1143 Value *Addr = Pointer.getAddr();
1144 for (auto &Entry : *Cache) {
1145 setNonLocalPointerDepVisited(Entry.getBB(), Addr);
1146 if (Entry.getResult().isNonLocal()) {
1147 continue;
1148 }
1149
1150 if (DT.isReachableFromEntry(Entry.getBB())) {
1151 Result.push_back(
1152 NonLocalDepResult(Entry.getBB(), Entry.getResult(), Addr));
1153 }
1154 }
1155 ++NumCacheCompleteNonLocalPtr;
1156 return true;
1157 }
1158
1159 // If the size of this cache has surpassed the global limit, stop here.
1160 if (Cache->size() > CacheGlobalLimit)
1161 return false;
1162
1163 // Otherwise, either this is a new block, a block with an invalid cache
1164 // pointer or one that we're about to invalidate by putting more info into
1165 // it than its valid cache info. If empty and not explicitly indicated as
1166 // incomplete, the result will be valid cache info, otherwise it isn't.
1167 //
1168 // Invariant loads don't affect cache in any way thus no need to update
1169 // CacheInfo as well.
1170 if (!isInvariantLoad) {
1171 if (!IsIncomplete && Cache->empty())
1172 CacheInfo->Pair = BBSkipFirstBlockPair(StartBB, SkipFirstBlock);
1173 else
1174 CacheInfo->Pair = BBSkipFirstBlockPair();
1175 }
1176
1178 Worklist.push_back(StartBB);
1179
1180 // PredList used inside loop.
1182
1183 // Keep track of the entries that we know are sorted. Previously cached
1184 // entries will all be sorted. The entries we add we only sort on demand (we
1185 // don't insert every element into its sorted position). We know that we
1186 // won't get any reuse from currently inserted values, because we don't
1187 // revisit blocks after we insert info for them.
1188 unsigned NumSortedEntries = Cache->size();
1189 unsigned WorklistEntries = BlockNumberLimit;
1190 bool GotWorklistLimit = false;
1191 LLVM_DEBUG(AssertSorted(*Cache));
1192
1193 BatchAAResults BatchAA(AA, &EEA);
1194 while (!Worklist.empty()) {
1195 BasicBlock *BB = Worklist.pop_back_val();
1196
1197 // If we do process a large number of blocks it becomes very expensive and
1198 // likely it isn't worth worrying about
1199 if (Result.size() > NumResultsLimit) {
1200 // Sort it now (if needed) so that recursive invocations of
1201 // getNonLocalPointerDepFromBB and other routines that could reuse the
1202 // cache value will only see properly sorted cache arrays.
1203 if (Cache && NumSortedEntries != Cache->size()) {
1204 SortNonLocalDepInfoCache(*Cache, NumSortedEntries);
1205 }
1206 // Since we bail out, the "Cache" set won't contain all of the
1207 // results for the query. This is ok (we can still use it to accelerate
1208 // specific block queries) but we can't do the fastpath "return all
1209 // results from the set". Clear out the indicator for this.
1210 CacheInfo->Pair = BBSkipFirstBlockPair();
1211 return false;
1212 }
1213
1214 // Skip the first block if we have it.
1215 if (!SkipFirstBlock) {
1216 // Analyze the dependency of *Pointer in FromBB. See if we already have
1217 // been here.
1218 assert(lookupNonLocalPointerDepVisited(BB) &&
1219 "Should check 'visited' before adding to WL");
1220
1221 // Get the dependency info for Pointer in BB. If we have cached
1222 // information, we will use it, otherwise we compute it.
1223 LLVM_DEBUG(AssertSorted(*Cache, NumSortedEntries));
1224 MemDepResult Dep = getNonLocalInfoForBlock(
1225 QueryInst, Loc, isLoad, BB, Cache, NumSortedEntries, BatchAA);
1226
1227 // If we got a Def or Clobber, add this to the list of results.
1228 if (!Dep.isNonLocal()) {
1229 if (DT.isReachableFromEntry(BB)) {
1230 Result.push_back(NonLocalDepResult(BB, Dep, Pointer.getAddr()));
1231 continue;
1232 }
1233 }
1234 }
1235
1236 // If 'Pointer' is an instruction defined in this block, then we need to do
1237 // phi translation to change it into a value live in the predecessor block.
1238 // If not, we just add the predecessors to the worklist and scan them with
1239 // the same Pointer.
1240 if (!Pointer.needsPHITranslationFromBlock(BB)) {
1241 SkipFirstBlock = false;
1242 SmallVector<BasicBlock *, 16> NewBlocks;
1243 for (BasicBlock *Pred : PredCache.get(BB)) {
1244 // Verify that we haven't looked at this block yet.
1245 Value *Prev = lookupNonLocalPointerDepVisited(Pred);
1246 if (!Prev) {
1247 setNonLocalPointerDepVisited(Pred, Pointer.getAddr());
1248 // First time we've looked at *PI.
1249 NewBlocks.push_back(Pred);
1250 continue;
1251 }
1252
1253 // If we have seen this block before, but it was with a different
1254 // pointer then we have a phi translation failure and we have to treat
1255 // this as a clobber.
1256 if (Prev != Pointer.getAddr()) {
1257 // Make sure to clean up the Visited map before continuing on to
1258 // PredTranslationFailure.
1259 for (auto *NewBlock : NewBlocks)
1260 setNonLocalPointerDepVisited(NewBlock, nullptr);
1261 goto PredTranslationFailure;
1262 }
1263 }
1264 if (NewBlocks.size() > WorklistEntries) {
1265 // Make sure to clean up the Visited map before continuing on to
1266 // PredTranslationFailure.
1267 for (auto *NewBlock : NewBlocks)
1268 setNonLocalPointerDepVisited(NewBlock, nullptr);
1269 GotWorklistLimit = true;
1270 goto PredTranslationFailure;
1271 }
1272 WorklistEntries -= NewBlocks.size();
1273 Worklist.append(NewBlocks.begin(), NewBlocks.end());
1274 continue;
1275 }
1276
1277 // We do need to do phi translation, if we know ahead of time we can't phi
1278 // translate this value, don't even try.
1279 if (!Pointer.isPotentiallyPHITranslatable())
1280 goto PredTranslationFailure;
1281
1282 // We may have added values to the cache list before this PHI translation.
1283 // If so, we haven't done anything to ensure that the cache remains sorted.
1284 // Sort it now (if needed) so that recursive invocations of
1285 // getNonLocalPointerDepFromBB and other routines that could reuse the cache
1286 // value will only see properly sorted cache arrays.
1287 if (Cache && NumSortedEntries != Cache->size()) {
1288 SortNonLocalDepInfoCache(*Cache, NumSortedEntries);
1289 NumSortedEntries = Cache->size();
1290 }
1291 Cache = nullptr;
1292
1293 PredList.clear();
1294 for (BasicBlock *Pred : PredCache.get(BB)) {
1295 PredList.push_back(std::make_pair(Pred, Pointer));
1296
1297 // Get the PHI translated pointer in this predecessor. This can fail if
1298 // not translatable, in which case the getAddr() returns null.
1299 PHITransAddr &PredPointer = PredList.back().second;
1300 Value *PredPtrVal =
1301 PredPointer.translateValue(BB, Pred, &DT, /*MustDominate=*/false);
1302
1303 // Check to see if we have already visited this pred block with another
1304 // pointer. If so, we can't do this lookup. This failure can occur
1305 // with PHI translation when a critical edge exists and the PHI node in
1306 // the successor translates to a pointer value different than the
1307 // pointer the block was first analyzed with.
1308 Value *PrevVal = lookupNonLocalPointerDepVisited(Pred);
1309 if (!PrevVal) {
1310 setNonLocalPointerDepVisited(Pred, PredPtrVal);
1311 continue;
1312 }
1313
1314 // We found the pred; take it off the list of preds to visit.
1315 PredList.pop_back();
1316
1317 // If the predecessor was visited with PredPtr, then we already did
1318 // the analysis and can ignore it.
1319 if (PrevVal == PredPtrVal)
1320 continue;
1321
1322 // Otherwise, the block was previously analyzed with a different
1323 // pointer. We can't represent the result of this case, so we just
1324 // treat this as a phi translation failure.
1325
1326 // Make sure to clean up the Visited map before continuing on to
1327 // PredTranslationFailure.
1328 for (const auto &Pred : PredList)
1329 setNonLocalPointerDepVisited(Pred.first, nullptr);
1330
1331 goto PredTranslationFailure;
1332 }
1333
1334 // Actually process results here; this need to be a separate loop to avoid
1335 // calling getNonLocalPointerDepFromBB for blocks we don't want to return
1336 // any results for. (getNonLocalPointerDepFromBB will modify our
1337 // datastructures in ways the code after the PredTranslationFailure label
1338 // doesn't expect.)
1339 for (auto &I : PredList) {
1340 BasicBlock *Pred = I.first;
1341 PHITransAddr &PredPointer = I.second;
1342 Value *PredPtrVal = PredPointer.getAddr();
1343
1344 bool CanTranslate = true;
1345 // If PHI translation was unable to find an available pointer in this
1346 // predecessor, then we have to assume that the pointer is clobbered in
1347 // that predecessor. We can still do PRE of the load, which would insert
1348 // a computation of the pointer in this predecessor.
1349 if (!PredPtrVal)
1350 CanTranslate = false;
1351
1352 // FIXME: it is entirely possible that PHI translating will end up with
1353 // the same value. Consider PHI translating something like:
1354 // X = phi [x, bb1], [y, bb2]. PHI translating for bb1 doesn't *need*
1355 // to recurse here, pedantically speaking.
1356
1357 // If getNonLocalPointerDepFromBB fails here, that means the cached
1358 // result conflicted with the Visited list; we have to conservatively
1359 // assume it is unknown, but this also does not block PRE of the load.
1360 if (!CanTranslate ||
1361 !getNonLocalPointerDepFromBB(QueryInst, PredPointer,
1362 Loc.getWithNewPtr(PredPtrVal), isLoad,
1363 Pred, Result)) {
1364 // Add the entry to the Result list.
1365 NonLocalDepResult Entry(Pred, MemDepResult::getUnknown(), PredPtrVal);
1366 Result.push_back(Entry);
1367
1368 // Since we had a phi translation failure, the cache for CacheKey won't
1369 // include all of the entries that we need to immediately satisfy future
1370 // queries. Mark this in NonLocalPointerDeps by setting the
1371 // BBSkipFirstBlockPair pointer to null. This requires reuse of the
1372 // cached value to do more work but not miss the phi trans failure.
1373 NonLocalPointerInfo &NLPI = NonLocalPointerDeps[CacheKey];
1374 NLPI.Pair = BBSkipFirstBlockPair();
1375 continue;
1376 }
1377 }
1378
1379 // Refresh the CacheInfo/Cache pointer so that it isn't invalidated.
1380 CacheInfo = &NonLocalPointerDeps[CacheKey];
1381 Cache = &CacheInfo->NonLocalDeps;
1382 NumSortedEntries = Cache->size();
1383
1384 // Since we did phi translation, the "Cache" set won't contain all of the
1385 // results for the query. This is ok (we can still use it to accelerate
1386 // specific block queries) but we can't do the fastpath "return all
1387 // results from the set" Clear out the indicator for this.
1388 CacheInfo->Pair = BBSkipFirstBlockPair();
1389 SkipFirstBlock = false;
1390 continue;
1391
1392 PredTranslationFailure:
1393 // The following code is "failure"; we can't produce a sane translation
1394 // for the given block. It assumes that we haven't modified any of
1395 // our datastructures while processing the current block.
1396
1397 if (!Cache) {
1398 // Refresh the CacheInfo/Cache pointer if it got invalidated.
1399 CacheInfo = &NonLocalPointerDeps[CacheKey];
1400 Cache = &CacheInfo->NonLocalDeps;
1401 NumSortedEntries = Cache->size();
1402 }
1403
1404 // Since we failed phi translation, the "Cache" set won't contain all of the
1405 // results for the query. This is ok (we can still use it to accelerate
1406 // specific block queries) but we can't do the fastpath "return all
1407 // results from the set". Clear out the indicator for this.
1408 CacheInfo->Pair = BBSkipFirstBlockPair();
1409
1410 // If *nothing* works, mark the pointer as unknown.
1411 //
1412 // If this is the magic first block, return this as a clobber of the whole
1413 // incoming value. Since we can't phi translate to one of the predecessors,
1414 // we have to bail out.
1415 if (SkipFirstBlock)
1416 return false;
1417
1418 // Results of invariant loads are not cached thus no need to update cached
1419 // information.
1420 if (!isInvariantLoad) {
1421 for (NonLocalDepEntry &I : llvm::reverse(*Cache)) {
1422 if (I.getBB() != BB)
1423 continue;
1424
1425 assert((GotWorklistLimit || I.getResult().isNonLocal() ||
1426 !DT.isReachableFromEntry(BB)) &&
1427 "Should only be here with transparent block");
1428
1429 I.setResult(MemDepResult::getUnknown());
1430
1431
1432 break;
1433 }
1434 }
1435 (void)GotWorklistLimit;
1436 // Go ahead and report unknown dependence.
1437 Result.push_back(
1438 NonLocalDepResult(BB, MemDepResult::getUnknown(), Pointer.getAddr()));
1439 }
1440
1441 // Okay, we're done now. If we added new values to the cache, re-sort it.
1442 SortNonLocalDepInfoCache(*Cache, NumSortedEntries);
1443 LLVM_DEBUG(AssertSorted(*Cache));
1444 return true;
1445}
1446
/// If P exists in CachedNonLocalPointerInfo or NonLocalDefsCache, remove it.
///
/// Both the forward maps and their reverse (instruction -> keys) companions
/// are updated so no dangling cross-references remain.
void MemoryDependenceResults::removeCachedNonLocalPointerDependencies(
    ValueIsLoadPair P) {

  // Most of the time this cache is empty.
  if (!NonLocalDefsCache.empty()) {
    // Drop the forward entry keyed on P's pointer, keeping the reverse map
    // (dep instruction -> pointers) in sync.
    auto it = NonLocalDefsCache.find(P.getPointer());
    if (it != NonLocalDefsCache.end()) {
      RemoveFromReverseMap(ReverseNonLocalDefsCache,
                           it->second.getResult().getInst(), P.getPointer());
      NonLocalDefsCache.erase(it);
    }

    // If P's pointer is itself an instruction, other cached entries may name
    // it as their dependency; erase each of those forward entries too.
    if (auto *I = dyn_cast<Instruction>(P.getPointer())) {
      auto toRemoveIt = ReverseNonLocalDefsCache.find(I);
      if (toRemoveIt != ReverseNonLocalDefsCache.end()) {
        for (const auto *entry : toRemoveIt->second)
          NonLocalDefsCache.erase(entry);
        ReverseNonLocalDefsCache.erase(toRemoveIt);
      }
    }
  }

  CachedNonLocalPointerInfo::iterator It = NonLocalPointerDeps.find(P);
  if (It == NonLocalPointerDeps.end())
    return;

  // Remove all of the entries in the BB->val map.  This involves removing
  // instructions from the reverse map.
  NonLocalDepInfo &PInfo = It->second.NonLocalDeps;

  for (const NonLocalDepEntry &DE : PInfo) {
    Instruction *Target = DE.getResult().getInst();
    if (!Target)
      continue; // Ignore non-local dep results.
    assert(Target->getParent() == DE.getBB());

    // Eliminating the dirty entry from 'Cache', so update the reverse info.
    RemoveFromReverseMap(ReverseNonLocalPtrDeps, Target, P);
  }

  // Remove P from NonLocalPointerDeps (which deletes NonLocalDepInfo).
  NonLocalPointerDeps.erase(It);
}
1491
1493 // If Ptr isn't really a pointer, just ignore it.
1494 if (!Ptr->getType()->isPointerTy())
1495 return;
1496 // Flush store info for the pointer.
1497 removeCachedNonLocalPointerDependencies(ValueIsLoadPair(Ptr, false));
1498 // Flush load info for the pointer.
1499 removeCachedNonLocalPointerDependencies(ValueIsLoadPair(Ptr, true));
1500}
1501
1503 PredCache.clear();
1504}
1505
1507 EEA.removeInstruction(RemInst);
1508
1509 // Walk through the Non-local dependencies, removing this one as the value
1510 // for any cached queries.
1511 NonLocalDepMapType::iterator NLDI = NonLocalDepsMap.find(RemInst);
1512 if (NLDI != NonLocalDepsMap.end()) {
1513 NonLocalDepInfo &BlockMap = NLDI->second.first;
1514 for (auto &Entry : BlockMap)
1515 if (Instruction *Inst = Entry.getResult().getInst())
1516 RemoveFromReverseMap(ReverseNonLocalDeps, Inst, RemInst);
1517 NonLocalDepsMap.erase(NLDI);
1518 }
1519
1520 // If we have a cached local dependence query for this instruction, remove it.
1521 LocalDepMapType::iterator LocalDepEntry = LocalDeps.find(RemInst);
1522 if (LocalDepEntry != LocalDeps.end()) {
1523 // Remove us from DepInst's reverse set now that the local dep info is gone.
1524 if (Instruction *Inst = LocalDepEntry->second.getInst())
1525 RemoveFromReverseMap(ReverseLocalDeps, Inst, RemInst);
1526
1527 // Remove this local dependency info.
1528 LocalDeps.erase(LocalDepEntry);
1529 }
1530
1531 // If we have any cached dependencies on this instruction, remove
1532 // them.
1533
1534 // If the instruction is a pointer, remove it from both the load info and the
1535 // store info.
1536 if (RemInst->getType()->isPointerTy()) {
1537 removeCachedNonLocalPointerDependencies(ValueIsLoadPair(RemInst, false));
1538 removeCachedNonLocalPointerDependencies(ValueIsLoadPair(RemInst, true));
1539 } else {
1540 // Otherwise, if the instructions is in the map directly, it must be a load.
1541 // Remove it.
1542 auto toRemoveIt = NonLocalDefsCache.find(RemInst);
1543 if (toRemoveIt != NonLocalDefsCache.end()) {
1544 assert(isa<LoadInst>(RemInst) &&
1545 "only load instructions should be added directly");
1546 const Instruction *DepV = toRemoveIt->second.getResult().getInst();
1547 ReverseNonLocalDefsCache.find(DepV)->second.erase(RemInst);
1548 NonLocalDefsCache.erase(toRemoveIt);
1549 }
1550 }
1551
1552 // Loop over all of the things that depend on the instruction we're removing.
1554
1555 // If we find RemInst as a clobber or Def in any of the maps for other values,
1556 // we need to replace its entry with a dirty version of the instruction after
1557 // it. If RemInst is a terminator, we use a null dirty value.
1558 //
1559 // Using a dirty version of the instruction after RemInst saves having to scan
1560 // the entire block to get to this point.
1561 MemDepResult NewDirtyVal;
1562 if (!RemInst->isTerminator())
1563 NewDirtyVal = MemDepResult::getDirty(&*++RemInst->getIterator());
1564
1565 ReverseDepMapType::iterator ReverseDepIt = ReverseLocalDeps.find(RemInst);
1566 if (ReverseDepIt != ReverseLocalDeps.end()) {
1567 // RemInst can't be the terminator if it has local stuff depending on it.
1568 assert(!ReverseDepIt->second.empty() && !RemInst->isTerminator() &&
1569 "Nothing can locally depend on a terminator");
1570
1571 for (Instruction *InstDependingOnRemInst : ReverseDepIt->second) {
1572 assert(InstDependingOnRemInst != RemInst &&
1573 "Already removed our local dep info");
1574
1575 LocalDeps[InstDependingOnRemInst] = NewDirtyVal;
1576
1577 // Make sure to remember that new things depend on NewDepInst.
1578 assert(NewDirtyVal.getInst() &&
1579 "There is no way something else can have "
1580 "a local dep on this if it is a terminator!");
1581 ReverseDepsToAdd.push_back(
1582 std::make_pair(NewDirtyVal.getInst(), InstDependingOnRemInst));
1583 }
1584
1585 ReverseLocalDeps.erase(ReverseDepIt);
1586
1587 // Add new reverse deps after scanning the set, to avoid invalidating the
1588 // 'ReverseDeps' reference.
1589 while (!ReverseDepsToAdd.empty()) {
1590 ReverseLocalDeps[ReverseDepsToAdd.back().first].insert(
1591 ReverseDepsToAdd.back().second);
1592 ReverseDepsToAdd.pop_back();
1593 }
1594 }
1595
1596 ReverseDepIt = ReverseNonLocalDeps.find(RemInst);
1597 if (ReverseDepIt != ReverseNonLocalDeps.end()) {
1598 for (Instruction *I : ReverseDepIt->second) {
1599 assert(I != RemInst && "Already removed NonLocalDep info for RemInst");
1600
1601 PerInstNLInfo &INLD = NonLocalDepsMap[I];
1602 // The information is now dirty!
1603 INLD.second = true;
1604
1605 for (auto &Entry : INLD.first) {
1606 if (Entry.getResult().getInst() != RemInst)
1607 continue;
1608
1609 // Convert to a dirty entry for the subsequent instruction.
1610 Entry.setResult(NewDirtyVal);
1611
1612 if (Instruction *NextI = NewDirtyVal.getInst())
1613 ReverseDepsToAdd.push_back(std::make_pair(NextI, I));
1614 }
1615 }
1616
1617 ReverseNonLocalDeps.erase(ReverseDepIt);
1618
1619 // Add new reverse deps after scanning the set, to avoid invalidating 'Set'
1620 while (!ReverseDepsToAdd.empty()) {
1621 ReverseNonLocalDeps[ReverseDepsToAdd.back().first].insert(
1622 ReverseDepsToAdd.back().second);
1623 ReverseDepsToAdd.pop_back();
1624 }
1625 }
1626
1627 // If the instruction is in ReverseNonLocalPtrDeps then it appears as a
1628 // value in the NonLocalPointerDeps info.
1629 ReverseNonLocalPtrDepTy::iterator ReversePtrDepIt =
1630 ReverseNonLocalPtrDeps.find(RemInst);
1631 if (ReversePtrDepIt != ReverseNonLocalPtrDeps.end()) {
1633 ReversePtrDepsToAdd;
1634
1635 for (ValueIsLoadPair P : ReversePtrDepIt->second) {
1636 assert(P.getPointer() != RemInst &&
1637 "Already removed NonLocalPointerDeps info for RemInst");
1638
1639 auto &NLPD = NonLocalPointerDeps[P];
1640
1641 NonLocalDepInfo &NLPDI = NLPD.NonLocalDeps;
1642
1643 // The cache is not valid for any specific block anymore.
1644 NLPD.Pair = BBSkipFirstBlockPair();
1645
1646 // Update any entries for RemInst to use the instruction after it.
1647 for (auto &Entry : NLPDI) {
1648 if (Entry.getResult().getInst() != RemInst)
1649 continue;
1650
1651 // Convert to a dirty entry for the subsequent instruction.
1652 Entry.setResult(NewDirtyVal);
1653
1654 if (Instruction *NewDirtyInst = NewDirtyVal.getInst())
1655 ReversePtrDepsToAdd.push_back(std::make_pair(NewDirtyInst, P));
1656 }
1657
1658 // Re-sort the NonLocalDepInfo. Changing the dirty entry to its
1659 // subsequent value may invalidate the sortedness.
1660 llvm::sort(NLPDI);
1661 }
1662
1663 ReverseNonLocalPtrDeps.erase(ReversePtrDepIt);
1664
1665 while (!ReversePtrDepsToAdd.empty()) {
1666 ReverseNonLocalPtrDeps[ReversePtrDepsToAdd.back().first].insert(
1667 ReversePtrDepsToAdd.back().second);
1668 ReversePtrDepsToAdd.pop_back();
1669 }
1670 }
1671
1672 assert(!NonLocalDepsMap.count(RemInst) && "RemInst got reinserted?");
1673 LLVM_DEBUG(verifyRemoved(RemInst));
1674}
1675
1676/// Verify that the specified instruction does not occur in our internal data
1677/// structures.
1678///
1679/// This function verifies by asserting in debug builds.
1680void MemoryDependenceResults::verifyRemoved(Instruction *D) const {
1681#ifndef NDEBUG
1682 for (const auto &DepKV : LocalDeps) {
1683 assert(DepKV.first != D && "Inst occurs in data structures");
1684 assert(DepKV.second.getInst() != D && "Inst occurs in data structures");
1685 }
1686
1687 for (const auto &DepKV : NonLocalPointerDeps) {
1688 assert(DepKV.first.getPointer() != D && "Inst occurs in NLPD map key");
1689 for (const auto &Entry : DepKV.second.NonLocalDeps)
1690 assert(Entry.getResult().getInst() != D && "Inst occurs as NLPD value");
1691 }
1692
1693 for (const auto &DepKV : NonLocalDepsMap) {
1694 assert(DepKV.first != D && "Inst occurs in data structures");
1695 const PerInstNLInfo &INLD = DepKV.second;
1696 for (const auto &Entry : INLD.first)
1697 assert(Entry.getResult().getInst() != D &&
1698 "Inst occurs in data structures");
1699 }
1700
1701 for (const auto &DepKV : ReverseLocalDeps) {
1702 assert(DepKV.first != D && "Inst occurs in data structures");
1703 for (Instruction *Inst : DepKV.second)
1704 assert(Inst != D && "Inst occurs in data structures");
1705 }
1706
1707 for (const auto &DepKV : ReverseNonLocalDeps) {
1708 assert(DepKV.first != D && "Inst occurs in data structures");
1709 for (Instruction *Inst : DepKV.second)
1710 assert(Inst != D && "Inst occurs in data structures");
1711 }
1712
1713 for (const auto &DepKV : ReverseNonLocalPtrDeps) {
1714 assert(DepKV.first != D && "Inst occurs in rev NLPD map");
1715
1716 for (ValueIsLoadPair P : DepKV.second)
1717 assert(P != ValueIsLoadPair(D, false) && P != ValueIsLoadPair(D, true) &&
1718 "Inst occurs in ReverseNonLocalPtrDeps map");
1719 }
1720#endif
1721}
1722
1723AnalysisKey MemoryDependenceAnalysis::Key;
1724
1727
1730 auto &AA = AM.getResult<AAManager>(F);
1731 auto &AC = AM.getResult<AssumptionAnalysis>(F);
1732 auto &TLI = AM.getResult<TargetLibraryAnalysis>(F);
1733 auto &DT = AM.getResult<DominatorTreeAnalysis>(F);
1734 return MemoryDependenceResults(AA, AC, TLI, DT, DefaultBlockScanLimit);
1735}
1736
1738
1740 "Memory Dependence Analysis", false, true)
1746 "Memory Dependence Analysis", false, true)
1747
1749
1751
1753 MemDep.reset();
1754}
1755
1763
1765 FunctionAnalysisManager::Invalidator &Inv) {
1766 // Check whether our analysis is preserved.
1767 auto PAC = PA.getChecker<MemoryDependenceAnalysis>();
1768 if (!PAC.preserved() && !PAC.preservedSet<AllAnalysesOn<Function>>())
1769 // If not, give up now.
1770 return true;
1771
1772 // Check whether the analyses we depend on became invalid for any reason.
1773 if (Inv.invalidate<AAManager>(F, PA) ||
1774 Inv.invalidate<AssumptionAnalysis>(F, PA) ||
1775 Inv.invalidate<DominatorTreeAnalysis>(F, PA))
1776 return true;
1777
1778 // Otherwise this analysis result remains valid.
1779 return false;
1780}
1781
1783 return DefaultBlockScanLimit;
1784}
1785
1787 auto &AA = getAnalysis<AAResultsWrapperPass>().getAAResults();
1788 auto &AC = getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
1789 auto &TLI = getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F);
1790 auto &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
1791 MemDep.emplace(AA, AC, TLI, DT, BlockScanLimit);
1792 return false;
1793}
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
static bool isLoad(int Opcode)
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
Atomic ordering constants.
static GCRegistry::Add< StatepointGC > D("statepoint-example", "an example strategy for statepoint")
This file defines the DenseMap class.
Module.h This file contains the declarations for the Module class.
This defines the Use class.
#define F(x, y, z)
Definition MD5.cpp:54
#define I(x, y, z)
Definition MD5.cpp:57
static const unsigned int NumResultsLimit
static cl::opt< unsigned > CacheGlobalLimit("memdep-cache-global-limit", cl::Hidden, cl::init(10000), cl::desc("The max number of entries allowed in a cache (default = 10000)"))
static ModRefInfo GetLocation(const Instruction *Inst, MemoryLocation &Loc, const TargetLibraryInfo &TLI)
If the given instruction references a specific memory location, fill in Loc with the details,...
static cl::opt< unsigned > BlockNumberLimit("memdep-block-number-limit", cl::Hidden, cl::init(200), cl::desc("The number of blocks to scan during memory " "dependency analysis (default = 200)"))
static void RemoveFromReverseMap(DenseMap< Instruction *, SmallPtrSet< KeyTy, 4 > > &ReverseMap, Instruction *Inst, KeyTy Val)
This is a helper function that removes Val from 'Inst's set in ReverseMap.
static void SortNonLocalDepInfoCache(MemoryDependenceResults::NonLocalDepInfo &Cache, unsigned NumSortedEntries)
Sort the NonLocalDepInfo cache, given a certain number of elements in the array that are already prop...
static void AssertSorted(MemoryDependenceResults::NonLocalDepInfo &Cache, int Count=-1)
This method is used when -debug is specified to verify that cache arrays are properly kept sorted.
static bool canSkipClobberingStore(const StoreInst *SI, const MemoryLocation &MemLoc, Align MemLocAlign, BatchAAResults &BatchAA, unsigned ScanLimit)
static cl::opt< unsigned > BlockScanLimit("memdep-block-scan-limit", cl::Hidden, cl::init(100), cl::desc("The number of instructions to scan in a block in memory " "dependency analysis (default = 100)"))
This file provides utility analysis objects describing memory locations.
static bool isOrdered(const Instruction *I)
This file contains the declarations for metadata subclasses.
static bool isInvariantLoad(const Instruction *I, const Value *Ptr, const bool IsKernelFn)
uint64_t IntrinsicInst * II
#define P(N)
#define INITIALIZE_PASS_DEPENDENCY(depName)
Definition PassSupport.h:42
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
Definition PassSupport.h:44
#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)
Definition PassSupport.h:39
This file contains some templates that are useful if you are working with the STL at all.
This file defines the SmallPtrSet class.
This file defines the SmallVector class.
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
Definition Statistic.h:171
#define LLVM_DEBUG(...)
Definition Debug.h:114
A manager for alias analyses.
A wrapper pass to provide the legacy pass manager access to a suitably prepared AAResults object.
The possible results of an alias query.
@ NoAlias
The two locations do not alias at all.
@ PartialAlias
The two locations alias, but only due to a partial overlap.
@ MustAlias
The two locations precisely alias each other.
This templated class represents "all analyses that operate over <a particular IR unit>" (e....
Definition Analysis.h:50
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)
Get the result of an analysis pass for a given IR unit.
Represent the analysis usage information of a pass.
AnalysisUsage & addRequired()
void setPreservesAll()
Set by analyses that do not transform their input at all.
AnalysisUsage & addRequiredTransitive()
A function analysis which provides an AssumptionCache.
An immutable pass that tracks lazily created AssumptionCache objects.
LLVM Basic Block Representation.
Definition BasicBlock.h:62
iterator end()
Definition BasicBlock.h:474
unsigned getNumber() const
Definition BasicBlock.h:95
iterator begin()
Instruction iterator methods.
Definition BasicBlock.h:461
const Function * getParent() const
Return the enclosing method, or null if none.
Definition BasicBlock.h:213
LLVM_ABI const DataLayout & getDataLayout() const
Get the data layout of the module this basic block belongs to.
InstListType::iterator iterator
Instruction iterators...
Definition BasicBlock.h:170
This class is a wrapper over an AAResults, and it is intended to be used only when there are no IR ch...
AliasResult alias(const MemoryLocation &LocA, const MemoryLocation &LocB)
bool isMustAlias(const MemoryLocation &LocA, const MemoryLocation &LocB)
ModRefInfo getModRefInfo(const Instruction *I, const std::optional< MemoryLocation > &OptLoc)
ModRefInfo getModRefInfoMask(const MemoryLocation &Loc, bool IgnoreLocals=false)
Base class for all callable instructions (InvokeInst and CallInst) Holds everything related to callin...
A parsed version of the target data layout string in and methods for querying it.
Definition DataLayout.h:64
bool erase(const KeyT &Val)
Definition DenseMap.h:330
DenseMapIterator< KeyT, ValueT, KeyInfoT, BucketT > iterator
Definition DenseMap.h:74
bool empty() const
Definition DenseMap.h:109
iterator end()
Definition DenseMap.h:81
Analysis pass which computes a DominatorTree.
Definition Dominators.h:278
Legacy analysis pass which computes a DominatorTree.
Definition Dominators.h:316
An instruction for ordering other memory operations.
FunctionPass(char &pid)
Definition Pass.h:316
const BasicBlock & getEntryBlock() const
Definition Function.h:809
unsigned getMaxBlockNumber() const
Return a value larger than the largest block number.
Definition Function.h:828
LLVM_ABI bool mayWriteToMemory() const LLVM_READONLY
Return true if this instruction may modify memory.
bool hasMetadata() const
Return true if this instruction has any metadata attached to it.
LLVM_ABI bool isIdenticalToWhenDefined(const Instruction *I, bool IntersectAttrs=false) const LLVM_READONLY
This is like isIdenticalTo, except that it ignores the SubclassOptionalData flags,...
bool isTerminator() const
LLVM_ABI bool mayReadFromMemory() const LLVM_READONLY
Return true if this instruction may read memory.
LLVM_ABI bool isVolatile() const LLVM_READONLY
Return true if this instruction has a volatile memory access.
A wrapper class for inspecting calls to intrinsic functions.
An instruction for reading from memory.
Value * getPointerOperand()
bool hasValue() const
bool isScalable() const
TypeSize getValue() const
A memory dependence query can return one of three different answers.
bool isNonLocal() const
Tests if this MemDepResult represents a query that is transparent to the start of the block,...
static MemDepResult getNonLocal()
bool isNonFuncLocal() const
Tests if this MemDepResult represents a query that is transparent to the start of the function.
static MemDepResult getClobber(Instruction *Inst)
bool isDef() const
Tests if this MemDepResult represents a query that is an instruction definition dependency.
static MemDepResult getUnknown()
bool isLocal() const
Tests if this MemDepResult represents a valid local query (Clobber/Def).
bool isUnknown() const
Tests if this MemDepResult represents a query which cannot and/or will not be computed.
static MemDepResult getNonFuncLocal()
static MemDepResult getDef(Instruction *Inst)
get methods: These are static ctor methods for creating various MemDepResult kinds.
Instruction * getInst() const
If this is a normal dependency, returns the instruction that is depended on.
An analysis that produces MemoryDependenceResults for a function.
MemoryDependenceResults run(Function &F, FunctionAnalysisManager &AM)
Provides a lazy, caching interface for making common memory aliasing information queries,...
MemDepResult getSimplePointerDependencyFrom(const MemoryLocation &MemLoc, bool isLoad, BasicBlock::iterator ScanIt, BasicBlock *BB, Instruction *QueryInst, unsigned *Limit, BatchAAResults &BatchAA)
std::vector< NonLocalDepEntry > NonLocalDepInfo
void invalidateCachedPredecessors()
Clears the PredIteratorCache info.
void invalidateCachedPointerInfo(Value *Ptr)
Invalidates cached information about the specified pointer, because it may be too conservative in mem...
MemDepResult getPointerDependencyFrom(const MemoryLocation &Loc, bool isLoad, BasicBlock::iterator ScanIt, BasicBlock *BB, Instruction *QueryInst=nullptr, unsigned *Limit=nullptr)
Returns the instruction on which a memory location depends.
void removeInstruction(Instruction *InstToRemove)
Removes an instruction from the dependence analysis, updating the dependence of instructions that pre...
MemDepResult getInvariantGroupPointerDependency(LoadInst *LI, BasicBlock *BB)
This analysis looks for other loads and stores with invariant.group metadata and the same pointer ope...
unsigned getDefaultBlockScanLimit() const
Some methods limit the number of instructions they will examine.
MemDepResult getDependency(Instruction *QueryInst)
Returns the instruction on which a memory operation depends.
const NonLocalDepInfo & getNonLocalCallDependency(CallBase *QueryCall)
Perform a full dependency query for the specified call, returning the set of blocks that the value is...
void getNonLocalPointerDependency(Instruction *QueryInst, SmallVectorImpl< NonLocalDepResult > &Result)
Perform a full dependency query for an access to the QueryInst's specified memory location,...
bool invalidate(Function &F, const PreservedAnalyses &PA, FunctionAnalysisManager::Invalidator &Inv)
Handle invalidation in the new PM.
A wrapper analysis pass for the legacy pass manager that exposes a MemoryDependenceResults instance.
bool runOnFunction(Function &) override
Pass Implementation stuff. This doesn't do any analysis eagerly.
void getAnalysisUsage(AnalysisUsage &AU) const override
Does not modify anything. It uses Value Numbering and Alias Analysis.
void releaseMemory() override
Clean up memory in between runs.
Representation for a specific memory location.
MemoryLocation getWithoutAATags() const
static LLVM_ABI MemoryLocation get(const LoadInst *LI)
Return a location with information about the memory reference by the given instruction.
LocationSize Size
The maximum size of the location, in address-units, or UnknownSize if the size is not known.
static MemoryLocation getAfter(const Value *Ptr, const AAMDNodes &AATags=AAMDNodes())
Return a location that may access any location after Ptr, while remaining within the underlying objec...
MemoryLocation getWithNewPtr(const Value *NewPtr) const
AAMDNodes AATags
The metadata nodes which describes the aliasing of the location (each member is null if that kind of ...
const Value * Ptr
The address of the start of the location.
static LLVM_ABI MemoryLocation getForArgument(const CallBase *Call, unsigned ArgIdx, const TargetLibraryInfo *TLI)
Return a location representing a particular argument of a call.
This is an entry in the NonLocalDepInfo cache.
void setResult(const MemDepResult &R)
const MemDepResult & getResult() const
This is a result from a NonLocal dependence query.
PHITransAddr - An address value which tracks and handles phi translation.
LLVM_ABI Value * translateValue(BasicBlock *CurBB, BasicBlock *PredBB, const DominatorTree *DT, bool MustDominate)
translateValue - PHI translate the current address up the CFG from CurBB to Pred, updating our state ...
Value * getAddr() const
AnalysisType & getAnalysis() const
getAnalysis<AnalysisType>() - This function is used by subclasses to get to the analysis information ...
A set of analyses that are preserved following a run of a transformation pass.
Definition Analysis.h:112
PreservedAnalysisChecker getChecker() const
Build a checker for this PreservedAnalyses and the specified analysis type.
Definition Analysis.h:275
size_type size() const
Definition SmallPtrSet.h:99
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
iterator erase(const_iterator CI)
void append(ItTy in_start, ItTy in_end)
Add the specified range to the end of the SmallVector.
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
An instruction for storing to memory.
Analysis pass providing the TargetLibraryInfo.
Provides information about what library functions are available for the current target.
bool isPointerTy() const
True if this is an instance of PointerType.
Definition Type.h:284
A Use represents the edge between a Value definition and its users.
Definition Use.h:35
This class represents the va_arg llvm instruction, which returns an argument of the specified type gi...
LLVM Value Representation.
Definition Value.h:75
Type * getType() const
All values are typed, get the type of this value.
Definition Value.h:255
LLVM_ABI Align getPointerAlignment(const DataLayout &DL) const
Returns an alignment of the pointer value.
Definition Value.cpp:964
LLVM_ABI const Value * stripPointerCasts() const
Strip off pointer casts, all-zero GEPs and address space casts.
Definition Value.cpp:709
iterator_range< use_iterator > uses()
Definition Value.h:380
constexpr ScalarTy getKnownMinValue() const
Returns the minimum value this quantity can represent.
Definition TypeSize.h:165
const ParentTy * getParent() const
Definition ilist_node.h:34
self_iterator getIterator()
Definition ilist_node.h:123
CallInst * Call
Abstract Attribute helper functions.
Definition Attributor.h:165
@ Entry
Definition COFF.h:862
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
Definition CallingConv.h:24
@ BasicBlock
Various leaf nodes.
Definition ISDOpcodes.h:81
initializer< Ty > init(const Ty &Val)
friend class Instruction
Iterator for Instructions in a BasicBlock.
Definition BasicBlock.h:73
This is an optimization pass for GlobalISel generic memory operations.
FunctionAddr VTableAddr Value
Definition InstrProf.h:137
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:643
void append_range(Container &C, Range &&R)
Wrapper function to append range R to container C.
Definition STLExtras.h:2208
bool isStrongerThanUnordered(AtomicOrdering AO)
LLVM_ABI bool isNoAliasCall(const Value *V)
Return true if this pointer is returned by a noalias function.
const Value * getPointerOperand(const Value *V)
A helper function that returns the pointer operand of a load, store or GEP instruction.
auto dyn_cast_or_null(const Y &Val)
Definition Casting.h:753
unsigned Log2_32(uint32_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
Definition MathExtras.h:331
auto reverse(ContainerTy &&C)
Definition STLExtras.h:408
bool isModSet(const ModRefInfo MRI)
Definition ModRef.h:49
void sort(IteratorTy Start, IteratorTy End)
Definition STLExtras.h:1636
FunctionAddr VTableAddr Count
Definition InstrProf.h:139
bool isModOrRefSet(const ModRefInfo MRI)
Definition ModRef.h:43
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
Definition Casting.h:547
AtomicOrdering
Atomic ordering for LLVM's memory model.
ModRefInfo
Flags indicating whether a memory access modifies or references memory.
Definition ModRef.h:28
@ Ref
The access may reference the value stored in memory.
Definition ModRef.h:32
@ ModRef
The access may reference and may modify the value stored in memory.
Definition ModRef.h:36
@ Mod
The access may modify the value stored in memory.
Definition ModRef.h:34
@ NoModRef
The access neither references nor modifies the value stored in memory.
Definition ModRef.h:30
@ Other
Any other memory.
Definition ModRef.h:68
LLVM_ABI Value * getFreedOperand(const CallBase *CB, const TargetLibraryInfo *TLI)
If this if a call to a free function, return the freed operand.
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:559
AnalysisManager< Function > FunctionAnalysisManager
Convenience typedef for the Function analysis manager.
LLVM_ABI const Value * getUnderlyingObject(const Value *V, unsigned MaxLookup=MaxLookupSearchDepth)
This method strips off any GEP address adjustments, pointer casts or llvm.threadlocal....
bool isNoModRef(const ModRefInfo MRI)
Definition ModRef.h:40
bool isStrongerThan(AtomicOrdering AO, AtomicOrdering Other)
Returns true if ao is stronger than other as defined by the AtomicOrdering lattice,...
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition Alignment.h:39
A special type used by analysis passes to provide an address that identifies that particular analysis...
Definition Analysis.h:29