//===- MemoryDependenceAnalysis.cpp - Mem Deps Implementation -------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file implements an analysis that determines, for a given memory
// operation, what preceding memory operations it depends on. It builds on
// alias analysis information, and tries to provide a lazy, caching interface
// to a common kind of alias information query.
//
//===----------------------------------------------------------------------===//
15
#include "llvm/Analysis/MemoryDependenceAnalysis.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Analysis/MemoryBuiltins.h"
#include "llvm/Analysis/MemoryLocation.h"
#include "llvm/Analysis/PHITransAddr.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Metadata.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/PredIteratorCache.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/Use.h"
#include "llvm/IR/Value.h"
#include "llvm/Pass.h"
#include "llvm/Support/AtomicOrdering.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include <algorithm>
#include <cassert>
#include <iterator>
#include <utility>
54
55using namespace llvm;
56
57#define DEBUG_TYPE "memdep"
58
59STATISTIC(NumCacheNonLocal, "Number of fully cached non-local responses");
60STATISTIC(NumCacheDirtyNonLocal, "Number of dirty cached non-local responses");
61STATISTIC(NumUncacheNonLocal, "Number of uncached non-local responses");
62
63STATISTIC(NumCacheNonLocalPtr,
64 "Number of fully cached non-local ptr responses");
65STATISTIC(NumCacheDirtyNonLocalPtr,
66 "Number of cached, but dirty, non-local ptr responses");
67STATISTIC(NumUncacheNonLocalPtr, "Number of uncached non-local ptr responses");
68STATISTIC(NumCacheCompleteNonLocalPtr,
69 "Number of block queries that were completely cached");
70
71// Limit for the number of instructions to scan in a block.
72
74 "memdep-block-scan-limit", cl::Hidden, cl::init(100),
75 cl::desc("The number of instructions to scan in a block in memory "
76 "dependency analysis (default = 100)"));
77
79 BlockNumberLimit("memdep-block-number-limit", cl::Hidden, cl::init(200),
80 cl::desc("The number of blocks to scan during memory "
81 "dependency analysis (default = 200)"));
82
83// Limit on the number of memdep results to process.
84static const unsigned int NumResultsLimit = 100;
85
86/// This is a helper function that removes Val from 'Inst's set in ReverseMap.
87///
88/// If the set becomes empty, remove Inst's entry.
89template <typename KeyTy>
90static void
92 Instruction *Inst, KeyTy Val) {
93 typename DenseMap<Instruction *, SmallPtrSet<KeyTy, 4>>::iterator InstIt =
94 ReverseMap.find(Inst);
95 assert(InstIt != ReverseMap.end() && "Reverse map out of sync?");
96 bool Found = InstIt->second.erase(Val);
97 assert(Found && "Invalid reverse map!");
98 (void)Found;
99 if (InstIt->second.empty())
100 ReverseMap.erase(InstIt);
101}
102
103/// If the given instruction references a specific memory location, fill in Loc
104/// with the details, otherwise set Loc.Ptr to null.
105///
106/// Returns a ModRefInfo value describing the general behavior of the
107/// instruction.
109 const TargetLibraryInfo &TLI) {
110 if (const LoadInst *LI = dyn_cast<LoadInst>(Inst)) {
111 if (LI->isUnordered()) {
112 Loc = MemoryLocation::get(LI);
113 return ModRefInfo::Ref;
114 }
115 if (LI->getOrdering() == AtomicOrdering::Monotonic) {
116 Loc = MemoryLocation::get(LI);
117 return ModRefInfo::ModRef;
118 }
119 Loc = MemoryLocation();
120 return ModRefInfo::ModRef;
121 }
122
123 if (const StoreInst *SI = dyn_cast<StoreInst>(Inst)) {
124 if (SI->isUnordered()) {
125 Loc = MemoryLocation::get(SI);
126 return ModRefInfo::Mod;
127 }
128 if (SI->getOrdering() == AtomicOrdering::Monotonic) {
129 Loc = MemoryLocation::get(SI);
130 return ModRefInfo::ModRef;
131 }
132 Loc = MemoryLocation();
133 return ModRefInfo::ModRef;
134 }
135
136 if (const VAArgInst *V = dyn_cast<VAArgInst>(Inst)) {
137 Loc = MemoryLocation::get(V);
138 return ModRefInfo::ModRef;
139 }
140
141 if (const CallBase *CB = dyn_cast<CallBase>(Inst)) {
142 if (Value *FreedOp = getFreedOperand(CB, &TLI)) {
143 // calls to free() deallocate the entire structure
144 Loc = MemoryLocation::getAfter(FreedOp);
145 return ModRefInfo::Mod;
146 }
147 }
148
149 if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(Inst)) {
150 switch (II->getIntrinsicID()) {
151 case Intrinsic::lifetime_start:
152 case Intrinsic::lifetime_end:
153 case Intrinsic::invariant_start:
154 Loc = MemoryLocation::getForArgument(II, 1, TLI);
155 // These intrinsics don't really modify the memory, but returning Mod
156 // will allow them to be handled conservatively.
157 return ModRefInfo::Mod;
158 case Intrinsic::invariant_end:
159 Loc = MemoryLocation::getForArgument(II, 2, TLI);
160 // These intrinsics don't really modify the memory, but returning Mod
161 // will allow them to be handled conservatively.
162 return ModRefInfo::Mod;
163 case Intrinsic::masked_load:
164 Loc = MemoryLocation::getForArgument(II, 0, TLI);
165 return ModRefInfo::Ref;
166 case Intrinsic::masked_store:
167 Loc = MemoryLocation::getForArgument(II, 1, TLI);
168 return ModRefInfo::Mod;
169 default:
170 break;
171 }
172 }
173
174 // Otherwise, just do the coarse-grained thing that always works.
175 if (Inst->mayWriteToMemory())
176 return ModRefInfo::ModRef;
177 if (Inst->mayReadFromMemory())
178 return ModRefInfo::Ref;
179 return ModRefInfo::NoModRef;
180}
181
182/// Private helper for finding the local dependencies of a call site.
183MemDepResult MemoryDependenceResults::getCallDependencyFrom(
184 CallBase *Call, bool isReadOnlyCall, BasicBlock::iterator ScanIt,
185 BasicBlock *BB) {
186 unsigned Limit = getDefaultBlockScanLimit();
187
188 // Walk backwards through the block, looking for dependencies.
189 while (ScanIt != BB->begin()) {
190 Instruction *Inst = &*--ScanIt;
191 // Debug intrinsics don't cause dependences and should not affect Limit
192 if (isa<DbgInfoIntrinsic>(Inst))
193 continue;
194
195 // Limit the amount of scanning we do so we don't end up with quadratic
196 // running time on extreme testcases.
197 --Limit;
198 if (!Limit)
200
201 // If this inst is a memory op, get the pointer it accessed
202 MemoryLocation Loc;
203 ModRefInfo MR = GetLocation(Inst, Loc, TLI);
204 if (Loc.Ptr) {
205 // A simple instruction.
206 if (isModOrRefSet(AA.getModRefInfo(Call, Loc)))
207 return MemDepResult::getClobber(Inst);
208 continue;
209 }
210
211 if (auto *CallB = dyn_cast<CallBase>(Inst)) {
212 // If these two calls do not interfere, look past it.
213 if (isNoModRef(AA.getModRefInfo(Call, CallB))) {
214 // If the two calls are the same, return Inst as a Def, so that
215 // Call can be found redundant and eliminated.
216 if (isReadOnlyCall && !isModSet(MR) &&
217 Call->isIdenticalToWhenDefined(CallB))
218 return MemDepResult::getDef(Inst);
219
220 // Otherwise if the two calls don't interact (e.g. CallB is readnone)
221 // keep scanning.
222 continue;
223 } else
224 return MemDepResult::getClobber(Inst);
225 }
226
227 // If we could not obtain a pointer for the instruction and the instruction
228 // touches memory then assume that this is a dependency.
229 if (isModOrRefSet(MR))
230 return MemDepResult::getClobber(Inst);
231 }
232
233 // No dependence found. If this is the entry block of the function, it is
234 // unknown, otherwise it is non-local.
235 if (BB != &BB->getParent()->getEntryBlock())
238}
239
241 const MemoryLocation &MemLoc, bool isLoad, BasicBlock::iterator ScanIt,
242 BasicBlock *BB, Instruction *QueryInst, unsigned *Limit,
243 BatchAAResults &BatchAA) {
244 MemDepResult InvariantGroupDependency = MemDepResult::getUnknown();
245 if (QueryInst != nullptr) {
246 if (auto *LI = dyn_cast<LoadInst>(QueryInst)) {
247 InvariantGroupDependency = getInvariantGroupPointerDependency(LI, BB);
248
249 if (InvariantGroupDependency.isDef())
250 return InvariantGroupDependency;
251 }
252 }
254 MemLoc, isLoad, ScanIt, BB, QueryInst, Limit, BatchAA);
255 if (SimpleDep.isDef())
256 return SimpleDep;
257 // Non-local invariant group dependency indicates there is non local Def
258 // (it only returns nonLocal if it finds nonLocal def), which is better than
259 // local clobber and everything else.
260 if (InvariantGroupDependency.isNonLocal())
261 return InvariantGroupDependency;
262
263 assert(InvariantGroupDependency.isUnknown() &&
264 "InvariantGroupDependency should be only unknown at this point");
265 return SimpleDep;
266}
267
269 const MemoryLocation &MemLoc, bool isLoad, BasicBlock::iterator ScanIt,
270 BasicBlock *BB, Instruction *QueryInst, unsigned *Limit) {
271 BatchAAResults BatchAA(AA, &EEA);
272 return getPointerDependencyFrom(MemLoc, isLoad, ScanIt, BB, QueryInst, Limit,
273 BatchAA);
274}
275
278 BasicBlock *BB) {
279
280 if (!LI->hasMetadata(LLVMContext::MD_invariant_group))
282
283 // Take the ptr operand after all casts and geps 0. This way we can search
284 // cast graph down only.
285 Value *LoadOperand = LI->getPointerOperand()->stripPointerCasts();
286
287 // It's is not safe to walk the use list of global value, because function
288 // passes aren't allowed to look outside their functions.
289 // FIXME: this could be fixed by filtering instructions from outside
290 // of current function.
291 if (isa<GlobalValue>(LoadOperand))
293
294 Instruction *ClosestDependency = nullptr;
295 // Order of instructions in uses list is unpredictible. In order to always
296 // get the same result, we will look for the closest dominance.
297 auto GetClosestDependency = [this](Instruction *Best, Instruction *Other) {
298 assert(Other && "Must call it with not null instruction");
299 if (Best == nullptr || DT.dominates(Best, Other))
300 return Other;
301 return Best;
302 };
303
304 for (const Use &Us : LoadOperand->uses()) {
305 auto *U = dyn_cast<Instruction>(Us.getUser());
306 if (!U || U == LI || !DT.dominates(U, LI))
307 continue;
308
309 // If we hit load/store with the same invariant.group metadata (and the
310 // same pointer operand) we can assume that value pointed by pointer
311 // operand didn't change.
312 if ((isa<LoadInst>(U) ||
313 (isa<StoreInst>(U) &&
314 cast<StoreInst>(U)->getPointerOperand() == LoadOperand)) &&
315 U->hasMetadata(LLVMContext::MD_invariant_group))
316 ClosestDependency = GetClosestDependency(ClosestDependency, U);
317 }
318
319 if (!ClosestDependency)
321 if (ClosestDependency->getParent() == BB)
322 return MemDepResult::getDef(ClosestDependency);
323 // Def(U) can't be returned here because it is non-local. If local
324 // dependency won't be found then return nonLocal counting that the
325 // user will call getNonLocalPointerDependency, which will return cached
326 // result.
327 NonLocalDefsCache.try_emplace(
328 LI, NonLocalDepResult(ClosestDependency->getParent(),
329 MemDepResult::getDef(ClosestDependency), nullptr));
330 ReverseNonLocalDefsCache[ClosestDependency].insert(LI);
332}
333
334// Check if SI that may alias with MemLoc can be safely skipped. This is
335// possible in case if SI can only must alias or no alias with MemLoc (no
336// partial overlapping possible) and it writes the same value that MemLoc
337// contains now (it was loaded before this store and was not modified in
338// between).
339static bool canSkipClobberingStore(const StoreInst *SI,
340 const MemoryLocation &MemLoc,
341 Align MemLocAlign, BatchAAResults &BatchAA,
342 unsigned ScanLimit) {
343 if (!MemLoc.Size.hasValue())
344 return false;
345 if (MemoryLocation::get(SI).Size != MemLoc.Size)
346 return false;
347 if (MemLoc.Size.isScalable())
348 return false;
349 if (std::min(MemLocAlign, SI->getAlign()).value() <
350 MemLoc.Size.getValue().getKnownMinValue())
351 return false;
352
353 auto *LI = dyn_cast<LoadInst>(SI->getValueOperand());
354 if (!LI || LI->getParent() != SI->getParent())
355 return false;
356 if (BatchAA.alias(MemoryLocation::get(LI), MemLoc) != AliasResult::MustAlias)
357 return false;
358 unsigned NumVisitedInsts = 0;
359 for (const Instruction *I = LI; I != SI; I = I->getNextNonDebugInstruction())
360 if (++NumVisitedInsts > ScanLimit ||
361 isModSet(BatchAA.getModRefInfo(I, MemLoc)))
362 return false;
363
364 return true;
365}
366
368 const MemoryLocation &MemLoc, bool isLoad, BasicBlock::iterator ScanIt,
369 BasicBlock *BB, Instruction *QueryInst, unsigned *Limit,
370 BatchAAResults &BatchAA) {
371 bool isInvariantLoad = false;
372 Align MemLocAlign =
374
375 unsigned DefaultLimit = getDefaultBlockScanLimit();
376 if (!Limit)
377 Limit = &DefaultLimit;
378
379 // We must be careful with atomic accesses, as they may allow another thread
380 // to touch this location, clobbering it. We are conservative: if the
381 // QueryInst is not a simple (non-atomic) memory access, we automatically
382 // return getClobber.
383 // If it is simple, we know based on the results of
384 // "Compiler testing via a theory of sound optimisations in the C11/C++11
385 // memory model" in PLDI 2013, that a non-atomic location can only be
386 // clobbered between a pair of a release and an acquire action, with no
387 // access to the location in between.
388 // Here is an example for giving the general intuition behind this rule.
389 // In the following code:
390 // store x 0;
391 // release action; [1]
392 // acquire action; [4]
393 // %val = load x;
394 // It is unsafe to replace %val by 0 because another thread may be running:
395 // acquire action; [2]
396 // store x 42;
397 // release action; [3]
398 // with synchronization from 1 to 2 and from 3 to 4, resulting in %val
399 // being 42. A key property of this program however is that if either
400 // 1 or 4 were missing, there would be a race between the store of 42
401 // either the store of 0 or the load (making the whole program racy).
402 // The paper mentioned above shows that the same property is respected
403 // by every program that can detect any optimization of that kind: either
404 // it is racy (undefined) or there is a release followed by an acquire
405 // between the pair of accesses under consideration.
406
407 // If the load is invariant, we "know" that it doesn't alias *any* write. We
408 // do want to respect mustalias results since defs are useful for value
409 // forwarding, but any mayalias write can be assumed to be noalias.
410 // Arguably, this logic should be pushed inside AliasAnalysis itself.
411 if (isLoad && QueryInst)
412 if (LoadInst *LI = dyn_cast<LoadInst>(QueryInst)) {
413 if (LI->hasMetadata(LLVMContext::MD_invariant_load))
414 isInvariantLoad = true;
415 MemLocAlign = LI->getAlign();
416 }
417
418 // True for volatile instruction.
419 // For Load/Store return true if atomic ordering is stronger than AO,
420 // for other instruction just true if it can read or write to memory.
421 auto isComplexForReordering = [](Instruction * I, AtomicOrdering AO)->bool {
422 if (I->isVolatile())
423 return true;
424 if (auto *LI = dyn_cast<LoadInst>(I))
425 return isStrongerThan(LI->getOrdering(), AO);
426 if (auto *SI = dyn_cast<StoreInst>(I))
427 return isStrongerThan(SI->getOrdering(), AO);
428 return I->mayReadOrWriteMemory();
429 };
430
431 // Walk backwards through the basic block, looking for dependencies.
432 while (ScanIt != BB->begin()) {
433 Instruction *Inst = &*--ScanIt;
434
435 if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(Inst))
436 // Debug intrinsics don't (and can't) cause dependencies.
437 if (isa<DbgInfoIntrinsic>(II))
438 continue;
439
440 // Limit the amount of scanning we do so we don't end up with quadratic
441 // running time on extreme testcases.
442 --*Limit;
443 if (!*Limit)
445
446 if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(Inst)) {
447 // If we reach a lifetime begin or end marker, then the query ends here
448 // because the value is undefined.
449 Intrinsic::ID ID = II->getIntrinsicID();
450 switch (ID) {
451 case Intrinsic::lifetime_start: {
452 // FIXME: This only considers queries directly on the invariant-tagged
453 // pointer, not on query pointers that are indexed off of them. It'd
454 // be nice to handle that at some point (the right approach is to use
455 // GetPointerBaseWithConstantOffset).
456 MemoryLocation ArgLoc = MemoryLocation::getAfter(II->getArgOperand(1));
457 if (BatchAA.isMustAlias(ArgLoc, MemLoc))
458 return MemDepResult::getDef(II);
459 continue;
460 }
461 case Intrinsic::masked_load:
462 case Intrinsic::masked_store: {
463 MemoryLocation Loc;
464 /*ModRefInfo MR =*/ GetLocation(II, Loc, TLI);
465 AliasResult R = BatchAA.alias(Loc, MemLoc);
466 if (R == AliasResult::NoAlias)
467 continue;
468 if (R == AliasResult::MustAlias)
469 return MemDepResult::getDef(II);
470 if (ID == Intrinsic::masked_load)
471 continue;
473 }
474 }
475 }
476
477 // Values depend on loads if the pointers are must aliased. This means
478 // that a load depends on another must aliased load from the same value.
479 // One exception is atomic loads: a value can depend on an atomic load that
480 // it does not alias with when this atomic load indicates that another
481 // thread may be accessing the location.
482 if (LoadInst *LI = dyn_cast<LoadInst>(Inst)) {
483 // While volatile access cannot be eliminated, they do not have to clobber
484 // non-aliasing locations, as normal accesses, for example, can be safely
485 // reordered with volatile accesses.
486 if (LI->isVolatile()) {
487 if (!QueryInst)
488 // Original QueryInst *may* be volatile
489 return MemDepResult::getClobber(LI);
490 if (QueryInst->isVolatile())
491 // Ordering required if QueryInst is itself volatile
492 return MemDepResult::getClobber(LI);
493 // Otherwise, volatile doesn't imply any special ordering
494 }
495
496 // Atomic loads have complications involved.
497 // A Monotonic (or higher) load is OK if the query inst is itself not
498 // atomic.
499 // FIXME: This is overly conservative.
500 if (LI->isAtomic() && isStrongerThanUnordered(LI->getOrdering())) {
501 if (!QueryInst ||
502 isComplexForReordering(QueryInst, AtomicOrdering::NotAtomic))
503 return MemDepResult::getClobber(LI);
504 if (LI->getOrdering() != AtomicOrdering::Monotonic)
505 return MemDepResult::getClobber(LI);
506 }
507
509
510 // If we found a pointer, check if it could be the same as our pointer.
511 AliasResult R = BatchAA.alias(LoadLoc, MemLoc);
512
513 if (R == AliasResult::NoAlias)
514 continue;
515
516 if (isLoad) {
517 // Must aliased loads are defs of each other.
518 if (R == AliasResult::MustAlias)
519 return MemDepResult::getDef(Inst);
520
521 // If we have a partial alias, then return this as a clobber for the
522 // client to handle.
523 if (R == AliasResult::PartialAlias && R.hasOffset()) {
524 ClobberOffsets[LI] = R.getOffset();
525 return MemDepResult::getClobber(Inst);
526 }
527
528 // Random may-alias loads don't depend on each other without a
529 // dependence.
530 continue;
531 }
532
533 // Stores don't alias loads from read-only memory.
534 if (!isModSet(BatchAA.getModRefInfoMask(LoadLoc)))
535 continue;
536
537 // Stores depend on may/must aliased loads.
538 return MemDepResult::getDef(Inst);
539 }
540
541 if (StoreInst *SI = dyn_cast<StoreInst>(Inst)) {
542 // Atomic stores have complications involved.
543 // A Monotonic store is OK if the query inst is itself not atomic.
544 // FIXME: This is overly conservative.
545 if (!SI->isUnordered() && SI->isAtomic()) {
546 if (!QueryInst ||
547 isComplexForReordering(QueryInst, AtomicOrdering::Unordered))
548 return MemDepResult::getClobber(SI);
549 // Ok, if we are here the guard above guarantee us that
550 // QueryInst is a non-atomic or unordered load/store.
551 // SI is atomic with monotonic or release semantic (seq_cst for store
552 // is actually a release semantic plus total order over other seq_cst
553 // instructions, as soon as QueryInst is not seq_cst we can consider it
554 // as simple release semantic).
555 // Monotonic and Release semantic allows re-ordering before store
556 // so we are safe to go further and check the aliasing. It will prohibit
557 // re-ordering in case locations are may or must alias.
558 }
559
560 // While volatile access cannot be eliminated, they do not have to clobber
561 // non-aliasing locations, as normal accesses can for example be reordered
562 // with volatile accesses.
563 if (SI->isVolatile())
564 if (!QueryInst || QueryInst->isVolatile())
565 return MemDepResult::getClobber(SI);
566
567 // If alias analysis can tell that this store is guaranteed to not modify
568 // the query pointer, ignore it. Use getModRefInfo to handle cases where
569 // the query pointer points to constant memory etc.
570 if (!isModOrRefSet(BatchAA.getModRefInfo(SI, MemLoc)))
571 continue;
572
573 // Ok, this store might clobber the query pointer. Check to see if it is
574 // a must alias: in this case, we want to return this as a def.
575 // FIXME: Use ModRefInfo::Must bit from getModRefInfo call above.
576 MemoryLocation StoreLoc = MemoryLocation::get(SI);
577
578 // If we found a pointer, check if it could be the same as our pointer.
579 AliasResult R = BatchAA.alias(StoreLoc, MemLoc);
580
581 if (R == AliasResult::NoAlias)
582 continue;
583 if (R == AliasResult::MustAlias)
584 return MemDepResult::getDef(Inst);
585 if (isInvariantLoad)
586 continue;
587 if (canSkipClobberingStore(SI, MemLoc, MemLocAlign, BatchAA, *Limit))
588 continue;
589 return MemDepResult::getClobber(Inst);
590 }
591
592 // If this is an allocation, and if we know that the accessed pointer is to
593 // the allocation, return Def. This means that there is no dependence and
594 // the access can be optimized based on that. For example, a load could
595 // turn into undef. Note that we can bypass the allocation itself when
596 // looking for a clobber in many cases; that's an alias property and is
597 // handled by BasicAA.
598 if (isa<AllocaInst>(Inst) || isNoAliasCall(Inst)) {
599 const Value *AccessPtr = getUnderlyingObject(MemLoc.Ptr);
600 if (AccessPtr == Inst || BatchAA.isMustAlias(Inst, AccessPtr))
601 return MemDepResult::getDef(Inst);
602 }
603
604 // If we found a select instruction for MemLoc pointer, return it as Def
605 // dependency.
606 if (isa<SelectInst>(Inst) && MemLoc.Ptr == Inst)
607 return MemDepResult::getDef(Inst);
608
609 if (isInvariantLoad)
610 continue;
611
612 // A release fence requires that all stores complete before it, but does
613 // not prevent the reordering of following loads or stores 'before' the
614 // fence. As a result, we look past it when finding a dependency for
615 // loads. DSE uses this to find preceding stores to delete and thus we
616 // can't bypass the fence if the query instruction is a store.
617 if (FenceInst *FI = dyn_cast<FenceInst>(Inst))
618 if (isLoad && FI->getOrdering() == AtomicOrdering::Release)
619 continue;
620
621 // See if this instruction (e.g. a call or vaarg) mod/ref's the pointer.
622 switch (BatchAA.getModRefInfo(Inst, MemLoc)) {
624 // If the call has no effect on the queried pointer, just ignore it.
625 continue;
626 case ModRefInfo::Mod:
627 return MemDepResult::getClobber(Inst);
628 case ModRefInfo::Ref:
629 // If the call is known to never store to the pointer, and if this is a
630 // load query, we can safely ignore it (scan past it).
631 if (isLoad)
632 continue;
633 [[fallthrough]];
634 default:
635 // Otherwise, there is a potential dependence. Return a clobber.
636 return MemDepResult::getClobber(Inst);
637 }
638 }
639
640 // No dependence found. If this is the entry block of the function, it is
641 // unknown, otherwise it is non-local.
642 if (BB != &BB->getParent()->getEntryBlock())
645}
646
648 ClobberOffsets.clear();
649 Instruction *ScanPos = QueryInst;
650
651 // Check for a cached result
652 MemDepResult &LocalCache = LocalDeps[QueryInst];
653
654 // If the cached entry is non-dirty, just return it. Note that this depends
655 // on MemDepResult's default constructing to 'dirty'.
656 if (!LocalCache.isDirty())
657 return LocalCache;
658
659 // Otherwise, if we have a dirty entry, we know we can start the scan at that
660 // instruction, which may save us some work.
661 if (Instruction *Inst = LocalCache.getInst()) {
662 ScanPos = Inst;
663
664 RemoveFromReverseMap(ReverseLocalDeps, Inst, QueryInst);
665 }
666
667 BasicBlock *QueryParent = QueryInst->getParent();
668
669 // Do the scan.
670 if (BasicBlock::iterator(QueryInst) == QueryParent->begin()) {
671 // No dependence found. If this is the entry block of the function, it is
672 // unknown, otherwise it is non-local.
673 if (QueryParent != &QueryParent->getParent()->getEntryBlock())
674 LocalCache = MemDepResult::getNonLocal();
675 else
676 LocalCache = MemDepResult::getNonFuncLocal();
677 } else {
678 MemoryLocation MemLoc;
679 ModRefInfo MR = GetLocation(QueryInst, MemLoc, TLI);
680 if (MemLoc.Ptr) {
681 // If we can do a pointer scan, make it happen.
682 bool isLoad = !isModSet(MR);
683 if (auto *II = dyn_cast<IntrinsicInst>(QueryInst))
684 isLoad |= II->getIntrinsicID() == Intrinsic::lifetime_start;
685
686 LocalCache =
687 getPointerDependencyFrom(MemLoc, isLoad, ScanPos->getIterator(),
688 QueryParent, QueryInst, nullptr);
689 } else if (auto *QueryCall = dyn_cast<CallBase>(QueryInst)) {
690 bool isReadOnly = AA.onlyReadsMemory(QueryCall);
691 LocalCache = getCallDependencyFrom(QueryCall, isReadOnly,
692 ScanPos->getIterator(), QueryParent);
693 } else
694 // Non-memory instruction.
695 LocalCache = MemDepResult::getUnknown();
696 }
697
698 // Remember the result!
699 if (Instruction *I = LocalCache.getInst())
700 ReverseLocalDeps[I].insert(QueryInst);
701
702 return LocalCache;
703}
704
705#ifndef NDEBUG
706/// This method is used when -debug is specified to verify that cache arrays
707/// are properly kept sorted.
709 int Count = -1) {
710 if (Count == -1)
711 Count = Cache.size();
712 assert(std::is_sorted(Cache.begin(), Cache.begin() + Count) &&
713 "Cache isn't sorted!");
714}
715#endif
716
719 assert(getDependency(QueryCall).isNonLocal() &&
720 "getNonLocalCallDependency should only be used on calls with "
721 "non-local deps!");
722 PerInstNLInfo &CacheP = NonLocalDepsMap[QueryCall];
723 NonLocalDepInfo &Cache = CacheP.first;
724
725 // This is the set of blocks that need to be recomputed. In the cached case,
726 // this can happen due to instructions being deleted etc. In the uncached
727 // case, this starts out as the set of predecessors we care about.
729
730 if (!Cache.empty()) {
731 // Okay, we have a cache entry. If we know it is not dirty, just return it
732 // with no computation.
733 if (!CacheP.second) {
734 ++NumCacheNonLocal;
735 return Cache;
736 }
737
738 // If we already have a partially computed set of results, scan them to
739 // determine what is dirty, seeding our initial DirtyBlocks worklist.
740 for (auto &Entry : Cache)
741 if (Entry.getResult().isDirty())
742 DirtyBlocks.push_back(Entry.getBB());
743
744 // Sort the cache so that we can do fast binary search lookups below.
745 llvm::sort(Cache);
746
747 ++NumCacheDirtyNonLocal;
748 } else {
749 // Seed DirtyBlocks with each of the preds of QueryInst's block.
750 BasicBlock *QueryBB = QueryCall->getParent();
751 append_range(DirtyBlocks, PredCache.get(QueryBB));
752 ++NumUncacheNonLocal;
753 }
754
755 // isReadonlyCall - If this is a read-only call, we can be more aggressive.
756 bool isReadonlyCall = AA.onlyReadsMemory(QueryCall);
757
759
760 unsigned NumSortedEntries = Cache.size();
761 LLVM_DEBUG(AssertSorted(Cache));
762
763 // Iterate while we still have blocks to update.
764 while (!DirtyBlocks.empty()) {
765 BasicBlock *DirtyBB = DirtyBlocks.pop_back_val();
766
767 // Already processed this block?
768 if (!Visited.insert(DirtyBB).second)
769 continue;
770
771 // Do a binary search to see if we already have an entry for this block in
772 // the cache set. If so, find it.
773 LLVM_DEBUG(AssertSorted(Cache, NumSortedEntries));
774 NonLocalDepInfo::iterator Entry =
775 std::upper_bound(Cache.begin(), Cache.begin() + NumSortedEntries,
776 NonLocalDepEntry(DirtyBB));
777 if (Entry != Cache.begin() && std::prev(Entry)->getBB() == DirtyBB)
778 --Entry;
779
780 NonLocalDepEntry *ExistingResult = nullptr;
781 if (Entry != Cache.begin() + NumSortedEntries &&
782 Entry->getBB() == DirtyBB) {
783 // If we already have an entry, and if it isn't already dirty, the block
784 // is done.
785 if (!Entry->getResult().isDirty())
786 continue;
787
788 // Otherwise, remember this slot so we can update the value.
789 ExistingResult = &*Entry;
790 }
791
792 // If the dirty entry has a pointer, start scanning from it so we don't have
793 // to rescan the entire block.
794 BasicBlock::iterator ScanPos = DirtyBB->end();
795 if (ExistingResult) {
796 if (Instruction *Inst = ExistingResult->getResult().getInst()) {
797 ScanPos = Inst->getIterator();
798 // We're removing QueryInst's use of Inst.
799 RemoveFromReverseMap<Instruction *>(ReverseNonLocalDeps, Inst,
800 QueryCall);
801 }
802 }
803
804 // Find out if this block has a local dependency for QueryInst.
805 MemDepResult Dep;
806
807 if (ScanPos != DirtyBB->begin()) {
808 Dep = getCallDependencyFrom(QueryCall, isReadonlyCall, ScanPos, DirtyBB);
809 } else if (DirtyBB != &DirtyBB->getParent()->getEntryBlock()) {
810 // No dependence found. If this is the entry block of the function, it is
811 // a clobber, otherwise it is unknown.
813 } else {
815 }
816
817 // If we had a dirty entry for the block, update it. Otherwise, just add
818 // a new entry.
819 if (ExistingResult)
820 ExistingResult->setResult(Dep);
821 else
822 Cache.push_back(NonLocalDepEntry(DirtyBB, Dep));
823
824 // If the block has a dependency (i.e. it isn't completely transparent to
825 // the value), remember the association!
826 if (!Dep.isNonLocal()) {
827 // Keep the ReverseNonLocalDeps map up to date so we can efficiently
828 // update this when we remove instructions.
829 if (Instruction *Inst = Dep.getInst())
830 ReverseNonLocalDeps[Inst].insert(QueryCall);
831 } else {
832
833 // If the block *is* completely transparent to the load, we need to check
834 // the predecessors of this block. Add them to our worklist.
835 append_range(DirtyBlocks, PredCache.get(DirtyBB));
836 }
837 }
838
839 return Cache;
840}
841
// NOTE(review): this is the body of
// MemoryDependenceResults::getNonLocalPointerDependency(Instruction *QueryInst,
// SmallVectorImpl<NonLocalDepResult> &Result) — the signature lines
// (orig. 842-843) were dropped by the extraction; confirm against upstream.
// It answers a non-local (cross-block) dependency query for a pointer access,
// filling 'Result' with one entry per predecessor block.
844 const MemoryLocation Loc = MemoryLocation::get(QueryInst);
845 bool isLoad = isa<LoadInst>(QueryInst);
846 BasicBlock *FromBB = QueryInst->getParent();
847 assert(FromBB);
848
849 assert(Loc.Ptr->getType()->isPointerTy() &&
850 "Can't get pointer deps of a non-pointer!");
851 Result.clear();
852 {
853 // Check if there is cached Def with invariant.group.
// A cached invariant.group Def is consumed (erased from both the forward and
// reverse caches) as it is returned — it is a one-shot answer.
854 auto NonLocalDefIt = NonLocalDefsCache.find(QueryInst);
855 if (NonLocalDefIt != NonLocalDefsCache.end()) {
856 Result.push_back(NonLocalDefIt->second);
857 ReverseNonLocalDefsCache[NonLocalDefIt->second.getResult().getInst()]
858 .erase(QueryInst);
859 NonLocalDefsCache.erase(NonLocalDefIt);
860 return;
861 }
862 }
863 // This routine does not expect to deal with volatile instructions.
864 // Doing so would require piping through the QueryInst all the way through.
865 // TODO: volatiles can't be elided, but they can be reordered with other
866 // non-volatile accesses.
867
868 // We currently give up on any instruction which is ordered, but we do handle
869 // atomic instructions which are unordered.
870 // TODO: Handle ordered instructions
871 auto isOrdered = [](Instruction *Inst) {
872 if (LoadInst *LI = dyn_cast<LoadInst>(Inst)) {
873 return !LI->isUnordered();
874 } else if (StoreInst *SI = dyn_cast<StoreInst>(Inst)) {
875 return !SI->isUnordered();
876 }
877 return false;
878 };
// Volatile or ordered accesses: report a single "unknown" dependency for the
// query's own block rather than attempting the walk.
879 if (QueryInst->isVolatile() || isOrdered(QueryInst)) {
880 Result.push_back(NonLocalDepResult(FromBB, MemDepResult::getUnknown(),
881 const_cast<Value *>(Loc.Ptr)));
882 return;
883 }
884 const DataLayout &DL = FromBB->getDataLayout();
885 PHITransAddr Address(const_cast<Value *>(Loc.Ptr), DL, &AC);
886
887 // This is the set of blocks we've inspected, and the pointer we consider in
888 // each block. Because of critical edges, we currently bail out if querying
889 // a block with multiple different pointers. This can happen during PHI
890 // translation.
// NOTE(review): orig. line 891 — the declaration of 'Visited' — is missing
// from this dump; per getNonLocalPointerDepFromBB's parameter (orig. 1041)
// it is presumably SmallDenseMap<BasicBlock *, Value *, 16> Visited;
// TODO confirm against upstream.
892 if (getNonLocalPointerDepFromBB(QueryInst, Address, Loc, isLoad, FromBB,
893 Result, Visited, true))
894 return;
// The walk failed (e.g. conflicting cached pointers); fall back to a single
// conservative "unknown" result for the query block.
895 Result.clear();
896 Result.push_back(NonLocalDepResult(FromBB, MemDepResult::getUnknown(),
897 const_cast<Value *>(Loc.Ptr)));
898}
899
900/// Compute the memdep value for BB with Pointer/PointeeSize using either
901/// cached information in Cache or by doing a lookup (which may use dirty cache
902/// info if available).
903///
904/// If we do a lookup, add the result to the cache.
905MemDepResult MemoryDependenceResults::getNonLocalInfoForBlock(
906 Instruction *QueryInst, const MemoryLocation &Loc, bool isLoad,
907 BasicBlock *BB, NonLocalDepInfo *Cache, unsigned NumSortedEntries,
908 BatchAAResults &BatchAA) {
909
910 bool isInvariantLoad = false;
911
912 if (LoadInst *LI = dyn_cast_or_null<LoadInst>(QueryInst))
913 isInvariantLoad = LI->getMetadata(LLVMContext::MD_invariant_load);
914
915 // Do a binary search to see if we already have an entry for this block in
916 // the cache set. If so, find it.
917 NonLocalDepInfo::iterator Entry = std::upper_bound(
918 Cache->begin(), Cache->begin() + NumSortedEntries, NonLocalDepEntry(BB));
919 if (Entry != Cache->begin() && (Entry - 1)->getBB() == BB)
920 --Entry;
921
922 NonLocalDepEntry *ExistingResult = nullptr;
923 if (Entry != Cache->begin() + NumSortedEntries && Entry->getBB() == BB)
924 ExistingResult = &*Entry;
925
926 // Use cached result for invariant load only if there is no dependency for non
927 // invariant load. In this case invariant load can not have any dependency as
928 // well.
929 if (ExistingResult && isInvariantLoad &&
930 !ExistingResult->getResult().isNonFuncLocal())
931 ExistingResult = nullptr;
932
933 // If we have a cached entry, and it is non-dirty, use it as the value for
934 // this dependency.
935 if (ExistingResult && !ExistingResult->getResult().isDirty()) {
936 ++NumCacheNonLocalPtr;
937 return ExistingResult->getResult();
938 }
939
940 // Otherwise, we have to scan for the value. If we have a dirty cache
941 // entry, start scanning from its position, otherwise we scan from the end
942 // of the block.
943 BasicBlock::iterator ScanPos = BB->end();
944 if (ExistingResult && ExistingResult->getResult().getInst()) {
945 assert(ExistingResult->getResult().getInst()->getParent() == BB &&
946 "Instruction invalidated?");
947 ++NumCacheDirtyNonLocalPtr;
948 ScanPos = ExistingResult->getResult().getInst()->getIterator();
949
950 // Eliminating the dirty entry from 'Cache', so update the reverse info.
951 ValueIsLoadPair CacheKey(Loc.Ptr, isLoad);
952 RemoveFromReverseMap(ReverseNonLocalPtrDeps, &*ScanPos, CacheKey);
953 } else {
954 ++NumUncacheNonLocalPtr;
955 }
956
957 // Scan the block for the dependency.
958 MemDepResult Dep = getPointerDependencyFrom(Loc, isLoad, ScanPos, BB,
959 QueryInst, nullptr, BatchAA);
960
961 // Don't cache results for invariant load.
962 if (isInvariantLoad)
963 return Dep;
964
965 // If we had a dirty entry for the block, update it. Otherwise, just add
966 // a new entry.
967 if (ExistingResult)
968 ExistingResult->setResult(Dep);
969 else
970 Cache->push_back(NonLocalDepEntry(BB, Dep));
971
972 // If the block has a dependency (i.e. it isn't completely transparent to
973 // the value), remember the reverse association because we just added it
974 // to Cache!
975 if (!Dep.isLocal())
976 return Dep;
977
978 // Keep the ReverseNonLocalPtrDeps map up to date so we can efficiently
979 // update MemDep when we remove instructions.
980 Instruction *Inst = Dep.getInst();
981 assert(Inst && "Didn't depend on anything?");
982 ValueIsLoadPair CacheKey(Loc.Ptr, isLoad);
983 ReverseNonLocalPtrDeps[Inst].insert(CacheKey);
984 return Dep;
985}
986
987/// Sort the NonLocalDepInfo cache, given a certain number of elements in the
988/// array that are already properly ordered.
989///
990/// This is optimized for the case when only a few entries are added.
991static void
993 unsigned NumSortedEntries) {
994 switch (Cache.size() - NumSortedEntries) {
995 case 0:
996 // done, no new entries.
997 break;
998 case 2: {
999 // Two new entries, insert the last one into place.
1000 NonLocalDepEntry Val = Cache.back();
1001 Cache.pop_back();
1002 MemoryDependenceResults::NonLocalDepInfo::iterator Entry =
1003 std::upper_bound(Cache.begin(), Cache.end() - 1, Val);
1004 Cache.insert(Entry, Val);
1005 [[fallthrough]];
1006 }
1007 case 1:
1008 // One new entry, Just insert the new value at the appropriate position.
1009 if (Cache.size() != 1) {
1010 NonLocalDepEntry Val = Cache.back();
1011 Cache.pop_back();
1012 MemoryDependenceResults::NonLocalDepInfo::iterator Entry =
1013 llvm::upper_bound(Cache, Val);
1014 Cache.insert(Entry, Val);
1015 }
1016 break;
1017 default:
1018 // Added many values, do a full scale sort.
1019 llvm::sort(Cache);
1020 break;
1021 }
1022}
1023
1024 /// Perform a dependency query based on pointer/pointeesize starting at the end
1025 /// of StartBB.
1026 ///
1027 /// Add any clobber/def results to the results vector and keep track of which
1028 /// blocks are visited in 'Visited'.
1029 ///
1030 /// This has special behavior for the first block queries (when SkipFirstBlock
1031 /// is true). In this special case, it ignores the contents of the specified
1032 /// block and starts returning dependence info for its predecessors.
1033 ///
1034 /// This function returns true on success, or false to indicate that it could
1035 /// not compute dependence information for some reason. This should be treated
1036 /// as a clobber dependence on the first instruction in the predecessor block.
1037 bool MemoryDependenceResults::getNonLocalPointerDepFromBB(
1038 Instruction *QueryInst, const PHITransAddr &Pointer,
1039 const MemoryLocation &Loc, bool isLoad, BasicBlock *StartBB,
// NOTE(review): orig. line 1040 — presumably the
// SmallVectorImpl<NonLocalDepResult> &Result parameter ('Result' is used
// throughout the body) — is missing from this dump; confirm against upstream.
1041 SmallDenseMap<BasicBlock *, Value *, 16> &Visited, bool SkipFirstBlock,
1042 bool IsIncomplete) {
1043 // Look up the cached info for Pointer.
1044 ValueIsLoadPair CacheKey(Pointer.getAddr(), isLoad);
1045
1046 // Set up a temporary NLPI value. If the map doesn't yet have an entry for
1047 // CacheKey, this value will be inserted as the associated value. Otherwise,
1048 // it'll be ignored, and we'll have to check to see if the cached size and
1049 // aa tags are consistent with the current query.
1050 NonLocalPointerInfo InitialNLPI;
1051 InitialNLPI.Size = Loc.Size;
1052 InitialNLPI.AATags = Loc.AATags;
1053
1054 bool isInvariantLoad = false;
1055 if (LoadInst *LI = dyn_cast_or_null<LoadInst>(QueryInst))
1056 isInvariantLoad = LI->getMetadata(LLVMContext::MD_invariant_load);
1057
1058 // Get the NLPI for CacheKey, inserting one into the map if it doesn't
1059 // already have one.
1060 std::pair<CachedNonLocalPointerInfo::iterator, bool> Pair =
1061 NonLocalPointerDeps.insert(std::make_pair(CacheKey, InitialNLPI));
1062 NonLocalPointerInfo *CacheInfo = &Pair.first->second;
1063
1064 // If we already have a cache entry for this CacheKey, we may need to do some
1065 // work to reconcile the cache entry and the current query.
1066 // Invariant loads don't participate in caching. Thus no need to reconcile.
1067 if (!isInvariantLoad && !Pair.second) {
1068 if (CacheInfo->Size != Loc.Size) {
1069 // The query's Size is not equal to the cached one. Throw out the cached
1070 // data and proceed with the query with the new size.
1071 CacheInfo->Pair = BBSkipFirstBlockPair();
1072 CacheInfo->Size = Loc.Size;
1073 for (auto &Entry : CacheInfo->NonLocalDeps)
1074 if (Instruction *Inst = Entry.getResult().getInst())
1075 RemoveFromReverseMap(ReverseNonLocalPtrDeps, Inst, CacheKey);
1076 CacheInfo->NonLocalDeps.clear();
1077 // The cache is cleared (in the above line) so we will have lost
1078 // information about blocks we have already visited. We therefore must
1079 // assume that the cache information is incomplete.
1080 IsIncomplete = true;
1081 }
1082
1083 // If the query's AATags are inconsistent with the cached one,
1084 // conservatively throw out the cached data and restart the query with
1085 // no tag if needed.
1086 if (CacheInfo->AATags != Loc.AATags) {
1087 if (CacheInfo->AATags) {
1088 CacheInfo->Pair = BBSkipFirstBlockPair();
1089 CacheInfo->AATags = AAMDNodes();
1090 for (auto &Entry : CacheInfo->NonLocalDeps)
1091 if (Instruction *Inst = Entry.getResult().getInst())
1092 RemoveFromReverseMap(ReverseNonLocalPtrDeps, Inst, CacheKey);
1093 CacheInfo->NonLocalDeps.clear();
1094 // The cache is cleared (in the above line) so we will have lost
1095 // information about blocks we have already visited. We therefore must
1096 // assume that the cache information is incomplete.
1097 IsIncomplete = true;
1098 }
1099 if (Loc.AATags)
1100 return getNonLocalPointerDepFromBB(
1101 QueryInst, Pointer, Loc.getWithoutAATags(), isLoad, StartBB, Result,
1102 Visited, SkipFirstBlock, IsIncomplete);
1103 }
1104 }
1105
1106 NonLocalDepInfo *Cache = &CacheInfo->NonLocalDeps;
1107
1108 // If we have valid cached information for exactly the block we are
1109 // investigating, just return it with no recomputation.
1110 // Don't use cached information for invariant loads since it is valid for
1111 // non-invariant loads only.
1112 if (!IsIncomplete && !isInvariantLoad &&
1113 CacheInfo->Pair == BBSkipFirstBlockPair(StartBB, SkipFirstBlock)) {
1114 // We have a fully cached result for this query then we can just return the
1115 // cached results and populate the visited set. However, we have to verify
1116 // that we don't already have conflicting results for these blocks. Check
1117 // to ensure that if a block in the results set is in the visited set that
1118 // it was for the same pointer query.
1119 if (!Visited.empty()) {
1120 for (auto &Entry : *Cache) {
// NOTE(review): orig. line 1121 — the declaration of 'VI', an iterator
// initialized from the Visited.find call below — is missing from this dump;
// confirm against upstream.
1122 Visited.find(Entry.getBB());
1123 if (VI == Visited.end() || VI->second == Pointer.getAddr())
1124 continue;
1125
1126 // We have a pointer mismatch in a block. Just return false, saying
1127 // that something was clobbered in this result. We could also do a
1128 // non-fully cached query, but there is little point in doing this.
1129 return false;
1130 }
1131 }
1132
1133 Value *Addr = Pointer.getAddr();
1134 for (auto &Entry : *Cache) {
1135 Visited.insert(std::make_pair(Entry.getBB(), Addr));
1136 if (Entry.getResult().isNonLocal()) {
1137 continue;
1138 }
1139
1140 if (DT.isReachableFromEntry(Entry.getBB())) {
1141 Result.push_back(
1142 NonLocalDepResult(Entry.getBB(), Entry.getResult(), Addr));
1143 }
1144 }
1145 ++NumCacheCompleteNonLocalPtr;
1146 return true;
1147 }
1148
1149 // Otherwise, either this is a new block, a block with an invalid cache
1150 // pointer or one that we're about to invalidate by putting more info into
1151 // it than its valid cache info. If empty and not explicitly indicated as
1152 // incomplete, the result will be valid cache info, otherwise it isn't.
1153 //
1154 // Invariant loads don't affect cache in any way thus no need to update
1155 // CacheInfo as well.
1156 if (!isInvariantLoad) {
1157 if (!IsIncomplete && Cache->empty())
1158 CacheInfo->Pair = BBSkipFirstBlockPair(StartBB, SkipFirstBlock);
1159 else
1160 CacheInfo->Pair = BBSkipFirstBlockPair();
1161 }
1162
// NOTE(review): orig. line 1163 — the declaration of 'Worklist' (presumably a
// SmallVector<BasicBlock *, N>) — is missing from this dump; confirm against
// upstream.
1164 Worklist.push_back(StartBB);
1165
1166 // PredList used inside loop.
// NOTE(review): orig. line 1167 — the declaration of 'PredList' (a vector of
// (BasicBlock *, PHITransAddr) pairs, per its uses below) — is missing from
// this dump; confirm against upstream.
1168
1169 // Keep track of the entries that we know are sorted. Previously cached
1170 // entries will all be sorted. The entries we add we only sort on demand (we
1171 // don't insert every element into its sorted position). We know that we
1172 // won't get any reuse from currently inserted values, because we don't
1173 // revisit blocks after we insert info for them.
1174 unsigned NumSortedEntries = Cache->size();
1175 unsigned WorklistEntries = BlockNumberLimit;
1176 bool GotWorklistLimit = false;
1177 LLVM_DEBUG(AssertSorted(*Cache));
1178
1179 BatchAAResults BatchAA(AA, &EEA);
1180 while (!Worklist.empty()) {
1181 BasicBlock *BB = Worklist.pop_back_val();
1182
1183 // If we do process a large number of blocks it becomes very expensive and
1184 // likely it isn't worth worrying about
1185 if (Result.size() > NumResultsLimit) {
1186 // Sort it now (if needed) so that recursive invocations of
1187 // getNonLocalPointerDepFromBB and other routines that could reuse the
1188 // cache value will only see properly sorted cache arrays.
1189 if (Cache && NumSortedEntries != Cache->size()) {
1190 SortNonLocalDepInfoCache(*Cache, NumSortedEntries);
1191 }
1192 // Since we bail out, the "Cache" set won't contain all of the
1193 // results for the query. This is ok (we can still use it to accelerate
1194 // specific block queries) but we can't do the fastpath "return all
1195 // results from the set". Clear out the indicator for this.
1196 CacheInfo->Pair = BBSkipFirstBlockPair();
1197 return false;
1198 }
1199
1200 // Skip the first block if we have it.
1201 if (!SkipFirstBlock) {
1202 // Analyze the dependency of *Pointer in FromBB. See if we already have
1203 // been here.
1204 assert(Visited.count(BB) && "Should check 'visited' before adding to WL");
1205
1206 // Get the dependency info for Pointer in BB. If we have cached
1207 // information, we will use it, otherwise we compute it.
1208 LLVM_DEBUG(AssertSorted(*Cache, NumSortedEntries));
1209 MemDepResult Dep = getNonLocalInfoForBlock(
1210 QueryInst, Loc, isLoad, BB, Cache, NumSortedEntries, BatchAA);
1211
1212 // If we got a Def or Clobber, add this to the list of results.
1213 if (!Dep.isNonLocal()) {
1214 if (DT.isReachableFromEntry(BB)) {
1215 Result.push_back(NonLocalDepResult(BB, Dep, Pointer.getAddr()));
1216 continue;
1217 }
1218 }
1219 }
1220
1221 // If 'Pointer' is an instruction defined in this block, then we need to do
1222 // phi translation to change it into a value live in the predecessor block.
1223 // If not, we just add the predecessors to the worklist and scan them with
1224 // the same Pointer.
1225 if (!Pointer.needsPHITranslationFromBlock(BB)) {
1226 SkipFirstBlock = false;
// NOTE(review): orig. line 1227 — the declaration of 'NewBlocks' (presumably a
// SmallVector<BasicBlock *, N>) — is missing from this dump; confirm against
// upstream.
1228 for (BasicBlock *Pred : PredCache.get(BB)) {
1229 // Verify that we haven't looked at this block yet.
1230 std::pair<DenseMap<BasicBlock *, Value *>::iterator, bool> InsertRes =
1231 Visited.insert(std::make_pair(Pred, Pointer.getAddr()));
1232 if (InsertRes.second) {
1233 // First time we've looked at *PI.
1234 NewBlocks.push_back(Pred);
1235 continue;
1236 }
1237
1238 // If we have seen this block before, but it was with a different
1239 // pointer then we have a phi translation failure and we have to treat
1240 // this as a clobber.
1241 if (InsertRes.first->second != Pointer.getAddr()) {
1242 // Make sure to clean up the Visited map before continuing on to
1243 // PredTranslationFailure.
1244 for (auto *NewBlock : NewBlocks)
1245 Visited.erase(NewBlock);
1246 goto PredTranslationFailure;
1247 }
1248 }
1249 if (NewBlocks.size() > WorklistEntries) {
1250 // Make sure to clean up the Visited map before continuing on to
1251 // PredTranslationFailure.
1252 for (auto *NewBlock : NewBlocks)
1253 Visited.erase(NewBlock);
1254 GotWorklistLimit = true;
1255 goto PredTranslationFailure;
1256 }
1257 WorklistEntries -= NewBlocks.size();
1258 Worklist.append(NewBlocks.begin(), NewBlocks.end());
1259 continue;
1260 }
1261
1262 // We do need to do phi translation, if we know ahead of time we can't phi
1263 // translate this value, don't even try.
1264 if (!Pointer.isPotentiallyPHITranslatable())
1265 goto PredTranslationFailure;
1266
1267 // We may have added values to the cache list before this PHI translation.
1268 // If so, we haven't done anything to ensure that the cache remains sorted.
1269 // Sort it now (if needed) so that recursive invocations of
1270 // getNonLocalPointerDepFromBB and other routines that could reuse the cache
1271 // value will only see properly sorted cache arrays.
1272 if (Cache && NumSortedEntries != Cache->size()) {
1273 SortNonLocalDepInfoCache(*Cache, NumSortedEntries);
1274 NumSortedEntries = Cache->size();
1275 }
1276 Cache = nullptr;
1277
1278 PredList.clear();
1279 for (BasicBlock *Pred : PredCache.get(BB)) {
1280 PredList.push_back(std::make_pair(Pred, Pointer));
1281
1282 // Get the PHI translated pointer in this predecessor. This can fail if
1283 // not translatable, in which case the getAddr() returns null.
1284 PHITransAddr &PredPointer = PredList.back().second;
1285 Value *PredPtrVal =
1286 PredPointer.translateValue(BB, Pred, &DT, /*MustDominate=*/false);
1287
1288 // Check to see if we have already visited this pred block with another
1289 // pointer. If so, we can't do this lookup. This failure can occur
1290 // with PHI translation when a critical edge exists and the PHI node in
1291 // the successor translates to a pointer value different than the
1292 // pointer the block was first analyzed with.
1293 std::pair<DenseMap<BasicBlock *, Value *>::iterator, bool> InsertRes =
1294 Visited.insert(std::make_pair(Pred, PredPtrVal));
1295
1296 if (!InsertRes.second) {
1297 // We found the pred; take it off the list of preds to visit.
1298 PredList.pop_back();
1299
1300 // If the predecessor was visited with PredPtr, then we already did
1301 // the analysis and can ignore it.
1302 if (InsertRes.first->second == PredPtrVal)
1303 continue;
1304
1305 // Otherwise, the block was previously analyzed with a different
1306 // pointer. We can't represent the result of this case, so we just
1307 // treat this as a phi translation failure.
1308
1309 // Make sure to clean up the Visited map before continuing on to
1310 // PredTranslationFailure.
1311 for (const auto &Pred : PredList)
1312 Visited.erase(Pred.first);
1313
1314 goto PredTranslationFailure;
1315 }
1316 }
1317
1318 // Actually process results here; this need to be a separate loop to avoid
1319 // calling getNonLocalPointerDepFromBB for blocks we don't want to return
1320 // any results for. (getNonLocalPointerDepFromBB will modify our
1321 // datastructures in ways the code after the PredTranslationFailure label
1322 // doesn't expect.)
1323 for (auto &I : PredList) {
1324 BasicBlock *Pred = I.first;
1325 PHITransAddr &PredPointer = I.second;
1326 Value *PredPtrVal = PredPointer.getAddr();
1327
1328 bool CanTranslate = true;
1329 // If PHI translation was unable to find an available pointer in this
1330 // predecessor, then we have to assume that the pointer is clobbered in
1331 // that predecessor. We can still do PRE of the load, which would insert
1332 // a computation of the pointer in this predecessor.
1333 if (!PredPtrVal)
1334 CanTranslate = false;
1335
1336 // FIXME: it is entirely possible that PHI translating will end up with
1337 // the same value. Consider PHI translating something like:
1338 // X = phi [x, bb1], [y, bb2]. PHI translating for bb1 doesn't *need*
1339 // to recurse here, pedantically speaking.
1340
1341 // If getNonLocalPointerDepFromBB fails here, that means the cached
1342 // result conflicted with the Visited list; we have to conservatively
1343 // assume it is unknown, but this also does not block PRE of the load.
1344 if (!CanTranslate ||
1345 !getNonLocalPointerDepFromBB(QueryInst, PredPointer,
1346 Loc.getWithNewPtr(PredPtrVal), isLoad,
1347 Pred, Result, Visited)) {
1348 // Add the entry to the Result list.
// NOTE(review): orig. line 1349 — the construction of 'Entry' (presumably a
// NonLocalDepResult for Pred carrying MemDepResult::getUnknown()) — is
// missing from this dump; confirm against upstream.
1350 Result.push_back(Entry);
1351
1352 // Since we had a phi translation failure, the cache for CacheKey won't
1353 // include all of the entries that we need to immediately satisfy future
1354 // queries. Mark this in NonLocalPointerDeps by setting the
1355 // BBSkipFirstBlockPair pointer to null. This requires reuse of the
1356 // cached value to do more work but not miss the phi trans failure.
1357 NonLocalPointerInfo &NLPI = NonLocalPointerDeps[CacheKey];
1358 NLPI.Pair = BBSkipFirstBlockPair();
1359 continue;
1360 }
1361 }
1362
1363 // Refresh the CacheInfo/Cache pointer so that it isn't invalidated.
1364 CacheInfo = &NonLocalPointerDeps[CacheKey];
1365 Cache = &CacheInfo->NonLocalDeps;
1366 NumSortedEntries = Cache->size();
1367
1368 // Since we did phi translation, the "Cache" set won't contain all of the
1369 // results for the query. This is ok (we can still use it to accelerate
1370 // specific block queries) but we can't do the fastpath "return all
1371 // results from the set" Clear out the indicator for this.
1372 CacheInfo->Pair = BBSkipFirstBlockPair();
1373 SkipFirstBlock = false;
1374 continue;
1375
1376 PredTranslationFailure:
1377 // The following code is "failure"; we can't produce a sane translation
1378 // for the given block. It assumes that we haven't modified any of
1379 // our datastructures while processing the current block.
1380
1381 if (!Cache) {
1382 // Refresh the CacheInfo/Cache pointer if it got invalidated.
1383 CacheInfo = &NonLocalPointerDeps[CacheKey];
1384 Cache = &CacheInfo->NonLocalDeps;
1385 NumSortedEntries = Cache->size();
1386 }
1387
1388 // Since we failed phi translation, the "Cache" set won't contain all of the
1389 // results for the query. This is ok (we can still use it to accelerate
1390 // specific block queries) but we can't do the fastpath "return all
1391 // results from the set". Clear out the indicator for this.
1392 CacheInfo->Pair = BBSkipFirstBlockPair();
1393
1394 // If *nothing* works, mark the pointer as unknown.
1395 //
1396 // If this is the magic first block, return this as a clobber of the whole
1397 // incoming value. Since we can't phi translate to one of the predecessors,
1398 // we have to bail out.
1399 if (SkipFirstBlock)
1400 return false;
1401
1402 // Results of invariant loads are not cached thus no need to update cached
1403 // information.
1404 if (!isInvariantLoad) {
1405 for (NonLocalDepEntry &I : llvm::reverse(*Cache)) {
1406 if (I.getBB() != BB)
1407 continue;
1408
1409 assert((GotWorklistLimit || I.getResult().isNonLocal() ||
1410 !DT.isReachableFromEntry(BB)) &&
1411 "Should only be here with transparent block");
1412
1413 I.setResult(MemDepResult::getUnknown());
1414
1415
1416 break;
1417 }
1418 }
1419 (void)GotWorklistLimit;
1420 // Go ahead and report unknown dependence.
1421 Result.push_back(
// NOTE(review): orig. line 1422 — the argument of this push_back (presumably
// a NonLocalDepResult for BB carrying MemDepResult::getUnknown()) — is
// missing from this dump; confirm against upstream.
1423 }
1424
1425 // Okay, we're done now. If we added new values to the cache, re-sort it.
1426 SortNonLocalDepInfoCache(*Cache, NumSortedEntries);
1427 LLVM_DEBUG(AssertSorted(*Cache));
1428 return true;
1429}
1430
1431/// If P exists in CachedNonLocalPointerInfo or NonLocalDefsCache, remove it.
1432void MemoryDependenceResults::removeCachedNonLocalPointerDependencies(
1433 ValueIsLoadPair P) {
1434
1435 // Most of the time this cache is empty.
1436 if (!NonLocalDefsCache.empty()) {
1437 auto it = NonLocalDefsCache.find(P.getPointer());
1438 if (it != NonLocalDefsCache.end()) {
1439 RemoveFromReverseMap(ReverseNonLocalDefsCache,
1440 it->second.getResult().getInst(), P.getPointer());
1441 NonLocalDefsCache.erase(it);
1442 }
1443
1444 if (auto *I = dyn_cast<Instruction>(P.getPointer())) {
1445 auto toRemoveIt = ReverseNonLocalDefsCache.find(I);
1446 if (toRemoveIt != ReverseNonLocalDefsCache.end()) {
1447 for (const auto *entry : toRemoveIt->second)
1448 NonLocalDefsCache.erase(entry);
1449 ReverseNonLocalDefsCache.erase(toRemoveIt);
1450 }
1451 }
1452 }
1453
1454 CachedNonLocalPointerInfo::iterator It = NonLocalPointerDeps.find(P);
1455 if (It == NonLocalPointerDeps.end())
1456 return;
1457
1458 // Remove all of the entries in the BB->val map. This involves removing
1459 // instructions from the reverse map.
1460 NonLocalDepInfo &PInfo = It->second.NonLocalDeps;
1461
1462 for (const NonLocalDepEntry &DE : PInfo) {
1463 Instruction *Target = DE.getResult().getInst();
1464 if (!Target)
1465 continue; // Ignore non-local dep results.
1466 assert(Target->getParent() == DE.getBB());
1467
1468 // Eliminating the dirty entry from 'Cache', so update the reverse info.
1469 RemoveFromReverseMap(ReverseNonLocalPtrDeps, Target, P);
1470 }
1471
1472 // Remove P from NonLocalPointerDeps (which deletes NonLocalDepInfo).
1473 NonLocalPointerDeps.erase(It);
1474}
1475
// NOTE(review): this is the body of
// MemoryDependenceResults::invalidateCachedPointerInfo — its signature line
// (orig. 1476, presumably taking a Value *Ptr) was dropped by the extraction;
// confirm against upstream. It drops all cached non-local dependency info
// (both load and store flavors) for the given pointer.
1477 // If Ptr isn't really a pointer, just ignore it.
1478 if (!Ptr->getType()->isPointerTy())
1479 return;
1480 // Flush store info for the pointer.
1481 removeCachedNonLocalPointerDependencies(ValueIsLoadPair(Ptr, false));
1482 // Flush load info for the pointer.
1483 removeCachedNonLocalPointerDependencies(ValueIsLoadPair(Ptr, true));
1484}
1485
// NOTE(review): this is the body of
// MemoryDependenceResults::invalidateCachedPredecessors — the signature line
// (orig. 1486) was dropped by the extraction; confirm against upstream.
// Discards all cached basic-block predecessor lists.
1487 PredCache.clear();
1488}
1489
// NOTE(review): this is the body of
// MemoryDependenceResults::removeInstruction(Instruction *RemInst) — the
// signature line (orig. 1490) was dropped by the extraction; confirm against
// upstream. It purges every cached dependency involving RemInst and converts
// entries that depended on it into "dirty" entries pointing at the next
// instruction, so later queries rescan only from that point.
1491 EEA.removeInstruction(RemInst);
1492
1493 // Walk through the Non-local dependencies, removing this one as the value
1494 // for any cached queries.
1495 NonLocalDepMapType::iterator NLDI = NonLocalDepsMap.find(RemInst);
1496 if (NLDI != NonLocalDepsMap.end()) {
1497 NonLocalDepInfo &BlockMap = NLDI->second.first;
1498 for (auto &Entry : BlockMap)
1499 if (Instruction *Inst = Entry.getResult().getInst())
1500 RemoveFromReverseMap(ReverseNonLocalDeps, Inst, RemInst);
1501 NonLocalDepsMap.erase(NLDI);
1502 }
1503
1504 // If we have a cached local dependence query for this instruction, remove it.
1505 LocalDepMapType::iterator LocalDepEntry = LocalDeps.find(RemInst);
1506 if (LocalDepEntry != LocalDeps.end()) {
1507 // Remove us from DepInst's reverse set now that the local dep info is gone.
1508 if (Instruction *Inst = LocalDepEntry->second.getInst())
1509 RemoveFromReverseMap(ReverseLocalDeps, Inst, RemInst);
1510
1511 // Remove this local dependency info.
1512 LocalDeps.erase(LocalDepEntry);
1513 }
1514
1515 // If we have any cached dependencies on this instruction, remove
1516 // them.
1517
1518 // If the instruction is a pointer, remove it from both the load info and the
1519 // store info.
1520 if (RemInst->getType()->isPointerTy()) {
1521 removeCachedNonLocalPointerDependencies(ValueIsLoadPair(RemInst, false));
1522 removeCachedNonLocalPointerDependencies(ValueIsLoadPair(RemInst, true));
1523 } else {
1524 // Otherwise, if the instructions is in the map directly, it must be a load.
1525 // Remove it.
1526 auto toRemoveIt = NonLocalDefsCache.find(RemInst);
1527 if (toRemoveIt != NonLocalDefsCache.end()) {
1528 assert(isa<LoadInst>(RemInst) &&
1529 "only load instructions should be added directly");
1530 const Instruction *DepV = toRemoveIt->second.getResult().getInst();
1531 ReverseNonLocalDefsCache.find(DepV)->second.erase(RemInst);
1532 NonLocalDefsCache.erase(toRemoveIt);
1533 }
1534 }
1535
1536 // Loop over all of the things that depend on the instruction we're removing.
// NOTE(review): orig. line 1537 — the declaration of 'ReverseDepsToAdd'
// (presumably a SmallVector of (Instruction *, Instruction *) pairs, per its
// uses below) — is missing from this dump; confirm against upstream.
1538
1539 // If we find RemInst as a clobber or Def in any of the maps for other values,
1540 // we need to replace its entry with a dirty version of the instruction after
1541 // it. If RemInst is a terminator, we use a null dirty value.
1542 //
1543 // Using a dirty version of the instruction after RemInst saves having to scan
1544 // the entire block to get to this point.
1545 MemDepResult NewDirtyVal;
1546 if (!RemInst->isTerminator())
1547 NewDirtyVal = MemDepResult::getDirty(&*++RemInst->getIterator());
1548
1549 ReverseDepMapType::iterator ReverseDepIt = ReverseLocalDeps.find(RemInst);
1550 if (ReverseDepIt != ReverseLocalDeps.end()) {
1551 // RemInst can't be the terminator if it has local stuff depending on it.
1552 assert(!ReverseDepIt->second.empty() && !RemInst->isTerminator() &&
1553 "Nothing can locally depend on a terminator");
1554
1555 for (Instruction *InstDependingOnRemInst : ReverseDepIt->second) {
1556 assert(InstDependingOnRemInst != RemInst &&
1557 "Already removed our local dep info");
1558
1559 LocalDeps[InstDependingOnRemInst] = NewDirtyVal;
1560
1561 // Make sure to remember that new things depend on NewDepInst.
1562 assert(NewDirtyVal.getInst() &&
1563 "There is no way something else can have "
1564 "a local dep on this if it is a terminator!");
1565 ReverseDepsToAdd.push_back(
1566 std::make_pair(NewDirtyVal.getInst(), InstDependingOnRemInst));
1567 }
1568
1569 ReverseLocalDeps.erase(ReverseDepIt);
1570
1571 // Add new reverse deps after scanning the set, to avoid invalidating the
1572 // 'ReverseDeps' reference.
1573 while (!ReverseDepsToAdd.empty()) {
1574 ReverseLocalDeps[ReverseDepsToAdd.back().first].insert(
1575 ReverseDepsToAdd.back().second);
1576 ReverseDepsToAdd.pop_back();
1577 }
1578 }
1579
1580 ReverseDepIt = ReverseNonLocalDeps.find(RemInst);
1581 if (ReverseDepIt != ReverseNonLocalDeps.end()) {
1582 for (Instruction *I : ReverseDepIt->second) {
1583 assert(I != RemInst && "Already removed NonLocalDep info for RemInst");
1584
1585 PerInstNLInfo &INLD = NonLocalDepsMap[I];
1586 // The information is now dirty!
1587 INLD.second = true;
1588
1589 for (auto &Entry : INLD.first) {
1590 if (Entry.getResult().getInst() != RemInst)
1591 continue;
1592
1593 // Convert to a dirty entry for the subsequent instruction.
1594 Entry.setResult(NewDirtyVal);
1595
1596 if (Instruction *NextI = NewDirtyVal.getInst())
1597 ReverseDepsToAdd.push_back(std::make_pair(NextI, I));
1598 }
1599 }
1600
1601 ReverseNonLocalDeps.erase(ReverseDepIt);
1602
1603 // Add new reverse deps after scanning the set, to avoid invalidating 'Set'
1604 while (!ReverseDepsToAdd.empty()) {
1605 ReverseNonLocalDeps[ReverseDepsToAdd.back().first].insert(
1606 ReverseDepsToAdd.back().second);
1607 ReverseDepsToAdd.pop_back();
1608 }
1609 }
1610
1611 // If the instruction is in ReverseNonLocalPtrDeps then it appears as a
1612 // value in the NonLocalPointerDeps info.
1613 ReverseNonLocalPtrDepTy::iterator ReversePtrDepIt =
1614 ReverseNonLocalPtrDeps.find(RemInst);
1615 if (ReversePtrDepIt != ReverseNonLocalPtrDeps.end()) {
// NOTE(review): orig. line 1616 — the first line of this declaration
// (presumably SmallVector<std::pair<Instruction *, ValueIsLoadPair>, N>) —
// is missing from this dump; confirm against upstream.
1617 ReversePtrDepsToAdd;
1618
1619 for (ValueIsLoadPair P : ReversePtrDepIt->second) {
1620 assert(P.getPointer() != RemInst &&
1621 "Already removed NonLocalPointerDeps info for RemInst");
1622
1623 NonLocalDepInfo &NLPDI = NonLocalPointerDeps[P].NonLocalDeps;
1624
1625 // The cache is not valid for any specific block anymore.
1626 NonLocalPointerDeps[P].Pair = BBSkipFirstBlockPair();
1627
1628 // Update any entries for RemInst to use the instruction after it.
1629 for (auto &Entry : NLPDI) {
1630 if (Entry.getResult().getInst() != RemInst)
1631 continue;
1632
1633 // Convert to a dirty entry for the subsequent instruction.
1634 Entry.setResult(NewDirtyVal);
1635
1636 if (Instruction *NewDirtyInst = NewDirtyVal.getInst())
1637 ReversePtrDepsToAdd.push_back(std::make_pair(NewDirtyInst, P));
1638 }
1639
1640 // Re-sort the NonLocalDepInfo. Changing the dirty entry to its
1641 // subsequent value may invalidate the sortedness.
1642 llvm::sort(NLPDI);
1643 }
1644
1645 ReverseNonLocalPtrDeps.erase(ReversePtrDepIt);
1646
1647 while (!ReversePtrDepsToAdd.empty()) {
1648 ReverseNonLocalPtrDeps[ReversePtrDepsToAdd.back().first].insert(
1649 ReversePtrDepsToAdd.back().second);
1650 ReversePtrDepsToAdd.pop_back();
1651 }
1652 }
1653
1654 assert(!NonLocalDepsMap.count(RemInst) && "RemInst got reinserted?");
1655 LLVM_DEBUG(verifyRemoved(RemInst));
1656}
1657
1658/// Verify that the specified instruction does not occur in our internal data
1659/// structures.
1660///
1661/// This function verifies by asserting in debug builds.
1662void MemoryDependenceResults::verifyRemoved(Instruction *D) const {
1663#ifndef NDEBUG
1664 for (const auto &DepKV : LocalDeps) {
1665 assert(DepKV.first != D && "Inst occurs in data structures");
1666 assert(DepKV.second.getInst() != D && "Inst occurs in data structures");
1667 }
1668
1669 for (const auto &DepKV : NonLocalPointerDeps) {
1670 assert(DepKV.first.getPointer() != D && "Inst occurs in NLPD map key");
1671 for (const auto &Entry : DepKV.second.NonLocalDeps)
1672 assert(Entry.getResult().getInst() != D && "Inst occurs as NLPD value");
1673 }
1674
1675 for (const auto &DepKV : NonLocalDepsMap) {
1676 assert(DepKV.first != D && "Inst occurs in data structures");
1677 const PerInstNLInfo &INLD = DepKV.second;
1678 for (const auto &Entry : INLD.first)
1679 assert(Entry.getResult().getInst() != D &&
1680 "Inst occurs in data structures");
1681 }
1682
1683 for (const auto &DepKV : ReverseLocalDeps) {
1684 assert(DepKV.first != D && "Inst occurs in data structures");
1685 for (Instruction *Inst : DepKV.second)
1686 assert(Inst != D && "Inst occurs in data structures");
1687 }
1688
1689 for (const auto &DepKV : ReverseNonLocalDeps) {
1690 assert(DepKV.first != D && "Inst occurs in data structures");
1691 for (Instruction *Inst : DepKV.second)
1692 assert(Inst != D && "Inst occurs in data structures");
1693 }
1694
1695 for (const auto &DepKV : ReverseNonLocalPtrDeps) {
1696 assert(DepKV.first != D && "Inst occurs in rev NLPD map");
1697
1698 for (ValueIsLoadPair P : DepKV.second)
1699 assert(P != ValueIsLoadPair(D, false) && P != ValueIsLoadPair(D, true) &&
1700 "Inst occurs in ReverseNonLocalPtrDeps map");
1701 }
1702#endif
1703}
1704
// Unique identity token for MemoryDependenceAnalysis in the new pass
// manager's analysis registry.
1705 AnalysisKey MemoryDependenceAnalysis::Key;
1706
1708 : DefaultBlockScanLimit(BlockScanLimit) {}
1709
// NOTE(review): this is the body of MemoryDependenceAnalysis::run — the
// signature lines (orig. 1710-1711, presumably taking Function &F and a
// FunctionAnalysisManager AM) were dropped by the extraction; confirm against
// upstream. It gathers the four analyses this result depends on and builds a
// MemoryDependenceResults with the configured block-scan limit.
1712 auto &AA = AM.getResult<AAManager>(F);
1713 auto &AC = AM.getResult<AssumptionAnalysis>(F);
1714 auto &TLI = AM.getResult<TargetLibraryAnalysis>(F);
1715 auto &DT = AM.getResult<DominatorTreeAnalysis>(F);
1716 return MemoryDependenceResults(AA, AC, TLI, DT, DefaultBlockScanLimit);
1717}
1718
1720
1722 "Memory Dependence Analysis", false, true)
1729
1732}
1733
1735
1737 MemDep.reset();
1738}
1739
1741 AU.setPreservesAll();
1746}
1747
// NOTE(review): this is the body of MemoryDependenceResults::invalidate — the
// signature lines (orig. 1748-1749) were dropped by the extraction; confirm
// against upstream. Returns true when this cached result must be discarded.
1750 // Check whether our analysis is preserved.
1751 auto PAC = PA.getChecker<MemoryDependenceAnalysis>();
1752 if (!PAC.preserved() && !PAC.preservedSet<AllAnalysesOn<Function>>())
1753 // If not, give up now.
1754 return true;
1755
1756 // Check whether the analyses we depend on became invalid for any reason.
1757 if (Inv.invalidate<AAManager>(F, PA) ||
1758 Inv.invalidate<AssumptionAnalysis>(F, PA) ||
// NOTE(review): orig. line 1759 — the final operand of this condition
// (presumably Inv.invalidate<DominatorTreeAnalysis>(F, PA), matching the
// dependencies gathered in run()) — is missing from this dump; confirm
// against upstream.
1760 return true;
1761
1762 // Otherwise this analysis result remains valid.
1763 return false;
1764}
1765
1767 return DefaultBlockScanLimit;
1768}
1769
// NOTE(review): this is the body of the legacy pass-manager runOnFunction —
// its signature line (orig. 1770, presumably taking Function &F) was dropped
// by the extraction; confirm against upstream. It collects the required
// analyses from the legacy wrapper passes and (re)constructs the owned
// MemoryDependenceResults. Returns false: the pass never modifies the IR.
1771 auto &AA = getAnalysis<AAResultsWrapperPass>().getAAResults();
1772 auto &AC = getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
1773 auto &TLI = getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F);
1774 auto &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
1775 MemDep.emplace(AA, AC, TLI, DT, BlockScanLimit);
1776 return false;
1777}
static bool isLoad(int Opcode)
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
Atomic ordering constants.
basic Basic Alias true
block Block Frequency Analysis
static GCRegistry::Add< StatepointGC > D("statepoint-example", "an example strategy for statepoint")
#define LLVM_DEBUG(...)
Definition: Debug.h:106
This file defines the DenseMap class.
uint64_t Addr
uint64_t Size
Module.h This file contains the declarations for the Module class.
This defines the Use class.
#define F(x, y, z)
Definition: MD5.cpp:55
#define I(x, y, z)
Definition: MD5.cpp:58
static const unsigned int NumResultsLimit
static ModRefInfo GetLocation(const Instruction *Inst, MemoryLocation &Loc, const TargetLibraryInfo &TLI)
If the given instruction references a specific memory location, fill in Loc with the details,...
static cl::opt< unsigned > BlockNumberLimit("memdep-block-number-limit", cl::Hidden, cl::init(200), cl::desc("The number of blocks to scan during memory " "dependency analysis (default = 200)"))
static void RemoveFromReverseMap(DenseMap< Instruction *, SmallPtrSet< KeyTy, 4 > > &ReverseMap, Instruction *Inst, KeyTy Val)
This is a helper function that removes Val from 'Inst's set in ReverseMap.
static void SortNonLocalDepInfoCache(MemoryDependenceResults::NonLocalDepInfo &Cache, unsigned NumSortedEntries)
Sort the NonLocalDepInfo cache, given a certain number of elements in the array that are already prop...
static void AssertSorted(MemoryDependenceResults::NonLocalDepInfo &Cache, int Count=-1)
This method is used when -debug is specified to verify that cache arrays are properly kept sorted.
static bool canSkipClobberingStore(const StoreInst *SI, const MemoryLocation &MemLoc, Align MemLocAlign, BatchAAResults &BatchAA, unsigned ScanLimit)
static cl::opt< unsigned > BlockScanLimit("memdep-block-scan-limit", cl::Hidden, cl::init(100), cl::desc("The number of instructions to scan in a block in memory " "dependency analysis (default = 100)"))
This file provides utility analysis objects describing memory locations.
static bool isOrdered(const Instruction *I)
Definition: MemorySSA.cpp:1745
This file contains the declarations for metadata subclasses.
uint64_t IntrinsicInst * II
#define P(N)
#define INITIALIZE_PASS_DEPENDENCY(depName)
Definition: PassSupport.h:55
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
Definition: PassSupport.h:57
#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)
Definition: PassSupport.h:52
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
This file contains some templates that are useful if you are working with the STL at all.
This file defines the SmallPtrSet class.
This file defines the SmallVector class.
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
Definition: Statistic.h:166
A manager for alias analyses.
A wrapper pass to provide the legacy pass manager access to a suitably prepared AAResults object.
ModRefInfo getModRefInfo(const Instruction *I, const std::optional< MemoryLocation > &OptLoc)
Check whether or not an instruction may read or write the optionally specified memory location.
bool onlyReadsMemory(const CallBase *Call)
Checks if the specified call is known to only read from non-volatile memory (or not access memory at ...
The possible results of an alias query.
Definition: AliasAnalysis.h:77
@ NoAlias
The two locations do not alias at all.
Definition: AliasAnalysis.h:95
@ PartialAlias
The two locations alias, but only due to a partial overlap.
@ MustAlias
The two locations precisely alias each other.
This templated class represents "all analyses that operate over <a particular IR unit>" (e....
Definition: Analysis.h:49
API to communicate dependencies between analyses during invalidation.
Definition: PassManager.h:292
bool invalidate(IRUnitT &IR, const PreservedAnalyses &PA)
Trigger the invalidation of some other analysis pass if not already handled and return whether it was...
Definition: PassManager.h:310
A container for analyses that lazily runs them and caches their results.
Definition: PassManager.h:253
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)
Get the result of an analysis pass for a given IR unit.
Definition: PassManager.h:410
Represent the analysis usage information of a pass.
AnalysisUsage & addRequired()
void setPreservesAll()
Set by analyses that do not transform their input at all.
AnalysisUsage & addRequiredTransitive()
A function analysis which provides an AssumptionCache.
An immutable pass that tracks lazily created AssumptionCache objects.
LLVM Basic Block Representation.
Definition: BasicBlock.h:61
iterator end()
Definition: BasicBlock.h:461
iterator begin()
Instruction iterator methods.
Definition: BasicBlock.h:448
const Function * getParent() const
Return the enclosing method, or null if none.
Definition: BasicBlock.h:219
const DataLayout & getDataLayout() const
Get the data layout of the module this basic block belongs to.
Definition: BasicBlock.cpp:296
InstListType::iterator iterator
Instruction iterators...
Definition: BasicBlock.h:177
This class is a wrapper over an AAResults, and it is intended to be used only when there are no IR ch...
AliasResult alias(const MemoryLocation &LocA, const MemoryLocation &LocB)
bool isMustAlias(const MemoryLocation &LocA, const MemoryLocation &LocB)
ModRefInfo getModRefInfo(const Instruction *I, const std::optional< MemoryLocation > &OptLoc)
ModRefInfo getModRefInfoMask(const MemoryLocation &Loc, bool IgnoreLocals=false)
Base class for all callable instructions (InvokeInst and CallInst) Holds everything related to callin...
Definition: InstrTypes.h:1120
A parsed version of the target data layout string in and methods for querying it.
Definition: DataLayout.h:63
iterator find(const_arg_type_t< KeyT > Val)
Definition: DenseMap.h:156
bool erase(const KeyT &Val)
Definition: DenseMap.h:321
DenseMapIterator< KeyT, ValueT, KeyInfoT, BucketT > iterator
Definition: DenseMap.h:71
bool empty() const
Definition: DenseMap.h:98
size_type count(const_arg_type_t< KeyT > Val) const
Return 1 if the specified key is in the map, 0 otherwise.
Definition: DenseMap.h:152
iterator end()
Definition: DenseMap.h:84
std::pair< iterator, bool > insert(const std::pair< KeyT, ValueT > &KV)
Definition: DenseMap.h:211
Dependence - This class represents a dependence between two memory references in a function.
Analysis pass which computes a DominatorTree.
Definition: Dominators.h:279
Legacy analysis pass which computes a DominatorTree.
Definition: Dominators.h:317
bool isReachableFromEntry(const Use &U) const
Provide an overload for a Use.
Definition: Dominators.cpp:321
bool dominates(const BasicBlock *BB, const Use &U) const
Return true if the (end of the) basic block BB dominates the use U.
Definition: Dominators.cpp:122
void removeInstruction(Instruction *I)
An instruction for ordering other memory operations.
Definition: Instructions.h:424
FunctionPass class - This class is used to implement most global optimizations.
Definition: Pass.h:310
const BasicBlock & getEntryBlock() const
Definition: Function.h:809
bool mayWriteToMemory() const LLVM_READONLY
Return true if this instruction may modify memory.
bool hasMetadata() const
Return true if this instruction has any metadata attached to it.
Definition: Instruction.h:368
bool isTerminator() const
Definition: Instruction.h:277
bool mayReadFromMemory() const LLVM_READONLY
Return true if this instruction may read memory.
bool isVolatile() const LLVM_READONLY
Return true if this instruction has a volatile memory access.
A wrapper class for inspecting calls to intrinsic functions.
Definition: IntrinsicInst.h:48
An instruction for reading from memory.
Definition: Instructions.h:176
Value * getPointerOperand()
Definition: Instructions.h:255
bool hasValue() const
bool isScalable() const
TypeSize getValue() const
A memory dependence query can return one of three different answers.
bool isNonLocal() const
Tests if this MemDepResult represents a query that is transparent to the start of the block,...
static MemDepResult getNonLocal()
bool isNonFuncLocal() const
Tests if this MemDepResult represents a query that is transparent to the start of the function.
static MemDepResult getClobber(Instruction *Inst)
bool isDef() const
Tests if this MemDepResult represents a query that is an instruction definition dependency.
static MemDepResult getUnknown()
bool isLocal() const
Tests if this MemDepResult represents a valid local query (Clobber/Def).
bool isUnknown() const
Tests if this MemDepResult represents a query which cannot and/or will not be computed.
static MemDepResult getNonFuncLocal()
static MemDepResult getDef(Instruction *Inst)
get methods: These are static ctor methods for creating various MemDepResult kinds.
Instruction * getInst() const
If this is a normal dependency, returns the instruction that is depended on.
An analysis that produces MemoryDependenceResults for a function.
MemoryDependenceResults run(Function &F, FunctionAnalysisManager &AM)
Provides a lazy, caching interface for making common memory aliasing information queries,...
MemDepResult getSimplePointerDependencyFrom(const MemoryLocation &MemLoc, bool isLoad, BasicBlock::iterator ScanIt, BasicBlock *BB, Instruction *QueryInst, unsigned *Limit, BatchAAResults &BatchAA)
std::vector< NonLocalDepEntry > NonLocalDepInfo
void invalidateCachedPredecessors()
Clears the PredIteratorCache info.
void invalidateCachedPointerInfo(Value *Ptr)
Invalidates cached information about the specified pointer, because it may be too conservative in mem...
MemDepResult getPointerDependencyFrom(const MemoryLocation &Loc, bool isLoad, BasicBlock::iterator ScanIt, BasicBlock *BB, Instruction *QueryInst=nullptr, unsigned *Limit=nullptr)
Returns the instruction on which a memory location depends.
void removeInstruction(Instruction *InstToRemove)
Removes an instruction from the dependence analysis, updating the dependence of instructions that pre...
MemDepResult getInvariantGroupPointerDependency(LoadInst *LI, BasicBlock *BB)
This analysis looks for other loads and stores with invariant.group metadata and the same pointer ope...
unsigned getDefaultBlockScanLimit() const
Some methods limit the number of instructions they will examine.
MemDepResult getDependency(Instruction *QueryInst)
Returns the instruction on which a memory operation depends.
const NonLocalDepInfo & getNonLocalCallDependency(CallBase *QueryCall)
Perform a full dependency query for the specified call, returning the set of blocks that the value is...
void getNonLocalPointerDependency(Instruction *QueryInst, SmallVectorImpl< NonLocalDepResult > &Result)
Perform a full dependency query for an access to the QueryInst's specified memory location,...
bool invalidate(Function &F, const PreservedAnalyses &PA, FunctionAnalysisManager::Invalidator &Inv)
Handle invalidation in the new PM.
A wrapper analysis pass for the legacy pass manager that exposes a MemoryDependenceResults instance.
bool runOnFunction(Function &) override
Pass Implementation stuff. This doesn't do any analysis eagerly.
void getAnalysisUsage(AnalysisUsage &AU) const override
Does not modify anything. It uses Value Numbering and Alias Analysis.
void releaseMemory() override
Clean up memory in between runs.
Representation for a specific memory location.
MemoryLocation getWithoutAATags() const
static MemoryLocation get(const LoadInst *LI)
Return a location with information about the memory reference by the given instruction.
LocationSize Size
The maximum size of the location, in address-units, or UnknownSize if the size is not known.
static MemoryLocation getAfter(const Value *Ptr, const AAMDNodes &AATags=AAMDNodes())
Return a location that may access any location after Ptr, while remaining within the underlying objec...
MemoryLocation getWithNewPtr(const Value *NewPtr) const
AAMDNodes AATags
The metadata nodes which describes the aliasing of the location (each member is null if that kind of ...
const Value * Ptr
The address of the start of the location.
static MemoryLocation getForArgument(const CallBase *Call, unsigned ArgIdx, const TargetLibraryInfo *TLI)
Return a location representing a particular argument of a call.
This is an entry in the NonLocalDepInfo cache.
void setResult(const MemDepResult &R)
const MemDepResult & getResult() const
This is a result from a NonLocal dependence query.
PHITransAddr - An address value which tracks and handles phi translation.
Definition: PHITransAddr.h:35
Value * translateValue(BasicBlock *CurBB, BasicBlock *PredBB, const DominatorTree *DT, bool MustDominate)
translateValue - PHI translate the current address up the CFG from CurBB to Pred, updating our state ...
Value * getAddr() const
Definition: PHITransAddr.h:58
static PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
void clear()
clear - Remove all information.
ArrayRef< BasicBlock * > get(BasicBlock *BB)
A set of analyses that are preserved following a run of a transformation pass.
Definition: Analysis.h:111
PreservedAnalysisChecker getChecker() const
Build a checker for this PreservedAnalyses and the specified analysis type.
Definition: Analysis.h:264
size_type size() const
Definition: SmallPtrSet.h:94
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
Definition: SmallPtrSet.h:384
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
Definition: SmallPtrSet.h:519
bool empty() const
Definition: SmallVector.h:81
size_t size() const
Definition: SmallVector.h:78
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
Definition: SmallVector.h:573
void append(ItTy in_start, ItTy in_end)
Add the specified range to the end of the SmallVector.
Definition: SmallVector.h:683
void push_back(const T &Elt)
Definition: SmallVector.h:413
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1196
An instruction for storing to memory.
Definition: Instructions.h:292
Analysis pass providing the TargetLibraryInfo.
Provides information about what library functions are available for the current target.
Target - Wrapper for Target specific information.
bool isPointerTy() const
True if this is an instance of PointerType.
Definition: Type.h:264
A Use represents the edge between a Value definition and its users.
Definition: Use.h:43
This class represents the va_arg llvm instruction, which returns an argument of the specified type gi...
LLVM Value Representation.
Definition: Value.h:74
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:255
Align getPointerAlignment(const DataLayout &DL) const
Returns an alignment of the pointer value.
Definition: Value.cpp:927
const Value * stripPointerCasts() const
Strip off pointer casts, all-zero GEPs and address space casts.
Definition: Value.cpp:694
iterator_range< use_iterator > uses()
Definition: Value.h:376
constexpr ScalarTy getKnownMinValue() const
Returns the minimum value this quantity can represent.
Definition: TypeSize.h:168
const ParentTy * getParent() const
Definition: ilist_node.h:32
self_iterator getIterator()
Definition: ilist_node.h:132
This class provides various memory handling functions that manipulate MemoryBlock instances.
Definition: Memory.h:53
@ Entry
Definition: COFF.h:844
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:443
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
void initializeMemoryDependenceWrapperPassPass(PassRegistry &)
void append_range(Container &C, Range &&R)
Wrapper function to append range R to container C.
Definition: STLExtras.h:2115
bool isStrongerThanUnordered(AtomicOrdering AO)
const Value * getUnderlyingObject(const Value *V, unsigned MaxLookup=6)
This method strips off any GEP address adjustments, pointer casts or llvm.threadlocal....
bool isNoAliasCall(const Value *V)
Return true if this pointer is returned by a noalias function.
auto upper_bound(R &&Range, T &&Value)
Provide wrappers to std::upper_bound which take ranges instead of having to pass begin/end explicitly...
Definition: STLExtras.h:1991
const Value * getPointerOperand(const Value *V)
A helper function that returns the pointer operand of a load, store or GEP instruction.
auto reverse(ContainerTy &&C)
Definition: STLExtras.h:420
bool isModSet(const ModRefInfo MRI)
Definition: ModRef.h:48
void sort(IteratorTy Start, IteratorTy End)
Definition: STLExtras.h:1664
bool isModOrRefSet(const ModRefInfo MRI)
Definition: ModRef.h:42
AtomicOrdering
Atomic ordering for LLVM's memory model.
ModRefInfo
Flags indicating whether a memory access modifies or references memory.
Definition: ModRef.h:27
@ Ref
The access may reference the value stored in memory.
@ Mod
The access may modify the value stored in memory.
@ NoModRef
The access neither references nor modifies the value stored in memory.
@ Other
Any other memory.
Value * getFreedOperand(const CallBase *CB, const TargetLibraryInfo *TLI)
If this if a call to a free function, return the freed operand.
bool isNoModRef(const ModRefInfo MRI)
Definition: ModRef.h:39
bool isStrongerThan(AtomicOrdering AO, AtomicOrdering Other)
Returns true if ao is stronger than other as defined by the AtomicOrdering lattice,...
A collection of metadata nodes that might be associated with a memory access used by the alias-analys...
Definition: Metadata.h:760
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition: Alignment.h:39
A special type used by analysis passes to provide an address that identifies that particular analysis...
Definition: Analysis.h:28