#define DEBUG_TYPE "memdep"
STATISTIC(NumCacheNonLocal, "Number of fully cached non-local responses");
STATISTIC(NumCacheDirtyNonLocal, "Number of dirty cached non-local responses");
STATISTIC(NumUncacheNonLocal, "Number of uncached non-local responses");
STATISTIC(NumCacheNonLocalPtr,
          "Number of fully cached non-local ptr responses");
STATISTIC(NumCacheDirtyNonLocalPtr,
          "Number of cached, but dirty, non-local ptr responses");
STATISTIC(NumUncacheNonLocalPtr, "Number of uncached non-local ptr responses");
STATISTIC(NumCacheCompleteNonLocalPtr,
          "Number of block queries that were completely cached");
static cl::opt<unsigned> BlockScanLimit(
    "memdep-block-scan-limit", cl::Hidden, cl::init(100),
    cl::desc("The number of instructions to scan in a block in memory "
             "dependency analysis (default = 100)"));

static cl::opt<unsigned>
    BlockNumberLimit("memdep-block-number-limit", cl::Hidden, cl::init(1000),
                     cl::desc("The number of blocks to scan during memory "
                              "dependency analysis (default = 1000)"));
template <typename KeyTy>
static void
RemoveFromReverseMap(DenseMap<Instruction *, SmallPtrSet<KeyTy, 4>> &ReverseMap,
                     Instruction *Inst, KeyTy Val) {
  typename DenseMap<Instruction *, SmallPtrSet<KeyTy, 4>>::iterator InstIt =
      ReverseMap.find(Inst);
  assert(InstIt != ReverseMap.end() && "Reverse map out of sync?");
  bool Found = InstIt->second.erase(Val);
  assert(Found && "Invalid reverse map!");
  (void)Found;
  if (InstIt->second.empty())
    ReverseMap.erase(InstIt);
}
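/// If the given instruction references a specific memory location, fill in
/// Loc with the details, otherwise set Loc.Ptr to null. Returns a ModRefInfo
/// value describing the general behavior of the instruction.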
static ModRefInfo GetLocation(const Instruction *Inst, MemoryLocation &Loc,
                              const TargetLibraryInfo &TLI) {
  if (const LoadInst *LI = dyn_cast<LoadInst>(Inst)) {
    if (LI->isUnordered()) {
      Loc = MemoryLocation::get(LI);
      return ModRefInfo::Ref;
    }
    // ...
  }

  if (const StoreInst *SI = dyn_cast<StoreInst>(Inst)) {
    if (SI->isUnordered()) {
      Loc = MemoryLocation::get(SI);
      return ModRefInfo::Mod;
    }
    // ...
  }

  if (const VAArgInst *V = dyn_cast<VAArgInst>(Inst)) {
    Loc = MemoryLocation::get(V);
    return ModRefInfo::ModRef;
  }

  if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(Inst)) {
    switch (II->getIntrinsicID()) {
    case Intrinsic::lifetime_start:
    case Intrinsic::lifetime_end:
    case Intrinsic::invariant_start:
      Loc = MemoryLocation::getForArgument(II, 1, TLI);
      // These intrinsics don't really modify the memory, but returning Mod
      // will allow them to be handled conservatively.
      return ModRefInfo::Mod;
    case Intrinsic::invariant_end:
      Loc = MemoryLocation::getForArgument(II, 2, TLI);
      return ModRefInfo::Mod;
    case Intrinsic::masked_load:
      Loc = MemoryLocation::getForArgument(II, 0, TLI);
      return ModRefInfo::Ref;
    case Intrinsic::masked_store:
      Loc = MemoryLocation::getForArgument(II, 1, TLI);
      return ModRefInfo::Mod;
    default:
      break;
    }
  }
  // ...
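/// Private helper for finding the local dependencies of a call site.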
MemDepResult MemoryDependenceResults::getCallDependencyFrom(
    CallBase *Call, bool isReadOnlyCall, BasicBlock::iterator ScanIt,
    BasicBlock *BB) {
  // Walk backwards through the block, looking for dependencies.
  while (ScanIt != BB->begin()) {
    Instruction *Inst = &*--ScanIt;
    // Debug intrinsics don't (and can't) cause dependencies.
    if (isa<DbgInfoIntrinsic>(Inst))
      continue;
    // ...
    if (auto *CallB = dyn_cast<CallBase>(Inst)) {
      // If these two calls do not interact, remember it.
      if (isReadOnlyCall && !isModSet(MR) &&
          Call->isIdenticalToWhenDefined(CallB))
        return MemDepResult::getNonFuncLocal();
  // No dependence found. If this is the entry block of the function, the
  // result is unknown, otherwise it is non-local.
  if (BB != &BB->getParent()->getEntryBlock())
    return MemDepResult::getNonLocal();
  return MemDepResult::getNonFuncLocal();
}
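/// Compute the dependence of MemLoc: first consult the invariant.group
/// chain, then fall back to scanning the block.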
MemDepResult MemoryDependenceResults::getPointerDependencyFrom(
    const MemoryLocation &MemLoc, bool isLoad, BasicBlock::iterator ScanIt,
    BasicBlock *BB, Instruction *QueryInst, unsigned *Limit,
    BatchAAResults &BatchAA) {
  MemDepResult InvariantGroupDependency = MemDepResult::getUnknown();
  if (QueryInst != nullptr) {
    if (auto *LI = dyn_cast<LoadInst>(QueryInst)) {
      InvariantGroupDependency = getInvariantGroupPointerDependency(LI, BB);
      if (InvariantGroupDependency.isDef())
        return InvariantGroupDependency;
    }
  }
  MemDepResult SimpleDep = getSimplePointerDependencyFrom(
      MemLoc, isLoad, ScanIt, BB, QueryInst, Limit, BatchAA);
  if (SimpleDep.isDef())
    return SimpleDep;
  // A non-local invariant.group dependency indicates there is a non-local
  // Def, which is better than a local clobber or anything else.
  if (InvariantGroupDependency.isNonLocal())
    return InvariantGroupDependency;

  assert(InvariantGroupDependency.isUnknown() &&
         "InvariantGroupDependency should be only unknown at this point");
  return SimpleDep;
}
MemDepResult
MemoryDependenceResults::getInvariantGroupPointerDependency(LoadInst *LI,
                                                            BasicBlock *BB) {
  if (!LI->hasMetadata(LLVMContext::MD_invariant_group))
    return MemDepResult::getUnknown();

  // It is not safe to walk the use list of a global value because function
  // passes aren't allowed to look outside their functions.
  Value *LoadOperand = LI->getPointerOperand()->stripPointerCasts();
  if (isa<GlobalValue>(LoadOperand))
    return MemDepResult::getUnknown();

  // Queue to process all pointers that are equivalent to the load operand.
  SmallVector<const Value *, 8> LoadOperandsQueue;
  LoadOperandsQueue.push_back(LoadOperand);

  Instruction *ClosestDependency = nullptr;
  // The order of instructions in a use list is unpredictable, so always pick
  // the candidate that is closest in dominance order.
  auto GetClosestDependency = [this](Instruction *Best, Instruction *Other) {
    assert(Other && "Must call it with not null instruction");
    if (Best == nullptr || DT.dominates(Best, Other))
      return Other;
    return Best;
  };
  // Search all uses of the pointer (and pointers equivalent to it) for
  // invariant.group accesses that dominate LI.
  while (!LoadOperandsQueue.empty()) {
    const Value *Ptr = LoadOperandsQueue.pop_back_val();
    assert(Ptr && !isa<GlobalValue>(Ptr) &&
           "Null or GlobalValue should not be inserted");

    for (const Use &Us : Ptr->uses()) {
      auto *U = dyn_cast<Instruction>(Us.getUser());
      if (!U || U == LI || !DT.dominates(U, LI))
        continue;

      // Bitcasts of Ptr are equivalent pointers; queue their users as well.
      if (isa<BitCastInst>(U)) {
        LoadOperandsQueue.push_back(U);
        continue;
      }
      // A GEP with all-zero indices is likewise equivalent to Ptr.
      if (auto *GEP = dyn_cast<GetElementPtrInst>(U))
        if (GEP->hasAllZeroIndices()) {
          LoadOperandsQueue.push_back(U);
          continue;
        }

      // If we hit a load or store (to the same pointer) carrying
      // invariant.group metadata, the pointed-to value didn't change.
      if ((isa<LoadInst>(U) ||
           (isa<StoreInst>(U) &&
            cast<StoreInst>(U)->getPointerOperand() == Ptr)) &&
          U->hasMetadata(LLVMContext::MD_invariant_group))
        ClosestDependency = GetClosestDependency(ClosestDependency, U);
    }
  }

  if (!ClosestDependency)
    return MemDepResult::getUnknown();
  if (ClosestDependency->getParent() == BB)
    return MemDepResult::getDef(ClosestDependency);
  // The dependency is non-local; cache it so a later call to
  // getNonLocalPointerDependency can return the cached result.
  NonLocalDefsCache.try_emplace(
      LI, NonLocalDepResult(ClosestDependency->getParent(),
                            MemDepResult::getDef(ClosestDependency), nullptr));
  ReverseNonLocalDefsCache[ClosestDependency].insert(LI);
  return MemDepResult::getNonLocal();
}
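/// Scan the instructions before ScanIt in BB for a local dependency of
/// MemLoc, ignoring invariant.group (which is handled by the caller).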
MemDepResult MemoryDependenceResults::getSimplePointerDependencyFrom(
    const MemoryLocation &MemLoc, bool isLoad, BasicBlock::iterator ScanIt,
    BasicBlock *BB, Instruction *QueryInst, unsigned *Limit,
    BatchAAResults &BatchAA) {
  bool isInvariantLoad = false;
  unsigned DefaultLimit = getDefaultBlockScanLimit();
  if (!Limit)
    Limit = &DefaultLimit;
  // If the QueryInst is a load with !invariant.load metadata, no store can
  // clobber the location: such a store would be immediate UB.
  if (isLoad && QueryInst) {
    LoadInst *LI = dyn_cast<LoadInst>(QueryInst);
    if (LI && LI->hasMetadata(LLVMContext::MD_invariant_load))
      isInvariantLoad = true;
  }

  // True for volatile instructions; for loads/stores, true if the atomic
  // ordering is stronger than AO; otherwise, true if the instruction may
  // read or write memory at all.
  auto isComplexForReordering = [](Instruction *I, AtomicOrdering AO) -> bool {
    if (I->isVolatile())
      return true;
    if (auto *LI = dyn_cast<LoadInst>(I))
      return isStrongerThan(LI->getOrdering(), AO);
    if (auto *SI = dyn_cast<StoreInst>(I))
      return isStrongerThan(SI->getOrdering(), AO);
    return I->mayReadOrWriteMemory();
  };
  // Walk backwards through the basic block, looking for dependencies.
  while (ScanIt != BB->begin()) {
    Instruction *Inst = &*--ScanIt;
    if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(Inst))
      // Debug intrinsics don't (and can't) cause dependencies.
      if (isa<DbgInfoIntrinsic>(II))
        continue;
    // ...
    if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(Inst)) {
      Intrinsic::ID ID = II->getIntrinsicID();
      switch (ID) {
      case Intrinsic::lifetime_start: {
        // A must-aliased lifetime.start ends the query: the value is
        // undefined before it.
        // ...
      }
      case Intrinsic::masked_load:
      case Intrinsic::masked_store: {
        // ...
        // A may-aliasing masked load only reads, so keep scanning; a masked
        // store is a clobber.
        if (ID == Intrinsic::masked_load)
          continue;
        return MemDepResult::getClobber(II);
      }
      }
    }
    // Values depend on loads if the pointers are must-aliased. This means
    // that a load depends on another must-aliased load from the same value.
    if (LoadInst *LI = dyn_cast<LoadInst>(Inst)) {
      // Volatile accesses cannot be eliminated, but they do not have to
      // clobber non-aliasing locations.
      if (LI->isVolatile()) {
        // ...
      }
      // ...
      // On a partial alias with a known offset, report a clobber and remember
      // the offset so the clobbered load can be widened later.
      if (R == AliasResult::PartialAlias && R.hasOffset()) {
        ClobberOffsets[LI] = R.getOffset();
        return MemDepResult::getClobber(Inst);
      }
    if (StoreInst *SI = dyn_cast<StoreInst>(Inst)) {
      // Atomic stores require care: a monotonic store can be skipped only if
      // the query instruction is itself not atomic.
      if (!SI->isUnordered() && SI->isAtomic()) {
        // ...
      }
      // Volatile stores, like volatile loads, do not have to clobber
      // non-aliasing locations.
      if (SI->isVolatile())
    // If this is an allocation and the accessed pointer is the allocation
    // itself, there is no dependence; the access may fold to undef.
    if (isa<AllocaInst>(Inst) || isNoAliasCall(Inst)) {
      const Value *AccessPtr = getUnderlyingObject(MemLoc.Ptr);
      if (AccessPtr == Inst || BatchAA.isMustAlias(Inst, AccessPtr))
        return MemDepResult::getDef(Inst);
    }
    // A release fence does not prevent reordering of following loads, so it
    // can be looked through when searching for a load's dependency.
    if (FenceInst *FI = dyn_cast<FenceInst>(Inst))
      if (isLoad && FI->getOrdering() == AtomicOrdering::Release)
        continue;
  // No dependence found. If this is the entry block of the function, the
  // result is unknown, otherwise it is non-local.
  if (BB != &BB->getParent()->getEntryBlock())
    return MemDepResult::getNonLocal();
  return MemDepResult::getNonFuncLocal();
}
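/// Return the instruction on which the memory operation QueryInst locally
/// depends, computing and caching the result.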
MemDepResult MemoryDependenceResults::getDependency(Instruction *QueryInst) {
  ClobberOffsets.clear();
  // Check for a cached result; note that this relies on MemDepResult
  // default-constructing to 'dirty'.
  MemDepResult &LocalCache = LocalDeps[QueryInst];
  if (!LocalCache.isDirty())
    return LocalCache;
  // ...
    // If we can do a pointer scan, make it happen.
    bool isLoad = !isModSet(MR);
    if (auto *II = dyn_cast<IntrinsicInst>(QueryInst))
      isLoad |= II->getIntrinsicID() == Intrinsic::lifetime_start;

    LocalCache =
        getPointerDependencyFrom(MemLoc, isLoad, ScanPos->getIterator(),
                                 QueryParent, QueryInst, nullptr);
  } else if (auto *QueryCall = dyn_cast<CallBase>(QueryInst)) {
    bool isReadOnly = AA.onlyReadsMemory(QueryCall);
    LocalCache = getCallDependencyFrom(QueryCall, isReadOnly,
                                       ScanPos->getIterator(), QueryParent);
  } else
    // Non-memory instructions depend on nothing.
    LocalCache = MemDepResult::getUnknown();
  // Remember the result!
  if (Instruction *I = LocalCache.getInst())
    ReverseLocalDeps[I].insert(QueryInst);

  return LocalCache;
}
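/// Debug-build helper used to verify that a non-local dependency cache
/// remains sorted.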
#ifndef NDEBUG
static void
AssertSorted(MemoryDependenceResults::NonLocalDepInfo &Cache, int Count = -1) {
  if (Count == -1)
    Count = Cache.size();
  assert(std::is_sorted(Cache.begin(), Cache.begin() + Count) &&
         "Cache isn't sorted!");
}
#endif
709 "getNonLocalCallDependency should only be used on calls with "
711 PerInstNLInfo &CacheP = NonLocalDepsMap[QueryCall];
719 if (!Cache.empty()) {
722 if (!CacheP.second) {
729 for (
auto &Entry : Cache)
730 if (Entry.getResult().isDirty())
731 DirtyBlocks.push_back(Entry.getBB());
736 ++NumCacheDirtyNonLocal;
741 ++NumUncacheNonLocal;
745 bool isReadonlyCall =
AA.onlyReadsMemory(QueryCall);
749 unsigned NumSortedEntries = Cache.
size();
753 while (!DirtyBlocks.empty()) {
757 if (!Visited.
insert(DirtyBB).second)
763 NonLocalDepInfo::iterator Entry =
766 if (Entry != Cache.begin() && std::prev(Entry)->getBB() == DirtyBB)
770 if (Entry != Cache.begin() + NumSortedEntries &&
771 Entry->getBB() == DirtyBB) {
774 if (!Entry->getResult().isDirty())
778 ExistingResult = &*Entry;
784 if (ExistingResult) {
788 RemoveFromReverseMap<Instruction *>(ReverseNonLocalDeps, Inst,
796 if (ScanPos != DirtyBB->
begin()) {
797 Dep = getCallDependencyFrom(QueryCall, isReadonlyCall, ScanPos, DirtyBB);
819 ReverseNonLocalDeps[Inst].
insert(QueryCall);
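/// Perform a full dependency query for an access to the QueryInst's memory
/// location, returning the set of instructions that either define or clobber
/// the value.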
void MemoryDependenceResults::getNonLocalPointerDependency(
    Instruction *QueryInst, SmallVectorImpl<NonLocalDepResult> &Result) {
  const MemoryLocation Loc = MemoryLocation::get(QueryInst);
  bool isLoad = isa<LoadInst>(QueryInst);
  BasicBlock *FromBB = QueryInst->getParent();
  assert(Loc.Ptr->getType()->isPointerTy() &&
         "Can't get pointer deps of a non-pointer!");
  Result.clear();

  // Check if there is a cached Def from invariant.group handling.
  auto NonLocalDefIt = NonLocalDefsCache.find(QueryInst);
  if (NonLocalDefIt != NonLocalDefsCache.end()) {
    Result.push_back(NonLocalDefIt->second);
    ReverseNonLocalDefsCache[NonLocalDefIt->second.getResult().getInst()]
        .erase(QueryInst);
    NonLocalDefsCache.erase(NonLocalDefIt);
    return;
  }

  // This routine does not expect to deal with volatile or ordered accesses.
  auto isOrdered = [](Instruction *Inst) {
    if (LoadInst *LI = dyn_cast<LoadInst>(Inst)) {
      return !LI->isUnordered();
    } else if (StoreInst *SI = dyn_cast<StoreInst>(Inst)) {
      return !SI->isUnordered();
    }
    return false;
  };
  assert(!isVolatile(QueryInst) && !isOrdered(QueryInst) &&
         "These cannot be queried");

  const DataLayout &DL = FromBB->getModule()->getDataLayout();
  PHITransAddr Address(const_cast<Value *>(Loc.Ptr), DL, &AC);
  SmallDenseMap<BasicBlock *, Value *, 16> Visited;
  if (getNonLocalPointerDepFromBB(QueryInst, Address, Loc, isLoad, FromBB,
                                  Result, Visited, true))
    return;
  Result.clear();
  Result.push_back(NonLocalDepResult(FromBB, MemDepResult::getUnknown(),
                                     const_cast<Value *>(Loc.Ptr)));
}
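/// Compute the memdep value for BB with Pointer/PointeeSize using either
/// cached information in Cache or by doing a lookup (which may use dirty
/// cache info if available). If a lookup is done, add the result to the cache.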
MemDepResult MemoryDependenceResults::getNonLocalInfoForBlock(
    Instruction *QueryInst, const MemoryLocation &Loc, bool isLoad,
    BasicBlock *BB, NonLocalDepInfo *Cache, unsigned NumSortedEntries,
    BatchAAResults &BatchAA) {
  bool isInvariantLoad = false;

  if (LoadInst *LI = dyn_cast_or_null<LoadInst>(QueryInst))
    isInvariantLoad = LI->getMetadata(LLVMContext::MD_invariant_load);

  // Do a binary search to see if we already have an entry for this block in
  // the cache set. If so, find it.
  NonLocalDepInfo::iterator Entry = std::upper_bound(
      Cache->begin(), Cache->begin() + NumSortedEntries, NonLocalDepEntry(BB));
  if (Entry != Cache->begin() && (Entry - 1)->getBB() == BB)
    --Entry;

  NonLocalDepEntry *ExistingResult = nullptr;
  if (Entry != Cache->begin() + NumSortedEntries && Entry->getBB() == BB)
    ExistingResult = &*Entry;

  // Use a cached result for an invariant load only if it shows no dependency
  // for the plain load; in that case the invariant load cannot have one
  // either.
  if (ExistingResult && isInvariantLoad &&
      !ExistingResult->getResult().isNonFuncLocal())
    ExistingResult = nullptr;

  // If we have a cached entry, and it is non-dirty, use it as the value for
  // this dependency.
  if (ExistingResult && !ExistingResult->getResult().isDirty()) {
    ++NumCacheNonLocalPtr;
    return ExistingResult->getResult();
  }

  // Otherwise, we have to scan for the value. If we have a dirty cache
  // entry, start scanning from its position, otherwise we scan from the end
  // of the block.
  BasicBlock::iterator ScanPos = BB->end();
  if (ExistingResult && ExistingResult->getResult().getInst()) {
    assert(ExistingResult->getResult().getInst()->getParent() == BB &&
           "Instruction invalidated?");
    ++NumCacheDirtyNonLocalPtr;
    ScanPos = ExistingResult->getResult().getInst()->getIterator();

    // Eliminating the dirty entry from 'Cache', so update the reverse info.
    ValueIsLoadPair CacheKey(Loc.Ptr, isLoad);
    RemoveFromReverseMap(ReverseNonLocalPtrDeps, &*ScanPos, CacheKey);
  } else {
    ++NumUncacheNonLocalPtr;
  }

  // Scan the block for the dependency.
  MemDepResult Dep = getPointerDependencyFrom(Loc, isLoad, ScanPos, BB,
                                              QueryInst, nullptr, BatchAA);

  // Don't cache results for invariant loads.
  if (isInvariantLoad)
    return Dep;

  // If we had a dirty entry for the block, update it. Otherwise, just add a
  // new entry.
  if (ExistingResult)
    ExistingResult->setResult(Dep);
  else
    Cache->push_back(NonLocalDepEntry(BB, Dep));

  // If the block has a dependency (i.e. it isn't completely transparent to
  // the value), remember the reverse association.
  if (!Dep.isDef() && !Dep.isClobber())
    return Dep;

  // Keep the ReverseNonLocalPtrDeps map up to date so we can efficiently
  // update MemDep when we remove instructions.
  Instruction *Inst = Dep.getInst();
  assert(Inst && "Didn't depend on anything?");
  ValueIsLoadPair CacheKey(Loc.Ptr, isLoad);
  ReverseNonLocalPtrDeps[Inst].insert(CacheKey);
  return Dep;
}
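/// Sort the NonLocalDepInfo cache, given a certain number of elements in the
/// array that are already properly ordered.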
static void
SortNonLocalDepInfoCache(MemoryDependenceResults::NonLocalDepInfo &Cache,
                         unsigned NumSortedEntries) {
  switch (Cache.size() - NumSortedEntries) {
  case 0:
    // Done, no new entries.
    break;
  case 2: {
    // Two new entries, insert the last one into place.
    NonLocalDepEntry Val = Cache.back();
    Cache.pop_back();
    MemoryDependenceResults::NonLocalDepInfo::iterator Entry =
        std::upper_bound(Cache.begin(), Cache.end() - 1, Val);
    Cache.insert(Entry, Val);
    LLVM_FALLTHROUGH;
  }
  case 1:
    // One new entry, just insert it into place.
    if (Cache.size() != 1) {
      NonLocalDepEntry Val = Cache.back();
      Cache.pop_back();
      MemoryDependenceResults::NonLocalDepInfo::iterator Entry =
          std::upper_bound(Cache.begin(), Cache.end(), Val);
      Cache.insert(Entry, Val);
    }
    break;
  default:
    // Added many values, do a full scale sort.
    llvm::sort(Cache);
    break;
  }
}
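/// Perform a dependency query based on pointer/pointeesize starting at the
/// end of StartBB. Add any clobber/def results to the Result vector and
/// track the blocks visited in 'Visited'. Returns false when dependence
/// information could not be computed, which callers must treat as a clobber
/// at the start of the block.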
bool MemoryDependenceResults::getNonLocalPointerDepFromBB(
    Instruction *QueryInst, const PHITransAddr &Pointer,
    const MemoryLocation &Loc, bool isLoad, BasicBlock *StartBB,
    SmallVectorImpl<NonLocalDepResult> &Result,
    SmallDenseMap<BasicBlock *, Value *, 16> &Visited, bool SkipFirstBlock,
    bool IsIncomplete) {
  // Look up the cached info for Pointer.
  ValueIsLoadPair CacheKey(Pointer.getAddr(), isLoad);

  // Set up a temporary NLPI value. If the map doesn't yet have an entry for
  // CacheKey, use this as the insert position.
  NonLocalPointerInfo InitialNLPI;
  InitialNLPI.Size = Loc.Size;
  InitialNLPI.AATags = Loc.AATags;

  bool isInvariantLoad = false;
  if (LoadInst *LI = dyn_cast_or_null<LoadInst>(QueryInst))
    isInvariantLoad = LI->getMetadata(LLVMContext::MD_invariant_load);

  // Get the NLPI for CacheKey, inserting one into the map if it doesn't
  // exist yet.
  std::pair<CachedNonLocalPointerInfo::iterator, bool> Pair =
      NonLocalPointerDeps.insert(std::make_pair(CacheKey, InitialNLPI));
  NonLocalPointerInfo *CacheInfo = &Pair.first->second;
  // If we already have a cache entry for this CacheKey, we may need to do
  // some clean up. If the cached data isn't compatible with this query,
  // throw it out and start over.
  if (!isInvariantLoad && !Pair.second) {
    if (CacheInfo->Size != Loc.Size) {
      bool ThrowOutEverything;
      if (CacheInfo->Size.hasValue() && Loc.Size.hasValue()) {
        // FIXME: We may be able to do better in the face of results with
        // mixed precision. We don't appear to get them in practice, though,
        // so just be conservative.
        ThrowOutEverything =
            CacheInfo->Size.isPrecise() != Loc.Size.isPrecise() ||
            CacheInfo->Size.getValue() < Loc.Size.getValue();
      } else {
        // For our purposes, unknown size > all others.
        ThrowOutEverything = !Loc.Size.hasValue();
      }

      if (ThrowOutEverything) {
        // The query's Size is greater than the cached one. Throw out the
        // cached data and proceed with the query at the greater size.
        CacheInfo->Pair = BBSkipFirstBlockPair();
        CacheInfo->Size = Loc.Size;
        for (auto &Entry : CacheInfo->NonLocalDeps)
          if (Instruction *Inst = Entry.getResult().getInst())
            RemoveFromReverseMap(ReverseNonLocalPtrDeps, Inst, CacheKey);
        CacheInfo->NonLocalDeps.clear();
        // The cache is cleared, so we lost info about previously visited
        // blocks; the cache must now be treated as incomplete.
        IsIncomplete = true;
      } else {
        // This query's Size is less than the cached one. Conservatively
        // restart the query using the greater size.
        return getNonLocalPointerDepFromBB(
            QueryInst, Pointer, Loc.getWithNewSize(CacheInfo->Size), isLoad,
            StartBB, Result, Visited, SkipFirstBlock, IsIncomplete);
      }
    }

    // If the query's AATags are inconsistent with the cached ones, throw out
    // the tags in the cache.
    if (CacheInfo->AATags != Loc.AATags) {
      if (CacheInfo->AATags) {
        CacheInfo->Pair = BBSkipFirstBlockPair();
        CacheInfo->AATags = AAMDNodes();
        for (auto &Entry : CacheInfo->NonLocalDeps)
          if (Instruction *Inst = Entry.getResult().getInst())
            RemoveFromReverseMap(ReverseNonLocalPtrDeps, Inst, CacheKey);
        CacheInfo->NonLocalDeps.clear();
        IsIncomplete = true;
      }
      if (Loc.AATags)
        return getNonLocalPointerDepFromBB(
            QueryInst, Pointer, Loc.getWithoutAATags(), isLoad, StartBB,
            Result, Visited, SkipFirstBlock, IsIncomplete);
    }
  }
  NonLocalDepInfo *Cache = &CacheInfo->NonLocalDeps;

  // If we have valid cached information for exactly the block we are
  // investigating, just return it with no recomputation. However, if the
  // cache is incomplete, we can't do this.
  if (!IsIncomplete && !isInvariantLoad &&
      CacheInfo->Pair == BBSkipFirstBlockPair(StartBB, SkipFirstBlock)) {
    // We have a fully cached result for this query, but we must verify that
    // we don't already have conflicting results for these blocks in the
    // visited set.
    if (!Visited.empty()) {
      for (auto &Entry : *Cache) {
        DenseMap<BasicBlock *, Value *>::iterator VI =
            Visited.find(Entry.getBB());
        if (VI == Visited.end() || VI->second == Loc.Ptr)
          continue;
        // A pointer mismatch in a block: return false, saying that something
        // was clobbered in this result.
        return false;
      }
    }

    Value *Addr = Pointer.getAddr();
    for (auto &Entry : *Cache) {
      Visited.insert(std::make_pair(Entry.getBB(), Addr));
      if (Entry.getResult().isNonLocal()) {
        continue;
      }
      if (DT.isReachableFromEntry(Entry.getBB())) {
        Result.push_back(
            NonLocalDepResult(Entry.getBB(), Entry.getResult(), Addr));
      }
    }
    ++NumCacheCompleteNonLocalPtr;
    return true;
  }

  // Otherwise, either this is a new block, a block with an invalid cache
  // pointer or one that we're about to invalidate by putting more info into
  // it than its valid cache info. If empty and not explicitly indicated as
  // incomplete, the result will be valid cache info, otherwise it isn't.
  if (!isInvariantLoad) {
    if (!IsIncomplete && Cache->empty())
      CacheInfo->Pair = BBSkipFirstBlockPair(StartBB, SkipFirstBlock);
    else
      CacheInfo->Pair = BBSkipFirstBlockPair();
  }
  SmallVector<BasicBlock *, 32> Worklist;
  Worklist.push_back(StartBB);

  // PredList used inside loop.
  SmallVector<std::pair<BasicBlock *, PHITransAddr>, 16> PredList;

  // Keep track of the entries that we know are sorted. Previously cached
  // entries will all be sorted; the entries we add are only sorted on demand.
  unsigned NumSortedEntries = Cache->size();
  unsigned WorklistEntries = BlockNumberLimit;
  bool GotWorklistLimit = false;
  LLVM_DEBUG(AssertSorted(*Cache));

  BatchAAResults BatchAA(AA);
  while (!Worklist.empty()) {
    BasicBlock *BB = Worklist.pop_back_val();

    // If we do process a large number of blocks, it becomes very expensive
    // and likely isn't worth worrying about.
    if (Result.size() > NumResultsLimit) {
      // Sort the cache now (if needed) so that recursive invocations of
      // getNonLocalPointerDepFromBB and other cache users only see properly
      // sorted entries.
      if (Cache && NumSortedEntries != Cache->size()) {
        SortNonLocalDepInfoCache(*Cache, NumSortedEntries);
      }
      // Since we bail out, the "Cache" set won't contain all of the results
      // for the query, so disable the fast path indicator.
      CacheInfo->Pair = BBSkipFirstBlockPair();
      return false;
    }
    // Skip the first block if we have it.
    if (!SkipFirstBlock) {
      // Analyze the dependency of *Pointer in FromBB. See if we already have
      // been here.
      assert(Visited.count(BB) && "Should check 'visited' before adding to WL");

      // Get the dependency info for Pointer in BB. If we have cached
      // information, we will use it, otherwise we compute it.
      LLVM_DEBUG(AssertSorted(*Cache, NumSortedEntries));
      MemDepResult Dep = getNonLocalInfoForBlock(
          QueryInst, Loc, isLoad, BB, Cache, NumSortedEntries, BatchAA);

      // If we got a Def or Clobber, add this to the list of results.
      if (!Dep.isNonLocal()) {
        if (DT.isReachableFromEntry(BB)) {
          Result.push_back(NonLocalDepResult(BB, Dep, Pointer.getAddr()));
          continue;
        }
      }
    }
    // If 'Pointer' is an instruction defined in this block, then we need to
    // do phi translation to change it into a value live in the predecessor
    // block. If not, we just add the predecessors of this block to the
    // worklist and scan them with the same Pointer.
    if (!Pointer.NeedsPHITranslationFromBlock(BB)) {
      SkipFirstBlock = false;
      SmallVector<BasicBlock *, 16> NewBlocks;
      for (BasicBlock *Pred : PredCache.get(BB)) {
        // Verify that we haven't looked at this block yet.
        std::pair<DenseMap<BasicBlock *, Value *>::iterator, bool> InsertRes =
            Visited.insert(std::make_pair(Pred, Pointer.getAddr()));
        if (InsertRes.second) {
          // First time we've looked at *PI.
          NewBlocks.push_back(Pred);
          continue;
        }

        // If we have seen this block before, but it was with a different
        // pointer, then we have a phi translation failure and have to treat
        // this as a clobber.
        if (InsertRes.first->second != Pointer.getAddr()) {
          // Make sure to clean up the Visited map before continuing on to
          // PredTranslationFailure.
          for (unsigned i = 0; i < NewBlocks.size(); i++)
            Visited.erase(NewBlocks[i]);
          goto PredTranslationFailure;
        }
      }
      if (NewBlocks.size() > WorklistEntries) {
        // Make sure to clean up the Visited map before continuing on to
        // PredTranslationFailure.
        for (unsigned i = 0; i < NewBlocks.size(); i++)
          Visited.erase(NewBlocks[i]);
        GotWorklistLimit = true;
        goto PredTranslationFailure;
      }
      WorklistEntries -= NewBlocks.size();
      Worklist.append(NewBlocks.begin(), NewBlocks.end());
      continue;
    }
    // We do need to do phi translation; if we know ahead of time we can't,
    // don't even try.
    if (!Pointer.IsPotentiallyPHITranslatable())
      goto PredTranslationFailure;

    // We may have added values to the cache list before this PHI translation.
    // If so, sort it now so that recursive invocations and other cache users
    // only ever see properly sorted entries.
    if (Cache && NumSortedEntries != Cache->size()) {
      SortNonLocalDepInfoCache(*Cache, NumSortedEntries);
      NumSortedEntries = Cache->size();
    }
    Cache = nullptr;

    PredList.clear();
    for (BasicBlock *Pred : PredCache.get(BB)) {
      PredList.push_back(std::make_pair(Pred, Pointer));

      // Get the PHI translated pointer in this predecessor; getAddr() returns
      // null if it is not translatable.
      PHITransAddr &PredPointer = PredList.back().second;
      PredPointer.PHITranslateValue(BB, Pred, &DT, /*MustDominate=*/false);
      Value *PredPtrVal = PredPointer.getAddr();

      // Check to see if we have already visited this pred block with another
      // pointer. If so, we can't do this lookup.
      std::pair<DenseMap<BasicBlock *, Value *>::iterator, bool> InsertRes =
          Visited.insert(std::make_pair(Pred, PredPtrVal));

      if (!InsertRes.second) {
        // We found the pred; take it off the list of preds to visit.
        PredList.pop_back();

        // If the predecessor was visited with PredPtr, then we already did
        // the analysis and can ignore it.
        if (InsertRes.first->second == PredPtrVal)
          continue;
        // Otherwise, the block was previously analyzed with a different
        // pointer, and we can't do the query because of a conflict. Clean up
        // the Visited map and bail.
        for (unsigned i = 0, n = PredList.size(); i < n; ++i)
          Visited.erase(PredList[i].first);
        goto PredTranslationFailure;
      }
    }
    // Actually process results here; this needs to be a separate loop to
    // avoid calling getNonLocalPointerDepFromBB for blocks we don't want to
    // return results from.
    for (unsigned i = 0, n = PredList.size(); i < n; ++i) {
      BasicBlock *Pred = PredList[i].first;
      PHITransAddr &PredPointer = PredList[i].second;
      Value *PredPtrVal = PredPointer.getAddr();

      bool CanTranslate = true;
      // If PHI translation was unable to find an available pointer in this
      // predecessor, we have to assume that the pointer is clobbered there.
      // We can still do PRE of the load, which would insert a computation of
      // the pointer in this predecessor.
      if (!PredPtrVal)
        CanTranslate = false;

      if (!CanTranslate ||
          !getNonLocalPointerDepFromBB(QueryInst, PredPointer,
                                       Loc.getWithNewPtr(PredPtrVal), isLoad,
                                       Pred, Result, Visited)) {
        // Add the entry to the Result list.
        NonLocalDepResult Entry(Pred, MemDepResult::getUnknown(), PredPtrVal);
        Result.push_back(Entry);
        // Since we had a phi translation failure, the cache for this pointer
        // is only valid for the specific block, not the whole query.
        NonLocalPointerInfo &NLPI = NonLocalPointerDeps[CacheKey];
        NLPI.Pair = BBSkipFirstBlockPair();
        continue;
      }
    }

    // Refresh the CacheInfo/Cache pointer so that it isn't invalidated.
    CacheInfo = &NonLocalPointerDeps[CacheKey];
    Cache = &CacheInfo->NonLocalDeps;
    NumSortedEntries = Cache->size();

    // Since we did phi translation, the "Cache" set won't contain all of the
    // results for the query; disable the fast path indicator.
    CacheInfo->Pair = BBSkipFirstBlockPair();
    SkipFirstBlock = false;
    continue;

  PredTranslationFailure:
    // The following code is "failure"; we can't produce a sane translation
    // for the given block. It assumes our data structures are unmodified for
    // the current block.

    if (!Cache) {
      // Refresh the CacheInfo/Cache pointer if it got invalidated.
      CacheInfo = &NonLocalPointerDeps[CacheKey];
      Cache = &CacheInfo->NonLocalDeps;
      NumSortedEntries = Cache->size();
    }

    // Since we failed phi translation, the "Cache" set won't contain all of
    // the results for the query; disable the fast path indicator.
    CacheInfo->Pair = BBSkipFirstBlockPair();

    // If this is the magic first block, return this as a clobber of the
    // whole incoming value. Since we can't phi translate to one of the
    // predecessors, we have to bail out.
    if (SkipFirstBlock)
      return false;
    // Results of invariant loads are not cached, so there is no cached
    // information to update.
    if (!isInvariantLoad) {
      for (NonLocalDepEntry &I : llvm::reverse(*Cache)) {
        if (I.getBB() != BB)
          continue;

        assert((GotWorklistLimit || I.getResult().isNonLocal() ||
                !DT.isReachableFromEntry(BB)) &&
               "Should only be here with transparent block");

        I.setResult(MemDepResult::getUnknown());
        break;
      }
    }
    (void)GotWorklistLimit;
    // Go ahead and report an unknown dependence.
    Result.push_back(
        NonLocalDepResult(BB, MemDepResult::getUnknown(), Pointer.getAddr()));
  }

  // Okay, we're done now. If we added new values to the cache, re-sort it.
  SortNonLocalDepInfoCache(*Cache, NumSortedEntries);
  LLVM_DEBUG(AssertSorted(*Cache));
  return true;
}
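/// If P exists in CachedNonLocalPointerInfo, remove it.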
void MemoryDependenceResults::removeCachedNonLocalPointerDependencies(
    ValueIsLoadPair P) {
  // Most of the time this cache is empty.
  if (!NonLocalDefsCache.empty()) {
    auto it = NonLocalDefsCache.find(P.getPointer());
    if (it != NonLocalDefsCache.end()) {
      RemoveFromReverseMap(ReverseNonLocalDefsCache,
                           it->second.getResult().getInst(), P.getPointer());
      NonLocalDefsCache.erase(it);
    }

    if (auto *I = dyn_cast<Instruction>(P.getPointer())) {
      auto toRemoveIt = ReverseNonLocalDefsCache.find(I);
      if (toRemoveIt != ReverseNonLocalDefsCache.end()) {
        for (const auto *entry : toRemoveIt->second)
          NonLocalDefsCache.erase(entry);
        ReverseNonLocalDefsCache.erase(toRemoveIt);
      }
    }
  }

  CachedNonLocalPointerInfo::iterator It = NonLocalPointerDeps.find(P);
  if (It == NonLocalPointerDeps.end())
    return;

  // Remove all of the entries in the BB->val map. This involves removing
  // instructions from the reverse map.
  NonLocalDepInfo &PInfo = It->second.NonLocalDeps;

  for (const NonLocalDepEntry &DE : PInfo) {
    Instruction *Target = DE.getResult().getInst();
    if (!Target)
      continue; // Ignore non-local dep results.

    // Eliminating the entry from 'Cache', so update the reverse info.
    RemoveFromReverseMap(ReverseNonLocalPtrDeps, Target, P);
  }

  // Remove P from NonLocalPointerDeps (which deletes NonLocalDepInfo).
  NonLocalPointerDeps.erase(It);
}
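/// Removes an instruction from the dependence analysis, updating the
/// dependence of instructions that previously depended on it. This method
/// attempts to keep the cache coherent using the reverse map.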
void MemoryDependenceResults::removeInstruction(Instruction *RemInst) {
  // Walk through the non-local dependencies, removing this one as the value
  // for any cached queries.
  NonLocalDepMapType::iterator NLDI = NonLocalDepsMap.find(RemInst);
  if (NLDI != NonLocalDepsMap.end()) {
    NonLocalDepInfo &BlockMap = NLDI->second.first;
    for (auto &Entry : BlockMap)
      if (Instruction *Inst = Entry.getResult().getInst())
        RemoveFromReverseMap(ReverseNonLocalDeps, Inst, RemInst);
    NonLocalDepsMap.erase(NLDI);
  }

  // If we have a cached local dependence query for this instruction, remove
  // it.
  LocalDepMapType::iterator LocalDepEntry = LocalDeps.find(RemInst);
  if (LocalDepEntry != LocalDeps.end()) {
    // Remove us from DepInst's reverse set now that the local dep info is
    // gone.
    if (Instruction *Inst = LocalDepEntry->second.getInst())
      RemoveFromReverseMap(ReverseLocalDeps, Inst, RemInst);

    // Remove this local dependency info.
    LocalDeps.erase(LocalDepEntry);
  }
  // If the instruction is a pointer, remove it from both the load info and
  // the store info.
  if (RemInst->getType()->isPointerTy()) {
    removeCachedNonLocalPointerDependencies(ValueIsLoadPair(RemInst, false));
    removeCachedNonLocalPointerDependencies(ValueIsLoadPair(RemInst, true));
  } else {
    // Otherwise, if the instruction is in the map directly, it must be a
    // load. Remove it.
    auto toRemoveIt = NonLocalDefsCache.find(RemInst);
    if (toRemoveIt != NonLocalDefsCache.end()) {
      assert(isa<LoadInst>(RemInst) &&
             "only load instructions should be added directly");
      const Instruction *DepV = toRemoveIt->second.getResult().getInst();
      ReverseNonLocalDefsCache.find(DepV)->second.erase(RemInst);
      NonLocalDefsCache.erase(toRemoveIt);
    }
  }
  // Loop over all of the things that depend on the instruction we're
  // removing.
  SmallVector<std::pair<Instruction *, Instruction *>, 8> ReverseDepsToAdd;

  // If we find RemInst as a clobber or Def in any of the maps for other
  // values, we need to replace its entry with a dirty version of the
  // instruction after it.
  MemDepResult NewDirtyVal;
  if (!RemInst->isTerminator())
    NewDirtyVal = MemDepResult::getDirty(&*++RemInst->getIterator());

  ReverseDepMapType::iterator ReverseDepIt = ReverseLocalDeps.find(RemInst);
  if (ReverseDepIt != ReverseLocalDeps.end()) {
    // RemInst can't be the terminator if it has local stuff depending on it.
    assert(!ReverseDepIt->second.empty() && !RemInst->isTerminator() &&
           "Nothing can locally depend on a terminator");

    for (Instruction *InstDependingOnRemInst : ReverseDepIt->second) {
      assert(InstDependingOnRemInst != RemInst &&
             "Already removed our local dep info");

      LocalDeps[InstDependingOnRemInst] = NewDirtyVal;

      // Make sure to remember that new things depend on NewDepInst.
      assert(NewDirtyVal.getInst() &&
             "There is no way something else can have "
             "a local dep on this if it is a terminator!");
      ReverseDepsToAdd.push_back(
          std::make_pair(NewDirtyVal.getInst(), InstDependingOnRemInst));
    }

    ReverseLocalDeps.erase(ReverseDepIt);

    // Add new reverse deps after scanning the set, to avoid invalidating the
    // 'ReverseDeps' reference.
    while (!ReverseDepsToAdd.empty()) {
      ReverseLocalDeps[ReverseDepsToAdd.back().first].insert(
          ReverseDepsToAdd.back().second);
      ReverseDepsToAdd.pop_back();
    }
  }
  ReverseDepIt = ReverseNonLocalDeps.find(RemInst);
  if (ReverseDepIt != ReverseNonLocalDeps.end()) {
    for (Instruction *I : ReverseDepIt->second) {
      assert(I != RemInst && "Already removed NonLocalDep info for RemInst");

      PerInstNLInfo &INLD = NonLocalDepsMap[I];
      // The information is now dirty!
      INLD.second = true;

      for (auto &Entry : INLD.first) {
        if (Entry.getResult().getInst() != RemInst)
          continue;

        // Convert to a dirty entry for the subsequent instruction.
        Entry.setResult(NewDirtyVal);

        if (Instruction *NextI = NewDirtyVal.getInst())
          ReverseDepsToAdd.push_back(std::make_pair(NextI, I));
      }
    }

    ReverseNonLocalDeps.erase(ReverseDepIt);

    // Add new reverse deps after scanning the set, to avoid invalidating it.
    while (!ReverseDepsToAdd.empty()) {
      ReverseNonLocalDeps[ReverseDepsToAdd.back().first].insert(
          ReverseDepsToAdd.back().second);
      ReverseDepsToAdd.pop_back();
    }
  }
  // If the instruction is in ReverseNonLocalPtrDeps then it appears as a
  // value in the NonLocalPointerDeps info.
  ReverseNonLocalPtrDepTy::iterator ReversePtrDepIt =
      ReverseNonLocalPtrDeps.find(RemInst);
  if (ReversePtrDepIt != ReverseNonLocalPtrDeps.end()) {
    SmallVector<std::pair<Instruction *, ValueIsLoadPair>, 8>
        ReversePtrDepsToAdd;

    for (ValueIsLoadPair P : ReversePtrDepIt->second) {
      assert(P.getPointer() != RemInst &&
             "Already removed NonLocalPointerDeps info for RemInst");

      NonLocalDepInfo &NLPDI = NonLocalPointerDeps[P].NonLocalDeps;

      // The cache is not valid for any specific block anymore.
      NonLocalPointerDeps[P].Pair = BBSkipFirstBlockPair();

      // Update any entries for RemInst to use the instruction after it.
      for (auto &Entry : NLPDI) {
        if (Entry.getResult().getInst() != RemInst)
          continue;

        // Convert to a dirty entry for the subsequent instruction.
        Entry.setResult(NewDirtyVal);

        if (Instruction *NewDirtyInst = NewDirtyVal.getInst())
          ReversePtrDepsToAdd.push_back(std::make_pair(NewDirtyInst, P));
      }

      // Re-sort the NonLocalDepInfo. Changing a dirty entry to its
      // subsequent value may invalidate the sortedness.
      llvm::sort(NLPDI);
    }

    ReverseNonLocalPtrDeps.erase(ReversePtrDepIt);

    while (!ReversePtrDepsToAdd.empty()) {
      ReverseNonLocalPtrDeps[ReversePtrDepsToAdd.back().first].insert(
          ReversePtrDepsToAdd.back().second);
      ReversePtrDepsToAdd.pop_back();
    }
  }

  assert(!NonLocalDepsMap.count(RemInst) && "RemInst got reinserted?");
  LLVM_DEBUG(verifyRemoved(RemInst));
}
void MemoryDependenceResults::verifyRemoved(Instruction *D) const {
  for (const auto &DepKV : LocalDeps) {
    assert(DepKV.first != D && "Inst occurs in data structures");
    assert(DepKV.second.getInst() != D && "Inst occurs in data structures");
  }

  for (const auto &DepKV : NonLocalPointerDeps) {
    assert(DepKV.first.getPointer() != D && "Inst occurs in NLPD map key");
    for (const auto &Entry : DepKV.second.NonLocalDeps)
      assert(Entry.getResult().getInst() != D && "Inst occurs as NLPD value");
  }

  for (const auto &DepKV : NonLocalDepsMap) {
    assert(DepKV.first != D && "Inst occurs in data structures");
    const PerInstNLInfo &INLD = DepKV.second;
    for (const auto &Entry : INLD.first)
      assert(Entry.getResult().getInst() != D &&
             "Inst occurs in data structures");
  }

  for (const auto &DepKV : ReverseLocalDeps) {
    assert(DepKV.first != D && "Inst occurs in data structures");
    for (Instruction *Inst : DepKV.second)
      assert(Inst != D && "Inst occurs in data structures");
  }

  for (const auto &DepKV : ReverseNonLocalDeps) {
    assert(DepKV.first != D && "Inst occurs in data structures");
    for (Instruction *Inst : DepKV.second)
      assert(Inst != D && "Inst occurs in data structures");
  }

  for (const auto &DepKV : ReverseNonLocalPtrDeps) {
    assert(DepKV.first != D && "Inst occurs in rev NLPD map");

    for (ValueIsLoadPair P : DepKV.second)
      assert(P != ValueIsLoadPair(D, false) && P != ValueIsLoadPair(D, true) &&
             "Inst occurs in ReverseNonLocalPtrDeps map");
  }
}
1736 "Memory Dependence Analysis",
false,
true)
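/// Return the default block-scan limit used when no explicit limit is passed
/// (see -memdep-block-scan-limit).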
unsigned MemoryDependenceResults::getDefaultBlockScanLimit() const {
  return DefaultBlockScanLimit;
}
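/// Collect the analyses the wrapper pass depends on and build the
/// MemoryDependenceResults object for F.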
bool MemoryDependenceWrapperPass::runOnFunction(Function &F) {
  auto &AA = getAnalysis<AAResultsWrapperPass>().getAAResults();
  auto &AC = getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
  auto &TLI = getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F);
  auto &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
  auto &PV = getAnalysis<PhiValuesWrapperPass>().getResult();
  MemDep.emplace(AA, AC, TLI, DT, PV, BlockScanLimit);
  return false;
}