28 #define DEBUG_TYPE "loop-accesses"
32 cl::desc(
"Sets the SIMD width. Zero is autoselect."),
38 cl::desc(
"Sets the vectorization interleave count. "
39 "Zero is autoselect."),
46 cl::desc(
"When performing memory disambiguation checks at runtime do not "
47 "generate more than this number of comparisons (default = 8)."),
54 cl::desc(
"Maximum number of comparisons done when trying to merge "
55 "runtime memory checks. (default = 100)"),
64 cl::desc(
"Maximum number of interesting dependences collected by "
65 "loop-access analysis (default = 100)"),
75 const char *PassName) {
78 DL =
I->getDebugLoc();
80 *TheFunction,
DL, Message.
str());
84 if (
CastInst *CI = dyn_cast<CastInst>(V))
85 if (CI->getOperand(0)->getType()->isIntegerTy())
86 return CI->getOperand(0);
99 PtrToStride.
find(OrigPtr ? OrigPtr : Ptr);
100 if (SI != PtrToStride.
end()) {
101 Value *StrideVal = SI->second;
109 RewriteMap[StrideVal] =
One;
113 DEBUG(
dbgs() <<
"LAA: Replacing SCEV: " << *OrigSCEV <<
" by: " << *ByOne
123 unsigned DepSetId,
unsigned ASId,
128 assert(AR &&
"Invalid addrec expression");
139 for (
unsigned J = 0, EJ = N.
Members.
size(); EJ != J; ++J)
209 if (!UseDependencies) {
215 unsigned TotalComparisons = 0;
218 for (
unsigned Index = 0; Index <
Pointers.size(); ++Index)
219 PositionMap[
Pointers[Index].PointerValue] = Index;
247 unsigned Pointer = PositionMap[
MI->getPointer()];
264 if (Group.addPointer(Pointer)) {
285 const PointerInfo &PointerI =
Pointers[
I];
286 const PointerInfo &PointerJ =
Pointers[J];
289 if (!PointerI.IsWritePtr && !PointerJ.IsWritePtr)
293 if (PointerI.DependencySetId == PointerJ.DependencySetId)
297 if (PointerI.AliasSetId != PointerJ.AliasSetId)
303 if (PtrPartition && (*PtrPartition)[I] != -1 &&
304 (*PtrPartition)[I] == (*PtrPartition)[J])
314 OS.
indent(Depth) <<
"Run-time memory checks:\n";
320 OS.
indent(Depth) <<
"Check " << N++ <<
":\n";
321 OS.
indent(Depth + 2) <<
"Comparing group " << I <<
":\n";
327 OS <<
" (Partition: "
332 OS.
indent(Depth + 2) <<
"Against group " << J <<
":\n";
338 OS <<
" (Partition: "
344 OS.
indent(Depth) <<
"Grouped accesses:\n";
346 OS.
indent(Depth + 2) <<
"Group " << I <<
":\n";
350 OS.
indent(Depth + 6) <<
"Member: "
361 unsigned CheckCount = 0;
363 for (
unsigned I = 0; I < NumPartitions; ++
I)
364 for (
unsigned J = I + 1; J < NumPartitions; ++J)
372 unsigned NumPointers =
Pointers.size();
374 for (
unsigned I = 0; I < NumPointers; ++
I)
375 for (
unsigned J = I + 1; J < NumPointers; ++J)
386 class AccessAnalysis {
394 :
DL(Dl), AST(*AA), LI(LI), DepCands(DA),
395 IsRTCheckAnalysisNeeded(
false) {}
401 Accesses.insert(MemAccessInfo(Ptr,
false));
403 ReadOnlyPtr.insert(Ptr);
410 Accesses.insert(MemAccessInfo(Ptr,
true));
420 bool ShouldCheckStride =
false);
424 void buildDependenceSets() {
425 processMemAccesses();
433 bool isDependencyCheckNeeded() {
return !CheckDeps.empty(); }
441 MemAccessInfoSet &getDependenciesToCheck() {
return CheckDeps; }
448 void processMemAccesses();
451 PtrAccessSet Accesses;
456 MemAccessInfoSet CheckDeps;
479 bool IsRTCheckAnalysisNeeded;
498 bool ShouldCheckStride) {
503 bool NeedRTCheck =
false;
504 if (!IsRTCheckAnalysisNeeded)
return true;
506 bool IsDepCheckNeeded = isDependencyCheckNeeded();
511 for (
auto &AS : AST) {
512 int NumReadPtrChecks = 0;
513 int NumWritePtrChecks = 0;
517 unsigned RunningDepId = 1;
521 Value *Ptr =
A.getValue();
522 bool IsWrite = Accesses.count(MemAccessInfo(Ptr,
true));
523 MemAccessInfo Access(Ptr, IsWrite);
533 (!ShouldCheckStride ||
538 if (IsDepCheckNeeded) {
539 Value *Leader = DepCands.getLeaderValue(Access).getPointer();
540 unsigned &LeaderId = DepSetId[Leader];
542 LeaderId = RunningDepId++;
546 DepId = RunningDepId++;
548 RtCheck.
insert(TheLoop, Ptr, IsWrite, DepId, ASId, StridesMap);
550 DEBUG(
dbgs() <<
"LAA: Found a runtime check ptr:" << *Ptr <<
'\n');
552 DEBUG(
dbgs() <<
"LAA: Can't find bounds for ptr:" << *Ptr <<
'\n');
565 if (!(IsDepCheckNeeded && CanDoRT && RunningDepId == 2))
566 NeedRTCheck |= (NumWritePtrChecks >= 2 || (NumReadPtrChecks >= 1 &&
567 NumWritePtrChecks >= 1));
577 unsigned NumPointers = RtCheck.
Pointers.size();
578 for (
unsigned i = 0; i < NumPointers; ++i) {
579 for (
unsigned j = i + 1; j < NumPointers; ++j) {
581 if (RtCheck.
Pointers[i].DependencySetId ==
582 RtCheck.
Pointers[j].DependencySetId)
594 DEBUG(
dbgs() <<
"LAA: Runtime check would require comparison between"
595 " different address spaces\n");
601 if (NeedRTCheck && CanDoRT)
605 <<
" pointer comparisons.\n");
607 RtCheck.
Need = NeedRTCheck;
609 bool CanDoRTIfNeeded = !NeedRTCheck || CanDoRT;
610 if (!CanDoRTIfNeeded)
612 return CanDoRTIfNeeded;
615 void AccessAnalysis::processMemAccesses() {
620 DEBUG(
dbgs() <<
"LAA: Processing memory accesses...\n");
622 DEBUG(
dbgs() <<
"LAA: Accesses(" << Accesses.size() <<
"):\n");
624 for (
auto A : Accesses)
625 dbgs() <<
"\t" << *
A.getPointer() <<
" (" <<
626 (
A.getInt() ?
"write" : (ReadOnlyPtr.count(
A.getPointer()) ?
627 "read-only" :
"read")) <<
")\n";
634 for (
auto &AS : AST) {
639 bool SetHasWrite =
false;
643 UnderlyingObjToAccessMap ObjToLastAccess;
646 PtrAccessSet DeferredAccesses;
650 for (
int SetIteration = 0; SetIteration < 2; ++SetIteration) {
651 bool UseDeferred = SetIteration > 0;
652 PtrAccessSet &S = UseDeferred ? DeferredAccesses : Accesses;
655 Value *Ptr = AV.getValue();
660 if (AC.getPointer() != Ptr)
663 bool IsWrite = AC.getInt();
667 bool IsReadOnlyPtr = ReadOnlyPtr.count(Ptr) && !IsWrite;
668 if (UseDeferred && !IsReadOnlyPtr)
672 assert(((IsReadOnlyPtr && UseDeferred) || IsWrite ||
673 S.count(MemAccessInfo(Ptr,
false))) &&
674 "Alias-set pointer not in the access set?");
676 MemAccessInfo Access(Ptr, IsWrite);
677 DepCands.insert(Access);
684 if (!UseDeferred && IsReadOnlyPtr) {
685 DeferredAccesses.insert(Access);
693 if ((IsWrite || IsReadOnlyPtr) && SetHasWrite) {
694 CheckDeps.insert(Access);
695 IsRTCheckAnalysisNeeded =
true;
704 ValueVector TempObjects;
707 DEBUG(
dbgs() <<
"Underlying objects for pointer " << *Ptr <<
"\n");
708 for (
Value *UnderlyingObj : TempObjects) {
709 UnderlyingObjToAccessMap::iterator Prev =
710 ObjToLastAccess.find(UnderlyingObj);
711 if (Prev != ObjToLastAccess.end())
712 DepCands.unionSets(Access, Prev->second);
714 ObjToLastAccess[UnderlyingObj] = Access;
715 DEBUG(
dbgs() <<
" " << *UnderlyingObj <<
"\n");
725 return GEP->isInBounds();
746 if (!
GEP || !
GEP->isInBounds())
750 Value *NonConstIndex =
nullptr;
751 for (
auto Index =
GEP->idx_begin(); Index !=
GEP->idx_end(); ++Index)
752 if (!isa<ConstantInt>(*Index)) {
755 NonConstIndex = *Index;
763 if (
auto *OBO = dyn_cast<OverflowingBinaryOperator>(NonConstIndex))
764 if (OBO->hasNoSignedWrap() &&
767 isa<ConstantInt>(OBO->getOperand(1))) {
768 auto *OpScev = SE->
getSCEV(OBO->getOperand(0));
770 if (
auto *OpAR = dyn_cast<SCEVAddRecExpr>(OpScev))
771 return OpAR->getLoop() == L && OpAR->getNoWrapFlags(
SCEV::FlagNSW);
785 if (PtrTy->getElementType()->isAggregateType()) {
786 DEBUG(
dbgs() <<
"LAA: Bad stride - Not a pointer to a scalar type"
795 DEBUG(
dbgs() <<
"LAA: Bad stride - Not an AddRecExpr pointer "
796 << *Ptr <<
" SCEV: " << *PtrScev <<
"\n");
802 DEBUG(
dbgs() <<
"LAA: Bad stride - Not striding over innermost loop " <<
803 *Ptr <<
" SCEV: " << *PtrScev <<
"\n");
815 bool IsInAddressSpaceZero = PtrTy->getAddressSpace() == 0;
816 if (!IsNoWrapAddRec && !IsInBoundsGEP && !IsInAddressSpaceZero) {
817 DEBUG(
dbgs() <<
"LAA: Bad stride - Pointer may wrap in the address space "
818 << *Ptr <<
" SCEV: " << *PtrScev <<
"\n");
828 DEBUG(
dbgs() <<
"LAA: Bad stride - Not a constant strided " << *Ptr <<
829 " SCEV: " << *PtrScev <<
"\n");
833 auto &
DL = Lp->
getHeader()->getModule()->getDataLayout();
834 int64_t Size =
DL.getTypeAllocSize(PtrTy->getElementType());
844 int64_t Stride = StepVal / Size;
845 int64_t Rem = StepVal % Size;
852 if (!IsNoWrapAddRec && (IsInBoundsGEP || IsInAddressSpaceZero) &&
853 Stride != 1 && Stride != -1)
863 case BackwardVectorizable:
867 case ForwardButPreventsForwarding:
869 case BackwardVectorizableButPreventsForwarding:
881 case BackwardVectorizable:
883 case ForwardButPreventsForwarding:
885 case BackwardVectorizableButPreventsForwarding:
895 case ForwardButPreventsForwarding:
899 case BackwardVectorizable:
901 case BackwardVectorizableButPreventsForwarding:
907 bool MemoryDepChecker::couldPreventStoreLoadForward(
unsigned Distance,
908 unsigned TypeByteSize) {
918 const unsigned NumCyclesForStoreLoadThroughMemory = 8*TypeByteSize;
920 unsigned MaxVFWithoutSLForwardIssues =
922 if(MaxSafeDepDistBytes < MaxVFWithoutSLForwardIssues)
923 MaxVFWithoutSLForwardIssues = MaxSafeDepDistBytes;
925 for (
unsigned vf = 2*TypeByteSize; vf <= MaxVFWithoutSLForwardIssues;
927 if (Distance % vf && Distance / vf < NumCyclesForStoreLoadThroughMemory) {
928 MaxVFWithoutSLForwardIssues = (vf >>=1);
933 if (MaxVFWithoutSLForwardIssues< 2*TypeByteSize) {
934 DEBUG(
dbgs() <<
"LAA: Distance " << Distance <<
935 " that could cause a store-load forwarding conflict\n");
939 if (MaxVFWithoutSLForwardIssues < MaxSafeDepDistBytes &&
940 MaxVFWithoutSLForwardIssues !=
942 MaxSafeDepDistBytes = MaxVFWithoutSLForwardIssues;
952 unsigned TypeByteSize) {
953 assert(Stride > 1 &&
"The stride must be greater than 1");
954 assert(TypeByteSize > 0 &&
"The type size in byte must be non-zero");
955 assert(Distance > 0 &&
"The distance must be non-zero");
958 if (Distance % TypeByteSize)
961 unsigned ScaledDist = Distance / TypeByteSize;
979 return ScaledDist % Stride;
983 MemoryDepChecker::isDependent(
const MemAccessInfo &
A,
unsigned AIdx,
984 const MemAccessInfo &B,
unsigned BIdx,
986 assert (AIdx < BIdx &&
"Must pass arguments in program order");
988 Value *APtr = A.getPointer();
989 Value *BPtr = B.getPointer();
990 bool AIsWrite = A.getInt();
991 bool BIsWrite = B.getInt();
994 if (!AIsWrite && !BIsWrite)
995 return Dependence::NoDep;
1005 int StrideAPtr =
isStridedPtr(SE, APtr, InnermostLoop, Strides);
1006 int StrideBPtr =
isStridedPtr(SE, BPtr, InnermostLoop, Strides);
1008 const SCEV *Src = AScev;
1013 if (StrideAPtr < 0) {
1025 DEBUG(
dbgs() <<
"LAA: Src Scev: " << *Src <<
"Sink Scev: " << *Sink
1026 <<
"(Induction step: " << StrideAPtr <<
")\n");
1027 DEBUG(
dbgs() <<
"LAA: Distance for " << *InstMap[AIdx] <<
" to "
1028 << *InstMap[BIdx] <<
": " << *Dist <<
"\n");
1033 if (!StrideAPtr || !StrideBPtr || StrideAPtr != StrideBPtr){
1034 DEBUG(
dbgs() <<
"Pointer access with non-constant stride\n");
1040 DEBUG(
dbgs() <<
"LAA: Dependence because of non-constant distance\n");
1041 ShouldRetryWithRuntimeCheck =
true;
1047 auto &
DL = InnermostLoop->getHeader()->getModule()->getDataLayout();
1048 unsigned TypeByteSize =
DL.getTypeAllocSize(ATy);
1053 bool IsTrueDataDependence = (AIsWrite && !BIsWrite);
1054 if (IsTrueDataDependence &&
1055 (couldPreventStoreLoadForward(Val.
abs().
getZExtValue(), TypeByteSize) ||
1057 return Dependence::ForwardButPreventsForwarding;
1059 DEBUG(
dbgs() <<
"LAA: Dependence is negative: NoDep\n");
1060 return Dependence::Forward;
1067 return Dependence::NoDep;
1068 DEBUG(
dbgs() <<
"LAA: Zero dependence difference but different types\n");
1076 "LAA: ReadWrite-Write positive dependency with different types\n");
1082 unsigned Stride =
std::abs(StrideAPtr);
1085 DEBUG(
dbgs() <<
"LAA: Strided accesses are independent\n");
1086 return Dependence::NoDep;
1095 unsigned MinNumIter = std::max(ForcedFactor * ForcedUnroll, 2U);
1123 unsigned MinDistanceNeeded =
1124 TypeByteSize * Stride * (MinNumIter - 1) + TypeByteSize;
1125 if (MinDistanceNeeded > Distance) {
1126 DEBUG(
dbgs() <<
"LAA: Failure because of positive distance " << Distance
1128 return Dependence::Backward;
1132 if (MinDistanceNeeded > MaxSafeDepDistBytes) {
1133 DEBUG(
dbgs() <<
"LAA: Failure because it needs at least "
1134 << MinDistanceNeeded <<
" size in bytes");
1135 return Dependence::Backward;
1154 MaxSafeDepDistBytes =
1155 Distance < MaxSafeDepDistBytes ? Distance : MaxSafeDepDistBytes;
1157 bool IsTrueDataDependence = (!AIsWrite && BIsWrite);
1158 if (IsTrueDataDependence &&
1159 couldPreventStoreLoadForward(Distance, TypeByteSize))
1160 return Dependence::BackwardVectorizableButPreventsForwarding;
1163 <<
" with max VF = "
1164 << MaxSafeDepDistBytes / (TypeByteSize * Stride) <<
'\n');
1166 return Dependence::BackwardVectorizable;
1173 MaxSafeDepDistBytes = -1U;
1174 while (!CheckDeps.
empty()) {
1187 CheckDeps.
erase(*AI);
1191 for (std::vector<unsigned>::iterator I1 = Accesses[*AI].
begin(),
1192 I1E = Accesses[*AI].
end(); I1 != I1E; ++I1)
1193 for (std::vector<unsigned>::iterator I2 = Accesses[*OI].
begin(),
1194 I2E = Accesses[*OI].
end(); I2 != I2E; ++I2) {
1195 auto A = std::make_pair(&*AI, *I1);
1196 auto B = std::make_pair(&*OI, *I2);
1203 isDependent(*A.first, A.second, *B.first, B.second, Strides);
1204 SafeForVectorization &= Dependence::isSafeForVectorization(Type);
1210 if (RecordInterestingDependences) {
1211 if (Dependence::isInterestingDependence(Type))
1212 InterestingDependences.push_back(
1216 RecordInterestingDependences =
false;
1217 InterestingDependences.clear();
1218 DEBUG(
dbgs() <<
"Too many dependences, stopped recording\n");
1221 if (!RecordInterestingDependences && !SafeForVectorization)
1230 DEBUG(
dbgs() <<
"Total Interesting Dependences: "
1231 << InterestingDependences.size() <<
"\n");
1232 return SafeForVectorization;
1238 auto &IndexVector = Accesses.find(Access)->second;
1241 std::transform(IndexVector.begin(), IndexVector.end(),
1242 std::back_inserter(Insts),
1243 [&](
unsigned Idx) {
return this->InstMap[Idx]; });
1248 "NoDep",
"Unknown",
"Forward",
"ForwardButPreventsForwarding",
"Backward",
1249 "BackwardVectorizable",
"BackwardVectorizableButPreventsForwarding"};
1256 OS.
indent(Depth + 2) << *Instrs[Destination] <<
"\n";
1259 bool LoopAccessInfo::canAnalyzeLoop() {
1261 DEBUG(
dbgs() <<
"LAA: Found a loop: " <<
1262 TheLoop->
getHeader()->getName() <<
'\n');
1265 if (!TheLoop->
empty()) {
1266 DEBUG(
dbgs() <<
"LAA: loop is not the innermost loop\n");
1273 DEBUG(
dbgs() <<
"LAA: loop control flow is not understood by analyzer\n");
1276 "loop control flow is not understood by analyzer");
1282 DEBUG(
dbgs() <<
"LAA: loop control flow is not understood by analyzer\n");
1285 "loop control flow is not understood by analyzer");
1293 DEBUG(
dbgs() <<
"LAA: loop control flow is not understood by analyzer\n");
1296 "loop control flow is not understood by analyzer");
1304 "could not determine number of loop iterations");
1305 DEBUG(
dbgs() <<
"LAA: SCEV could not compute the loop exit count.\n");
1322 unsigned NumReads = 0;
1323 unsigned NumReadWrites = 0;
1325 PtrRtChecking.Pointers.clear();
1326 PtrRtChecking.Need =
false;
1332 be = TheLoop->
block_end(); bb != be; ++bb) {
1341 if (it->mayReadFromMemory()) {
1356 if (!Ld || (!Ld->
isSimple() && !IsAnnotatedParallel)) {
1358 <<
"read with atomic ordering or volatile read");
1359 DEBUG(
dbgs() <<
"LAA: Found a non-simple load.\n");
1364 Loads.push_back(Ld);
1365 DepChecker.addAccess(Ld);
1370 if (it->mayWriteToMemory()) {
1374 "instruction cannot be vectorized");
1378 if (!St->
isSimple() && !IsAnnotatedParallel) {
1380 <<
"write with atomic ordering or volatile write");
1381 DEBUG(
dbgs() <<
"LAA: Found a non-simple store.\n");
1386 Stores.push_back(St);
1387 DepChecker.addAccess(St);
1397 if (!Stores.size()) {
1398 DEBUG(
dbgs() <<
"LAA: Found a read-only loop!\n");
1404 AccessAnalysis Accesses(TheLoop->
getHeader()->getModule()->getDataLayout(),
1405 AA, LI, DependentAccesses);
1414 ValueVector::iterator
I,
IE;
1415 for (I = Stores.begin(), IE = Stores.end(); I !=
IE; ++
I) {
1419 StoreToLoopInvariantAddress |= isUniform(Ptr);
1422 if (Seen.insert(Ptr).second) {
1429 if (blockNeedsPredication(ST->
getParent(), TheLoop, DT))
1432 Accesses.addStore(Loc);
1436 if (IsAnnotatedParallel) {
1438 <<
"LAA: A loop annotated parallel, ignore memory dependency "
1444 for (I = Loads.begin(), IE = Loads.end(); I !=
IE; ++
I) {
1455 bool IsReadOnlyPtr =
false;
1456 if (Seen.insert(Ptr).second || !
isStridedPtr(SE, Ptr, TheLoop, Strides)) {
1458 IsReadOnlyPtr =
true;
1465 if (blockNeedsPredication(LD->
getParent(), TheLoop, DT))
1468 Accesses.addLoad(Loc, IsReadOnlyPtr);
1473 if (NumReadWrites == 1 && NumReads == 0) {
1474 DEBUG(
dbgs() <<
"LAA: Found a write-only loop!\n");
1481 Accesses.buildDependenceSets();
1485 bool CanDoRTIfNeeded =
1486 Accesses.canCheckPtrAtRT(PtrRtChecking, SE, TheLoop, Strides);
1487 if (!CanDoRTIfNeeded) {
1489 DEBUG(
dbgs() <<
"LAA: We can't vectorize because we can't find "
1490 <<
"the array bounds.\n");
1495 DEBUG(
dbgs() <<
"LAA: We can perform a memory runtime check if needed.\n");
1498 if (Accesses.isDependencyCheckNeeded()) {
1499 DEBUG(
dbgs() <<
"LAA: Checking memory dependencies\n");
1500 CanVecMem = DepChecker.areDepsSafe(
1501 DependentAccesses, Accesses.getDependenciesToCheck(), Strides);
1502 MaxSafeDepDistBytes = DepChecker.getMaxSafeDepDistBytes();
1504 if (!CanVecMem && DepChecker.shouldRetryWithRuntimeCheck()) {
1505 DEBUG(
dbgs() <<
"LAA: Retrying with memory checks\n");
1508 Accesses.resetDepChecks(DepChecker);
1510 PtrRtChecking.reset();
1511 PtrRtChecking.Need =
true;
1514 Accesses.canCheckPtrAtRT(PtrRtChecking, SE, TheLoop, Strides,
true);
1517 if (!CanDoRTIfNeeded) {
1519 <<
"cannot check memory dependencies at runtime");
1520 DEBUG(
dbgs() <<
"LAA: Can't vectorize with memory checks\n");
1530 DEBUG(
dbgs() <<
"LAA: No unsafe dependent memory operations in loop. We"
1531 << (PtrRtChecking.Need ?
"" :
" don't")
1532 <<
" need runtime memory checks.\n");
1535 "unsafe dependent memory operations in loop");
1536 DEBUG(
dbgs() <<
"LAA: unsafe dependent memory operations in loop\n");
1542 assert(TheLoop->
contains(BB) &&
"Unknown block used");
1550 assert(!Report &&
"Multiple reports generated");
1571 if (!PtrRtChecking.Need)
1572 return std::make_pair(
nullptr,
nullptr);
1581 for (
unsigned i = 0; i < PtrRtChecking.CheckingGroups.size(); ++i) {
1583 PtrRtChecking.CheckingGroups[i];
1584 Value *Ptr = PtrRtChecking.Pointers[CG.
Members[0]].PointerValue;
1588 DEBUG(
dbgs() <<
"LAA: Adding RT check for a loop invariant ptr:" << *Ptr
1597 Value *Start =
nullptr, *End =
nullptr;
1599 DEBUG(
dbgs() <<
"LAA: Adding RT check for range:\n");
1610 Value *MemoryRuntimeCheck =
nullptr;
1611 for (
unsigned i = 0; i < PtrRtChecking.CheckingGroups.size(); ++i) {
1612 for (
unsigned j = i + 1; j < PtrRtChecking.CheckingGroups.size(); ++j) {
1614 PtrRtChecking.CheckingGroups[i];
1616 PtrRtChecking.CheckingGroups[j];
1618 if (!PtrRtChecking.needsChecking(CGI, CGJ, PtrPartition))
1621 unsigned AS0 = Starts[i]->getType()->getPointerAddressSpace();
1622 unsigned AS1 = Starts[j]->getType()->getPointerAddressSpace();
1624 assert((AS0 == Ends[j]->
getType()->getPointerAddressSpace()) &&
1625 (AS1 == Ends[i]->
getType()->getPointerAddressSpace()) &&
1626 "Trying to bounds check pointers with different address spaces");
1640 Value *IsConflict = ChkBuilder.
CreateAnd(Cmp0, Cmp1,
"found.conflict");
1642 if (MemoryRuntimeCheck) {
1643 IsConflict = ChkBuilder.
CreateOr(MemoryRuntimeCheck, IsConflict,
1647 MemoryRuntimeCheck = IsConflict;
1651 if (!MemoryRuntimeCheck)
1652 return std::make_pair(
nullptr,
nullptr);
1659 ChkBuilder.
Insert(Check,
"memcheck.conflict");
1661 return std::make_pair(FirstInst, Check);
1669 : PtrRtChecking(SE), DepChecker(SE, L), TheLoop(L), SE(SE), DL(DL),
1670 TLI(TLI), AA(AA), DT(DT), LI(LI), NumLoads(0), NumStores(0),
1671 MaxSafeDepDistBytes(-1U), CanVecMem(
false),
1672 StoreToLoopInvariantAddress(
false) {
1673 if (canAnalyzeLoop())
1674 analyzeLoop(Strides);
1679 if (PtrRtChecking.
Need)
1680 OS.
indent(Depth) <<
"Memory dependences are safe with run-time checks\n";
1682 OS.
indent(Depth) <<
"Memory dependences are safe\n";
1686 OS.
indent(Depth) <<
"Report: " << Report->str() <<
"\n";
1689 OS.
indent(Depth) <<
"Interesting Dependences:\n";
1690 for (
auto &Dep : *InterestingDependences) {
1695 OS.
indent(Depth) <<
"Too many interesting dependences, not recorded\n";
1698 PtrRtChecking.
print(OS, Depth);
1701 OS.
indent(Depth) <<
"Store to invariant address was "
1702 << (StoreToLoopInvariantAddress ?
"" :
"not ")
1703 <<
"found in loop.\n";
1708 auto &LAI = LoopAccessInfoMap[L];
1711 assert((!LAI || LAI->NumSymbolicStrides == Strides.
size()) &&
1712 "Symbolic strides changed for loop");
1717 LAI = llvm::make_unique<LoopAccessInfo>(L, SE,
DL, TLI, AA, DT, LI,
1720 LAI->NumSymbolicStrides = Strides.
size();
1731 for (
Loop *TopLevelLoop : *LI)
1733 OS.
indent(2) << L->getHeader()->getName() <<
":\n";
1734 auto &LAI = LAA.
getInfo(L, NoSymbolicStrides);
1740 SE = &getAnalysis<ScalarEvolution>();
1741 auto *TLIP = getAnalysisIfAvailable<TargetLibraryInfoWrapperPass>();
1742 TLI = TLIP ? &TLIP->getTLI() :
nullptr;
1743 AA = &getAnalysis<AliasAnalysis>();
1744 DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
1745 LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
1761 #define LAA_NAME "loop-accesses"
1772 return new LoopAccessAnalysis();
NoWrapFlags getNoWrapFlags(NoWrapFlags Mask=NoWrapMask) const
bool isPossiblyBackward() const
Lexically backward dependence types.
unsigned getNumBackEdges() const
getNumBackEdges - Calculate the number of back edges to the loop header
void print(raw_ostream &OS, unsigned Depth, const SmallVectorImpl< Instruction * > &Instrs) const
Print the dependence.
Pass interface - Implemented by all 'passes'.
void print(raw_ostream &OS, unsigned Depth=0) const
Print the information about the memory accesses in the loop.
static unsigned RuntimeMemoryCheckThreshold
When performing memory disambiguation checks at runtime do not make more than this number of c...
static const char laa_name[]
const SCEV * evaluateAtIteration(const SCEV *It, ScalarEvolution &SE) const
evaluateAtIteration - Return the value of this chain of recurrences at the specified iteration number...
void push_back(const T &Elt)
Intrinsic::ID getIntrinsicIDForCall(CallInst *CI, const TargetLibraryInfo *TLI)
Returns intrinsic ID for call.
void insert(Loop *Lp, Value *Ptr, bool WritePtr, unsigned DepSetId, unsigned ASId, const ValueToValueMap &Strides)
Insert a pointer and calculate the start and end SCEVs.
A parsed version of the target data layout string, and methods for querying it. ...
const_iterator end(StringRef path)
Get end iterator over path.
SmallVector< CheckingPtrGroup, 2 > CheckingGroups
Holds a partitioning of pointers into "check groups".
APInt LLVM_ATTRIBUTE_UNUSED_RESULT abs() const
Get the absolute value.
LLVMContext & getContext() const
getContext - Return a reference to the LLVMContext associated with this function. ...
uint64_t getZExtValue() const
Get zero extended value.
bool LLVM_ATTRIBUTE_UNUSED_RESULT empty() const
static bool isInBoundsGep(Value *Ptr)
MDNode * TBAA
The tag for type-based alias analysis.
const SmallVectorImpl< Instruction * > & getMemoryInstructions() const
The vector of memory access instructions.
A Module instance is used to store all the information related to an LLVM module. ...
bool isAnnotatedParallel() const
Returns true if the loop is annotated parallel.
ScalarEvolution - This class is the main scalar evolution driver.
CallInst - This class represents a function call, abstracting a target machine's calling convention...
raw_ostream & indent(unsigned NumSpaces)
indent - Insert 'NumSpaces' spaces.
const SCEV * getStepRecurrence(ScalarEvolution &SE) const
getStepRecurrence - This method constructs and returns the recurrence indicating how much this expres...
static bool hasComputableBounds(ScalarEvolution *SE, const ValueToValueMap &Strides, Value *Ptr)
Check whether a pointer can participate in a runtime bounds check.
void reset()
Reset the state of the pointer runtime information.
const_iterator begin(StringRef path)
Get begin iterator over path.
bool isLoopInvariant(const SCEV *S, const Loop *L)
isLoopInvariant - Return true if the value of the given SCEV is unchanging in the specified loop...
LoadInst - an instruction for reading from memory.
FunctionType * getType(LLVMContext &Context, ID id, ArrayRef< Type * > Tys=None)
Return the function type for an intrinsic.
void GetUnderlyingObjects(Value *V, SmallVectorImpl< Value * > &Objects, const DataLayout &DL, LoopInfo *LI=nullptr, unsigned MaxLookup=6)
This method is similar to GetUnderlyingObject except that it can look through phi and select instruct...
Checks memory dependences among accesses to the same underlying object to determine whether there vec...
static unsigned VectorizationFactor
VF as overridden by the user.
unsigned getPointerAddressSpace() const
Get the address space of this pointer or pointer vector type.
member_iterator member_begin(iterator I) const
bool isNoBuiltin() const
Return true if the call should not be treated as a call to a builtin.
const SCEV * replaceSymbolicStrideSCEV(ScalarEvolution *SE, const ValueToValueMap &PtrToStride, Value *Ptr, Value *OrigPtr=nullptr)
Return the SCEV corresponding to a pointer with the symbolic stride replaced with constant one...
BlockT * getHeader() const
InstTy * Insert(InstTy *I, const Twine &Name="") const
Insert and return the specified instruction.
Type * getPointerElementType() const
const SCEV * getStart() const
StringRef getName() const
Return a constant reference to the value's name.
BlockT * getLoopLatch() const
getLoopLatch - If there is a single latch block for this loop, return it.
Value * CreateICmpULE(Value *LHS, Value *RHS, const Twine &Name="")
bool isNegative() const
Determine sign of this APInt.
AnalysisUsage & addRequired()
#define INITIALIZE_PASS_DEPENDENCY(depName)
void emitOptimizationRemarkAnalysis(LLVMContext &Ctx, const char *PassName, const Function &Fn, const DebugLoc &DLoc, const Twine &Msg)
Emit an optimization analysis remark message.
static const unsigned MaxVectorWidth
Maximum SIMD width.
This is the base class for all instructions that perform data casts.
const APInt & getValue() const
Return the constant as an APInt value reference.
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
member_iterator member_end() const
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
const SmallVectorImpl< Dependence > * getInterestingDependences() const
Returns the interesting dependences.
bool runOnFunction(Function &F) override
runOnFunction - Virtual method overridden by subclasses to do the per-function processing of the pass...
const Instruction * getInstr() const
static const char * DepName[]
String version of the types.
load Combine Adjacent Loads
LoopAccessInfo(Loop *L, ScalarEvolution *SE, const DataLayout &DL, const TargetLibraryInfo *TLI, AliasAnalysis *AA, DominatorTree *DT, LoopInfo *LI, const ValueToValueMap &Strides)
iterator findValue(const ElemTy &V) const
findValue - Return an iterator to the specified value.
Value * CreateAnd(Value *LHS, Value *RHS, const Twine &Name="")
Value * CreateOr(Value *LHS, Value *RHS, const Twine &Name="")
SCEVAddRecExpr - This node represents a polynomial recurrence on the trip count of the specified loop...
void print(raw_ostream &OS, unsigned Depth=0, const SmallVectorImpl< int > *PtrPartition=nullptr) const
Print the list of run-time memory checks necessary.
std::set< ECValue >::const_iterator iterator
iterator* - Provides a way to iterate over all values in the set.
StoreInst - an instruction for storing to memory.
static Instruction * getFirstInst(Instruction *FirstInst, Value *V, Instruction *Loc)
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree...
PointerType - Class to represent pointers.
Optimization analysis message produced during vectorization.
GetElementPtrInst - an instruction for type-safe pointer arithmetic to access elements of arrays and ...
const SCEV * getCouldNotCompute()
static MemoryLocation get(const LoadInst *LI)
Return a location with information about the memory reference by the given instruction.
initializer< Ty > init(const Ty &Val)
bool isAffine() const
isAffine - Return true if this represents an expression A + B*x where A and B are loop invariant valu...
LLVM Basic Block Representation.
static const SCEV * rewrite(const SCEV *Scev, ScalarEvolution &SE, ValueToValueMap &Map, bool InterpretConsts=false)
PointerIntPair - This class implements a pair of a pointer and small integer.
bool needsChecking(const CheckingPtrGroup &M, const CheckingPtrGroup &N, const SmallVectorImpl< int > *PtrPartition) const
Decide if we need to add a check between two groups of pointers, according to needsChecking.
The instances of the Type class are immutable: once they are created, they are never changed...
This is an important class for using LLVM in a threaded context.
int64_t getSExtValue() const
Get sign extended value.
SmallSet - This maintains a set of unique values, optimizing for the case when the set is small (less...
bool addPointer(unsigned Index)
Tries to add the pointer recorded in RtCheck at index Index to this pointer checking group...
SmallVector< Instruction *, 4 > getInstructionsForAccess(Value *Ptr, bool isWrite) const
Find the set of instructions that read or write via Ptr.
EquivalenceClasses - This represents a collection of equivalence classes and supports three efficient...
Represent the analysis usage information of a pass.
bool contains(const LoopT *L) const
contains - Return true if the specified loop is contained within this loop.
const SCEV * getMinusSCEV(const SCEV *LHS, const SCEV *RHS, SCEV::NoWrapFlags Flags=SCEV::FlagAnyWrap)
getMinusSCEV - Return LHS-RHS. Minus is represented in SCEV as A+B*-1.
unsigned getBitWidth() const
Return the number of bits in the APInt.
Value * expandCodeFor(const SCEV *SH, Type *Ty, Instruction *I)
Insert code to directly compute the specified SCEV expression into the program.
BlockT * getExitingBlock() const
getExitingBlock - If getExitingBlocks would return exactly one block, return that block...
Value * getPointerOperand()
int isStridedPtr(ScalarEvolution *SE, Value *Ptr, const Loop *Lp, const ValueToValueMap &StridesMap)
Check the stride of the pointer and ensure that it does not wrap in the address space.
std::pair< NoneType, bool > insert(const T &V)
insert - Insert an element into the set if it isn't already there.
#define INITIALIZE_AG_DEPENDENCY(depName)
bool isPointerTy() const
isPointerTy - True if this is an instance of PointerType.
void groupChecks(MemoryDepChecker::DepCandidates &DepCands, bool UseDependencies)
Groups pointers such that a single memcheck is required between two different groups.
LLVMContext & getContext() const
All values hold a context through their type.
static PointerType * getInt8PtrTy(LLVMContext &C, unsigned AS=0)
Value * stripIntegerCast(Value *V)
static void emitAnalysis(const LoopAccessReport &Message, const Function *TheFunction, const Loop *TheLoop, const char *PassName)
Emit an analysis note for PassName with the debug location from the instruction in Message if availab...
DebugLoc getStartLoc() const
Return the debug location of the start of this loop.
DepType
The type of the dependence.
SmallVector< unsigned, 2 > Members
Indices of all the pointers that constitute this grouping.
bool dominates(const Instruction *Def, const Use &U) const
Return true if Def dominates a use in User.
size_type count(const T &V) const
count - Return 1 if the element is in the set, 0 otherwise.
unsigned getNumberOfChecks(const SmallVectorImpl< int > *PtrPartition) const
Returns the number of run-time checks required according to needsChecking.
const Value * Ptr
The address of the start of the location.
Representation for a specific memory location.
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or fewer elements...
bool erase(PtrType Ptr)
erase - If the set contains the specified pointer, remove it and return true, otherwise return false...
static void emitAnalysis(CallSite CS, const Twine &Msg)
bool isUniform(Value *V) const
Returns true if the value V is uniform within the loop.
Value * CreateBitCast(Value *V, Type *DestTy, const Twine &Name="")
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small...
Type * getType() const
All values are typed, get the type of this value.
static const SCEV * getMinFromExprs(const SCEV *I, const SCEV *J, ScalarEvolution *SE)
Compare I and J and return the minimum.
static bool isNoWrapAddRec(Value *Ptr, const SCEVAddRecExpr *AR, ScalarEvolution *SE, const Loop *L)
Return true if an AddRec pointer Ptr is unsigned non-wrapping, i.e.
Provides information about what library functions are available for the current target.
const LoopAccessInfo & getInfo(Loop *L, const ValueToValueMap &Strides)
Query the result of the loop access information for the loop L.
const SCEV * Low
The SCEV expression which represents the lower bound of all the pointers in this group.
#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)
void print(raw_ostream &OS, const Module *M=nullptr) const override
Print the result of the analysis when invoked with -analyze.
bool isStrictlyPositive() const
Determine if this APInt Value is positive.
Drive the analysis of memory accesses in the loop.
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - This function should be overridden by passes that need analysis information to do t...
static Constant * get(Type *Ty, uint64_t V, bool isSigned=false)
If Ty is a vector type, return a Constant with a splat of the given value.
Function * getCalledFunction() const
getCalledFunction - Return the function called, or null if this is an indirect function invocation...
ConstantInt * getValue() const
static ConstantInt * getTrue(LLVMContext &Context)
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Class for arbitrary precision integers.
bool needsAnyChecking(const SmallVectorImpl< int > *PtrPartition) const
Return true if any pointer requires run-time checking according to needsChecking. ...
void setPreservesAll()
Set by analyses that do not transform their input at all.
This class uses information about analyzed scalars to rewrite expressions in canonical form...
LLVM_ATTRIBUTE_UNUSED_RESULT std::enable_if< !is_simple_type< Y >::value, typename cast_retty< X, const Y >::ret_type >::type dyn_cast(const Y &Val)
std::vector< BlockT * >::const_iterator block_iterator
Holds information about the memory runtime legality checks to verify that a group of pointers do not ...
static bool isSafeForVectorization(DepType Type)
Dependence types that don't prevent vectorization.
static cl::opt< unsigned > MaxInterestingDependence("max-interesting-dependences", cl::Hidden, cl::desc("Maximum number of interesting dependences collected by ""loop-access analysis (default = 100)"), cl::init(100))
We collect interesting dependences up to this threshold.
block_iterator block_end() const
This analysis provides dependence information for the memory accesses of a loop.
AAMDNodes AATags
The metadata nodes which describes the aliasing of the location (each member is null if that kind of ...
Dependence between memory access instructions.
SCEV - This class represents an analyzed expression in the program.
const SCEV * High
The SCEV expression which represents the upper bound of all the pointers in this group.
std::error_code Check(std::error_code Err)
static bool areStridedAccessesIndependent(unsigned Distance, unsigned Stride, unsigned TypeByteSize)
Check the dependence for two accesses with the same stride Stride.
APFloat abs(APFloat X)
Returns the absolute value of the argument.
SmallVector< PointerInfo, 2 > Pointers
Information about the pointers that may require checking.
static cl::opt< unsigned, true > VectorizationFactor("force-vector-width", cl::Hidden, cl::desc("Sets the SIMD width. Zero is autoselect."), cl::location(VectorizerParams::VectorizationFactor))
iterator find(const KeyT &Val)
const Loop * getLoop() const
bool areDepsSafe(DepCandidates &AccessSets, MemAccessInfoSet &CheckDeps, const ValueToValueMap &Strides)
Check whether the dependencies between the accesses are safe.
iterator_range< df_iterator< T > > depth_first(const T &G)
const SCEV * getBackedgeTakenCount(const Loop *L)
getBackedgeTakenCount - If the specified loop has a predictable backedge-taken count, return it, otherwise return a SCEVCouldNotCompute object.
static cl::opt< unsigned, true > VectorizationInterleave("force-vector-interleave", cl::Hidden, cl::desc("Sets the vectorization interleave count. ""Zero is autoselect."), cl::location(VectorizerParams::VectorizationInterleave))
RuntimePointerChecking & RtCheck
Constitutes the context of this pointer checking group.
LLVM Value Representation.
const ElemTy & getLeaderValue(const ElemTy &V) const
getLeaderValue - Return the leader for the specified value that is in the set.
const SCEV * getSCEV(Value *V)
getSCEV - Return a SCEV expression for the full generality of the specified expression.
A vector that has set insertion semantics.
static bool blockNeedsPredication(BasicBlock *BB, Loop *TheLoop, DominatorTree *DT)
Return true if the block BB needs to be predicated in order for the loop to be vectorized.
void clearInterestingDependences()
This class implements an extremely fast bulk output stream that can only output to a stream...
block_iterator block_begin() const
The legacy pass manager's analysis pass to compute loop information.
static unsigned VectorizationInterleave
Interleave factor as overridden by the user.
Legacy analysis pass which computes a DominatorTree.
std::pair< Instruction *, Instruction * > addRuntimeCheck(Instruction *Loc, const SmallVectorImpl< int > *PtrPartition=nullptr) const
Add code that checks at runtime if the accessed arrays overlap.
static bool isInterestingDependence(DepType Type)
Dependence types that can be queried from the analysis.
static bool isInterleaveForced()
True if force-vector-interleave was specified by the user.
Value * getPointerOperand()
LocationClass< Ty > location(Ty &L)
const BasicBlock * getParent() const
static cl::opt< unsigned > MemoryCheckMergeThreshold("memory-check-merge-threshold", cl::Hidden, cl::desc("Maximum number of comparisons done when trying to merge ""runtime memory checks. (default = 100)"), cl::init(100))
The maximum iterations used to merge memory checks.
static cl::opt< unsigned, true > RuntimeMemoryCheckThreshold("runtime-memory-check-threshold", cl::Hidden, cl::desc("When performing memory disambiguation checks at runtime do not ""generate more than this number of comparisons (default = 8)."), cl::location(VectorizerParams::RuntimeMemoryCheckThreshold), cl::init(8))
SCEVConstant - This class represents a constant integer value.
bool Need
This flag indicates if we need to add the runtime check.