72 #define DEBUG_TYPE "loop-accesses"
76 cl::desc(
"Sets the SIMD width. Zero is autoselect."),
82 cl::desc(
"Sets the vectorization interleave count. "
83 "Zero is autoselect."),
90 cl::desc(
"When performing memory disambiguation checks at runtime do not "
91 "generate more than this number of comparisons (default = 8)."),
98 cl::desc(
"Maximum number of comparisons done when trying to merge "
99 "runtime memory checks. (default = 100)"),
108 cl::desc(
"Maximum number of dependences collected by "
109 "loop-access analysis (default = 100)"),
125 cl::desc(
"Enable symbolic stride memory access versioning"));
130 "store-to-load-forwarding-conflict-detection",
cl::Hidden,
131 cl::desc(
"Enable conflict detection in loop-access analysis"),
139 const Loop *TheLoop,
const char *PassName,
146 if (
I->getDebugLoc())
147 DL =
I->getDebugLoc();
154 if (
auto *CI = dyn_cast<CastInst>(V))
155 if (CI->getOperand(0)->getType()->isIntegerTy())
156 return CI->getOperand(0);
168 PtrToStride.
find(OrigPtr ? OrigPtr : Ptr);
169 if (SI != PtrToStride.
end()) {
170 Value *StrideVal = SI->second;
178 RewriteMap[StrideVal] = One;
181 const auto *U = cast<SCEVUnknown>(SE->
getSCEV(StrideVal));
188 DEBUG(
dbgs() <<
"LAA: Replacing SCEV: " << *OrigSCEV <<
" by: " << *Expr
211 unsigned DepSetId,
unsigned ASId,
222 ScStart = ScEnd = Sc;
225 assert(AR &&
"Invalid addrec expression");
234 if (
const auto *CStep = dyn_cast<SCEVConstant>(Step)) {
235 if (CStep->getValue()->isNegative())
251 Pointers.emplace_back(Ptr, ScStart, ScEnd, WritePtr, DepSetId, ASId, Sc);
255 RuntimePointerChecking::generateChecks()
const {
264 Checks.
push_back(std::make_pair(&CGI, &CGJ));
270 void RuntimePointerChecking::generateChecks(
273 groupChecks(DepCands, UseDependencies);
280 for (
unsigned J = 0, EJ = N.
Members.
size(); EJ != J; ++J)
327 void RuntimePointerChecking::groupChecks(
373 if (!UseDependencies) {
379 unsigned TotalComparisons = 0;
382 for (
unsigned Index = 0; Index <
Pointers.size(); ++Index)
383 PositionMap[
Pointers[Index].PointerValue] = Index;
411 unsigned Pointer = PositionMap[
MI->getPointer()];
418 for (CheckingPtrGroup &Group : Groups) {
428 if (Group.addPointer(Pointer)) {
438 Groups.push_back(CheckingPtrGroup(Pointer, *
this));
443 std::copy(Groups.begin(), Groups.end(), std::back_inserter(
CheckingGroups));
450 return (PtrToPartition[PtrIdx1] != -1 &&
451 PtrToPartition[PtrIdx1] == PtrToPartition[PtrIdx2]);
475 unsigned Depth)
const {
477 for (
const auto &
Check : Checks) {
478 const auto &First =
Check.first->Members, &Second =
Check.second->Members;
480 OS.
indent(Depth) <<
"Check " << N++ <<
":\n";
482 OS.
indent(Depth + 2) <<
"Comparing group (" <<
Check.first <<
"):\n";
483 for (
unsigned K = 0; K < First.size(); ++K)
486 OS.
indent(Depth + 2) <<
"Against group (" <<
Check.second <<
"):\n";
487 for (
unsigned K = 0; K < Second.size(); ++K)
488 OS.
indent(Depth + 2) << *
Pointers[Second[K]].PointerValue <<
"\n";
494 OS.
indent(Depth) <<
"Run-time memory checks:\n";
497 OS.
indent(Depth) <<
"Grouped accesses:\n";
501 OS.
indent(Depth + 2) <<
"Group " << &CG <<
":\n";
502 OS.
indent(Depth + 4) <<
"(Low: " << *CG.Low <<
" High: " << *CG.High
504 for (
unsigned J = 0; J < CG.Members.size(); ++J) {
505 OS.
indent(Depth + 6) <<
"Member: " << *
Pointers[CG.Members[J]].Expr
517 class AccessAnalysis {
526 : DL(Dl), AST(*AA), LI(LI), DepCands(DA), IsRTCheckAnalysisNeeded(
false),
533 Accesses.insert(MemAccessInfo(Ptr,
false));
535 ReadOnlyPtr.insert(Ptr);
542 Accesses.insert(MemAccessInfo(Ptr,
true));
552 bool ShouldCheckWrap =
false);
556 void buildDependenceSets() {
557 processMemAccesses();
565 bool isDependencyCheckNeeded() {
return !CheckDeps.empty(); }
573 MemAccessInfoSet &getDependenciesToCheck() {
return CheckDeps; }
580 void processMemAccesses();
583 PtrAccessSet Accesses;
588 MemAccessInfoSet CheckDeps;
611 bool IsRTCheckAnalysisNeeded;
650 bool ShouldCheckWrap) {
655 bool NeedRTCheck =
false;
656 if (!IsRTCheckAnalysisNeeded)
return true;
658 bool IsDepCheckNeeded = isDependencyCheckNeeded();
663 for (
auto &AS : AST) {
664 int NumReadPtrChecks = 0;
665 int NumWritePtrChecks = 0;
669 unsigned RunningDepId = 1;
673 Value *Ptr =
A.getValue();
674 bool IsWrite = Accesses.count(MemAccessInfo(Ptr,
true));
675 MemAccessInfo Access(Ptr, IsWrite);
685 (!ShouldCheckWrap ||
isNoWrap(PSE, StridesMap, Ptr, TheLoop))) {
689 if (IsDepCheckNeeded) {
691 unsigned &LeaderId = DepSetId[Leader];
693 LeaderId = RunningDepId++;
697 DepId = RunningDepId++;
699 RtCheck.
insert(TheLoop, Ptr, IsWrite, DepId, ASId, StridesMap, PSE);
701 DEBUG(
dbgs() <<
"LAA: Found a runtime check ptr:" << *Ptr <<
'\n');
703 DEBUG(
dbgs() <<
"LAA: Can't find bounds for ptr:" << *Ptr <<
'\n');
716 if (!(IsDepCheckNeeded && CanDoRT && RunningDepId == 2))
717 NeedRTCheck |= (NumWritePtrChecks >= 2 || (NumReadPtrChecks >= 1 &&
718 NumWritePtrChecks >= 1));
728 unsigned NumPointers = RtCheck.
Pointers.size();
729 for (
unsigned i = 0;
i < NumPointers; ++
i) {
730 for (
unsigned j =
i + 1; j < NumPointers; ++j) {
732 if (RtCheck.
Pointers[
i].DependencySetId ==
733 RtCheck.
Pointers[j].DependencySetId)
745 DEBUG(
dbgs() <<
"LAA: Runtime check would require comparison between"
746 " different address spaces\n");
752 if (NeedRTCheck && CanDoRT)
756 <<
" pointer comparisons.\n");
758 RtCheck.
Need = NeedRTCheck;
760 bool CanDoRTIfNeeded = !NeedRTCheck || CanDoRT;
761 if (!CanDoRTIfNeeded)
763 return CanDoRTIfNeeded;
766 void AccessAnalysis::processMemAccesses() {
771 DEBUG(
dbgs() <<
"LAA: Processing memory accesses...\n");
773 DEBUG(
dbgs() <<
"LAA: Accesses(" << Accesses.size() <<
"):\n");
775 for (
auto A : Accesses)
776 dbgs() <<
"\t" << *
A.getPointer() <<
" (" <<
777 (
A.getInt() ?
"write" : (ReadOnlyPtr.count(
A.getPointer()) ?
778 "read-only" :
"read")) <<
")\n";
785 for (
auto &AS : AST) {
790 bool SetHasWrite =
false;
794 UnderlyingObjToAccessMap ObjToLastAccess;
797 PtrAccessSet DeferredAccesses;
801 for (
int SetIteration = 0; SetIteration < 2; ++SetIteration) {
802 bool UseDeferred = SetIteration > 0;
803 PtrAccessSet &S = UseDeferred ? DeferredAccesses : Accesses;
806 Value *Ptr = AV.getValue();
811 if (AC.getPointer() !=
Ptr)
814 bool IsWrite = AC.getInt();
818 bool IsReadOnlyPtr = ReadOnlyPtr.count(Ptr) && !IsWrite;
819 if (UseDeferred && !IsReadOnlyPtr)
823 assert(((IsReadOnlyPtr && UseDeferred) || IsWrite ||
824 S.count(MemAccessInfo(Ptr,
false))) &&
825 "Alias-set pointer not in the access set?");
827 MemAccessInfo Access(Ptr, IsWrite);
835 if (!UseDeferred && IsReadOnlyPtr) {
836 DeferredAccesses.insert(Access);
844 if ((IsWrite || IsReadOnlyPtr) && SetHasWrite) {
845 CheckDeps.insert(Access);
846 IsRTCheckAnalysisNeeded =
true;
855 ValueVector TempObjects;
858 DEBUG(
dbgs() <<
"Underlying objects for pointer " << *Ptr <<
"\n");
859 for (
Value *UnderlyingObj : TempObjects) {
862 if (isa<ConstantPointerNull>(UnderlyingObj))
865 UnderlyingObjToAccessMap::iterator Prev =
866 ObjToLastAccess.find(UnderlyingObj);
867 if (Prev != ObjToLastAccess.end())
868 DepCands.
unionSets(Access, Prev->second);
870 ObjToLastAccess[UnderlyingObj] = Access;
871 DEBUG(
dbgs() <<
" " << *UnderlyingObj <<
"\n");
881 return GEP->isInBounds();
902 if (!
GEP || !
GEP->isInBounds())
906 Value *NonConstIndex =
nullptr;
908 if (!isa<ConstantInt>(Index)) {
911 NonConstIndex = Index;
919 if (
auto *OBO = dyn_cast<OverflowingBinaryOperator>(NonConstIndex))
920 if (OBO->hasNoSignedWrap() &&
923 isa<ConstantInt>(OBO->getOperand(1))) {
924 auto *OpScev = PSE.
getSCEV(OBO->getOperand(0));
926 if (
auto *OpAR = dyn_cast<SCEVAddRecExpr>(OpScev))
927 return OpAR->getLoop() == L && OpAR->getNoWrapFlags(
SCEV::FlagNSW);
936 bool Assume,
bool ShouldCheckWrap) {
941 auto *PtrTy = cast<PointerType>(Ty);
942 if (PtrTy->getElementType()->isAggregateType()) {
943 DEBUG(
dbgs() <<
"LAA: Bad stride - Not a pointer to a scalar type" << *Ptr
955 DEBUG(
dbgs() <<
"LAA: Bad stride - Not an AddRecExpr pointer " << *Ptr
956 <<
" SCEV: " << *PtrScev <<
"\n");
962 DEBUG(
dbgs() <<
"LAA: Bad stride - Not striding over innermost loop " <<
963 *Ptr <<
" SCEV: " << *AR <<
"\n");
975 bool IsNoWrapAddRec = !ShouldCheckWrap ||
978 bool IsInAddressSpaceZero = PtrTy->getAddressSpace() == 0;
979 if (!IsNoWrapAddRec && !IsInBoundsGEP && !IsInAddressSpaceZero) {
982 IsNoWrapAddRec =
true;
983 DEBUG(
dbgs() <<
"LAA: Pointer may wrap in the address space:\n"
984 <<
"LAA: Pointer: " << *Ptr <<
"\n"
985 <<
"LAA: SCEV: " << *AR <<
"\n"
986 <<
"LAA: Added an overflow assumption\n");
988 DEBUG(
dbgs() <<
"LAA: Bad stride - Pointer may wrap in the address space "
989 << *Ptr <<
" SCEV: " << *AR <<
"\n");
1000 DEBUG(
dbgs() <<
"LAA: Bad stride - Not a constant strided " << *Ptr <<
1001 " SCEV: " << *AR <<
"\n");
1005 auto &DL = Lp->
getHeader()->getModule()->getDataLayout();
1006 int64_t Size = DL.getTypeAllocSize(PtrTy->getElementType());
1016 int64_t Stride = StepVal / Size;
1017 int64_t Rem = StepVal % Size;
1024 if (!IsNoWrapAddRec && (IsInBoundsGEP || IsInAddressSpaceZero) &&
1025 Stride != 1 && Stride != -1) {
1028 DEBUG(
dbgs() <<
"LAA: Non unit strided pointer which is not either "
1029 <<
"inbouds or in address space 0 may wrap:\n"
1030 <<
"LAA: Pointer: " << *Ptr <<
"\n"
1031 <<
"LAA: SCEV: " << *AR <<
"\n"
1032 <<
"LAA: Added an overflow assumption\n");
1044 if (
auto *LI = dyn_cast<LoadInst>(I))
1045 return LI->getPointerOperand();
1046 if (
auto *
SI = dyn_cast<StoreInst>(I))
1047 return SI->getPointerOperand();
1054 if (
LoadInst *
L = dyn_cast<LoadInst>(I))
1055 return L->getPointerAddressSpace();
1056 if (
StoreInst *S = dyn_cast<StoreInst>(I))
1057 return S->getPointerAddressSpace();
1070 if (!PtrA || !PtrB || (ASA != ASB))
1082 Type *Ty = cast<PointerType>(PtrA->
getType())->getElementType();
1085 APInt OffsetA(PtrBitWidth, 0), OffsetB(PtrBitWidth, 0);
1098 return OffsetDelta == Size;
1110 return X == PtrSCEVB;
1117 case BackwardVectorizable:
1121 case ForwardButPreventsForwarding:
1123 case BackwardVectorizableButPreventsForwarding:
1133 case ForwardButPreventsForwarding:
1137 case BackwardVectorizable:
1139 case BackwardVectorizableButPreventsForwarding:
1152 case ForwardButPreventsForwarding:
1157 case BackwardVectorizable:
1159 case BackwardVectorizableButPreventsForwarding:
1165 bool MemoryDepChecker::couldPreventStoreLoadForward(uint64_t Distance,
1166 uint64_t TypeByteSize) {
1179 const uint64_t NumItersForStoreLoadThroughMemory = 8 * TypeByteSize;
1181 uint64_t MaxVFWithoutSLForwardIssues =
std::min(
1185 for (uint64_t VF = 2 * TypeByteSize; VF <= MaxVFWithoutSLForwardIssues;
1189 if (Distance % VF && Distance / VF < NumItersForStoreLoadThroughMemory) {
1190 MaxVFWithoutSLForwardIssues = (VF >>= 1);
1195 if (MaxVFWithoutSLForwardIssues < 2 * TypeByteSize) {
1196 DEBUG(
dbgs() <<
"LAA: Distance " << Distance
1197 <<
" that could cause a store-load forwarding conflict\n");
1201 if (MaxVFWithoutSLForwardIssues < MaxSafeDepDistBytes &&
1202 MaxVFWithoutSLForwardIssues !=
1204 MaxSafeDepDistBytes = MaxVFWithoutSLForwardIssues;
1214 uint64_t TypeByteSize) {
1215 assert(Stride > 1 &&
"The stride must be greater than 1");
1216 assert(TypeByteSize > 0 &&
"The type size in byte must be non-zero");
1217 assert(Distance > 0 &&
"The distance must be non-zero");
1220 if (Distance % TypeByteSize)
1223 uint64_t ScaledDist = Distance / TypeByteSize;
1241 return ScaledDist % Stride;
1245 MemoryDepChecker::isDependent(
const MemAccessInfo &
A,
unsigned AIdx,
1246 const MemAccessInfo &
B,
unsigned BIdx,
1248 assert (AIdx < BIdx &&
"Must pass arguments in program order");
1250 Value *APtr = A.getPointer();
1251 Value *BPtr = B.getPointer();
1252 bool AIsWrite = A.getInt();
1253 bool BIsWrite = B.getInt();
1256 if (!AIsWrite && !BIsWrite)
1257 return Dependence::NoDep;
1264 int64_t StrideAPtr =
getPtrStride(PSE, APtr, InnermostLoop, Strides,
true);
1265 int64_t StrideBPtr =
getPtrStride(PSE, BPtr, InnermostLoop, Strides,
true);
1267 const SCEV *Src = PSE.getSCEV(APtr);
1268 const SCEV *
Sink = PSE.getSCEV(BPtr);
1272 if (StrideAPtr < 0) {
1280 const SCEV *Dist = PSE.getSE()->getMinusSCEV(Sink, Src);
1282 DEBUG(
dbgs() <<
"LAA: Src Scev: " << *Src <<
"Sink Scev: " << *Sink
1283 <<
"(Induction step: " << StrideAPtr <<
")\n");
1284 DEBUG(
dbgs() <<
"LAA: Distance for " << *InstMap[AIdx] <<
" to "
1285 << *InstMap[BIdx] <<
": " << *Dist <<
"\n");
1290 if (!StrideAPtr || !StrideBPtr || StrideAPtr != StrideBPtr){
1291 DEBUG(
dbgs() <<
"Pointer access with non-constant stride\n");
1297 DEBUG(
dbgs() <<
"LAA: Dependence because of non-constant distance\n");
1298 ShouldRetryWithRuntimeCheck =
true;
1304 auto &
DL = InnermostLoop->getHeader()->getModule()->getDataLayout();
1305 uint64_t TypeByteSize =
DL.getTypeAllocSize(ATy);
1309 uint64_t Stride =
std::abs(StrideAPtr);
1312 if (
std::abs(Distance) > 0 && Stride > 1 && ATy == BTy &&
1314 DEBUG(
dbgs() <<
"LAA: Strided accesses are independent\n");
1315 return Dependence::NoDep;
1320 bool IsTrueDataDependence = (AIsWrite && !BIsWrite);
1322 (couldPreventStoreLoadForward(Val.
abs().
getZExtValue(), TypeByteSize) ||
1324 DEBUG(
dbgs() <<
"LAA: Forward but may prevent st->ld forwarding\n");
1325 return Dependence::ForwardButPreventsForwarding;
1328 DEBUG(
dbgs() <<
"LAA: Dependence is negative\n");
1329 return Dependence::Forward;
1336 return Dependence::Forward;
1337 DEBUG(
dbgs() <<
"LAA: Zero dependence difference but different types\n");
1345 "LAA: ReadWrite-Write positive dependency with different types\n");
1355 unsigned MinNumIter = std::max(ForcedFactor * ForcedUnroll, 2U);
1383 uint64_t MinDistanceNeeded =
1384 TypeByteSize * Stride * (MinNumIter - 1) + TypeByteSize;
1385 if (MinDistanceNeeded > static_cast<uint64_t>(Distance)) {
1386 DEBUG(
dbgs() <<
"LAA: Failure because of positive distance " << Distance
1388 return Dependence::Backward;
1392 if (MinDistanceNeeded > MaxSafeDepDistBytes) {
1393 DEBUG(
dbgs() <<
"LAA: Failure because it needs at least "
1394 << MinDistanceNeeded <<
" size in bytes");
1395 return Dependence::Backward;
1414 MaxSafeDepDistBytes =
1415 std::min(static_cast<uint64_t>(Distance), MaxSafeDepDistBytes);
1417 bool IsTrueDataDependence = (!AIsWrite && BIsWrite);
1419 couldPreventStoreLoadForward(Distance, TypeByteSize))
1420 return Dependence::BackwardVectorizableButPreventsForwarding;
1423 <<
" with max VF = "
1424 << MaxSafeDepDistBytes / (TypeByteSize * Stride) <<
'\n');
1426 return Dependence::BackwardVectorizable;
1433 MaxSafeDepDistBytes = -1;
1434 while (!CheckDeps.
empty()) {
1449 CheckDeps.
erase(*AI);
1453 for (std::vector<unsigned>::iterator I1 = Accesses[*AI].
begin(),
1454 I1E = Accesses[*AI].
end(); I1 != I1E; ++I1)
1455 for (std::vector<unsigned>::iterator I2 = Accesses[*OI].
begin(),
1456 I2E = Accesses[*OI].
end(); I2 != I2E; ++I2) {
1457 auto A = std::make_pair(&*AI, *I1);
1458 auto B = std::make_pair(&*OI, *I2);
1465 isDependent(*A.first, A.second, *B.first, B.second, Strides);
1466 SafeForVectorization &= Dependence::isSafeForVectorization(Type);
1472 if (RecordDependences) {
1473 if (Type != Dependence::NoDep)
1474 Dependences.push_back(
Dependence(A.second, B.second, Type));
1477 RecordDependences =
false;
1478 Dependences.clear();
1479 DEBUG(
dbgs() <<
"Too many dependences, stopped recording\n");
1482 if (!RecordDependences && !SafeForVectorization)
1491 DEBUG(
dbgs() <<
"Total Dependences: " << Dependences.size() <<
"\n");
1492 return SafeForVectorization;
1498 auto &IndexVector = Accesses.find(Access)->second;
1502 std::back_inserter(Insts),
1503 [&](
unsigned Idx) {
return this->InstMap[Idx]; });
1508 "NoDep",
"Unknown",
"Forward",
"ForwardButPreventsForwarding",
"Backward",
1509 "BackwardVectorizable",
"BackwardVectorizableButPreventsForwarding"};
1516 OS.
indent(Depth + 2) << *Instrs[Destination] <<
"\n";
1519 bool LoopAccessInfo::canAnalyzeLoop() {
1522 << TheLoop->
getHeader()->getParent()->getName() <<
": "
1523 << TheLoop->
getHeader()->getName() <<
'\n');
1526 if (!TheLoop->
empty()) {
1527 DEBUG(
dbgs() <<
"LAA: loop is not the innermost loop\n");
1528 recordAnalysis(
"NotInnerMostLoop") <<
"loop is not the innermost loop";
1534 DEBUG(
dbgs() <<
"LAA: loop control flow is not understood by analyzer\n");
1535 recordAnalysis(
"CFGNotUnderstood")
1536 <<
"loop control flow is not understood by analyzer";
1542 DEBUG(
dbgs() <<
"LAA: loop control flow is not understood by analyzer\n");
1543 recordAnalysis(
"CFGNotUnderstood")
1544 <<
"loop control flow is not understood by analyzer";
1552 DEBUG(
dbgs() <<
"LAA: loop control flow is not understood by analyzer\n");
1553 recordAnalysis(
"CFGNotUnderstood")
1554 <<
"loop control flow is not understood by analyzer";
1559 const SCEV *ExitCount = PSE->getBackedgeTakenCount();
1560 if (ExitCount == PSE->getSE()->getCouldNotCompute()) {
1561 recordAnalysis(
"CantComputeNumberOfIterations")
1562 <<
"could not determine number of loop iterations";
1563 DEBUG(
dbgs() <<
"LAA: SCEV could not compute the loop exit count.\n");
1580 unsigned NumReads = 0;
1581 unsigned NumReadWrites = 0;
1583 PtrRtChecking->Pointers.
clear();
1584 PtrRtChecking->Need =
false;
1595 if (
I.mayReadFromMemory()) {
1605 if (
Call && !
Call->isNoBuiltin() &&
Call->getCalledFunction() &&
1610 if (!Ld || (!Ld->isSimple() && !IsAnnotatedParallel)) {
1611 recordAnalysis(
"NonSimpleLoad", Ld)
1612 <<
"read with atomic ordering or volatile read";
1613 DEBUG(
dbgs() <<
"LAA: Found a non-simple load.\n");
1619 DepChecker->addAccess(Ld);
1621 collectStridedAccess(Ld);
1626 if (
I.mayWriteToMemory()) {
1629 recordAnalysis(
"CantVectorizeInstruction", St)
1630 <<
"instruction cannot be vectorized";
1634 if (!St->isSimple() && !IsAnnotatedParallel) {
1635 recordAnalysis(
"NonSimpleStore", St)
1636 <<
"write with atomic ordering or volatile write";
1637 DEBUG(
dbgs() <<
"LAA: Found a non-simple store.\n");
1643 DepChecker->addAccess(St);
1645 collectStridedAccess(St);
1655 if (!Stores.
size()) {
1656 DEBUG(
dbgs() <<
"LAA: Found a read-only loop!\n");
1662 AccessAnalysis Accesses(TheLoop->
getHeader()->getModule()->getDataLayout(),
1663 AA, LI, DependentAccesses, *PSE);
1673 Value *Ptr =
ST->getPointerOperand();
1675 StoreToLoopInvariantAddress |= isUniform(Ptr);
1678 if (Seen.insert(Ptr).second) {
1685 if (blockNeedsPredication(
ST->getParent(), TheLoop, DT))
1688 Accesses.addStore(Loc);
1692 if (IsAnnotatedParallel) {
1694 <<
"LAA: A loop annotated parallel, ignore memory dependency "
1701 Value *Ptr =
LD->getPointerOperand();
1710 bool IsReadOnlyPtr =
false;
1711 if (Seen.insert(Ptr).second ||
1714 IsReadOnlyPtr =
true;
1721 if (blockNeedsPredication(
LD->getParent(), TheLoop, DT))
1724 Accesses.addLoad(Loc, IsReadOnlyPtr);
1729 if (NumReadWrites == 1 && NumReads == 0) {
1730 DEBUG(
dbgs() <<
"LAA: Found a write-only loop!\n");
1737 Accesses.buildDependenceSets();
1741 bool CanDoRTIfNeeded = Accesses.canCheckPtrAtRT(*PtrRtChecking, PSE->getSE(),
1742 TheLoop, SymbolicStrides);
1743 if (!CanDoRTIfNeeded) {
1744 recordAnalysis(
"CantIdentifyArrayBounds") <<
"cannot identify array bounds";
1745 DEBUG(
dbgs() <<
"LAA: We can't vectorize because we can't find "
1746 <<
"the array bounds.\n");
1751 DEBUG(
dbgs() <<
"LAA: We can perform a memory runtime check if needed.\n");
1754 if (Accesses.isDependencyCheckNeeded()) {
1755 DEBUG(
dbgs() <<
"LAA: Checking memory dependencies\n");
1756 CanVecMem = DepChecker->areDepsSafe(
1757 DependentAccesses, Accesses.getDependenciesToCheck(), SymbolicStrides);
1758 MaxSafeDepDistBytes = DepChecker->getMaxSafeDepDistBytes();
1760 if (!CanVecMem && DepChecker->shouldRetryWithRuntimeCheck()) {
1761 DEBUG(
dbgs() <<
"LAA: Retrying with memory checks\n");
1764 Accesses.resetDepChecks(*DepChecker);
1766 PtrRtChecking->reset();
1767 PtrRtChecking->Need =
true;
1769 auto *SE = PSE->getSE();
1770 CanDoRTIfNeeded = Accesses.canCheckPtrAtRT(*PtrRtChecking, SE, TheLoop,
1771 SymbolicStrides,
true);
1774 if (!CanDoRTIfNeeded) {
1775 recordAnalysis(
"CantCheckMemDepsAtRunTime")
1776 <<
"cannot check memory dependencies at runtime";
1777 DEBUG(
dbgs() <<
"LAA: Can't vectorize with memory checks\n");
1787 DEBUG(
dbgs() <<
"LAA: No unsafe dependent memory operations in loop. We"
1788 << (PtrRtChecking->Need ?
"" :
" don't")
1789 <<
" need runtime memory checks.\n");
1791 recordAnalysis(
"UnsafeMemDep")
1792 <<
"unsafe dependent memory operations in loop. Use "
1793 "#pragma loop distribute(enable) to allow loop distribution "
1794 "to attempt to isolate the offending operations into a separate "
1796 DEBUG(
dbgs() <<
"LAA: unsafe dependent memory operations in loop\n");
1811 assert(!Report &&
"Multiple reports generated");
1824 Report = make_unique<OptimizationRemarkAnalysis>(
DEBUG_TYPE, RemarkName, DL,
1830 auto *SE = PSE->getSE();
1857 struct PointerBounds {
1866 static PointerBounds
1880 DEBUG(
dbgs() <<
"LAA: Adding RT check for a loop invariant ptr:" << *Ptr
1888 return {NewPtr, NewPtr};
1890 Value *Start =
nullptr, *
End =
nullptr;
1891 DEBUG(
dbgs() <<
"LAA: Adding RT check for range:\n");
1895 return {Start,
End};
1910 PointerChecks, std::back_inserter(ChecksWithBounds),
1913 First =
expandBounds(Check.first, L, Loc, Exp, SE, PtrRtChecking),
1914 Second =
expandBounds(Check.second, L, Loc, Exp, SE, PtrRtChecking);
1915 return std::make_pair(First, Second);
1918 return ChecksWithBounds;
1926 auto *SE = PSE->getSE();
1928 auto ExpandedChecks =
1929 expandBounds(PointerChecks, TheLoop, Loc, SE, Exp, *PtrRtChecking);
1935 Value *MemoryRuntimeCheck =
nullptr;
1937 for (
const auto &
Check : ExpandedChecks) {
1938 const PointerBounds &A =
Check.first, &B =
Check.second;
1941 unsigned AS0 = A.Start->getType()->getPointerAddressSpace();
1942 unsigned AS1 = B.Start->getType()->getPointerAddressSpace();
1944 assert((AS0 == B.End->getType()->getPointerAddressSpace()) &&
1945 (AS1 == A.End->getType()->getPointerAddressSpace()) &&
1946 "Trying to bounds check pointers with different address spaces");
1968 Value *IsConflict = ChkBuilder.
CreateAnd(Cmp0, Cmp1,
"found.conflict");
1970 if (MemoryRuntimeCheck) {
1972 ChkBuilder.
CreateOr(MemoryRuntimeCheck, IsConflict,
"conflict.rdx");
1975 MemoryRuntimeCheck = IsConflict;
1978 if (!MemoryRuntimeCheck)
1979 return std::make_pair(
nullptr,
nullptr);
1986 ChkBuilder.
Insert(Check,
"memcheck.conflict");
1988 return std::make_pair(FirstInst, Check);
1991 std::pair<Instruction *, Instruction *>
1993 if (!PtrRtChecking->Need)
1994 return std::make_pair(
nullptr,
nullptr);
1996 return addRuntimeChecks(Loc, PtrRtChecking->getChecks());
1999 void LoopAccessInfo::collectStridedAccess(
Value *MemAccess) {
2000 Value *Ptr =
nullptr;
2001 if (
LoadInst *LI = dyn_cast<LoadInst>(MemAccess))
2002 Ptr = LI->getPointerOperand();
2003 else if (
StoreInst *SI = dyn_cast<StoreInst>(MemAccess))
2004 Ptr =
SI->getPointerOperand();
2012 DEBUG(
dbgs() <<
"LAA: Found a strided access that we can version");
2013 DEBUG(
dbgs() <<
" Ptr: " << *Ptr <<
" Stride: " << *Stride <<
"\n");
2014 SymbolicStrides[
Ptr] = Stride;
2015 StrideSet.insert(Stride);
2024 NumLoads(0), NumStores(0), MaxSafeDepDistBytes(-1), CanVecMem(
false),
2025 StoreToLoopInvariantAddress(
false) {
2026 if (canAnalyzeLoop())
2027 analyzeLoop(AA, LI, TLI, DT);
2032 OS.
indent(Depth) <<
"Memory dependences are safe";
2033 if (MaxSafeDepDistBytes != -1ULL)
2034 OS <<
" with a maximum dependence distance of " << MaxSafeDepDistBytes
2036 if (PtrRtChecking->Need)
2037 OS <<
" with run-time checks";
2042 OS.
indent(Depth) <<
"Report: " << Report->getMsg() <<
"\n";
2044 if (
auto *Dependences = DepChecker->getDependences()) {
2045 OS.
indent(Depth) <<
"Dependences:\n";
2046 for (
auto &Dep : *Dependences) {
2047 Dep.print(OS, Depth + 2, DepChecker->getMemoryInstructions());
2051 OS.
indent(Depth) <<
"Too many dependences, not recorded\n";
2054 PtrRtChecking->print(OS, Depth);
2057 OS.
indent(Depth) <<
"Store to invariant address was "
2058 << (StoreToLoopInvariantAddress ?
"" :
"not ")
2059 <<
"found in loop.\n";
2061 OS.
indent(Depth) <<
"SCEV assumptions:\n";
2062 PSE->getUnionPredicate().print(OS, Depth);
2066 OS.
indent(Depth) <<
"Expressions re-written:\n";
2067 PSE->print(OS, Depth);
2071 auto &LAI = LoopAccessInfoMap[
L];
2074 LAI = llvm::make_unique<LoopAccessInfo>(
L, SE, TLI, AA, DT, LI);
2082 for (
Loop *TopLevelLoop : *LI)
2091 SE = &getAnalysis<ScalarEvolutionWrapperPass>().getSE();
2092 auto *TLIP = getAnalysisIfAvailable<TargetLibraryInfoWrapperPass>();
2093 TLI = TLIP ? &TLIP->getTLI() :
nullptr;
2094 AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
2095 DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
2096 LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
2112 #define LAA_NAME "loop-accesses"
2125 return LoopAccessInfo(&L, &AR.SE, &AR.TLI, &AR.AA, &AR.DT, &AR.LI);
NoWrapFlags getNoWrapFlags(NoWrapFlags Mask=NoWrapMask) const
bool isPossiblyBackward() const
May be a lexically backward dependence type (includes Unknown).
unsigned getNumBackEdges() const
Calculate the number of back edges to the loop header.
void print(raw_ostream &OS, unsigned Depth, const SmallVectorImpl< Instruction * > &Instrs) const
Print the dependence.
Pass interface - Implemented by all 'passes'.
void print(raw_ostream &OS, unsigned Depth=0) const
Print the information about the memory accesses in the loop.
static unsigned RuntimeMemoryCheckThreshold
\brief When performing memory disambiguation checks at runtime do not make more than this number of c...
static bool Check(DecodeStatus &Out, DecodeStatus In)
static const char laa_name[]
Value * getStrideFromPointer(Value *Ptr, ScalarEvolution *SE, Loop *Lp)
Get the stride of a pointer access in a loop.
const SCEV * evaluateAtIteration(const SCEV *It, ScalarEvolution &SE) const
Return the value of this chain of recurrences at the specified iteration number.
void push_back(const T &Elt)
A parsed version of the target data layout string in and methods for querying it. ...
const_iterator end(StringRef path)
Get end iterator over path.
SmallVector< CheckingPtrGroup, 2 > CheckingGroups
Holds a partitioning of pointers into "check groups".
void print(raw_ostream &OS, unsigned Depth=0) const
Print the list run-time memory checks necessary.
void setNoOverflow(Value *V, SCEVWrapPredicate::IncrementWrapFlags Flags)
Proves that V doesn't overflow by adding SCEV predicate.
uint64_t getZExtValue() const
Get zero extended value.
const SCEV * getConstant(ConstantInt *V)
static bool isInBoundsGep(Value *Ptr)
MDNode * TBAA
The tag for type-based alias analysis.
Value * CreateICmpULT(Value *LHS, Value *RHS, const Twine &Name="")
A Module instance is used to store all the information related to an LLVM module. ...
bool isAnnotatedParallel() const
Returns true if the loop is annotated parallel.
DebugLoc getStartLoc() const
Return the debug location of the start of this loop.
This provides a very simple, boring adaptor for a begin and end iterator into a range type...
The main scalar evolution driver.
This class represents a function call, abstracting a target machine's calling convention.
int64_t getPtrStride(PredicatedScalarEvolution &PSE, Value *Ptr, const Loop *Lp, const ValueToValueMap &StridesMap=ValueToValueMap(), bool Assume=false, bool ShouldCheckWrap=true)
If the pointer has a constant stride return it in units of its element size.
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - This function should be overriden by passes that need analysis information to do t...
raw_ostream & indent(unsigned NumSpaces)
indent - Insert 'NumSpaces' spaces.
bool IsWritePtr
Holds the information if this pointer is used for writing to memory.
std::pair< Instruction *, Instruction * > addRuntimeChecks(Instruction *Loc) const
Add code that checks at runtime if the accessed arrays overlap.
const SCEV * getStepRecurrence(ScalarEvolution &SE) const
Constructs and returns the recurrence indicating how much this expression steps by.
void reset()
Reset the state of the pointer runtime information.
const_iterator begin(StringRef path)
Get begin iterator over path.
bool isLoopInvariant(const SCEV *S, const Loop *L)
Return true if the value of the given SCEV is unchanging in the specified loop.
The adaptor from a function pass to a loop pass computes these analyses and makes them available to t...
An instruction for reading from memory.
member_iterator unionSets(const ElemTy &V1, const ElemTy &V2)
union - Merge the two equivalence sets for the specified values, inserting them if they do not alread...
void GetUnderlyingObjects(Value *V, SmallVectorImpl< Value * > &Objects, const DataLayout &DL, LoopInfo *LI=nullptr, unsigned MaxLookup=6)
This method is similar to GetUnderlyingObject except that it can look through phi and select instruct...
Checks memory dependences among accesses to the same underlying object to determine whether there vec...
static unsigned VectorizationFactor
VF as overridden by the user.
member_iterator member_begin(iterator I) const
BlockT * getHeader() const
static cl::opt< unsigned > MaxDependences("max-dependences", cl::Hidden, cl::desc("Maximum number of dependences collected by ""loop-access analysis (default = 100)"), cl::init(100))
We collect dependences up to this threshold.
static bool areStridedAccessesIndependent(uint64_t Distance, uint64_t Stride, uint64_t TypeByteSize)
Check the dependence for two accesses with the same stride Stride.
Type * getPointerElementType() const
const SCEV * getStart() const
Intrinsic::ID getVectorIntrinsicIDForCall(const CallInst *CI, const TargetLibraryInfo *TLI)
Returns intrinsic ID for call.
BlockT * getLoopLatch() const
If there is a single latch block for this loop, return it.
void printChecks(raw_ostream &OS, const SmallVectorImpl< PointerCheck > &Checks, unsigned Depth=0) const
Print Checks.
bool isNegative() const
Determine sign of this APInt.
void insert(Loop *Lp, Value *Ptr, bool WritePtr, unsigned DepSetId, unsigned ASId, const ValueToValueMap &Strides, PredicatedScalarEvolution &PSE)
Insert a pointer and calculate the start and end SCEVs.
AnalysisUsage & addRequired()
#define INITIALIZE_PASS_DEPENDENCY(depName)
static const unsigned MaxVectorWidth
Maximum SIMD width.
static bool arePointersInSamePartition(const SmallVectorImpl< int > &PtrToPartition, unsigned PtrIdx1, unsigned PtrIdx2)
Check if pointers are in the same partition.
member_iterator member_end() const
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
Value * stripAndAccumulateInBoundsConstantOffsets(const DataLayout &DL, APInt &Offset)
Accumulate offsets from stripInBoundsConstantOffsets().
void addPredicate(const SCEVPredicate &Pred)
Adds a new predicate.
const Instruction * getInstr() const
This file implements a class to represent arbitrary precision integral constant values and operations...
static const char * DepName[]
String version of the types.
LLVM_NODISCARD bool empty() const
Value * CreateBitCast(Value *V, Type *DestTy, const Twine &Name="")
iterator findValue(const ElemTy &V) const
findValue - Return an iterator to the specified value.
This node represents a polynomial recurrence on the trip count of the specified loop.
static cl::opt< bool > EnableForwardingConflictDetection("store-to-load-forwarding-conflict-detection", cl::Hidden, cl::desc("Enable conflict detection in loop-access analysis"), cl::init(true))
Enable store-to-load forwarding conflict detection.
iterator insert(const ElemTy &Data)
insert - Insert a new value into the union/find set, ignoring the request if the value already exists...
Function Alias Analysis false
const SCEVAddRecExpr * getAsAddRec(Value *V)
Attempts to produce an AddRecExpr for V by adding additional SCEV predicates.
This header provides classes for managing per-loop analyses.
InstTy * Insert(InstTy *I, const Twine &Name="") const
Insert and return the specified instruction.
std::set< ECValue >::const_iterator iterator
iterator* - Provides a way to iterate over all values in the set.
static GCRegistry::Add< OcamlGC > B("ocaml","ocaml 3.10-compatible GC")
An instruction for storing to memory.
static Instruction * getFirstInst(Instruction *FirstInst, Value *V, Instruction *Loc)
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree...
Maximum length of the test input libFuzzer tries to guess a good value based on the corpus and reports it always prefer smaller inputs during the corpus shuffle When libFuzzer itself reports a bug this exit code will be used If indicates the maximal total time in seconds to run the fuzzer minimizes the provided crash input Use with etc Experimental Use value profile to guide fuzzing Number of simultaneous worker processes to run the jobs If min(jobs, NumberOfCpuCores()/2)\" is used.") FUZZER_FLAG_INT(reload
iterator_range< block_iterator > blocks() const
const SCEV * getSCEV(Value *V)
Returns the SCEV expression of V, in the context of the current SCEV predicate.
Value * CreateOr(Value *LHS, Value *RHS, const Twine &Name="")
Optimization analysis message produced during vectorization.
bool isConsecutiveAccess(Value *A, Value *B, const DataLayout &DL, ScalarEvolution &SE, bool CheckType=true)
Returns true if the memory operations A and B are consecutive.
an instruction for type-safe pointer arithmetic to access elements of arrays and structs ...
static MemoryLocation get(const LoadInst *LI)
Return a location with information about the memory reference by the given instruction.
initializer< Ty > init(const Ty &Val)
bool isAffine() const
Return true if this represents an expression A + B*x where A and B are loop invariant values...
bool isSCEVable(Type *Ty) const
Test if values of the given type are analyzable within the SCEV framework.
const SCEV * getOne(Type *Ty)
Return a SCEV for the constant 1 of a specific type.
std::enable_if<!std::is_array< T >::value, std::unique_ptr< T > >::type make_unique(Args &&...args)
Constructs a new T() with the given args and returns a unique_ptr<T> which owns the object...
LLVM Basic Block Representation.
PointerIntPair - This class implements a pair of a pointer and small integer.
The instances of the Type class are immutable: once they are created, they are never changed...
This is an important class for using LLVM in a threaded context.
Type * getType() const
Return the LLVM type of this SCEV expression.
Value handle that tracks a Value across RAUW.
This analysis provides dependence information for the memory accesses of a loop.
int64_t getSExtValue() const
Get sign extended value.
SmallSet - This maintains a set of unique values, optimizing for the case when the set is small (less...
This file contains the declarations for the subclasses of Constant, which represent the different fla...
bool addPointer(unsigned Index)
Tries to add the pointer recorded in RtCheck at index Index to this pointer checking group...
SmallVector< Instruction *, 4 > getInstructionsForAccess(Value *Ptr, bool isWrite) const
Find the set of instructions that read or write via Ptr.
EquivalenceClasses - This represents a collection of equivalence classes and supports three efficient...
const DebugLoc & getDebugLoc() const
Return the debug location for this node as a DebugLoc.
Represent the analysis usage information of a pass.
bool contains(const LoopT *L) const
Return true if the specified loop is contained within in this loop.
static GCMetadataPrinterRegistry::Add< ErlangGCPrinter > X("erlang","erlang-compatible garbage collector")
const SCEV * getMinusSCEV(const SCEV *LHS, const SCEV *RHS, SCEV::NoWrapFlags Flags=SCEV::FlagAnyWrap)
Return LHS-RHS. Minus is represented in SCEV as A+B*-1.
INITIALIZE_PASS_END(RegBankSelect, DEBUG_TYPE,"Assign register bank of generic virtual registers", false, false) RegBankSelect
unsigned getBitWidth() const
Return the number of bits in the APInt.
Value * expandCodeFor(const SCEV *SH, Type *Ty, Instruction *I)
Insert code to directly compute the specified SCEV expression into the program.
static const unsigned End
BlockT * getExitingBlock() const
If getExitingBlocks would return exactly one block, return that block.
const SCEV * replaceSymbolicStrideSCEV(PredicatedScalarEvolution &PSE, const ValueToValueMap &PtrToStride, Value *Ptr, Value *OrigPtr=nullptr)
Return the SCEV corresponding to a pointer with the symbolic stride replaced with constant one...
std::pair< NoneType, bool > insert(const T &V)
insert - Insert an element into the set if it isn't already there.
LLVM_NODISCARD bool empty() const
const APInt & getAPInt() const
bool isPointerTy() const
True if this is an instance of PointerType.
static bool hasComputableBounds(PredicatedScalarEvolution &PSE, const ValueToValueMap &Strides, Value *Ptr, Loop *L)
Check whether a pointer can participate in a runtime bounds check.
LLVMContext & getContext() const
All values hold a context through their type.
std::pair< const CheckingPtrGroup *, const CheckingPtrGroup * > PointerCheck
A memcheck which made up of a pair of grouped pointers.
static PointerType * getInt8PtrTy(LLVMContext &C, unsigned AS=0)
Value * stripIntegerCast(Value *V)
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
unsigned AliasSetId
Holds the id of the disjoint alias set to which this pointer belongs.
DepType
The type of the dependence.
SmallVector< unsigned, 2 > Members
Indices of all the pointers that constitute this grouping.
bool dominates(const Instruction *Def, const Use &U) const
Return true if Def dominates a use in User.
size_type count(const T &V) const
count - Return 1 if the element is in the set, 0 otherwise.
const Value * Ptr
The address of the start of the location.
Representation for a specific memory location.
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
bool hasNoOverflow(Value *V, SCEVWrapPredicate::IncrementWrapFlags Flags)
Returns true if we've proved that V doesn't wrap by means of a SCEV predicate.
static bool isNoWrapAddRec(Value *Ptr, const SCEVAddRecExpr *AR, PredicatedScalarEvolution &PSE, const Loop *L)
Return true if an AddRec pointer Ptr is unsigned non-wrapping, i.e.
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements...
bool isBackward() const
Lexically backward dependence.
bool erase(PtrType Ptr)
erase - If the set contains the specified pointer, remove it and return true, otherwise return false...
bool runOnFunction(Function &F) override
runOnFunction - Virtual method overridden by subclasses to do the per-function processing of the pass...
bool isUniform(Value *V) const
Returns true if the value V is uniform within the loop.
unsigned getScalarSizeInBits() const LLVM_READONLY
If this is a vector type, return the getPrimitiveSizeInBits value for the element type...
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small...
Type * getType() const
All values are typed, get the type of this value.
static const SCEV * getMinFromExprs(const SCEV *I, const SCEV *J, ScalarEvolution *SE)
Compare I and J and return the minimum.
Provides information about what library functions are available for the current target.
An interface layer with SCEV used to manage how we see SCEV expressions for values in the context of ...
bool isForward() const
Lexically forward dependence.
const SCEV * Low
The SCEV expression which represents the lower bound of all the pointers in this group.
#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)
bool isStrictlyPositive() const
Determine if this APInt Value is strictly positive (greater than zero).
Drive the analysis of memory accesses in the loop.
static Constant * get(Type *Ty, uint64_t V, bool isSigned=false)
If Ty is a vector type, return a Constant with a splat of the given value.
ConstantInt * getValue() const
const SCEV * getUMaxExpr(const SCEV *LHS, const SCEV *RHS)
static ConstantInt * getTrue(LLVMContext &Context)
static GCRegistry::Add< ShadowStackGC > C("shadow-stack","Very portable GC for uncooperative code generators")
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
const LoopAccessInfo & getInfo(Loop *L)
Query the result of the loop access information for the loop L.
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Class for arbitrary precision integers.
void generateChecks(MemoryDepChecker::DepCandidates &DepCands, bool UseDependencies)
Generate the checks and store it.
void setPreservesAll()
Set by analyses that do not transform their input at all.
This class uses information about analyze scalars to rewrite expressions in canonical form...
const SCEV * getAddExpr(SmallVectorImpl< const SCEV * > &Ops, SCEV::NoWrapFlags Flags=SCEV::FlagAnyWrap)
Get a canonical add expression, or something simpler if possible.
Holds information about the memory runtime legality checks to verify that a group of pointers do not ...
static bool isSafeForVectorization(DepType Type)
Dependence types that don't prevent vectorization.
unsigned getNumberOfChecks() const
Returns the number of run-time checks required according to needsChecking.
const SCEV * getBackedgeTakenCount()
Get the (predicated) backedge count for the analyzed loop.
This analysis provides dependence information for the memory accesses of a loop.
AAMDNodes AATags
The metadata nodes which describes the aliasing of the location (each member is null if that kind of ...
static PointerBounds expandBounds(const RuntimePointerChecking::CheckingPtrGroup *CG, Loop *TheLoop, Instruction *Loc, SCEVExpander &Exp, ScalarEvolution *SE, const RuntimePointerChecking &PtrRtChecking)
Expand code for the lower and upper bound of the pointer group CG in TheLoop.
Dependence between memory access instructions.
This class represents an analyzed expression in the program.
const SCEV * High
The SCEV expression which represents the upper bound of all the pointers in this group.
static cl::opt< bool > EnableMemAccessVersioning("enable-mem-access-versioning", cl::init(true), cl::Hidden, cl::desc("Enable symbolic stride memory access versioning"))
This enables versioning on the strides of symbolically striding memory accesses in code like the foll...
bool isFunctionVectorizable(StringRef F, unsigned VF) const
Represents a single loop in the control flow graph.
ScalarEvolution * getSE() const
Returns the ScalarEvolution analysis used.
static bool isDependent(const MachineInstr &ProdMI, const MachineInstr &ConsMI)
This file provides utility analysis objects describing memory locations.
static void emitAnalysis(const LoopAccessReport &Message, const Loop *TheLoop, const char *PassName, OptimizationRemarkEmitter &ORE)
Emit an analysis note for PassName with the debug location from the instruction in Message if availab...
LLVM_ATTRIBUTE_ALWAYS_INLINE size_type size() const
APFloat abs(APFloat X)
Returns the absolute value of the argument.
unsigned getPointerSizeInBits(unsigned AS=0) const
Layout pointer size, in bits FIXME: The defaults need to be removed once all of the backends/clients ...
SmallVector< PointerInfo, 2 > Pointers
Information about the pointers that may require checking.
static cl::opt< unsigned, true > VectorizationFactor("force-vector-width", cl::Hidden, cl::desc("Sets the SIMD width. Zero is autoselect."), cl::location(VectorizerParams::VectorizationFactor))
iterator find(const KeyT &Val)
LLVM_NODISCARD std::enable_if<!is_simple_type< Y >::value, typename cast_retty< X, const Y >::ret_type >::type dyn_cast(const Y &Val)
static Value * getPointerOperand(Value *I)
Take the pointer operand from the Load/Store instruction.
const Loop * getLoop() const
uint64_t getTypeStoreSize(Type *Ty) const
Returns the maximum number of bytes that may be overwritten by storing the specified type...
static unsigned getAddressSpaceOperand(Value *I)
Take the address space operand from the Load/Store instruction.
bool areDepsSafe(DepCandidates &AccessSets, MemAccessInfoSet &CheckDeps, const ValueToValueMap &Strides)
Check whether the dependencies between the accesses are safe.
Value * CreateAnd(Value *LHS, Value *RHS, const Twine &Name="")
OutputIt transform(R &&Range, OutputIt d_first, UnaryPredicate P)
Wrapper function around std::transform to apply a function to a range and store the result elsewhere...
static LLVM_ATTRIBUTE_ALWAYS_INLINE bool CheckType(const unsigned char *MatcherTable, unsigned &MatcherIndex, SDValue N, const TargetLowering *TLI, const DataLayout &DL)
iterator_range< df_iterator< T > > depth_first(const T &G)
const SCEVPredicate * getEqualPredicate(const SCEVUnknown *LHS, const SCEVConstant *RHS)
static cl::opt< unsigned, true > VectorizationInterleave("force-vector-interleave", cl::Hidden, cl::desc("Sets the vectorization interleave count. ""Zero is autoselect."), cl::location(VectorizerParams::VectorizationInterleave))
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
const SCEV * getUMinExpr(const SCEV *LHS, const SCEV *RHS)
RuntimePointerChecking & RtCheck
Constitutes the context of this pointer checking group.
LLVM Value Representation.
const ElemTy & getLeaderValue(const ElemTy &V) const
getLeaderValue - Return the leader for the specified value that is in the set.
const SCEV * getSCEV(Value *V)
Return a SCEV expression for the full generality of the specified expression.
A vector that has set insertion semantics.
static bool blockNeedsPredication(BasicBlock *BB, Loop *TheLoop, DominatorTree *DT)
Return true if the block BB needs to be predicated in order for the loop to be vectorized.
bool needsChecking(const CheckingPtrGroup &M, const CheckingPtrGroup &N) const
Decide if we need to add a check between two groups of pointers, according to needsChecking.
This class implements an extremely fast bulk output stream that can only output to a stream...
The legacy pass manager's analysis pass to compute loop information.
static unsigned VectorizationInterleave
Interleave factor as overridden by the user.
StringRef - Represent a constant reference to a string, i.e.
A container for analyses that lazily runs them and caches their results.
Legacy analysis pass which computes a DominatorTree.
A wrapper pass to provide the legacy pass manager access to a suitably prepared AAResults object...
APInt abs() const
Get the absolute value.
This header defines various interfaces for pass management in LLVM.
static bool isInterleaveForced()
True if force-vector-interleave was specified by the user.
unsigned getPointerAddressSpace() const
Get the address space of this pointer or pointer vector type.
static GCRegistry::Add< ErlangGC > A("erlang","erlang-compatible garbage collector")
A special type used by analysis passes to provide an address that identifies that particular analysis...
LocationClass< Ty > location(Ty &L)
const BasicBlock * getParent() const
void print(raw_ostream &OS, const Module *M=nullptr) const override
Print the result of the analysis when invoked with -analyze.
static cl::opt< unsigned > MemoryCheckMergeThreshold("memory-check-merge-threshold", cl::Hidden, cl::desc("Maximum number of comparisons done when trying to merge ""runtime memory checks. (default = 100)"), cl::init(100))
The maximum iterations used to merge memory checks.
unsigned DependencySetId
Holds the id of the set of pointers that could be dependent because of a shared underlying object...
static cl::opt< unsigned, true > RuntimeMemoryCheckThreshold("runtime-memory-check-threshold", cl::Hidden, cl::desc("When performing memory disambiguation checks at runtime do not ""generate more than this number of comparisons (default = 8)."), cl::location(VectorizerParams::RuntimeMemoryCheckThreshold), cl::init(8))
static bool isNoWrap(PredicatedScalarEvolution &PSE, const ValueToValueMap &Strides, Value *Ptr, Loop *L)
Check whether a pointer address cannot wrap.
This class represents a constant integer value.
bool Need
This flag indicates if we need to add the runtime check.
LoopAccessInfo(Loop *L, ScalarEvolution *SE, const TargetLibraryInfo *TLI, AliasAnalysis *AA, DominatorTree *DT, LoopInfo *LI)