Bug Summary

File: llvm/lib/Analysis/LoopAccessAnalysis.cpp
Warning: line 1314, column 38
Although the value stored to 'VF' is used in the enclosing expression, the value is never actually read from 'VF'
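This diagnostic comes from the dead-store ("dead nested assignment") check: an assignment's result feeds the enclosing expression, but the variable itself is never read afterwards. A minimal sketch of the flagged pattern, with hypothetical names rather than the actual code at line 1314:

unsigned example(unsigned N) {
  unsigned VF = 0;
  // The assignment (VF = N) produces a value consumed by the '+',
  // but 'VF' is never read again afterwards, so the store is dead.
  unsigned R = (VF = N) + 1;
  return R;
}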

Annotated Source Code


clang -cc1 -triple x86_64-pc-linux-gnu -analyze -disable-free -disable-llvm-verifier -discard-value-names -main-file-name LoopAccessAnalysis.cpp -analyzer-store=region -analyzer-opt-analyze-nested-blocks -analyzer-checker=core -analyzer-checker=apiModeling -analyzer-checker=unix -analyzer-checker=deadcode -analyzer-checker=cplusplus -analyzer-checker=security.insecureAPI.UncheckedReturn -analyzer-checker=security.insecureAPI.getpw -analyzer-checker=security.insecureAPI.gets -analyzer-checker=security.insecureAPI.mktemp -analyzer-checker=security.insecureAPI.mkstemp -analyzer-checker=security.insecureAPI.vfork -analyzer-checker=nullability.NullPassedToNonnull -analyzer-checker=nullability.NullReturnedFromNonnull -analyzer-output plist -w -setup-static-analyzer -analyzer-config-compatibility-mode=true -mrelocation-model pic -pic-level 2 -mthread-model posix -mframe-pointer=none -fmath-errno -fno-rounding-math -masm-verbose -mconstructor-aliases -munwind-tables -fuse-init-array -target-cpu x86-64 -dwarf-column-info -debugger-tuning=gdb -ffunction-sections -fdata-sections -resource-dir /usr/lib/llvm-10/lib/clang/10.0.0 -D _DEBUG -D _GNU_SOURCE -D __STDC_CONSTANT_MACROS -D __STDC_FORMAT_MACROS -D __STDC_LIMIT_MACROS -I /build/llvm-toolchain-snapshot-10~+201911111502510600c19528f1809/build-llvm/lib/Analysis -I /build/llvm-toolchain-snapshot-10~+201911111502510600c19528f1809/llvm/lib/Analysis -I /build/llvm-toolchain-snapshot-10~+201911111502510600c19528f1809/build-llvm/include -I /build/llvm-toolchain-snapshot-10~+201911111502510600c19528f1809/llvm/include -U NDEBUG -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/6.3.0/../../../../include/c++/6.3.0 -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/6.3.0/../../../../include/x86_64-linux-gnu/c++/6.3.0 -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/6.3.0/../../../../include/x86_64-linux-gnu/c++/6.3.0 -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/6.3.0/../../../../include/c++/6.3.0/backward -internal-isystem /usr/local/include -internal-isystem /usr/lib/llvm-10/lib/clang/10.0.0/include -internal-externc-isystem /usr/include/x86_64-linux-gnu -internal-externc-isystem /include -internal-externc-isystem /usr/include -O2 -Wno-unused-parameter -Wwrite-strings -Wno-missing-field-initializers -Wno-long-long -Wno-maybe-uninitialized -Wno-comment -std=c++14 -fdeprecated-macro -fdebug-compilation-dir /build/llvm-toolchain-snapshot-10~+201911111502510600c19528f1809/build-llvm/lib/Analysis -fdebug-prefix-map=/build/llvm-toolchain-snapshot-10~+201911111502510600c19528f1809=. -ferror-limit 19 -fmessage-length 0 -fvisibility-inlines-hidden -stack-protector 2 -fgnuc-version=4.2.1 -fobjc-runtime=gcc -fdiagnostics-show-option -vectorize-loops -vectorize-slp -analyzer-output=html -analyzer-config stable-report-filename=true -faddrsig -o /tmp/scan-build-2019-12-09-002921-48462-1 -x c++ /build/llvm-toolchain-snapshot-10~+201911111502510600c19528f1809/llvm/lib/Analysis/LoopAccessAnalysis.cpp
1//===- LoopAccessAnalysis.cpp - Loop Access Analysis Implementation --------==//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// The implementation for the loop memory dependence analysis that was
10// originally developed for the loop vectorizer.
11//
12//===----------------------------------------------------------------------===//
13
14#include "llvm/Analysis/LoopAccessAnalysis.h"
15#include "llvm/ADT/APInt.h"
16#include "llvm/ADT/DenseMap.h"
17#include "llvm/ADT/DepthFirstIterator.h"
18#include "llvm/ADT/EquivalenceClasses.h"
19#include "llvm/ADT/PointerIntPair.h"
20#include "llvm/ADT/STLExtras.h"
21#include "llvm/ADT/SetVector.h"
22#include "llvm/ADT/SmallPtrSet.h"
23#include "llvm/ADT/SmallSet.h"
24#include "llvm/ADT/SmallVector.h"
25#include "llvm/ADT/iterator_range.h"
26#include "llvm/Analysis/AliasAnalysis.h"
27#include "llvm/Analysis/AliasSetTracker.h"
28#include "llvm/Analysis/LoopAnalysisManager.h"
29#include "llvm/Analysis/LoopInfo.h"
30#include "llvm/Analysis/MemoryLocation.h"
31#include "llvm/Analysis/OptimizationRemarkEmitter.h"
32#include "llvm/Analysis/ScalarEvolution.h"
33#include "llvm/Analysis/ScalarEvolutionExpander.h"
34#include "llvm/Analysis/ScalarEvolutionExpressions.h"
35#include "llvm/Analysis/TargetLibraryInfo.h"
36#include "llvm/Analysis/ValueTracking.h"
37#include "llvm/Analysis/VectorUtils.h"
38#include "llvm/IR/BasicBlock.h"
39#include "llvm/IR/Constants.h"
40#include "llvm/IR/DataLayout.h"
41#include "llvm/IR/DebugLoc.h"
42#include "llvm/IR/DerivedTypes.h"
43#include "llvm/IR/DiagnosticInfo.h"
44#include "llvm/IR/Dominators.h"
45#include "llvm/IR/Function.h"
46#include "llvm/IR/IRBuilder.h"
47#include "llvm/IR/InstrTypes.h"
48#include "llvm/IR/Instruction.h"
49#include "llvm/IR/Instructions.h"
50#include "llvm/IR/Operator.h"
51#include "llvm/IR/PassManager.h"
52#include "llvm/IR/Type.h"
53#include "llvm/IR/Value.h"
54#include "llvm/IR/ValueHandle.h"
55#include "llvm/Pass.h"
56#include "llvm/Support/Casting.h"
57#include "llvm/Support/CommandLine.h"
58#include "llvm/Support/Debug.h"
59#include "llvm/Support/ErrorHandling.h"
60#include "llvm/Support/raw_ostream.h"
61#include <algorithm>
62#include <cassert>
63#include <cstdint>
64#include <cstdlib>
65#include <iterator>
66#include <utility>
67#include <vector>
68
69using namespace llvm;
70
71#define DEBUG_TYPE "loop-accesses"
72
73static cl::opt<unsigned, true>
74VectorizationFactor("force-vector-width", cl::Hidden,
75 cl::desc("Sets the SIMD width. Zero is autoselect."),
76 cl::location(VectorizerParams::VectorizationFactor));
77unsigned VectorizerParams::VectorizationFactor;
78
79static cl::opt<unsigned, true>
80VectorizationInterleave("force-vector-interleave", cl::Hidden,
81 cl::desc("Sets the vectorization interleave count. "
82 "Zero is autoselect."),
83 cl::location(
84 VectorizerParams::VectorizationInterleave));
85unsigned VectorizerParams::VectorizationInterleave;
86
87static cl::opt<unsigned, true> RuntimeMemoryCheckThreshold(
88 "runtime-memory-check-threshold", cl::Hidden,
89 cl::desc("When performing memory disambiguation checks at runtime do not "
90 "generate more than this number of comparisons (default = 8)."),
91 cl::location(VectorizerParams::RuntimeMemoryCheckThreshold), cl::init(8));
92unsigned VectorizerParams::RuntimeMemoryCheckThreshold;
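These options use the two-parameter cl::opt<unsigned, true> form: the second template argument selects external storage, and cl::location routes the parsed value into the named VectorizerParams global so other code can read it without referencing the option object. A minimal sketch of the same pattern, using hypothetical names:

#include "llvm/Support/CommandLine.h"

// Hypothetical location-backed option: the parsed value is stored in
// ExampleWidth, not inside the cl::opt object itself.
static unsigned ExampleWidth;
static llvm::cl::opt<unsigned, true> // 'true' selects external storage
    ExampleWidthOpt("example-width", llvm::cl::Hidden,
                    llvm::cl::desc("Example width. Zero is autoselect."),
                    llvm::cl::location(ExampleWidth), llvm::cl::init(0));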
93
94/// The maximum number of iterations used to merge memory checks.
95static cl::opt<unsigned> MemoryCheckMergeThreshold(
96 "memory-check-merge-threshold", cl::Hidden,
97 cl::desc("Maximum number of comparisons done when trying to merge "
98 "runtime memory checks. (default = 100)"),
99 cl::init(100));
100
101/// Maximum SIMD width.
102const unsigned VectorizerParams::MaxVectorWidth = 64;
103
104/// We collect dependences up to this threshold.
105static cl::opt<unsigned>
106 MaxDependences("max-dependences", cl::Hidden,
107 cl::desc("Maximum number of dependences collected by "
108 "loop-access analysis (default = 100)"),
109 cl::init(100));
110
111/// This enables versioning on the strides of symbolically striding memory
112/// accesses in code like the following.
113/// for (i = 0; i < N; ++i)
114/// A[i * Stride1] += B[i * Stride2] ...
115///
116/// Will be roughly translated to
117/// if (Stride1 == 1 && Stride2 == 1) {
118/// for (i = 0; i < N; i+=4)
119/// A[i:i+3] += ...
120/// } else
121/// ...
122static cl::opt<bool> EnableMemAccessVersioning(
123 "enable-mem-access-versioning", cl::init(true), cl::Hidden,
124 cl::desc("Enable symbolic stride memory access versioning"));
125
126/// Enable store-to-load forwarding conflict detection. This option can
127/// be disabled for correctness testing.
128static cl::opt<bool> EnableForwardingConflictDetection(
129 "store-to-load-forwarding-conflict-detection", cl::Hidden,
130 cl::desc("Enable conflict detection in loop-access analysis"),
131 cl::init(true));
132
133bool VectorizerParams::isInterleaveForced() {
134 return ::VectorizationInterleave.getNumOccurrences() > 0;
135}
136
137Value *llvm::stripIntegerCast(Value *V) {
138 if (auto *CI = dyn_cast<CastInst>(V))
139 if (CI->getOperand(0)->getType()->isIntegerTy())
140 return CI->getOperand(0);
141 return V;
142}
143
144const SCEV *llvm::replaceSymbolicStrideSCEV(PredicatedScalarEvolution &PSE,
145 const ValueToValueMap &PtrToStride,
146 Value *Ptr, Value *OrigPtr) {
147 const SCEV *OrigSCEV = PSE.getSCEV(Ptr);
148
149 // If there is an entry in the map return the SCEV of the pointer with the
150 // symbolic stride replaced by one.
151 ValueToValueMap::const_iterator SI =
152 PtrToStride.find(OrigPtr ? OrigPtr : Ptr);
153 if (SI != PtrToStride.end()) {
154 Value *StrideVal = SI->second;
155
156 // Strip casts.
157 StrideVal = stripIntegerCast(StrideVal);
158
159 ScalarEvolution *SE = PSE.getSE();
160 const auto *U = cast<SCEVUnknown>(SE->getSCEV(StrideVal));
161 const auto *CT =
162 static_cast<const SCEVConstant *>(SE->getOne(StrideVal->getType()));
163
164 PSE.addPredicate(*SE->getEqualPredicate(U, CT));
165 auto *Expr = PSE.getSCEV(Ptr);
166
167 LLVM_DEBUG(dbgs() << "LAA: Replacing SCEV: " << *OrigSCEV
168 << " by: " << *Expr << "\n");
169 return Expr;
170 }
171
172 // Otherwise, just return the SCEV of the original pointer.
173 return OrigSCEV;
174}
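A worked illustration of the replacement (assumed SCEVs, not taken from this report): for an access A[i * Stride] whose pointer SCEV is {%A,+,(4 * %Stride)}<%loop>, an entry for %Stride in PtrToStride causes this function to record the predicate %Stride == 1 and return the rewritten SCEV {%A,+,4}<%loop>; downstream analysis can then treat the access as unit-stride, guarded by that runtime predicate.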
175
176/// Calculate Start and End points of memory access.
177/// Let's assume A is the first access and B is a memory access on N-th loop
178/// iteration. Then B is calculated as:
179/// B = A + Step*N.
180/// Step value may be positive or negative.
181/// N is the calculated back-edge taken count:
182/// N = (TripCount > 0) ? RoundDown(TripCount - 1, VF) : 0
183/// Start and End points are calculated in the following way:
184/// Start = UMIN(A, B) ; End = UMAX(A, B) + SizeOfElt,
185/// where SizeOfElt is the size of single memory access in bytes.
186///
187/// There is no conflict when the intervals are disjoint:
188/// NoConflict = (P2.Start >= P1.End) || (P1.Start >= P2.End)
189void RuntimePointerChecking::insert(Loop *Lp, Value *Ptr, bool WritePtr,
190 unsigned DepSetId, unsigned ASId,
191 const ValueToValueMap &Strides,
192 PredicatedScalarEvolution &PSE) {
193 // Get the stride replaced scev.
194 const SCEV *Sc = replaceSymbolicStrideSCEV(PSE, Strides, Ptr);
195 ScalarEvolution *SE = PSE.getSE();
196
197 const SCEV *ScStart;
198 const SCEV *ScEnd;
199
200 if (SE->isLoopInvariant(Sc, Lp))
201 ScStart = ScEnd = Sc;
202 else {
203 const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(Sc);
204 assert(AR && "Invalid addrec expression");
205 const SCEV *Ex = PSE.getBackedgeTakenCount();
206
207 ScStart = AR->getStart();
208 ScEnd = AR->evaluateAtIteration(Ex, *SE);
209 const SCEV *Step = AR->getStepRecurrence(*SE);
210
211 // For expressions with negative step, the upper bound is ScStart and the
212 // lower bound is ScEnd.
213 if (const auto *CStep = dyn_cast<SCEVConstant>(Step)) {
214 if (CStep->getValue()->isNegative())
215 std::swap(ScStart, ScEnd);
216 } else {
217 // Fallback case: the step is not constant, but we can still
218 // get the upper and lower bounds of the interval by using min/max
219 // expressions.
220 ScStart = SE->getUMinExpr(ScStart, ScEnd);
221 ScEnd = SE->getUMaxExpr(AR->getStart(), ScEnd);
222 }
223 // Add the size of the pointed element to ScEnd.
224 unsigned EltSize =
225 Ptr->getType()->getPointerElementType()->getScalarSizeInBits() / 8;
226 const SCEV *EltSizeSCEV = SE->getConstant(ScEnd->getType(), EltSize);
227 ScEnd = SE->getAddExpr(ScEnd, EltSizeSCEV);
228 }
229
230 Pointers.emplace_back(Ptr, ScStart, ScEnd, WritePtr, DepSetId, ASId, Sc);
231}
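A worked example of the interval computation described in the comment above (illustrative numbers only):

A (first access)        = %base
Step                    = -4 bytes per iteration
N (back-edge count)     = 99
B = A + Step*N          = %base - 396
Step is negative        -> swap, so ScStart = %base - 396, ScEnd = %base
SizeOfElt               = 4
End = ScEnd + SizeOfElt = %base + 4

giving the access interval [%base - 396, %base + 4).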
232
233SmallVector<RuntimePointerChecking::PointerCheck, 4>
234RuntimePointerChecking::generateChecks() const {
235 SmallVector<PointerCheck, 4> Checks;
236
237 for (unsigned I = 0; I < CheckingGroups.size(); ++I) {
238 for (unsigned J = I + 1; J < CheckingGroups.size(); ++J) {
239 const RuntimePointerChecking::CheckingPtrGroup &CGI = CheckingGroups[I];
240 const RuntimePointerChecking::CheckingPtrGroup &CGJ = CheckingGroups[J];
241
242 if (needsChecking(CGI, CGJ))
243 Checks.push_back(std::make_pair(&CGI, &CGJ));
244 }
245 }
246 return Checks;
247}
248
249void RuntimePointerChecking::generateChecks(
250 MemoryDepChecker::DepCandidates &DepCands, bool UseDependencies) {
251 assert(Checks.empty() && "Checks is not empty");
252 groupChecks(DepCands, UseDependencies);
253 Checks = generateChecks();
254}
255
256bool RuntimePointerChecking::needsChecking(const CheckingPtrGroup &M,
257 const CheckingPtrGroup &N) const {
258 for (unsigned I = 0, EI = M.Members.size(); EI != I; ++I)
259 for (unsigned J = 0, EJ = N.Members.size(); EJ != J; ++J)
260 if (needsChecking(M.Members[I], N.Members[J]))
261 return true;
262 return false;
263}
264
265/// Compare \p I and \p J and return the minimum.
266/// Return nullptr in case we couldn't find an answer.
267static const SCEV *getMinFromExprs(const SCEV *I, const SCEV *J,
268 ScalarEvolution *SE) {
269 const SCEV *Diff = SE->getMinusSCEV(J, I);
270 const SCEVConstant *C = dyn_cast<const SCEVConstant>(Diff);
271
272 if (!C)
273 return nullptr;
274 if (C->getValue()->isNegative())
275 return J;
276 return I;
277}
278
279bool RuntimePointerChecking::CheckingPtrGroup::addPointer(unsigned Index) {
280 const SCEV *Start = RtCheck.Pointers[Index].Start;
281 const SCEV *End = RtCheck.Pointers[Index].End;
282
283 // Compare the starts and ends with the known minimum and maximum
284 // of this set. We need to know how we compare against the min/max
285 // of the set in order to be able to emit memchecks.
286 const SCEV *Min0 = getMinFromExprs(Start, Low, RtCheck.SE);
287 if (!Min0)
288 return false;
289
290 const SCEV *Min1 = getMinFromExprs(End, High, RtCheck.SE);
291 if (!Min1)
292 return false;
293
294 // Update the low bound expression if we've found a new min value.
295 if (Min0 == Start)
296 Low = Start;
297
298 // Update the high bound expression if we've found a new max value.
299 if (Min1 != End)
300 High = End;
301
302 Members.push_back(Index);
303 return true;
304}
305
306void RuntimePointerChecking::groupChecks(
307 MemoryDepChecker::DepCandidates &DepCands, bool UseDependencies) {
308 // We build the groups from dependency candidates equivalence classes
309 // because:
310 // - We know that pointers in the same equivalence class share
311 // the same underlying object and therefore there is a chance
312 // that we can compare pointers
313 // - We wouldn't be able to merge two pointers for which we need
314 // to emit a memcheck. The classes in DepCands are already
315 // conveniently built such that no two pointers in the same
316 // class need checking against each other.
317
318 // We use the following (greedy) algorithm to construct the groups
319 // For every pointer in the equivalence class:
320 // For each existing group:
321 // - if the difference between this pointer and the min/max bounds
322 // of the group is a constant, then make the pointer part of the
323 // group and update the min/max bounds of that group as required.
324
325 CheckingGroups.clear();
326
327 // If we need to check two pointers to the same underlying object
328 // with a non-constant difference, we shouldn't perform any pointer
329 // grouping with those pointers. This is because we can easily get
330 // into cases where the resulting check would return false, even when
331 // the accesses are safe.
332 //
333 // The following example shows this:
334 // for (i = 0; i < 1000; ++i)
335 // a[5000 + i * m] = a[i] + a[i + 9000]
336 //
337 // Here grouping gives a check of (5000, 5000 + 1000 * m) against
338 // (0, 10000) which is always false. However, if m is 1, there is no
339 // dependence. Not grouping the checks for a[i] and a[i + 9000] allows
340 // us to perform an accurate check in this case.
341 //
342 // The above case requires that we have an UnknownDependence between
343 // accesses to the same underlying object. This cannot happen unless
344 // FoundNonConstantDistanceDependence is set, and therefore UseDependencies
345 // is also false. In this case we will use the fallback path and create
346 // separate checking groups for all pointers.
347
348 // If we don't have the dependency partitions, construct a new
349 // checking pointer group for each pointer. This is also required
350 // for correctness, because in this case we can have checking between
351 // pointers to the same underlying object.
352 if (!UseDependencies) {
353 for (unsigned I = 0; I < Pointers.size(); ++I)
354 CheckingGroups.push_back(CheckingPtrGroup(I, *this));
355 return;
356 }
357
358 unsigned TotalComparisons = 0;
359
360 DenseMap<Value *, unsigned> PositionMap;
361 for (unsigned Index = 0; Index < Pointers.size(); ++Index)
362 PositionMap[Pointers[Index].PointerValue] = Index;
363
364 // We need to keep track of what pointers we've already seen so we
365 // don't process them twice.
366 SmallSet<unsigned, 2> Seen;
367
368 // Go through all equivalence classes, get the "pointer check groups"
369 // and add them to the overall solution. We use the order in which accesses
370 // appear in 'Pointers' to enforce determinism.
371 for (unsigned I = 0; I < Pointers.size(); ++I) {
372 // We've seen this pointer before, and therefore already processed
373 // its equivalence class.
374 if (Seen.count(I))
375 continue;
376
377 MemoryDepChecker::MemAccessInfo Access(Pointers[I].PointerValue,
378 Pointers[I].IsWritePtr);
379
380 SmallVector<CheckingPtrGroup, 2> Groups;
381 auto LeaderI = DepCands.findValue(DepCands.getLeaderValue(Access));
382
383 // Because DepCands is constructed by visiting accesses in the order in
384 // which they appear in alias sets (which is deterministic) and the
385 // iteration order within an equivalence class member is only dependent on
386 // the order in which unions and insertions are performed on the
387 // equivalence class, the iteration order is deterministic.
388 for (auto MI = DepCands.member_begin(LeaderI), ME = DepCands.member_end();
389 MI != ME; ++MI) {
390 unsigned Pointer = PositionMap[MI->getPointer()];
391 bool Merged = false;
392 // Mark this pointer as seen.
393 Seen.insert(Pointer);
394
395 // Go through all the existing sets and see if we can find one
396 // which can include this pointer.
397 for (CheckingPtrGroup &Group : Groups) {
398 // Don't perform more than a certain amount of comparisons.
399 // This should limit the cost of grouping the pointers to something
400 // reasonable. If we do end up hitting this threshold, the algorithm
401 // will create separate groups for all remaining pointers.
402 if (TotalComparisons > MemoryCheckMergeThreshold)
403 break;
404
405 TotalComparisons++;
406
407 if (Group.addPointer(Pointer)) {
408 Merged = true;
409 break;
410 }
411 }
412
413 if (!Merged)
414 // We couldn't add this pointer to any existing set or the threshold
415 // for the number of comparisons has been reached. Create a new group
416 // to hold the current pointer.
417 Groups.push_back(CheckingPtrGroup(Pointer, *this));
418 }
419
420 // We've computed the grouped checks for this partition.
421 // Save the results and continue with the next one.
422 llvm::copy(Groups, std::back_inserter(CheckingGroups));
423 }
424}
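As a positive counterpart to the a[5000 + i * m] example above (assumed constant offsets): three pointers in one equivalence class with bounds [%a, %a+4), [%a+40, %a+44) and [%a+96, %a+100) each differ from the group's running Low/High by a constant, so addPointer succeeds for all of them and the group collapses to the single interval [%a, %a+100), which then costs one comparison per other group instead of three.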
425
426bool RuntimePointerChecking::arePointersInSamePartition(
427 const SmallVectorImpl<int> &PtrToPartition, unsigned PtrIdx1,
428 unsigned PtrIdx2) {
429 return (PtrToPartition[PtrIdx1] != -1 &&
430 PtrToPartition[PtrIdx1] == PtrToPartition[PtrIdx2]);
431}
432
433bool RuntimePointerChecking::needsChecking(unsigned I, unsigned J) const {
434 const PointerInfo &PointerI = Pointers[I];
435 const PointerInfo &PointerJ = Pointers[J];
436
437 // No need to check if two readonly pointers intersect.
438 if (!PointerI.IsWritePtr && !PointerJ.IsWritePtr)
439 return false;
440
441 // Only need to check pointers between two different dependency sets.
442 if (PointerI.DependencySetId == PointerJ.DependencySetId)
443 return false;
444
445 // Only need to check pointers in the same alias set.
446 if (PointerI.AliasSetId != PointerJ.AliasSetId)
447 return false;
448
449 return true;
450}
451
452void RuntimePointerChecking::printChecks(
453 raw_ostream &OS, const SmallVectorImpl<PointerCheck> &Checks,
454 unsigned Depth) const {
455 unsigned N = 0;
456 for (const auto &Check : Checks) {
457 const auto &First = Check.first->Members, &Second = Check.second->Members;
458
459 OS.indent(Depth) << "Check " << N++ << ":\n";
460
461 OS.indent(Depth + 2) << "Comparing group (" << Check.first << "):\n";
462 for (unsigned K = 0; K < First.size(); ++K)
463 OS.indent(Depth + 2) << *Pointers[First[K]].PointerValue << "\n";
464
465 OS.indent(Depth + 2) << "Against group (" << Check.second << "):\n";
466 for (unsigned K = 0; K < Second.size(); ++K)
467 OS.indent(Depth + 2) << *Pointers[Second[K]].PointerValue << "\n";
468 }
469}
470
471void RuntimePointerChecking::print(raw_ostream &OS, unsigned Depth) const {
472
473 OS.indent(Depth) << "Run-time memory checks:\n";
474 printChecks(OS, Checks, Depth);
475
476 OS.indent(Depth) << "Grouped accesses:\n";
477 for (unsigned I = 0; I < CheckingGroups.size(); ++I) {
478 const auto &CG = CheckingGroups[I];
479
480 OS.indent(Depth + 2) << "Group " << &CG << ":\n";
481 OS.indent(Depth + 4) << "(Low: " << *CG.Low << " High: " << *CG.High
482 << ")\n";
483 for (unsigned J = 0; J < CG.Members.size(); ++J) {
484 OS.indent(Depth + 6) << "Member: " << *Pointers[CG.Members[J]].Expr
485 << "\n";
486 }
487 }
488}
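For reference, the output of these print routines looks roughly like the following (illustrative group addresses and SCEVs, reconstructed from the code above rather than copied from a run):

Run-time memory checks:
Check 0:
  Comparing group (0x55d1a2b0):
  %arrayidxA = getelementptr inbounds i32, i32* %a, i64 %indvars.iv
  Against group (0x55d1a2f8):
  %arrayidxB = getelementptr inbounds i32, i32* %b, i64 %indvars.iv
Grouped accesses:
  Group 0x55d1a2b0:
    (Low: %a High: (400 + %a))
      Member: {%a,+,4}<%loop>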
489
490namespace {
491
492/// Analyses memory accesses in a loop.
493///
494/// Checks whether run time pointer checks are needed and builds sets for data
495/// dependence checking.
496class AccessAnalysis {
497public:
498 /// Read or write access location.
499 typedef PointerIntPair<Value *, 1, bool> MemAccessInfo;
500 typedef SmallVector<MemAccessInfo, 8> MemAccessInfoList;
501
502 AccessAnalysis(const DataLayout &Dl, Loop *TheLoop, AliasAnalysis *AA,
503 LoopInfo *LI, MemoryDepChecker::DepCandidates &DA,
504 PredicatedScalarEvolution &PSE)
505 : DL(Dl), TheLoop(TheLoop), AST(*AA), LI(LI), DepCands(DA),
506 IsRTCheckAnalysisNeeded(false), PSE(PSE) {}
507
508 /// Register a load and whether it is only read from.
509 void addLoad(MemoryLocation &Loc, bool IsReadOnly) {
510 Value *Ptr = const_cast<Value*>(Loc.Ptr);
511 AST.add(Ptr, LocationSize::unknown(), Loc.AATags);
512 Accesses.insert(MemAccessInfo(Ptr, false));
513 if (IsReadOnly)
514 ReadOnlyPtr.insert(Ptr);
515 }
516
517 /// Register a store.
518 void addStore(MemoryLocation &Loc) {
519 Value *Ptr = const_cast<Value*>(Loc.Ptr);
520 AST.add(Ptr, LocationSize::unknown(), Loc.AATags);
521 Accesses.insert(MemAccessInfo(Ptr, true));
522 }
523
524 /// Check if we can emit a run-time no-alias check for \p Access.
525 ///
526 /// Returns true if we can emit a run-time no alias check for \p Access.
527 /// If we can check this access, this also adds it to a dependence set and
 528/// adds a run-time check for it to \p RtCheck. If \p Assume is true,
529 /// we will attempt to use additional run-time checks in order to get
530 /// the bounds of the pointer.
531 bool createCheckForAccess(RuntimePointerChecking &RtCheck,
532 MemAccessInfo Access,
533 const ValueToValueMap &Strides,
534 DenseMap<Value *, unsigned> &DepSetId,
535 Loop *TheLoop, unsigned &RunningDepId,
536 unsigned ASId, bool ShouldCheckStride,
537 bool Assume);
538
539 /// Check whether we can check the pointers at runtime for
540 /// non-intersection.
541 ///
542 /// Returns true if we need no check or if we do and we can generate them
543 /// (i.e. the pointers have computable bounds).
544 bool canCheckPtrAtRT(RuntimePointerChecking &RtCheck, ScalarEvolution *SE,
545 Loop *TheLoop, const ValueToValueMap &Strides,
546 bool ShouldCheckWrap = false);
547
548 /// Goes over all memory accesses, checks whether a RT check is needed
549 /// and builds sets of dependent accesses.
550 void buildDependenceSets() {
551 processMemAccesses();
552 }
553
554 /// Initial processing of memory accesses determined that we need to
555 /// perform dependency checking.
556 ///
557 /// Note that this can later be cleared if we retry memcheck analysis without
558 /// dependency checking (i.e. FoundNonConstantDistanceDependence).
559 bool isDependencyCheckNeeded() { return !CheckDeps.empty(); }
560
561 /// We decided that no dependence analysis would be used. Reset the state.
562 void resetDepChecks(MemoryDepChecker &DepChecker) {
563 CheckDeps.clear();
564 DepChecker.clearDependences();
565 }
566
567 MemAccessInfoList &getDependenciesToCheck() { return CheckDeps; }
568
569private:
570 typedef SetVector<MemAccessInfo> PtrAccessSet;
571
572 /// Go over all memory access and check whether runtime pointer checks
573 /// are needed and build sets of dependency check candidates.
574 void processMemAccesses();
575
576 /// Set of all accesses.
577 PtrAccessSet Accesses;
578
579 const DataLayout &DL;
580
581 /// The loop being checked.
582 const Loop *TheLoop;
583
584 /// List of accesses that need a further dependence check.
585 MemAccessInfoList CheckDeps;
586
587 /// Set of pointers that are read only.
588 SmallPtrSet<Value*, 16> ReadOnlyPtr;
589
 590 /// An alias set tracker to partition the access set by underlying object and
 591 /// intrinsic property (such as TBAA metadata).
592 AliasSetTracker AST;
593
594 LoopInfo *LI;
595
596 /// Sets of potentially dependent accesses - members of one set share an
 597 /// underlying pointer. The set "CheckDeps" identifies which sets really need a
598 /// dependence check.
599 MemoryDepChecker::DepCandidates &DepCands;
600
601 /// Initial processing of memory accesses determined that we may need
602 /// to add memchecks. Perform the analysis to determine the necessary checks.
603 ///
 604 /// Note that this is different from isDependencyCheckNeeded. When we retry
605 /// memcheck analysis without dependency checking
606 /// (i.e. FoundNonConstantDistanceDependence), isDependencyCheckNeeded is
607 /// cleared while this remains set if we have potentially dependent accesses.
608 bool IsRTCheckAnalysisNeeded;
609
610 /// The SCEV predicate containing all the SCEV-related assumptions.
611 PredicatedScalarEvolution &PSE;
612};
613
614} // end anonymous namespace
615
616/// Check whether a pointer can participate in a runtime bounds check.
617/// If \p Assume, try harder to prove that we can compute the bounds of \p Ptr
618/// by adding run-time checks (overflow checks) if necessary.
619static bool hasComputableBounds(PredicatedScalarEvolution &PSE,
620 const ValueToValueMap &Strides, Value *Ptr,
621 Loop *L, bool Assume) {
622 const SCEV *PtrScev = replaceSymbolicStrideSCEV(PSE, Strides, Ptr);
623
 624 // The bounds for a loop-invariant pointer are trivial.
625 if (PSE.getSE()->isLoopInvariant(PtrScev, L))
626 return true;
627
628 const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(PtrScev);
629
630 if (!AR && Assume)
631 AR = PSE.getAsAddRec(Ptr);
632
633 if (!AR)
634 return false;
635
636 return AR->isAffine();
637}
638
639/// Check whether a pointer address cannot wrap.
640static bool isNoWrap(PredicatedScalarEvolution &PSE,
641 const ValueToValueMap &Strides, Value *Ptr, Loop *L) {
642 const SCEV *PtrScev = PSE.getSCEV(Ptr);
643 if (PSE.getSE()->isLoopInvariant(PtrScev, L))
644 return true;
645
646 int64_t Stride = getPtrStride(PSE, Ptr, L, Strides);
647 if (Stride == 1 || PSE.hasNoOverflow(Ptr, SCEVWrapPredicate::IncrementNUSW))
648 return true;
649
650 return false;
651}
652
653bool AccessAnalysis::createCheckForAccess(RuntimePointerChecking &RtCheck,
654 MemAccessInfo Access,
655 const ValueToValueMap &StridesMap,
656 DenseMap<Value *, unsigned> &DepSetId,
657 Loop *TheLoop, unsigned &RunningDepId,
658 unsigned ASId, bool ShouldCheckWrap,
659 bool Assume) {
660 Value *Ptr = Access.getPointer();
661
662 if (!hasComputableBounds(PSE, StridesMap, Ptr, TheLoop, Assume))
663 return false;
664
665 // When we run after a failing dependency check we have to make sure
666 // we don't have wrapping pointers.
667 if (ShouldCheckWrap && !isNoWrap(PSE, StridesMap, Ptr, TheLoop)) {
668 auto *Expr = PSE.getSCEV(Ptr);
669 if (!Assume || !isa<SCEVAddRecExpr>(Expr))
670 return false;
671 PSE.setNoOverflow(Ptr, SCEVWrapPredicate::IncrementNUSW);
672 }
673
674 // The id of the dependence set.
675 unsigned DepId;
676
677 if (isDependencyCheckNeeded()) {
678 Value *Leader = DepCands.getLeaderValue(Access).getPointer();
679 unsigned &LeaderId = DepSetId[Leader];
680 if (!LeaderId)
681 LeaderId = RunningDepId++;
682 DepId = LeaderId;
683 } else
684 // Each access has its own dependence set.
685 DepId = RunningDepId++;
686
687 bool IsWrite = Access.getInt();
688 RtCheck.insert(TheLoop, Ptr, IsWrite, DepId, ASId, StridesMap, PSE);
689 LLVM_DEBUG(dbgs() << "LAA: Found a runtime check ptr:" << *Ptr << '\n');
690
691 return true;
692 }
693
694bool AccessAnalysis::canCheckPtrAtRT(RuntimePointerChecking &RtCheck,
695 ScalarEvolution *SE, Loop *TheLoop,
696 const ValueToValueMap &StridesMap,
697 bool ShouldCheckWrap) {
698 // Find pointers with computable bounds. We are going to use this information
699 // to place a runtime bound check.
700 bool CanDoRT = true;
701
702 bool NeedRTCheck = false;
703 if (!IsRTCheckAnalysisNeeded) return true;
704
705 bool IsDepCheckNeeded = isDependencyCheckNeeded();
706
707 // We assign a consecutive id to accesses from different alias sets.
708 // Accesses between different groups don't need to be checked.
709 unsigned ASId = 1;
710 for (auto &AS : AST) {
711 int NumReadPtrChecks = 0;
712 int NumWritePtrChecks = 0;
713 bool CanDoAliasSetRT = true;
714
715 // We assign a consecutive id to accesses from different dependence sets.
716 // Accesses within the same set don't need a runtime check.
717 unsigned RunningDepId = 1;
718 DenseMap<Value *, unsigned> DepSetId;
719
720 SmallVector<MemAccessInfo, 4> Retries;
721
722 for (auto A : AS) {
723 Value *Ptr = A.getValue();
724 bool IsWrite = Accesses.count(MemAccessInfo(Ptr, true));
725 MemAccessInfo Access(Ptr, IsWrite);
726
727 if (IsWrite)
728 ++NumWritePtrChecks;
729 else
730 ++NumReadPtrChecks;
731
732 if (!createCheckForAccess(RtCheck, Access, StridesMap, DepSetId, TheLoop,
733 RunningDepId, ASId, ShouldCheckWrap, false)) {
734 LLVM_DEBUG(dbgs() << "LAA: Can't find bounds for ptr:" << *Ptr << '\n');
735 Retries.push_back(Access);
736 CanDoAliasSetRT = false;
737 }
738 }
739
740 // If we have at least two writes or one write and a read then we need to
741 // check them. But there is no need for checks if there is only one
742 // dependence set for this alias set.
743 //
744 // Note that this function computes CanDoRT and NeedRTCheck independently.
745 // For example CanDoRT=false, NeedRTCheck=false means that we have a pointer
746 // for which we couldn't find the bounds but we don't actually need to emit
747 // any checks so it does not matter.
748 bool NeedsAliasSetRTCheck = false;
749 if (!(IsDepCheckNeeded && CanDoAliasSetRT && RunningDepId == 2))
750 NeedsAliasSetRTCheck = (NumWritePtrChecks >= 2 ||
751 (NumReadPtrChecks >= 1 && NumWritePtrChecks >= 1));
752
753 // We need to perform run-time alias checks, but some pointers had bounds
754 // that couldn't be checked.
755 if (NeedsAliasSetRTCheck && !CanDoAliasSetRT) {
756 // Reset the CanDoSetRt flag and retry all accesses that have failed.
757 // We know that we need these checks, so we can now be more aggressive
758 // and add further checks if required (overflow checks).
759 CanDoAliasSetRT = true;
760 for (auto Access : Retries)
761 if (!createCheckForAccess(RtCheck, Access, StridesMap, DepSetId,
762 TheLoop, RunningDepId, ASId,
763 ShouldCheckWrap, /*Assume=*/true)) {
764 CanDoAliasSetRT = false;
765 break;
766 }
767 }
768
769 CanDoRT &= CanDoAliasSetRT;
770 NeedRTCheck |= NeedsAliasSetRTCheck;
771 ++ASId;
772 }
773
774 // If the pointers that we would use for the bounds comparison have different
775 // address spaces, assume the values aren't directly comparable, so we can't
776 // use them for the runtime check. We also have to assume they could
777 // overlap. In the future there should be metadata for whether address spaces
778 // are disjoint.
779 unsigned NumPointers = RtCheck.Pointers.size();
780 for (unsigned i = 0; i < NumPointers; ++i) {
781 for (unsigned j = i + 1; j < NumPointers; ++j) {
782 // Only need to check pointers between two different dependency sets.
783 if (RtCheck.Pointers[i].DependencySetId ==
784 RtCheck.Pointers[j].DependencySetId)
785 continue;
786 // Only need to check pointers in the same alias set.
787 if (RtCheck.Pointers[i].AliasSetId != RtCheck.Pointers[j].AliasSetId)
788 continue;
789
790 Value *PtrI = RtCheck.Pointers[i].PointerValue;
791 Value *PtrJ = RtCheck.Pointers[j].PointerValue;
792
793 unsigned ASi = PtrI->getType()->getPointerAddressSpace();
794 unsigned ASj = PtrJ->getType()->getPointerAddressSpace();
795 if (ASi != ASj) {
796 LLVM_DEBUG(
797 dbgs() << "LAA: Runtime check would require comparison between"
798 " different address spaces\n");
799 return false;
800 }
801 }
802 }
803
804 if (NeedRTCheck && CanDoRT)
805 RtCheck.generateChecks(DepCands, IsDepCheckNeeded);
806
807 LLVM_DEBUG(dbgs() << "LAA: We need to do " << RtCheck.getNumberOfChecks()
808 << " pointer comparisons.\n");
809
810 RtCheck.Need = NeedRTCheck;
811
812 bool CanDoRTIfNeeded = !NeedRTCheck || CanDoRT;
813 if (!CanDoRTIfNeeded)
814 RtCheck.reset();
815 return CanDoRTIfNeeded;
816}
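Restating the exit logic above (no new behavior): CanDoRTIfNeeded = !NeedRTCheck || CanDoRT, so there are three reachable outcomes. NeedRTCheck == false: no checks are required and the function returns true regardless of CanDoRT. NeedRTCheck == true with CanDoRT == true: the checks are generated and the function returns true. NeedRTCheck == true with CanDoRT == false: some required bounds were not computable, so RtCheck is reset and the function returns false.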
817
818void AccessAnalysis::processMemAccesses() {
819 // We process the set twice: first we process read-write pointers, last we
820 // process read-only pointers. This allows us to skip dependence tests for
821 // read-only pointers.
822
823 LLVM_DEBUG(dbgs() << "LAA: Processing memory accesses...\n");
824 LLVM_DEBUG(dbgs() << " AST: "; AST.dump());
825 LLVM_DEBUG(dbgs() << "LAA: Accesses(" << Accesses.size() << "):\n");
826 LLVM_DEBUG({
827 for (auto A : Accesses)
828 dbgs() << "\t" << *A.getPointer() << " ("
829 << (A.getInt() ? "write" : (ReadOnlyPtr.count(A.getPointer()) ?
830 "read-only" : "read")) << ")\n";
831 });
832
833 // The AliasSetTracker has nicely partitioned our pointers by metadata
834 // compatibility and potential for underlying-object overlap. As a result, we
835 // only need to check for potential pointer dependencies within each alias
836 // set.
837 for (auto &AS : AST) {
838 // Note that both the alias-set tracker and the alias sets themselves use
839 // linked lists internally and so the iteration order here is deterministic
840 // (matching the original instruction order within each set).
841
842 bool SetHasWrite = false;
843
844 // Map of pointers to last access encountered.
845 typedef DenseMap<const Value*, MemAccessInfo> UnderlyingObjToAccessMap;
846 UnderlyingObjToAccessMap ObjToLastAccess;
847
848 // Set of access to check after all writes have been processed.
849 PtrAccessSet DeferredAccesses;
850
851 // Iterate over each alias set twice, once to process read/write pointers,
852 // and then to process read-only pointers.
853 for (int SetIteration = 0; SetIteration < 2; ++SetIteration) {
854 bool UseDeferred = SetIteration > 0;
855 PtrAccessSet &S = UseDeferred ? DeferredAccesses : Accesses;
856
857 for (auto AV : AS) {
858 Value *Ptr = AV.getValue();
859
860 // For a single memory access in AliasSetTracker, Accesses may contain
861 // both read and write, and they both need to be handled for CheckDeps.
862 for (auto AC : S) {
863 if (AC.getPointer() != Ptr)
864 continue;
865
866 bool IsWrite = AC.getInt();
867
868 // If we're using the deferred access set, then it contains only
869 // reads.
870 bool IsReadOnlyPtr = ReadOnlyPtr.count(Ptr) && !IsWrite;
871 if (UseDeferred && !IsReadOnlyPtr)
872 continue;
873 // Otherwise, the pointer must be in the PtrAccessSet, either as a
874 // read or a write.
875 assert(((IsReadOnlyPtr && UseDeferred) || IsWrite ||
876 S.count(MemAccessInfo(Ptr, false))) &&
877 "Alias-set pointer not in the access set?");
878
879 MemAccessInfo Access(Ptr, IsWrite);
880 DepCands.insert(Access);
881
882 // Memorize read-only pointers for later processing and skip them in
883 // the first round (they need to be checked after we have seen all
884 // write pointers). Note: we also mark pointers that are not
885 // consecutive as "read-only" pointers (so that we check
886 // "a[b[i]] +="). Hence, we need the second check for "!IsWrite".
887 if (!UseDeferred && IsReadOnlyPtr) {
888 DeferredAccesses.insert(Access);
889 continue;
890 }
891
892 // If this is a write - check other reads and writes for conflicts. If
893 // this is a read only check other writes for conflicts (but only if
894 // there is no other write to the ptr - this is an optimization to
895 // catch "a[i] = a[i] + " without having to do a dependence check).
896 if ((IsWrite || IsReadOnlyPtr) && SetHasWrite) {
897 CheckDeps.push_back(Access);
898 IsRTCheckAnalysisNeeded = true;
899 }
900
901 if (IsWrite)
902 SetHasWrite = true;
903
904 // Create sets of pointers connected by a shared alias set and
905 // underlying object.
906 typedef SmallVector<const Value *, 16> ValueVector;
907 ValueVector TempObjects;
908
909 GetUnderlyingObjects(Ptr, TempObjects, DL, LI);
910 LLVM_DEBUG(dbgs()
911 << "Underlying objects for pointer " << *Ptr << "\n");
912 for (const Value *UnderlyingObj : TempObjects) {
913 // nullptr never aliases, so don't join sets for pointers that have "null"
914 // in their UnderlyingObjects list.
915 if (isa<ConstantPointerNull>(UnderlyingObj) &&
916 !NullPointerIsDefined(
917 TheLoop->getHeader()->getParent(),
918 UnderlyingObj->getType()->getPointerAddressSpace()))
919 continue;
920
921 UnderlyingObjToAccessMap::iterator Prev =
922 ObjToLastAccess.find(UnderlyingObj);
923 if (Prev != ObjToLastAccess.end())
924 DepCands.unionSets(Access, Prev->second);
925
926 ObjToLastAccess[UnderlyingObj] = Access;
927 LLVM_DEBUG(dbgs() << " " << *UnderlyingObj << "\n");
928 }
929 }
930 }
931 }
932 }
933}
934
935static bool isInBoundsGep(Value *Ptr) {
936 if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(Ptr))
937 return GEP->isInBounds();
938 return false;
939}
940
941/// Return true if an AddRec pointer \p Ptr is unsigned non-wrapping,
942/// i.e. monotonically increasing/decreasing.
943static bool isNoWrapAddRec(Value *Ptr, const SCEVAddRecExpr *AR,
944 PredicatedScalarEvolution &PSE, const Loop *L) {
945 // FIXME: This should probably only return true for NUW.
946 if (AR->getNoWrapFlags(SCEV::NoWrapMask))
947 return true;
948
949 // Scalar evolution does not propagate the non-wrapping flags to values that
950 // are derived from a non-wrapping induction variable because non-wrapping
951 // could be flow-sensitive.
952 //
953 // Look through the potentially overflowing instruction to try to prove
954 // non-wrapping for the *specific* value of Ptr.
955
956 // The arithmetic implied by an inbounds GEP can't overflow.
957 auto *GEP = dyn_cast<GetElementPtrInst>(Ptr);
958 if (!GEP || !GEP->isInBounds())
959 return false;
960
961 // Make sure there is only one non-const index and analyze that.
962 Value *NonConstIndex = nullptr;
963 for (Value *Index : make_range(GEP->idx_begin(), GEP->idx_end()))
964 if (!isa<ConstantInt>(Index)) {
965 if (NonConstIndex)
966 return false;
967 NonConstIndex = Index;
968 }
969 if (!NonConstIndex)
970 // The recurrence is on the pointer, ignore for now.
971 return false;
972
973 // The index in GEP is signed. It is non-wrapping if it's derived from a NSW
974 // AddRec using a NSW operation.
975 if (auto *OBO = dyn_cast<OverflowingBinaryOperator>(NonConstIndex))
976 if (OBO->hasNoSignedWrap() &&
977 // Assume the other operand is constant so that the AddRec can be
978 // easily found.
979 isa<ConstantInt>(OBO->getOperand(1))) {
980 auto *OpScev = PSE.getSCEV(OBO->getOperand(0));
981
982 if (auto *OpAR = dyn_cast<SCEVAddRecExpr>(OpScev))
983 return OpAR->getLoop() == L && OpAR->getNoWrapFlags(SCEV::FlagNSW);
984 }
985
986 return false;
987}
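An illustrative source pattern this function can look through (hypothetical example, not taken from the report): the only non-constant GEP index below is an NSW multiply of a signed, non-wrapping induction variable, and the array access yields an inbounds GEP.

// Hypothetical input: the index '3 * i' is an NSW binary operator with
// a constant operand, and its other operand's SCEV is an NSW AddRec on
// the loop, so the specific pointer value cannot wrap.
int sum3(const int *a, int n) {
  int s = 0;
  for (int i = 0; i < n; ++i)
    s += a[3 * i];
  return s;
}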
988
989/// Check whether the access through \p Ptr has a constant stride.
990int64_t llvm::getPtrStride(PredicatedScalarEvolution &PSE, Value *Ptr,
991 const Loop *Lp, const ValueToValueMap &StridesMap,
992 bool Assume, bool ShouldCheckWrap) {
993 Type *Ty = Ptr->getType();
994 assert(Ty->isPointerTy() && "Unexpected non-ptr");
995
996 // Make sure that the pointer does not point to aggregate types.
997 auto *PtrTy = cast<PointerType>(Ty);
998 if (PtrTy->getElementType()->isAggregateType()) {
999 LLVM_DEBUG(dbgs() << "LAA: Bad stride - Not a pointer to a scalar type"
1000 << *Ptr << "\n");
1001 return 0;
1002 }
1003
1004 const SCEV *PtrScev = replaceSymbolicStrideSCEV(PSE, StridesMap, Ptr);
1005
1006 const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(PtrScev);
1007 if (Assume && !AR)
1008 AR = PSE.getAsAddRec(Ptr);
1009
1010 if (!AR) {
1011 LLVM_DEBUG(dbgs() << "LAA: Bad stride - Not an AddRecExpr pointer " << *Ptr
1012 << " SCEV: " << *PtrScev << "\n");
1013 return 0;
1014 }
1015
1016 // The access function must stride over the innermost loop.
1017 if (Lp != AR->getLoop()) {
1018 LLVM_DEBUG(dbgs() << "LAA: Bad stride - Not striding over innermost loop "
1019 << *Ptr << " SCEV: " << *AR << "\n");
1020 return 0;
1021 }
1022
1023 // The address calculation must not wrap. Otherwise, a dependence could be
1024 // inverted.
1025 // An inbounds getelementptr that is an AddRec with a unit stride
1026 // cannot wrap per definition. The unit stride requirement is checked later.
1027 // A getelementptr without an inbounds attribute and unit stride would have
1028 // to access the pointer value "0" which is undefined behavior in address
1029 // space 0, therefore we can also vectorize this case.
1030 bool IsInBoundsGEP = isInBoundsGep(Ptr);
1031 bool IsNoWrapAddRec = !ShouldCheckWrap ||
1032 PSE.hasNoOverflow(Ptr, SCEVWrapPredicate::IncrementNUSW) ||
1033 isNoWrapAddRec(Ptr, AR, PSE, Lp);
1034 if (!IsNoWrapAddRec && !IsInBoundsGEP &&
1035 NullPointerIsDefined(Lp->getHeader()->getParent(),
1036 PtrTy->getAddressSpace())) {
1037 if (Assume) {
1038 PSE.setNoOverflow(Ptr, SCEVWrapPredicate::IncrementNUSW);
1039 IsNoWrapAddRec = true;
1040 LLVM_DEBUG(dbgs() << "LAA: Pointer may wrap in the address space:\n"
1041 << "LAA: Pointer: " << *Ptr << "\n"
1042 << "LAA: SCEV: " << *AR << "\n"
1043 << "LAA: Added an overflow assumption\n");
1044 } else {
1045 LLVM_DEBUG(
1046 dbgs() << "LAA: Bad stride - Pointer may wrap in the address space "
1047 << *Ptr << " SCEV: " << *AR << "\n");
1048 return 0;
1049 }
1050 }
1051
1052 // Check the step is constant.
1053 const SCEV *Step = AR->getStepRecurrence(*PSE.getSE());
1054
1055 // Calculate the pointer stride and check if it is constant.
1056 const SCEVConstant *C = dyn_cast<SCEVConstant>(Step);
1057 if (!C) {
1058 LLVM_DEBUG(dbgs() << "LAA: Bad stride - Not a constant strided " << *Ptr
1059 << " SCEV: " << *AR << "\n");
1060 return 0;
1061 }
1062
1063 auto &DL = Lp->getHeader()->getModule()->getDataLayout();
1064 int64_t Size = DL.getTypeAllocSize(PtrTy->getElementType());
1065 const APInt &APStepVal = C->getAPInt();
1066
1067 // Huge step value - give up.
1068 if (APStepVal.getBitWidth() > 64)
1069 return 0;
1070
1071 int64_t StepVal = APStepVal.getSExtValue();
1072
1073 // Strided access.
1074 int64_t Stride = StepVal / Size;
1075 int64_t Rem = StepVal % Size;
1076 if (Rem)
1077 return 0;
1078
1079 // If the SCEV could wrap but we have an inbounds gep with a unit stride we
1080 // know we can't "wrap around the address space". In case of address space
1081 // zero we know that this won't happen without triggering undefined behavior.
1082 if (!IsNoWrapAddRec && Stride != 1 && Stride != -1 &&
1083 (IsInBoundsGEP || !NullPointerIsDefined(Lp->getHeader()->getParent(),
1084 PtrTy->getAddressSpace()))) {
1085 if (Assume) {
1086 // We can avoid this case by adding a run-time check.
1087 LLVM_DEBUG(dbgs() << "LAA: Non unit strided pointer which is not either "
1088 << "inbounds or in address space 0 may wrap:\n"
1089 << "LAA: Pointer: " << *Ptr << "\n"
1090 << "LAA: SCEV: " << *AR << "\n"
1091 << "LAA: Added an overflow assumption\n");
1092 PSE.setNoOverflow(Ptr, SCEVWrapPredicate::IncrementNUSW);
1093 } else
1094 return 0;
1095 }
1096
1097 return Stride;
1098}
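A worked instance of the final arithmetic (illustrative numbers): with a constant SCEV step of 8 bytes per iteration over i32 elements, Size = 4, so Stride = 8 / 4 = 2 and Rem = 8 % 4 = 0, and the function reports a stride of 2 (every other element). A step of 6 bytes over the same elements leaves Rem = 2, so the access is not evenly strided and 0 is returned.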
1099
1100bool llvm::sortPtrAccesses(ArrayRef<Value *> VL, const DataLayout &DL,
1101 ScalarEvolution &SE,
1102 SmallVectorImpl<unsigned> &SortedIndices) {
1103 assert(llvm::all_of(
1104 VL, [](const Value *V) { return V->getType()->isPointerTy(); }) &&
1105 "Expected list of pointer operands.");
1106 SmallVector<std::pair<int64_t, Value *>, 4> OffValPairs;
1107 OffValPairs.reserve(VL.size());
1108
1109 // Walk over the pointers and map each of them to an offset relative to
1110 // the first pointer in the array.
1111 Value *Ptr0 = VL[0];
1112 const SCEV *Scev0 = SE.getSCEV(Ptr0);
1113 Value *Obj0 = GetUnderlyingObject(Ptr0, DL);
1114
1115 llvm::SmallSet<int64_t, 4> Offsets;
1116 for (auto *Ptr : VL) {
1117 // TODO: Outline this code as a special, more time consuming, version of
1118 // computeConstantDifference() function.
1119 if (Ptr->getType()->getPointerAddressSpace() !=
1120 Ptr0->getType()->getPointerAddressSpace())
1121 return false;
1122 // If a pointer refers to a different underlying object, bail - the
1123 // pointers are by definition incomparable.
1124 Value *CurrObj = GetUnderlyingObject(Ptr, DL);
1125 if (CurrObj != Obj0)
1126 return false;
1127
1128 const SCEV *Scev = SE.getSCEV(Ptr);
1129 const auto *Diff = dyn_cast<SCEVConstant>(SE.getMinusSCEV(Scev, Scev0));
1130 // The pointers may not have a constant offset from each other, or SCEV
1131 // may just not be smart enough to figure out they do. Regardless,
1132 // there's nothing we can do.
1133 if (!Diff)
1134 return false;
1135
1136 // Bail if a pointer with the same offset has already been seen.
1137 int64_t Offset = Diff->getAPInt().getSExtValue();
1138 if (!Offsets.insert(Offset).second)
1139 return false;
1140 OffValPairs.emplace_back(Offset, Ptr);
1141 }
1142 SortedIndices.clear();
1143 SortedIndices.resize(VL.size());
1144 std::iota(SortedIndices.begin(), SortedIndices.end(), 0);
1145
1146 // Sort the memory accesses by offset, keeping the resulting order in SortedIndices.
1147 llvm::stable_sort(SortedIndices, [&](unsigned Left, unsigned Right) {
1148 return OffValPairs[Left].first < OffValPairs[Right].first;
1149 });
1150
1151 // Check if the order is consecutive already.
1152 if (llvm::all_of(SortedIndices, [&SortedIndices](const unsigned I) {
1153 return I == SortedIndices[I];
1154 }))
1155 SortedIndices.clear();
1156
1157 return true;
1158}
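
The offset-sorting contract above can be illustrated outside of LLVM with a minimal standalone C++ sketch (hypothetical: plain int64_t offsets stand in for the SCEVConstant differences that sortPtrAccesses computes). It fails on duplicate offsets, clears the index vector when the order is already consecutive, and otherwise yields the sorting permutation.

#include <algorithm>
#include <cstdint>
#include <numeric>
#include <set>
#include <vector>

// Standalone sketch of the sorting contract: Offsets stands in for the
// SCEVConstant differences from each pointer to the first one.
static bool sortByOffset(const std::vector<int64_t> &Offsets,
                         std::vector<unsigned> &SortedIndices) {
  std::set<int64_t> Seen;
  for (int64_t O : Offsets)
    if (!Seen.insert(O).second)
      return false; // duplicate offset: two pointers name the same slot

  SortedIndices.resize(Offsets.size());
  std::iota(SortedIndices.begin(), SortedIndices.end(), 0u);
  std::stable_sort(SortedIndices.begin(), SortedIndices.end(),
                   [&](unsigned L, unsigned R) {
                     return Offsets[L] < Offsets[R];
                   });

  // Already consecutive: signal that with an empty index vector.
  bool Identity = true;
  for (unsigned I = 0; I < SortedIndices.size(); ++I)
    Identity = Identity && SortedIndices[I] == I;
  if (Identity)
    SortedIndices.clear();
  return true;
}
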
1159
1160/// Take the address space operand from the Load/Store instruction.
1161/// Returns -1 if this is not a valid Load/Store instruction.
1162static unsigned getAddressSpaceOperand(Value *I) {
1163 if (LoadInst *L = dyn_cast<LoadInst>(I))
1164 return L->getPointerAddressSpace();
1165 if (StoreInst *S = dyn_cast<StoreInst>(I))
1166 return S->getPointerAddressSpace();
1167 return -1;
1168}
1169
1170/// Returns true if the memory operations \p A and \p B are consecutive.
1171bool llvm::isConsecutiveAccess(Value *A, Value *B, const DataLayout &DL,
1172 ScalarEvolution &SE, bool CheckType) {
1173 Value *PtrA = getLoadStorePointerOperand(A);
1174 Value *PtrB = getLoadStorePointerOperand(B);
1175 unsigned ASA = getAddressSpaceOperand(A);
1176 unsigned ASB = getAddressSpaceOperand(B);
1177
1178 // Check that the address spaces match and that the pointers are valid.
1179 if (!PtrA || !PtrB || (ASA != ASB))
1180 return false;
1181
1182 // Make sure that A and B are different pointers.
1183 if (PtrA == PtrB)
1184 return false;
1185
1186 // Make sure that A and B have the same type if required.
1187 if (CheckType && PtrA->getType() != PtrB->getType())
1188 return false;
1189
1190 unsigned IdxWidth = DL.getIndexSizeInBits(ASA);
1191 Type *Ty = cast<PointerType>(PtrA->getType())->getElementType();
1192
1193 APInt OffsetA(IdxWidth, 0), OffsetB(IdxWidth, 0);
1194 PtrA = PtrA->stripAndAccumulateInBoundsConstantOffsets(DL, OffsetA);
1195 PtrB = PtrB->stripAndAccumulateInBoundsConstantOffsets(DL, OffsetB);
1196
1197 // Retrieve the address space again as pointer stripping now tracks through
1198 // `addrspacecast`.
1199 ASA = cast<PointerType>(PtrA->getType())->getAddressSpace();
1200 ASB = cast<PointerType>(PtrB->getType())->getAddressSpace();
1201 // Check that the address spaces match and that the pointers are valid.
1202 if (ASA != ASB)
1203 return false;
1204
1205 IdxWidth = DL.getIndexSizeInBits(ASA);
1206 OffsetA = OffsetA.sextOrTrunc(IdxWidth);
1207 OffsetB = OffsetB.sextOrTrunc(IdxWidth);
1208
1209 APInt Size(IdxWidth, DL.getTypeStoreSize(Ty));
1210
1211 // OffsetDelta = OffsetB - OffsetA;
1212 const SCEV *OffsetSCEVA = SE.getConstant(OffsetA);
1213 const SCEV *OffsetSCEVB = SE.getConstant(OffsetB);
1214 const SCEV *OffsetDeltaSCEV = SE.getMinusSCEV(OffsetSCEVB, OffsetSCEVA);
1215 const APInt &OffsetDelta = cast<SCEVConstant>(OffsetDeltaSCEV)->getAPInt();
1216
1217 // Check if they are based on the same pointer. That makes the offsets
1218 // sufficient.
1219 if (PtrA == PtrB)
1220 return OffsetDelta == Size;
1221
1222 // Compute the necessary base pointer delta to have the necessary final delta
1223 // equal to the size.
1224 // BaseDelta = Size - OffsetDelta;
1225 const SCEV *SizeSCEV = SE.getConstant(Size);
1226 const SCEV *BaseDelta = SE.getMinusSCEV(SizeSCEV, OffsetDeltaSCEV);
1227
1228 // Otherwise compute the distance with SCEV between the base pointers.
1229 const SCEV *PtrSCEVA = SE.getSCEV(PtrA);
1230 const SCEV *PtrSCEVB = SE.getSCEV(PtrB);
1231 const SCEV *X = SE.getAddExpr(PtrSCEVA, BaseDelta);
1232 return X == PtrSCEVB;
1233}
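
Once both pointers strip down to the same base, the check above reduces to pure offset arithmetic. A hedged standalone sketch of that final comparison (plain integers, not LLVM SCEV expressions):

#include <cstdint>

// Final comparison of isConsecutiveAccess once both pointers strip to the
// same base: B must start exactly where A's store of Size bytes ends.
static bool consecutiveSameBase(int64_t OffsetA, int64_t OffsetB,
                                int64_t Size) {
  return OffsetB - OffsetA == Size;
}

// E.g. two 4-byte accesses one element apart:
// consecutiveSameBase(/*OffsetA=*/0, /*OffsetB=*/4, /*Size=*/4) == true.
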
1234
1235MemoryDepChecker::VectorizationSafetyStatus
1236MemoryDepChecker::Dependence::isSafeForVectorization(DepType Type) {
1237 switch (Type) {
1238 case NoDep:
1239 case Forward:
1240 case BackwardVectorizable:
1241 return VectorizationSafetyStatus::Safe;
1242
1243 case Unknown:
1244 return VectorizationSafetyStatus::PossiblySafeWithRtChecks;
1245 case ForwardButPreventsForwarding:
1246 case Backward:
1247 case BackwardVectorizableButPreventsForwarding:
1248 return VectorizationSafetyStatus::Unsafe;
1249 }
1250 llvm_unreachable("unexpected DepType!")::llvm::llvm_unreachable_internal("unexpected DepType!", "/build/llvm-toolchain-snapshot-10~+201911111502510600c19528f1809/llvm/lib/Analysis/LoopAccessAnalysis.cpp"
, 1250)
;
1251}
1252
1253bool MemoryDepChecker::Dependence::isBackward() const {
1254 switch (Type) {
1255 case NoDep:
1256 case Forward:
1257 case ForwardButPreventsForwarding:
1258 case Unknown:
1259 return false;
1260
1261 case BackwardVectorizable:
1262 case Backward:
1263 case BackwardVectorizableButPreventsForwarding:
1264 return true;
1265 }
1266 llvm_unreachable("unexpected DepType!");
1267}
1268
1269bool MemoryDepChecker::Dependence::isPossiblyBackward() const {
1270 return isBackward() || Type == Unknown;
1271}
1272
1273bool MemoryDepChecker::Dependence::isForward() const {
1274 switch (Type) {
1275 case Forward:
1276 case ForwardButPreventsForwarding:
1277 return true;
1278
1279 case NoDep:
1280 case Unknown:
1281 case BackwardVectorizable:
1282 case Backward:
1283 case BackwardVectorizableButPreventsForwarding:
1284 return false;
1285 }
1286 llvm_unreachable("unexpected DepType!");
1287}
1288
1289bool MemoryDepChecker::couldPreventStoreLoadForward(uint64_t Distance,
1290 uint64_t TypeByteSize) {
1291 // If loads occur at a distance that is not a multiple of a feasible vector
1292 // factor, store-load forwarding does not take place.
1293 // Positive dependences might cause trouble because vectorizing them might
1294 // prevent store-load forwarding, making vectorized code run a lot slower.
1295 // a[i] = a[i-3] ^ a[i-8];
1296 // The stores to a[i:i+1] don't align with the stores to a[i-3:i-2] and
1297 // hence on your typical architecture store-load forwarding does not take
1298 // place. Vectorizing in such cases does not make sense.
1299 // Store-load forwarding distance.
1300
1301 // After this many iterations store-to-load forwarding conflicts should not
1302 // cause any slowdowns.
1303 const uint64_t NumItersForStoreLoadThroughMemory = 8 * TypeByteSize;
1304 // Maximum vector factor.
1305 uint64_t MaxVFWithoutSLForwardIssues = std::min(
1306 VectorizerParams::MaxVectorWidth * TypeByteSize, MaxSafeDepDistBytes);
1307
1308 // Compute the smallest VF at which the store and load would be misaligned.
1309 for (uint64_t VF = 2 * TypeByteSize; VF <= MaxVFWithoutSLForwardIssues;
1310 VF *= 2) {
1311 // If the number of vector iterations between the store and the load is
1312 // small, we could incur conflicts.
1313 if (Distance % VF && Distance / VF < NumItersForStoreLoadThroughMemory) {
1314 MaxVFWithoutSLForwardIssues = (VF >>= 1);
Although the value stored to 'VF' is used in the enclosing expression, the value is never actually read from 'VF'
1315 break;
1316 }
1317 }
1318
1319 if (MaxVFWithoutSLForwardIssues < 2 * TypeByteSize) {
1320 LLVM_DEBUG(
1321 dbgs() << "LAA: Distance " << Distance
1322 << " that could cause a store-load forwarding conflict\n");
1323 return true;
1324 }
1325
1326 if (MaxVFWithoutSLForwardIssues < MaxSafeDepDistBytes &&
1327 MaxVFWithoutSLForwardIssues !=
1328 VectorizerParams::MaxVectorWidth * TypeByteSize)
1329 MaxSafeDepDistBytes = MaxVFWithoutSLForwardIssues;
1330 return false;
1331}
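
The VF search above is pure integer arithmetic, so it can be restated as a self-contained sketch (the MaxVectorWidth of 64 is an assumed stand-in for VectorizerParams::MaxVectorWidth, not the actual configured value):

#include <algorithm>
#include <cstdint>

// Sketch of the store-load forwarding check: find the largest power-of-two
// VF (in bytes) at which the store and load stay aligned often enough.
static bool mayPreventForwarding(uint64_t Distance, uint64_t TypeByteSize,
                                 uint64_t MaxSafeDepDistBytes) {
  const uint64_t MaxVectorWidth = 64; // assumed stand-in for the LLVM parameter
  const uint64_t NumItersForStoreLoadThroughMemory = 8 * TypeByteSize;
  uint64_t MaxVF =
      std::min(MaxVectorWidth * TypeByteSize, MaxSafeDepDistBytes);
  for (uint64_t VF = 2 * TypeByteSize; VF <= MaxVF; VF *= 2) {
    if (Distance % VF && Distance / VF < NumItersForStoreLoadThroughMemory) {
      MaxVF = VF / 2; // first misaligned VF; everything from here up is bad
      break;
    }
  }
  // No safe VF of at least two elements exists.
  return MaxVF < 2 * TypeByteSize;
}

// E.g. 4-byte elements at distance 12: VF = 8 misaligns (12 % 8 != 0) after
// only one vector iteration, so MaxVF collapses to 4 and the check fires.
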
1332
1333void MemoryDepChecker::mergeInStatus(VectorizationSafetyStatus S) {
1334 if (Status < S)
1335 Status = S;
1336}
1337
1338/// Given a non-constant (unknown) dependence-distance \p Dist between two
1339/// memory accesses, that have the same stride whose absolute value is given
1340/// in \p Stride, and that have the same type size \p TypeByteSize,
1341/// in a loop whose takenCount is \p BackedgeTakenCount, check if it is
1342/// possible to prove statically that the dependence distance is larger
1343/// than the range that the accesses will travel through the execution of
1344/// the loop. If so, return true; false otherwise. This is useful for
1345/// example in loops such as the following (PR31098):
1346/// for (i = 0; i < D; ++i) {
1347/// = out[i];
1348/// out[i+D] =
1349/// }
1350static bool isSafeDependenceDistance(const DataLayout &DL, ScalarEvolution &SE,
1351 const SCEV &BackedgeTakenCount,
1352 const SCEV &Dist, uint64_t Stride,
1353 uint64_t TypeByteSize) {
1354
1355 // If we can prove that
1356 // (**) |Dist| > BackedgeTakenCount * Step
1357 // where Step is the absolute stride of the memory accesses in bytes,
1358 // then there is no dependence.
1359 //
1360 // Rationale:
1361 // We basically want to check if the absolute distance (|Dist/Step|)
1362 // is >= the loop iteration count (or > BackedgeTakenCount).
1363 // This is equivalent to the Strong SIV Test (Practical Dependence Testing,
1364 // Section 4.2.1); note that for vectorization it is sufficient to prove
1365 // that the dependence distance is >= VF; This is checked elsewhere.
1366 // But in some cases we can prune unknown dependence distances early, and
1367 // even before selecting the VF, and without a runtime test, by comparing
1368 // the distance against the loop iteration count. Since the vectorized code
1369 // will be executed only if LoopCount >= VF, proving distance >= LoopCount
1370 // also guarantees that distance >= VF.
1371 //
1372 const uint64_t ByteStride = Stride * TypeByteSize;
1373 const SCEV *Step = SE.getConstant(BackedgeTakenCount.getType(), ByteStride);
1374 const SCEV *Product = SE.getMulExpr(&BackedgeTakenCount, Step);
1375
1376 const SCEV *CastedDist = &Dist;
1377 const SCEV *CastedProduct = Product;
1378 uint64_t DistTypeSize = DL.getTypeAllocSize(Dist.getType());
1379 uint64_t ProductTypeSize = DL.getTypeAllocSize(Product->getType());
1380
1381 // The dependence distance can be positive/negative, so we sign extend Dist;
1382 // The multiplication of the absolute stride in bytes and the
1383 // backedgeTakenCount is non-negative, so we zero extend Product.
1384 if (DistTypeSize > ProductTypeSize)
1385 CastedProduct = SE.getZeroExtendExpr(Product, Dist.getType());
1386 else
1387 CastedDist = SE.getNoopOrSignExtend(&Dist, Product->getType());
1388
1389 // Is Dist - (BackedgeTakenCount * Step) > 0 ?
1390 // (If so, then we have proven (**) because |Dist| >= Dist)
1391 const SCEV *Minus = SE.getMinusSCEV(CastedDist, CastedProduct);
1392 if (SE.isKnownPositive(Minus))
1393 return true;
1394
1395 // Second try: Is -Dist - (BackedgeTakenCount * Step) > 0 ?
1396 // (If so, then we have proven (**) because |Dist| >= -1*Dist)
1397 const SCEV *NegDist = SE.getNegativeSCEV(CastedDist);
1398 Minus = SE.getMinusSCEV(NegDist, CastedProduct);
1399 if (SE.isKnownPositive(Minus))
1400 return true;
1401
1402 return false;
1403}
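
A standalone rendering of the (**) inequality with concrete integers may help; this is a sketch that assumes no overflow and skips the SCEV sign/zero-extension handling above:

#include <cstdint>

// Sketch of the (**) test: |Dist| > BackedgeTakenCount * Step, where
// Step = Stride * TypeByteSize. Assumes none of the products overflow.
static bool knownIndependent(int64_t Dist, uint64_t BackedgeTakenCount,
                             uint64_t Stride, uint64_t TypeByteSize) {
  const int64_t Range =
      (int64_t)(BackedgeTakenCount * Stride * TypeByteSize);
  // Mirrors the two isKnownPositive queries: Dist - Range > 0 or
  // -Dist - Range > 0, which together prove |Dist| > Range.
  return Dist > Range || -Dist > Range;
}

// PR31098 with D = 100 and 4-byte ints: Dist = 400, BackedgeTakenCount = 99,
// Range = 99 * 1 * 4 = 396, and 400 > 396, so there is no dependence.
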
1404
1405/// Check the dependence for two accesses with the same stride \p Stride.
1406/// \p Distance is the positive distance and \p TypeByteSize is type size in
1407/// bytes.
1408///
1409/// \returns true if they are independent.
1410static bool areStridedAccessesIndependent(uint64_t Distance, uint64_t Stride,
1411 uint64_t TypeByteSize) {
1412 assert(Stride > 1 && "The stride must be greater than 1");
1413 assert(TypeByteSize > 0 && "The type size in byte must be non-zero");
1414 assert(Distance > 0 && "The distance must be non-zero");
1415
1416 // Skip if the distance is not a multiple of the type byte size.
1417 if (Distance % TypeByteSize)
1418 return false;
1419
1420 uint64_t ScaledDist = Distance / TypeByteSize;
1421
1422 // No dependence if the scaled distance is not a multiple of the stride.
1423 // E.g.
1424 // for (i = 0; i < 1024 ; i += 4)
1425 // A[i+2] = A[i] + 1;
1426 //
1427 // Two accesses in memory (scaled distance is 2, stride is 4):
1428 // | A[0] | | | | A[4] | | | |
1429 // | | | A[2] | | | | A[6] | |
1430 //
1431 // E.g.
1432 // for (i = 0; i < 1024 ; i += 3)
1433 // A[i+4] = A[i] + 1;
1434 //
1435 // Two accesses in memory (scaled distance is 4, stride is 3):
1436 // | A[0] | | | A[3] | | | A[6] | | |
1437 // | | | | | A[4] | | | A[7] | |
1438 return ScaledDist % Stride;
1439}
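
The two diagrams in the comments above can be checked directly with a self-contained restatement of the same modulo arithmetic (assuming 4-byte elements for the examples):

#include <cassert>
#include <cstdint>

// Standalone restatement of areStridedAccessesIndependent: the accesses are
// provably independent when the element-scaled distance is a non-multiple
// of the stride (the two access streams interleave without ever meeting).
static bool stridedIndependent(uint64_t Distance, uint64_t Stride,
                               uint64_t TypeByteSize) {
  if (Distance % TypeByteSize)
    return false; // not even element-aligned; cannot reason this way
  uint64_t ScaledDist = Distance / TypeByteSize;
  return ScaledDist % Stride != 0;
}

int main() {
  // A[i+2] = A[i], i += 4: scaled distance 2, stride 4 -> independent.
  assert(stridedIndependent(/*Distance=*/8, /*Stride=*/4, /*TypeByteSize=*/4));
  // A[i+4] = A[i], i += 3: scaled distance 4, stride 3 -> independent.
  assert(stridedIndependent(/*Distance=*/16, /*Stride=*/3, /*TypeByteSize=*/4));
  // A[i+4] = A[i], i += 2: scaled distance 4, stride 2 -> NOT independent.
  assert(!stridedIndependent(/*Distance=*/16, /*Stride=*/2, /*TypeByteSize=*/4));
}
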
1440
1441MemoryDepChecker::Dependence::DepType
1442MemoryDepChecker::isDependent(const MemAccessInfo &A, unsigned AIdx,
1443 const MemAccessInfo &B, unsigned BIdx,
1444 const ValueToValueMap &Strides) {
1445 assert(AIdx < BIdx && "Must pass arguments in program order");
1446
1447 Value *APtr = A.getPointer();
1448 Value *BPtr = B.getPointer();
1449 bool AIsWrite = A.getInt();
1450 bool BIsWrite = B.getInt();
1451
1452 // Two reads are independent.
1453 if (!AIsWrite && !BIsWrite)
1454 return Dependence::NoDep;
1455
1456 // We cannot check pointers in different address spaces.
1457 if (APtr->getType()->getPointerAddressSpace() !=
1458 BPtr->getType()->getPointerAddressSpace())
1459 return Dependence::Unknown;
1460
1461 int64_t StrideAPtr = getPtrStride(PSE, APtr, InnermostLoop, Strides, true);
1462 int64_t StrideBPtr = getPtrStride(PSE, BPtr, InnermostLoop, Strides, true);
1463
1464 const SCEV *Src = PSE.getSCEV(APtr);
1465 const SCEV *Sink = PSE.getSCEV(BPtr);
1466
1467 // If the induction step is negative we have to invert source and sink of the
1468 // dependence.
1469 if (StrideAPtr < 0) {
1470 std::swap(APtr, BPtr);
1471 std::swap(Src, Sink);
1472 std::swap(AIsWrite, BIsWrite);
1473 std::swap(AIdx, BIdx);
1474 std::swap(StrideAPtr, StrideBPtr);
1475 }
1476
1477 const SCEV *Dist = PSE.getSE()->getMinusSCEV(Sink, Src);
1478
1479 LLVM_DEBUG(dbgs() << "LAA: Src Scev: " << *Src << "Sink Scev: " << *Sinkdo { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-accesses")) { dbgs() << "LAA: Src Scev: " <<
*Src << "Sink Scev: " << *Sink << "(Induction step: "
<< StrideAPtr << ")\n"; } } while (false)
1480 << "(Induction step: " << StrideAPtr << ")\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-accesses")) { dbgs() << "LAA: Src Scev: " <<
*Src << "Sink Scev: " << *Sink << "(Induction step: "
<< StrideAPtr << ")\n"; } } while (false)
;
1481 LLVM_DEBUG(dbgs() << "LAA: Distance for " << *InstMap[AIdx] << " to "do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-accesses")) { dbgs() << "LAA: Distance for " <<
*InstMap[AIdx] << " to " << *InstMap[BIdx] <<
": " << *Dist << "\n"; } } while (false)
1482 << *InstMap[BIdx] << ": " << *Dist << "\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-accesses")) { dbgs() << "LAA: Distance for " <<
*InstMap[AIdx] << " to " << *InstMap[BIdx] <<
": " << *Dist << "\n"; } } while (false)
;
1483
1484 // Need accesses with constant stride. We don't want to vectorize
1485 // "A[B[i]] += ..." and similar code or pointer arithmetic that could wrap in
1486 // the address space.
1487 if (!StrideAPtr || !StrideBPtr || StrideAPtr != StrideBPtr) {
1488 LLVM_DEBUG(dbgs() << "Pointer access with non-constant stride\n");
1489 return Dependence::Unknown;
1490 }
1491
1492 Type *ATy = APtr->getType()->getPointerElementType();
1493 Type *BTy = BPtr->getType()->getPointerElementType();
1494 auto &DL = InnermostLoop->getHeader()->getModule()->getDataLayout();
1495 uint64_t TypeByteSize = DL.getTypeAllocSize(ATy);
1496 uint64_t Stride = std::abs(StrideAPtr);
1497 const SCEVConstant *C = dyn_cast<SCEVConstant>(Dist);
1498 if (!C) {
1499 if (TypeByteSize == DL.getTypeAllocSize(BTy) &&
1500 isSafeDependenceDistance(DL, *(PSE.getSE()),
1501 *(PSE.getBackedgeTakenCount()), *Dist, Stride,
1502 TypeByteSize))
1503 return Dependence::NoDep;
1504
1505 LLVM_DEBUG(dbgs() << "LAA: Dependence because of non-constant distance\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-accesses")) { dbgs() << "LAA: Dependence because of non-constant distance\n"
; } } while (false)
;
1506 FoundNonConstantDistanceDependence = true;
1507 return Dependence::Unknown;
1508 }
1509
1510 const APInt &Val = C->getAPInt();
1511 int64_t Distance = Val.getSExtValue();
1512
1513 // Attempt to prove strided accesses independent.
1514 if (std::abs(Distance) > 0 && Stride > 1 && ATy == BTy &&
1515 areStridedAccessesIndependent(std::abs(Distance), Stride, TypeByteSize)) {
1516 LLVM_DEBUG(dbgs() << "LAA: Strided accesses are independent\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-accesses")) { dbgs() << "LAA: Strided accesses are independent\n"
; } } while (false)
;
1517 return Dependence::NoDep;
1518 }
1519
1520 // Negative distances are not plausible dependencies.
1521 if (Val.isNegative()) {
1522 bool IsTrueDataDependence = (AIsWrite && !BIsWrite);
1523 if (IsTrueDataDependence && EnableForwardingConflictDetection &&
1524 (couldPreventStoreLoadForward(Val.abs().getZExtValue(), TypeByteSize) ||
1525 ATy != BTy)) {
1526 LLVM_DEBUG(dbgs() << "LAA: Forward but may prevent st->ld forwarding\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-accesses")) { dbgs() << "LAA: Forward but may prevent st->ld forwarding\n"
; } } while (false)
;
1527 return Dependence::ForwardButPreventsForwarding;
1528 }
1529
1530 LLVM_DEBUG(dbgs() << "LAA: Dependence is negative\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-accesses")) { dbgs() << "LAA: Dependence is negative\n"
; } } while (false)
;
1531 return Dependence::Forward;
1532 }
1533
1534 // Write to the same location with the same size.
1535 // Could be improved to assert type sizes are the same (i32 == float, etc).
1536 if (Val == 0) {
1537 if (ATy == BTy)
1538 return Dependence::Forward;
1539 LLVM_DEBUG(
1540 dbgs() << "LAA: Zero dependence difference but different types\n");
1541 return Dependence::Unknown;
1542 }
1543
1544 assert(Val.isStrictlyPositive() && "Expect a positive value");
1545
1546 if (ATy != BTy) {
1547 LLVM_DEBUG(
1548 dbgs()
1549 << "LAA: ReadWrite-Write positive dependency with different types\n");
1550 return Dependence::Unknown;
1551 }
1552
1553 // Bail out early if passed-in parameters make vectorization not feasible.
1554 unsigned ForcedFactor = (VectorizerParams::VectorizationFactor ?
1555 VectorizerParams::VectorizationFactor : 1);
1556 unsigned ForcedUnroll = (VectorizerParams::VectorizationInterleave ?
1557 VectorizerParams::VectorizationInterleave : 1);
1558 // The minimum number of iterations for a vectorized/unrolled version.
1559 unsigned MinNumIter = std::max(ForcedFactor * ForcedUnroll, 2U);
1560
1561 // It's not vectorizable if the distance is smaller than the minimum distance
1562 // needed for a vectorized/unrolled version. Vectorizing one iteration in
1563 // front needs TypeByteSize * Stride. Vectorizing the last iteration needs
1564 // TypeByteSize (no need to add the last gap distance).
1565 //
1566 // E.g. Assume one char is 1 byte in memory and one int is 4 bytes.
1567 // foo(int *A) {
1568 // int *B = (int *)((char *)A + 14);
1569 // for (i = 0 ; i < 1024 ; i += 2)
1570 // B[i] = A[i] + 1;
1571 // }
1572 //
1573 // Two accesses in memory (stride is 2):
1574 // | A[0] | | A[2] | | A[4] | | A[6] | |
1575 // | B[0] | | B[2] | | B[4] |
1576 //
1577 // Distance needs for vectorizing iterations except the last iteration:
1578 // 4 * 2 * (MinNumIter - 1). Distance needs for the last iteration: 4.
1579 // So the minimum distance needed is: 4 * 2 * (MinNumIter - 1) + 4.
1580 //
1581 // If MinNumIter is 2, it is vectorizable as the minimum distance needed is
1582 // 12, which is less than distance.
1583 //
1584 // If MinNumIter is 4 (Say if a user forces the vectorization factor to be 4),
1585 // the minimum distance needed is 28, which is greater than distance. It is
1586 // not safe to do vectorization.
1587 uint64_t MinDistanceNeeded =
1588 TypeByteSize * Stride * (MinNumIter - 1) + TypeByteSize;
1589 if (MinDistanceNeeded > static_cast<uint64_t>(Distance)) {
1590 LLVM_DEBUG(dbgs() << "LAA: Failure because of positive distance "do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-accesses")) { dbgs() << "LAA: Failure because of positive distance "
<< Distance << '\n'; } } while (false)
1591 << Distance << '\n')do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-accesses")) { dbgs() << "LAA: Failure because of positive distance "
<< Distance << '\n'; } } while (false)
;
1592 return Dependence::Backward;
1593 }
1594
1595 // Unsafe if the minimum distance needed is greater than max safe distance.
1596 if (MinDistanceNeeded > MaxSafeDepDistBytes) {
1597 LLVM_DEBUG(dbgs() << "LAA: Failure because it needs at least "do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-accesses")) { dbgs() << "LAA: Failure because it needs at least "
<< MinDistanceNeeded << " size in bytes"; } } while
(false)
1598 << MinDistanceNeeded << " size in bytes")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-accesses")) { dbgs() << "LAA: Failure because it needs at least "
<< MinDistanceNeeded << " size in bytes"; } } while
(false)
;
1599 return Dependence::Backward;
1600 }
1601
1602 // Positive distance bigger than max vectorization factor.
1603 // FIXME: Should use max factor instead of max distance in bytes, which could
1604 // not handle different types.
1605 // E.g. Assume one char is 1 byte in memory and one int is 4 bytes.
1606 // void foo (int *A, char *B) {
1607 // for (unsigned i = 0; i < 1024; i++) {
1608 // A[i+2] = A[i] + 1;
1609 // B[i+2] = B[i] + 1;
1610 // }
1611 // }
1612 //
1613 // This case is currently unsafe according to the max safe distance. If we
1614 // analyze the two accesses on array B, the max safe dependence distance
1615 // is 2. Then when we analyze the accesses on array A, the minimum distance
1616 // needed is 8, which is greater than 2 and forbids vectorization. But actually
1617 // both A and B could be vectorized by 2 iterations.
1618 MaxSafeDepDistBytes =
1619 std::min(static_cast<uint64_t>(Distance), MaxSafeDepDistBytes);
1620
1621 bool IsTrueDataDependence = (!AIsWrite && BIsWrite);
1622 if (IsTrueDataDependence && EnableForwardingConflictDetection &&
1623 couldPreventStoreLoadForward(Distance, TypeByteSize))
1624 return Dependence::BackwardVectorizableButPreventsForwarding;
1625
1626 uint64_t MaxVF = MaxSafeDepDistBytes / (TypeByteSize * Stride);
1627 LLVM_DEBUG(dbgs() << "LAA: Positive distance " << Val.getSExtValue()do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-accesses")) { dbgs() << "LAA: Positive distance "
<< Val.getSExtValue() << " with max VF = " <<
MaxVF << '\n'; } } while (false)
1628 << " with max VF = " << MaxVF << '\n')do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-accesses")) { dbgs() << "LAA: Positive distance "
<< Val.getSExtValue() << " with max VF = " <<
MaxVF << '\n'; } } while (false)
;
1629 uint64_t MaxVFInBits = MaxVF * TypeByteSize * 8;
1630 MaxSafeRegisterWidth = std::min(MaxSafeRegisterWidth, MaxVFInBits);
1631 return Dependence::BackwardVectorizable;
1632}
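
The distance formula used in the positive-distance case can be exercised with a tiny standalone helper mirroring the expression in the code (a sketch, not an LLVM API):

#include <cstdint>

// Mirrors MinDistanceNeeded: vectorizing each of the MinNumIter - 1 leading
// iterations needs TypeByteSize * Stride bytes, and the last iteration needs
// only TypeByteSize (no trailing gap).
static uint64_t minDistanceNeeded(uint64_t TypeByteSize, uint64_t Stride,
                                  unsigned MinNumIter) {
  return TypeByteSize * Stride * (MinNumIter - 1) + TypeByteSize;
}

// For the foo(int *A) example above (4-byte ints, stride 2, distance 14):
// minDistanceNeeded(4, 2, 2) == 12 <= 14, so VF 2 is fine, while
// minDistanceNeeded(4, 2, 4) == 28 > 14, so a forced VF of 4 is rejected
// with Dependence::Backward.
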
1633
1634bool MemoryDepChecker::areDepsSafe(DepCandidates &AccessSets,
1635 MemAccessInfoList &CheckDeps,
1636 const ValueToValueMap &Strides) {
1637
1638 MaxSafeDepDistBytes = -1;
1639 SmallPtrSet<MemAccessInfo, 8> Visited;
1640 for (MemAccessInfo CurAccess : CheckDeps) {
1641 if (Visited.count(CurAccess))
1642 continue;
1643
1644 // Get the relevant memory access set.
1645 EquivalenceClasses<MemAccessInfo>::iterator I =
1646 AccessSets.findValue(AccessSets.getLeaderValue(CurAccess));
1647
1648 // Check accesses within this set.
1649 EquivalenceClasses<MemAccessInfo>::member_iterator AI =
1650 AccessSets.member_begin(I);
1651 EquivalenceClasses<MemAccessInfo>::member_iterator AE =
1652 AccessSets.member_end();
1653
1654 // Check every access pair.
1655 while (AI != AE) {
1656 Visited.insert(*AI);
1657 bool AIIsWrite = AI->getInt();
1658 // Check loads only against the accesses that follow them, but check stores
1659 // also against other stores in the same equivalence class - to the same address.
1660 EquivalenceClasses<MemAccessInfo>::member_iterator OI =
1661 (AIIsWrite ? AI : std::next(AI));
1662 while (OI != AE) {
1663 // Check every accessing instruction pair in program order.
1664 for (std::vector<unsigned>::iterator I1 = Accesses[*AI].begin(),
1665 I1E = Accesses[*AI].end(); I1 != I1E; ++I1)
1666 // Scan all accesses of another equivalence class, but only the
1667 // subsequent accesses of the same equivalence class.
1668 for (std::vector<unsigned>::iterator
1669 I2 = (OI == AI ? std::next(I1) : Accesses[*OI].begin()),
1670 I2E = (OI == AI ? I1E : Accesses[*OI].end());
1671 I2 != I2E; ++I2) {
1672 auto A = std::make_pair(&*AI, *I1);
1673 auto B = std::make_pair(&*OI, *I2);
1674
1675 assert(*I1 != *I2);
1676 if (*I1 > *I2)
1677 std::swap(A, B);
1678
1679 Dependence::DepType Type =
1680 isDependent(*A.first, A.second, *B.first, B.second, Strides);
1681 mergeInStatus(Dependence::isSafeForVectorization(Type));
1682
1683 // Gather dependences unless we accumulated MaxDependences
1684 // dependences. In that case return as soon as we find the first
1685 // unsafe dependence. This puts a limit on this quadratic
1686 // algorithm.
1687 if (RecordDependences) {
1688 if (Type != Dependence::NoDep)
1689 Dependences.push_back(Dependence(A.second, B.second, Type));
1690
1691 if (Dependences.size() >= MaxDependences) {
1692 RecordDependences = false;
1693 Dependences.clear();
1694 LLVM_DEBUG(dbgs()
1695 << "Too many dependences, stopped recording\n");
1696 }
1697 }
1698 if (!RecordDependences && !isSafeForVectorization())
1699 return false;
1700 }
1701 ++OI;
1702 }
1703 AI++;
1704 }
1705 }
1706
1707 LLVM_DEBUG(dbgs() << "Total Dependences: " << Dependences.size() << "\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-accesses")) { dbgs() << "Total Dependences: " <<
Dependences.size() << "\n"; } } while (false)
;
1708 return isSafeForVectorization();
1709}
1710
1711SmallVector<Instruction *, 4>
1712MemoryDepChecker::getInstructionsForAccess(Value *Ptr, bool isWrite) const {
1713 MemAccessInfo Access(Ptr, isWrite);
1714 auto &IndexVector = Accesses.find(Access)->second;
1715
1716 SmallVector<Instruction *, 4> Insts;
1717 transform(IndexVector,
1718 std::back_inserter(Insts),
1719 [&](unsigned Idx) { return this->InstMap[Idx]; });
1720 return Insts;
1721}
1722
1723const char *MemoryDepChecker::Dependence::DepName[] = {
1724 "NoDep", "Unknown", "Forward", "ForwardButPreventsForwarding", "Backward",
1725 "BackwardVectorizable", "BackwardVectorizableButPreventsForwarding"};
1726
1727void MemoryDepChecker::Dependence::print(
1728 raw_ostream &OS, unsigned Depth,
1729 const SmallVectorImpl<Instruction *> &Instrs) const {
1730 OS.indent(Depth) << DepName[Type] << ":\n";
1731 OS.indent(Depth + 2) << *Instrs[Source] << " -> \n";
1732 OS.indent(Depth + 2) << *Instrs[Destination] << "\n";
1733}
1734
1735bool LoopAccessInfo::canAnalyzeLoop() {
1736 // We need to have a loop header.
1737 LLVM_DEBUG(dbgs() << "LAA: Found a loop in "do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-accesses")) { dbgs() << "LAA: Found a loop in " <<
TheLoop->getHeader()->getParent()->getName() <<
": " << TheLoop->getHeader()->getName() <<
'\n'; } } while (false)
1738 << TheLoop->getHeader()->getParent()->getName() << ": "do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-accesses")) { dbgs() << "LAA: Found a loop in " <<
TheLoop->getHeader()->getParent()->getName() <<
": " << TheLoop->getHeader()->getName() <<
'\n'; } } while (false)
1739 << TheLoop->getHeader()->getName() << '\n')do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-accesses")) { dbgs() << "LAA: Found a loop in " <<
TheLoop->getHeader()->getParent()->getName() <<
": " << TheLoop->getHeader()->getName() <<
'\n'; } } while (false)
;
1740
1741 // We can only analyze innermost loops.
1742 if (!TheLoop->empty()) {
1743 LLVM_DEBUG(dbgs() << "LAA: loop is not the innermost loop\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-accesses")) { dbgs() << "LAA: loop is not the innermost loop\n"
; } } while (false)
;
1744 recordAnalysis("NotInnerMostLoop") << "loop is not the innermost loop";
1745 return false;
1746 }
1747
1748 // We must have a single backedge.
1749 if (TheLoop->getNumBackEdges() != 1) {
1750 LLVM_DEBUG(
1751 dbgs() << "LAA: loop control flow is not understood by analyzer\n");
1752 recordAnalysis("CFGNotUnderstood")
1753 << "loop control flow is not understood by analyzer";
1754 return false;
1755 }
1756
1757 // We must have a single exiting block.
1758 if (!TheLoop->getExitingBlock()) {
1759 LLVM_DEBUG(
1760 dbgs() << "LAA: loop control flow is not understood by analyzer\n");
1761 recordAnalysis("CFGNotUnderstood")
1762 << "loop control flow is not understood by analyzer";
1763 return false;
1764 }
1765
1766 // We only handle bottom-tested loops, i.e. loops in which the condition is
1767 // checked at the end of each iteration. With that we can assume that all
1768 // instructions in the loop are executed the same number of times.
1769 if (TheLoop->getExitingBlock() != TheLoop->getLoopLatch()) {
1770 LLVM_DEBUG(
1771 dbgs() << "LAA: loop control flow is not understood by analyzer\n");
1772 recordAnalysis("CFGNotUnderstood")
1773 << "loop control flow is not understood by analyzer";
1774 return false;
1775 }
1776
1777 // ScalarEvolution needs to be able to find the exit count.
1778 const SCEV *ExitCount = PSE->getBackedgeTakenCount();
1779 if (ExitCount == PSE->getSE()->getCouldNotCompute()) {
1780 recordAnalysis("CantComputeNumberOfIterations")
1781 << "could not determine number of loop iterations";
1782 LLVM_DEBUG(dbgs() << "LAA: SCEV could not compute the loop exit count.\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-accesses")) { dbgs() << "LAA: SCEV could not compute the loop exit count.\n"
; } } while (false)
;
1783 return false;
1784 }
1785
1786 return true;
1787}
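
Taken together, the structural checks above admit loops of the following C++ shape; this is an illustrative sketch, not a test from the LLVM suite:

// Shape of a loop that passes canAnalyzeLoop: innermost, one backedge, one
// exiting block that is also the latch, and a SCEV-computable trip count.
void saxpy(float *y, const float *x, float a, int n) {
  for (int i = 0; i < n; ++i) // trip count n is computable
    y[i] = a * x[i] + y[i];
}

// An early `if (...) break;` in the body would introduce a second exit that
// is not the latch, failing the single-exit and bottom-tested checks above.
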
1788
1789void LoopAccessInfo::analyzeLoop(AliasAnalysis *AA, LoopInfo *LI,
1790 const TargetLibraryInfo *TLI,
1791 DominatorTree *DT) {
1792 typedef SmallPtrSet<Value*, 16> ValueSet;
1793
1794 // Holds the Load and Store instructions.
1795 SmallVector<LoadInst *, 16> Loads;
1796 SmallVector<StoreInst *, 16> Stores;
1797
1798 // Holds all the different accesses in the loop.
1799 unsigned NumReads = 0;
1800 unsigned NumReadWrites = 0;
1801
1802 bool HasComplexMemInst = false;
1803
1804 // A runtime check is only legal to insert if there are no convergent calls.
1805 HasConvergentOp = false;
1806
1807 PtrRtChecking->Pointers.clear();
1808 PtrRtChecking->Need = false;
1809
1810 const bool IsAnnotatedParallel = TheLoop->isAnnotatedParallel();
1811
1812 // For each block.
1813 for (BasicBlock *BB : TheLoop->blocks()) {
1814 // Scan the BB and collect legal loads and stores. Also detect any
1815 // convergent instructions.
1816 for (Instruction &I : *BB) {
1817 if (auto *Call = dyn_cast<CallBase>(&I)) {
1818 if (Call->isConvergent())
1819 HasConvergentOp = true;
1820 }
1821
1822 // If both a non-vectorizable memory instruction and a convergent operation
1823 // have been found in this loop, there is no reason to continue the search.
1824 if (HasComplexMemInst && HasConvergentOp) {
1825 CanVecMem = false;
1826 return;
1827 }
1828
1829 // Avoid hitting recordAnalysis multiple times.
1830 if (HasComplexMemInst)
1831 continue;
1832
1833 // If this is a load, save it. If this instruction can read from memory
1834 // but is not a load, then we quit. Notice that we don't handle function
1835 // calls that read or write.
1836 if (I.mayReadFromMemory()) {
1837 // Many math library functions read the rounding mode. We will only
1838 // vectorize a loop if it contains known function calls that don't set
1839 // the flag. Therefore, it is safe to ignore this read from memory.
1840 auto *Call = dyn_cast<CallInst>(&I);
1841 if (Call && getVectorIntrinsicIDForCall(Call, TLI))
1842 continue;
1843
1844 // If the function has an explicit vectorized counterpart, we can safely
1845 // assume that it can be vectorized.
1846 if (Call && !Call->isNoBuiltin() && Call->getCalledFunction() &&
1847 TLI->isFunctionVectorizable(Call->getCalledFunction()->getName()))
1848 continue;
1849
1850 auto *Ld = dyn_cast<LoadInst>(&I);
1851 if (!Ld) {
1852 recordAnalysis("CantVectorizeInstruction", Ld)
1853 << "instruction cannot be vectorized";
1854 HasComplexMemInst = true;
1855 continue;
1856 }
1857 if (!Ld->isSimple() && !IsAnnotatedParallel) {
1858 recordAnalysis("NonSimpleLoad", Ld)
1859 << "read with atomic ordering or volatile read";
1860 LLVM_DEBUG(dbgs() << "LAA: Found a non-simple load.\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-accesses")) { dbgs() << "LAA: Found a non-simple load.\n"
; } } while (false)
;
1861 HasComplexMemInst = true;
1862 continue;
1863 }
1864 NumLoads++;
1865 Loads.push_back(Ld);
1866 DepChecker->addAccess(Ld);
1867 if (EnableMemAccessVersioning)
1868 collectStridedAccess(Ld);
1869 continue;
1870 }
1871
1872 // Save 'store' instructions. Abort if other instructions write to memory.
1873 if (I.mayWriteToMemory()) {
1874 auto *St = dyn_cast<StoreInst>(&I);
1875 if (!St) {
1876 recordAnalysis("CantVectorizeInstruction", St)
1877 << "instruction cannot be vectorized";
1878 HasComplexMemInst = true;
1879 continue;
1880 }
1881 if (!St->isSimple() && !IsAnnotatedParallel) {
1882 recordAnalysis("NonSimpleStore", St)
1883 << "write with atomic ordering or volatile write";
1884 LLVM_DEBUG(dbgs() << "LAA: Found a non-simple store.\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-accesses")) { dbgs() << "LAA: Found a non-simple store.\n"
; } } while (false)
;
1885 HasComplexMemInst = true;
1886 continue;
1887 }
1888 NumStores++;
1889 Stores.push_back(St);
1890 DepChecker->addAccess(St);
1891 if (EnableMemAccessVersioning)
1892 collectStridedAccess(St);
1893 }
1894 } // Next instr.
1895 } // Next block.
1896
1897 if (HasComplexMemInst) {
1898 CanVecMem = false;
1899 return;
1900 }
1901
1902 // Now we have two lists that hold the loads and the stores.
1903 // Next, we find the pointers that they use.
1904
1905 // Check if we see any stores. If there are no stores, then we don't
1906 // care if the pointers are *restrict*.
1907 if (!Stores.size()) {
1908 LLVM_DEBUG(dbgs() << "LAA: Found a read-only loop!\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-accesses")) { dbgs() << "LAA: Found a read-only loop!\n"
; } } while (false)
;
1909 CanVecMem = true;
1910 return;
1911 }
1912
1913 MemoryDepChecker::DepCandidates DependentAccesses;
1914 AccessAnalysis Accesses(TheLoop->getHeader()->getModule()->getDataLayout(),
1915 TheLoop, AA, LI, DependentAccesses, *PSE);
1916
1917 // Holds the analyzed pointers. We don't want to call GetUnderlyingObjects
1918 // multiple times on the same object. If the ptr is accessed twice, once
1919 // for read and once for write, it will only appear once (on the write
1920 // list). This is okay, since we are going to check for conflicts between
1921 // writes and between reads and writes, but not between reads and reads.
1922 ValueSet Seen;
1923
1924 // Record uniform store addresses to identify if we have multiple stores
1925 // to the same address.
1926 ValueSet UniformStores;
1927
1928 for (StoreInst *ST : Stores) {
1929 Value *Ptr = ST->getPointerOperand();
1930
1931 if (isUniform(Ptr))
1932 HasDependenceInvolvingLoopInvariantAddress |=
1933 !UniformStores.insert(Ptr).second;
1934
1935 // If we did *not* see this pointer before, insert it to the read-write
1936 // list. At this phase it is only a 'write' list.
1937 if (Seen.insert(Ptr).second) {
1938 ++NumReadWrites;
1939
1940 MemoryLocation Loc = MemoryLocation::get(ST);
1941 // The TBAA metadata could have a control dependency on the predication
1942 // condition, so we cannot rely on it when determining whether or not we
1943 // need runtime pointer checks.
1944 if (blockNeedsPredication(ST->getParent(), TheLoop, DT))
1945 Loc.AATags.TBAA = nullptr;
1946
1947 Accesses.addStore(Loc);
1948 }
1949 }
1950
1951 if (IsAnnotatedParallel) {
1952 LLVM_DEBUG(
1953 dbgs() << "LAA: A loop annotated parallel, ignore memory dependency "
1954 << "checks.\n");
1955 CanVecMem = true;
1956 return;
1957 }
1958
1959 for (LoadInst *LD : Loads) {
1960 Value *Ptr = LD->getPointerOperand();
1961 // If we did *not* see this pointer before, insert it to the
1962 // read list. If we *did* see it before, then it is already in
1963 // the read-write list. This allows us to vectorize expressions
1964 // such as A[i] += x, because the address of A[i] is a read-write
1965 // pointer. This only works if the index of A[i] is consecutive.
1966 // If the address of i is unknown (for example A[B[i]]) then we may
1967 // read a few words, modify, and write a few words, and some of the
1968 // words may be written to the same address.
1969 bool IsReadOnlyPtr = false;
1970 if (Seen.insert(Ptr).second ||
1971 !getPtrStride(*PSE, Ptr, TheLoop, SymbolicStrides)) {
1972 ++NumReads;
1973 IsReadOnlyPtr = true;
1974 }
1975
1976 // See if there is an unsafe dependency between a load from a uniform address
1977 // and a store to the same uniform address.
1978 if (UniformStores.count(Ptr)) {
1979 LLVM_DEBUG(dbgs() << "LAA: Found an unsafe dependency between a uniform "do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-accesses")) { dbgs() << "LAA: Found an unsafe dependency between a uniform "
"load and uniform store to the same address!\n"; } } while (
false)
1980 "load and uniform store to the same address!\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-accesses")) { dbgs() << "LAA: Found an unsafe dependency between a uniform "
"load and uniform store to the same address!\n"; } } while (
false)
;
1981 HasDependenceInvolvingLoopInvariantAddress = true;
1982 }
1983
1984 MemoryLocation Loc = MemoryLocation::get(LD);
1985 // The TBAA metadata could have a control dependency on the predication
1986 // condition, so we cannot rely on it when determining whether or not we
1987 // need runtime pointer checks.
1988 if (blockNeedsPredication(LD->getParent(), TheLoop, DT))
1989 Loc.AATags.TBAA = nullptr;
1990
1991 Accesses.addLoad(Loc, IsReadOnlyPtr);
1992 }
1993
1994 // If we write (or read-write) to a single destination and there are no
1995 // other reads in this loop, then it is safe to vectorize.
1996 if (NumReadWrites == 1 && NumReads == 0) {
1997 LLVM_DEBUG(dbgs() << "LAA: Found a write-only loop!\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-accesses")) { dbgs() << "LAA: Found a write-only loop!\n"
; } } while (false)
;
1998 CanVecMem = true;
1999 return;
2000 }
2001
2002 // Build dependence sets and check whether we need a runtime pointer bounds
2003 // check.
2004 Accesses.buildDependenceSets();
2005
2006 // Find pointers with computable bounds. We are going to use this information
2007 // to place a runtime bound check.
2008 bool CanDoRTIfNeeded = Accesses.canCheckPtrAtRT(*PtrRtChecking, PSE->getSE(),
2009 TheLoop, SymbolicStrides);
2010 if (!CanDoRTIfNeeded) {
2011 recordAnalysis("CantIdentifyArrayBounds") << "cannot identify array bounds";
2012 LLVM_DEBUG(dbgs() << "LAA: We can't vectorize because we can't find "do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-accesses")) { dbgs() << "LAA: We can't vectorize because we can't find "
<< "the array bounds.\n"; } } while (false)
2013 << "the array bounds.\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-accesses")) { dbgs() << "LAA: We can't vectorize because we can't find "
<< "the array bounds.\n"; } } while (false)
;
2014 CanVecMem = false;
2015 return;
2016 }
2017
2018 LLVM_DEBUG(
2019 dbgs() << "LAA: May be able to perform a memory runtime check if needed.\n");
2020
2021 CanVecMem = true;
2022 if (Accesses.isDependencyCheckNeeded()) {
2023 LLVM_DEBUG(dbgs() << "LAA: Checking memory dependencies\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-accesses")) { dbgs() << "LAA: Checking memory dependencies\n"
; } } while (false)
;
2024 CanVecMem = DepChecker->areDepsSafe(
2025 DependentAccesses, Accesses.getDependenciesToCheck(), SymbolicStrides);
2026 MaxSafeDepDistBytes = DepChecker->getMaxSafeDepDistBytes();
2027
2028 if (!CanVecMem && DepChecker->shouldRetryWithRuntimeCheck()) {
2029 LLVM_DEBUG(dbgs() << "LAA: Retrying with memory checks\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-accesses")) { dbgs() << "LAA: Retrying with memory checks\n"
; } } while (false)
;
2030
2031 // Clear the dependency checks. We assume they are not needed.
2032 Accesses.resetDepChecks(*DepChecker);
2033
2034 PtrRtChecking->reset();
2035 PtrRtChecking->Need = true;
2036
2037 auto *SE = PSE->getSE();
2038 CanDoRTIfNeeded = Accesses.canCheckPtrAtRT(*PtrRtChecking, SE, TheLoop,
2039 SymbolicStrides, true);
2040
2041 // Check that we found the bounds for the pointer.
2042 if (!CanDoRTIfNeeded) {
2043 recordAnalysis("CantCheckMemDepsAtRunTime")
2044 << "cannot check memory dependencies at runtime";
2045 LLVM_DEBUG(dbgs() << "LAA: Can't vectorize with memory checks\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-accesses")) { dbgs() << "LAA: Can't vectorize with memory checks\n"
; } } while (false)
;
2046 CanVecMem = false;
2047 return;
2048 }
2049
2050 CanVecMem = true;
2051 }
2052 }
2053
2054 if (HasConvergentOp) {
2055 recordAnalysis("CantInsertRuntimeCheckWithConvergent")
2056 << "cannot add control dependency to convergent operation";
2057 LLVM_DEBUG(dbgs() << "LAA: We can't vectorize because a runtime check "do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-accesses")) { dbgs() << "LAA: We can't vectorize because a runtime check "
"would be needed with a convergent operation\n"; } } while (
false)
2058 "would be needed with a convergent operation\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-accesses")) { dbgs() << "LAA: We can't vectorize because a runtime check "
"would be needed with a convergent operation\n"; } } while (
false)
;
2059 CanVecMem = false;
2060 return;
2061 }
2062
2063 if (CanVecMem)
2064 LLVM_DEBUG(
2065 dbgs() << "LAA: No unsafe dependent memory operations in loop. We"
2066 << (PtrRtChecking->Need ? "" : " don't")
2067 << " need runtime memory checks.\n");
2068 else {
2069 recordAnalysis("UnsafeMemDep")
2070 << "unsafe dependent memory operations in loop. Use "
2071 "#pragma loop distribute(enable) to allow loop distribution "
2072 "to attempt to isolate the offending operations into a separate "
2073 "loop";
2074 LLVM_DEBUG(dbgs() << "LAA: unsafe dependent memory operations in loop\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-accesses")) { dbgs() << "LAA: unsafe dependent memory operations in loop\n"
; } } while (false)
;
2075 }
2076}
2077
2078bool LoopAccessInfo::blockNeedsPredication(BasicBlock *BB, Loop *TheLoop,
2079 DominatorTree *DT) {
2080 assert(TheLoop->contains(BB) && "Unknown block used");
2081
2082 // Blocks that do not dominate the latch need predication.
2083 BasicBlock* Latch = TheLoop->getLoopLatch();
2084 return !DT->dominates(BB, Latch);
2085}
2086
2087OptimizationRemarkAnalysis &LoopAccessInfo::recordAnalysis(StringRef RemarkName,
2088 Instruction *I) {
2089 assert(!Report && "Multiple reports generated");
2090
2091 Value *CodeRegion = TheLoop->getHeader();
2092 DebugLoc DL = TheLoop->getStartLoc();
2093
2094 if (I) {
2095 CodeRegion = I->getParent();
2096 // If there is no debug location attached to the instruction, fall back to
2097 // using the loop's.
2098 if (I->getDebugLoc())
2099 DL = I->getDebugLoc();
2100 }
2101
2102 Report = std::make_unique<OptimizationRemarkAnalysis>(DEBUG_TYPE, RemarkName, DL,
2103 CodeRegion);
2104 return *Report;
2105}
2106
2107bool LoopAccessInfo::isUniform(Value *V) const {
2108 auto *SE = PSE->getSE();
2109 // Since we rely on SCEV for uniformity, if the type is not SCEVable, it is
2110 // never considered uniform.
2111 // TODO: Is this really what we want? Even without FP SCEV, we may want some
2112 // trivially loop-invariant FP values to be considered uniform.
2113 if (!SE->isSCEVable(V->getType()))
2114 return false;
2115 return (SE->isLoopInvariant(SE->getSCEV(V), TheLoop));
2116}
2117
2118// FIXME: this function is currently a duplicate of the one in
2119// LoopVectorize.cpp.
2120static Instruction *getFirstInst(Instruction *FirstInst, Value *V,
2121 Instruction *Loc) {
2122 if (FirstInst)
2123 return FirstInst;
2124 if (Instruction *I = dyn_cast<Instruction>(V))
2125 return I->getParent() == Loc->getParent() ? I : nullptr;
2126 return nullptr;
2127}
2128
2129namespace {
2130
2131/// IR Values for the lower and upper bounds of a pointer evolution. We
2132/// need to use value-handles because SCEV expansion can invalidate previously
2133/// expanded values. Thus expansion of a pointer can invalidate the bounds for
2134/// a previous one.
2135struct PointerBounds {
2136 TrackingVH<Value> Start;
2137 TrackingVH<Value> End;
2138};
2139
2140} // end anonymous namespace
2141
2142/// Expand code for the lower and upper bound of the pointer group \p CG
2143/// in \p TheLoop. \return the values for the bounds.
2144static PointerBounds
2145expandBounds(const RuntimePointerChecking::CheckingPtrGroup *CG, Loop *TheLoop,
2146 Instruction *Loc, SCEVExpander &Exp, ScalarEvolution *SE,
2147 const RuntimePointerChecking &PtrRtChecking) {
2148 Value *Ptr = PtrRtChecking.Pointers[CG->Members[0]].PointerValue;
2149 const SCEV *Sc = SE->getSCEV(Ptr);
2150
2151 unsigned AS = Ptr->getType()->getPointerAddressSpace();
2152 LLVMContext &Ctx = Loc->getContext();
2153
2154 // Use this type for pointer arithmetic.
2155 Type *PtrArithTy = Type::getInt8PtrTy(Ctx, AS);
2156
2157 if (SE->isLoopInvariant(Sc, TheLoop)) {
2158 LLVM_DEBUG(dbgs() << "LAA: Adding RT check for a loop invariant ptr:"do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-accesses")) { dbgs() << "LAA: Adding RT check for a loop invariant ptr:"
<< *Ptr << "\n"; } } while (false)
2159 << *Ptr << "\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-accesses")) { dbgs() << "LAA: Adding RT check for a loop invariant ptr:"
<< *Ptr << "\n"; } } while (false)
;
2160 // Ptr could be in the loop body. If so, expand a new one at the correct
2161 // location.
2162 Instruction *Inst = dyn_cast<Instruction>(Ptr);
2163 Value *NewPtr = (Inst && TheLoop->contains(Inst))
2164 ? Exp.expandCodeFor(Sc, PtrArithTy, Loc)
2165 : Ptr;
2166 // We must return a half-open range, which means incrementing Sc.
2167 const SCEV *ScPlusOne = SE->getAddExpr(Sc, SE->getOne(PtrArithTy));
2168 Value *NewPtrPlusOne = Exp.expandCodeFor(ScPlusOne, PtrArithTy, Loc);
2169 return {NewPtr, NewPtrPlusOne};
2170 } else {
2171 Value *Start = nullptr, *End = nullptr;
2172 LLVM_DEBUG(dbgs() << "LAA: Adding RT check for range:\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-accesses")) { dbgs() << "LAA: Adding RT check for range:\n"
; } } while (false)
;
2173 Start = Exp.expandCodeFor(CG->Low, PtrArithTy, Loc);
2174 End = Exp.expandCodeFor(CG->High, PtrArithTy, Loc);
2175 LLVM_DEBUG(dbgs() << "Start: " << *CG->Low << " End: " << *CG->Highdo { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-accesses")) { dbgs() << "Start: " << *CG->
Low << " End: " << *CG->High << "\n"; } }
while (false)
2176 << "\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-accesses")) { dbgs() << "Start: " << *CG->
Low << " End: " << *CG->High << "\n"; } }
while (false)
;
2177 return {Start, End};
2178 }
2179}
2180
2181/// Turns a collection of checks into a collection of expanded upper and
2182/// lower bounds for both pointers in the check.
2183static SmallVector<std::pair<PointerBounds, PointerBounds>, 4> expandBounds(
2184 const SmallVectorImpl<RuntimePointerChecking::PointerCheck> &PointerChecks,
2185 Loop *L, Instruction *Loc, ScalarEvolution *SE, SCEVExpander &Exp,
2186 const RuntimePointerChecking &PtrRtChecking) {
2187 SmallVector<std::pair<PointerBounds, PointerBounds>, 4> ChecksWithBounds;
2188
2189 // Here we're relying on the SCEV Expander's cache to only emit code for the
2190 // same bounds once.
2191 transform(
2192 PointerChecks, std::back_inserter(ChecksWithBounds),
2193 [&](const RuntimePointerChecking::PointerCheck &Check) {
2194 PointerBounds
2195 First = expandBounds(Check.first, L, Loc, Exp, SE, PtrRtChecking),
2196 Second = expandBounds(Check.second, L, Loc, Exp, SE, PtrRtChecking);
2197 return std::make_pair(First, Second);
2198 });
2199
2200 return ChecksWithBounds;
2201}
2202
2203std::pair<Instruction *, Instruction *> LoopAccessInfo::addRuntimeChecks(
2204 Instruction *Loc,
2205 const SmallVectorImpl<RuntimePointerChecking::PointerCheck> &PointerChecks)
2206 const {
2207 const DataLayout &DL = TheLoop->getHeader()->getModule()->getDataLayout();
2208 auto *SE = PSE->getSE();
2209 SCEVExpander Exp(*SE, DL, "induction");
2210 auto ExpandedChecks =
2211 expandBounds(PointerChecks, TheLoop, Loc, SE, Exp, *PtrRtChecking);
2212
2213 LLVMContext &Ctx = Loc->getContext();
2214 Instruction *FirstInst = nullptr;
2215 IRBuilder<> ChkBuilder(Loc);
2216 // Our instructions might fold to a constant.
2217 Value *MemoryRuntimeCheck = nullptr;
2218
2219 for (const auto &Check : ExpandedChecks) {
2220 const PointerBounds &A = Check.first, &B = Check.second;
2221 // Check if two pointers (A and B) conflict where conflict is computed as:
2222 // start(A) < end(B) && start(B) < end(A) (the End bounds are exclusive).
2223 unsigned AS0 = A.Start->getType()->getPointerAddressSpace();
2224 unsigned AS1 = B.Start->getType()->getPointerAddressSpace();
2225
2226 assert((AS0 == B.End->getType()->getPointerAddressSpace()) &&
2227 (AS1 == A.End->getType()->getPointerAddressSpace()) &&
2228 "Trying to bounds check pointers with different address spaces");
2229
2230 Type *PtrArithTy0 = Type::getInt8PtrTy(Ctx, AS0);
2231 Type *PtrArithTy1 = Type::getInt8PtrTy(Ctx, AS1);
2232
2233 Value *Start0 = ChkBuilder.CreateBitCast(A.Start, PtrArithTy0, "bc");
2234 Value *Start1 = ChkBuilder.CreateBitCast(B.Start, PtrArithTy1, "bc");
2235 Value *End0 = ChkBuilder.CreateBitCast(A.End, PtrArithTy1, "bc");
2236 Value *End1 = ChkBuilder.CreateBitCast(B.End, PtrArithTy0, "bc");
2237
2238 // [A|B].Start points to the first accessed byte under base [A|B].
2239 // [A|B].End points to the last accessed byte, plus one.
2240 // There is no conflict when the intervals are disjoint:
2241 // NoConflict = (B.Start >= A.End) || (A.Start >= B.End)
2242 //
2243 // bound0 = (B.Start < A.End)
2244 // bound1 = (A.Start < B.End)
2245 // IsConflict = bound0 & bound1
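// For example, with half-open ranges A = [0x100, 0x140) and
// B = [0x140, 0x180): bound0 = (0x140 < 0x140) is false, so IsConflict is
// false and the adjacent intervals are correctly treated as disjoint.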
2246 Value *Cmp0 = ChkBuilder.CreateICmpULT(Start0, End1, "bound0");
2247 FirstInst = getFirstInst(FirstInst, Cmp0, Loc);
2248 Value *Cmp1 = ChkBuilder.CreateICmpULT(Start1, End0, "bound1");
2249 FirstInst = getFirstInst(FirstInst, Cmp1, Loc);
2250 Value *IsConflict = ChkBuilder.CreateAnd(Cmp0, Cmp1, "found.conflict");
2251 FirstInst = getFirstInst(FirstInst, IsConflict, Loc);
2252 if (MemoryRuntimeCheck) {
2253 IsConflict =
2254 ChkBuilder.CreateOr(MemoryRuntimeCheck, IsConflict, "conflict.rdx");
2255 FirstInst = getFirstInst(FirstInst, IsConflict, Loc);
2256 }
2257 MemoryRuntimeCheck = IsConflict;
2258 }
2259
2260 if (!MemoryRuntimeCheck)
2261 return std::make_pair(nullptr, nullptr);
2262
2263 // We have to do this trickery because the IRBuilder might fold the check to
2264 // a constant expression in which case there is no Instruction anchored in
2265 // the block; ANDing with 'true' is a no-op that forces one to be emitted.
2266 Instruction *Check = BinaryOperator::CreateAnd(MemoryRuntimeCheck,
2267 ConstantInt::getTrue(Ctx));
2268 ChkBuilder.Insert(Check, "memcheck.conflict");
2269 FirstInst = getFirstInst(FirstInst, Check, Loc);
2270 return std::make_pair(FirstInst, Check);
2271}
2272
2273std::pair<Instruction *, Instruction *>
2274LoopAccessInfo::addRuntimeChecks(Instruction *Loc) const {
2275 if (!PtrRtChecking->Need)
2276 return std::make_pair(nullptr, nullptr);
2277
2278 return addRuntimeChecks(Loc, PtrRtChecking->getChecks());
2279}
2280
2281void LoopAccessInfo::collectStridedAccess(Value *MemAccess) {
2282 Value *Ptr = nullptr;
2283 if (LoadInst *LI = dyn_cast<LoadInst>(MemAccess))
2284 Ptr = LI->getPointerOperand();
2285 else if (StoreInst *SI = dyn_cast<StoreInst>(MemAccess))
2286 Ptr = SI->getPointerOperand();
2287 else
2288 return;
2289
2290 Value *Stride = getStrideFromPointer(Ptr, PSE->getSE(), TheLoop);
2291 if (!Stride)
2292 return;
2293
2294 LLVM_DEBUG(dbgs() << "LAA: Found a strided access that is a candidate for "
2295 "versioning:");
2296 LLVM_DEBUG(dbgs() << " Ptr: " << *Ptr << " Stride: " << *Stride << "\n");
2297
2298 // Avoid adding the "Stride == 1" predicate when we know that
2299 // Stride >= Trip-Count. Such a predicate would effectively version a loop
2300 // that executes at most once, since Trip-Count <= Stride == 1.
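// The SE->isKnownPositive(StrideMinusBETaken) test below implements this
// bail-out before the predicate would be added.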
2301 //
2302 // TODO: We are currently not making a very informed decision on when it is
2303 // beneficial to apply stride versioning. It might make more sense that the
2304 // users of this analysis (such as the vectorizer) will trigger it, based on
2305 // their specific cost considerations; For example, in cases where stride
2306 // versioning does not help resolving memory accesses/dependences, the
2307 // vectorizer should evaluate the cost of the runtime test, and the benefit
2308 // of various possible stride specializations, considering the alternatives
2309 // of using gather/scatters (if available).
2310
2311 const SCEV *StrideExpr = PSE->getSCEV(Stride);
2312 const SCEV *BETakenCount = PSE->getBackedgeTakenCount();
2313
2314 // Match the types so we can compare the stride and the BETakenCount.
2315 // The Stride can be positive/negative, so we sign extend Stride;
2316 // The backedgeTakenCount is non-negative, so we zero extend BETakenCount.
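// E.g. an i32 Stride compared against an i64 backedge-taken count is
// sign-extended to i64 before the subtraction below.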
2317 const DataLayout &DL = TheLoop->getHeader()->getModule()->getDataLayout();
2318 uint64_t StrideTypeSize = DL.getTypeAllocSize(StrideExpr->getType());
2319 uint64_t BETypeSize = DL.getTypeAllocSize(BETakenCount->getType());
2320 const SCEV *CastedStride = StrideExpr;
2321 const SCEV *CastedBECount = BETakenCount;
2322 ScalarEvolution *SE = PSE->getSE();
2323 if (BETypeSize >= StrideTypeSize)
2324 CastedStride = SE->getNoopOrSignExtend(StrideExpr, BETakenCount->getType());
2325 else
2326 CastedBECount = SE->getZeroExtendExpr(BETakenCount, StrideExpr->getType());
2327 const SCEV *StrideMinusBETaken = SE->getMinusSCEV(CastedStride, CastedBECount);
2328 // Since TripCount == BackEdgeTakenCount + 1, checking:
2329 // "Stride >= TripCount" is equivalent to checking:
2330 // Stride - BETakenCount > 0
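// E.g. Stride == 4 with BETakenCount == 2 (TripCount == 3) gives
// 4 - 2 == 2 > 0, correctly concluding Stride >= TripCount.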
2331 if (SE->isKnownPositive(StrideMinusBETaken)) {
2332 LLVM_DEBUG(
2333 dbgs() << "LAA: Stride>=TripCount; No point in versioning as the "
2334 "Stride==1 predicate will imply that the loop executes "
2335 "at most once.\n");
2336 return;
2337 }
2338 LLVM_DEBUG(dbgs() << "LAA: Found a strided access that we can version.");
2339
2340 SymbolicStrides[Ptr] = Stride;
2341 StrideSet.insert(Stride);
2342}
2343
2344LoopAccessInfo::LoopAccessInfo(Loop *L, ScalarEvolution *SE,
2345 const TargetLibraryInfo *TLI, AliasAnalysis *AA,
2346 DominatorTree *DT, LoopInfo *LI)
2347 : PSE(std::make_unique<PredicatedScalarEvolution>(*SE, *L)),
2348 PtrRtChecking(std::make_unique<RuntimePointerChecking>(SE)),
2349 DepChecker(std::make_unique<MemoryDepChecker>(*PSE, L)), TheLoop(L),
2350 NumLoads(0), NumStores(0), MaxSafeDepDistBytes(-1), CanVecMem(false),
2351 HasConvergentOp(false),
2352 HasDependenceInvolvingLoopInvariantAddress(false) {
2353 if (canAnalyzeLoop())
2354 analyzeLoop(AA, LI, TLI, DT);
2355}
2356
2357void LoopAccessInfo::print(raw_ostream &OS, unsigned Depth) const {
2358 if (CanVecMem) {
2359 OS.indent(Depth) << "Memory dependences are safe";
2360 if (MaxSafeDepDistBytes != -1ULL)
2361 OS << " with a maximum dependence distance of " << MaxSafeDepDistBytes
2362 << " bytes";
2363 if (PtrRtChecking->Need)
2364 OS << " with run-time checks";
2365 OS << "\n";
2366 }
2367
2368 if (HasConvergentOp)
2369 OS.indent(Depth) << "Has convergent operation in loop\n";
2370
2371 if (Report)
2372 OS.indent(Depth) << "Report: " << Report->getMsg() << "\n";
2373
2374 if (auto *Dependences = DepChecker->getDependences()) {
2375 OS.indent(Depth) << "Dependences:\n";
2376 for (auto &Dep : *Dependences) {
2377 Dep.print(OS, Depth + 2, DepChecker->getMemoryInstructions());
2378 OS << "\n";
2379 }
2380 } else
2381 OS.indent(Depth) << "Too many dependences, not recorded\n";
2382
2383 // List the pairs of accesses that need run-time checks to prove independence.
2384 PtrRtChecking->print(OS, Depth);
2385 OS << "\n";
2386
2387 OS.indent(Depth) << "Non vectorizable stores to invariant address were "
2388 << (HasDependenceInvolvingLoopInvariantAddress ? "" : "not ")
2389 << "found in loop.\n";
2390
2391 OS.indent(Depth) << "SCEV assumptions:\n";
2392 PSE->getUnionPredicate().print(OS, Depth);
2393
2394 OS << "\n";
2395
2396 OS.indent(Depth) << "Expressions re-written:\n";
2397 PSE->print(OS, Depth);
2398}
2399
2400const LoopAccessInfo &LoopAccessLegacyAnalysis::getInfo(Loop *L) {
2401 auto &LAI = LoopAccessInfoMap[L];
2402
2403 if (!LAI)
2404 LAI = std::make_unique<LoopAccessInfo>(L, SE, TLI, AA, DT, LI);
2405
2406 return *LAI.get();
2407}
2408
2409void LoopAccessLegacyAnalysis::print(raw_ostream &OS, const Module *M) const {
2410 LoopAccessLegacyAnalysis &LAA = *const_cast<LoopAccessLegacyAnalysis *>(this);
2411
2412 for (Loop *TopLevelLoop : *LI)
2413 for (Loop *L : depth_first(TopLevelLoop)) {
2414 OS.indent(2) << L->getHeader()->getName() << ":\n";
2415 auto &LAI = LAA.getInfo(L);
2416 LAI.print(OS, 4);
2417 }
2418}
2419
2420bool LoopAccessLegacyAnalysis::runOnFunction(Function &F) {
2421 SE = &getAnalysis<ScalarEvolutionWrapperPass>().getSE();
2422 auto *TLIP = getAnalysisIfAvailable<TargetLibraryInfoWrapperPass>();
2423 TLI = TLIP ? &TLIP->getTLI(F) : nullptr;
2424 AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
2425 DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
2426 LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
2427
2428 return false;
2429}
2430
2431void LoopAccessLegacyAnalysis::getAnalysisUsage(AnalysisUsage &AU) const {
2432 AU.addRequired<ScalarEvolutionWrapperPass>();
2433 AU.addRequired<AAResultsWrapperPass>();
2434 AU.addRequired<DominatorTreeWrapperPass>();
2435 AU.addRequired<LoopInfoWrapperPass>();
2436
2437 AU.setPreservesAll();
2438}
2439
2440char LoopAccessLegacyAnalysis::ID = 0;
2441static const char laa_name[] = "Loop Access Analysis";
2442 #define LAA_NAME "loop-accesses"
2443
2444 INITIALIZE_PASS_BEGIN(LoopAccessLegacyAnalysis, LAA_NAME, laa_name, false, true)
2445 INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
2446 INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass)
2447 INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
2448 INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
2449 INITIALIZE_PASS_END(LoopAccessLegacyAnalysis, LAA_NAME, laa_name, false, true)
2450
2451AnalysisKey LoopAccessAnalysis::Key;
2452
2453LoopAccessInfo LoopAccessAnalysis::run(Loop &L, LoopAnalysisManager &AM,
2454 LoopStandardAnalysisResults &AR) {
2455 return LoopAccessInfo(&L, &AR.SE, &AR.TLI, &AR.AA, &AR.DT, &AR.LI);
2456}
2457
2458namespace llvm {
2459
2460 Pass *createLAAPass() {
2461 return new LoopAccessLegacyAnalysis();
2462 }
2463
2464} // end namespace llvm