Bug Summary

File: llvm/lib/Analysis/LoopAccessAnalysis.cpp
Warning: line 1315, column 38
Although the value stored to 'VF' is used in the enclosing expression, the value is never actually read from 'VF'

Annotated Source Code

clang -cc1 -triple x86_64-pc-linux-gnu -analyze -disable-free -disable-llvm-verifier -discard-value-names -main-file-name LoopAccessAnalysis.cpp -analyzer-store=region -analyzer-opt-analyze-nested-blocks -analyzer-checker=core -analyzer-checker=apiModeling -analyzer-checker=unix -analyzer-checker=deadcode -analyzer-checker=cplusplus -analyzer-checker=security.insecureAPI.UncheckedReturn -analyzer-checker=security.insecureAPI.getpw -analyzer-checker=security.insecureAPI.gets -analyzer-checker=security.insecureAPI.mktemp -analyzer-checker=security.insecureAPI.mkstemp -analyzer-checker=security.insecureAPI.vfork -analyzer-checker=nullability.NullPassedToNonnull -analyzer-checker=nullability.NullReturnedFromNonnull -analyzer-output plist -w -setup-static-analyzer -analyzer-config-compatibility-mode=true -mrelocation-model pic -pic-level 2 -mthread-model posix -mframe-pointer=none -fmath-errno -fno-rounding-math -masm-verbose -mconstructor-aliases -munwind-tables -target-cpu x86-64 -dwarf-column-info -fno-split-dwarf-inlining -debugger-tuning=gdb -ffunction-sections -fdata-sections -resource-dir /usr/lib/llvm-11/lib/clang/11.0.0 -D _DEBUG -D _GNU_SOURCE -D __STDC_CONSTANT_MACROS -D __STDC_FORMAT_MACROS -D __STDC_LIMIT_MACROS -I /build/llvm-toolchain-snapshot-11~++20200309111110+2c36c23f347/build-llvm/lib/Analysis -I /build/llvm-toolchain-snapshot-11~++20200309111110+2c36c23f347/llvm/lib/Analysis -I /build/llvm-toolchain-snapshot-11~++20200309111110+2c36c23f347/build-llvm/include -I /build/llvm-toolchain-snapshot-11~++20200309111110+2c36c23f347/llvm/include -U NDEBUG -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/6.3.0/../../../../include/c++/6.3.0 -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/6.3.0/../../../../include/x86_64-linux-gnu/c++/6.3.0 -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/6.3.0/../../../../include/x86_64-linux-gnu/c++/6.3.0 -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/6.3.0/../../../../include/c++/6.3.0/backward -internal-isystem /usr/local/include -internal-isystem /usr/lib/llvm-11/lib/clang/11.0.0/include -internal-externc-isystem /usr/include/x86_64-linux-gnu -internal-externc-isystem /include -internal-externc-isystem /usr/include -O2 -Wno-unused-parameter -Wwrite-strings -Wno-missing-field-initializers -Wno-long-long -Wno-maybe-uninitialized -Wno-comment -std=c++14 -fdeprecated-macro -fdebug-compilation-dir /build/llvm-toolchain-snapshot-11~++20200309111110+2c36c23f347/build-llvm/lib/Analysis -fdebug-prefix-map=/build/llvm-toolchain-snapshot-11~++20200309111110+2c36c23f347=. -ferror-limit 19 -fmessage-length 0 -fvisibility-inlines-hidden -stack-protector 2 -fgnuc-version=4.2.1 -fobjc-runtime=gcc -fdiagnostics-show-option -vectorize-loops -vectorize-slp -analyzer-output=html -analyzer-config stable-report-filename=true -faddrsig -o /tmp/scan-build-2020-03-09-184146-41876-1 -x c++ /build/llvm-toolchain-snapshot-11~++20200309111110+2c36c23f347/llvm/lib/Analysis/LoopAccessAnalysis.cpp
1//===- LoopAccessAnalysis.cpp - Loop Access Analysis Implementation --------==//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// The implementation for the loop memory dependence analysis that was
10// originally developed for the loop vectorizer.
11//
12//===----------------------------------------------------------------------===//
13
14#include "llvm/Analysis/LoopAccessAnalysis.h"
15#include "llvm/ADT/APInt.h"
16#include "llvm/ADT/DenseMap.h"
17#include "llvm/ADT/DepthFirstIterator.h"
18#include "llvm/ADT/EquivalenceClasses.h"
19#include "llvm/ADT/PointerIntPair.h"
20#include "llvm/ADT/STLExtras.h"
21#include "llvm/ADT/SetVector.h"
22#include "llvm/ADT/SmallPtrSet.h"
23#include "llvm/ADT/SmallSet.h"
24#include "llvm/ADT/SmallVector.h"
25#include "llvm/ADT/iterator_range.h"
26#include "llvm/Analysis/AliasAnalysis.h"
27#include "llvm/Analysis/AliasSetTracker.h"
28#include "llvm/Analysis/LoopAnalysisManager.h"
29#include "llvm/Analysis/LoopInfo.h"
30#include "llvm/Analysis/MemoryLocation.h"
31#include "llvm/Analysis/OptimizationRemarkEmitter.h"
32#include "llvm/Analysis/ScalarEvolution.h"
33#include "llvm/Analysis/ScalarEvolutionExpander.h"
34#include "llvm/Analysis/ScalarEvolutionExpressions.h"
35#include "llvm/Analysis/TargetLibraryInfo.h"
36#include "llvm/Analysis/ValueTracking.h"
37#include "llvm/Analysis/VectorUtils.h"
38#include "llvm/IR/BasicBlock.h"
39#include "llvm/IR/Constants.h"
40#include "llvm/IR/DataLayout.h"
41#include "llvm/IR/DebugLoc.h"
42#include "llvm/IR/DerivedTypes.h"
43#include "llvm/IR/DiagnosticInfo.h"
44#include "llvm/IR/Dominators.h"
45#include "llvm/IR/Function.h"
46#include "llvm/IR/IRBuilder.h"
47#include "llvm/IR/InstrTypes.h"
48#include "llvm/IR/Instruction.h"
49#include "llvm/IR/Instructions.h"
50#include "llvm/IR/Operator.h"
51#include "llvm/IR/PassManager.h"
52#include "llvm/IR/Type.h"
53#include "llvm/IR/Value.h"
54#include "llvm/IR/ValueHandle.h"
55#include "llvm/InitializePasses.h"
56#include "llvm/Pass.h"
57#include "llvm/Support/Casting.h"
58#include "llvm/Support/CommandLine.h"
59#include "llvm/Support/Debug.h"
60#include "llvm/Support/ErrorHandling.h"
61#include "llvm/Support/raw_ostream.h"
62#include <algorithm>
63#include <cassert>
64#include <cstdint>
65#include <cstdlib>
66#include <iterator>
67#include <utility>
68#include <vector>
69
70using namespace llvm;
71
72#define DEBUG_TYPE "loop-accesses"
73
74static cl::opt<unsigned, true>
75VectorizationFactor("force-vector-width", cl::Hidden,
76 cl::desc("Sets the SIMD width. Zero is autoselect."),
77 cl::location(VectorizerParams::VectorizationFactor));
78unsigned VectorizerParams::VectorizationFactor;
79
80static cl::opt<unsigned, true>
81VectorizationInterleave("force-vector-interleave", cl::Hidden,
82 cl::desc("Sets the vectorization interleave count. "
83 "Zero is autoselect."),
84 cl::location(
85 VectorizerParams::VectorizationInterleave));
86unsigned VectorizerParams::VectorizationInterleave;
87
88static cl::opt<unsigned, true> RuntimeMemoryCheckThreshold(
89 "runtime-memory-check-threshold", cl::Hidden,
90 cl::desc("When performing memory disambiguation checks at runtime do not "
91 "generate more than this number of comparisons (default = 8)."),
92 cl::location(VectorizerParams::RuntimeMemoryCheckThreshold), cl::init(8));
93unsigned VectorizerParams::RuntimeMemoryCheckThreshold;
94
95/// The maximum number of iterations used to merge memory checks.
96static cl::opt<unsigned> MemoryCheckMergeThreshold(
97 "memory-check-merge-threshold", cl::Hidden,
98 cl::desc("Maximum number of comparisons done when trying to merge "
99 "runtime memory checks. (default = 100)"),
100 cl::init(100));
101
102/// Maximum SIMD width.
103const unsigned VectorizerParams::MaxVectorWidth = 64;
104
105/// We collect dependences up to this threshold.
106static cl::opt<unsigned>
107 MaxDependences("max-dependences", cl::Hidden,
108 cl::desc("Maximum number of dependences collected by "
109 "loop-access analysis (default = 100)"),
110 cl::init(100));
111
112/// This enables versioning on the strides of symbolically striding memory
113/// accesses in code like the following.
114/// for (i = 0; i < N; ++i)
115/// A[i * Stride1] += B[i * Stride2] ...
116///
117/// Will be roughly translated to
118/// if (Stride1 == 1 && Stride2 == 1) {
119/// for (i = 0; i < N; i+=4)
120/// A[i:i+3] += ...
121/// } else
122/// ...
123static cl::opt<bool> EnableMemAccessVersioning(
124 "enable-mem-access-versioning", cl::init(true), cl::Hidden,
125 cl::desc("Enable symbolic stride memory access versioning"));
126
127/// Enable store-to-load forwarding conflict detection. This option can
128/// be disabled for correctness testing.
129static cl::opt<bool> EnableForwardingConflictDetection(
130 "store-to-load-forwarding-conflict-detection", cl::Hidden,
131 cl::desc("Enable conflict detection in loop-access analysis"),
132 cl::init(true));
133
134bool VectorizerParams::isInterleaveForced() {
135 return ::VectorizationInterleave.getNumOccurrences() > 0;
136}
137
138Value *llvm::stripIntegerCast(Value *V) {
139 if (auto *CI = dyn_cast<CastInst>(V))
140 if (CI->getOperand(0)->getType()->isIntegerTy())
141 return CI->getOperand(0);
142 return V;
143}
144
145const SCEV *llvm::replaceSymbolicStrideSCEV(PredicatedScalarEvolution &PSE,
146 const ValueToValueMap &PtrToStride,
147 Value *Ptr, Value *OrigPtr) {
148 const SCEV *OrigSCEV = PSE.getSCEV(Ptr);
149
150 // If there is an entry in the map return the SCEV of the pointer with the
151 // symbolic stride replaced by one.
152 ValueToValueMap::const_iterator SI =
153 PtrToStride.find(OrigPtr ? OrigPtr : Ptr);
154 if (SI != PtrToStride.end()) {
155 Value *StrideVal = SI->second;
156
157 // Strip casts.
158 StrideVal = stripIntegerCast(StrideVal);
159
160 ScalarEvolution *SE = PSE.getSE();
161 const auto *U = cast<SCEVUnknown>(SE->getSCEV(StrideVal));
162 const auto *CT =
163 static_cast<const SCEVConstant *>(SE->getOne(StrideVal->getType()));
164
165 PSE.addPredicate(*SE->getEqualPredicate(U, CT));
166 auto *Expr = PSE.getSCEV(Ptr);
167
168 LLVM_DEBUG(dbgs() << "LAA: Replacing SCEV: " << *OrigSCEV
169 << " by: " << *Expr << "\n");
170 return Expr;
171 }
172
173 // Otherwise, just return the SCEV of the original pointer.
174 return OrigSCEV;
175}
176
177/// Calculate Start and End points of memory access.
178/// Let's assume A is the first access and B is a memory access on the N-th
179/// loop iteration. Then B is calculated as:
180/// B = A + Step*N .
181/// Step value may be positive or negative.
182/// N is the calculated back-edge taken count:
183/// N = (TripCount > 0) ? RoundDown(TripCount - 1, VF) : 0
184/// Start and End points are calculated in the following way:
185/// Start = UMIN(A, B) ; End = UMAX(A, B) + SizeOfElt,
186/// where SizeOfElt is the size of single memory access in bytes.
187///
188/// There is no conflict when the intervals are disjoint:
189/// NoConflict = (P2.Start >= P1.End) || (P1.Start >= P2.End)
190void RuntimePointerChecking::insert(Loop *Lp, Value *Ptr, bool WritePtr,
191 unsigned DepSetId, unsigned ASId,
192 const ValueToValueMap &Strides,
193 PredicatedScalarEvolution &PSE) {
194 // Get the stride replaced scev.
195 const SCEV *Sc = replaceSymbolicStrideSCEV(PSE, Strides, Ptr);
196 ScalarEvolution *SE = PSE.getSE();
197
198 const SCEV *ScStart;
199 const SCEV *ScEnd;
200
201 if (SE->isLoopInvariant(Sc, Lp))
202 ScStart = ScEnd = Sc;
203 else {
204 const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(Sc);
205 assert(AR && "Invalid addrec expression");
206 const SCEV *Ex = PSE.getBackedgeTakenCount();
207
208 ScStart = AR->getStart();
209 ScEnd = AR->evaluateAtIteration(Ex, *SE);
210 const SCEV *Step = AR->getStepRecurrence(*SE);
211
212 // For expressions with negative step, the upper bound is ScStart and the
213 // lower bound is ScEnd.
214 if (const auto *CStep = dyn_cast<SCEVConstant>(Step)) {
215 if (CStep->getValue()->isNegative())
216 std::swap(ScStart, ScEnd);
217 } else {
218 // Fallback case: the step is not constant, but we can still
219 // get the upper and lower bounds of the interval by using min/max
220 // expressions.
221 ScStart = SE->getUMinExpr(ScStart, ScEnd);
222 ScEnd = SE->getUMaxExpr(AR->getStart(), ScEnd);
223 }
224 // Add the size of the pointed element to ScEnd.
225 unsigned EltSize =
226 Ptr->getType()->getPointerElementType()->getScalarSizeInBits() / 8;
227 const SCEV *EltSizeSCEV = SE->getConstant(ScEnd->getType(), EltSize);
228 ScEnd = SE->getAddExpr(ScEnd, EltSizeSCEV);
229 }
230
231 Pointers.emplace_back(Ptr, ScStart, ScEnd, WritePtr, DepSetId, ASId, Sc);
232}
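
// ===== Editor's sketch (not part of LoopAccessAnalysis.cpp) ==============
// A minimal model of the Start/End interval logic documented above, using
// plain 64-bit integers in place of SCEV expressions. The names AccessRange,
// makeRange, and rangesDisjoint are illustrative, not LLVM API.
#include <algorithm>
#include <cstdint>

struct AccessRange {
  uint64_t Start, End; // Half-open [Start, End); End includes SizeOfElt.
};

// B = A + Step * N; the interval covers both endpoints plus one element.
static AccessRange makeRange(uint64_t A, int64_t Step, uint64_t N,
                             uint64_t SizeOfElt) {
  uint64_t B = A + static_cast<uint64_t>(Step) * N;
  return {std::min(A, B), std::max(A, B) + SizeOfElt};
}

// NoConflict = (P2.Start >= P1.End) || (P1.Start >= P2.End)
static bool rangesDisjoint(const AccessRange &P1, const AccessRange &P2) {
  return P2.Start >= P1.End || P1.Start >= P2.End;
}
// ==========================================================================
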
233
234SmallVector<RuntimePointerChecking::PointerCheck, 4>
235RuntimePointerChecking::generateChecks() const {
236 SmallVector<PointerCheck, 4> Checks;
237
238 for (unsigned I = 0; I < CheckingGroups.size(); ++I) {
239 for (unsigned J = I + 1; J < CheckingGroups.size(); ++J) {
240 const RuntimePointerChecking::CheckingPtrGroup &CGI = CheckingGroups[I];
241 const RuntimePointerChecking::CheckingPtrGroup &CGJ = CheckingGroups[J];
242
243 if (needsChecking(CGI, CGJ))
244 Checks.push_back(std::make_pair(&CGI, &CGJ));
245 }
246 }
247 return Checks;
248}
249
250void RuntimePointerChecking::generateChecks(
251 MemoryDepChecker::DepCandidates &DepCands, bool UseDependencies) {
252 assert(Checks.empty() && "Checks is not empty");
253 groupChecks(DepCands, UseDependencies);
254 Checks = generateChecks();
255}
256
257bool RuntimePointerChecking::needsChecking(const CheckingPtrGroup &M,
258 const CheckingPtrGroup &N) const {
259 for (unsigned I = 0, EI = M.Members.size(); EI != I; ++I)
260 for (unsigned J = 0, EJ = N.Members.size(); EJ != J; ++J)
261 if (needsChecking(M.Members[I], N.Members[J]))
262 return true;
263 return false;
264}
265
266/// Compare \p I and \p J and return the minimum.
267/// Return nullptr in case we couldn't find an answer.
268static const SCEV *getMinFromExprs(const SCEV *I, const SCEV *J,
269 ScalarEvolution *SE) {
270 const SCEV *Diff = SE->getMinusSCEV(J, I);
271 const SCEVConstant *C = dyn_cast<const SCEVConstant>(Diff);
272
273 if (!C)
274 return nullptr;
275 if (C->getValue()->isNegative())
276 return J;
277 return I;
278}
279
280bool RuntimePointerChecking::CheckingPtrGroup::addPointer(unsigned Index) {
281 const SCEV *Start = RtCheck.Pointers[Index].Start;
282 const SCEV *End = RtCheck.Pointers[Index].End;
283
284 // Compare the starts and ends with the known minimum and maximum
285 // of this set. We need to know how we compare against the min/max
286 // of the set in order to be able to emit memchecks.
287 const SCEV *Min0 = getMinFromExprs(Start, Low, RtCheck.SE);
288 if (!Min0)
289 return false;
290
291 const SCEV *Min1 = getMinFromExprs(End, High, RtCheck.SE);
292 if (!Min1)
293 return false;
294
295 // Update the low bound expression if we've found a new min value.
296 if (Min0 == Start)
297 Low = Start;
298
299 // Update the high bound expression if we've found a new max value.
300 if (Min1 != End)
301 High = End;
302
303 Members.push_back(Index);
304 return true;
305}
306
307void RuntimePointerChecking::groupChecks(
308 MemoryDepChecker::DepCandidates &DepCands, bool UseDependencies) {
309 // We build the groups from dependency candidates equivalence classes
310 // because:
311 // - We know that pointers in the same equivalence class share
312 // the same underlying object and therefore there is a chance
313 // that we can compare pointers
314 // - We wouldn't be able to merge two pointers for which we need
315 // to emit a memcheck. The classes in DepCands are already
316 // conveniently built such that no two pointers in the same
317 // class need checking against each other.
318
319 // We use the following (greedy) algorithm to construct the groups
320 // For every pointer in the equivalence class:
321 // For each existing group:
322 // - if the difference between this pointer and the min/max bounds
323 // of the group is a constant, then make the pointer part of the
324 // group and update the min/max bounds of that group as required.
325
326 CheckingGroups.clear();
327
328 // If we need to check two pointers to the same underlying object
329 // with a non-constant difference, we shouldn't perform any pointer
330 // grouping with those pointers. This is because we can easily get
331 // into cases where the resulting check would return false, even when
332 // the accesses are safe.
333 //
334 // The following example shows this:
335 // for (i = 0; i < 1000; ++i)
336 // a[5000 + i * m] = a[i] + a[i + 9000]
337 //
338 // Here grouping gives a check of (5000, 5000 + 1000 * m) against
339 // (0, 10000) which is always false. However, if m is 1, there is no
340 // dependence. Not grouping the checks for a[i] and a[i + 9000] allows
341 // us to perform an accurate check in this case.
342 //
343 // The above case requires that we have an UnknownDependence between
344 // accesses to the same underlying object. This cannot happen unless
345 // FoundNonConstantDistanceDependence is set, and therefore UseDependencies
346 // is also false. In this case we will use the fallback path and create
347 // separate checking groups for all pointers.
348
349 // If we don't have the dependency partitions, construct a new
350 // checking pointer group for each pointer. This is also required
351 // for correctness, because in this case we can have checking between
352 // pointers to the same underlying object.
353 if (!UseDependencies) {
354 for (unsigned I = 0; I < Pointers.size(); ++I)
355 CheckingGroups.push_back(CheckingPtrGroup(I, *this));
356 return;
357 }
358
359 unsigned TotalComparisons = 0;
360
361 DenseMap<Value *, unsigned> PositionMap;
362 for (unsigned Index = 0; Index < Pointers.size(); ++Index)
363 PositionMap[Pointers[Index].PointerValue] = Index;
364
365 // We need to keep track of what pointers we've already seen so we
366 // don't process them twice.
367 SmallSet<unsigned, 2> Seen;
368
369 // Go through all equivalence classes, get the "pointer check groups"
370 // and add them to the overall solution. We use the order in which accesses
371 // appear in 'Pointers' to enforce determinism.
372 for (unsigned I = 0; I < Pointers.size(); ++I) {
373 // We've seen this pointer before, and therefore already processed
374 // its equivalence class.
375 if (Seen.count(I))
376 continue;
377
378 MemoryDepChecker::MemAccessInfo Access(Pointers[I].PointerValue,
379 Pointers[I].IsWritePtr);
380
381 SmallVector<CheckingPtrGroup, 2> Groups;
382 auto LeaderI = DepCands.findValue(DepCands.getLeaderValue(Access));
383
384 // Because DepCands is constructed by visiting accesses in the order in
385 // which they appear in alias sets (which is deterministic) and the
386 // iteration order within an equivalence class member is only dependent on
387 // the order in which unions and insertions are performed on the
388 // equivalence class, the iteration order is deterministic.
389 for (auto MI = DepCands.member_begin(LeaderI), ME = DepCands.member_end();
390 MI != ME; ++MI) {
391 unsigned Pointer = PositionMap[MI->getPointer()];
392 bool Merged = false;
393 // Mark this pointer as seen.
394 Seen.insert(Pointer);
395
396 // Go through all the existing sets and see if we can find one
397 // which can include this pointer.
398 for (CheckingPtrGroup &Group : Groups) {
399 // Don't perform more than a certain amount of comparisons.
400 // This should limit the cost of grouping the pointers to something
401 // reasonable. If we do end up hitting this threshold, the algorithm
402 // will create separate groups for all remaining pointers.
403 if (TotalComparisons > MemoryCheckMergeThreshold)
404 break;
405
406 TotalComparisons++;
407
408 if (Group.addPointer(Pointer)) {
409 Merged = true;
410 break;
411 }
412 }
413
414 if (!Merged)
415 // We couldn't add this pointer to any existing set or the threshold
416 // for the number of comparisons has been reached. Create a new group
417 // to hold the current pointer.
418 Groups.push_back(CheckingPtrGroup(Pointer, *this));
419 }
420
421 // We've computed the grouped checks for this partition.
422 // Save the results and continue with the next one.
423 llvm::copy(Groups, std::back_inserter(CheckingGroups));
424 }
425}
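
// ===== Editor's sketch (not part of LoopAccessAnalysis.cpp) ==============
// The greedy grouping algorithm described above, modeled on plain integer
// intervals. CanMerge stands in for CheckingPtrGroup::addPointer, which can
// fail when the distance to the group's bounds is not a compile-time
// constant; the comparison budget mirrors MemoryCheckMergeThreshold.
// Interval and groupIntervals are illustrative names only.
#include <algorithm>
#include <cstdint>
#include <vector>

struct Interval { int64_t Low, High; };

static std::vector<Interval>
groupIntervals(const std::vector<Interval> &Ptrs, unsigned MergeThreshold,
               bool (*CanMerge)(const Interval &, const Interval &)) {
  std::vector<Interval> Groups;
  unsigned TotalComparisons = 0;
  for (const Interval &P : Ptrs) {
    bool Merged = false;
    for (Interval &G : Groups) {
      if (TotalComparisons > MergeThreshold)
        break; // Budget exhausted: remaining pointers get singleton groups.
      ++TotalComparisons;
      if (CanMerge(G, P)) {
        // Widen the group's bounds to cover the new pointer, as
        // addPointer does with Low/High.
        G.Low = std::min(G.Low, P.Low);
        G.High = std::max(G.High, P.High);
        Merged = true;
        break;
      }
    }
    if (!Merged)
      Groups.push_back(P);
  }
  return Groups;
}
// ==========================================================================
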
426
427bool RuntimePointerChecking::arePointersInSamePartition(
428 const SmallVectorImpl<int> &PtrToPartition, unsigned PtrIdx1,
429 unsigned PtrIdx2) {
430 return (PtrToPartition[PtrIdx1] != -1 &&
431 PtrToPartition[PtrIdx1] == PtrToPartition[PtrIdx2]);
432}
433
434bool RuntimePointerChecking::needsChecking(unsigned I, unsigned J) const {
435 const PointerInfo &PointerI = Pointers[I];
436 const PointerInfo &PointerJ = Pointers[J];
437
438 // No need to check if two readonly pointers intersect.
439 if (!PointerI.IsWritePtr && !PointerJ.IsWritePtr)
440 return false;
441
442 // Only need to check pointers between two different dependency sets.
443 if (PointerI.DependencySetId == PointerJ.DependencySetId)
444 return false;
445
446 // Only need to check pointers in the same alias set.
447 if (PointerI.AliasSetId != PointerJ.AliasSetId)
448 return false;
449
450 return true;
451}
452
453void RuntimePointerChecking::printChecks(
454 raw_ostream &OS, const SmallVectorImpl<PointerCheck> &Checks,
455 unsigned Depth) const {
456 unsigned N = 0;
457 for (const auto &Check : Checks) {
458 const auto &First = Check.first->Members, &Second = Check.second->Members;
459
460 OS.indent(Depth) << "Check " << N++ << ":\n";
461
462 OS.indent(Depth + 2) << "Comparing group (" << Check.first << "):\n";
463 for (unsigned K = 0; K < First.size(); ++K)
464 OS.indent(Depth + 2) << *Pointers[First[K]].PointerValue << "\n";
465
466 OS.indent(Depth + 2) << "Against group (" << Check.second << "):\n";
467 for (unsigned K = 0; K < Second.size(); ++K)
468 OS.indent(Depth + 2) << *Pointers[Second[K]].PointerValue << "\n";
469 }
470}
471
472void RuntimePointerChecking::print(raw_ostream &OS, unsigned Depth) const {
473
474 OS.indent(Depth) << "Run-time memory checks:\n";
475 printChecks(OS, Checks, Depth);
476
477 OS.indent(Depth) << "Grouped accesses:\n";
478 for (unsigned I = 0; I < CheckingGroups.size(); ++I) {
479 const auto &CG = CheckingGroups[I];
480
481 OS.indent(Depth + 2) << "Group " << &CG << ":\n";
482 OS.indent(Depth + 4) << "(Low: " << *CG.Low << " High: " << *CG.High
483 << ")\n";
484 for (unsigned J = 0; J < CG.Members.size(); ++J) {
485 OS.indent(Depth + 6) << "Member: " << *Pointers[CG.Members[J]].Expr
486 << "\n";
487 }
488 }
489}
490
491namespace {
492
493/// Analyses memory accesses in a loop.
494///
495/// Checks whether run time pointer checks are needed and builds sets for data
496/// dependence checking.
497class AccessAnalysis {
498public:
499 /// Read or write access location.
500 typedef PointerIntPair<Value *, 1, bool> MemAccessInfo;
501 typedef SmallVector<MemAccessInfo, 8> MemAccessInfoList;
502
503 AccessAnalysis(const DataLayout &Dl, Loop *TheLoop, AliasAnalysis *AA,
504 LoopInfo *LI, MemoryDepChecker::DepCandidates &DA,
505 PredicatedScalarEvolution &PSE)
506 : DL(Dl), TheLoop(TheLoop), AST(*AA), LI(LI), DepCands(DA),
507 IsRTCheckAnalysisNeeded(false), PSE(PSE) {}
508
509 /// Register a load and whether it is only read from.
510 void addLoad(MemoryLocation &Loc, bool IsReadOnly) {
511 Value *Ptr = const_cast<Value*>(Loc.Ptr);
512 AST.add(Ptr, LocationSize::unknown(), Loc.AATags);
513 Accesses.insert(MemAccessInfo(Ptr, false));
514 if (IsReadOnly)
515 ReadOnlyPtr.insert(Ptr);
516 }
517
518 /// Register a store.
519 void addStore(MemoryLocation &Loc) {
520 Value *Ptr = const_cast<Value*>(Loc.Ptr);
521 AST.add(Ptr, LocationSize::unknown(), Loc.AATags);
522 Accesses.insert(MemAccessInfo(Ptr, true));
523 }
524
525 /// Check if we can emit a run-time no-alias check for \p Access.
526 ///
527 /// Returns true if we can emit a run-time no alias check for \p Access.
528 /// If we can check this access, this also adds it to a dependence set and
529/// adds a run-time check for it to \p RtCheck. If \p Assume is true,
530 /// we will attempt to use additional run-time checks in order to get
531 /// the bounds of the pointer.
532 bool createCheckForAccess(RuntimePointerChecking &RtCheck,
533 MemAccessInfo Access,
534 const ValueToValueMap &Strides,
535 DenseMap<Value *, unsigned> &DepSetId,
536 Loop *TheLoop, unsigned &RunningDepId,
537 unsigned ASId, bool ShouldCheckStride,
538 bool Assume);
539
540 /// Check whether we can check the pointers at runtime for
541 /// non-intersection.
542 ///
543 /// Returns true if we need no check or if we do and we can generate them
544 /// (i.e. the pointers have computable bounds).
545 bool canCheckPtrAtRT(RuntimePointerChecking &RtCheck, ScalarEvolution *SE,
546 Loop *TheLoop, const ValueToValueMap &Strides,
547 bool ShouldCheckWrap = false);
548
549 /// Goes over all memory accesses, checks whether an RT check is needed
550 /// and builds sets of dependent accesses.
551 void buildDependenceSets() {
552 processMemAccesses();
553 }
554
555 /// Initial processing of memory accesses determined that we need to
556 /// perform dependency checking.
557 ///
558 /// Note that this can later be cleared if we retry memcheck analysis without
559 /// dependency checking (i.e. FoundNonConstantDistanceDependence).
560 bool isDependencyCheckNeeded() { return !CheckDeps.empty(); }
561
562 /// We decided that no dependence analysis would be used. Reset the state.
563 void resetDepChecks(MemoryDepChecker &DepChecker) {
564 CheckDeps.clear();
565 DepChecker.clearDependences();
566 }
567
568 MemAccessInfoList &getDependenciesToCheck() { return CheckDeps; }
569
570private:
571 typedef SetVector<MemAccessInfo> PtrAccessSet;
572
573 /// Go over all memory accesses and check whether runtime pointer checks
574 /// are needed and build sets of dependency check candidates.
575 void processMemAccesses();
576
577 /// Set of all accesses.
578 PtrAccessSet Accesses;
579
580 const DataLayout &DL;
581
582 /// The loop being checked.
583 const Loop *TheLoop;
584
585 /// List of accesses that need a further dependence check.
586 MemAccessInfoList CheckDeps;
587
588 /// Set of pointers that are read only.
589 SmallPtrSet<Value*, 16> ReadOnlyPtr;
590
591 /// An alias set tracker to partition the access set by underlying object and
592 /// intrinsic property (such as TBAA metadata).
593 AliasSetTracker AST;
594
595 LoopInfo *LI;
596
597 /// Sets of potentially dependent accesses - members of one set share an
598 /// underlying pointer. The set "CheckDeps" identifies which sets really need a
599 /// dependence check.
600 MemoryDepChecker::DepCandidates &DepCands;
601
602 /// Initial processing of memory accesses determined that we may need
603 /// to add memchecks. Perform the analysis to determine the necessary checks.
604 ///
605 /// Note that this is different from isDependencyCheckNeeded. When we retry
606 /// memcheck analysis without dependency checking
607 /// (i.e. FoundNonConstantDistanceDependence), isDependencyCheckNeeded is
608 /// cleared while this remains set if we have potentially dependent accesses.
609 bool IsRTCheckAnalysisNeeded;
610
611 /// The SCEV predicate containing all the SCEV-related assumptions.
612 PredicatedScalarEvolution &PSE;
613};
614
615} // end anonymous namespace
616
617/// Check whether a pointer can participate in a runtime bounds check.
618/// If \p Assume, try harder to prove that we can compute the bounds of \p Ptr
619/// by adding run-time checks (overflow checks) if necessary.
620static bool hasComputableBounds(PredicatedScalarEvolution &PSE,
621 const ValueToValueMap &Strides, Value *Ptr,
622 Loop *L, bool Assume) {
623 const SCEV *PtrScev = replaceSymbolicStrideSCEV(PSE, Strides, Ptr);
624
625 // The bounds for a loop-invariant pointer are trivial.
626 if (PSE.getSE()->isLoopInvariant(PtrScev, L))
627 return true;
628
629 const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(PtrScev);
630
631 if (!AR && Assume)
632 AR = PSE.getAsAddRec(Ptr);
633
634 if (!AR)
635 return false;
636
637 return AR->isAffine();
638}
639
640/// Check whether a pointer address cannot wrap.
641static bool isNoWrap(PredicatedScalarEvolution &PSE,
642 const ValueToValueMap &Strides, Value *Ptr, Loop *L) {
643 const SCEV *PtrScev = PSE.getSCEV(Ptr);
644 if (PSE.getSE()->isLoopInvariant(PtrScev, L))
645 return true;
646
647 int64_t Stride = getPtrStride(PSE, Ptr, L, Strides);
648 if (Stride == 1 || PSE.hasNoOverflow(Ptr, SCEVWrapPredicate::IncrementNUSW))
649 return true;
650
651 return false;
652}
653
654bool AccessAnalysis::createCheckForAccess(RuntimePointerChecking &RtCheck,
655 MemAccessInfo Access,
656 const ValueToValueMap &StridesMap,
657 DenseMap<Value *, unsigned> &DepSetId,
658 Loop *TheLoop, unsigned &RunningDepId,
659 unsigned ASId, bool ShouldCheckWrap,
660 bool Assume) {
661 Value *Ptr = Access.getPointer();
662
663 if (!hasComputableBounds(PSE, StridesMap, Ptr, TheLoop, Assume))
664 return false;
665
666 // When we run after a failing dependency check we have to make sure
667 // we don't have wrapping pointers.
668 if (ShouldCheckWrap && !isNoWrap(PSE, StridesMap, Ptr, TheLoop)) {
669 auto *Expr = PSE.getSCEV(Ptr);
670 if (!Assume || !isa<SCEVAddRecExpr>(Expr))
671 return false;
672 PSE.setNoOverflow(Ptr, SCEVWrapPredicate::IncrementNUSW);
673 }
674
675 // The id of the dependence set.
676 unsigned DepId;
677
678 if (isDependencyCheckNeeded()) {
679 Value *Leader = DepCands.getLeaderValue(Access).getPointer();
680 unsigned &LeaderId = DepSetId[Leader];
681 if (!LeaderId)
682 LeaderId = RunningDepId++;
683 DepId = LeaderId;
684 } else
685 // Each access has its own dependence set.
686 DepId = RunningDepId++;
687
688 bool IsWrite = Access.getInt();
689 RtCheck.insert(TheLoop, Ptr, IsWrite, DepId, ASId, StridesMap, PSE);
690 LLVM_DEBUG(dbgs() << "LAA: Found a runtime check ptr:" << *Ptr << '\n');
691
692 return true;
693 }
694
695bool AccessAnalysis::canCheckPtrAtRT(RuntimePointerChecking &RtCheck,
696 ScalarEvolution *SE, Loop *TheLoop,
697 const ValueToValueMap &StridesMap,
698 bool ShouldCheckWrap) {
699 // Find pointers with computable bounds. We are going to use this information
700 // to place a runtime bound check.
701 bool CanDoRT = true;
702
703 bool NeedRTCheck = false;
704 if (!IsRTCheckAnalysisNeeded) return true;
705
706 bool IsDepCheckNeeded = isDependencyCheckNeeded();
707
708 // We assign a consecutive id to accesses from different alias sets.
709 // Accesses between different groups don't need to be checked.
710 unsigned ASId = 1;
711 for (auto &AS : AST) {
712 int NumReadPtrChecks = 0;
713 int NumWritePtrChecks = 0;
714 bool CanDoAliasSetRT = true;
715
716 // We assign consecutive ids to accesses from different dependence sets.
717 // Accesses within the same set don't need a runtime check.
718 unsigned RunningDepId = 1;
719 DenseMap<Value *, unsigned> DepSetId;
720
721 SmallVector<MemAccessInfo, 4> Retries;
722
723 for (auto A : AS) {
724 Value *Ptr = A.getValue();
725 bool IsWrite = Accesses.count(MemAccessInfo(Ptr, true));
726 MemAccessInfo Access(Ptr, IsWrite);
727
728 if (IsWrite)
729 ++NumWritePtrChecks;
730 else
731 ++NumReadPtrChecks;
732
733 if (!createCheckForAccess(RtCheck, Access, StridesMap, DepSetId, TheLoop,
734 RunningDepId, ASId, ShouldCheckWrap, false)) {
735 LLVM_DEBUG(dbgs() << "LAA: Can't find bounds for ptr:" << *Ptr << '\n');
736 Retries.push_back(Access);
737 CanDoAliasSetRT = false;
738 }
739 }
740
741 // If we have at least two writes or one write and a read then we need to
742 // check them. But there is no need for checks if there is only one
743 // dependence set for this alias set.
744 //
745 // Note that this function computes CanDoRT and NeedRTCheck independently.
746 // For example CanDoRT=false, NeedRTCheck=false means that we have a pointer
747 // for which we couldn't find the bounds but we don't actually need to emit
748 // any checks so it does not matter.
749 bool NeedsAliasSetRTCheck = false;
750 if (!(IsDepCheckNeeded && CanDoAliasSetRT && RunningDepId == 2))
751 NeedsAliasSetRTCheck = (NumWritePtrChecks >= 2 ||
752 (NumReadPtrChecks >= 1 && NumWritePtrChecks >= 1));
753
754 // We need to perform run-time alias checks, but some pointers had bounds
755 // that couldn't be checked.
756 if (NeedsAliasSetRTCheck && !CanDoAliasSetRT) {
757 // Reset the CanDoAliasSetRT flag and retry all accesses that have failed.
758 // We know that we need these checks, so we can now be more aggressive
759 // and add further checks if required (overflow checks).
760 CanDoAliasSetRT = true;
761 for (auto Access : Retries)
762 if (!createCheckForAccess(RtCheck, Access, StridesMap, DepSetId,
763 TheLoop, RunningDepId, ASId,
764 ShouldCheckWrap, /*Assume=*/true)) {
765 CanDoAliasSetRT = false;
766 break;
767 }
768 }
769
770 CanDoRT &= CanDoAliasSetRT;
771 NeedRTCheck |= NeedsAliasSetRTCheck;
772 ++ASId;
773 }
774
775 // If the pointers that we would use for the bounds comparison have different
776 // address spaces, assume the values aren't directly comparable, so we can't
777 // use them for the runtime check. We also have to assume they could
778 // overlap. In the future there should be metadata for whether address spaces
779 // are disjoint.
780 unsigned NumPointers = RtCheck.Pointers.size();
781 for (unsigned i = 0; i < NumPointers; ++i) {
782 for (unsigned j = i + 1; j < NumPointers; ++j) {
783 // Only need to check pointers between two different dependency sets.
784 if (RtCheck.Pointers[i].DependencySetId ==
785 RtCheck.Pointers[j].DependencySetId)
786 continue;
787 // Only need to check pointers in the same alias set.
788 if (RtCheck.Pointers[i].AliasSetId != RtCheck.Pointers[j].AliasSetId)
789 continue;
790
791 Value *PtrI = RtCheck.Pointers[i].PointerValue;
792 Value *PtrJ = RtCheck.Pointers[j].PointerValue;
793
794 unsigned ASi = PtrI->getType()->getPointerAddressSpace();
795 unsigned ASj = PtrJ->getType()->getPointerAddressSpace();
796 if (ASi != ASj) {
797 LLVM_DEBUG(
798 dbgs() << "LAA: Runtime check would require comparison between"
799 " different address spaces\n");
800 return false;
801 }
802 }
803 }
804
805 if (NeedRTCheck && CanDoRT)
806 RtCheck.generateChecks(DepCands, IsDepCheckNeeded);
807
808 LLVM_DEBUG(dbgs() << "LAA: We need to do " << RtCheck.getNumberOfChecks()
809 << " pointer comparisons.\n");
810
811 RtCheck.Need = NeedRTCheck;
812
813 bool CanDoRTIfNeeded = !NeedRTCheck || CanDoRT;
814 if (!CanDoRTIfNeeded)
815 RtCheck.reset();
816 return CanDoRTIfNeeded;
817}
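
// ===== Editor's sketch (not part of LoopAccessAnalysis.cpp) ==============
// The return value above folds the two independently computed flags:
// failing to find bounds (CanDoRT == false) is only fatal when checks are
// actually needed. canDoRTIfNeeded is an illustrative name.
static bool canDoRTIfNeeded(bool NeedRTCheck, bool CanDoRT) {
  return !NeedRTCheck || CanDoRT;
}
//   NeedRTCheck  CanDoRT  -> result
//   false        false    -> true  (no bounds, but no checks needed either)
//   false        true     -> true
//   true         false    -> false (checks needed but impossible to emit)
//   true         true     -> true
// ==========================================================================
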
818
819void AccessAnalysis::processMemAccesses() {
820 // We process the set twice: first we process read-write pointers, last we
821 // process read-only pointers. This allows us to skip dependence tests for
822 // read-only pointers.
823
824 LLVM_DEBUG(dbgs() << "LAA: Processing memory accesses...\n");
825 LLVM_DEBUG(dbgs() << " AST: "; AST.dump());
826 LLVM_DEBUG(dbgs() << "LAA: Accesses(" << Accesses.size() << "):\n");
827 LLVM_DEBUG({
828 for (auto A : Accesses)
829 dbgs() << "\t" << *A.getPointer() << " (" <<
830 (A.getInt() ? "write" : (ReadOnlyPtr.count(A.getPointer()) ?
831 "read-only" : "read")) << ")\n";
832 });
833
834 // The AliasSetTracker has nicely partitioned our pointers by metadata
835 // compatibility and potential for underlying-object overlap. As a result, we
836 // only need to check for potential pointer dependencies within each alias
837 // set.
838 for (auto &AS : AST) {
839 // Note that both the alias-set tracker and the alias sets themselves use
840 // linked lists internally and so the iteration order here is deterministic
841 // (matching the original instruction order within each set).
842
843 bool SetHasWrite = false;
844
845 // Map of pointers to last access encountered.
846 typedef DenseMap<const Value*, MemAccessInfo> UnderlyingObjToAccessMap;
847 UnderlyingObjToAccessMap ObjToLastAccess;
848
849 // Set of accesses to check after all writes have been processed.
850 PtrAccessSet DeferredAccesses;
851
852 // Iterate over each alias set twice, once to process read/write pointers,
853 // and then to process read-only pointers.
854 for (int SetIteration = 0; SetIteration < 2; ++SetIteration) {
855 bool UseDeferred = SetIteration > 0;
856 PtrAccessSet &S = UseDeferred ? DeferredAccesses : Accesses;
857
858 for (auto AV : AS) {
859 Value *Ptr = AV.getValue();
860
861 // For a single memory access in AliasSetTracker, Accesses may contain
862 // both read and write, and they both need to be handled for CheckDeps.
863 for (auto AC : S) {
864 if (AC.getPointer() != Ptr)
865 continue;
866
867 bool IsWrite = AC.getInt();
868
869 // If we're using the deferred access set, then it contains only
870 // reads.
871 bool IsReadOnlyPtr = ReadOnlyPtr.count(Ptr) && !IsWrite;
872 if (UseDeferred && !IsReadOnlyPtr)
873 continue;
874 // Otherwise, the pointer must be in the PtrAccessSet, either as a
875 // read or a write.
876 assert(((IsReadOnlyPtr && UseDeferred) || IsWrite ||
877 S.count(MemAccessInfo(Ptr, false))) &&
878 "Alias-set pointer not in the access set?");
879
880 MemAccessInfo Access(Ptr, IsWrite);
881 DepCands.insert(Access);
882
883 // Memorize read-only pointers for later processing and skip them in
884 // the first round (they need to be checked after we have seen all
885 // write pointers). Note: we also mark pointers that are not
886 // consecutive as "read-only" pointers (so that we check
887 // "a[b[i]] +="). Hence, we need the second check for "!IsWrite".
888 if (!UseDeferred && IsReadOnlyPtr) {
889 DeferredAccesses.insert(Access);
890 continue;
891 }
892
893 // If this is a write - check other reads and writes for conflicts. If
894 // this is a read only check other writes for conflicts (but only if
895 // there is no other write to the ptr - this is an optimization to
896 // catch "a[i] = a[i] + " without having to do a dependence check).
897 if ((IsWrite || IsReadOnlyPtr) && SetHasWrite) {
898 CheckDeps.push_back(Access);
899 IsRTCheckAnalysisNeeded = true;
900 }
901
902 if (IsWrite)
903 SetHasWrite = true;
904
905 // Create sets of pointers connected by a shared alias set and
906 // underlying object.
907 typedef SmallVector<const Value *, 16> ValueVector;
908 ValueVector TempObjects;
909
910 GetUnderlyingObjects(Ptr, TempObjects, DL, LI);
911 LLVM_DEBUG(dbgs()
912 << "Underlying objects for pointer " << *Ptr << "\n");
913 for (const Value *UnderlyingObj : TempObjects) {
914 // nullptr never aliases; don't join sets for pointers that have "null"
915 // in their UnderlyingObjects list.
916 if (isa<ConstantPointerNull>(UnderlyingObj) &&
917 !NullPointerIsDefined(
918 TheLoop->getHeader()->getParent(),
919 UnderlyingObj->getType()->getPointerAddressSpace()))
920 continue;
921
922 UnderlyingObjToAccessMap::iterator Prev =
923 ObjToLastAccess.find(UnderlyingObj);
924 if (Prev != ObjToLastAccess.end())
925 DepCands.unionSets(Access, Prev->second);
926
927 ObjToLastAccess[UnderlyingObj] = Access;
928 LLVM_DEBUG(dbgs() << " " << *UnderlyingObj << "\n");
929 }
930 }
931 }
932 }
933 }
934}
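
// ===== Editor's sketch (not part of LoopAccessAnalysis.cpp) ==============
// The two-pass scheme above in miniature: read-only accesses are deferred
// to a second pass, so a read only becomes a dependence-check candidate once
// a write to the same alias set has been seen. Access and
// collectCheckCandidates are illustrative stand-ins, and the
// underlying-object bookkeeping is omitted.
#include <vector>

struct Access { int Ptr; bool IsWrite; };

static std::vector<Access>
collectCheckCandidates(const std::vector<Access> &AliasSet) {
  std::vector<Access> CheckDeps, Deferred;
  bool SetHasWrite = false;
  for (int Pass = 0; Pass < 2; ++Pass) {
    const std::vector<Access> &S = Pass == 0 ? AliasSet : Deferred;
    for (const Access &A : S) {
      if (Pass == 0 && !A.IsWrite) {
        Deferred.push_back(A); // Check reads after all writes were seen.
        continue;
      }
      if (SetHasWrite)
        CheckDeps.push_back(A); // Write/write, write/read, or read/write.
      if (A.IsWrite)
        SetHasWrite = true;
    }
  }
  return CheckDeps;
}
// ==========================================================================
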
935
936static bool isInBoundsGep(Value *Ptr) {
937 if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(Ptr))
938 return GEP->isInBounds();
939 return false;
940}
941
942/// Return true if an AddRec pointer \p Ptr is unsigned non-wrapping,
943/// i.e. monotonically increasing/decreasing.
944static bool isNoWrapAddRec(Value *Ptr, const SCEVAddRecExpr *AR,
945 PredicatedScalarEvolution &PSE, const Loop *L) {
946 // FIXME: This should probably only return true for NUW.
947 if (AR->getNoWrapFlags(SCEV::NoWrapMask))
948 return true;
949
950 // Scalar evolution does not propagate the non-wrapping flags to values that
951 // are derived from a non-wrapping induction variable because non-wrapping
952 // could be flow-sensitive.
953 //
954 // Look through the potentially overflowing instruction to try to prove
955 // non-wrapping for the *specific* value of Ptr.
956
957 // The arithmetic implied by an inbounds GEP can't overflow.
958 auto *GEP = dyn_cast<GetElementPtrInst>(Ptr);
959 if (!GEP || !GEP->isInBounds())
960 return false;
961
962 // Make sure there is only one non-const index and analyze that.
963 Value *NonConstIndex = nullptr;
964 for (Value *Index : make_range(GEP->idx_begin(), GEP->idx_end()))
965 if (!isa<ConstantInt>(Index)) {
966 if (NonConstIndex)
967 return false;
968 NonConstIndex = Index;
969 }
970 if (!NonConstIndex)
971 // The recurrence is on the pointer, ignore for now.
972 return false;
973
974 // The index in GEP is signed. It is non-wrapping if it's derived from a NSW
975 // AddRec using a NSW operation.
976 if (auto *OBO = dyn_cast<OverflowingBinaryOperator>(NonConstIndex))
977 if (OBO->hasNoSignedWrap() &&
978 // Assume the other operand is constant so that the AddRec can be
979 // easily found.
980 isa<ConstantInt>(OBO->getOperand(1))) {
981 auto *OpScev = PSE.getSCEV(OBO->getOperand(0));
982
983 if (auto *OpAR = dyn_cast<SCEVAddRecExpr>(OpScev))
984 return OpAR->getLoop() == L && OpAR->getNoWrapFlags(SCEV::FlagNSW);
985 }
986
987 return false;
988}
989
990/// Check whether the access through \p Ptr has a constant stride.
991int64_t llvm::getPtrStride(PredicatedScalarEvolution &PSE, Value *Ptr,
992 const Loop *Lp, const ValueToValueMap &StridesMap,
993 bool Assume, bool ShouldCheckWrap) {
994 Type *Ty = Ptr->getType();
995 assert(Ty->isPointerTy() && "Unexpected non-ptr");
996
997 // Make sure that the pointer does not point to aggregate types.
998 auto *PtrTy = cast<PointerType>(Ty);
999 if (PtrTy->getElementType()->isAggregateType()) {
1000 LLVM_DEBUG(dbgs() << "LAA: Bad stride - Not a pointer to a scalar type"
1001 << *Ptr << "\n");
1002 return 0;
1003 }
1004
1005 const SCEV *PtrScev = replaceSymbolicStrideSCEV(PSE, StridesMap, Ptr);
1006
1007 const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(PtrScev);
1008 if (Assume && !AR)
1009 AR = PSE.getAsAddRec(Ptr);
1010
1011 if (!AR) {
1012 LLVM_DEBUG(dbgs() << "LAA: Bad stride - Not an AddRecExpr pointer " << *Ptr
1013 << " SCEV: " << *PtrScev << "\n");
1014 return 0;
1015 }
1016
1017 // The access function must stride over the innermost loop.
1018 if (Lp != AR->getLoop()) {
1019 LLVM_DEBUG(dbgs() << "LAA: Bad stride - Not striding over innermost loop "
1020 << *Ptr << " SCEV: " << *AR << "\n");
1021 return 0;
1022 }
1023
1024 // The address calculation must not wrap. Otherwise, a dependence could be
1025 // inverted.
1026 // An inbounds getelementptr that is an AddRec with a unit stride
1027 // cannot wrap per definition. The unit stride requirement is checked later.
1028 // A getelementptr without an inbounds attribute and unit stride would have
1029 // to access the pointer value "0", which is undefined behavior in address
1030 // space 0, therefore we can also vectorize this case.
1031 bool IsInBoundsGEP = isInBoundsGep(Ptr);
1032 bool IsNoWrapAddRec = !ShouldCheckWrap ||
1033 PSE.hasNoOverflow(Ptr, SCEVWrapPredicate::IncrementNUSW) ||
1034 isNoWrapAddRec(Ptr, AR, PSE, Lp);
1035 if (!IsNoWrapAddRec && !IsInBoundsGEP &&
1036 NullPointerIsDefined(Lp->getHeader()->getParent(),
1037 PtrTy->getAddressSpace())) {
1038 if (Assume) {
1039 PSE.setNoOverflow(Ptr, SCEVWrapPredicate::IncrementNUSW);
1040 IsNoWrapAddRec = true;
1041 LLVM_DEBUG(dbgs() << "LAA: Pointer may wrap in the address space:\n"
1042 << "LAA: Pointer: " << *Ptr << "\n"
1043 << "LAA: SCEV: " << *AR << "\n"
1044 << "LAA: Added an overflow assumption\n");
1045 } else {
1046 LLVM_DEBUG(
1047 dbgs() << "LAA: Bad stride - Pointer may wrap in the address space "
1048 << *Ptr << " SCEV: " << *AR << "\n");
1049 return 0;
1050 }
1051 }
1052
1053 // Check the step is constant.
1054 const SCEV *Step = AR->getStepRecurrence(*PSE.getSE());
1055
1056 // Calculate the pointer stride and check if it is constant.
1057 const SCEVConstant *C = dyn_cast<SCEVConstant>(Step);
1058 if (!C) {
1059 LLVM_DEBUG(dbgs() << "LAA: Bad stride - Not a constant strided " << *Ptr
1060 << " SCEV: " << *AR << "\n");
1061 return 0;
1062 }
1063
1064 auto &DL = Lp->getHeader()->getModule()->getDataLayout();
1065 int64_t Size = DL.getTypeAllocSize(PtrTy->getElementType());
1066 const APInt &APStepVal = C->getAPInt();
1067
1068 // Huge step value - give up.
1069 if (APStepVal.getBitWidth() > 64)
1070 return 0;
1071
1072 int64_t StepVal = APStepVal.getSExtValue();
1073
1074 // Strided access.
1075 int64_t Stride = StepVal / Size;
1076 int64_t Rem = StepVal % Size;
1077 if (Rem)
1078 return 0;
1079
1080 // If the SCEV could wrap but we have an inbounds gep with a unit stride we
1081 // know we can't "wrap around the address space". In case of address space
1082 // zero we know that this won't happen without triggering undefined behavior.
1083 if (!IsNoWrapAddRec && Stride != 1 && Stride != -1 &&
1084 (IsInBoundsGEP || !NullPointerIsDefined(Lp->getHeader()->getParent(),
1085 PtrTy->getAddressSpace()))) {
1086 if (Assume) {
1087 // We can avoid this case by adding a run-time check.
1088 LLVM_DEBUG(dbgs() << "LAA: Non unit strided pointer which is not either "
1089 << "inbounds or in address space 0 may wrap:\n"
1090 << "LAA: Pointer: " << *Ptr << "\n"
1091 << "LAA: SCEV: " << *AR << "\n"
1092 << "LAA: Added an overflow assumption\n");
1093 PSE.setNoOverflow(Ptr, SCEVWrapPredicate::IncrementNUSW);
1094 } else
1095 return 0;
1096 }
1097
1098 return Stride;
1099}
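
// ===== Editor's sketch (not part of LoopAccessAnalysis.cpp) ==============
// The final stride computation above, in isolation: a constant byte step
// only yields a usable stride when it divides evenly by the element size.
// strideInElements is an illustrative name, not LLVM API.
#include <cstdint>

static int64_t strideInElements(int64_t StepBytes, int64_t EltSizeBytes) {
  int64_t Stride = StepBytes / EltSizeBytes;
  int64_t Rem = StepBytes % EltSizeBytes;
  if (Rem)
    return 0; // Step straddles elements: treated as no constant stride.
  return Stride;
}
// e.g. a 16-byte step over i32 (4 bytes) gives stride 4; a 6-byte step
// over i32 gives 0, matching the Rem check in getPtrStride.
// ==========================================================================
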
1100
1101bool llvm::sortPtrAccesses(ArrayRef<Value *> VL, const DataLayout &DL,
1102 ScalarEvolution &SE,
1103 SmallVectorImpl<unsigned> &SortedIndices) {
1104 assert(llvm::all_of(
1105 VL, [](const Value *V) { return V->getType()->isPointerTy(); }) &&
1106 "Expected list of pointer operands.");
1107 SmallVector<std::pair<int64_t, Value *>, 4> OffValPairs;
1108 OffValPairs.reserve(VL.size());
1109
1110 // Walk over the pointers, and map each of them to an offset relative to
1111 // the first pointer in the array.
1112 Value *Ptr0 = VL[0];
1113 const SCEV *Scev0 = SE.getSCEV(Ptr0);
1114 Value *Obj0 = GetUnderlyingObject(Ptr0, DL);
1115
1116 llvm::SmallSet<int64_t, 4> Offsets;
1117 for (auto *Ptr : VL) {
1118 // TODO: Outline this code as a special, more time-consuming version of
1119 // the computeConstantDifference() function.
1120 if (Ptr->getType()->getPointerAddressSpace() !=
1121 Ptr0->getType()->getPointerAddressSpace())
1122 return false;
1123 // If a pointer refers to a different underlying object, bail - the
1124 // pointers are by definition incomparable.
1125 Value *CurrObj = GetUnderlyingObject(Ptr, DL);
1126 if (CurrObj != Obj0)
1127 return false;
1128
1129 const SCEV *Scev = SE.getSCEV(Ptr);
1130 const auto *Diff = dyn_cast<SCEVConstant>(SE.getMinusSCEV(Scev, Scev0));
1131 // The pointers may not have a constant offset from each other, or SCEV
1132 // may just not be smart enough to figure out they do. Regardless,
1133 // there's nothing we can do.
1134 if (!Diff)
1135 return false;
1136
1137 // Bail out if a pointer with the same offset has already been seen.
1138 int64_t Offset = Diff->getAPInt().getSExtValue();
1139 if (!Offsets.insert(Offset).second)
1140 return false;
1141 OffValPairs.emplace_back(Offset, Ptr);
1142 }
1143 SortedIndices.clear();
1144 SortedIndices.resize(VL.size());
1145 std::iota(SortedIndices.begin(), SortedIndices.end(), 0);
1146
1147 // Sort the memory accesses, recording the sorted order in SortedIndices.
1148 llvm::stable_sort(SortedIndices, [&](unsigned Left, unsigned Right) {
1149 return OffValPairs[Left].first < OffValPairs[Right].first;
1150 });
1151
1152 // Check if the order is consecutive already.
1153 if (llvm::all_of(SortedIndices, [&SortedIndices](const unsigned I) {
1154 return I == SortedIndices[I];
1155 }))
1156 SortedIndices.clear();
1157
1158 return true;
1159}
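The function above boils down to: compute each pointer's constant byte offset from the first, reject duplicates, and stable-sort the indices by offset. A minimal standalone sketch of that strategy, with plain integers standing in for the SCEV-computed differences (the function name and the simplified setting are illustrative, not the LAA API):

#include <algorithm>
#include <cstdint>
#include <numeric>
#include <set>
#include <vector>

// Sketch: sort access indices by byte offset from the first pointer.
// Returns false on duplicate offsets (mirroring the bail-out above);
// SortedIndices is left empty when the accesses are already in order.
static bool sortByOffset(const std::vector<int64_t> &Offsets,
                         std::vector<unsigned> &SortedIndices) {
  std::set<int64_t> Seen;
  for (int64_t Off : Offsets)
    if (!Seen.insert(Off).second)
      return false; // two pointers at the same offset: give up

  SortedIndices.resize(Offsets.size());
  std::iota(SortedIndices.begin(), SortedIndices.end(), 0u);
  std::stable_sort(SortedIndices.begin(), SortedIndices.end(),
                   [&](unsigned L, unsigned R) {
                     return Offsets[L] < Offsets[R];
                   });

  // Treat "already consecutive" as "no shuffle needed", as above.
  bool AlreadySorted = true;
  for (unsigned I = 0; I < SortedIndices.size(); ++I)
    AlreadySorted &= (SortedIndices[I] == I);
  if (AlreadySorted)
    SortedIndices.clear();
  return true;
}

For example, offsets {0, 8, 4} produce SortedIndices {0, 2, 1}, while {0, 4, 8} leave it empty.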
1160
1161/// Take the address space operand from the Load/Store instruction.
1162/// Returns -1 if this is not a valid Load/Store instruction.
1163static unsigned getAddressSpaceOperand(Value *I) {
1164 if (LoadInst *L = dyn_cast<LoadInst>(I))
1165 return L->getPointerAddressSpace();
1166 if (StoreInst *S = dyn_cast<StoreInst>(I))
1167 return S->getPointerAddressSpace();
1168 return -1;
1169}
1170
1171/// Returns true if the memory operations \p A and \p B are consecutive.
1172bool llvm::isConsecutiveAccess(Value *A, Value *B, const DataLayout &DL,
1173 ScalarEvolution &SE, bool CheckType) {
1174 Value *PtrA = getLoadStorePointerOperand(A);
1175 Value *PtrB = getLoadStorePointerOperand(B);
1176 unsigned ASA = getAddressSpaceOperand(A);
1177 unsigned ASB = getAddressSpaceOperand(B);
1178
1179 // Check that the address spaces match and that the pointers are valid.
1180 if (!PtrA || !PtrB || (ASA != ASB))
1181 return false;
1182
1183 // Make sure that A and B are different pointers.
1184 if (PtrA == PtrB)
1185 return false;
1186
1187 // Make sure that A and B have the same type if required.
1188 if (CheckType && PtrA->getType() != PtrB->getType())
1189 return false;
1190
1191 unsigned IdxWidth = DL.getIndexSizeInBits(ASA);
1192 Type *Ty = cast<PointerType>(PtrA->getType())->getElementType();
1193
1194 APInt OffsetA(IdxWidth, 0), OffsetB(IdxWidth, 0);
1195 PtrA = PtrA->stripAndAccumulateInBoundsConstantOffsets(DL, OffsetA);
1196 PtrB = PtrB->stripAndAccumulateInBoundsConstantOffsets(DL, OffsetB);
1197
1198 // Retrieve the address space again as pointer stripping now tracks through
1199 // `addrspacecast`.
1200 ASA = cast<PointerType>(PtrA->getType())->getAddressSpace();
1201 ASB = cast<PointerType>(PtrB->getType())->getAddressSpace();
1202 // Check that the address spaces match and that the pointers are valid.
1203 if (ASA != ASB)
1204 return false;
1205
1206 IdxWidth = DL.getIndexSizeInBits(ASA);
1207 OffsetA = OffsetA.sextOrTrunc(IdxWidth);
1208 OffsetB = OffsetB.sextOrTrunc(IdxWidth);
1209
1210 APInt Size(IdxWidth, DL.getTypeStoreSize(Ty));
1211
1212 // OffsetDelta = OffsetB - OffsetA;
1213 const SCEV *OffsetSCEVA = SE.getConstant(OffsetA);
1214 const SCEV *OffsetSCEVB = SE.getConstant(OffsetB);
1215 const SCEV *OffsetDeltaSCEV = SE.getMinusSCEV(OffsetSCEVB, OffsetSCEVA);
1216 const APInt &OffsetDelta = cast<SCEVConstant>(OffsetDeltaSCEV)->getAPInt();
1217
1218 // Check if they are based on the same pointer. That makes the offsets
1219 // sufficient.
1220 if (PtrA == PtrB)
1221 return OffsetDelta == Size;
1222
1223 // Compute the necessary base pointer delta to have the necessary final delta
1224 // equal to the size.
1225 // BaseDelta = Size - OffsetDelta;
1226 const SCEV *SizeSCEV = SE.getConstant(Size);
1227 const SCEV *BaseDelta = SE.getMinusSCEV(SizeSCEV, OffsetDeltaSCEV);
1228
1229 // Otherwise compute the distance with SCEV between the base pointers.
1230 const SCEV *PtrSCEVA = SE.getSCEV(PtrA);
1231 const SCEV *PtrSCEVB = SE.getSCEV(PtrB);
1232 const SCEV *X = SE.getAddExpr(PtrSCEVA, BaseDelta);
1233 return X == PtrSCEVB;
1234}
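Once both pointers have been stripped to a base plus an accumulated constant offset, the consecutiveness test is simple arithmetic: B must start exactly where A's store ends. A small sketch of that core, where base identity stands in for the SCEV comparison (the struct and names are illustrative):

#include <cstdint>

struct StrippedPtr {
  const void *Base; // underlying object after in-bounds offset stripping
  int64_t Offset;   // accumulated constant byte offset
};

// Sketch: A and B are consecutive iff they share a base and B begins
// StoreSize bytes after A. The real code falls back to SCEV when the
// stripped bases differ syntactically.
static bool isConsecutive(StrippedPtr A, StrippedPtr B, int64_t StoreSize) {
  return A.Base == B.Base && B.Offset - A.Offset == StoreSize;
}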
1235
1236MemoryDepChecker::VectorizationSafetyStatus
1237MemoryDepChecker::Dependence::isSafeForVectorization(DepType Type) {
1238 switch (Type) {
1239 case NoDep:
1240 case Forward:
1241 case BackwardVectorizable:
1242 return VectorizationSafetyStatus::Safe;
1243
1244 case Unknown:
1245 return VectorizationSafetyStatus::PossiblySafeWithRtChecks;
1246 case ForwardButPreventsForwarding:
1247 case Backward:
1248 case BackwardVectorizableButPreventsForwarding:
1249 return VectorizationSafetyStatus::Unsafe;
1250 }
1251 llvm_unreachable("unexpected DepType!");
1252}
1253
1254bool MemoryDepChecker::Dependence::isBackward() const {
1255 switch (Type) {
1256 case NoDep:
1257 case Forward:
1258 case ForwardButPreventsForwarding:
1259 case Unknown:
1260 return false;
1261
1262 case BackwardVectorizable:
1263 case Backward:
1264 case BackwardVectorizableButPreventsForwarding:
1265 return true;
1266 }
1267 llvm_unreachable("unexpected DepType!");
1268}
1269
1270bool MemoryDepChecker::Dependence::isPossiblyBackward() const {
1271 return isBackward() || Type == Unknown;
1272}
1273
1274bool MemoryDepChecker::Dependence::isForward() const {
1275 switch (Type) {
1276 case Forward:
1277 case ForwardButPreventsForwarding:
1278 return true;
1279
1280 case NoDep:
1281 case Unknown:
1282 case BackwardVectorizable:
1283 case Backward:
1284 case BackwardVectorizableButPreventsForwarding:
1285 return false;
1286 }
1287 llvm_unreachable("unexpected DepType!");
1288}
1289
1290bool MemoryDepChecker::couldPreventStoreLoadForward(uint64_t Distance,
1291 uint64_t TypeByteSize) {
1292 // If loads occur at a distance that is not a multiple of a feasible vector
1293 // factor, store-load forwarding does not take place.
1294 // Positive dependences might cause trouble because vectorizing them might
1295 // prevent store-load forwarding, making vectorized code run a lot slower.
1296 // E.g. a[i] = a[i-3] ^ a[i-8];
1297 // The stores to a[i:i+1] don't align with the loads from a[i-3:i-2], and
1298 // hence on a typical architecture store-load forwarding does not take
1299 // place. Vectorizing such cases does not make sense.
1300 // The check below bounds the store-load forwarding distance.
1301
1302 // After this many iterations store-to-load forwarding conflicts should not
1303 // cause any slowdowns.
1304 const uint64_t NumItersForStoreLoadThroughMemory = 8 * TypeByteSize;
1305 // Maximum vector factor.
1306 uint64_t MaxVFWithoutSLForwardIssues = std::min(
1307 VectorizerParams::MaxVectorWidth * TypeByteSize, MaxSafeDepDistBytes);
1308
1309 // Compute the smallest VF at which the store and load would be misaligned.
1310 for (uint64_t VF = 2 * TypeByteSize; VF <= MaxVFWithoutSLForwardIssues;
1311 VF *= 2) {
1312 // If the number of vector iterations between the store and the load is
1313 // small, we could incur conflicts.
1314 if (Distance % VF && Distance / VF < NumItersForStoreLoadThroughMemory) {
1315 MaxVFWithoutSLForwardIssues = (VF >>= 1);
Although the value stored to 'VF' is used in the enclosing expression, the value is never actually read from 'VF'
1316 break;
1317 }
1318 }
1319
1320 if (MaxVFWithoutSLForwardIssues < 2 * TypeByteSize) {
1321 LLVM_DEBUG(
1322 dbgs() << "LAA: Distance " << Distance
1323 << " that could cause a store-load forwarding conflict\n");
1324 return true;
1325 }
1326
1327 if (MaxVFWithoutSLForwardIssues < MaxSafeDepDistBytes &&
1328 MaxVFWithoutSLForwardIssues !=
1329 VectorizerParams::MaxVectorWidth * TypeByteSize)
1330 MaxSafeDepDistBytes = MaxVFWithoutSLForwardIssues;
1331 return false;
1332}
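This loop contains the line the analyzer flags: 'MaxVFWithoutSLForwardIssues = (VF >>= 1);' stores the halved value back into 'VF', but the 'break' on the next line means 'VF' is never read again, so the compound assignment is a dead store. A plain shift produces the same result without it. A standalone sketch of the loop with that change (the member variables appear as plain parameters here; this is a simplification, not the LAA implementation):

#include <algorithm>
#include <cstdint>

// Sketch: find the smallest power-of-two VF at which the store and load
// become misaligned, and cap the max VF just below it.
static uint64_t maxVFWithoutForwardingIssues(uint64_t Distance,
                                             uint64_t TypeByteSize,
                                             uint64_t MaxVectorWidth,
                                             uint64_t MaxSafeDepDistBytes) {
  const uint64_t NumItersForStoreLoadThroughMemory = 8 * TypeByteSize;
  uint64_t MaxVF =
      std::min(MaxVectorWidth * TypeByteSize, MaxSafeDepDistBytes);
  for (uint64_t VF = 2 * TypeByteSize; VF <= MaxVF; VF *= 2) {
    if (Distance % VF && Distance / VF < NumItersForStoreLoadThroughMemory) {
      MaxVF = VF >> 1; // plain shift: no dead store into the loop variable
      break;
    }
  }
  return MaxVF;
}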
1333
1334void MemoryDepChecker::mergeInStatus(VectorizationSafetyStatus S) {
1335 if (Status < S)
1336 Status = S;
1337}
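mergeInStatus works because the VectorizationSafetyStatus enumerators are ordered from safest to least safe, so folding many per-dependence verdicts into one is just a running maximum. A tiny sketch of that monotone merge (enum values illustrative):

// Sketch: the safety statuses form a total order, so merging is max().
enum class SafetyStatus { Safe = 0, PossiblySafeWithRtChecks = 1, Unsafe = 2 };

// The merged status can only move toward "less safe".
static void mergeStatus(SafetyStatus &Current, SafetyStatus S) {
  if (Current < S)
    Current = S;
}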
1338
1339/// Given a non-constant (unknown) dependence-distance \p Dist between two
1340/// memory accesses, that have the same stride whose absolute value is given
1341/// in \p Stride, and that have the same type size \p TypeByteSize,
1342 /// in a loop whose backedge-taken count is \p BackedgeTakenCount, check if it is
1343/// possible to prove statically that the dependence distance is larger
1344/// than the range that the accesses will travel through the execution of
1345/// the loop. If so, return true; false otherwise. This is useful for
1346/// example in loops such as the following (PR31098):
1347/// for (i = 0; i < D; ++i) {
1348/// = out[i];
1349/// out[i+D] =
1350/// }
1351static bool isSafeDependenceDistance(const DataLayout &DL, ScalarEvolution &SE,
1352 const SCEV &BackedgeTakenCount,
1353 const SCEV &Dist, uint64_t Stride,
1354 uint64_t TypeByteSize) {
1355
1356 // If we can prove that
1357 // (**) |Dist| > BackedgeTakenCount * Step
1358 // where Step is the absolute stride of the memory accesses in bytes,
1359 // then there is no dependence.
1360 //
1361 // Rationale:
1362 // We basically want to check if the absolute distance (|Dist/Step|)
1363 // is >= the loop iteration count (or > BackedgeTakenCount).
1364 // This is equivalent to the Strong SIV Test (Practical Dependence Testing,
1365 // Section 4.2.1); Note, that for vectorization it is sufficient to prove
1366 // that the dependence distance is >= VF; This is checked elsewhere.
1367 // But in some cases we can prune unknown dependence distances early, and
1368 // even before selecting the VF, and without a runtime test, by comparing
1369 // the distance against the loop iteration count. Since the vectorized code
1370 // will be executed only if LoopCount >= VF, proving distance >= LoopCount
1371 // also guarantees that distance >= VF.
1372 //
1373 const uint64_t ByteStride = Stride * TypeByteSize;
1374 const SCEV *Step = SE.getConstant(BackedgeTakenCount.getType(), ByteStride);
1375 const SCEV *Product = SE.getMulExpr(&BackedgeTakenCount, Step);
1376
1377 const SCEV *CastedDist = &Dist;
1378 const SCEV *CastedProduct = Product;
1379 uint64_t DistTypeSize = DL.getTypeAllocSize(Dist.getType());
1380 uint64_t ProductTypeSize = DL.getTypeAllocSize(Product->getType());
1381
1382 // The dependence distance can be positive/negative, so we sign extend Dist;
1383 // The multiplication of the absolute stride in bytes and the
1384 // backedgeTakenCount is non-negative, so we zero extend Product.
1385 if (DistTypeSize > ProductTypeSize)
1386 CastedProduct = SE.getZeroExtendExpr(Product, Dist.getType());
1387 else
1388 CastedDist = SE.getNoopOrSignExtend(&Dist, Product->getType());
1389
1390 // Is Dist - (BackedgeTakenCount * Step) > 0 ?
1391 // (If so, then we have proven (**) because |Dist| >= Dist)
1392 const SCEV *Minus = SE.getMinusSCEV(CastedDist, CastedProduct);
1393 if (SE.isKnownPositive(Minus))
1394 return true;
1395
1396 // Second try: Is -Dist - (BackedgeTakenCount * Step) > 0 ?
1397 // (If so, then we have proven (**) because |Dist| >= -1*Dist)
1398 const SCEV *NegDist = SE.getNegativeSCEV(CastedDist);
1399 Minus = SE.getMinusSCEV(NegDist, CastedProduct);
1400 if (SE.isKnownPositive(Minus))
1401 return true;
1402
1403 return false;
1404}
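A concrete instance of the (**) test: in the PR31098-style loop above, with 4-byte elements and unit stride, Dist is D*4 bytes, Step is 4, and BackedgeTakenCount is D-1, so |Dist| = 4D > 4(D-1) and the accesses are proven independent. A sketch of the same comparison on plain integers (the real code works on SCEVs with explicit sign/zero extensions; names here are illustrative):

#include <cstdint>

// Sketch: prove |DistBytes| > BackedgeTakenCount * ByteStride.
static bool distanceExceedsLoopSpan(int64_t DistBytes,
                                    uint64_t BackedgeTakenCount,
                                    uint64_t ByteStride) {
  // Refuse to conclude anything if the product would overflow; SCEV
  // sidesteps this by widening the expressions instead.
  if (ByteStride != 0 && BackedgeTakenCount > UINT64_MAX / ByteStride)
    return false;
  uint64_t Span = BackedgeTakenCount * ByteStride;
  uint64_t U = uint64_t(DistBytes);
  uint64_t AbsDist = DistBytes < 0 ? 0 - U : U; // overflow-safe |DistBytes|
  return AbsDist > Span;
}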
1405
1406/// Check the dependence for two accesses with the same stride \p Stride.
1407/// \p Distance is the positive distance and \p TypeByteSize is type size in
1408/// bytes.
1409///
1410/// \returns true if they are independent.
1411static bool areStridedAccessesIndependent(uint64_t Distance, uint64_t Stride,
1412 uint64_t TypeByteSize) {
1413 assert(Stride > 1 && "The stride must be greater than 1");
1414 assert(TypeByteSize > 0 && "The type size in byte must be non-zero");
1415 assert(Distance > 0 && "The distance must be non-zero");
1416
1417 // Skip if the distance is not a multiple of the type byte size.
1418 if (Distance % TypeByteSize)
1419 return false;
1420
1421 uint64_t ScaledDist = Distance / TypeByteSize;
1422
1423 // No dependence if the scaled distance is not a multiple of the stride.
1424 // E.g.
1425 // for (i = 0; i < 1024 ; i += 4)
1426 // A[i+2] = A[i] + 1;
1427 //
1428 // Two accesses in memory (scaled distance is 2, stride is 4):
1429 // | A[0] | | | | A[4] | | | |
1430 // | | | A[2] | | | | A[6] | |
1431 //
1432 // E.g.
1433 // for (i = 0; i < 1024 ; i += 3)
1434 // A[i+4] = A[i] + 1;
1435 //
1436 // Two accesses in memory (scaled distance is 4, stride is 3):
1437 // | A[0] | | | A[3] | | | A[6] | | |
1438 // | | | | | A[4] | | | A[7] | |
1439 return ScaledDist % Stride;
1440}
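The same arithmetic on plain integers, exercised on the first picture above (distance 8 bytes between 4-byte accesses with stride 4, scaled distance 2): the two access streams interleave without colliding. A sketch:

#include <cstdint>

// Sketch: independent iff the distance is a multiple of the element size
// but the scaled distance is NOT a multiple of the stride.
static bool stridedIndependent(uint64_t Distance, uint64_t Stride,
                               uint64_t TypeByteSize) {
  if (Distance % TypeByteSize)
    return false;
  return (Distance / TypeByteSize) % Stride != 0;
}

// stridedIndependent(8, 4, 4) is true (the A[i+2] example above), while a
// scaled distance of 4 with stride 4, i.e. stridedIndependent(16, 4, 4),
// is false: A[i+4] written at iteration i collides with A[i] read at i+4.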
1441
1442MemoryDepChecker::Dependence::DepType
1443MemoryDepChecker::isDependent(const MemAccessInfo &A, unsigned AIdx,
1444 const MemAccessInfo &B, unsigned BIdx,
1445 const ValueToValueMap &Strides) {
1446 assert(AIdx < BIdx && "Must pass arguments in program order");
1447
1448 Value *APtr = A.getPointer();
1449 Value *BPtr = B.getPointer();
1450 bool AIsWrite = A.getInt();
1451 bool BIsWrite = B.getInt();
1452
1453 // Two reads are independent.
1454 if (!AIsWrite && !BIsWrite)
1455 return Dependence::NoDep;
1456
1457 // We cannot check pointers in different address spaces.
1458 if (APtr->getType()->getPointerAddressSpace() !=
1459 BPtr->getType()->getPointerAddressSpace())
1460 return Dependence::Unknown;
1461
1462 int64_t StrideAPtr = getPtrStride(PSE, APtr, InnermostLoop, Strides, true);
1463 int64_t StrideBPtr = getPtrStride(PSE, BPtr, InnermostLoop, Strides, true);
1464
1465 const SCEV *Src = PSE.getSCEV(APtr);
1466 const SCEV *Sink = PSE.getSCEV(BPtr);
1467
1468 // If the induction step is negative we have to invert source and sink of the
1469 // dependence.
1470 if (StrideAPtr < 0) {
1471 std::swap(APtr, BPtr);
1472 std::swap(Src, Sink);
1473 std::swap(AIsWrite, BIsWrite);
1474 std::swap(AIdx, BIdx);
1475 std::swap(StrideAPtr, StrideBPtr);
1476 }
1477
1478 const SCEV *Dist = PSE.getSE()->getMinusSCEV(Sink, Src);
1479
1480 LLVM_DEBUG(dbgs() << "LAA: Src Scev: " << *Src << "Sink Scev: " << *Sinkdo { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-accesses")) { dbgs() << "LAA: Src Scev: " <<
*Src << "Sink Scev: " << *Sink << "(Induction step: "
<< StrideAPtr << ")\n"; } } while (false)
1481 << "(Induction step: " << StrideAPtr << ")\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-accesses")) { dbgs() << "LAA: Src Scev: " <<
*Src << "Sink Scev: " << *Sink << "(Induction step: "
<< StrideAPtr << ")\n"; } } while (false)
;
1482 LLVM_DEBUG(dbgs() << "LAA: Distance for " << *InstMap[AIdx] << " to "do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-accesses")) { dbgs() << "LAA: Distance for " <<
*InstMap[AIdx] << " to " << *InstMap[BIdx] <<
": " << *Dist << "\n"; } } while (false)
1483 << *InstMap[BIdx] << ": " << *Dist << "\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-accesses")) { dbgs() << "LAA: Distance for " <<
*InstMap[AIdx] << " to " << *InstMap[BIdx] <<
": " << *Dist << "\n"; } } while (false)
;
1484
1485 // Need accesses with constant stride. We don't want to vectorize
1486 // "A[B[i]] += ..." and similar code or pointer arithmetic that could wrap in
1487 // the address space.
1488 if (!StrideAPtr || !StrideBPtr || StrideAPtr != StrideBPtr) {
1489 LLVM_DEBUG(dbgs() << "Pointer access with non-constant stride\n");
1490 return Dependence::Unknown;
1491 }
1492
1493 Type *ATy = APtr->getType()->getPointerElementType();
1494 Type *BTy = BPtr->getType()->getPointerElementType();
1495 auto &DL = InnermostLoop->getHeader()->getModule()->getDataLayout();
1496 uint64_t TypeByteSize = DL.getTypeAllocSize(ATy);
1497 uint64_t Stride = std::abs(StrideAPtr);
1498 const SCEVConstant *C = dyn_cast<SCEVConstant>(Dist);
1499 if (!C) {
1500 if (TypeByteSize == DL.getTypeAllocSize(BTy) &&
1501 isSafeDependenceDistance(DL, *(PSE.getSE()),
1502 *(PSE.getBackedgeTakenCount()), *Dist, Stride,
1503 TypeByteSize))
1504 return Dependence::NoDep;
1505
1506 LLVM_DEBUG(dbgs() << "LAA: Dependence because of non-constant distance\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-accesses")) { dbgs() << "LAA: Dependence because of non-constant distance\n"
; } } while (false)
;
1507 FoundNonConstantDistanceDependence = true;
1508 return Dependence::Unknown;
1509 }
1510
1511 const APInt &Val = C->getAPInt();
1512 int64_t Distance = Val.getSExtValue();
1513
1514 // Attempt to prove strided accesses independent.
1515 if (std::abs(Distance) > 0 && Stride > 1 && ATy == BTy &&
1516 areStridedAccessesIndependent(std::abs(Distance), Stride, TypeByteSize)) {
1517 LLVM_DEBUG(dbgs() << "LAA: Strided accesses are independent\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-accesses")) { dbgs() << "LAA: Strided accesses are independent\n"
; } } while (false)
;
1518 return Dependence::NoDep;
1519 }
1520
1521 // Negative distances are not plausible dependencies.
1522 if (Val.isNegative()) {
1523 bool IsTrueDataDependence = (AIsWrite && !BIsWrite);
1524 if (IsTrueDataDependence && EnableForwardingConflictDetection &&
1525 (couldPreventStoreLoadForward(Val.abs().getZExtValue(), TypeByteSize) ||
1526 ATy != BTy)) {
1527 LLVM_DEBUG(dbgs() << "LAA: Forward but may prevent st->ld forwarding\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-accesses")) { dbgs() << "LAA: Forward but may prevent st->ld forwarding\n"
; } } while (false)
;
1528 return Dependence::ForwardButPreventsForwarding;
1529 }
1530
1531 LLVM_DEBUG(dbgs() << "LAA: Dependence is negative\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-accesses")) { dbgs() << "LAA: Dependence is negative\n"
; } } while (false)
;
1532 return Dependence::Forward;
1533 }
1534
1535 // Write to the same location with the same size.
1536 // Could be improved to assert type sizes are the same (i32 == float, etc).
1537 if (Val == 0) {
1538 if (ATy == BTy)
1539 return Dependence::Forward;
1540 LLVM_DEBUG(
1541 dbgs() << "LAA: Zero dependence difference but different types\n");
1542 return Dependence::Unknown;
1543 }
1544
1545 assert(Val.isStrictlyPositive() && "Expect a positive value");
1546
1547 if (ATy != BTy) {
1548 LLVM_DEBUG(
1549 dbgs()
1550 << "LAA: ReadWrite-Write positive dependency with different types\n");
1551 return Dependence::Unknown;
1552 }
1553
1554 // Bail out early if passed-in parameters make vectorization not feasible.
1555 unsigned ForcedFactor = (VectorizerParams::VectorizationFactor ?
1556 VectorizerParams::VectorizationFactor : 1);
1557 unsigned ForcedUnroll = (VectorizerParams::VectorizationInterleave ?
1558 VectorizerParams::VectorizationInterleave : 1);
1559 // The minimum number of iterations for a vectorized/unrolled version.
1560 unsigned MinNumIter = std::max(ForcedFactor * ForcedUnroll, 2U);
1561
1562 // It's not vectorizable if the distance is smaller than the minimum distance
1563 // needed for a vectorized/unrolled version. Vectorizing one iteration in
1564 // front needs TypeByteSize * Stride. Vectorizing the last iteration needs
1565 // TypeByteSize (no need to add the last gap distance).
1566 //
1567 // E.g. Assume one char is 1 byte in memory and one int is 4 bytes.
1568 // foo(int *A) {
1569 // int *B = (int *)((char *)A + 14);
1570 // for (i = 0 ; i < 1024 ; i += 2)
1571 // B[i] = A[i] + 1;
1572 // }
1573 //
1574 // Two accesses in memory (stride is 2):
1575 // | A[0] | | A[2] | | A[4] | | A[6] | |
1576 // | B[0] | | B[2] | | B[4] |
1577 //
1578 // Distance needed for vectorizing iterations except the last iteration:
1579 // 4 * 2 * (MinNumIter - 1). Distance needed for the last iteration: 4.
1580 // So the minimum distance needed is: 4 * 2 * (MinNumIter - 1) + 4.
1581 //
1582 // If MinNumIter is 2, it is vectorizable as the minimum distance needed is
1583 // 12, which is less than the distance (14).
1584 //
1585 // If MinNumIter is 4 (say, if a user forces the vectorization factor to be
1586 // 4), the minimum distance needed is 28, which is greater than the distance.
1587 // It is not safe to do vectorization.
1588 uint64_t MinDistanceNeeded =
1589 TypeByteSize * Stride * (MinNumIter - 1) + TypeByteSize;
1590 if (MinDistanceNeeded > static_cast<uint64_t>(Distance)) {
1591 LLVM_DEBUG(dbgs() << "LAA: Failure because of positive distance "do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-accesses")) { dbgs() << "LAA: Failure because of positive distance "
<< Distance << '\n'; } } while (false)
1592 << Distance << '\n')do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-accesses")) { dbgs() << "LAA: Failure because of positive distance "
<< Distance << '\n'; } } while (false)
;
1593 return Dependence::Backward;
1594 }
1595
1596 // Unsafe if the minimum distance needed is greater than max safe distance.
1597 if (MinDistanceNeeded > MaxSafeDepDistBytes) {
1598 LLVM_DEBUG(dbgs() << "LAA: Failure because it needs at least "do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-accesses")) { dbgs() << "LAA: Failure because it needs at least "
<< MinDistanceNeeded << " size in bytes"; } } while
(false)
1599 << MinDistanceNeeded << " size in bytes")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-accesses")) { dbgs() << "LAA: Failure because it needs at least "
<< MinDistanceNeeded << " size in bytes"; } } while
(false)
;
1600 return Dependence::Backward;
1601 }
1602
1603 // Positive distance bigger than max vectorization factor.
1604 // FIXME: Should use max factor instead of max distance in bytes, which could
1605 // not handle different types.
1606 // E.g. Assume one char is 1 byte in memory and one int is 4 bytes.
1607 // void foo (int *A, char *B) {
1608 // for (unsigned i = 0; i < 1024; i++) {
1609 // A[i+2] = A[i] + 1;
1610 // B[i+2] = B[i] + 1;
1611 // }
1612 // }
1613 //
1614 // This case is currently unsafe according to the max safe distance. If we
1615 // analyze the two accesses on array B, the max safe dependence distance
1616 // is 2. Then we analyze the accesses on array A, where the minimum distance
1617 // needed is 8, which is greater than 2, so vectorization is forbidden. But
1618 // actually both A and B could be vectorized by 2 iterations.
1619 MaxSafeDepDistBytes =
1620 std::min(static_cast<uint64_t>(Distance), MaxSafeDepDistBytes);
1621
1622 bool IsTrueDataDependence = (!AIsWrite && BIsWrite);
1623 if (IsTrueDataDependence && EnableForwardingConflictDetection &&
1624 couldPreventStoreLoadForward(Distance, TypeByteSize))
1625 return Dependence::BackwardVectorizableButPreventsForwarding;
1626
1627 uint64_t MaxVF = MaxSafeDepDistBytes / (TypeByteSize * Stride);
1628 LLVM_DEBUG(dbgs() << "LAA: Positive distance " << Val.getSExtValue()do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-accesses")) { dbgs() << "LAA: Positive distance "
<< Val.getSExtValue() << " with max VF = " <<
MaxVF << '\n'; } } while (false)
1629 << " with max VF = " << MaxVF << '\n')do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-accesses")) { dbgs() << "LAA: Positive distance "
<< Val.getSExtValue() << " with max VF = " <<
MaxVF << '\n'; } } while (false)
;
1630 uint64_t MaxVFInBits = MaxVF * TypeByteSize * 8;
1631 MaxSafeRegisterWidth = std::min(MaxSafeRegisterWidth, MaxVFInBits);
1632 return Dependence::BackwardVectorizable;
1633}
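The minimum-distance formula derived in the long comment above is what separates Backward from BackwardVectorizable here. A sketch of just that formula, checked against the worked numbers from the B = A + 14 example (TypeByteSize 4, Stride 2):

#include <cstdint>

// Sketch: bytes of dependence distance needed to run MinNumIter
// iterations at once: a full stride for all but the last iteration,
// plus one element for the last.
static uint64_t minDistanceNeeded(uint64_t TypeByteSize, uint64_t Stride,
                                  uint64_t MinNumIter) {
  return TypeByteSize * Stride * (MinNumIter - 1) + TypeByteSize;
}

// minDistanceNeeded(4, 2, 2) == 12 <= 14: vectorizable by 2 iterations.
// minDistanceNeeded(4, 2, 4) == 28 >  14: a forced VF of 4 is unsafe.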
1634
1635bool MemoryDepChecker::areDepsSafe(DepCandidates &AccessSets,
1636 MemAccessInfoList &CheckDeps,
1637 const ValueToValueMap &Strides) {
1638
1639 MaxSafeDepDistBytes = -1;
1640 SmallPtrSet<MemAccessInfo, 8> Visited;
1641 for (MemAccessInfo CurAccess : CheckDeps) {
1642 if (Visited.count(CurAccess))
1643 continue;
1644
1645 // Get the relevant memory access set.
1646 EquivalenceClasses<MemAccessInfo>::iterator I =
1647 AccessSets.findValue(AccessSets.getLeaderValue(CurAccess));
1648
1649 // Check accesses within this set.
1650 EquivalenceClasses<MemAccessInfo>::member_iterator AI =
1651 AccessSets.member_begin(I);
1652 EquivalenceClasses<MemAccessInfo>::member_iterator AE =
1653 AccessSets.member_end();
1654
1655 // Check every access pair.
1656 while (AI != AE) {
1657 Visited.insert(*AI);
1658 bool AIIsWrite = AI->getInt();
1659 // Check loads only against the next equivalence class, but stores also
1660 // against other stores in the same equivalence class - to the same address.
1661 EquivalenceClasses<MemAccessInfo>::member_iterator OI =
1662 (AIIsWrite ? AI : std::next(AI));
1663 while (OI != AE) {
1664 // Check every accessing instruction pair in program order.
1665 for (std::vector<unsigned>::iterator I1 = Accesses[*AI].begin(),
1666 I1E = Accesses[*AI].end(); I1 != I1E; ++I1)
1667 // Scan all accesses of another equivalence class, but only the next
1668 // accesses of the same equivalence class.
1669 for (std::vector<unsigned>::iterator
1670 I2 = (OI == AI ? std::next(I1) : Accesses[*OI].begin()),
1671 I2E = (OI == AI ? I1E : Accesses[*OI].end());
1672 I2 != I2E; ++I2) {
1673 auto A = std::make_pair(&*AI, *I1);
1674 auto B = std::make_pair(&*OI, *I2);
1675
1676 assert(*I1 != *I2);
1677 if (*I1 > *I2)
1678 std::swap(A, B);
1679
1680 Dependence::DepType Type =
1681 isDependent(*A.first, A.second, *B.first, B.second, Strides);
1682 mergeInStatus(Dependence::isSafeForVectorization(Type));
1683
1684 // Gather dependences unless we accumulated MaxDependences
1685 // dependences. In that case return as soon as we find the first
1686 // unsafe dependence. This puts a limit on this quadratic
1687 // algorithm.
1688 if (RecordDependences) {
1689 if (Type != Dependence::NoDep)
1690 Dependences.push_back(Dependence(A.second, B.second, Type));
1691
1692 if (Dependences.size() >= MaxDependences) {
1693 RecordDependences = false;
1694 Dependences.clear();
1695 LLVM_DEBUG(dbgs()
1696 << "Too many dependences, stopped recording\n");
1697 }
1698 }
1699 if (!RecordDependences && !isSafeForVectorization())
1700 return false;
1701 }
1702 ++OI;
1703 }
1704 AI++;
1705 }
1706 }
1707
1708 LLVM_DEBUG(dbgs() << "Total Dependences: " << Dependences.size() << "\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-accesses")) { dbgs() << "Total Dependences: " <<
Dependences.size() << "\n"; } } while (false)
;
1709 return isSafeForVectorization();
1710}
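Stripped of the equivalence-class bookkeeping, the loop above is a quadratic scan over access pairs with a recording cap: once MaxDependences is hit it keeps checking safety but stops keeping the list, and from then on it can return at the first unsafe pair. A simplified sketch over a single access set (the Access type and the dependence test are placeholders, not LAA types):

#include <cstddef>
#include <utility>
#include <vector>

struct Access { unsigned Idx; bool IsWrite; };

template <typename DepFn> // DepFn(A, B) -> true if the pair is safe
static bool allPairsSafe(const std::vector<Access> &Set, DepFn IsSafe,
                         size_t MaxDependences,
                         std::vector<std::pair<unsigned, unsigned>> &Deps) {
  bool Record = true, AllSafe = true;
  for (size_t I = 0; I < Set.size(); ++I)
    for (size_t J = I + 1; J < Set.size(); ++J) {
      if (!Set[I].IsWrite && !Set[J].IsWrite)
        continue; // two reads never conflict
      AllSafe &= IsSafe(Set[I], Set[J]);
      if (Record) {
        Deps.emplace_back(Set[I].Idx, Set[J].Idx);
        if (Deps.size() >= MaxDependences) {
          Record = false; // cap hit: drop the list, keep only the verdict
          Deps.clear();
        }
      }
      if (!Record && !AllSafe)
        return false; // once recording is off, bail at the first unsafe pair
    }
  return AllSafe;
}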
1711
1712SmallVector<Instruction *, 4>
1713MemoryDepChecker::getInstructionsForAccess(Value *Ptr, bool isWrite) const {
1714 MemAccessInfo Access(Ptr, isWrite);
1715 auto &IndexVector = Accesses.find(Access)->second;
1716
1717 SmallVector<Instruction *, 4> Insts;
1718 transform(IndexVector,
1719 std::back_inserter(Insts),
1720 [&](unsigned Idx) { return this->InstMap[Idx]; });
1721 return Insts;
1722}
1723
1724const char *MemoryDepChecker::Dependence::DepName[] = {
1725 "NoDep", "Unknown", "Forward", "ForwardButPreventsForwarding", "Backward",
1726 "BackwardVectorizable", "BackwardVectorizableButPreventsForwarding"};
1727
1728void MemoryDepChecker::Dependence::print(
1729 raw_ostream &OS, unsigned Depth,
1730 const SmallVectorImpl<Instruction *> &Instrs) const {
1731 OS.indent(Depth) << DepName[Type] << ":\n";
1732 OS.indent(Depth + 2) << *Instrs[Source] << " -> \n";
1733 OS.indent(Depth + 2) << *Instrs[Destination] << "\n";
1734}
1735
1736bool LoopAccessInfo::canAnalyzeLoop() {
1737 // We need to have a loop header.
1738 LLVM_DEBUG(dbgs() << "LAA: Found a loop in "do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-accesses")) { dbgs() << "LAA: Found a loop in " <<
TheLoop->getHeader()->getParent()->getName() <<
": " << TheLoop->getHeader()->getName() <<
'\n'; } } while (false)
1739 << TheLoop->getHeader()->getParent()->getName() << ": "do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-accesses")) { dbgs() << "LAA: Found a loop in " <<
TheLoop->getHeader()->getParent()->getName() <<
": " << TheLoop->getHeader()->getName() <<
'\n'; } } while (false)
1740 << TheLoop->getHeader()->getName() << '\n')do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-accesses")) { dbgs() << "LAA: Found a loop in " <<
TheLoop->getHeader()->getParent()->getName() <<
": " << TheLoop->getHeader()->getName() <<
'\n'; } } while (false)
;
1741
1742 // We can only analyze innermost loops.
1743 if (!TheLoop->empty()) {
1744 LLVM_DEBUG(dbgs() << "LAA: loop is not the innermost loop\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-accesses")) { dbgs() << "LAA: loop is not the innermost loop\n"
; } } while (false)
;
1745 recordAnalysis("NotInnerMostLoop") << "loop is not the innermost loop";
1746 return false;
1747 }
1748
1749 // We must have a single backedge.
1750 if (TheLoop->getNumBackEdges() != 1) {
1751 LLVM_DEBUG(
1752 dbgs() << "LAA: loop control flow is not understood by analyzer\n");
1753 recordAnalysis("CFGNotUnderstood")
1754 << "loop control flow is not understood by analyzer";
1755 return false;
1756 }
1757
1758 // We must have a single exiting block.
1759 if (!TheLoop->getExitingBlock()) {
1760 LLVM_DEBUG(
1761 dbgs() << "LAA: loop control flow is not understood by analyzer\n");
1762 recordAnalysis("CFGNotUnderstood")
1763 << "loop control flow is not understood by analyzer";
1764 return false;
1765 }
1766
1767 // We only handle bottom-tested loops, i.e. loops in which the condition is
1768 // checked at the end of each iteration. With that we can assume that all
1769 // instructions in the loop are executed the same number of times.
1770 if (TheLoop->getExitingBlock() != TheLoop->getLoopLatch()) {
1771 LLVM_DEBUG(
1772 dbgs() << "LAA: loop control flow is not understood by analyzer\n");
1773 recordAnalysis("CFGNotUnderstood")
1774 << "loop control flow is not understood by analyzer";
1775 return false;
1776 }
1777
1778 // ScalarEvolution needs to be able to find the exit count.
1779 const SCEV *ExitCount = PSE->getBackedgeTakenCount();
1780 if (ExitCount == PSE->getSE()->getCouldNotCompute()) {
1781 recordAnalysis("CantComputeNumberOfIterations")
1782 << "could not determine number of loop iterations";
1783 LLVM_DEBUG(dbgs() << "LAA: SCEV could not compute the loop exit count.\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-accesses")) { dbgs() << "LAA: SCEV could not compute the loop exit count.\n"
; } } while (false)
;
1784 return false;
1785 }
1786
1787 return true;
1788}
1789
1790void LoopAccessInfo::analyzeLoop(AliasAnalysis *AA, LoopInfo *LI,
1791 const TargetLibraryInfo *TLI,
1792 DominatorTree *DT) {
1793 typedef SmallPtrSet<Value*, 16> ValueSet;
1794
1795 // Holds the Load and Store instructions.
1796 SmallVector<LoadInst *, 16> Loads;
1797 SmallVector<StoreInst *, 16> Stores;
1798
1799 // Holds all the different accesses in the loop.
1800 unsigned NumReads = 0;
1801 unsigned NumReadWrites = 0;
1802
1803 bool HasComplexMemInst = false;
1804
1805 // A runtime check is only legal to insert if there are no convergent calls.
1806 HasConvergentOp = false;
1807
1808 PtrRtChecking->Pointers.clear();
1809 PtrRtChecking->Need = false;
1810
1811 const bool IsAnnotatedParallel = TheLoop->isAnnotatedParallel();
1812
1813 // For each block.
1814 for (BasicBlock *BB : TheLoop->blocks()) {
1815 // Scan the BB and collect legal loads and stores. Also detect any
1816 // convergent instructions.
1817 for (Instruction &I : *BB) {
1818 if (auto *Call = dyn_cast<CallBase>(&I)) {
1819 if (Call->isConvergent())
1820 HasConvergentOp = true;
1821 }
1822
1823 // With both a non-vectorizable memory instruction and a convergent
1824 // operation found in this loop, there is no reason to continue the search.
1825 if (HasComplexMemInst && HasConvergentOp) {
1826 CanVecMem = false;
1827 return;
1828 }
1829
1830 // Avoid hitting recordAnalysis multiple times.
1831 if (HasComplexMemInst)
1832 continue;
1833
1834 // If this is a load, save it. If this instruction can read from memory
1835 // but is not a load, then we quit. Notice that we don't handle function
1836 // calls that read or write.
1837 if (I.mayReadFromMemory()) {
1838 // Many math library functions read the rounding mode. We will only
1839 // vectorize a loop if it contains known function calls that don't set
1840 // the flag. Therefore, it is safe to ignore this read from memory.
1841 auto *Call = dyn_cast<CallInst>(&I);
1842 if (Call && getVectorIntrinsicIDForCall(Call, TLI))
1843 continue;
1844
1845 // If the function has an explicit vectorized counterpart, we can safely
1846 // assume that it can be vectorized.
1847 if (Call && !Call->isNoBuiltin() && Call->getCalledFunction() &&
1848 !VFDatabase::getMappings(*Call).empty())
1849 continue;
1850
1851 auto *Ld = dyn_cast<LoadInst>(&I);
1852 if (!Ld) {
1853 recordAnalysis("CantVectorizeInstruction", Ld)
1854 << "instruction cannot be vectorized";
1855 HasComplexMemInst = true;
1856 continue;
1857 }
1858 if (!Ld->isSimple() && !IsAnnotatedParallel) {
1859 recordAnalysis("NonSimpleLoad", Ld)
1860 << "read with atomic ordering or volatile read";
1861 LLVM_DEBUG(dbgs() << "LAA: Found a non-simple load.\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-accesses")) { dbgs() << "LAA: Found a non-simple load.\n"
; } } while (false)
;
1862 HasComplexMemInst = true;
1863 continue;
1864 }
1865 NumLoads++;
1866 Loads.push_back(Ld);
1867 DepChecker->addAccess(Ld);
1868 if (EnableMemAccessVersioning)
1869 collectStridedAccess(Ld);
1870 continue;
1871 }
1872
1873 // Save 'store' instructions. Abort if other instructions write to memory.
1874 if (I.mayWriteToMemory()) {
1875 auto *St = dyn_cast<StoreInst>(&I);
1876 if (!St) {
1877 recordAnalysis("CantVectorizeInstruction", St)
1878 << "instruction cannot be vectorized";
1879 HasComplexMemInst = true;
1880 continue;
1881 }
1882 if (!St->isSimple() && !IsAnnotatedParallel) {
1883 recordAnalysis("NonSimpleStore", St)
1884 << "write with atomic ordering or volatile write";
1885 LLVM_DEBUG(dbgs() << "LAA: Found a non-simple store.\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-accesses")) { dbgs() << "LAA: Found a non-simple store.\n"
; } } while (false)
;
1886 HasComplexMemInst = true;
1887 continue;
1888 }
1889 NumStores++;
1890 Stores.push_back(St);
1891 DepChecker->addAccess(St);
1892 if (EnableMemAccessVersioning)
1893 collectStridedAccess(St);
1894 }
1895 } // Next instr.
1896 } // Next block.
1897
1898 if (HasComplexMemInst) {
1899 CanVecMem = false;
1900 return;
1901 }
1902
1903 // Now we have two lists that hold the loads and the stores.
1904 // Next, we find the pointers that they use.
1905
1906 // Check if we see any stores. If there are no stores, then we don't
1907 // care if the pointers are *restrict*.
1908 if (!Stores.size()) {
1909 LLVM_DEBUG(dbgs() << "LAA: Found a read-only loop!\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-accesses")) { dbgs() << "LAA: Found a read-only loop!\n"
; } } while (false)
;
1910 CanVecMem = true;
1911 return;
1912 }
1913
1914 MemoryDepChecker::DepCandidates DependentAccesses;
1915 AccessAnalysis Accesses(TheLoop->getHeader()->getModule()->getDataLayout(),
1916 TheLoop, AA, LI, DependentAccesses, *PSE);
1917
1918 // Holds the analyzed pointers. We don't want to call GetUnderlyingObjects
1919 // multiple times on the same object. If the ptr is accessed twice, once
1920 // for read and once for write, it will only appear once (on the write
1921 // list). This is okay, since we are going to check for conflicts between
1922 // writes and between reads and writes, but not between reads and reads.
1923 ValueSet Seen;
1924
1925 // Record uniform store addresses to identify if we have multiple stores
1926 // to the same address.
1927 ValueSet UniformStores;
1928
1929 for (StoreInst *ST : Stores) {
1930 Value *Ptr = ST->getPointerOperand();
1931
1932 if (isUniform(Ptr))
1933 HasDependenceInvolvingLoopInvariantAddress |=
1934 !UniformStores.insert(Ptr).second;
1935
1936 // If we did *not* see this pointer before, insert it to the read-write
1937 // list. At this phase it is only a 'write' list.
1938 if (Seen.insert(Ptr).second) {
1939 ++NumReadWrites;
1940
1941 MemoryLocation Loc = MemoryLocation::get(ST);
1942 // The TBAA metadata could have a control dependency on the predication
1943 // condition, so we cannot rely on it when determining whether or not we
1944 // need runtime pointer checks.
1945 if (blockNeedsPredication(ST->getParent(), TheLoop, DT))
1946 Loc.AATags.TBAA = nullptr;
1947
1948 Accesses.addStore(Loc);
1949 }
1950 }
1951
1952 if (IsAnnotatedParallel) {
1953 LLVM_DEBUG(
1954 dbgs() << "LAA: A loop annotated parallel, ignore memory dependency "
1955 << "checks.\n");
1956 CanVecMem = true;
1957 return;
1958 }
1959
1960 for (LoadInst *LD : Loads) {
1961 Value *Ptr = LD->getPointerOperand();
1962 // If we did *not* see this pointer before, insert it to the
1963 // read list. If we *did* see it before, then it is already in
1964 // the read-write list. This allows us to vectorize expressions
1965 // such as A[i] += x, because the address of A[i] is a read-write
1966 // pointer. This only works if the index of A[i] is consecutive.
1967 // If the address of i is unknown (for example A[B[i]]) then we may
1968 // read a few words, modify, and write a few words, and some of the
1969 // words may be written to the same address.
1970 bool IsReadOnlyPtr = false;
1971 if (Seen.insert(Ptr).second ||
1972 !getPtrStride(*PSE, Ptr, TheLoop, SymbolicStrides)) {
1973 ++NumReads;
1974 IsReadOnlyPtr = true;
1975 }
1976
1977 // See if there is an unsafe dependency between a load to a uniform address and
1978 // store to the same uniform address.
1979 if (UniformStores.count(Ptr)) {
1980 LLVM_DEBUG(dbgs() << "LAA: Found an unsafe dependency between a uniform "do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-accesses")) { dbgs() << "LAA: Found an unsafe dependency between a uniform "
"load and uniform store to the same address!\n"; } } while (
false)
1981 "load and uniform store to the same address!\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-accesses")) { dbgs() << "LAA: Found an unsafe dependency between a uniform "
"load and uniform store to the same address!\n"; } } while (
false)
;
1982 HasDependenceInvolvingLoopInvariantAddress = true;
1983 }
1984
1985 MemoryLocation Loc = MemoryLocation::get(LD);
1986 // The TBAA metadata could have a control dependency on the predication
1987 // condition, so we cannot rely on it when determining whether or not we
1988 // need runtime pointer checks.
1989 if (blockNeedsPredication(LD->getParent(), TheLoop, DT))
1990 Loc.AATags.TBAA = nullptr;
1991
1992 Accesses.addLoad(Loc, IsReadOnlyPtr);
1993 }
1994
1995 // If we write (or read-write) to a single destination and there are no
1996 // other reads in this loop, then it is safe to vectorize.
1997 if (NumReadWrites == 1 && NumReads == 0) {
1998 LLVM_DEBUG(dbgs() << "LAA: Found a write-only loop!\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-accesses")) { dbgs() << "LAA: Found a write-only loop!\n"
; } } while (false)
;
1999 CanVecMem = true;
2000 return;
2001 }
2002
2003 // Build dependence sets and check whether we need a runtime pointer bounds
2004 // check.
2005 Accesses.buildDependenceSets();
2006
2007 // Find pointers with computable bounds. We are going to use this information
2008 // to place a runtime bound check.
2009 bool CanDoRTIfNeeded = Accesses.canCheckPtrAtRT(*PtrRtChecking, PSE->getSE(),
2010 TheLoop, SymbolicStrides);
2011 if (!CanDoRTIfNeeded) {
2012 recordAnalysis("CantIdentifyArrayBounds") << "cannot identify array bounds";
2013 LLVM_DEBUG(dbgs() << "LAA: We can't vectorize because we can't find "do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-accesses")) { dbgs() << "LAA: We can't vectorize because we can't find "
<< "the array bounds.\n"; } } while (false)
2014 << "the array bounds.\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-accesses")) { dbgs() << "LAA: We can't vectorize because we can't find "
<< "the array bounds.\n"; } } while (false)
;
2015 CanVecMem = false;
2016 return;
2017 }
2018
2019 LLVM_DEBUG(
2020 dbgs() << "LAA: May be able to perform a memory runtime check if needed.\n");
2021
2022 CanVecMem = true;
2023 if (Accesses.isDependencyCheckNeeded()) {
2024 LLVM_DEBUG(dbgs() << "LAA: Checking memory dependencies\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-accesses")) { dbgs() << "LAA: Checking memory dependencies\n"
; } } while (false)
;
2025 CanVecMem = DepChecker->areDepsSafe(
2026 DependentAccesses, Accesses.getDependenciesToCheck(), SymbolicStrides);
2027 MaxSafeDepDistBytes = DepChecker->getMaxSafeDepDistBytes();
2028
2029 if (!CanVecMem && DepChecker->shouldRetryWithRuntimeCheck()) {
2030 LLVM_DEBUG(dbgs() << "LAA: Retrying with memory checks\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-accesses")) { dbgs() << "LAA: Retrying with memory checks\n"
; } } while (false)
;
2031
2032 // Clear the dependency checks. We assume they are not needed.
2033 Accesses.resetDepChecks(*DepChecker);
2034
2035 PtrRtChecking->reset();
2036 PtrRtChecking->Need = true;
2037
2038 auto *SE = PSE->getSE();
2039 CanDoRTIfNeeded = Accesses.canCheckPtrAtRT(*PtrRtChecking, SE, TheLoop,
2040 SymbolicStrides, true);
2041
2042 // Check that we found the bounds for the pointer.
2043 if (!CanDoRTIfNeeded) {
2044 recordAnalysis("CantCheckMemDepsAtRunTime")
2045 << "cannot check memory dependencies at runtime";
2046 LLVM_DEBUG(dbgs() << "LAA: Can't vectorize with memory checks\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-accesses")) { dbgs() << "LAA: Can't vectorize with memory checks\n"
; } } while (false)
;
2047 CanVecMem = false;
2048 return;
2049 }
2050
2051 CanVecMem = true;
2052 }
2053 }
2054
2055 if (HasConvergentOp) {
2056 recordAnalysis("CantInsertRuntimeCheckWithConvergent")
2057 << "cannot add control dependency to convergent operation";
2058 LLVM_DEBUG(dbgs() << "LAA: We can't vectorize because a runtime check "do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-accesses")) { dbgs() << "LAA: We can't vectorize because a runtime check "
"would be needed with a convergent operation\n"; } } while (
false)
2059 "would be needed with a convergent operation\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-accesses")) { dbgs() << "LAA: We can't vectorize because a runtime check "
"would be needed with a convergent operation\n"; } } while (
false)
;
2060 CanVecMem = false;
2061 return;
2062 }
2063
2064 if (CanVecMem)
2065 LLVM_DEBUG(
2066 dbgs() << "LAA: No unsafe dependent memory operations in loop. We"
2067 << (PtrRtChecking->Need ? "" : " don't")
2068 << " need runtime memory checks.\n");
2069 else {
2070 recordAnalysis("UnsafeMemDep")
2071 << "unsafe dependent memory operations in loop. Use "
2072 "#pragma loop distribute(enable) to allow loop distribution "
2073 "to attempt to isolate the offending operations into a separate "
2074 "loop";
2075 LLVM_DEBUG(dbgs() << "LAA: unsafe dependent memory operations in loop\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-accesses")) { dbgs() << "LAA: unsafe dependent memory operations in loop\n"
; } } while (false)
;
2076 }
2077}
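Condensed to booleans, analyzeLoop's verdict follows a short decision chain. The sketch below is a deliberate simplification: every predicate name is a placeholder for one of the analyses above, and the real function also re-runs the pointer-bound analysis on the retry path and emits a distinct diagnostic at each exit:

// Sketch: the decision sequence of analyzeLoop, reduced to booleans.
static bool canVectorizeMemory(bool HasComplexMemInst, bool HasConvergentOp,
                               bool ReadOnly, bool AnnotatedParallel,
                               bool BoundsComputable, bool DepsSafe,
                               bool RetryWithRtChecksOK) {
  if (HasComplexMemInst)
    return false;                  // an unanalyzable read or write
  if (ReadOnly || AnnotatedParallel)
    return true;                   // no cross-iteration hazard to check
  if (!BoundsComputable)
    return false;                  // cannot place runtime bound checks
  bool OK = DepsSafe || RetryWithRtChecksOK;
  if (OK && HasConvergentOp)
    return false;                  // runtime checks can't guard convergent ops
  return OK;
}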
2078
2079bool LoopAccessInfo::blockNeedsPredication(BasicBlock *BB, Loop *TheLoop,
2080 DominatorTree *DT) {
2081 assert(TheLoop->contains(BB) && "Unknown block used");
2082
2083 // Blocks that do not dominate the latch need predication.
2084 BasicBlock* Latch = TheLoop->getLoopLatch();
2085 return !DT->dominates(BB, Latch);
2086}
2087
2088OptimizationRemarkAnalysis &LoopAccessInfo::recordAnalysis(StringRef RemarkName,
2089 Instruction *I) {
2090 assert(!Report && "Multiple reports generated");
2091
2092 Value *CodeRegion = TheLoop->getHeader();
2093 DebugLoc DL = TheLoop->getStartLoc();
2094
2095 if (I) {
2096 CodeRegion = I->getParent();
2097 // If there is no debug location attached to the instruction, fall back to
2098 // using the loop's.
2099 if (I->getDebugLoc())
2100 DL = I->getDebugLoc();
2101 }
2102
2103 Report = std::make_unique<OptimizationRemarkAnalysis>(DEBUG_TYPE, RemarkName, DL,
2104 CodeRegion);
2105 return *Report;
2106}
2107
2108bool LoopAccessInfo::isUniform(Value *V) const {
2109 auto *SE = PSE->getSE();
2110 // Since we rely on SCEV for uniformity, if the type is not SCEVable, it is
2111 // never considered uniform.
2112 // TODO: Is this really what we want? Even without FP SCEV, we may want some
2113 // trivially loop-invariant FP values to be considered uniform.
2114 if (!SE->isSCEVable(V->getType()))
2115 return false;
2116 return (SE->isLoopInvariant(SE->getSCEV(V), TheLoop));
2117}
2118
2119// FIXME: this function is currently a duplicate of the one in
2120// LoopVectorize.cpp.
2121static Instruction *getFirstInst(Instruction *FirstInst, Value *V,
2122 Instruction *Loc) {
2123 if (FirstInst)
2124 return FirstInst;
2125 if (Instruction *I = dyn_cast<Instruction>(V))
2126 return I->getParent() == Loc->getParent() ? I : nullptr;
2127 return nullptr;
2128}
2129
2130namespace {
2131
2132/// IR Values for the lower and upper bounds of a pointer evolution. We
2133/// need to use value-handles because SCEV expansion can invalidate previously
2134/// expanded values. Thus expansion of a pointer can invalidate the bounds for
2135/// a previous one.
2136struct PointerBounds {
2137 TrackingVH<Value> Start;
2138 TrackingVH<Value> End;
2139};
2140
2141} // end anonymous namespace
2142
2143/// Expand code for the lower and upper bound of the pointer group \p CG
2144/// in \p TheLoop. \return the values for the bounds.
2145static PointerBounds
2146expandBounds(const RuntimePointerChecking::CheckingPtrGroup *CG, Loop *TheLoop,
2147 Instruction *Loc, SCEVExpander &Exp, ScalarEvolution *SE,
2148 const RuntimePointerChecking &PtrRtChecking) {
2149 Value *Ptr = PtrRtChecking.Pointers[CG->Members[0]].PointerValue;
2150 const SCEV *Sc = SE->getSCEV(Ptr);
2151
2152 unsigned AS = Ptr->getType()->getPointerAddressSpace();
2153 LLVMContext &Ctx = Loc->getContext();
2154
2155 // Use this type for pointer arithmetic.
2156 Type *PtrArithTy = Type::getInt8PtrTy(Ctx, AS);
2157
2158 if (SE->isLoopInvariant(Sc, TheLoop)) {
2159 LLVM_DEBUG(dbgs() << "LAA: Adding RT check for a loop invariant ptr:"do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-accesses")) { dbgs() << "LAA: Adding RT check for a loop invariant ptr:"
<< *Ptr << "\n"; } } while (false)
2160 << *Ptr << "\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-accesses")) { dbgs() << "LAA: Adding RT check for a loop invariant ptr:"
<< *Ptr << "\n"; } } while (false)
;
2161 // Ptr could be in the loop body. If so, expand a new one at the correct
2162 // location.
2163 Instruction *Inst = dyn_cast<Instruction>(Ptr);
2164 Value *NewPtr = (Inst && TheLoop->contains(Inst))
2165 ? Exp.expandCodeFor(Sc, PtrArithTy, Loc)
2166 : Ptr;
2167 // We must return a half-open range, which means incrementing Sc.
2168 const SCEV *ScPlusOne = SE->getAddExpr(Sc, SE->getOne(PtrArithTy));
2169 Value *NewPtrPlusOne = Exp.expandCodeFor(ScPlusOne, PtrArithTy, Loc);
2170 return {NewPtr, NewPtrPlusOne};
2171 } else {
2172 Value *Start = nullptr, *End = nullptr;
2173 LLVM_DEBUG(dbgs() << "LAA: Adding RT check for range:\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-accesses")) { dbgs() << "LAA: Adding RT check for range:\n"
; } } while (false)
;
2174 Start = Exp.expandCodeFor(CG->Low, PtrArithTy, Loc);
2175 End = Exp.expandCodeFor(CG->High, PtrArithTy, Loc);
2176 LLVM_DEBUG(dbgs() << "Start: " << *CG->Low << " End: " << *CG->Highdo { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-accesses")) { dbgs() << "Start: " << *CG->
Low << " End: " << *CG->High << "\n"; } }
while (false)
2177 << "\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("loop-accesses")) { dbgs() << "Start: " << *CG->
Low << " End: " << *CG->High << "\n"; } }
while (false)
;
2178 return {Start, End};
2179 }
2180}
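Both branches above return a half-open byte range [Start, End): the loop-invariant case still advances the end past the pointer (the Sc + 1), and the strided case uses the group's precomputed Low/High. A plain-arithmetic sketch of the range an affine access covers (illustrative only: SCEVExpander emits IR rather than computing numbers, and the invariant case above advances by one byte rather than a full element):

#include <cstdint>
#include <utility>

// Sketch: half-open byte range [Start, End) touched by accesses at
// Base + i * StrideBytes for i in [0, TripCount), each ElemSize bytes.
// StrideBytes == 0 models a loop-invariant pointer and still yields a
// non-empty range.
static std::pair<uint64_t, uint64_t>
accessRange(uint64_t Base, int64_t StrideBytes, uint64_t TripCount,
            uint64_t ElemSize) {
  uint64_t Last = TripCount ? TripCount - 1 : 0;
  if (StrideBytes >= 0)
    return {Base, Base + uint64_t(StrideBytes) * Last + ElemSize};
  uint64_t Span = (0 - uint64_t(StrideBytes)) * Last;
  return {Base - Span, Base + ElemSize}; // negative stride walks downward
}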
2181
2182/// Turns a collection of checks into a collection of expanded upper and
2183/// lower bounds for both pointers in the check.
2184static SmallVector<std::pair<PointerBounds, PointerBounds>, 4> expandBounds(
2185 const SmallVectorImpl<RuntimePointerChecking::PointerCheck> &PointerChecks,
2186 Loop *L, Instruction *Loc, ScalarEvolution *SE, SCEVExpander &Exp,
2187 const RuntimePointerChecking &PtrRtChecking) {
2188 SmallVector<std::pair<PointerBounds, PointerBounds>, 4> ChecksWithBounds;
2189
2190 // Here we're relying on the SCEV Expander's cache to only emit code for the
2191 // same bounds once.
2192 transform(
2193 PointerChecks, std::back_inserter(ChecksWithBounds),
2194 [&](const RuntimePointerChecking::PointerCheck &Check) {
2195 PointerBounds
2196 First = expandBounds(Check.first, L, Loc, Exp, SE, PtrRtChecking),
2197 Second = expandBounds(Check.second, L, Loc, Exp, SE, PtrRtChecking);
2198 return std::make_pair(First, Second);
2199 });
2200
2201 return ChecksWithBounds;
2202}
2203
2204std::pair<Instruction *, Instruction *> LoopAccessInfo::addRuntimeChecks(
2205 Instruction *Loc,
2206 const SmallVectorImpl<RuntimePointerChecking::PointerCheck> &PointerChecks)
2207 const {
2208 const DataLayout &DL = TheLoop->getHeader()->getModule()->getDataLayout();
2209 auto *SE = PSE->getSE();
2210 SCEVExpander Exp(*SE, DL, "induction");
2211 auto ExpandedChecks =
2212 expandBounds(PointerChecks, TheLoop, Loc, SE, Exp, *PtrRtChecking);
2213
2214 LLVMContext &Ctx = Loc->getContext();
2215 Instruction *FirstInst = nullptr;
2216 IRBuilder<> ChkBuilder(Loc);
2217 // Our instructions might fold to a constant.
2218 Value *MemoryRuntimeCheck = nullptr;
2219
2220 for (const auto &Check : ExpandedChecks) {
2221 const PointerBounds &A = Check.first, &B = Check.second;
2222 // Check if two pointers (A and B) conflict where conflict is computed as:
2223 // start(A) <= end(B) && start(B) <= end(A)
2224 unsigned AS0 = A.Start->getType()->getPointerAddressSpace();
2225 unsigned AS1 = B.Start->getType()->getPointerAddressSpace();
2226
2227    assert((AS0 == B.End->getType()->getPointerAddressSpace()) &&
2228           (AS1 == A.End->getType()->getPointerAddressSpace()) &&
2229           "Trying to bounds check pointers with different address spaces");
2230
2231 Type *PtrArithTy0 = Type::getInt8PtrTy(Ctx, AS0);
2232 Type *PtrArithTy1 = Type::getInt8PtrTy(Ctx, AS1);
2233
2234 Value *Start0 = ChkBuilder.CreateBitCast(A.Start, PtrArithTy0, "bc");
2235 Value *Start1 = ChkBuilder.CreateBitCast(B.Start, PtrArithTy1, "bc");
2236 Value *End0 = ChkBuilder.CreateBitCast(A.End, PtrArithTy1, "bc");
2237 Value *End1 = ChkBuilder.CreateBitCast(B.End, PtrArithTy0, "bc");
2238
2239 // [A|B].Start points to the first accessed byte under base [A|B].
2240 // [A|B].End points to the last accessed byte, plus one.
2241 // There is no conflict when the intervals are disjoint:
2242 // NoConflict = (B.Start >= A.End) || (A.Start >= B.End)
2243 //
2244 // bound0 = (B.Start < A.End)
2245 // bound1 = (A.Start < B.End)
2246 // IsConflict = bound0 & bound1
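    // E.g., for A = [0x100, 0x110) and B = [0x108, 0x118): bound0 =
    // (0x108 < 0x110) and bound1 = (0x100 < 0x118) are both true, so the
    // overlapping intervals are correctly flagged as a conflict.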
2247 Value *Cmp0 = ChkBuilder.CreateICmpULT(Start0, End1, "bound0");
2248 FirstInst = getFirstInst(FirstInst, Cmp0, Loc);
2249 Value *Cmp1 = ChkBuilder.CreateICmpULT(Start1, End0, "bound1");
2250 FirstInst = getFirstInst(FirstInst, Cmp1, Loc);
2251 Value *IsConflict = ChkBuilder.CreateAnd(Cmp0, Cmp1, "found.conflict");
2252 FirstInst = getFirstInst(FirstInst, IsConflict, Loc);
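    // OR this pair's result into the running reduction: the combined check
    // is true if any pair of pointer groups may conflict.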
2253 if (MemoryRuntimeCheck) {
2254 IsConflict =
2255 ChkBuilder.CreateOr(MemoryRuntimeCheck, IsConflict, "conflict.rdx");
2256 FirstInst = getFirstInst(FirstInst, IsConflict, Loc);
2257 }
2258 MemoryRuntimeCheck = IsConflict;
2259 }
2260
2261 if (!MemoryRuntimeCheck)
2262 return std::make_pair(nullptr, nullptr);
2263
2264  // We have to do this trickery because the IRBuilder might fold the check to a
2265  // constant expression, in which case there is no Instruction anchored in
2266  // the block.
2267 Instruction *Check = BinaryOperator::CreateAnd(MemoryRuntimeCheck,
2268 ConstantInt::getTrue(Ctx));
2269 ChkBuilder.Insert(Check, "memcheck.conflict");
2270 FirstInst = getFirstInst(FirstInst, Check, Loc);
2271 return std::make_pair(FirstInst, Check);
2272}
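
// A rough sketch of the IR this produces for a single check pair (value
// names follow the builder calls above; bitcasts are omitted):
//   %bound0            = icmp ult i8* %a.start, %b.end
//   %bound1            = icmp ult i8* %b.start, %a.end
//   %found.conflict    = and i1 %bound0, %bound1
//   %memcheck.conflict = and i1 %found.conflict, true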
2273
2274std::pair<Instruction *, Instruction *>
2275LoopAccessInfo::addRuntimeChecks(Instruction *Loc) const {
2276 if (!PtrRtChecking->Need)
2277 return std::make_pair(nullptr, nullptr);
2278
2279 return addRuntimeChecks(Loc, PtrRtChecking->getChecks());
2280}
2281
2282void LoopAccessInfo::collectStridedAccess(Value *MemAccess) {
2283 Value *Ptr = nullptr;
2284 if (LoadInst *LI = dyn_cast<LoadInst>(MemAccess))
2285 Ptr = LI->getPointerOperand();
2286 else if (StoreInst *SI = dyn_cast<StoreInst>(MemAccess))
2287 Ptr = SI->getPointerOperand();
2288 else
2289 return;
2290
2291 Value *Stride = getStrideFromPointer(Ptr, PSE->getSE(), TheLoop);
2292 if (!Stride)
2293 return;
2294
2295  LLVM_DEBUG(dbgs() << "LAA: Found a strided access that is a candidate for "
2296                       "versioning:");
2297  LLVM_DEBUG(dbgs() << " Ptr: " << *Ptr << " Stride: " << *Stride << "\n");
2298
2299  // Avoid adding the "Stride == 1" predicate when we know that
2300  // Stride >= Trip-Count. Such a predicate would effectively optimize only a
2301  // single- or zero-iteration loop, as Trip-Count <= Stride == 1 implies
  // Trip-Count <= 1.
2302 //
2303  // TODO: We are currently not making a very informed decision on when it is
2304  // beneficial to apply stride versioning. It might make more sense for the
2305  // users of this analysis (such as the vectorizer) to trigger it, based on
2306  // their specific cost considerations; for example, in cases where stride
2307  // versioning does not help resolve memory accesses/dependences, the
2308  // vectorizer should evaluate the cost of the runtime test, and the benefit
2309  // of various possible stride specializations, considering the alternatives
2310  // of using gather/scatters (if available).
2311
2312 const SCEV *StrideExpr = PSE->getSCEV(Stride);
2313 const SCEV *BETakenCount = PSE->getBackedgeTakenCount();
2314
2315 // Match the types so we can compare the stride and the BETakenCount.
2316 // The Stride can be positive/negative, so we sign extend Stride;
2317 // The backedgeTakenCount is non-negative, so we zero extend BETakenCount.
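  // E.g., an i32 Stride against an i64 BETakenCount is sign-extended to
  // i64; conversely, a narrower BETakenCount is zero-extended to match.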
2318 const DataLayout &DL = TheLoop->getHeader()->getModule()->getDataLayout();
2319 uint64_t StrideTypeSize = DL.getTypeAllocSize(StrideExpr->getType());
2320 uint64_t BETypeSize = DL.getTypeAllocSize(BETakenCount->getType());
2321 const SCEV *CastedStride = StrideExpr;
2322 const SCEV *CastedBECount = BETakenCount;
2323 ScalarEvolution *SE = PSE->getSE();
2324 if (BETypeSize >= StrideTypeSize)
2325 CastedStride = SE->getNoopOrSignExtend(StrideExpr, BETakenCount->getType());
2326 else
2327 CastedBECount = SE->getZeroExtendExpr(BETakenCount, StrideExpr->getType());
2328 const SCEV *StrideMinusBETaken = SE->getMinusSCEV(CastedStride, CastedBECount);
2329 // Since TripCount == BackEdgeTakenCount + 1, checking:
2330 // "Stride >= TripCount" is equivalent to checking:
2331 // Stride - BETakenCount > 0
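  // E.g., Stride == 4 with BETakenCount == 3 gives TripCount == 4 and
  // 4 - 3 > 0, so specializing on "Stride == 1" could not be profitable.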
2332 if (SE->isKnownPositive(StrideMinusBETaken)) {
2333    LLVM_DEBUG(
2334        dbgs() << "LAA: Stride>=TripCount; No point in versioning as the "
2335                  "Stride==1 predicate will imply that the loop executes "
2336                  "at most once.\n");
2337 return;
2338 }
2339  LLVM_DEBUG(dbgs() << "LAA: Found a strided access that we can version.");
2340
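  // Record the candidate so users of LAA can version the loop on the
  // "Stride == 1" predicate for this pointer.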
2341 SymbolicStrides[Ptr] = Stride;
2342 StrideSet.insert(Stride);
2343}
2344
2345LoopAccessInfo::LoopAccessInfo(Loop *L, ScalarEvolution *SE,
2346 const TargetLibraryInfo *TLI, AliasAnalysis *AA,
2347 DominatorTree *DT, LoopInfo *LI)
2348 : PSE(std::make_unique<PredicatedScalarEvolution>(*SE, *L)),
2349 PtrRtChecking(std::make_unique<RuntimePointerChecking>(SE)),
2350 DepChecker(std::make_unique<MemoryDepChecker>(*PSE, L)), TheLoop(L),
2351 NumLoads(0), NumStores(0), MaxSafeDepDistBytes(-1), CanVecMem(false),
2352 HasConvergentOp(false),
2353 HasDependenceInvolvingLoopInvariantAddress(false) {
2354 if (canAnalyzeLoop())
2355 analyzeLoop(AA, LI, TLI, DT);
2356}
2357
2358void LoopAccessInfo::print(raw_ostream &OS, unsigned Depth) const {
2359 if (CanVecMem) {
2360 OS.indent(Depth) << "Memory dependences are safe";
2361 if (MaxSafeDepDistBytes != -1ULL)
2362 OS << " with a maximum dependence distance of " << MaxSafeDepDistBytes
2363 << " bytes";
2364 if (PtrRtChecking->Need)
2365 OS << " with run-time checks";
2366 OS << "\n";
2367 }
2368
2369 if (HasConvergentOp)
2370 OS.indent(Depth) << "Has convergent operation in loop\n";
2371
2372 if (Report)
2373 OS.indent(Depth) << "Report: " << Report->getMsg() << "\n";
2374
2375 if (auto *Dependences = DepChecker->getDependences()) {
2376 OS.indent(Depth) << "Dependences:\n";
2377 for (auto &Dep : *Dependences) {
2378 Dep.print(OS, Depth + 2, DepChecker->getMemoryInstructions());
2379 OS << "\n";
2380 }
2381 } else
2382 OS.indent(Depth) << "Too many dependences, not recorded\n";
2383
2384  // List the pairs of accesses that need run-time checks to prove independence.
2385 PtrRtChecking->print(OS, Depth);
2386 OS << "\n";
2387
2388 OS.indent(Depth) << "Non vectorizable stores to invariant address were "
2389 << (HasDependenceInvolvingLoopInvariantAddress ? "" : "not ")
2390 << "found in loop.\n";
2391
2392 OS.indent(Depth) << "SCEV assumptions:\n";
2393 PSE->getUnionPredicate().print(OS, Depth);
2394
2395 OS << "\n";
2396
2397 OS.indent(Depth) << "Expressions re-written:\n";
2398 PSE->print(OS, Depth);
2399}
2400
2401LoopAccessLegacyAnalysis::LoopAccessLegacyAnalysis() : FunctionPass(ID) {
2402 initializeLoopAccessLegacyAnalysisPass(*PassRegistry::getPassRegistry());
2403}
2404
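// Construct the per-loop analysis result lazily and cache it for later queries.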
2405const LoopAccessInfo &LoopAccessLegacyAnalysis::getInfo(Loop *L) {
2406 auto &LAI = LoopAccessInfoMap[L];
2407
2408 if (!LAI)
2409 LAI = std::make_unique<LoopAccessInfo>(L, SE, TLI, AA, DT, LI);
2410
2411 return *LAI.get();
2412}
2413
2414void LoopAccessLegacyAnalysis::print(raw_ostream &OS, const Module *M) const {
2415 LoopAccessLegacyAnalysis &LAA = *const_cast<LoopAccessLegacyAnalysis *>(this);
2416
2417 for (Loop *TopLevelLoop : *LI)
2418 for (Loop *L : depth_first(TopLevelLoop)) {
2419 OS.indent(2) << L->getHeader()->getName() << ":\n";
2420 auto &LAI = LAA.getInfo(L);
2421 LAI.print(OS, 4);
2422 }
2423}
2424
2425bool LoopAccessLegacyAnalysis::runOnFunction(Function &F) {
2426 SE = &getAnalysis<ScalarEvolutionWrapperPass>().getSE();
2427 auto *TLIP = getAnalysisIfAvailable<TargetLibraryInfoWrapperPass>();
2428 TLI = TLIP ? &TLIP->getTLI(F) : nullptr;
2429 AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
2430 DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
2431 LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
2432
2433 return false;
2434}
2435
2436void LoopAccessLegacyAnalysis::getAnalysisUsage(AnalysisUsage &AU) const {
2437 AU.addRequired<ScalarEvolutionWrapperPass>();
2438 AU.addRequired<AAResultsWrapperPass>();
2439 AU.addRequired<DominatorTreeWrapperPass>();
2440 AU.addRequired<LoopInfoWrapperPass>();
2441
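  // This is a pure analysis pass: it modifies nothing, so every other
  // analysis is preserved.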
2442 AU.setPreservesAll();
2443}
2444
2445char LoopAccessLegacyAnalysis::ID = 0;
2446static const char laa_name[] = "Loop Access Analysis";
2447#define LAA_NAME "loop-accesses"
2448
2449INITIALIZE_PASS_BEGIN(LoopAccessLegacyAnalysis, LAA_NAME, laa_name, false, true)
2450INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
2451INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass)
2452INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
2453INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
2454INITIALIZE_PASS_END(LoopAccessLegacyAnalysis, LAA_NAME, laa_name, false, true)
2455
2456AnalysisKey LoopAccessAnalysis::Key;
2457
2458LoopAccessInfo LoopAccessAnalysis::run(Loop &L, LoopAnalysisManager &AM,
2459 LoopStandardAnalysisResults &AR) {
2460 return LoopAccessInfo(&L, &AR.SE, &AR.TLI, &AR.AA, &AR.DT, &AR.LI);
2461}
2462
2463namespace llvm {
2464
2465 Pass *createLAAPass() {
2466 return new LoopAccessLegacyAnalysis();
2467 }
2468
2469} // end namespace llvm